mirror of https://github.com/rust-lang/rust.git
optimize zipping over array iterators
This commit is contained in:
parent
6683f13fa1
commit
b018ad3d41
|
@ -4,7 +4,7 @@ use crate::num::NonZeroUsize;
|
|||
use crate::{
|
||||
fmt,
|
||||
intrinsics::transmute_unchecked,
|
||||
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen},
|
||||
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen, TrustedRandomAccessNoCoerce},
|
||||
mem::MaybeUninit,
|
||||
ops::{IndexRange, Range},
|
||||
ptr,
|
||||
|
@ -293,6 +293,12 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
|
|||
|
||||
NonZeroUsize::new(remaining).map_or(Ok(()), Err)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
|
||||
// SAFETY: The caller must provide an idx that is in bound of the remainder.
|
||||
unsafe { self.data.as_ptr().add(self.alive.start()).add(idx).cast::<T>().read() }
|
||||
}
|
||||
}
|
||||
|
||||
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
|
||||
|
@ -374,6 +380,25 @@ impl<T, const N: usize> FusedIterator for IntoIter<T, N> {}
|
|||
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
|
||||
unsafe impl<T, const N: usize> TrustedLen for IntoIter<T, N> {}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[unstable(issue = "none", feature = "std_internals")]
|
||||
#[rustc_unsafe_specialization_marker]
|
||||
pub trait NonDrop {}
|
||||
|
||||
// T: Copy as approximation for !Drop since get_unchecked does not advance self.alive
|
||||
// and thus we can't implement drop-handling
|
||||
#[unstable(issue = "none", feature = "std_internals")]
|
||||
impl<T: Copy> NonDrop for T {}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[unstable(issue = "none", feature = "std_internals")]
|
||||
unsafe impl<T, const N: usize> TrustedRandomAccessNoCoerce for IntoIter<T, N>
|
||||
where
|
||||
T: NonDrop,
|
||||
{
|
||||
const MAY_HAVE_SIDE_EFFECT: bool = false;
|
||||
}
|
||||
|
||||
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
|
||||
impl<T: Clone, const N: usize> Clone for IntoIter<T, N> {
|
||||
fn clone(&self) -> Self {
|
||||
|
|
|
@ -94,6 +94,14 @@ where
|
|||
ZipImpl::nth(self, n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc,
|
||||
{
|
||||
ZipImpl::fold(self, init, f)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item
|
||||
where
|
||||
|
@ -129,6 +137,9 @@ trait ZipImpl<A, B> {
|
|||
where
|
||||
A: DoubleEndedIterator + ExactSizeIterator,
|
||||
B: DoubleEndedIterator + ExactSizeIterator;
|
||||
fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc;
|
||||
// This has the same safety requirements as `Iterator::__iterator_get_unchecked`
|
||||
unsafe fn get_unchecked(&mut self, idx: usize) -> <Self as Iterator>::Item
|
||||
where
|
||||
|
@ -228,6 +239,14 @@ where
|
|||
{
|
||||
unreachable!("Always specialized");
|
||||
}
|
||||
|
||||
#[inline]
|
||||
default fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc,
|
||||
{
|
||||
SpecFold::spec_fold(self, init, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
|
@ -251,6 +270,24 @@ where
|
|||
// `Iterator::__iterator_get_unchecked`.
|
||||
unsafe { (self.a.__iterator_get_unchecked(idx), self.b.__iterator_get_unchecked(idx)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc,
|
||||
{
|
||||
let mut accum = init;
|
||||
let len = ZipImpl::size_hint(&self).0;
|
||||
for i in 0..len {
|
||||
// SAFETY: since Self: TrustedRandomAccessNoCoerce we can trust the size-hint to
|
||||
// calculate the length and then use that to do unchecked iteration.
|
||||
// fold consumes the iterator so we don't need to fixup any state.
|
||||
unsafe {
|
||||
accum = f(accum, self.get_unchecked(i));
|
||||
}
|
||||
}
|
||||
accum
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
|
@ -590,3 +627,56 @@ unsafe impl<I: Iterator + TrustedRandomAccessNoCoerce> SpecTrustedRandomAccess f
|
|||
unsafe { self.__iterator_get_unchecked(index) }
|
||||
}
|
||||
}
|
||||
|
||||
trait SpecFold: Iterator {
|
||||
fn spec_fold<B, F>(self, init: B, f: F) -> B
|
||||
where
|
||||
Self: Sized,
|
||||
F: FnMut(B, Self::Item) -> B;
|
||||
}
|
||||
|
||||
impl<A: Iterator, B: Iterator> SpecFold for Zip<A, B> {
|
||||
// Adapted from default impl from the Iterator trait
|
||||
#[inline]
|
||||
default fn spec_fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc,
|
||||
{
|
||||
let mut accum = init;
|
||||
while let Some(x) = ZipImpl::next(&mut self) {
|
||||
accum = f(accum, x);
|
||||
}
|
||||
accum
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: TrustedLen, B: TrustedLen> SpecFold for Zip<A, B> {
|
||||
#[inline]
|
||||
fn spec_fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
|
||||
where
|
||||
F: FnMut(Acc, Self::Item) -> Acc,
|
||||
{
|
||||
let mut accum = init;
|
||||
loop {
|
||||
let (upper, more) = if let Some(upper) = ZipImpl::size_hint(&self).1 {
|
||||
(upper, false)
|
||||
} else {
|
||||
// Per TrustedLen contract a None upper bound means more than usize::MAX items
|
||||
(usize::MAX, true)
|
||||
};
|
||||
|
||||
for _ in 0..upper {
|
||||
let pair =
|
||||
// SAFETY: TrustedLen guarantees that at least `upper` many items are available
|
||||
// therefore we know they can't be None
|
||||
unsafe { (self.a.next().unwrap_unchecked(), self.b.next().unwrap_unchecked()) };
|
||||
accum = f(accum, pair);
|
||||
}
|
||||
|
||||
if !more {
|
||||
break;
|
||||
}
|
||||
}
|
||||
accum
|
||||
}
|
||||
}
|
||||
|
|
|
@ -184,7 +184,11 @@ fn test_zip_nested_sideffectful() {
|
|||
let it = xs.iter_mut().map(|x| *x = 1).enumerate().zip(&ys);
|
||||
it.count();
|
||||
}
|
||||
assert_eq!(&xs, &[1, 1, 1, 1, 1, 0]);
|
||||
let length_aware = &xs == &[1, 1, 1, 1, 0, 0];
|
||||
let probe_first = &xs == &[1, 1, 1, 1, 1, 0];
|
||||
|
||||
// either implementation is valid according to zip documentation
|
||||
assert!(length_aware || probe_first);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// assembly-output: emit-asm
|
||||
// # zen3 previously exhibited odd vectorization
|
||||
// compile-flags: --crate-type=lib -Ctarget-cpu=znver3 -O
|
||||
// only-x86_64
|
||||
// ignore-sgx
|
||||
|
||||
use std::iter;
|
||||
|
||||
// previously this produced a long chain of
|
||||
// 56: vpextrb $6, %xmm0, %ecx
|
||||
// 57: orb %cl, 22(%rsi)
|
||||
// 58: vpextrb $7, %xmm0, %ecx
|
||||
// 59: orb %cl, 23(%rsi)
|
||||
// [...]
|
||||
|
||||
// CHECK-LABEL: zip_arrays:
|
||||
#[no_mangle]
|
||||
pub fn zip_arrays(mut a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
|
||||
// CHECK-NOT: vpextrb
|
||||
// CHECK-NOT: orb %cl
|
||||
// CHECK: vorps
|
||||
iter::zip(&mut a, b).for_each(|(a, b)| *a |= b);
|
||||
// CHECK: retq
|
||||
a
|
||||
}
|
Loading…
Reference in New Issue