mirror of https://github.com/rust-lang/rust.git

commit 8765f91727

Sync portable-simd to 2023 July 07

Sync up to rust-lang/portable-simd@7c7dbe0c50
@@ -38,8 +38,9 @@ jobs:
- i586-unknown-linux-gnu
- aarch64-unknown-linux-gnu
- armv7-unknown-linux-gnueabihf
- mips-unknown-linux-gnu
- mips64-unknown-linux-gnuabi64
# non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
- powerpc-unknown-linux-gnu
- powerpc64-unknown-linux-gnu
- riscv64gc-unknown-linux-gnu

@@ -191,8 +192,8 @@ jobs:
# Note: The issue above means neither of these mips targets will use
# MSA (mips simd) but MIPS uses a nonstandard binary representation
# for NaNs which makes it worth testing on despite that.
- mips-unknown-linux-gnu
- mips64-unknown-linux-gnuabi64
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
- riscv64gc-unknown-linux-gnu
# TODO this test works, but it appears to time out
# - powerpc-unknown-linux-gnu

@@ -1,55 +1,51 @@
use crate::simd::SimdElement;

mod sealed {
    /// Cast vector elements to other types.
    ///
    /// # Safety
    /// Implementing this trait asserts that the type is a valid vector element for the `simd_cast`
    /// or `simd_as` intrinsics.
    pub unsafe trait Sealed {}
}
use sealed::Sealed;

/// Supporting trait for `Simd::cast`. Typically doesn't need to be used directly.
///
/// # Safety
/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast` or
/// `simd_as` intrinsics.
pub unsafe trait SimdCast: SimdElement {}
pub trait SimdCast: Sealed + SimdElement {}

// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for i8 {}
unsafe impl Sealed for i8 {}
impl SimdCast for i8 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for i16 {}
unsafe impl Sealed for i16 {}
impl SimdCast for i16 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for i32 {}
unsafe impl Sealed for i32 {}
impl SimdCast for i32 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for i64 {}
unsafe impl Sealed for i64 {}
impl SimdCast for i64 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for isize {}
unsafe impl Sealed for isize {}
impl SimdCast for isize {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for u8 {}
unsafe impl Sealed for u8 {}
impl SimdCast for u8 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for u16 {}
unsafe impl Sealed for u16 {}
impl SimdCast for u16 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for u32 {}
unsafe impl Sealed for u32 {}
impl SimdCast for u32 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for u64 {}
unsafe impl Sealed for u64 {}
impl SimdCast for u64 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for usize {}
unsafe impl Sealed for usize {}
impl SimdCast for usize {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for f32 {}
unsafe impl Sealed for f32 {}
impl SimdCast for f32 {}
// Safety: primitive number types can be cast to other primitive number types
unsafe impl SimdCast for f64 {}

/// Supporting trait for `Simd::cast_ptr`. Typically doesn't need to be used directly.
///
/// # Safety
/// Implementing this trait asserts that the type is a valid vector element for the `simd_cast_ptr`
/// intrinsic.
pub unsafe trait SimdCastPtr<T> {}

// Safety: pointers can be cast to other pointer types
unsafe impl<T, U> SimdCastPtr<T> for *const U
where
    U: core::ptr::Pointee,
    T: core::ptr::Pointee<Metadata = U::Metadata>,
{
}
// Safety: pointers can be cast to other pointer types
unsafe impl<T, U> SimdCastPtr<T> for *mut U
where
    U: core::ptr::Pointee,
    T: core::ptr::Pointee<Metadata = U::Metadata>,
{
}
unsafe impl Sealed for f64 {}
impl SimdCast for f64 {}

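The hunk above turns `SimdCast` from an `unsafe trait` into a safe trait sealed behind a private `Sealed` supertrait, so downstream code can still write `T: SimdCast` bounds but can no longer add its own implementations. A minimal, self-contained sketch of that sealing pattern (illustrative names only, not code from the crate):

```rust
// The supertrait lives in a private module, so it is nameable only here.
mod sealed {
    pub trait Sealed {}
}
use sealed::Sealed;

/// Publicly nameable, but only implementable inside this crate,
/// because the `Sealed` supertrait cannot be named downstream.
pub trait Castable: Sealed {}

impl Sealed for i32 {}
impl Castable for i32 {}

fn takes_castable<T: Castable>(_x: T) {}

fn main() {
    takes_castable(1i32); // fine: i32 implements Castable here
    // Other crates can use `T: Castable` bounds, but cannot provide
    // new impls, which is why the trait no longer needs to be unsafe.
}
```
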
@@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount};
use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};

/// Operations on SIMD vectors of constant pointers.
pub trait SimdConstPtr: Copy + Sealed {

@@ -9,6 +9,9 @@ pub trait SimdConstPtr: Copy + Sealed {
    /// Vector of `isize` with the same number of lanes.
    type Isize;

    /// Vector of const pointers with the same number of lanes.
    type CastPtr<T>;

    /// Vector of mutable pointers to the same type.
    type MutPtr;

@@ -18,6 +21,11 @@ pub trait SimdConstPtr: Copy + Sealed {
    /// Returns `true` for each lane that is null.
    fn is_null(self) -> Self::Mask;

    /// Casts to a pointer of another type.
    ///
    /// Equivalent to calling [`pointer::cast`] on each lane.
    fn cast<T>(self) -> Self::CastPtr<T>;

    /// Changes constness without changing the type.
    ///
    /// Equivalent to calling [`pointer::cast_mut`] on each lane.

@@ -78,6 +86,7 @@ where
{
    type Usize = Simd<usize, LANES>;
    type Isize = Simd<isize, LANES>;
    type CastPtr<U> = Simd<*const U, LANES>;
    type MutPtr = Simd<*mut T, LANES>;
    type Mask = Mask<isize, LANES>;

@@ -86,9 +95,22 @@ where
        Simd::splat(core::ptr::null()).simd_eq(self)
    }

    #[inline]
    fn cast<U>(self) -> Self::CastPtr<U> {
        // SimdElement currently requires zero-sized metadata, so this should never fail.
        // If this ever changes, `simd_cast_ptr` should produce a post-mono error.
        use core::{mem::size_of, ptr::Pointee};
        assert_eq!(size_of::<<T as Pointee>::Metadata>(), 0);
        assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);

        // Safety: pointers can be cast
        unsafe { intrinsics::simd_cast_ptr(self) }
    }

    #[inline]
    fn cast_mut(self) -> Self::MutPtr {
        self.cast_ptr()
        // Safety: pointers can be cast
        unsafe { intrinsics::simd_cast_ptr(self) }
    }

    #[inline]

@@ -106,9 +128,9 @@ where
        // In the mean-time, this operation is defined to be "as if" it was
        // a wrapping_offset, so we can emulate it as such. This should properly
        // restore pointer provenance even under today's compiler.
        self.cast_ptr::<*const u8>()
        self.cast::<u8>()
            .wrapping_offset(addr.cast::<isize>() - self.addr().cast::<isize>())
            .cast_ptr()
            .cast()
    }

    #[inline]

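The new `SimdConstPtr::cast` (and the reworked `cast_mut`) are lanewise analogues of `pointer::cast` and `pointer::cast_mut`. A rough usage sketch, assuming a nightly toolchain with `#![feature(portable_simd)]` and the trait re-exported as `core::simd::SimdConstPtr`, as in this crate:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdConstPtr};

fn main() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    // Four lanes, all pointing at the start of `data`.
    let ptrs: Simd<*const f32, 4> = Simd::splat(data.as_ptr());
    // Lanewise pointer cast, analogous to `pointer::cast::<u8>()` per lane.
    let bytes: Simd<*const u8, 4> = ptrs.cast::<u8>();
    assert!(!bytes.is_null().any());
}
```
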
@@ -1,6 +1,6 @@
use super::sealed::Sealed;
use crate::simd::{
    intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd,
    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialEq, SimdPartialOrd,
    SupportedLaneCount,
};

@@ -15,6 +15,53 @@ pub trait SimdFloat: Copy + Sealed {
    /// Bit representation of this SIMD vector type.
    type Bits;

    /// A SIMD vector with a different element type.
    type Cast<T: SimdElement>;

    /// Performs elementwise conversion of this vector's elements to another SIMD-valid type.
    ///
    /// This follows the semantics of Rust's `as` conversion for floats (truncating or saturating
    /// at the limits) for each element.
    ///
    /// # Example
    /// ```
    /// # #![feature(portable_simd)]
    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
    /// # use simd::{SimdFloat, SimdInt, Simd};
    /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
    /// let ints = floats.cast::<i32>();
    /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
    ///
    /// // Formally equivalent, but `Simd::cast` can optimize better.
    /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
    ///
    /// // The float conversion does not round-trip.
    /// let floats_again = ints.cast();
    /// assert_ne!(floats, floats_again);
    /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
    /// ```
    #[must_use]
    fn cast<T: SimdCast>(self) -> Self::Cast<T>;

    /// Rounds toward zero and converts to the same-width integer type, assuming that
    /// the value is finite and fits in that type.
    ///
    /// # Safety
    /// The value must:
    ///
    /// * Not be NaN
    /// * Not be infinite
    /// * Be representable in the return type, after truncating off its fractional part
    ///
    /// If these requirements are infeasible or costly, consider using the safe function [cast],
    /// which saturates on conversion.
    ///
    /// [cast]: Simd::cast
    unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
    where
        Self::Scalar: core::convert::FloatToInt<I>;

    /// Raw transmutation to an unsigned integer vector type with the
    /// same size and number of lanes.
    #[must_use = "method returns a new vector and does not mutate the original value"]

@@ -206,6 +253,24 @@ macro_rules! impl_trait {
    type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>;
    type Scalar = $ty;
    type Bits = Simd<$bits_ty, LANES>;
    type Cast<T: SimdElement> = Simd<T, LANES>;

    #[inline]
    fn cast<T: SimdCast>(self) -> Self::Cast<T>
    {
        // Safety: supported types are guaranteed by SimdCast
        unsafe { intrinsics::simd_as(self) }
    }

    #[inline]
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
    where
        Self::Scalar: core::convert::FloatToInt<I>,
    {
        // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants
        unsafe { intrinsics::simd_cast(self) }
    }

    #[inline]
    fn to_bits(self) -> Simd<$bits_ty, LANES> {

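For the `SimdFloat::cast` / `to_int_unchecked` pair added above, the difference is the safety contract: `cast` follows saturating scalar `as` semantics, while `to_int_unchecked` lets the caller promise that every lane is finite, non-NaN, and in range. A hedged sketch on nightly with `#![feature(portable_simd)]`:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdFloat};

fn main() {
    let floats = Simd::from_array([1.5f32, -2.25, 100.0, 0.0]);

    // Safe, saturating conversion (`as`-style semantics), now a trait method.
    let ints = floats.cast::<i32>();
    assert_eq!(ints.to_array(), [1, -2, 100, 0]);

    // Unsafe, potentially faster conversion: the caller guarantees every lane
    // is finite, not NaN, and representable in i32 after truncation.
    let ints_unchecked = unsafe { floats.to_int_unchecked::<i32>() };
    assert_eq!(ints_unchecked.to_array(), [1, -2, 100, 0]);
}
```
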
@@ -1,6 +1,6 @@
use super::sealed::Sealed;
use crate::simd::{
    intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount,
    intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SupportedLaneCount,
};

/// Operations on SIMD vectors of signed integers.

@@ -11,6 +11,16 @@ pub trait SimdInt: Copy + Sealed {
    /// Scalar type contained by this SIMD vector type.
    type Scalar;

    /// A SIMD vector with a different element type.
    type Cast<T: SimdElement>;

    /// Performs elementwise conversion of this vector's elements to another SIMD-valid type.
    ///
    /// This follows the semantics of Rust's `as` conversion for casting integers (wrapping to
    /// other integer types, and saturating to float types).
    #[must_use]
    fn cast<T: SimdCast>(self) -> Self::Cast<T>;

    /// Lanewise saturating add.
    ///
    /// # Examples

@@ -198,6 +208,13 @@ macro_rules! impl_trait {
{
    type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
    type Scalar = $ty;
    type Cast<T: SimdElement> = Simd<T, LANES>;

    #[inline]
    fn cast<T: SimdCast>(self) -> Self::Cast<T> {
        // Safety: supported types are guaranteed by SimdCast
        unsafe { intrinsics::simd_as(self) }
    }

    #[inline]
    fn saturating_add(self, second: Self) -> Self {

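`SimdInt::cast` keeps scalar `as` semantics lanewise: integer-to-integer casts wrap, and integer-to-float casts convert like `as`. A small illustration (nightly, `#![feature(portable_simd)]`):

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdInt};

fn main() {
    let ints = Simd::from_array([-1i32, 300, i32::MAX, 0]);

    // Integer-to-integer casts wrap, exactly like scalar `as`.
    let bytes = ints.cast::<u8>();
    assert_eq!(bytes.to_array(), [255, 44, 255, 0]);

    // Integer-to-float casts also follow scalar `as`.
    let floats = ints.cast::<f32>();
    assert_eq!(floats.to_array(), [-1.0, 300.0, i32::MAX as f32, 0.0]);
}
```
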
@@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount};
use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};

/// Operations on SIMD vectors of mutable pointers.
pub trait SimdMutPtr: Copy + Sealed {

@@ -9,6 +9,9 @@ pub trait SimdMutPtr: Copy + Sealed {
    /// Vector of `isize` with the same number of lanes.
    type Isize;

    /// Vector of const pointers with the same number of lanes.
    type CastPtr<T>;

    /// Vector of constant pointers to the same type.
    type ConstPtr;

@@ -18,6 +21,11 @@ pub trait SimdMutPtr: Copy + Sealed {
    /// Returns `true` for each lane that is null.
    fn is_null(self) -> Self::Mask;

    /// Casts to a pointer of another type.
    ///
    /// Equivalent to calling [`pointer::cast`] on each lane.
    fn cast<T>(self) -> Self::CastPtr<T>;

    /// Changes constness without changing the type.
    ///
    /// Equivalent to calling [`pointer::cast_const`] on each lane.

@@ -73,6 +81,7 @@ where
{
    type Usize = Simd<usize, LANES>;
    type Isize = Simd<isize, LANES>;
    type CastPtr<U> = Simd<*mut U, LANES>;
    type ConstPtr = Simd<*const T, LANES>;
    type Mask = Mask<isize, LANES>;

@@ -81,9 +90,22 @@ where
        Simd::splat(core::ptr::null_mut()).simd_eq(self)
    }

    #[inline]
    fn cast<U>(self) -> Self::CastPtr<U> {
        // SimdElement currently requires zero-sized metadata, so this should never fail.
        // If this ever changes, `simd_cast_ptr` should produce a post-mono error.
        use core::{mem::size_of, ptr::Pointee};
        assert_eq!(size_of::<<T as Pointee>::Metadata>(), 0);
        assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);

        // Safety: pointers can be cast
        unsafe { intrinsics::simd_cast_ptr(self) }
    }

    #[inline]
    fn cast_const(self) -> Self::ConstPtr {
        self.cast_ptr()
        // Safety: pointers can be cast
        unsafe { intrinsics::simd_cast_ptr(self) }
    }

    #[inline]

@@ -101,9 +123,9 @@ where
        // In the mean-time, this operation is defined to be "as if" it was
        // a wrapping_offset, so we can emulate it as such. This should properly
        // restore pointer provenance even under today's compiler.
        self.cast_ptr::<*mut u8>()
        self.cast::<u8>()
            .wrapping_offset(addr.cast::<isize>() - self.addr().cast::<isize>())
            .cast_ptr()
            .cast()
    }

    #[inline]

@@ -1,11 +1,21 @@
use super::sealed::Sealed;
use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
use crate::simd::{intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};

/// Operations on SIMD vectors of unsigned integers.
pub trait SimdUint: Copy + Sealed {
    /// Scalar type contained by this SIMD vector type.
    type Scalar;

    /// A SIMD vector with a different element type.
    type Cast<T: SimdElement>;

    /// Performs elementwise conversion of this vector's elements to another SIMD-valid type.
    ///
    /// This follows the semantics of Rust's `as` conversion for casting integers (wrapping to
    /// other integer types, and saturating to float types).
    #[must_use]
    fn cast<T: SimdCast>(self) -> Self::Cast<T>;

    /// Lanewise saturating add.
    ///
    /// # Examples

@@ -77,6 +87,13 @@ macro_rules! impl_trait {
    LaneCount<LANES>: SupportedLaneCount,
{
    type Scalar = $ty;
    type Cast<T: SimdElement> = Simd<T, LANES>;

    #[inline]
    fn cast<T: SimdCast>(self) -> Self::Cast<T> {
        // Safety: supported types are guaranteed by SimdCast
        unsafe { intrinsics::simd_as(self) }
    }

    #[inline]
    fn saturating_add(self, second: Self) -> Self {

@@ -10,6 +10,7 @@ macro_rules! impl_traits {
where
    LaneCount<LANES>: SupportedLaneCount,
{
    #[inline]
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.fold(Simd::splat(0 as $type), Add::add)
    }

@@ -19,6 +20,7 @@ macro_rules! impl_traits {
where
    LaneCount<LANES>: SupportedLaneCount,
{
    #[inline]
    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.fold(Simd::splat(1 as $type), Mul::mul)
    }

@@ -28,6 +30,7 @@ macro_rules! impl_traits {
where
    LaneCount<LANES>: SupportedLaneCount,
{
    #[inline]
    fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
        iter.fold(Simd::splat(0 as $type), Add::add)
    }

@@ -37,6 +40,7 @@ macro_rules! impl_traits {
where
    LaneCount<LANES>: SupportedLaneCount,
{
    #[inline]
    fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
        iter.fold(Simd::splat(1 as $type), Mul::mul)
    }

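These `Sum`/`Product` impls fold an iterator of vectors lanewise, starting from `splat(0)` and `splat(1)` respectively, as the bodies above show. For example (nightly sketch, `#![feature(portable_simd)]`):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let chunks = [
        Simd::from_array([1u32, 2, 3, 4]),
        Simd::from_array([10, 20, 30, 40]),
        Simd::from_array([100, 200, 300, 400]),
    ];

    // Lanewise sum over an iterator of vectors, starting from splat(0).
    let total: Simd<u32, 4> = chunks.iter().copied().sum();
    assert_eq!(total.to_array(), [111, 222, 333, 444]);

    // Lanewise product, starting from splat(1).
    let product: Simd<u32, 4> = chunks.iter().copied().product();
    assert_eq!(product.to_array(), [1000, 8000, 27000, 64000]);
}
```
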
@@ -16,7 +16,7 @@
)]
#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
#![warn(missing_docs)]
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
#![unstable(feature = "portable_simd", issue = "86656")]
//! Portable SIMD module.

@@ -179,6 +179,7 @@ where
    /// Panics if any lane is not 0 or -1.
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    #[track_caller]
    pub fn from_int(value: Simd<T, LANES>) -> Self {
        assert!(T::valid(value), "all values must be either 0 or -1",);
        // Safety: the validity has been checked

@@ -217,6 +218,7 @@ where
    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    #[track_caller]
    pub fn test(&self, lane: usize) -> bool {
        assert!(lane < LANES, "lane index out of range");
        // Safety: the lane index has been checked

@@ -240,6 +242,7 @@ where
    /// # Panics
    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
    #[inline]
    #[track_caller]
    pub fn set(&mut self, lane: usize, value: bool) {
        assert!(lane < LANES, "lane index out of range");
        // Safety: the lane index has been checked

@@ -327,6 +330,7 @@ where
    T: MaskElement + fmt::Debug,
    LaneCount<LANES>: SupportedLaneCount,
{
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list()
            .entries((0..LANES).map(|lane| self.test(lane)))

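The `#[track_caller]` additions above make the panics in `Mask::from_int`, `test`, and `set` report the caller's location rather than the library internals. Basic usage of those methods, as a hedged nightly sketch:

```rust
#![feature(portable_simd)]
use core::simd::{Mask, Simd};

fn main() {
    // `from_int` panics (now pointing at the caller) unless every lane is 0 or -1.
    let raw = Simd::from_array([0i32, -1, -1, 0]);
    let mut mask = Mask::<i32, 4>::from_int(raw);

    assert!(!mask.test(0));
    assert!(mask.test(1));

    // `set` and `test` also panic at the caller if the lane index is out of range.
    mask.set(3, true);
    assert_eq!(mask.to_int().to_array(), [0, -1, -1, -1]);
}
```
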
@@ -23,6 +23,8 @@ mod vendor;

#[doc = include_str!("core_simd_docs.md")]
pub mod simd {
    pub mod prelude;

    pub(crate) use crate::core_simd::intrinsics;

    pub use crate::core_simd::alias::*;

@@ -15,6 +15,7 @@ where
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }

@@ -26,6 +27,7 @@ where
    LaneCount<LANES>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }

@@ -118,10 +120,14 @@ macro_rules! for_base_types {

    #[inline]
    #[must_use = "operator returns a new vector without mutating the inputs"]
    // TODO: only useful for int Div::div, but we hope that this
    // will essentially always always get inlined anyway.
    #[track_caller]
    fn $call(self, rhs: Self) -> Self::Output {
        $macro_impl!(self, rhs, $inner, $scalar)
    }
    })*
    }
    )*
    }
}

@@ -94,6 +94,7 @@ macro_rules! impl_integer {
    }

    #[inline]
    #[track_caller]
    fn simd_clamp(self, min: Self, max: Self) -> Self {
        assert!(
            min.simd_le(max).all(),

@@ -200,6 +201,7 @@ macro_rules! impl_mask {
    }

    #[inline]
    #[track_caller]
    fn simd_clamp(self, min: Self, max: Self) -> Self {
        assert!(
            min.simd_le(max).all(),

@@ -254,6 +256,7 @@ where
    }

    #[inline]
    #[track_caller]
    fn simd_clamp(self, min: Self, max: Self) -> Self {
        assert!(
            min.simd_le(max).all(),

@@ -303,6 +306,7 @@ where
    }

    #[inline]
    #[track_caller]
    fn simd_clamp(self, min: Self, max: Self) -> Self {
        assert!(
            min.simd_le(max).all(),

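`simd_clamp` likewise gains `#[track_caller]`, so its `min <= max` assertion is reported at the call site. A usage sketch (nightly, `#![feature(portable_simd)]`, assuming `SimdOrd` is in scope as in this crate):

```rust
#![feature(portable_simd)]
use core::simd::{Simd, SimdOrd};

fn main() {
    let v = Simd::from_array([-5i32, 0, 7, 100]);
    let lo = Simd::splat(0);
    let hi = Simd::splat(10);
    // Panics at the caller if any lane of `lo` exceeds the matching lane of `hi`.
    assert_eq!(v.simd_clamp(lo, hi).to_array(), [0, 0, 7, 10]);
}
```
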
@@ -0,0 +1,80 @@
//! The portable SIMD prelude.
//!
//! Includes important traits and types to be imported with a glob:
//! ```ignore
//! use std::simd::prelude::*;
//! ```

#[doc(no_inline)]
pub use super::{
    simd_swizzle, Mask, Simd, SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq,
    SimdPartialOrd, SimdUint,
};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{f32x1, f32x2, f32x4, f32x8, f32x16, f32x32, f32x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{f64x1, f64x2, f64x4, f64x8, f64x16, f64x32, f64x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{i8x1, i8x2, i8x4, i8x8, i8x16, i8x32, i8x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{i16x1, i16x2, i16x4, i16x8, i16x16, i16x32, i16x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{i32x1, i32x2, i32x4, i32x8, i32x16, i32x32, i32x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{i64x1, i64x2, i64x4, i64x8, i64x16, i64x32, i64x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{isizex1, isizex2, isizex4, isizex8, isizex16, isizex32, isizex64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{u8x1, u8x2, u8x4, u8x8, u8x16, u8x32, u8x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{u16x1, u16x2, u16x4, u16x8, u16x16, u16x32, u16x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{u32x1, u32x2, u32x4, u32x8, u32x16, u32x32, u32x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{u64x1, u64x2, u64x4, u64x8, u64x16, u64x32, u64x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{usizex1, usizex2, usizex4, usizex8, usizex16, usizex32, usizex64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{mask8x1, mask8x2, mask8x4, mask8x8, mask8x16, mask8x32, mask8x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{mask16x1, mask16x2, mask16x4, mask16x8, mask16x16, mask16x32, mask16x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{mask32x1, mask32x2, mask32x4, mask32x8, mask32x16, mask32x32, mask32x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{mask64x1, mask64x2, mask64x4, mask64x8, mask64x16, mask64x32, mask64x64};

#[rustfmt::skip]
#[doc(no_inline)]
pub use super::{masksizex1, masksizex2, masksizex4, masksizex8, masksizex16, masksizex32, masksizex64};

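With the new prelude, a single glob import brings in the vector and mask types, the fixed-size aliases such as `f32x4`, and the element traits, so trait methods like `cast` resolve without further imports. A usage sketch, assuming a nightly recent enough to ship this module (here via `core::simd::prelude`):

```rust
#![feature(portable_simd)]
// One glob import: `Simd`, `Mask`, the `SimdFloat`/`SimdInt`/... traits,
// and aliases like `f32x4`.
use core::simd::prelude::*;

fn main() {
    let x = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
    let y = f32x4::splat(0.5);
    // `cast` comes from `SimdFloat`, which the prelude re-exports.
    let halves = (x * y).cast::<i32>();
    assert_eq!(halves.to_array(), [0, 1, 1, 2]);
}
```
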
@@ -16,9 +16,14 @@ where
    #[inline]
    pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
        #![allow(unused_imports, unused_unsafe)]
        #[cfg(target_arch = "aarch64")]
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
        #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
        #[cfg(all(
            target_arch = "arm",
            target_feature = "v7",
            target_feature = "neon",
            target_endian = "little"
        ))]
        use core::arch::arm::{uint8x8_t, vtbl1_u8};
        #[cfg(target_arch = "wasm32")]
        use core::arch::wasm32 as wasm;

@@ -29,13 +34,24 @@ where
        // SAFETY: Intrinsics covered by cfg
        unsafe {
            match N {
                #[cfg(target_feature = "neon")]
                #[cfg(all(
                    any(
                        target_arch = "aarch64",
                        all(target_arch = "arm", target_feature = "v7")
                    ),
                    target_feature = "neon",
                    target_endian = "little"
                ))]
                8 => transize(vtbl1_u8, self, idxs),
                #[cfg(target_feature = "ssse3")]
                16 => transize(x86::_mm_shuffle_epi8, self, idxs),
                #[cfg(target_feature = "simd128")]
                16 => transize(wasm::i8x16_swizzle, self, idxs),
                #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
                #[cfg(all(
                    target_arch = "aarch64",
                    target_feature = "neon",
                    target_endian = "little"
                ))]
                16 => transize(vqtbl1q_u8, self, idxs),
                #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
                32 => transize_raw(avx2_pshufb, self, idxs),

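`swizzle_dyn` selects byte lanes by runtime indices and dispatches to the table-lookup intrinsics listed above when the target supports them; the change here restricts the NEON paths to little-endian targets. A small sketch of calling it (nightly, `#![feature(portable_simd)]`):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let table = Simd::from_array([10u8, 11, 12, 13, 14, 15, 16, 17]);
    let idxs = Simd::from_array([7u8, 6, 5, 4, 3, 2, 1, 0]);
    // Runtime (dynamic) lane selection: output lane i is table[idxs[i]].
    let reversed = table.swizzle_dyn(idxs);
    assert_eq!(reversed.to_array(), [17, 16, 15, 14, 13, 12, 11, 10]);
}
```
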
@@ -1,6 +1,6 @@
use crate::simd::{
    intrinsics, LaneCount, Mask, MaskElement, SimdCast, SimdCastPtr, SimdConstPtr, SimdMutPtr,
    SimdPartialOrd, SupportedLaneCount, Swizzle,
    intrinsics, LaneCount, Mask, MaskElement, SimdConstPtr, SimdMutPtr, SimdPartialOrd,
    SupportedLaneCount, Swizzle,
};
use core::convert::{TryFrom, TryInto};

@@ -122,6 +122,7 @@ where
    /// let v = u32x4::splat(0);
    /// assert_eq!(v.lanes(), 4);
    /// ```
    #[inline]
    pub const fn lanes(&self) -> usize {
        Self::LANES
    }

@@ -136,6 +137,7 @@ where
    /// let v = u32x4::splat(8);
    /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
    /// ```
    #[inline]
    pub fn splat(value: T) -> Self {
        // This is preferred over `[value; N]`, since it's explicitly a splat:
        // https://github.com/rust-lang/rust/issues/97804

@@ -156,6 +158,7 @@ where
    /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]);
    /// assert_eq!(v.as_array(), &[0, 1, 2, 3]);
    /// ```
    #[inline]
    pub const fn as_array(&self) -> &[T; N] {
        // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with
        // potential padding at the end, so pointer casting to a

@@ -167,6 +170,7 @@ where
    }

    /// Returns a mutable array reference containing the entire SIMD vector.
    #[inline]
    pub fn as_mut_array(&mut self) -> &mut [T; N] {
        // SAFETY: `Simd<T, N>` is just an overaligned `[T; N]` with
        // potential padding at the end, so pointer casting to a

@@ -184,6 +188,7 @@ where
    ///
    /// # Safety
    /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`.
    #[inline]
    const unsafe fn load(ptr: *const [T; N]) -> Self {
        // There are potentially simpler ways to write this function, but this should result in
        // LLVM `load <N x T>`

@@ -204,6 +209,7 @@ where
    ///
    /// # Safety
    /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`.
    #[inline]
    const unsafe fn store(self, ptr: *mut [T; N]) {
        // There are potentially simpler ways to write this function, but this should result in
        // LLVM `store <N x T>`

@@ -216,6 +222,7 @@ where
    }

    /// Converts an array to a SIMD vector.
    #[inline]
    pub const fn from_array(array: [T; N]) -> Self {
        // SAFETY: `&array` is safe to read.
        //

@@ -228,6 +235,7 @@ where
    }

    /// Converts a SIMD vector to an array.
    #[inline]
    pub const fn to_array(self) -> [T; N] {
        let mut tmp = core::mem::MaybeUninit::uninit();
        // SAFETY: writing to `tmp` is safe and initializes it.

@@ -259,6 +267,8 @@ where
    /// assert_eq!(v.as_array(), &[1, 2, 3, 4]);
    /// ```
    #[must_use]
    #[inline]
    #[track_caller]
    pub const fn from_slice(slice: &[T]) -> Self {
        assert!(
            slice.len() >= Self::LANES,

@@ -287,6 +297,8 @@ where
    /// v.copy_to_slice(&mut dest);
    /// assert_eq!(&dest, &[1, 2, 3, 4, 0, 0]);
    /// ```
    #[inline]
    #[track_caller]
    pub fn copy_to_slice(self, slice: &mut [T]) {
        assert!(
            slice.len() >= Self::LANES,

@@ -297,76 +309,6 @@ where
        unsafe { self.store(slice.as_mut_ptr().cast()) }
    }

    /// Performs elementwise conversion of a SIMD vector's elements to another SIMD-valid type.
    ///
    /// This follows the semantics of Rust's `as` conversion for casting integers between
    /// signed and unsigned (interpreting integers as 2s complement, so `-1` to `U::MAX` and
    /// `1 << (U::BITS -1)` becoming `I::MIN` ), and from floats to integers (truncating,
    /// or saturating at the limits) for each element.
    ///
    /// # Examples
    /// ```
    /// # #![feature(portable_simd)]
    /// # use core::simd::Simd;
    /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
    /// let ints = floats.cast::<i32>();
    /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
    ///
    /// // Formally equivalent, but `Simd::cast` can optimize better.
    /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
    ///
    /// // The float conversion does not round-trip.
    /// let floats_again = ints.cast();
    /// assert_ne!(floats, floats_again);
    /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
    /// ```
    #[must_use]
    #[inline]
    pub fn cast<U: SimdCast>(self) -> Simd<U, N>
    where
        T: SimdCast,
    {
        // Safety: supported types are guaranteed by SimdCast
        unsafe { intrinsics::simd_as(self) }
    }

    /// Casts a vector of pointers to another pointer type.
    #[must_use]
    #[inline]
    pub fn cast_ptr<U>(self) -> Simd<U, N>
    where
        T: SimdCastPtr<U>,
        U: SimdElement,
    {
        // Safety: supported types are guaranteed by SimdCastPtr
        unsafe { intrinsics::simd_cast_ptr(self) }
    }

    /// Rounds toward zero and converts to the same-width integer type, assuming that
    /// the value is finite and fits in that type.
    ///
    /// # Safety
    /// The value must:
    ///
    /// * Not be NaN
    /// * Not be infinite
    /// * Be representable in the return type, after truncating off its fractional part
    ///
    /// If these requirements are infeasible or costly, consider using the safe function [cast],
    /// which saturates on conversion.
    ///
    /// [cast]: Simd::cast
    #[inline]
    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
    pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, N>
    where
        T: core::convert::FloatToInt<I> + SimdCast,
        I: SimdCast,
    {
        // Safety: supported types are guaranteed by SimdCast, the caller is responsible for the extra invariants
        unsafe { intrinsics::simd_cast(self) }
    }

    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
    /// If an index is out-of-bounds, the element is instead selected from the `or` vector.
    ///

@@ -717,6 +659,7 @@ where
    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
{
    #[inline]
    fn clone(&self) -> Self {
        *self
    }

@@ -861,6 +804,7 @@ where
    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
{
    #[inline]
    fn from(array: [T; N]) -> Self {
        Self::from_array(array)
    }

@@ -871,6 +815,7 @@ where
    LaneCount<N>: SupportedLaneCount,
    T: SimdElement,
{
    #[inline]
    fn from(vector: Simd<T, N>) -> Self {
        vector.to_array()
    }

@@ -883,6 +828,7 @@ where
{
    type Error = core::array::TryFromSliceError;

    #[inline]
    fn try_from(slice: &[T]) -> Result<Self, core::array::TryFromSliceError> {
        Ok(Self::from_array(slice.try_into()?))
    }

@@ -895,6 +841,7 @@ where
{
    type Error = core::array::TryFromSliceError;

    #[inline]
    fn try_from(slice: &mut [T]) -> Result<Self, core::array::TryFromSliceError> {
        Ok(Self::from_array(slice.try_into()?))
    }

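The hunks above remove the inherent `Simd::cast`, `Simd::cast_ptr`, and `Simd::to_int_unchecked`; elementwise casting now lives on the element traits, so those traits must be in scope at the call site. A sketch of the post-change call sites (nightly, `#![feature(portable_simd)]`):

```rust
#![feature(portable_simd)]
// `cast` is no longer an inherent method on `Simd`; it comes from the
// element traits, which therefore need to be imported.
use core::simd::{Simd, SimdFloat, SimdInt};

fn main() {
    let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
    let ints = floats.cast::<i32>(); // needs `SimdFloat`
    assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));

    let back = ints.cast::<f32>(); // needs `SimdInt`
    assert_eq!(back.to_array(), [1.0, -4.0, 2147483647.0, 0.0]);
}
```
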
@@ -2,7 +2,8 @@
macro_rules! cast_types {
    ($start:ident, $($target:ident),*) => {
        mod $start {
            use core_simd::simd::Simd;
            #[allow(unused)]
            use core_simd::simd::{Simd, SimdInt, SimdUint, SimdFloat};
            type Vector<const N: usize> = Simd<$start, N>;
            $(
                mod $target {

@@ -53,6 +53,7 @@ macro_rules! float_rounding_test {

    test_helpers::test_lanes! {
        fn to_int_unchecked<const LANES: usize>() {
            use core_simd::simd::SimdFloat;
            // The maximum integer that can be represented by the equivalently sized float has
            // all of the mantissa digits set to 1, pushed up to the MSB.
            const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);