forked from OSchip/llvm-project
[NFC] Cleanup miscellaneous header items
- Explain the use of the _MM_SHUFFLE and _MM_SHUFFLE2 macros
- Update some doxygen parameter descriptions to match the implementations
- Add "see also" doxygen tags to some intrinsics
- Minor clang-format changes

Reviewers: RKSimon
Differential Revision: https://reviews.llvm.org/D124469
This commit is contained in:
parent
4041c44853
commit
df08b34938
|
@ -22,23 +22,23 @@
|
|||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);
|
||||
/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.
|
||||
///
|
||||
/// \param __X
|
||||
/// \param X
|
||||
/// A 128-bit vector of [2 x i64] containing one of the source operands.
|
||||
/// \param __Y
|
||||
/// \param Y
|
||||
/// A 128-bit vector of [2 x i64] containing one of the source operands.
|
||||
/// \param __I
|
||||
/// \param I
|
||||
/// An immediate value specifying which 64-bit values to select from the
|
||||
/// operands. Bit 0 is used to select a value from operand \a __X, and bit
|
||||
/// 4 is used to select a value from operand \a __Y: \n
|
||||
/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n
|
||||
/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n
|
||||
/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n
|
||||
/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
|
||||
/// operands. Bit 0 is used to select a value from operand \a X, and bit
|
||||
/// 4 is used to select a value from operand \a Y: \n
|
||||
/// Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n
|
||||
/// Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n
|
||||
/// Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n
|
||||
/// Bit[4]=1 indicates that bits[127:64] of operand \a Y are used.
|
||||
/// \returns The 128-bit integer vector containing the result of the carry-less
|
||||
/// multiplication of the selected 64-bit values.
|
||||
#define _mm_clmulepi64_si128(X, Y, I) \
|
||||
|
|
|
@ -1504,7 +1504,10 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n
|
||||
/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n
|
||||
/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n
|
||||
/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
|
||||
/// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
|
||||
#define _mm256_shuffle_ps(a, b, mask) \
|
||||
((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
|
||||
|
@ -1953,12 +1956,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// int _mm256_extract_epi32(__m256i X, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A 256-bit vector of [8 x i32].
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer operand with bits [2:0] determining which vector
|
||||
/// element is extracted and returned.
|
||||
/// \returns A 32-bit integer containing the extracted 32 bits of extended
|
||||
|
@ -1971,12 +1978,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// int _mm256_extract_epi16(__m256i X, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A 256-bit integer vector of [16 x i16].
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer operand with bits [3:0] determining which vector
|
||||
/// element is extracted and returned.
|
||||
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
|
||||
|
@ -1990,12 +2001,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// int _mm256_extract_epi8(__m256i X, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A 256-bit integer vector of [32 x i8].
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer operand with bits [4:0] determining which vector
|
||||
/// element is extracted and returned.
|
||||
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
|
||||
|
@ -2010,12 +2025,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// long long _mm256_extract_epi64(__m256i X, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A 256-bit integer vector of [4 x i64].
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer operand with bits [1:0] determining which vector
|
||||
/// element is extracted and returned.
|
||||
/// \returns A 64-bit integer containing the extracted 64 bits of extended
|
||||
|
@ -2030,18 +2049,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m256i _mm256_insert_epi32(__m256i X, int I, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A vector of [8 x i32] to be used by the insert operation.
|
||||
/// \param __b
|
||||
/// \param I
|
||||
/// An integer value. The replacement value for the insert operation.
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer specifying the index of the vector element to be
|
||||
/// replaced.
|
||||
/// \returns A copy of vector \a __a, after replacing its element indexed by
|
||||
/// \a __imm with \a __b.
|
||||
/// \returns A copy of vector \a X, after replacing its element indexed by
|
||||
/// \a N with \a I.
|
||||
#define _mm256_insert_epi32(X, I, N) \
|
||||
((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
|
||||
(int)(I), (int)(N)))
|
||||
|
@ -2053,18 +2076,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m256i _mm256_insert_epi16(__m256i X, int I, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A vector of [16 x i16] to be used by the insert operation.
|
||||
/// \param __b
|
||||
/// \param I
|
||||
/// An i16 integer value. The replacement value for the insert operation.
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer specifying the index of the vector element to be
|
||||
/// replaced.
|
||||
/// \returns A copy of vector \a __a, after replacing its element indexed by
|
||||
/// \a __imm with \a __b.
|
||||
/// \returns A copy of vector \a X, after replacing its element indexed by
|
||||
/// \a N with \a I.
|
||||
#define _mm256_insert_epi16(X, I, N) \
|
||||
((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
|
||||
(int)(I), (int)(N)))
|
||||
|
@ -2075,18 +2102,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m256i _mm256_insert_epi8(__m256i X, int I, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A vector of [32 x i8] to be used by the insert operation.
|
||||
/// \param __b
|
||||
/// \param I
|
||||
/// An i8 integer value. The replacement value for the insert operation.
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer specifying the index of the vector element to be
|
||||
/// replaced.
|
||||
/// \returns A copy of vector \a __a, after replacing its element indexed by
|
||||
/// \a __imm with \a __b.
|
||||
/// \returns A copy of vector \a X, after replacing its element indexed by
|
||||
/// \a N with \a I.
|
||||
#define _mm256_insert_epi8(X, I, N) \
|
||||
((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
|
||||
(int)(I), (int)(N)))
|
||||
|
@ -2098,18 +2129,22 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m256i _mm256_insert_epi64(__m256i X, long long I, const int N);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param X
|
||||
/// A vector of [4 x i64] to be used by the insert operation.
|
||||
/// \param __b
|
||||
/// \param I
|
||||
/// A 64-bit integer value. The replacement value for the insert operation.
|
||||
/// \param __imm
|
||||
/// \param N
|
||||
/// An immediate integer specifying the index of the vector element to be
|
||||
/// replaced.
|
||||
/// \returns A copy of vector \a __a, after replacing its element indexed by
|
||||
/// \a __imm with \a __b.
|
||||
/// \returns A copy of vector \a X, after replacing its element indexed by
|
||||
/// \a N with \a I.
|
||||
#define _mm256_insert_epi64(X, I, N) \
|
||||
((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
|
||||
(long long)(I), (int)(N)))
|
||||
|
|
|
@ -47,6 +47,7 @@ __tzcnt_u16(unsigned short __X)
|
|||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_32
|
||||
static __inline__ unsigned int __RELAXED_FN_ATTRS
|
||||
__tzcnt_u32(unsigned int __X)
|
||||
{
|
||||
|
@ -63,6 +64,7 @@ __tzcnt_u32(unsigned int __X)
|
|||
/// An unsigned 32-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns A 32-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u32
|
||||
static __inline__ int __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_32(unsigned int __X)
|
||||
{
|
||||
|
@ -83,6 +85,7 @@ _mm_tzcnt_32(unsigned int __X)
|
|||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of trailing zero
|
||||
/// bits in the operand.
|
||||
/// \see _mm_tzcnt_64
|
||||
static __inline__ unsigned long long __RELAXED_FN_ATTRS
|
||||
__tzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
|
@ -99,6 +102,7 @@ __tzcnt_u64(unsigned long long __X)
|
|||
/// An unsigned 64-bit integer whose trailing zeros are to be counted.
|
||||
/// \returns A 64-bit integer containing the number of trailing zero bits in
|
||||
/// the operand.
|
||||
/// \see __tzcnt_u64
|
||||
static __inline__ long long __RELAXED_FN_ATTRS
|
||||
_mm_tzcnt_64(unsigned long long __X)
|
||||
{
|
||||
|
|
|
@ -4126,21 +4126,25 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// int _mm_extract_epi16(__m128i a, const int imm);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param a
|
||||
/// A 128-bit integer vector.
|
||||
/// \param __imm
|
||||
/// An immediate value. Bits [2:0] selects values from \a __a to be assigned
|
||||
/// \param imm
|
||||
/// An immediate value. Bits [2:0] select values from \a a to be assigned
|
||||
/// to bits[15:0] of the result. \n
|
||||
/// 000: assign values from bits [15:0] of \a __a. \n
|
||||
/// 001: assign values from bits [31:16] of \a __a. \n
|
||||
/// 010: assign values from bits [47:32] of \a __a. \n
|
||||
/// 011: assign values from bits [63:48] of \a __a. \n
|
||||
/// 100: assign values from bits [79:64] of \a __a. \n
|
||||
/// 101: assign values from bits [95:80] of \a __a. \n
|
||||
/// 110: assign values from bits [111:96] of \a __a. \n
|
||||
/// 111: assign values from bits [127:112] of \a __a.
|
||||
/// 000: assign values from bits [15:0] of \a a. \n
|
||||
/// 001: assign values from bits [31:16] of \a a. \n
|
||||
/// 010: assign values from bits [47:32] of \a a. \n
|
||||
/// 011: assign values from bits [63:48] of \a a. \n
|
||||
/// 100: assign values from bits [79:64] of \a a. \n
|
||||
/// 101: assign values from bits [95:80] of \a a. \n
|
||||
/// 110: assign values from bits [111:96] of \a a. \n
|
||||
/// 111: assign values from bits [127:112] of \a a.
|
||||
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
|
||||
/// integer vector parameter and the remaining bits are assigned zeros.
|
||||
#define _mm_extract_epi16(a, imm) \
|
||||
|
@ -4154,18 +4158,22 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// \param a
|
||||
/// A 128-bit integer vector of [8 x i16]. This vector is copied to the
|
||||
/// result and then one of the eight elements in the result is replaced by
|
||||
/// the lower 16 bits of \a __b.
|
||||
/// \param __b
|
||||
/// the lower 16 bits of \a b.
|
||||
/// \param b
|
||||
/// An integer. The lower 16 bits of this parameter are written to the
|
||||
/// result beginning at an offset specified by \a __imm.
|
||||
/// \param __imm
|
||||
/// result beginning at an offset specified by \a imm.
|
||||
/// \param imm
|
||||
/// An immediate value specifying the bit offset in the result at which the
|
||||
/// lower 16 bits of \a __b are written.
|
||||
/// lower 16 bits of \a b are written.
|
||||
/// \returns A 128-bit integer vector containing the constructed values.
|
||||
#define _mm_insert_epi16(a, b, imm) \
|
||||
((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
|
||||
|
@ -4213,7 +4221,10 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
|
|||
/// 00: assign values from bits [31:0] of \a a. \n
|
||||
/// 01: assign values from bits [63:32] of \a a. \n
|
||||
/// 10: assign values from bits [95:64] of \a a. \n
|
||||
/// 11: assign values from bits [127:96] of \a a.
|
||||
/// 11: assign values from bits [127:96] of \a a. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 128-bit integer vector containing the shuffled values.
|
||||
#define _mm_shuffle_epi32(a, imm) \
|
||||
((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
|
||||
|
@ -4244,6 +4255,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
|
|||
/// 01: assign values from bits [31:16] of \a a. \n
|
||||
/// 10: assign values from bits [47:32] of \a a. \n
|
||||
/// 11: assign values from bits [63:48] of \a a. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 128-bit integer vector containing the shuffled values.
|
||||
#define _mm_shufflelo_epi16(a, imm) \
|
||||
((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
|
||||
|
@ -4274,6 +4288,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
|
|||
/// 01: assign values from bits [95:80] of \a a. \n
|
||||
/// 10: assign values from bits [111:96] of \a a. \n
|
||||
/// 11: assign values from bits [127:112] of \a a. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 128-bit integer vector containing the shuffled values.
|
||||
#define _mm_shufflehi_epi16(a, imm) \
|
||||
((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
|
||||
|
@ -4617,6 +4634,9 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
|
|||
/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
|
||||
/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
|
||||
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro.
|
||||
/// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form
|
||||
/// <c>[b1, b0]</c>.
|
||||
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
|
||||
#define _mm_shuffle_pd(a, b, i) \
|
||||
((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
|
||||
|
|
|
@ -1213,8 +1213,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
|
|||
/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-
|
||||
/// extended to 16-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
|
||||
/// sign-extended to 16-bit values.
|
||||
/// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
|
||||
/* This function always performs a signed extension, but __v16qi is a char
|
||||
|
|
|
@ -2086,7 +2086,7 @@ _mm_storer_ps(float *__p, __m128 __a)
|
|||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// void _mm_prefetch(const void * a, const int sel);
|
||||
/// void _mm_prefetch(const void *a, const int sel);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.
|
||||
|
@ -2360,7 +2360,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
|
|||
/// 00: assigned from bits [15:0] of \a a. \n
|
||||
/// 01: assigned from bits [31:16] of \a a. \n
|
||||
/// 10: assigned from bits [47:32] of \a a. \n
|
||||
/// 11: assigned from bits [63:48] of \a a.
|
||||
/// 11: assigned from bits [63:48] of \a a. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 64-bit integer vector containing the shuffled values.
|
||||
#define _mm_shuffle_pi16(a, n) \
|
||||
((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
|
||||
|
@ -2602,7 +2605,10 @@ void _mm_setcsr(unsigned int __i);
|
|||
/// 00: Bits [31:0] copied from the specified operand. \n
|
||||
/// 01: Bits [63:32] copied from the specified operand. \n
|
||||
/// 10: Bits [95:64] copied from the specified operand. \n
|
||||
/// 11: Bits [127:96] copied from the specified operand.
|
||||
/// 11: Bits [127:96] copied from the specified operand. \n
|
||||
/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
|
||||
/// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
|
||||
/// <c>[b6, b4, b2, b0]</c>.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
|
||||
#define _mm_shuffle_ps(a, b, mask) \
|
||||
((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
|
||||
|
|
Loading…
Reference in New Issue