forked from OSchip/llvm-project
Minor changes in x86 intrinsics headers; NFC
I made several changes for consistency with the rest of the x86 intrinsics header files. Some of these changes help doxygen comments render better. 1. avxintrin.h - Moved the opening brace onto a separate line for several intrinsics (for consistency with the rest of the intrinsics). 2. emmintrin.h - Moved the doxygen comment next to the body of the function; - Added braces after extern "C" even though there is only one declaration each time. 3. xmmintrin.h - Moved the doxygen comment next to the body of the function; - Added intrinsic prototypes for a couple of macro definitions into the doxygen comment; - Added braces after extern "C" even though there is only one declaration each time. 4. ammintrin.h - Removed the extra line between the doxygen comment and the body of the functions (for consistency with the rest of the files). Desk reviewed by Paul Robinson. llvm-svn: 287278
This commit is contained in:
parent
8651144353
commit
2174b6fe72
|
@ -114,7 +114,6 @@ _mm_extract_si64(__m128i __x, __m128i __y)
|
|||
/// destination operand x with the specified bitfields replaced by the lower
|
||||
/// bits of source operand y. The upper 64 bits of the return value are
|
||||
/// undefined.
|
||||
|
||||
#define _mm_inserti_si64(x, y, len, idx) \
|
||||
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
|
||||
(__v2di)(__m128i)(y), \
|
||||
|
@ -146,7 +145,6 @@ _mm_extract_si64(__m128i __x, __m128i __y)
|
|||
/// destination operand __x with the specified bitfields replaced by the
|
||||
/// lower bits of source operand __y. The upper 64 bits of the return value
|
||||
/// are undefined.
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_insert_si64(__m128i __x, __m128i __y)
|
||||
{
|
||||
|
|
|
@ -3823,7 +3823,8 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
|
|||
/// \returns A 256-bit floating-point vector of [8 x float] containing the
|
||||
/// concatenated result.
|
||||
static __inline __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_set_m128 (__m128 __hi, __m128 __lo) {
|
||||
_mm256_set_m128 (__m128 __hi, __m128 __lo)
|
||||
{
|
||||
return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
}
|
||||
|
||||
|
@ -3843,7 +3844,8 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) {
|
|||
/// \returns A 256-bit floating-point vector of [4 x double] containing the
|
||||
/// concatenated result.
|
||||
static __inline __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_set_m128d (__m128d __hi, __m128d __lo) {
|
||||
_mm256_set_m128d (__m128d __hi, __m128d __lo)
|
||||
{
|
||||
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
|
||||
}
|
||||
|
||||
|
@ -3862,7 +3864,8 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo) {
|
|||
/// result.
|
||||
/// \returns A 256-bit integer vector containing the concatenated result.
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_set_m128i (__m128i __hi, __m128i __lo) {
|
||||
_mm256_set_m128i (__m128i __hi, __m128i __lo)
|
||||
{
|
||||
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
|
||||
}
|
||||
|
||||
|
@ -3884,7 +3887,8 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo) {
|
|||
/// \returns A 256-bit floating-point vector of [8 x float] containing the
|
||||
/// concatenated result.
|
||||
static __inline __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
|
||||
_mm256_setr_m128 (__m128 __lo, __m128 __hi)
|
||||
{
|
||||
return _mm256_set_m128(__hi, __lo);
|
||||
}
|
||||
|
||||
|
@ -3906,7 +3910,8 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
|
|||
/// \returns A 256-bit floating-point vector of [4 x double] containing the
|
||||
/// concatenated result.
|
||||
static __inline __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
|
||||
_mm256_setr_m128d (__m128d __lo, __m128d __hi)
|
||||
{
|
||||
return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
|
||||
}
|
||||
|
||||
|
@ -3926,7 +3931,8 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
|
|||
/// result.
|
||||
/// \returns A 256-bit integer vector containing the concatenated result.
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
|
||||
_mm256_setr_m128i (__m128i __lo, __m128i __hi)
|
||||
{
|
||||
return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
|
||||
}
|
||||
|
||||
|
|
|
@ -4752,6 +4752,10 @@ _mm_castsi128_pd(__m128i __a)
|
|||
return (__m128d)__a;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// \brief Indicates that a spin loop is being executed for the purposes of
|
||||
/// optimizing power consumption during the loop.
|
||||
///
|
||||
|
@ -4759,11 +4763,11 @@ _mm_castsi128_pd(__m128i __a)
|
|||
///
|
||||
/// This intrinsic corresponds to the \c PAUSE instruction.
|
||||
///
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
#endif
|
||||
void _mm_pause(void);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
|
||||
|
|
|
@ -2084,6 +2084,10 @@ _mm_stream_ps(float *__p, __m128 __a)
|
|||
__builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// \brief Forces strong memory ordering (serialization) between store
|
||||
/// instructions preceding this instruction and store instructions following
|
||||
/// this instruction, ensuring the system completes all previous stores
|
||||
|
@ -2093,16 +2097,21 @@ _mm_stream_ps(float *__p, __m128 __a)
|
|||
///
|
||||
/// This intrinsic corresponds to the \c SFENCE instruction.
|
||||
///
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
#endif
|
||||
void _mm_sfence(void);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
/// \brief Extracts 16-bit element from a 64-bit vector of [4 x i16] and
|
||||
/// returns it, as specified by the immediate integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// int _mm_extract_pi16(__m64 a, int n);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c VPEXTRW / PEXTRW instruction.
|
||||
///
|
||||
/// \param __a
|
||||
|
@ -2123,6 +2132,10 @@ void _mm_sfence(void);
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// void _mm_insert_pi(__m64 a, int d, int n);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c VPINSRW / PINSRW instruction.
|
||||
///
|
||||
/// \param __a
|
||||
|
@ -2262,12 +2275,12 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
|
|||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c PSHUFW instruction.
|
||||
///
|
||||
/// \code
|
||||
/// __m64 _mm_shuffle_pi16(__m64 a, const int n);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c PSHUFW instruction.
|
||||
///
|
||||
/// \param a
|
||||
/// A 64-bit integer vector containing the values to be shuffled.
|
||||
/// \param n
|
||||
|
|
Loading…
Reference in New Issue