forked from OSchip/llvm-project
[DOXYGEN] Documentation for the newly added x86 intrinsics.
Added doxygen comments for the newly added intrinsics in avxintrin.h, namely _mm256_cvtsd_f64, _mm256_cvtsi256_si32 and _mm256_cvtss_f32. Added doxygen comments for the new intrinsics in emmintrin.h, namely _mm_loadu_si64 and _mm_load_sd. Explicit parameter names were added for _mm_clflush and _mm_setcsr. The rest of the changes are editorial, removing trailing spaces at the end of the lines. Differential Revision: https://reviews.llvm.org/D28503 llvm-svn: 291876
This commit is contained in:
parent
db08e83280
commit
2e041c9c20
|
@ -2184,12 +2184,32 @@ _mm256_cvttps_epi32(__m256 __a)
|
|||
return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
|
||||
}
|
||||
|
||||
/// \brief Returns the first element of the input vector of [4 x double].
|
||||
///
|
||||
/// \headerfile <avxintrin.h>
|
||||
///
|
||||
/// This intrinsic is a utility function and does not correspond to a specific
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 256-bit vector of [4 x double].
|
||||
/// \returns A 64-bit double containing the first element of the input vector.
|
||||
static __inline double __DEFAULT_FN_ATTRS
|
||||
_mm256_cvtsd_f64(__m256d __a)
|
||||
{
|
||||
return __a[0];
|
||||
}
|
||||
|
||||
/// \brief Returns the first element of the input vector of [8 x i32].
|
||||
///
|
||||
/// \headerfile <avxintrin.h>
|
||||
///
|
||||
/// This intrinsic is a utility function and does not correspond to a specific
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 256-bit vector of [8 x i32].
|
||||
/// \returns A 32-bit integer containing the first element of the input vector.
|
||||
static __inline int __DEFAULT_FN_ATTRS
|
||||
_mm256_cvtsi256_si32(__m256i __a)
|
||||
{
|
||||
|
@ -2197,6 +2217,16 @@ _mm256_cvtsi256_si32(__m256i __a)
|
|||
return __b[0];
|
||||
}
|
||||
|
||||
/// \brief Returns the first element of the input vector of [8 x float].
|
||||
///
|
||||
/// \headerfile <avxintrin.h>
|
||||
///
|
||||
/// This intrinsic is a utility function and does not correspond to a specific
|
||||
/// instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 256-bit vector of [8 x float].
|
||||
/// \returns A 32-bit float containing the first element of the input vector.
|
||||
static __inline float __DEFAULT_FN_ATTRS
|
||||
_mm256_cvtss_f32(__m256 __a)
|
||||
{
|
||||
|
|
|
@ -1599,6 +1599,17 @@ _mm_loadu_pd(double const *__dp)
|
|||
return ((struct __loadu_pd*)__dp)->__v;
|
||||
}
|
||||
|
||||
/// \brief Loads a 64-bit integer value to the low element of a 128-bit integer
|
||||
/// vector and clears the upper element.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A pointer to a 64-bit memory location. The address of the memory
|
||||
/// location does not have to be aligned.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the loaded value.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_loadu_si64(void const *__a)
|
||||
{
|
||||
|
@ -1609,6 +1620,17 @@ _mm_loadu_si64(void const *__a)
|
|||
return (__m128i){__u, 0L};
|
||||
}
|
||||
|
||||
/// \brief Loads a 64-bit double-precision value to the low element of a
|
||||
/// 128-bit vector of [2 x double] and clears the upper element.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.
|
||||
///
|
||||
/// \param __dp
|
||||
/// A pointer to a memory location containing a double-precision value.
|
||||
/// The address of the memory location does not have to be aligned.
|
||||
/// \returns A 128-bit vector of [2 x double] containing the loaded value.
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_load_sd(double const *__dp)
|
||||
{
|
||||
|
@ -4019,7 +4041,7 @@ extern "C" {
|
|||
/// \param __p
|
||||
/// A pointer to the memory location used to identify the cache line to be
|
||||
/// flushed.
|
||||
void _mm_clflush(void const *);
|
||||
void _mm_clflush(void const * __p);
|
||||
|
||||
/// \brief Forces strong memory ordering (serialization) between load
|
||||
/// instructions preceding this instruction and load instructions following
|
||||
|
@ -4141,7 +4163,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
|
|||
/// \param __a
|
||||
/// A 128-bit integer vector.
|
||||
/// \param __imm
|
||||
/// An immediate value. Bits [3:0] selects values from \a __a to be assigned
|
||||
/// An immediate value. Bits [2:0] select values from \a __a to be assigned
|
||||
/// to bits[15:0] of the result. \n
|
||||
/// 000: assign values from bits [15:0] of \a __a. \n
|
||||
/// 001: assign values from bits [31:16] of \a __a. \n
|
||||
|
|
|
@ -211,7 +211,7 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
|
|||
/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
|
||||
///
|
||||
/// \param __m1
|
||||
/// A 64-bit integer vector of [8 x i8]. \n
|
||||
/// A 64-bit integer vector of [8 x i8]. \n
|
||||
/// Bits [39:32] are written to bits [7:0] of the result. \n
|
||||
/// Bits [47:40] are written to bits [23:16] of the result. \n
|
||||
/// Bits [55:48] are written to bits [39:32] of the result. \n
|
||||
|
|
|
@ -115,7 +115,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
|
|||
|
||||
/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
|
||||
/// vector of [4 x float] to float values stored in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
/// [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
@ -136,7 +136,7 @@ _mm_movehdup_ps(__m128 __a)
|
|||
}
|
||||
|
||||
/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
|
||||
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
|
||||
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
|
|
@ -2067,7 +2067,7 @@ _mm_storer_ps(float *__p, __m128 __a)
|
|||
/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will
|
||||
/// be generated. \n
|
||||
/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
|
||||
/// be generated.
|
||||
/// be generated.
|
||||
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
|
||||
#endif
|
||||
|
||||
|
@ -2435,17 +2435,17 @@ extern "C" {
|
|||
/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,
|
||||
/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.
|
||||
/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().
|
||||
/// </li>
|
||||
/// </li>
|
||||
/// <li>
|
||||
/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,
|
||||
/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
|
||||
/// _MM_GET_ROUNDING_MODE().
|
||||
/// </li>
|
||||
/// <li>
|
||||
/// <li>
|
||||
/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
|
||||
/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().
|
||||
/// </li>
|
||||
/// <li>
|
||||
/// <li>
|
||||
/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,
|
||||
/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper
|
||||
/// _MM_GET_DENORMALS_ZERO_MODE().
|
||||
|
@ -2468,11 +2468,11 @@ extern "C" {
|
|||
unsigned int _mm_getcsr(void);
|
||||
|
||||
/// \brief Sets the MXCSR register with the 32-bit unsigned integer value.
|
||||
///
|
||||
///
|
||||
/// There are several groups of macros associated with this intrinsic,
|
||||
/// including:
|
||||
/// <ul>
|
||||
/// <li>
|
||||
/// <li>
|
||||
/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,
|
||||
/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,
|
||||
/// _MM_EXCEPT_INEXACT. There is a convenience wrapper
|
||||
|
@ -2517,7 +2517,7 @@ unsigned int _mm_getcsr(void);
|
|||
///
|
||||
/// \param __i
|
||||
/// A 32-bit unsigned integer value to be written to the MXCSR register.
|
||||
void _mm_setcsr(unsigned int);
|
||||
void _mm_setcsr(unsigned int __i);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
|
|
Loading…
Reference in New Issue