[DOXYGEN] Improved doxygen comments for x86 intrinsics.

Improved doxygen comments for the following intrinsics headers: __wmmintrin_pclmul.h, bmiintrin.h, emmintrin.h, f16cintrin.h, immintrin.h, mmintrin.h, pmmintrin.h, tmmintrin.h Added \n commands to insert a line breaks where necessary, since one long line of documentation is nearly unreadable. Formatted comments to fit into 80 chars. In some cases added \a command in front of the parameter names to display them in italics. llvm-svn: 290561
2016-12-27 00:49:38 +00:00 · 2016-12-27 00:49:38 +00:00 · dffe45b3e6
parent 88c4ffb4e0
commit dffe45b3e6
8 changed files with 217 additions and 203 deletions
--- a/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/clang/lib/Headers/__wmmintrin_pclmul.h
@ -42,12 +42,11 @@
 ///    A 128-bit vector of [2 x i64] containing one of the source operands.
 /// \param __I
 ///    An immediate value specifying which 64-bit values to select from the
-///    operands.
-///    Bit 0 is used to select a value from operand \a __X, and bit 4 is used
-///    to select a value from operand \a __Y:
-///    Bit[0]=0 indicates that bits[63:0] of operand \a __X are used.
-///    Bit[0]=1 indicates that bits[127:64] of operand \a __X are used.
-///    Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used.
+///    operands. Bit 0 is used to select a value from operand \a __X, and bit
+///    4 is used to select a value from operand \a __Y: \n
+///    Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n
+///    Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n
+///    Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n
 ///    Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
 /// \returns The 128-bit integer vector containing the result of the carry-less
 ///    multiplication of the selected 64-bit values.
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@ -295,8 +295,8 @@ __tzcnt_u32(unsigned int __X)
 ///
 /// \param __X
 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero
-///    bits in the operand.
+/// \returns An 32-bit integer containing the number of trailing zero bits in
+///    the operand.
 static __inline__ int __RELAXED_FN_ATTRS
 _mm_tzcnt_32(unsigned int __X)
 {
@ -532,8 +532,8 @@ __tzcnt_u64(unsigned long long __X)
 ///
 /// \param __X
 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An 64-bit integer containing the number of trailing zero
-///    bits in the operand.
+/// \returns An 64-bit integer containing the number of trailing zero bits in
+///    the operand.
 static __inline__ long long __RELAXED_FN_ATTRS
 _mm_tzcnt_64(unsigned long long __X)
 {
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@ -551,7 +551,8 @@ _mm_cmpord_pd(__m128d __a, __m128d __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> instruction.
+/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double].
@ -734,10 +735,10 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
  return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
 }

-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is greater than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is greater than the corresponding value
+///    in the second parameter. The comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@ -784,9 +785,9 @@ _mm_cmpge_sd(__m128d __a, __m128d __b)
  return (__m128d) { __c[0], __a[1] };
 }

-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is "ordered" with respect to the
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is "ordered" with respect to the
 ///    corresponding value in the second parameter. The comparison yields 0h for
 ///    false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
 ///    "ordered" with respect to each other if neither value is a NaN.
@ -809,16 +810,17 @@ _mm_cmpord_sd(__m128d __a, __m128d __b)
  return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
 }

-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is "unordered" with respect to the
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is "unordered" with respect to the
 ///    corresponding value in the second parameter. The comparison yields 0h
 ///    for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
 ///    are "unordered" with respect to each other if one or both values are NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> instruction.
+/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
@ -1115,8 +1117,8 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
-///    If either of the two lower double-precision values is NaN, 1 is returned.
+///    the second parameter. The comparison yields 0 for false, 1 for true. If
+///    either of the two lower double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@ -1414,7 +1416,8 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double].
@ -1431,7 +1434,8 @@ _mm_cvttpd_epi32(__m128d __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
@ -1559,9 +1563,9 @@ _mm_load1_pd(double const *__dp)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + needed
-/// shuffling instructions. In AVX mode, the shuffling may be combined with the
-/// \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
+/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
+/// needed shuffling instructions. In AVX mode, the shuffling may be combined
+/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
 ///
 /// \param __dp
 ///    A 16-byte aligned pointer to an array of double-precision values to be
@ -1624,7 +1628,7 @@ _mm_load_sd(double const *__dp)
 /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [63:0] of the result.
 /// \param __dp
 ///    A pointer to a 64-bit memory location containing a double-precision
@ -1651,7 +1655,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp)
 /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [127:64] of the result.
 /// \param __dp
 ///    A pointer to a 64-bit memory location containing a double-precision
@ -2645,8 +2649,8 @@ _mm_xor_si128(__m128i __a, __m128i __b)
 /// \param a
 ///    A 128-bit integer vector containing the source operand.
 /// \param imm
-///    An immediate value specifying the number of bytes to left-shift
-///    operand \a a.
+///    An immediate value specifying the number of bytes to left-shift operand
+///    \a a.
 /// \returns A 128-bit integer vector containing the left-shifted value.
 #define _mm_slli_si128(a, imm) __extension__ ({                              \
  (__m128i)__builtin_shufflevector(                                          \
@ -3247,7 +3251,8 @@ _mm_cvtsd_si64(__m128d __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
@ -3296,7 +3301,8 @@ _mm_cvtps_epi32(__m128 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [4 x float].
@ -3709,7 +3715,8 @@ _mm_set1_epi8(char __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
+///   instruction.
 ///
 /// \param __q0
 ///    A 64-bit integral value used to initialize the lower 64 bits of the
@ -3885,7 +3892,8 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> instruction.
+/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
+///   instruction.
 ///
 /// \param __d
 ///    A 128-bit integer vector containing the values to be moved.
@ -4134,14 +4142,14 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
 ///    A 128-bit integer vector.
 /// \param __imm
 ///    An immediate value. Bits [3:0] selects values from \a __a to be assigned
-///    to bits[15:0] of the result.
-///    000: assign values from bits [15:0] of \a __a.
-///    001: assign values from bits [31:16] of \a __a.
-///    010: assign values from bits [47:32] of \a __a.
-///    011: assign values from bits [63:48] of \a __a.
-///    100: assign values from bits [79:64] of \a __a.
-///    101: assign values from bits [95:80] of \a __a.
-///    110: assign values from bits [111:96] of \a __a.
+///    to bits[15:0] of the result. \n
+///    000: assign values from bits [15:0] of \a __a. \n
+///    001: assign values from bits [31:16] of \a __a. \n
+///    010: assign values from bits [47:32] of \a __a. \n
+///    011: assign values from bits [63:48] of \a __a. \n
+///    100: assign values from bits [79:64] of \a __a. \n
+///    101: assign values from bits [95:80] of \a __a. \n
+///    110: assign values from bits [111:96] of \a __a. \n
 ///    111: assign values from bits [127:112] of \a __a.
 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
 ///    integer vector parameter and the remaining bits are assigned zeros.
@ -4215,16 +4223,16 @@ _mm_movemask_epi8(__m128i __a)
 /// \param imm
 ///    An immediate value containing an 8-bit value specifying which elements to
 ///    copy from a. The destinations within the 128-bit destination are assigned
-///    values as follows:
-///    Bits [1:0] are used to assign values to bits [31:0] of the result.
-///    Bits [3:2] are used to assign values to bits [63:32] of the result.
-///    Bits [5:4] are used to assign values to bits [95:64] of the result.
-///    Bits [7:6] are used to assign values to bits [127:96] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [31:0] of a.
-///    01: assign values from bits [63:32] of a.
-///    10: assign values from bits [95:64] of a.
-///    11: assign values from bits [127:96] of a.
+///    values as follows: \n
+///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
+///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
+///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
+///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [31:0] of \a a. \n
+///    01: assign values from bits [63:32] of \a a. \n
+///    10: assign values from bits [95:64] of \a a. \n
+///    11: assign values from bits [127:96] of \a a.
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
@ -4248,16 +4256,16 @@ _mm_movemask_epi8(__m128i __a)
 ///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
 ///    [127:64] of the result.
 /// \param imm
-///    An 8-bit immediate value specifying which elements to copy from a.
-///    Bits[1:0] are used to assign values to bits [15:0] of the result.
-///    Bits[3:2] are used to assign values to bits [31:16] of the result.
-///    Bits[5:4] are used to assign values to bits [47:32] of the result.
-///    Bits[7:6] are used to assign values to bits [63:48] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [15:0] of a.
-///    01: assign values from bits [31:16] of a.
-///    10: assign values from bits [47:32] of a.
-///    11: assign values from bits [63:48] of a.
+///    An 8-bit immediate value specifying which elements to copy from \a a. \n
+///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
+///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
+///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
+///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [15:0] of \a a. \n
+///    01: assign values from bits [31:16] of \a a. \n
+///    10: assign values from bits [47:32] of \a a. \n
+///    11: assign values from bits [63:48] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
@ -4282,16 +4290,16 @@ _mm_movemask_epi8(__m128i __a)
 ///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
 ///    [63:0] of the result.
 /// \param imm
-///    An 8-bit immediate value specifying which elements to copy from a.
-///    Bits[1:0] are used to assign values to bits [79:64] of the result.
-///    Bits[3:2] are used to assign values to bits [95:80] of the result.
-///    Bits[5:4] are used to assign values to bits [111:96] of the result.
-///    Bits[7:6] are used to assign values to bits [127:112] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [79:64] of a.
-///    01: assign values from bits [95:80] of a.
-///    10: assign values from bits [111:96] of a.
-///    11: assign values from bits [127:112] of a.
+///    An 8-bit immediate value specifying which elements to copy from \a a. \n
+///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
+///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
+///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
+///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [79:64] of \a a. \n
+///    01: assign values from bits [95:80] of \a a. \n
+///    10: assign values from bits [111:96] of \a a. \n
+///    11: assign values from bits [127:112] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
@ -4307,28 +4315,29 @@ _mm_movemask_epi8(__m128i __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [16 x i8].
-///    Bits [71:64] are written to bits [7:0] of the result
-///    Bits [79:72] are written to bits [23:16] of the result.
-///    Bits [87:80] are written to bits [39:32] of the result.
-///    Bits [95:88] are written to bits [55:48] of the result.
-///    Bits [103:96] are written to bits [71:64] of the result.
-///    Bits [111:104] are written to bits [87:80] of the result.
-///    Bits [119:112] are written to bits [103:96] of the result.
+///    Bits [71:64] are written to bits [7:0] of the result. \n
+///    Bits [79:72] are written to bits [23:16] of the result. \n
+///    Bits [87:80] are written to bits [39:32] of the result. \n
+///    Bits [95:88] are written to bits [55:48] of the result. \n
+///    Bits [103:96] are written to bits [71:64] of the result. \n
+///    Bits [111:104] are written to bits [87:80] of the result. \n
+///    Bits [119:112] are written to bits [103:96] of the result. \n
 ///    Bits [127:120] are written to bits [119:112] of the result.
 /// \param __b
-///    A 128-bit vector of [16 x i8].
-///    Bits [71:64] are written to bits [15:8] of the result.
-///    Bits [79:72] are written to bits [31:24] of the result.
-///    Bits [87:80] are written to bits [47:40] of the result.
-///    Bits [95:88] are written to bits [63:56] of the result.
-///    Bits [103:96] are written to bits [79:72] of the result.
-///    Bits [111:104] are written to bits [95:88] of the result.
-///    Bits [119:112] are written to bits [111:104] of the result.
-///    Bits [127:120] are written to bits [127:120] of the destination.
+///    A 128-bit vector of [16 x i8]. \n
+///    Bits [71:64] are written to bits [15:8] of the result. \n
+///    Bits [79:72] are written to bits [31:24] of the result. \n
+///    Bits [87:80] are written to bits [47:40] of the result. \n
+///    Bits [95:88] are written to bits [63:56] of the result. \n
+///    Bits [103:96] are written to bits [79:72] of the result. \n
+///    Bits [111:104] are written to bits [95:88] of the result. \n
+///    Bits [119:112] are written to bits [111:104] of the result. \n
+///    Bits [127:120] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
@ -4341,19 +4350,20 @@ _mm_unpackhi_epi8(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [8 x i16].
-///    Bits [79:64] are written to bits [15:0] of the result.
-///    Bits [95:80] are written to bits [47:32] of the result.
-///    Bits [111:96] are written to bits [79:64] of the result.
+///    Bits [79:64] are written to bits [15:0] of the result. \n
+///    Bits [95:80] are written to bits [47:32] of the result. \n
+///    Bits [111:96] are written to bits [79:64] of the result. \n
 ///    Bits [127:112] are written to bits [111:96] of the result.
 /// \param __b
 ///    A 128-bit vector of [8 x i16].
-///    Bits [79:64] are written to bits [31:16] of the result.
-///    Bits [95:80] are written to bits [63:48] of the result.
-///    Bits [111:96] are written to bits [95:80] of the result.
+///    Bits [79:64] are written to bits [31:16] of the result. \n
+///    Bits [95:80] are written to bits [63:48] of the result. \n
+///    Bits [111:96] are written to bits [95:80] of the result. \n
 ///    Bits [127:112] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4367,15 +4377,16 @@ _mm_unpackhi_epi16(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x i32].
-///    Bits [95:64] are written to bits [31:0] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [95:64] are written to bits [31:0] of the destination. \n
 ///    Bits [127:96] are written to bits [95:64] of the destination.
 /// \param __b
-///    A 128-bit vector of [4 x i32].
-///    Bits [95:64] are written to bits [64:32] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [95:64] are written to bits [64:32] of the destination. \n
 ///    Bits [127:96] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4389,13 +4400,14 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x i64].
+///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [127:64] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x i64].
+///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [127:64] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4409,27 +4421,28 @@ _mm_unpackhi_epi64(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [16 x i8].
-///    Bits [7:0] are written to bits [7:0] of the result.
-///    Bits [15:8] are written to bits [23:16] of the result.
-///    Bits [23:16] are written to bits [39:32] of the result.
-///    Bits [31:24] are written to bits [55:48] of the result.
-///    Bits [39:32] are written to bits [71:64] of the result.
-///    Bits [47:40] are written to bits [87:80] of the result.
-///    Bits [55:48] are written to bits [103:96] of the result.
-///    Bits [63:56] are written to bits [119:112] of the destination.
+///    A 128-bit vector of [16 x i8]. \n
+///    Bits [7:0] are written to bits [7:0] of the result. \n
+///    Bits [15:8] are written to bits [23:16] of the result. \n
+///    Bits [23:16] are written to bits [39:32] of the result. \n
+///    Bits [31:24] are written to bits [55:48] of the result. \n
+///    Bits [39:32] are written to bits [71:64] of the result. \n
+///    Bits [47:40] are written to bits [87:80] of the result. \n
+///    Bits [55:48] are written to bits [103:96] of the result. \n
+///    Bits [63:56] are written to bits [119:112] of the result.
 /// \param __b
 ///    A 128-bit vector of [16 x i8].
-///    Bits [7:0] are written to bits [15:8] of the result.
-///    Bits [15:8] are written to bits [31:24] of the result.
-///    Bits [23:16] are written to bits [47:40] of the result.
-///    Bits [31:24] are written to bits [63:56] of the result.
-///    Bits [39:32] are written to bits [79:72] of the result.
-///    Bits [47:40] are written to bits [95:88] of the result.
-///    Bits [55:48] are written to bits [111:104] of the result.
+///    Bits [7:0] are written to bits [15:8] of the result. \n
+///    Bits [15:8] are written to bits [31:24] of the result. \n
+///    Bits [23:16] are written to bits [47:40] of the result. \n
+///    Bits [31:24] are written to bits [63:56] of the result. \n
+///    Bits [39:32] are written to bits [79:72] of the result. \n
+///    Bits [47:40] are written to bits [95:88] of the result. \n
+///    Bits [55:48] are written to bits [111:104] of the result. \n
 ///    Bits [63:56] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4444,19 +4457,20 @@ _mm_unpacklo_epi8(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [8 x i16].
-///    Bits [15:0] are written to bits [15:0] of the result.
-///    Bits [31:16] are written to bits [47:32] of the result.
-///    Bits [47:32] are written to bits [79:64] of the result.
+///    Bits [15:0] are written to bits [15:0] of the result. \n
+///    Bits [31:16] are written to bits [47:32] of the result. \n
+///    Bits [47:32] are written to bits [79:64] of the result. \n
 ///    Bits [63:48] are written to bits [111:96] of the result.
 /// \param __b
 ///    A 128-bit vector of [8 x i16].
-///    Bits [15:0] are written to bits [31:16] of the result.
-///    Bits [31:16] are written to bits [63:48] of the result.
-///    Bits [47:32] are written to bits [95:80] of the result.
+///    Bits [15:0] are written to bits [31:16] of the result. \n
+///    Bits [31:16] are written to bits [63:48] of the result. \n
+///    Bits [47:32] are written to bits [95:80] of the result. \n
 ///    Bits [63:48] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4470,15 +4484,16 @@ _mm_unpacklo_epi16(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x i32].
-///    Bits [31:0] are written to bits [31:0] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [31:0] are written to bits [31:0] of the destination. \n
 ///    Bits [63:32] are written to bits [95:64] of the destination.
 /// \param __b
-///    A 128-bit vector of [4 x i32].
-///    Bits [31:0] are written to bits [64:32] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [31:0] are written to bits [64:32] of the destination. \n
 ///    Bits [63:32] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -4492,14 +4507,15 @@ _mm_unpacklo_epi32(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x i64].
-///    Bits [63:0] are written to bits [63:0] of the destination.
+///    A 128-bit vector of [2 x i64]. \n
+///    Bits [63:0] are written to bits [63:0] of the destination. \n
 /// \param __b
-///    A 128-bit vector of [2 x i64].
-///    Bits [63:0] are written to bits [127:64] of the destination.
+///    A 128-bit vector of [2 x i64]. \n
+///    Bits [63:0] are written to bits [127:64] of the destination. \n
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
@ -4568,10 +4584,10 @@ _mm_move_epi64(__m128i __a)
 /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@ -4589,10 +4605,10 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b)
 /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@ -4639,11 +4655,11 @@ _mm_movemask_pd(__m128d __a)
 ///    A 128-bit vector of [2 x double].
 /// \param i
 ///    An 8-bit immediate value. The least significant two bits specify which
-///    elements to copy from a and b:
-///    Bit[0] = 0: lower element of a copied to lower element of result.
-///    Bit[0] = 1: upper element of a copied to lower element of result.
-///    Bit[1] = 0: lower element of \a b copied to upper element of result.
-///    Bit[1] = 1: upper element of \a b copied to upper element of result.
+///    elements to copy from a and b: \n
+///    Bit[0] = 0: lower element of a copied to lower element of result. \n
+///    Bit[0] = 1: upper element of a copied to lower element of result. \n
+///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
+///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n
 /// \returns A 128-bit vector of [2 x double] containing the shuffled values.
 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@ -65,11 +65,11 @@ _cvtsh_ss(unsigned short __a)
 ///    A 32-bit single-precision float value to be converted to a 16-bit
 ///    half-precision float value.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns The converted 16-bit half-precision float value.
 #define _cvtss_sh(a, imm)  \
@ -90,11 +90,11 @@ _cvtsh_ss(unsigned short __a)
 /// \param a
 ///    A 128-bit vector containing 32-bit float values.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns A 128-bit vector containing converted 16-bit half-precision float
 ///    values. The lower 64 bits are used to store the converted 16-bit
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@ -84,11 +84,11 @@
 ///    A 256-bit vector containing 32-bit single-precision float values to be
 ///    converted to 16-bit half-precision float values.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns A 128-bit vector containing the converted 16-bit half-precision
 ///    float values.
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@ -211,16 +211,16 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
 ///
 /// \param __m1
-///    A 64-bit integer vector of [8 x i8].
-///    Bits [39:32] are written to bits [7:0] of the result.
-///    Bits [47:40] are written to bits [23:16] of the result.
-///    Bits [55:48] are written to bits [39:32] of the result.
+///    A 64-bit integer vector of [8 x i8]. \n 
+///    Bits [39:32] are written to bits [7:0] of the result. \n
+///    Bits [47:40] are written to bits [23:16] of the result. \n
+///    Bits [55:48] are written to bits [39:32] of the result. \n
 ///    Bits [63:56] are written to bits [55:48] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [39:32] are written to bits [15:8] of the result.
-///    Bits [47:40] are written to bits [31:24] of the result.
-///    Bits [55:48] are written to bits [47:40] of the result.
+///    Bits [39:32] are written to bits [15:8] of the result. \n
+///    Bits [47:40] are written to bits [31:24] of the result. \n
+///    Bits [55:48] are written to bits [47:40] of the result. \n
 ///    Bits [63:56] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
@ -239,11 +239,11 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [47:32] are written to bits [15:0] of the result.
+///    Bits [47:32] are written to bits [15:0] of the result. \n
 ///    Bits [63:48] are written to bits [47:32] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [47:32] are written to bits [31:16] of the result.
+///    Bits [47:32] are written to bits [31:16] of the result. \n
 ///    Bits [63:48] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
@ -283,15 +283,15 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [7:0] are written to bits [7:0] of the result.
-///    Bits [15:8] are written to bits [23:16] of the result.
-///    Bits [23:16] are written to bits [39:32] of the result.
+///    Bits [7:0] are written to bits [7:0] of the result. \n
+///    Bits [15:8] are written to bits [23:16] of the result. \n
+///    Bits [23:16] are written to bits [39:32] of the result. \n
 ///    Bits [31:24] are written to bits [55:48] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [7:0] are written to bits [15:8] of the result.
-///    Bits [15:8] are written to bits [31:24] of the result.
-///    Bits [23:16] are written to bits [47:40] of the result.
+///    Bits [7:0] are written to bits [15:8] of the result. \n
+///    Bits [15:8] are written to bits [31:24] of the result. \n
+///    Bits [23:16] are written to bits [47:40] of the result. \n
 ///    Bits [31:24] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
@ -310,11 +310,11 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [15:0] are written to bits [15:0] of the result.
+///    Bits [15:0] are written to bits [15:0] of the result. \n
 ///    Bits [31:16] are written to bits [47:32] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [15:0] are written to bits [31:16] of the result.
+///    Bits [15:0] are written to bits [31:16] of the result. \n
 ///    Bits [31:16] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@ -115,18 +115,18 @@ _mm_hsub_ps(__m128 __a, __m128 __b)

 /// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
 ///    vector of [4 x float] to float values stored in a 128-bit vector of
-///    [4 x float].
-///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
-///    the destination.
-///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
-///    destination.
+///    [4 x float]. 
 ///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x float].
+///    A 128-bit vector of [4 x float]. \n
+///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
+///    the destination. \n
+///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
+///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@ -135,20 +135,19 @@ _mm_movehdup_ps(__m128 __a)
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
 }

-/// \brief Duplicates low-order (even-indexed) values from a 128-bit
-///    vector of [4 x float] to float values stored in a 128-bit vector of
-///    [4 x float].
-///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
-///    the destination.
-///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
-///    destination.
+/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
+///    [4 x float] to float values stored in a 128-bit vector of [4 x float]. 
 ///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x float].
+///    A 128-bit vector of [4 x float] \n
+///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
+///    the destination. \n
+///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
+///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
 static __inline__ __m128 __DEFAULT_FN_ATTRS
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@ -580,11 +580,11 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 128-bit integer vector containing control bytes corresponding to
 ///    positions in the destination:
-///    Bit 7:
-///    1: Clear the corresponding byte in the destination.
+///    Bit 7: \n
+///    1: Clear the corresponding byte in the destination. \n
 ///    0: Copy the selected source byte to the corresponding byte in the
-///    destination.
-///    Bits [6:4] Reserved.
+///    destination. \n
+///    Bits [6:4] Reserved.  \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 128-bit integer vector containing the copied or cleared values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@ -606,10 +606,10 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
 /// \param __b
 ///    A 64-bit integer vector containing control bytes corresponding to
 ///    positions in the destination:
-///    Bit 7:
-///    1: Clear the corresponding byte in the destination.
+///    Bit 7: \n
+///    1: Clear the corresponding byte in the destination. \n
 ///    0: Copy the selected source byte to the corresponding byte in the
-///    destination.
+///    destination. \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 64-bit integer vector containing the copied or cleared values.
 static __inline__ __m64 __DEFAULT_FN_ATTRS