forked from OSchip/llvm-project
AVX-512: Added intrinsics to clang.
The set is small, that what I have right now. Everybody is welcome to add more. llvm-svn: 213641
This commit is contained in:
parent
f164859efc
commit
fcc6df310d
|
@ -615,8 +615,10 @@ BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iV4iC*V4LLiV4iIc", "")
|
|||
// F16C
|
||||
BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "")
|
||||
BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "")
|
||||
BUILTIN(__builtin_ia32_vcvtps2ph512, "V16sV16fIi", "")
|
||||
BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "")
|
||||
BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "")
|
||||
BUILTIN(__builtin_ia32_vcvtph2ps512, "V16fV16s", "")
|
||||
|
||||
// RDRAND
|
||||
BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "")
|
||||
|
@ -761,4 +763,104 @@ BUILTIN(__builtin_ia32_rdpmc, "ULLii", "")
|
|||
BUILTIN(__builtin_ia32_rdtsc, "ULLi", "")
|
||||
BUILTIN(__builtin_ia32_rdtscp, "ULLiUi*", "")
|
||||
|
||||
// AVX-512
|
||||
BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt28sd_mask, "V2dV2dV2dV2dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt28ss_mask, "V4fV4fV4fV4fUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "")
|
||||
BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "")
|
||||
BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "")
|
||||
BUILTIN(__builtin_ia32_rcp28sd_mask, "V2dV2dV2dV2dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rcp28ss_mask, "V4fV4fV4fV4fUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fCiUsi", "")
|
||||
BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dCiUci", "")
|
||||
BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fCiV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dCiV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_minps512_mask, "V16fV16fV16fV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_minpd512_mask, "V8dV8dV8dV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_maxps512_mask, "V16fV16fV16fV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcCi", "")
|
||||
BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fCiV16sUs", "")
|
||||
BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsCi", "")
|
||||
BUILTIN(__builtin_ia32_pabsd512_mask, "V16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pabsq512_mask, "V8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pmaxsd512_mask, "V16iV16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pmaxsq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pmaxud512_mask, "V16iV16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pmaxuq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pminsd512_mask, "V16iV16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pminsq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pminud512_mask, "V16iV16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pminuq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pmuldq512_mask, "V8LLiV16iV16iV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pmuludq512_mask, "V8LLiV16iV16iV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_blendmd_512_mask, "V16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_blendmq_512_mask, "V8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_blendmps_512_mask, "V16fV16fV16fUs", "")
|
||||
BUILTIN(__builtin_ia32_blendmpd_512_mask, "V8dV8dV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16ivC*V16iUs", "")
|
||||
BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLivC*V8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_loadups512_mask, "V16fvC*V16fUs", "")
|
||||
BUILTIN(__builtin_ia32_loadupd512_mask, "V8dvC*V8dUc", "")
|
||||
BUILTIN(__builtin_ia32_storedqudi512_mask, "vv*V8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_storedqusi512_mask, "vv*V16iUs", "")
|
||||
BUILTIN(__builtin_ia32_storeupd512_mask, "vv*V8dUc", "")
|
||||
BUILTIN(__builtin_ia32_storeups512_mask, "vv*V16fUs", "")
|
||||
BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "")
|
||||
BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
|
||||
BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "")
|
||||
BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "")
|
||||
BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dv*V8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fv*UsCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dv*V8LLiUcCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fv*V8LLiUcCi", "")
|
||||
BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiv*V8iUcCi", "")
|
||||
BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iv*UsCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiv*V8LLiUcCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iv*V8LLiUcCi", "")
|
||||
BUILTIN(__builtin_ia32_scattersiv8df, "vv*UcV8iV8dCi", "")
|
||||
BUILTIN(__builtin_ia32_scattersiv16sf, "vv*UsV16iV16fCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterdiv8df, "vv*UcV8LLiV8dCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8LLiV8fCi", "")
|
||||
BUILTIN(__builtin_ia32_scattersiv8di, "vv*UcV8iV8LLiCi", "")
|
||||
BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8LLiV8LLiCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8LLiV8iCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8iv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16iv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8LLiv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8LLiv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiv*CiCi", "")
|
||||
BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLiv*CiCi", "")
|
||||
|
||||
#undef BUILTIN
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
/*===---- avx512fintrin.h - AVX2 intrinsics -----------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512ERINTRIN_H
|
||||
#define __AVX512ERINTRIN_H
|
||||
|
||||
|
||||
// rsqrt28
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
|
||||
(__v8df)_mm512_setzero_pd(),
|
||||
(__mmask8)-1,
|
||||
__R);
|
||||
}
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rsqrt28_round_ps(__m512 __A, int __R)
|
||||
{
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
|
||||
(__v16sf)_mm512_setzero_ps(),
|
||||
(__mmask16)-1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A,
|
||||
(__v4sf) __B,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A,
|
||||
(__v2df) __B,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
|
||||
// rcp28
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rcp28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
|
||||
(__v8df)_mm512_setzero_pd(),
|
||||
(__mmask8)-1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rcp28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
|
||||
(__v16sf)_mm512_setzero_ps (),
|
||||
(__mmask16)-1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A,
|
||||
(__v4sf) __B,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A,
|
||||
(__v2df) __B,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
#endif // __AVX512ERINTRIN_H
|
|
@ -0,0 +1,716 @@
|
|||
/*===---- avx512fintrin.h - AVX2 intrinsics -----------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512FINTRIN_H
|
||||
#define __AVX512FINTRIN_H
|
||||
|
||||
typedef double __v8df __attribute__((__vector_size__(64)));
|
||||
typedef float __v16sf __attribute__((__vector_size__(64)));
|
||||
typedef long long __v8di __attribute__((__vector_size__(64)));
|
||||
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef float __m512 __attribute__((__vector_size__(64)));
|
||||
typedef double __m512d __attribute__((__vector_size__(64)));
|
||||
typedef long long __m512i __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
/* Rounding mode macros. */
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
||||
#define _MM_FROUND_TO_NEG_INF 0x01
|
||||
#define _MM_FROUND_TO_POS_INF 0x02
|
||||
#define _MM_FROUND_TO_ZERO 0x03
|
||||
#define _MM_FROUND_CUR_DIRECTION 0x04
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_setzero_ps (void)
|
||||
{
|
||||
return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
|
||||
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
|
||||
}
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_setzero_pd (void)
|
||||
{
|
||||
return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
|
||||
}
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_setzero_si512 (void)
|
||||
{
|
||||
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_sqrt_pd(__m512d a)
|
||||
{
|
||||
return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_sqrt_ps(__m512 a)
|
||||
{
|
||||
return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
// rsqrt14
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rsqrt14_pd(__m512d __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1);}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rsqrt14_ps(__m512 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
|
||||
(__v4sf) __B,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
|
||||
(__v2df) __B,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
// rcp14
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rcp14_pd(__m512d __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_rcp14_ps(__m512 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rcp14_ss (__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
|
||||
(__v4sf) __B,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rcp14_sd (__m128d __A, __m128d __B)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
|
||||
(__v2df) __B,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
// min/max
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_max_pd(__m512d __A, __m512d __B)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
|
||||
(__v8df) __B,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_max_ps(__m512 __A, __m512 __B)
|
||||
{
|
||||
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
|
||||
(__v16sf) __B,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_min_pd(__m512d __A, __m512d __B)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
|
||||
(__v8df) __B,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_min_ps(__m512 __A, __m512 __B)
|
||||
{
|
||||
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
|
||||
(__v16sf) __B,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvtps_ph (__m512 __A, const int __I)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
|
||||
__I,
|
||||
(__v16hi)
|
||||
_mm256_setzero_si256 (),
|
||||
-1);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvtph_ps (__m256i __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_cvttps_epi32(__m512 a)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_cvttps2dq512_mask((__v16sf) a,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm512_cvttpd_epi32(__m512d a)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
|
||||
(__v8si)_mm256_setzero_si256(),
|
||||
(__mmask8) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1,
|
||||
__R);
|
||||
}
|
||||
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1,
|
||||
__R);
|
||||
}
|
||||
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_roundscale_ps (__m512 __A, const int __imm)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
|
||||
(__v16sf) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_roundscale_pd (__m512d __A, const int __imm)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
|
||||
(__v8df) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cmp_ps_mask (__m512 a, __m512 b, const int p)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
|
||||
(__v16sf) b, p, (__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
|
||||
(__v8df) __Y, __P,
|
||||
(__mmask8) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_cvttps_epu32 (__m512 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_cvtepi32_pd (__m256i __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_cvtepu32_pd (__m256i __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
|
||||
{
|
||||
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
|
||||
(__v8sf)
|
||||
_mm256_setzero_ps (),
|
||||
(__mmask8) -1,
|
||||
__R);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_abs_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
||||
_mm512_abs_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i
|
||||
__attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_max_epi32 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_max_epu32 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_max_epi64 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_max_epu64 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
static __inline __m512i
|
||||
__attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_min_epi32 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_min_epu32 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_min_epi64 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_min_epu64 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
|
||||
{
|
||||
return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
#else
|
||||
return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
|
||||
(__v16sf)
|
||||
_mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
|
||||
(__v8df)
|
||||
_mm512_setzero_pd (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
__builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
__builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline void __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
|
||||
{
|
||||
__builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline void __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
|
||||
{
|
||||
__builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
|
||||
/* idx */ ,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
|
||||
/* idx */ ,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
|
||||
/* idx */ ,
|
||||
(__v8df) __A,
|
||||
(__v8df) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
|
||||
{
|
||||
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
|
||||
/* idx */ ,
|
||||
(__v16sf) __A,
|
||||
(__v16sf) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_floor_ps (__m512 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
||||
_MM_FROUND_FLOOR,
|
||||
(__v16sf) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_floor_pd (__m512d __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
||||
_MM_FROUND_FLOOR,
|
||||
(__v8df) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_ceil_ps (__m512 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
||||
_MM_FROUND_CEIL,
|
||||
(__v16sf) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||||
_mm512_ceil_pd (__m512d __A)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
||||
_MM_FROUND_CEIL,
|
||||
(__v8df) __A, -1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#endif // __AVX512FINTRIN_H
|
|
@ -76,6 +76,14 @@
|
|||
#include <fmaintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __AVX512F__
|
||||
#include <avx512fintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __AVX512ER__
|
||||
#include <avx512erintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __RDRND__
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||
_rdrand16_step(unsigned short *__p)
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Werror | FileCheck %s
|
||||
|
||||
// Don't include mm_malloc.h, it's system specific.
|
||||
#define __MM_MALLOC_H
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
// Verify that _mm512_sqrt_pd lowers to the expected LLVM intrinsic.
__m512d test_mm512_sqrt_pd(__m512d a)
{
  // CHECK: @llvm.x86.avx512.sqrt.pd.512
  return _mm512_sqrt_pd(a);
}
|
||||
|
||||
// Verify that _mm512_sqrt_ps lowers to the expected LLVM intrinsic.
__m512 test_mm512_sqrt_ps(__m512 a)
{
  // CHECK: @llvm.x86.avx512.sqrt.ps.512
  return _mm512_sqrt_ps(a);
}
|
||||
|
||||
// Verify that _mm512_rsqrt14_pd lowers to the expected LLVM intrinsic.
__m512d test_mm512_rsqrt14_pd(__m512d a)
{
  // CHECK: @llvm.x86.avx512.rsqrt14.pd.512
  return _mm512_rsqrt14_pd(a);
}
|
||||
|
||||
// Verify that _mm512_rsqrt14_ps lowers to the expected LLVM intrinsic.
__m512 test_mm512_rsqrt14_ps(__m512 a)
{
  // CHECK: @llvm.x86.avx512.rsqrt14.ps.512
  return _mm512_rsqrt14_ps(a);
}
|
|
@ -84,6 +84,12 @@
|
|||
#ifndef __RDRND__
|
||||
#define __RDRND__
|
||||
#endif
|
||||
#ifndef __AVX512F__
|
||||
#define __AVX512F__
|
||||
#endif
|
||||
#ifndef __AVX512ER__
|
||||
#define __AVX512ER__
|
||||
#endif
|
||||
|
||||
// Now include the metaheader that includes all x86 intrinsic headers.
|
||||
#include <x86intrin.h>
|
||||
|
|
Loading…
Reference in New Issue