forked from OSchip/llvm-project
[X86] Add back some masked vector truncate builtins. Custom IRgen for a few others.
I'd like to make the select builtins require an avx512f, avx512bw, or avx512vl feature to match what is normally required to get masking. Truncate is special in that there are instructions with a 128/256-bit masked result even without avx512vl. By using special builtins we can emit a select without using the 128/256-bit select builtins. llvm-svn: 334331
This commit is contained in:
parent
5f50f33806
commit
88097d9355
|
@ -1298,6 +1298,7 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "nc", "av
|
|||
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
|
||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
|
||||
|
@ -1648,6 +1649,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f")
|
|||
TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")
|
||||
|
|
|
@ -9309,6 +9309,35 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
makeArrayRef(Indices, DstNumElts),
|
||||
"insert");
|
||||
}
|
||||
case X86::BI__builtin_ia32_pmovqd512_mask:
|
||||
case X86::BI__builtin_ia32_pmovwb512_mask: {
|
||||
Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
|
||||
return EmitX86Select(*this, Ops[2], Res, Ops[1]);
|
||||
}
|
||||
case X86::BI__builtin_ia32_pmovdb512_mask:
|
||||
case X86::BI__builtin_ia32_pmovdw512_mask:
|
||||
case X86::BI__builtin_ia32_pmovqw512_mask: {
|
||||
if (const auto *C = dyn_cast<Constant>(Ops[2]))
|
||||
if (C->isAllOnesValue())
|
||||
return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
|
||||
|
||||
Intrinsic::ID IID;
|
||||
switch (BuiltinID) {
|
||||
default: llvm_unreachable("Unsupported intrinsic!");
|
||||
case X86::BI__builtin_ia32_pmovdb512_mask:
|
||||
IID = Intrinsic::x86_avx512_mask_pmov_db_512;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_pmovdw512_mask:
|
||||
IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_pmovqw512_mask:
|
||||
IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
|
||||
break;
|
||||
}
|
||||
|
||||
Function *Intr = CGM.getIntrinsic(IID);
|
||||
return Builder.CreateCall(Intr, Ops);
|
||||
}
|
||||
case X86::BI__builtin_ia32_pblendw128:
|
||||
case X86::BI__builtin_ia32_blendpd:
|
||||
case X86::BI__builtin_ia32_blendps:
|
||||
|
|
|
@ -1080,21 +1080,23 @@ _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
|
|||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi16_epi8 (__m512i __A) {
|
||||
return (__m256i)__builtin_convertvector((__v32hi)__A, __v32qi);
|
||||
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
|
||||
(__v32qi) _mm256_undefined_si256(),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm512_cvtepi16_epi8(__A),
|
||||
(__v32qi)__O);
|
||||
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
|
||||
(__v32qi) __O,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm512_cvtepi16_epi8(__A),
|
||||
(__v32qi)_mm256_setzero_si256());
|
||||
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
|
|
|
@ -7402,7 +7402,9 @@ _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
|
|||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi32_epi8 (__m512i __A)
|
||||
{
|
||||
return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
|
||||
return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
|
||||
(__v16qi) _mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
|
@ -7429,7 +7431,9 @@ _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
|
|||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi32_epi16 (__m512i __A)
|
||||
{
|
||||
return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
|
||||
return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
|
||||
(__v16hi) _mm256_undefined_si256 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
|
@ -7485,23 +7489,24 @@ _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
|
|||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi64_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
|
||||
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
|
||||
(__v8si) _mm256_undefined_si256 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
|
||||
(__v8si)_mm512_cvtepi64_epi32(__A),
|
||||
(__v8si)__O);
|
||||
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
|
||||
(__v8si) __O, __M);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
|
||||
(__v8si)_mm512_cvtepi64_epi32(__A),
|
||||
(__v8si)_mm256_setzero_si256());
|
||||
return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
|
||||
(__v8si) _mm256_setzero_si256 (),
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
|
@ -7513,7 +7518,9 @@ _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
|
|||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi64_epi16 (__m512i __A)
|
||||
{
|
||||
return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
|
||||
return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
|
||||
(__v8hi) _mm_undefined_si128 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
|
|
Loading…
Reference in New Issue