forked from OSchip/llvm-project
Expand vector operations to IR constants, PR28129.
llvm-svn: 305551
This commit is contained in:
parent
63cfb6872b
commit
7b22425dff
|
@ -7923,6 +7923,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
|
||||
// We can't handle 8-31 immediates with native IR, use the intrinsic.
|
||||
// Except for predicates that create constants.
|
||||
Intrinsic::ID ID;
|
||||
switch (BuiltinID) {
|
||||
default: llvm_unreachable("Unsupported intrinsic!");
|
||||
|
@ -7930,12 +7931,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
ID = Intrinsic::x86_sse_cmp_ps;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_cmpps256:
|
||||
// _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
|
||||
// on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
|
||||
if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
|
||||
Value *Constant = (CC == 0xf || CC == 0x1f) ?
|
||||
llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
|
||||
llvm::Constant::getNullValue(Builder.getInt32Ty());
|
||||
Value *Vec = Builder.CreateVectorSplat(
|
||||
Ops[0]->getType()->getVectorNumElements(), Constant);
|
||||
return Builder.CreateBitCast(Vec, Ops[0]->getType());
|
||||
}
|
||||
ID = Intrinsic::x86_avx_cmp_ps_256;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_cmppd:
|
||||
ID = Intrinsic::x86_sse2_cmp_pd;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_cmppd256:
|
||||
// _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
|
||||
// on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
|
||||
if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
|
||||
Value *Constant = (CC == 0xf || CC == 0x1f) ?
|
||||
llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
|
||||
llvm::Constant::getNullValue(Builder.getInt64Ty());
|
||||
Value *Vec = Builder.CreateVectorSplat(
|
||||
Ops[0]->getType()->getVectorNumElements(), Constant);
|
||||
return Builder.CreateBitCast(Vec, Ops[0]->getType());
|
||||
}
|
||||
ID = Intrinsic::x86_avx_cmp_pd_256;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1427,3 +1427,51 @@ float test_mm256_cvtss_f32(__m256 __a)
|
|||
// CHECK: extractelement <8 x float> %{{.*}}, i32 0
|
||||
return _mm256_cvtss_f32(__a);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_ps with the _CMP_TRUE_UQ predicate. The FileCheck
// directives below expect a store of a constant <8 x float> vector whose
// first element prints as 0xFFFFFFFFE0000000.
// NOTE(review): that value looks like the all-ones 32-bit pattern shown in
// LLVM's double-width hex form for float constants -- confirm.
__m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_ps_true
|
||||
// CHECK: store <8 x float> <float 0xFFFFFFFFE0000000,
|
||||
return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_pd with the _CMP_TRUE_UQ predicate. The FileCheck
// directives below expect a store of a constant <4 x double> vector whose
// first element is 0xFFFFFFFFFFFFFFFF (all bits set).
// NOTE(review): the parameters and return type are __m256 although the
// intrinsic is the "pd" variant and the expected IR is <4 x double>; this
// presumably relies on implicit (lax) vector conversions and may have been
// meant to be __m256d -- confirm before changing.
__m256 test_mm256_cmp_pd_true(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_pd_true
|
||||
// CHECK: store <4 x double> <double 0xFFFFFFFFFFFFFFFF,
|
||||
return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_ps with the _CMP_FALSE_OQ predicate. The FileCheck
// directives below expect the result to be stored as an <8 x float>
// zeroinitializer with 32-byte alignment.
__m256 test_mm256_cmp_ps_false(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_ps_false
|
||||
// CHECK: store <8 x float> zeroinitializer, <8 x float>* %tmp, align 32
|
||||
return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_pd with the _CMP_FALSE_OQ predicate. The FileCheck
// directives below expect the result to be stored as a <4 x double>
// zeroinitializer with 32-byte alignment.
// NOTE(review): the parameters and return type are __m256 although the
// intrinsic is the "pd" variant and the expected IR is <4 x double>; this
// presumably relies on implicit (lax) vector conversions and may have been
// meant to be __m256d -- confirm before changing.
__m256 test_mm256_cmp_pd_false(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_pd_false
|
||||
// CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32
|
||||
return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_ps with the _CMP_TRUE_US predicate. The FileCheck
// directives below expect a store of a constant <8 x float> vector whose
// first element prints as 0xFFFFFFFFE0000000, the same expectation as the
// _CMP_TRUE_UQ test for the "ps" variant.
__m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_ps_strue
|
||||
// CHECK: store <8 x float> <float 0xFFFFFFFFE0000000,
|
||||
return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_pd with the _CMP_TRUE_US predicate. The FileCheck
// directives below expect a store of a constant <4 x double> vector whose
// first element is 0xFFFFFFFFFFFFFFFF (all bits set).
// NOTE(review): the parameters and return type are __m256 although the
// intrinsic is the "pd" variant and the expected IR is <4 x double>; this
// presumably relies on implicit (lax) vector conversions and may have been
// meant to be __m256d -- confirm before changing.
__m256 test_mm256_cmp_pd_strue(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_pd_strue
|
||||
// CHECK: store <4 x double> <double 0xFFFFFFFFFFFFFFFF,
|
||||
return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_ps with the _CMP_FALSE_OS predicate. The FileCheck
// directives below expect the result to be stored as an <8 x float>
// zeroinitializer with 32-byte alignment.
__m256 test_mm256_cmp_ps_sfalse(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_ps_sfalse
|
||||
// CHECK: store <8 x float> zeroinitializer, <8 x float>* %tmp, align 32
|
||||
return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
|
||||
}
|
||||
|
||||
// Calls _mm256_cmp_pd with the _CMP_FALSE_OS predicate. The FileCheck
// directives below expect the result to be stored as a <4 x double>
// zeroinitializer with 32-byte alignment.
// NOTE(review): the parameters and return type are __m256 although the
// intrinsic is the "pd" variant and the expected IR is <4 x double>; this
// presumably relies on implicit (lax) vector conversions and may have been
// meant to be __m256d -- confirm before changing.
__m256 test_mm256_cmp_pd_sfalse(__m256 a, __m256 b) {
|
||||
// CHECK-LABEL: @test_mm256_cmp_pd_sfalse
|
||||
// CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32
|
||||
return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue