forked from OSchip/llvm-project
[X86] Remove and autoupgrade masked vpermd/vpermps intrinsics.
llvm-svn: 332198
This commit is contained in:
parent
a39c409619
commit
85906cf041
|
@ -3289,15 +3289,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_sf_256 : // TODO: Remove this intrinsic
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
||||
llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
|
||||
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_si_256 : // TODO: Remove this intrinsic
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -181,6 +181,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
|
||||
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
|
||||
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
|
||||
Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
|
||||
Name == "avx512.mask.permvar.si.256" || // Added in 7.0
|
||||
Name == "sse2.pmulu.dq" || // Added in 7.0
|
||||
Name == "sse41.pmuldq" || // Added in 7.0
|
||||
Name == "avx2.pmulu.dq" || // Added in 7.0
|
||||
|
@ -1190,19 +1192,23 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
|
|||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
} else if (Name == "cvtdq2ps.128") {
|
||||
IID = Intrinsic::x86_sse2_cvtdq2ps;
|
||||
IID = Intrinsic::x86_sse2_cvtdq2ps;
|
||||
} else if (Name == "cvtdq2ps.256") {
|
||||
IID = Intrinsic::x86_avx_cvtdq2_ps_256;
|
||||
IID = Intrinsic::x86_avx_cvtdq2_ps_256;
|
||||
} else if (Name == "cvtpd2dq.256") {
|
||||
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
|
||||
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
|
||||
} else if (Name == "cvtpd2ps.256") {
|
||||
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
|
||||
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
|
||||
} else if (Name == "cvttpd2dq.256") {
|
||||
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
|
||||
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
|
||||
} else if (Name == "cvttps2dq.128") {
|
||||
IID = Intrinsic::x86_sse2_cvttps2dq;
|
||||
IID = Intrinsic::x86_sse2_cvttps2dq;
|
||||
} else if (Name == "cvttps2dq.256") {
|
||||
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
|
||||
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
|
||||
} else if (Name == "permvar.sf.256") {
|
||||
IID = Intrinsic::x86_avx2_permps;
|
||||
} else if (Name == "permvar.si.256") {
|
||||
IID = Intrinsic::x86_avx2_permd;
|
||||
} else
|
||||
return false;
|
||||
|
||||
|
|
|
@ -817,12 +817,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_si_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
|
||||
|
|
|
@ -2796,9 +2796,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
case Intrinsic::x86_avx512_mask_permvar_qi_128:
|
||||
case Intrinsic::x86_avx512_mask_permvar_qi_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_qi_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_sf_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_sf_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_si_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_si_512:
|
||||
if (Value *V = simplifyX86vpermv(*II, Builder)) {
|
||||
// We simplified the permuting, now create a select for the masking.
|
||||
|
|
|
@ -6737,3 +6737,43 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i
|
|||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
|
||||
; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
|
||||
%res3 = fadd <8 x float> %res, %res1
|
||||
%res4 = fadd <8 x float> %res3, %res2
|
||||
ret <8 x float> %res4
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res3 = add <8 x i32> %res, %res1
|
||||
%res4 = add <8 x i32> %res3, %res2
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
|
|
@ -3075,46 +3075,6 @@ define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64
|
|||
ret <4 x i64> %res4
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
|
||||
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
|
||||
; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
|
||||
%res3 = fadd <8 x float> %res, %res1
|
||||
%res4 = fadd <8 x float> %res3, %res2
|
||||
ret <8 x float> %res4
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
|
||||
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
|
||||
%res3 = add <8 x i32> %res, %res1
|
||||
%res4 = add <8 x i32> %res3, %res2
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
|
||||
|
|
Loading…
Reference in New Issue