[X86] Remove and autoupgrade masked vpermd/vpermps intrinsics.

llvm-svn: 332198
This commit is contained in:
Craig Topper 2018-05-13 18:03:59 +00:00
parent a39c409619
commit 85906cf041
6 changed files with 53 additions and 59 deletions

View File

@ -3289,15 +3289,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_sf_256 : // TODO: Remove this intrinsic
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_si_256 : // TODO: Remove this intrinsic
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;

View File

@ -181,6 +181,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
Name == "avx512.mask.permvar.si.256" || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
@ -1203,6 +1205,10 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name == "permvar.sf.256") {
IID = Intrinsic::x86_avx2_permps;
} else if (Name == "permvar.si.256") {
IID = Intrinsic::x86_avx2_permd;
} else
return false;

View File

@ -817,12 +817,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_256, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_si_256, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,

View File

@ -2796,9 +2796,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_mask_permvar_qi_128:
case Intrinsic::x86_avx512_mask_permvar_qi_256:
case Intrinsic::x86_avx512_mask_permvar_qi_512:
case Intrinsic::x86_avx512_mask_permvar_sf_256:
case Intrinsic::x86_avx512_mask_permvar_sf_512:
case Intrinsic::x86_avx512_mask_permvar_si_256:
case Intrinsic::x86_avx512_mask_permvar_si_512:
if (Value *V = simplifyX86vpermv(*II, Builder)) {
// We simplified the permuting, now create a select for the masking.

View File

@ -6737,3 +6737,43 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}
declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}

View File

@ -3075,46 +3075,6 @@ define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64
ret <4 x i64> %res4
}
declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}
declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res3 = add <8 x i32> %res, %res1
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {