From abe7c5b5e949649b49fa89fff78864ff6a489e0a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 10 Dec 2016 21:15:52 +0000 Subject: [PATCH] [AVX-512] Remove 128/256 masked vpermil instrinsics and autoupgrade to a select around the unmasked avx1 intrinsics. llvm-svn: 289340 --- llvm/include/llvm/IR/IntrinsicsX86.td | 24 ------ llvm/lib/IR/AutoUpgrade.cpp | 22 +++++ llvm/lib/Target/X86/X86IntrinsicsInfo.h | 8 -- .../X86/avx512vl-intrinsics-upgrade.ll | 80 +++++++++++++++++++ llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 80 ------------------- 5 files changed, 102 insertions(+), 112 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 565ecc623779..fa6ae58d47bc 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1316,42 +1316,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vpermilvar_pd_256 : - GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_vpermilvar_pd_512 : GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vpermilvar_pd_128 : - GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_vpermilvar_ps_256 : - GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_vpermilvar_ps_512 : GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vpermilvar_ps_128 : - GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pshuf_b_512 : GCCBuiltin<"__builtin_ia32_pshufb512_mask">, Intrinsic<[llvm_v64i8_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 86dc59f13b4f..fe7ba8b16077 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -313,6 +313,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx512.mask.sub.pd.256" || // Added in 4.0 Name == "avx512.mask.sub.ps.128" || // Added in 4.0 Name == "avx512.mask.sub.ps.256" || // Added in 4.0 + Name == "avx512.mask.vpermilvar.ps.128" || // Added in 4.0 + Name == "avx512.mask.vpermilvar.ps.256" || // Added in 4.0 + Name == "avx512.mask.vpermilvar.pd.128" || // Added in 4.0 + Name == "avx512.mask.vpermilvar.pd.256" || // Added in 4.0 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 @@ -1658,6 +1662,24 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = UpgradeX86MaskedShift(Builder, *CI, IID); } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { Rep = upgradeMaskedMove(Builder, *CI); + } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) { + Intrinsic::ID IID; + if (Name.endswith("ps.128")) + IID = Intrinsic::x86_avx_vpermilvar_ps; + else if (Name.endswith("pd.128")) + IID = Intrinsic::x86_avx_vpermilvar_pd; + else if (Name.endswith("ps.256")) + IID = Intrinsic::x86_avx_vpermilvar_ps_256; + else if (Name.endswith("pd.256")) + IID = Intrinsic::x86_avx_vpermilvar_pd_256; + else + llvm_unreachable("Unexpected vpermilvar intrinsic"); + + Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); + Rep = Builder.CreateCall(Intrin, + { CI->getArgOperand(0), CI->getArgOperand(1) }); + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 002e2e1ef346..24235f45c946 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1257,16 +1257,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK, - X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK, - X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK, X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK, - X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK, - X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index b3d61b1d1bdf..c3d15f33f187 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -4693,3 +4693,83 @@ define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> ret <4 x i64> %res2 } +declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xd9] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) + %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res2, %res3 + ret <4 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xd9] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) + %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xd9] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc3] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xd9] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) + %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 76217aebd12e..a99c8641d9c1 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3709,86 +3709,6 @@ define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x ret <8 x float> %res2 } -declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] -; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xd9] -; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xc1] -; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb] -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) - %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) - %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) - %res3 = fadd <4 x double> %res, %res1 - %res4 = fadd <4 x double> %res2, %res3 - ret <4 x double> %res4 -} - -declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] -; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xd9] -; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xc1] -; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xcb] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) - %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) - %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) - %res3 = fadd <2 x double> %res, %res1 - %res4 = fadd <2 x double> %res3, %res2 - ret <2 x double> %res4 -} - -declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) - -define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] -; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xd9] -; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xc1] -; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) - %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) - %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) - %res3 = fadd <8 x float> %res, %res1 - %res4 = fadd <8 x float> %res3, %res2 - ret <8 x float> %res4 -} - -declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) - -define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] -; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xd9] -; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xc1] -; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xcb] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) - %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) - %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) - %res3 = fadd <4 x float> %res, %res1 - %res4 = fadd <4 x float> %res2, %res3 - ret <4 x float> %res4 -} - declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8) define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {