From f7bf6de0afc88a919742f8131210526fef3fb19a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 8 Jul 2016 06:14:47 +0000
Subject: [PATCH] [AVX512] Remove and autoupgrade a duplicate set of 512-bit masked shift intrinsics.

I'm not sure if clang ever used these builtin names or not.

llvm-svn: 274827
---
 llvm/include/llvm/IR/IntrinsicsX86.td         |  19 --
 llvm/lib/IR/AutoUpgrade.cpp                   |  25 ++-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   7 -
 llvm/lib/Target/X86/X86IntrinsicsInfo.h       |   8 +-
 .../CodeGen/X86/avx512-intrinsics-upgrade.ll  | 192 ++++++++++++++++++
 llvm/test/CodeGen/X86/avx512-intrinsics.ll    | 192 ------------------
 6 files changed, 217 insertions(+), 226 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 9bd7e8425892..c7c17b4ae88b 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -2040,25 +2040,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
                      llvm_i32_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
-          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                     llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi512">,
-          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                     llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi512">,
-          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                     llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi512">,
-          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                     llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psrai_d : GCCBuiltin<"__builtin_ia32_psradi512">,
-          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
-                     llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_psrai_q : GCCBuiltin<"__builtin_ia32_psraqi512">,
-          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
-                     llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
   def int_x86_avx512_mask_psrl_w_128 : GCCBuiltin<"__builtin_ia32_psrlw128_mask">,
           Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                      llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 468644314e3f..7e52bdb36a99 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -342,6 +342,23 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                                         Intrinsic::x86_xop_vfrcz_sd);
       return true;
     }
+    if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
+                  Name.startswith("avx512.mask.psrai.") ||
+                  Name.startswith("avx512.mask.psrli."))) {
+      F->setName("llvm.x86." + Name + ".old");
+      Intrinsic::ID ShiftID;
+      if (Name.slice(12, 16) == "psll")
+        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
+                                  : Intrinsic::x86_avx512_mask_psll_qi_512;
+      else if (Name.slice(12, 16) == "psra")
+        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
+                                  : Intrinsic::x86_avx512_mask_psra_qi_512;
+      else
+        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
+                                  : Intrinsic::x86_avx512_mask_psrl_qi_512;
+      NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
+      return true;
+    }
     // Fix the FMA4 intrinsics to remove the 4
     if (IsX86 && Name.startswith("fma4.")) {
       F->setName("llvm.x86.fma" + Name.substr(5));
@@ -353,7 +370,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       auto Params = F->getFunctionType()->params();
       auto Idx = Params[2];
       if (Idx->getScalarType()->isFloatingPointTy()) {
-        F->setName(Name + ".old");
+        F->setName("llvm.x86." + Name + ".old");
         unsigned IdxSize = Idx->getPrimitiveSizeInBits();
         unsigned EltSize = Idx->getScalarSizeInBits();
         Intrinsic::ID Permil2ID;
@@ -1179,6 +1196,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   default:
     llvm_unreachable("Unknown function for CallInst upgrade.");
 
+  case Intrinsic::x86_avx512_mask_psll_di_512:
+  case Intrinsic::x86_avx512_mask_psra_di_512:
+  case Intrinsic::x86_avx512_mask_psrl_di_512:
+  case Intrinsic::x86_avx512_mask_psll_qi_512:
+  case Intrinsic::x86_avx512_mask_psra_qi_512:
+  case Intrinsic::x86_avx512_mask_psrl_qi_512:
   case Intrinsic::arm_neon_vld1:
   case Intrinsic::arm_neon_vld2:
   case Intrinsic::arm_neon_vld3:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a98d69544457..162c6b157404 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17796,13 +17796,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
   case VSHIFT:
     return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
                                Op.getOperand(1), Op.getOperand(2), DAG);
-  case VSHIFT_MASK:
-    return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl,
-                                                    Op.getSimpleValueType(),
-                                                    Op.getOperand(1),
-                                                    Op.getOperand(2), DAG),
-                                Op.getOperand(4), Op.getOperand(3), Subtarget,
-                                DAG);
   case COMPRESS_EXPAND_IN_REG: {
     SDValue Mask = Op.getOperand(3);
     SDValue DataToCompress = Op.getOperand(1);
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2197896dbbac..bfd208969249 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM,
+  CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
   INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM,
   INTR_TYPE_3OP_IMM8_MASK,
@@ -1278,8 +1278,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
@@ -1307,8 +1305,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
@@ -1336,8 +1332,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index f0d9082b4633..41704fd8c53a 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -723,3 +723,195 @@ define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   ret <16 x i32> %res2
 }
 
+define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx512_pslli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx512_pslli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx512_psrli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx512_psrli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx512_psrai_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx512_psrai_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
+; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 1d51b6f65491..b692bcfed955 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -1323,198 +1323,6 @@ define <4 x double> @test_vextractf64x4(<8 x double> %a) {
 
 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
 
-define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
-; CHECK-LABEL: test_x86_avx512_pslli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
-  ret <16 x i32> %res
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
-; CHECK-LABEL: test_x86_avx512_pslli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
-  ret <8 x i64> %res
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
-
-define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
-; CHECK-LABEL: test_x86_avx512_psrli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
-  ret <16 x i32> %res
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
-; CHECK-LABEL: test_x86_avx512_psrli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
-  ret <8 x i64> %res
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
-
-define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
-; CHECK-LABEL: test_x86_avx512_psrai_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
-  ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
-  ret <16 x i32> %res
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
-; CHECK-LABEL: test_x86_avx512_psrai_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
-  ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
-; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
-  ret <8 x i64> %res
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
-
 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_x86_avx512_psll_d:
 ; CHECK:       ## BB#0:
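
For reference, the IR-level effect of the new AutoUpgrade path is a rename of the removed intrinsics onto the surviving 512-bit immediate-shift intrinsics (psll.di.512, psrl.di.512, psra.di.512 and their qi counterparts), with the operand list left unchanged; this is what the avx512-intrinsics-upgrade.ll tests above exercise. A minimal sketch in LLVM IR of the expected mapping for the pslli.d case, using placeholder operand values (%a0, %passthru and %mask are illustrative, not taken from the patch):

; A call written against the removed intrinsic (placeholder operands):
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %passthru, i16 %mask)

; Expected form after UpgradeIntrinsicCall re-creates the call against the
; remaining immediate-shift intrinsic, with the same operands:
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %a0, i32 7, <16 x i32> %passthru, i16 %mask)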