forked from OSchip/llvm-project
[AVX-512] Remove 128/256-bit masked fp max/min intrinsics. Upgrade them to legacy unmasked intrinsics and select instructions.
llvm-svn: 295543
This commit is contained in:
parent
b70afdb5e8
commit
a505169ca5
|
@ -4571,39 +4571,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_128 : GCCBuiltin<"__builtin_ia32_maxps_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_128 : GCCBuiltin<"__builtin_ia32_maxpd_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_128 : GCCBuiltin<"__builtin_ia32_minps_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_128 : GCCBuiltin<"__builtin_ia32_minpd_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -158,6 +158,14 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
Name == "avx512.mask.sub.pd.256" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.ps.128" || // Added in 4.0
|
||||
Name == "avx512.mask.sub.ps.256" || // Added in 4.0
|
||||
Name == "avx512.mask.max.pd.128" || // Added in 4.1
|
||||
Name == "avx512.mask.max.pd.256" || // Added in 4.1
|
||||
Name == "avx512.mask.max.ps.128" || // Added in 4.1
|
||||
Name == "avx512.mask.max.ps.256" || // Added in 4.1
|
||||
Name == "avx512.mask.min.pd.128" || // Added in 4.1
|
||||
Name == "avx512.mask.min.pd.256" || // Added in 4.1
|
||||
Name == "avx512.mask.min.ps.128" || // Added in 4.1
|
||||
Name == "avx512.mask.min.ps.256" || // Added in 4.1
|
||||
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
|
||||
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
|
||||
|
@ -1520,6 +1528,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
|
||||
Name.startswith("avx512.mask.min.p"))) {
|
||||
bool IsMin = Name[13] == 'i';
|
||||
VectorType *VecTy = cast<VectorType>(CI->getType());
|
||||
unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = VecTy->getScalarSizeInBits();
|
||||
Intrinsic::ID IID;
|
||||
if (!IsMin && VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_sse_max_ps;
|
||||
else if (!IsMin && VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_sse2_max_pd;
|
||||
else if (!IsMin && VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_avx_max_ps_256;
|
||||
else if (!IsMin && VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_avx_max_pd_256;
|
||||
else if (IsMin && VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_sse_min_ps;
|
||||
else if (IsMin && VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_sse2_min_pd;
|
||||
else if (IsMin && VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_avx_min_ps_256;
|
||||
else if (IsMin && VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_avx_min_pd_256;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1) });
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
|
||||
VectorType *VecTy = cast<VectorType>(CI->getType());
|
||||
Intrinsic::ID IID;
|
||||
|
|
|
@ -807,24 +807,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
ISD::CTLZ, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
|
||||
ISD::CTLZ, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
|
||||
X86ISD::FMAX_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
|
||||
X86ISD::FMAX_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FMAX_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FMAX_RND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
|
||||
X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
|
||||
X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
|
|
|
@ -4833,3 +4833,128 @@ define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i3
|
|||
%res4 = add <8 x i32> %res2, %res3
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_max_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_max_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
|
||||
; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_max_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_max_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_max_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
|
||||
; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_max_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_min_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_min_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
|
||||
; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_min_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_min_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_mask_min_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
|
||||
; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_min_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
|
|
|
@ -1543,8 +1543,10 @@ define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
|
||||
ret <8 x float> %3
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
|
@ -1554,8 +1556,10 @@ define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
|
||||
; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
|
||||
ret <8 x float> %3
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
|
@ -1563,10 +1567,10 @@ define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
ret <8 x float> %1
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_max_ps_128:
|
||||
|
@ -1574,8 +1578,11 @@ define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1
|
|||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
|
@ -1585,8 +1592,11 @@ define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
|
||||
; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
|
@ -1594,10 +1604,10 @@ define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
ret <4 x float> %1
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_min_ps_256:
|
||||
|
@ -1605,8 +1615,10 @@ define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
|
||||
ret <8 x float> %3
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
|
@ -1616,8 +1628,10 @@ define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
|
||||
; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
|
||||
ret <8 x float> %3
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
|
@ -1625,10 +1639,10 @@ define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
%1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
|
||||
ret <8 x float> %1
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
; CHECK-LABEL: test_mm512_maskz_min_ps_128:
|
||||
|
@ -1636,8 +1650,11 @@ define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1
|
|||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
|
@ -1647,8 +1664,11 @@ define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
|
||||
; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
%2 = bitcast i8 %mask to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
|
@ -1656,10 +1676,10 @@ define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
%1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
|
||||
ret <4 x float> %1
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
|
||||
|
||||
define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
|
||||
; CHECK-LABEL: test_sqrt_pd_256:
|
||||
|
|
Loading…
Reference in New Issue