forked from OSchip/llvm-project
[X86] Remove the blendpd/blendps/pblendw/pblendd intrinsics. They can be represented by shuffle_vector instructions.
llvm-svn: 230860
This commit is contained in:
parent
9e1ce99d81
commit
782d620657
|
@ -882,15 +882,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
@ -1156,12 +1147,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
|
||||
// Vector blend
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
|
||||
|
@ -1734,15 +1719,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector load with broadcast
|
||||
|
|
|
@ -171,6 +171,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
Name == "x86.sse2.psrl.dq.bs" ||
|
||||
Name == "x86.avx2.psll.dq.bs" ||
|
||||
Name == "x86.avx2.psrl.dq.bs" ||
|
||||
Name == "x86.sse41.pblendw" ||
|
||||
Name == "x86.sse41.blendpd" ||
|
||||
Name == "x86.sse41.blendps" ||
|
||||
Name == "x86.avx.blend.pd.256" ||
|
||||
Name == "x86.avx.blend.ps.256" ||
|
||||
Name == "x86.avx2.pblendw" ||
|
||||
Name == "x86.avx2.pblendd.128" ||
|
||||
Name == "x86.avx2.pblendd.256" ||
|
||||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
|
||||
NewFn = nullptr;
|
||||
return true;
|
||||
|
@ -186,15 +194,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
}
|
||||
// Several blend and other instructions with maskes used the wrong number of
|
||||
// bits.
|
||||
if (Name == "x86.sse41.pblendw")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
|
||||
NewFn);
|
||||
if (Name == "x86.sse41.blendpd")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
|
||||
NewFn);
|
||||
if (Name == "x86.sse41.blendps")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
|
||||
NewFn);
|
||||
if (Name == "x86.sse41.insertps")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
|
||||
NewFn);
|
||||
|
@ -207,24 +206,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
if (Name == "x86.sse41.mpsadbw")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
|
||||
NewFn);
|
||||
if (Name == "x86.avx.blend.pd.256")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(
|
||||
F, Intrinsic::x86_avx_blend_pd_256, NewFn);
|
||||
if (Name == "x86.avx.blend.ps.256")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(
|
||||
F, Intrinsic::x86_avx_blend_ps_256, NewFn);
|
||||
if (Name == "x86.avx.dp.ps.256")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
|
||||
NewFn);
|
||||
if (Name == "x86.avx2.pblendw")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
|
||||
NewFn);
|
||||
if (Name == "x86.avx2.pblendd.128")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(
|
||||
F, Intrinsic::x86_avx2_pblendd_128, NewFn);
|
||||
if (Name == "x86.avx2.pblendd.256")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(
|
||||
F, Intrinsic::x86_avx2_pblendd_256, NewFn);
|
||||
if (Name == "x86.avx2.mpsadbw")
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
|
||||
NewFn);
|
||||
|
@ -609,6 +593,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
|
||||
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
|
||||
Shift);
|
||||
} else if (Name == "llvm.x86.sse41.pblendw" ||
|
||||
Name == "llvm.x86.sse41.blendpd" ||
|
||||
Name == "llvm.x86.sse41.blendps" ||
|
||||
Name == "llvm.x86.avx.blend.pd.256" ||
|
||||
Name == "llvm.x86.avx.blend.ps.256" ||
|
||||
Name == "llvm.x86.avx2.pblendw" ||
|
||||
Name == "llvm.x86.avx2.pblendd.128" ||
|
||||
Name == "llvm.x86.avx2.pblendd.256") {
|
||||
Value *Op0 = CI->getArgOperand(0);
|
||||
Value *Op1 = CI->getArgOperand(1);
|
||||
unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
|
||||
VectorType *VecTy = cast<VectorType>(CI->getType());
|
||||
unsigned NumElts = VecTy->getNumElements();
|
||||
|
||||
SmallVector<Constant*, 16> Idxs;
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
|
||||
Idxs.push_back(Builder.getInt32(Idx));
|
||||
}
|
||||
|
||||
Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
|
||||
} else {
|
||||
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
|
||||
if (Name == "llvm.x86.avx.vpermil.pd.256")
|
||||
|
@ -739,19 +744,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse41_pblendw:
|
||||
case Intrinsic::x86_sse41_blendpd:
|
||||
case Intrinsic::x86_sse41_blendps:
|
||||
case Intrinsic::x86_sse41_insertps:
|
||||
case Intrinsic::x86_sse41_dppd:
|
||||
case Intrinsic::x86_sse41_dpps:
|
||||
case Intrinsic::x86_sse41_mpsadbw:
|
||||
case Intrinsic::x86_avx_blend_pd_256:
|
||||
case Intrinsic::x86_avx_blend_ps_256:
|
||||
case Intrinsic::x86_avx_dp_ps_256:
|
||||
case Intrinsic::x86_avx2_pblendw:
|
||||
case Intrinsic::x86_avx2_pblendd_128:
|
||||
case Intrinsic::x86_avx2_pblendd_256:
|
||||
case Intrinsic::x86_avx2_mpsadbw: {
|
||||
// Need to truncate the last argument from i32 to i8 -- this argument models
|
||||
// an inherently 8-bit immediate operand to these x86 instructions.
|
||||
|
|
|
@ -21190,24 +21190,16 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
|
|||
default: return SDValue();
|
||||
// SSE/AVX/AVX2 blend intrinsics.
|
||||
case Intrinsic::x86_avx2_pblendvb:
|
||||
case Intrinsic::x86_avx2_pblendw:
|
||||
case Intrinsic::x86_avx2_pblendd_128:
|
||||
case Intrinsic::x86_avx2_pblendd_256:
|
||||
// Don't try to simplify this intrinsic if we don't have AVX2.
|
||||
if (!Subtarget->hasAVX2())
|
||||
return SDValue();
|
||||
// FALL-THROUGH
|
||||
case Intrinsic::x86_avx_blend_pd_256:
|
||||
case Intrinsic::x86_avx_blend_ps_256:
|
||||
case Intrinsic::x86_avx_blendv_pd_256:
|
||||
case Intrinsic::x86_avx_blendv_ps_256:
|
||||
// Don't try to simplify this intrinsic if we don't have AVX.
|
||||
if (!Subtarget->hasAVX())
|
||||
return SDValue();
|
||||
// FALL-THROUGH
|
||||
case Intrinsic::x86_sse41_pblendw:
|
||||
case Intrinsic::x86_sse41_blendpd:
|
||||
case Intrinsic::x86_sse41_blendps:
|
||||
case Intrinsic::x86_sse41_blendvps:
|
||||
case Intrinsic::x86_sse41_blendvpd:
|
||||
case Intrinsic::x86_sse41_pblendvb: {
|
||||
|
|
|
@ -6955,6 +6955,34 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
|
||||
multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
|
||||
X86MemOperand x86memop, bit Is2Addr = 1,
|
||||
OpndItins itins = DEFAULT_ITINS> {
|
||||
let isCommutable = 1 in
|
||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
|
||||
itins.rr>, Sched<[itins.Sched]>;
|
||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1,
|
||||
(bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let isCommutable = 0 in {
|
||||
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
|
||||
|
@ -6963,26 +6991,24 @@ let Predicates = [HasAVX] in {
|
|||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
|
||||
VR128, loadv4f32, f128mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
|
||||
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
|
||||
int_x86_avx_blend_ps_256, VR256, loadv8f32,
|
||||
f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
|
||||
VEX_4V, VEX_L;
|
||||
defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
|
||||
VR128, loadv4f32, f128mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
|
||||
defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
|
||||
VR256, loadv8f32, f256mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
|
||||
VR128, loadv2f64, f128mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
|
||||
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
|
||||
int_x86_avx_blend_pd_256,VR256, loadv4f64,
|
||||
f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
|
||||
VEX_4V, VEX_L;
|
||||
defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
|
||||
VR128, loadv2f64, f128mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
|
||||
defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
|
||||
VR256, loadv4f64, f256mem, 0,
|
||||
DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
|
||||
}
|
||||
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
|
||||
VR128, loadv2i64, i128mem, 0,
|
||||
DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
|
||||
defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
|
||||
VR128, loadv2i64, i128mem, 0,
|
||||
DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
|
||||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
|
||||
|
@ -7004,9 +7030,9 @@ let Predicates = [HasAVX2] in {
|
|||
VR256, loadv4i64, i256mem, 0,
|
||||
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
|
||||
}
|
||||
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
|
||||
VR256, loadv4i64, i256mem, 0,
|
||||
DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
|
||||
defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
|
||||
VR256, loadv4i64, i256mem, 0,
|
||||
DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
|
@ -7016,16 +7042,16 @@ let Constraints = "$src1 = $dst" in {
|
|||
1, SSE_MPSADBW_ITINS>;
|
||||
}
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
|
||||
VR128, memopv4f32, f128mem,
|
||||
1, SSE_INTALU_ITINS_FBLEND_P>;
|
||||
defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
|
||||
VR128, memopv4f32, f128mem,
|
||||
1, SSE_INTALU_ITINS_FBLEND_P>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
|
||||
VR128, memopv2f64, f128mem,
|
||||
1, SSE_INTALU_ITINS_FBLEND_P>;
|
||||
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
|
||||
VR128, memopv2i64, i128mem,
|
||||
1, SSE_INTALU_ITINS_BLEND_P>;
|
||||
defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
|
||||
VR128, memopv2f64, f128mem,
|
||||
1, SSE_INTALU_ITINS_FBLEND_P>;
|
||||
defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
|
||||
VR128, memopv2i64, i128mem,
|
||||
1, SSE_INTALU_ITINS_BLEND_P>;
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
|
||||
VR128, memopv4f32, f128mem, 1,
|
||||
|
@ -7116,32 +7142,12 @@ let Predicates = [HasAVX] in {
|
|||
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
|
||||
(v4f64 VR256:$src2))),
|
||||
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
||||
|
||||
def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
|
||||
(imm:$mask))),
|
||||
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
|
||||
def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
|
||||
(imm:$mask))),
|
||||
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
|
||||
|
||||
def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
|
||||
(v32i8 VR256:$src2))),
|
||||
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
||||
def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
|
||||
(imm:$mask))),
|
||||
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
|
||||
}
|
||||
|
||||
// Patterns
|
||||
|
@ -7260,17 +7266,6 @@ let Predicates = [UseSSE41] in {
|
|||
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
|
||||
(v2f64 VR128:$src2))),
|
||||
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
|
||||
|
||||
def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||
(imm:$mask))),
|
||||
(BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
|
||||
}
|
||||
|
||||
let SchedRW = [WriteLoad] in {
|
||||
|
@ -8238,38 +8233,31 @@ let Predicates = [HasF16C] in {
|
|||
// AVX2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
|
||||
multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
|
||||
X86MemOperand x86memop> {
|
||||
/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate
|
||||
multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
|
||||
X86MemOperand x86memop> {
|
||||
let isCommutable = 1 in
|
||||
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
|
||||
Sched<[WriteBlend]>, VEX_4V;
|
||||
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1,
|
||||
(bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
|
||||
(OpVT (OpNode RC:$src1,
|
||||
(bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
|
||||
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
|
||||
}
|
||||
|
||||
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
|
||||
VR128, loadv2i64, i128mem>;
|
||||
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
|
||||
VR256, loadv4i64, i256mem>, VEX_L;
|
||||
|
||||
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
|
||||
imm:$mask)),
|
||||
(VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
|
||||
def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
|
||||
imm:$mask)),
|
||||
(VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
|
||||
defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
|
||||
VR128, loadv2i64, i128mem>;
|
||||
defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
|
||||
VR256, loadv4i64, i256mem>, VEX_L;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPBROADCAST - Load from memory and broadcast to all elements of the
|
||||
|
|
|
@ -38,3 +38,27 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
|||
ret <2 x i64> %res
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vblendpd
|
||||
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vblendps
|
||||
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
; CHECK: vpblendw
|
||||
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
|
|
@ -784,22 +784,6 @@ define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
|
|||
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK: vblendpd
|
||||
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK: vblendps
|
||||
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||
; CHECK: vblendvpd
|
||||
%res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
|
||||
|
@ -865,14 +849,6 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8
|
|||
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
; CHECK: vpblendw
|
||||
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
|
||||
; CHECK: vphminposuw
|
||||
%res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
|
||||
|
@ -1736,22 +1712,6 @@ define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1)
|
|||
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK: vblendpd
|
||||
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK: vblendps
|
||||
%res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||
; CHECK: vblendvpd
|
||||
%res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
|
||||
|
|
|
@ -46,11 +46,11 @@ declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind
|
|||
|
||||
define <4 x float> @commute_fold_vblendps_128(<4 x float> %a, <4 x float>* %b) #0 {
|
||||
%1 = load <4 x float>, <4 x float>* %b
|
||||
%2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
|
||||
%2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 5)
|
||||
ret <4 x float> %2
|
||||
|
||||
;LABEL: commute_fold_vblendps_128
|
||||
;CHECK: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
|
||||
;CHECK: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
|
||||
;CHECK-NEXT: retq
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
|
|
@ -13,11 +13,11 @@ declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind rea
|
|||
|
||||
define <4 x float> @commute_fold_blendps(<4 x float> %a, <4 x float>* %b) #0 {
|
||||
%1 = load <4 x float>, <4 x float>* %b
|
||||
%2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
|
||||
%2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 5)
|
||||
ret <4 x float> %2
|
||||
|
||||
;LABEL: commute_fold_blendps
|
||||
;CHECK: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
|
||||
;CHECK: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
|
||||
;CHECK-NEXT: retq
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue