forked from OSchip/llvm-project
Custom lower vector shift intrinsics to target-specific nodes and remove the patterns that are no longer needed.
llvm-svn: 148684
This commit is contained in:
parent
4bc649943f
commit
5e80db4e4f
|
@ -64,17 +64,6 @@ static cl::opt<bool> UseRegMask("x86-use-regmask",
|
|||
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
|
||||
SDValue V2);
|
||||
|
||||
static SDValue Insert128BitVector(SDValue Result,
|
||||
SDValue Vec,
|
||||
SDValue Idx,
|
||||
SelectionDAG &DAG,
|
||||
DebugLoc dl);
|
||||
|
||||
static SDValue Extract128BitVector(SDValue Vec,
|
||||
SDValue Idx,
|
||||
SelectionDAG &DAG,
|
||||
DebugLoc dl);
|
||||
|
||||
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
|
||||
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
|
||||
/// simple subregister reference. Idx is an index in the 128 bits we
|
||||
|
@ -9157,6 +9146,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
|
|||
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
|
||||
}
|
||||
|
||||
// getTargetVShiftNOde - Handle vector element shifts where the shift amount
|
||||
// may or may not be a constant. Takes immediate version of shift as input.
|
||||
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
|
||||
SDValue SrcOp, SDValue ShAmt,
|
||||
SelectionDAG &DAG) {
|
||||
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
|
||||
|
||||
if (isa<ConstantSDNode>(ShAmt)) {
|
||||
switch (Opc) {
|
||||
default: llvm_unreachable("Unknown target vector shift node");
|
||||
case X86ISD::VSHLI:
|
||||
case X86ISD::VSRLI:
|
||||
case X86ISD::VSRAI:
|
||||
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
|
||||
}
|
||||
}
|
||||
|
||||
// Change opcode to non-immediate version
|
||||
switch (Opc) {
|
||||
default: llvm_unreachable("Unknown target vector shift node");
|
||||
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
|
||||
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
|
||||
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
|
||||
}
|
||||
|
||||
// Need to build a vector containing shift amount
|
||||
// Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
|
||||
SDValue ShOps[4];
|
||||
ShOps[0] = ShAmt;
|
||||
ShOps[1] = DAG.getConstant(0, MVT::i32);
|
||||
ShOps[2] = DAG.getUNDEF(MVT::i32);
|
||||
ShOps[3] = DAG.getUNDEF(MVT::i32);
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
|
||||
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
|
||||
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
|
||||
}
|
||||
|
||||
SDValue
|
||||
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
@ -9359,24 +9385,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
|
|||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
// Fix vector shift instructions where the last operand is a non-immediate
|
||||
// i32 value.
|
||||
case Intrinsic::x86_avx2_pslli_w:
|
||||
case Intrinsic::x86_avx2_pslli_d:
|
||||
case Intrinsic::x86_avx2_pslli_q:
|
||||
case Intrinsic::x86_avx2_psrli_w:
|
||||
case Intrinsic::x86_avx2_psrli_d:
|
||||
case Intrinsic::x86_avx2_psrli_q:
|
||||
case Intrinsic::x86_avx2_psrai_w:
|
||||
case Intrinsic::x86_avx2_psrai_d:
|
||||
// SSE/AVX shift intrinsics
|
||||
case Intrinsic::x86_sse2_psll_w:
|
||||
case Intrinsic::x86_sse2_psll_d:
|
||||
case Intrinsic::x86_sse2_psll_q:
|
||||
case Intrinsic::x86_avx2_psll_w:
|
||||
case Intrinsic::x86_avx2_psll_d:
|
||||
case Intrinsic::x86_avx2_psll_q:
|
||||
return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse2_psrl_w:
|
||||
case Intrinsic::x86_sse2_psrl_d:
|
||||
case Intrinsic::x86_sse2_psrl_q:
|
||||
case Intrinsic::x86_avx2_psrl_w:
|
||||
case Intrinsic::x86_avx2_psrl_d:
|
||||
case Intrinsic::x86_avx2_psrl_q:
|
||||
return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse2_psra_w:
|
||||
case Intrinsic::x86_sse2_psra_d:
|
||||
case Intrinsic::x86_avx2_psra_w:
|
||||
case Intrinsic::x86_avx2_psra_d:
|
||||
return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::x86_sse2_pslli_w:
|
||||
case Intrinsic::x86_sse2_pslli_d:
|
||||
case Intrinsic::x86_sse2_pslli_q:
|
||||
case Intrinsic::x86_avx2_pslli_w:
|
||||
case Intrinsic::x86_avx2_pslli_d:
|
||||
case Intrinsic::x86_avx2_pslli_q:
|
||||
return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG);
|
||||
case Intrinsic::x86_sse2_psrli_w:
|
||||
case Intrinsic::x86_sse2_psrli_d:
|
||||
case Intrinsic::x86_sse2_psrli_q:
|
||||
case Intrinsic::x86_avx2_psrli_w:
|
||||
case Intrinsic::x86_avx2_psrli_d:
|
||||
case Intrinsic::x86_avx2_psrli_q:
|
||||
return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG);
|
||||
case Intrinsic::x86_sse2_psrai_w:
|
||||
case Intrinsic::x86_sse2_psrai_d:
|
||||
case Intrinsic::x86_avx2_psrai_w:
|
||||
case Intrinsic::x86_avx2_psrai_d:
|
||||
return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), DAG);
|
||||
// Fix vector shift instructions where the last operand is a non-immediate
|
||||
// i32 value.
|
||||
case Intrinsic::x86_mmx_pslli_w:
|
||||
case Intrinsic::x86_mmx_pslli_d:
|
||||
case Intrinsic::x86_mmx_pslli_q:
|
||||
|
@ -9390,103 +9445,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
|
|||
return SDValue();
|
||||
|
||||
unsigned NewIntNo = 0;
|
||||
EVT ShAmtVT = MVT::v4i32;
|
||||
switch (IntNo) {
|
||||
case Intrinsic::x86_sse2_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_w;
|
||||
case Intrinsic::x86_mmx_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_d;
|
||||
case Intrinsic::x86_mmx_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_d;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_q;
|
||||
case Intrinsic::x86_mmx_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_q;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_w;
|
||||
case Intrinsic::x86_mmx_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_d;
|
||||
case Intrinsic::x86_mmx_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_d;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_q;
|
||||
case Intrinsic::x86_mmx_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_q;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psra_w;
|
||||
case Intrinsic::x86_mmx_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psra_d;
|
||||
case Intrinsic::x86_mmx_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_d;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_avx2_psll_w;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_avx2_psll_d;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_avx2_psll_q;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_avx2_psrl_w;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_avx2_psrl_d;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_avx2_psrl_q;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_avx2_psra_w;
|
||||
break;
|
||||
case Intrinsic::x86_avx2_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_avx2_psra_d;
|
||||
break;
|
||||
default: {
|
||||
ShAmtVT = MVT::v2i32;
|
||||
switch (IntNo) {
|
||||
case Intrinsic::x86_mmx_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_d;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_q;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_d;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_q;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_d;
|
||||
break;
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
}
|
||||
|
||||
// The vector shift intrinsics with scalars use 32b shift amounts but
|
||||
// the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
|
||||
// to be zero.
|
||||
SDValue ShOps[4];
|
||||
ShOps[0] = ShAmt;
|
||||
ShOps[1] = DAG.getConstant(0, MVT::i32);
|
||||
if (ShAmtVT == MVT::v4i32) {
|
||||
ShOps[2] = DAG.getUNDEF(MVT::i32);
|
||||
ShOps[3] = DAG.getUNDEF(MVT::i32);
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
|
||||
} else {
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
|
||||
DAG.getConstant(0, MVT::i32));
|
||||
// FIXME this must be lowered to get rid of the invalid type.
|
||||
}
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
|
||||
|
@ -10006,43 +9998,6 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
|
|||
return Res;
|
||||
}
|
||||
|
||||
// getTargetVShiftNOde - Handle vector element shifts where the shift amount
|
||||
// may or may not be a constant. Takes immediate version of shift as input.
|
||||
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
|
||||
SDValue SrcOp, SDValue ShAmt,
|
||||
SelectionDAG &DAG) {
|
||||
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
|
||||
|
||||
if (isa<ConstantSDNode>(ShAmt)) {
|
||||
switch (Opc) {
|
||||
default: llvm_unreachable("Unknown target vector shift node");
|
||||
case X86ISD::VSHLI:
|
||||
case X86ISD::VSRLI:
|
||||
case X86ISD::VSRAI:
|
||||
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
|
||||
}
|
||||
}
|
||||
|
||||
// Change opcode to non-immediate version
|
||||
switch (Opc) {
|
||||
default: llvm_unreachable("Unknown target vector shift node");
|
||||
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
|
||||
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
|
||||
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
|
||||
}
|
||||
|
||||
// Need to build a vector containing shift amount
|
||||
// Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
|
||||
SDValue ShOps[4];
|
||||
ShOps[0] = ShAmt;
|
||||
ShOps[1] = DAG.getConstant(0, MVT::i32);
|
||||
ShOps[2] = DAG.getUNDEF(MVT::i32);
|
||||
ShOps[3] = DAG.getUNDEF(MVT::i32);
|
||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
|
||||
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
|
||||
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
|
|
|
@ -3511,8 +3511,9 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
|
|||
}
|
||||
|
||||
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
||||
string OpcodeStr, Intrinsic IntId,
|
||||
Intrinsic IntId2, RegisterClass RC,
|
||||
string OpcodeStr, SDNode OpNode,
|
||||
SDNode OpNode2, RegisterClass RC,
|
||||
ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
|
||||
bit Is2Addr = 1> {
|
||||
// src2 is always 128-bit
|
||||
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
|
@ -3520,19 +3521,20 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
|||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>;
|
||||
[(set RC:$dst, (OpNode (DstVT RC:$src1), (SrcVT VR128:$src2)))]>;
|
||||
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, i128mem:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memopv2i64 addr:$src2))))]>;
|
||||
[(set RC:$dst, (OpNode (DstVT RC:$src1),
|
||||
(bc_frag (memopv2i64 addr:$src2))))]>;
|
||||
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
|
||||
(ins RC:$src1, i32i8imm:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>;
|
||||
[(set RC:$dst, (OpNode2 (DstVT RC:$src1), (i32 imm:$src2)))]>;
|
||||
}
|
||||
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
@ -3728,32 +3730,24 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
|
|||
//===---------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
|
||||
int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
|
||||
int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
|
||||
int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
|
||||
VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
|
||||
VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
|
||||
VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
|
||||
|
||||
defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
|
||||
int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
|
||||
int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
|
||||
int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
|
||||
VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
|
||||
VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
|
||||
VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V;
|
||||
|
||||
defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
|
||||
int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
|
||||
int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
|
||||
VR128, 0>, VEX_4V;
|
||||
defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
|
||||
VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
|
||||
VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
// 128-bit logical shifts.
|
||||
|
@ -3774,32 +3768,24 @@ let ExeDomain = SSEPackedInt in {
|
|||
} // Predicates = [HasAVX]
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
|
||||
int_x86_avx2_psll_w, int_x86_avx2_pslli_w,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
|
||||
int_x86_avx2_psll_d, int_x86_avx2_pslli_d,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
|
||||
int_x86_avx2_psll_q, int_x86_avx2_pslli_q,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
|
||||
VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
|
||||
VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
|
||||
VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
|
||||
|
||||
defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
|
||||
int_x86_avx2_psrl_w, int_x86_avx2_psrli_w,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
|
||||
int_x86_avx2_psrl_d, int_x86_avx2_psrli_d,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
|
||||
int_x86_avx2_psrl_q, int_x86_avx2_psrli_q,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
|
||||
VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
|
||||
VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
|
||||
VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V;
|
||||
|
||||
defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
|
||||
int_x86_avx2_psra_w, int_x86_avx2_psrai_w,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
|
||||
int_x86_avx2_psra_d, int_x86_avx2_psrai_d,
|
||||
VR256, 0>, VEX_4V;
|
||||
defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
|
||||
VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V;
|
||||
defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
|
||||
VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V;
|
||||
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
// 256-bit logical shifts.
|
||||
|
@ -3820,32 +3806,24 @@ let ExeDomain = SSEPackedInt in {
|
|||
} // Predicates = [HasAVX2]
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
|
||||
int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
|
||||
VR128>;
|
||||
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
|
||||
int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
|
||||
VR128>;
|
||||
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
|
||||
int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
|
||||
VR128>;
|
||||
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
|
||||
VR128, v8i16, v8i16, bc_v8i16>;
|
||||
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
|
||||
VR128, v4i32, v4i32, bc_v4i32>;
|
||||
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
|
||||
VR128, v2i64, v2i64, bc_v2i64>;
|
||||
|
||||
defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
|
||||
int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
|
||||
VR128>;
|
||||
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
|
||||
int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
|
||||
VR128>;
|
||||
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
|
||||
int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
|
||||
VR128>;
|
||||
defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
|
||||
VR128, v8i16, v8i16, bc_v8i16>;
|
||||
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
|
||||
VR128, v4i32, v4i32, bc_v4i32>;
|
||||
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
|
||||
VR128, v2i64, v2i64, bc_v2i64>;
|
||||
|
||||
defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
|
||||
int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
|
||||
VR128>;
|
||||
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
|
||||
int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
|
||||
VR128>;
|
||||
defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
|
||||
VR128, v8i16, v8i16, bc_v8i16>;
|
||||
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
|
||||
VR128, v4i32, v4i32, bc_v4i32>;
|
||||
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
// 128-bit logical shifts.
|
||||
|
@ -3876,60 +3854,6 @@ let Predicates = [HasAVX] in {
|
|||
(VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
|
||||
(VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
|
||||
def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSLLWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSLLDri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSLLQri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSRLWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSRLDri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSRLQri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSRAWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
|
||||
(VPSRADri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSLLWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSLLWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSLLDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSLLDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
|
||||
(VPSLLQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
|
||||
(VPSLLQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSRLWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSRLWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSRLDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSRLDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
|
||||
(VPSRLQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
|
||||
(VPSRLQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSRAWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSRAWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSRADrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSRADrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
|
@ -3937,60 +3861,6 @@ let Predicates = [HasAVX2] in {
|
|||
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
|
||||
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
|
||||
|
||||
def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSLLWYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSLLDYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSLLQYri VR256:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSRLWYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSRLDYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSRLQYri VR256:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSRAWYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))),
|
||||
(VPSRADYri VR256:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSLLWYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSLLWYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSLLDYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSLLDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))),
|
||||
(VPSLLQYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))),
|
||||
(VPSLLQYrm VR256:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSRLWYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSRLWYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSRLDYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSRLDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))),
|
||||
(VPSRLQYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))),
|
||||
(VPSRLQYrm VR256:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))),
|
||||
(VPSRAWYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(VPSRAWYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))),
|
||||
(VPSRADYrr VR256:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(VPSRADYrm VR256:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
|
@ -4006,60 +3876,6 @@ let Predicates = [HasSSE2] in {
|
|||
(PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
|
||||
(PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
|
||||
def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSLLWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSLLDri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSLLQri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSRLWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSRLDri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
|
||||
(PSRLQri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
|
||||
(PSRAWri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
|
||||
(PSRADri VR128:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(PSLLWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(PSLLWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(PSLLDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(PSLLDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
|
||||
(PSLLQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
|
||||
(PSLLQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(PSRLWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(PSRLWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(PSRLDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(PSRLDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
|
||||
(PSRLQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
|
||||
(PSRLQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
|
||||
(PSRAWrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
|
||||
(PSRAWrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(PSRADrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(PSRADrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
Loading…
Reference in New Issue