forked from OSchip/llvm-project
Extend VPBLENDVB and VPSIGN lowering to work for AVX2.
llvm-svn: 144987
This commit is contained in:
parent
75ffc5fbb5
commit
de6b73bb4d
|
@ -13859,15 +13859,16 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return R;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
|
||||
// look for psign/blend
|
||||
if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
|
||||
if (VT == MVT::v2i64) {
|
||||
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
|
||||
if (!(Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
|
||||
(VT == MVT::v4i64 && !Subtarget->hasAVX2()))
|
||||
return SDValue();
|
||||
|
||||
// Canonicalize pandn to RHS
|
||||
if (N0.getOpcode() == X86ISD::ANDNP)
|
||||
std::swap(N0, N1);
|
||||
|
@ -13905,6 +13906,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
|||
switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
|
||||
case Intrinsic::x86_sse2_psrai_w:
|
||||
case Intrinsic::x86_sse2_psrai_d:
|
||||
case Intrinsic::x86_avx2_psrai_w:
|
||||
case Intrinsic::x86_avx2_psrai_d:
|
||||
break;
|
||||
default: return SDValue();
|
||||
}
|
||||
|
@ -13935,22 +13938,26 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
if (Opc) {
|
||||
SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
|
||||
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
|
||||
}
|
||||
}
|
||||
// PBLENDVB is only available with SSE4.1 or later (including AVX).
|
||||
if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
|
||||
return SDValue();
|
||||
|
||||
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
|
||||
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
|
||||
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
|
||||
Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
|
||||
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
|
||||
}
|
||||
EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
|
||||
|
||||
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
|
||||
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
|
||||
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
|
||||
Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
|
||||
}
|
||||
}
|
||||
|
||||
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
|
||||
return SDValue();
|
||||
|
||||
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
|
||||
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
|
||||
std::swap(N0, N1);
|
||||
|
|
|
@ -52,13 +52,13 @@ def X86andnp : SDNode<"X86ISD::ANDNP",
|
|||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86psignb : SDNode<"X86ISD::PSIGNB",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86psignw : SDNode<"X86ISD::PSIGNW",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86psignd : SDNode<"X86ISD::PSIGND",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
||||
|
|
|
@ -3824,51 +3824,51 @@ let ExeDomain = SSEPackedInt in {
|
|||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
|
||||
(VPSLLDQri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
|
||||
(VPSRLDQri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
|
||||
(v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
|
||||
// Shift up / down and insert zeros.
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
(VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
(VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
|
||||
(v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
|
||||
(v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
|
||||
(v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>;
|
||||
(VPSLLDQYri VR256:$src1, imm:$src2)>;
|
||||
def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
|
||||
(v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>;
|
||||
(VPSRLDQYri VR256:$src1, imm:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
|
||||
(PSLLDQri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
|
||||
(PSRLDQri VR128:$src1, imm:$src2)>;
|
||||
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
|
||||
(v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
|
||||
|
||||
// Shift up / down and insert zeros.
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
(PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
(PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -5316,11 +5316,11 @@ let isCommutable = 0 in {
|
|||
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
|
||||
defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
|
||||
int_x86_avx2_pshuf_b>, VEX_4V;
|
||||
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8,
|
||||
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
|
||||
int_x86_avx2_psign_b>, VEX_4V;
|
||||
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16,
|
||||
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
|
||||
int_x86_avx2_psign_w>, VEX_4V;
|
||||
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32,
|
||||
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
|
||||
int_x86_avx2_psign_d>, VEX_4V;
|
||||
}
|
||||
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
|
||||
|
@ -5363,11 +5363,11 @@ let Predicates = [HasSSSE3] in {
|
|||
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
|
||||
(PSHUFBrm128 VR128:$src, addr:$mask)>;
|
||||
|
||||
def : Pat<(X86psignb VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v16i8 (X86psignb VR128:$src1, VR128:$src2)),
|
||||
(PSIGNBrr128 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86psignw VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v8i16 (X86psignw VR128:$src1, VR128:$src2)),
|
||||
(PSIGNWrr128 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86psignd VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v4i32 (X86psignd VR128:$src1, VR128:$src2)),
|
||||
(PSIGNDrr128 VR128:$src1, VR128:$src2)>;
|
||||
}
|
||||
|
||||
|
@ -5377,14 +5377,23 @@ let Predicates = [HasAVX] in {
|
|||
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
|
||||
(VPSHUFBrm128 VR128:$src, addr:$mask)>;
|
||||
|
||||
def : Pat<(X86psignb VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v16i8 (X86psignb VR128:$src1, VR128:$src2)),
|
||||
(VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86psignw VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v8i16 (X86psignw VR128:$src1, VR128:$src2)),
|
||||
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86psignd VR128:$src1, VR128:$src2),
|
||||
def : Pat<(v4i32 (X86psignd VR128:$src1, VR128:$src2)),
|
||||
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v32i8 (X86psignb VR256:$src1, VR256:$src2)),
|
||||
(VPSIGNBrr256 VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v16i16 (X86psignw VR256:$src1, VR256:$src2)),
|
||||
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86psignd VR256:$src1, VR256:$src2)),
|
||||
(VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SSSE3 - Packed Align Instruction Patterns
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
|
@ -53,3 +53,32 @@ define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
|
|||
%min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
|
||||
ret <32 x i8> %min
|
||||
}
|
||||
|
||||
define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
|
||||
entry:
|
||||
; CHECK: signd:
|
||||
; CHECK: psignd
|
||||
; CHECK-NOT: sub
|
||||
; CHECK: ret
|
||||
%b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
%sub = sub nsw <8 x i32> zeroinitializer, %a
|
||||
%0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%1 = and <8 x i32> %a, %0
|
||||
%2 = and <8 x i32> %b.lobit, %sub
|
||||
%cond = or <8 x i32> %1, %2
|
||||
ret <8 x i32> %cond
|
||||
}
|
||||
|
||||
define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
|
||||
entry:
|
||||
; CHECK: blendvb:
|
||||
; CHECK: pblendvb
|
||||
; CHECK: ret
|
||||
%b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
|
||||
%sub = sub nsw <8 x i32> zeroinitializer, %a
|
||||
%0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%1 = and <8 x i32> %c, %0
|
||||
%2 = and <8 x i32> %a, %b.lobit
|
||||
%cond = or <8 x i32> %1, %2
|
||||
ret <8 x i32> %cond
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue