Extend VPBLENDVB and VPSIGN lowering to work for AVX2.

llvm-svn: 144987
This commit is contained in:
Craig Topper 2011-11-19 07:07:26 +00:00
parent 75ffc5fbb5
commit de6b73bb4d
4 changed files with 152 additions and 107 deletions

View File

@ -13859,15 +13859,16 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return R; return R;
EVT VT = N->getValueType(0); EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
SDValue N0 = N->getOperand(0); SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1); SDValue N1 = N->getOperand(1);
// look for psign/blend // look for psign/blend
if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { if (VT == MVT::v2i64 || VT == MVT::v4i64) {
if (VT == MVT::v2i64) { if (!(Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
(VT == MVT::v4i64 && !Subtarget->hasAVX2()))
return SDValue();
// Canonicalize pandn to RHS // Canonicalize pandn to RHS
if (N0.getOpcode() == X86ISD::ANDNP) if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1); std::swap(N0, N1);
@ -13905,6 +13906,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d: case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
break; break;
default: return SDValue(); default: return SDValue();
} }
@ -13935,22 +13938,26 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
} }
if (Opc) { if (Opc) {
SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1)); SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign); return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
} }
} }
// PBLENDVB only available on SSE 4.1 // PBLENDVB only available on SSE 4.1
if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
return SDValue(); return SDValue();
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y); Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
} Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
} }
} }
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1); std::swap(N0, N1);

View File

@ -52,13 +52,13 @@ def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>; SDTCisSameAs<0,2>]>>;
def X86psignb : SDNode<"X86ISD::PSIGNB", def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>; SDTCisSameAs<0,2>]>>;
def X86psignw : SDNode<"X86ISD::PSIGNW", def X86psignw : SDNode<"X86ISD::PSIGNW",
SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>; SDTCisSameAs<0,2>]>>;
def X86psignd : SDNode<"X86ISD::PSIGND", def X86psignd : SDNode<"X86ISD::PSIGND",
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>; SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB", def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;

View File

@ -3824,51 +3824,51 @@ let ExeDomain = SSEPackedInt in {
let Predicates = [HasAVX] in { let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
(v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>; (VPSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
(v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>; (VPSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's. // Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
(v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
(v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
} }
let Predicates = [HasAVX2] in { let Predicates = [HasAVX2] in {
def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
(v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
(v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2), def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
(v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>; (VPSLLDQYri VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2), def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
(v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>; (VPSRLDQYri VR256:$src1, imm:$src2)>;
} }
let Predicates = [HasSSE2] in { let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, imm:$src2))>; (PSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
(v2i64 (PSRLDQri VR128:$src1, imm:$src2))>; (PSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's. // Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
(v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
(v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
} }
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
@ -5316,11 +5316,11 @@ let isCommutable = 0 in {
int_x86_avx2_pmadd_ub_sw>, VEX_4V; int_x86_avx2_pmadd_ub_sw>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
int_x86_avx2_pshuf_b>, VEX_4V; int_x86_avx2_pshuf_b>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8, defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
int_x86_avx2_psign_b>, VEX_4V; int_x86_avx2_psign_b>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16, defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
int_x86_avx2_psign_w>, VEX_4V; int_x86_avx2_psign_w>, VEX_4V;
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32, defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
int_x86_avx2_psign_d>, VEX_4V; int_x86_avx2_psign_d>, VEX_4V;
} }
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
@ -5363,11 +5363,11 @@ let Predicates = [HasSSSE3] in {
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>; (PSHUFBrm128 VR128:$src, addr:$mask)>;
def : Pat<(X86psignb VR128:$src1, VR128:$src2), def : Pat<(v16i8 (X86psignb VR128:$src1, VR128:$src2)),
(PSIGNBrr128 VR128:$src1, VR128:$src2)>; (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(X86psignw VR128:$src1, VR128:$src2), def : Pat<(v8i16 (X86psignw VR128:$src1, VR128:$src2)),
(PSIGNWrr128 VR128:$src1, VR128:$src2)>; (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(X86psignd VR128:$src1, VR128:$src2), def : Pat<(v4i32 (X86psignd VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>; (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
} }
@ -5377,14 +5377,23 @@ let Predicates = [HasAVX] in {
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(VPSHUFBrm128 VR128:$src, addr:$mask)>; (VPSHUFBrm128 VR128:$src, addr:$mask)>;
def : Pat<(X86psignb VR128:$src1, VR128:$src2), def : Pat<(v16i8 (X86psignb VR128:$src1, VR128:$src2)),
(VPSIGNBrr128 VR128:$src1, VR128:$src2)>; (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(X86psignw VR128:$src1, VR128:$src2), def : Pat<(v8i16 (X86psignw VR128:$src1, VR128:$src2)),
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>; (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(X86psignd VR128:$src1, VR128:$src2), def : Pat<(v4i32 (X86psignd VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>; (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
} }
// AVX2 selection patterns for the X86psignb/X86psignw/X86psignd nodes on
// 256-bit register operands. Each pattern matches a register-register psign
// node whose result has the vector type named in the pattern and selects the
// corresponding VPSIGN*rr256 instruction. All three patterns are only active
// under the HasAVX2 predicate.
let Predicates = [HasAVX2] in {
// v32i8 result: X86psignb -> VPSIGNBrr256.
def : Pat<(v32i8 (X86psignb VR256:$src1, VR256:$src2)),
(VPSIGNBrr256 VR256:$src1, VR256:$src2)>;
// v16i16 result: X86psignw -> VPSIGNWrr256.
def : Pat<(v16i16 (X86psignw VR256:$src1, VR256:$src2)),
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
// v8i32 result: X86psignd -> VPSIGNDrr256.
def : Pat<(v8i32 (X86psignd VR256:$src1, VR256:$src2)),
(VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
}
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns // SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//

View File

@ -53,3 +53,32 @@ define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
%min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %min ret <32 x i8> %min
} }
; Sign-transfer idiom on <8 x i32>, written as a mask select:
;   m      = ashr(b, 31)   ; per lane: all ones when b is negative, else zero
;   result = (a & ~m) | (m & (0 - a))
; i.e. each lane yields -a where b is negative and a otherwise.
; The FileCheck directives below expect a psignd instruction to be emitted
; and no text matching "sub" between that instruction and the ret.
define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
entry:
; CHECK: signd:
; CHECK: psignd
; CHECK-NOT: sub
; CHECK: ret
; m: arithmetic shift by 31 replicates the sign bit of %b across each lane.
%b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; -a, the value chosen in lanes where the mask is set.
%sub = sub nsw <8 x i32> zeroinitializer, %a
; ~m, formed as an xor with all ones.
%0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; a & ~m
%1 = and <8 x i32> %a, %0
; m & -a
%2 = and <8 x i32> %b.lobit, %sub
; Merge the two disjoint halves of the select.
%cond = or <8 x i32> %1, %2
ret <8 x i32> %cond
}
; Mask-select idiom on <8 x i32>:
;   m      = ashr(b, 31)   ; per lane: all ones when b is negative, else zero
;   result = (c & ~m) | (a & m)
; i.e. each lane yields a where b is negative and c otherwise.
; The FileCheck directives below expect a pblendvb instruction to be emitted
; before the ret.
; The former "%sub = sub nsw ... zeroinitializer, %a" was never used by any
; instruction in this function and has been dropped.
define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
entry:
; CHECK: blendvb:
; CHECK: pblendvb
; CHECK: ret
; m: arithmetic shift by 31 replicates the sign bit of %b across each lane.
%b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; ~m, formed as an xor with all ones.
%0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; c & ~m
%1 = and <8 x i32> %c, %0
; a & m
%2 = and <8 x i32> %a, %b.lobit
; Merge the two disjoint halves of the select.
%cond = or <8 x i32> %1, %2
ret <8 x i32> %cond
}