forked from OSchip/llvm-project
X86: use vpsllvd (& friends) for 16-bit shifts on Haswell
llvm-svn: 201558
This commit is contained in:
parent
50bef914a6
commit
f06df5866f
|
@ -13161,6 +13161,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
|
|||
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
|
||||
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
|
||||
}
|
||||
|
||||
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
|
||||
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
|
||||
|
||||
|
@ -13204,6 +13205,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
|
|||
return R;
|
||||
}
|
||||
|
||||
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
|
||||
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
|
||||
// solution better.
|
||||
if (Subtarget->hasInt256() && VT == MVT::v8i16) {
|
||||
MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
|
||||
unsigned ExtOpc =
|
||||
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
R = DAG.getNode(ExtOpc, dl, NewVT, R);
|
||||
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
|
||||
}
|
||||
|
||||
// Decompose 256-bit shifts into smaller 128-bit shifts.
|
||||
if (VT.is256BitVector()) {
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
|
|
@ -266,3 +266,36 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
|
|||
%c = sext <8 x i16> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
||||
; Test: per-element variable left shift (shl) of <8 x i16> by <8 x i16>.
; The FileCheck directives below expect both operands to be zero-extended
; into ymm registers with vpmovzxwd (in either order), a single vpsllvd on
; the widened values, and then vpshufb followed by vpermq (presumably the
; narrowing back to 16-bit elements -- confirm against the lowering code).
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK-LABEL: variable_shl16:
|
||||
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
|
||||
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
|
||||
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
|
||||
; CHECK: vpshufb
|
||||
; CHECK: vpermq
|
||||
%res = shl <8 x i16> %lhs, %rhs
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Test: per-element variable arithmetic right shift (ashr) of <8 x i16>.
; The FileCheck directives below expect the shift amount to be zero-extended
; with vpmovzxwd while the shifted value is sign-extended with vpmovsxwd
; (in either order), a single vpsravd on the widened values, and then
; vpshufb followed by vpermq (presumably the narrowing back to 16-bit
; elements -- confirm against the lowering code).
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK-LABEL: variable_ashr16:
|
||||
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
|
||||
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
|
||||
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
|
||||
; CHECK: vpshufb
|
||||
; CHECK: vpermq
|
||||
%res = ashr <8 x i16> %lhs, %rhs
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
; Test: per-element variable logical right shift (lshr) of <8 x i16>.
; The FileCheck directives below expect both operands to be zero-extended
; into ymm registers with vpmovzxwd (in either order), a single vpsrlvd on
; the widened values, and then vpshufb followed by vpermq (presumably the
; narrowing back to 16-bit elements -- confirm against the lowering code).
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
|
||||
; CHECK-LABEL: variable_lshr16:
|
||||
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
|
||||
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
|
||||
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
|
||||
; CHECK: vpshufb
|
||||
; CHECK: vpermq
|
||||
%res = lshr <8 x i16> %lhs, %rhs
|
||||
ret <8 x i16> %res
|
||||
}
|
Loading…
Reference in New Issue