forked from OSchip/llvm-project
Select vector shifts whose i32 shift-amount operand is not an immediate by first moving that operand into a vector register and then using the register-count form of the shift.
llvm-svn: 50619
This commit is contained in:
parent
2d7a4d70c3
commit
d9481366e3
|
@ -4909,7 +4909,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
|
|||
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
|
||||
switch (IntNo) {
|
||||
default: return SDOperand(); // Don't custom lower most intrinsics.
|
||||
// Comparison intrinsics.
|
||||
// Comparison intrinsics.
|
||||
case Intrinsic::x86_sse_comieq_ss:
|
||||
case Intrinsic::x86_sse_comilt_ss:
|
||||
case Intrinsic::x86_sse_comile_ss:
|
||||
|
@ -5010,6 +5010,95 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
|
|||
DAG.getConstant(X86CC, MVT::i8), Cond);
|
||||
return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
// Fix vector shift instructions where the last operand is a non-immediate
|
||||
// i32 value.
|
||||
case Intrinsic::x86_sse2_pslli_w:
|
||||
case Intrinsic::x86_sse2_pslli_d:
|
||||
case Intrinsic::x86_sse2_pslli_q:
|
||||
case Intrinsic::x86_sse2_psrli_w:
|
||||
case Intrinsic::x86_sse2_psrli_d:
|
||||
case Intrinsic::x86_sse2_psrli_q:
|
||||
case Intrinsic::x86_sse2_psrai_w:
|
||||
case Intrinsic::x86_sse2_psrai_d:
|
||||
case Intrinsic::x86_mmx_pslli_w:
|
||||
case Intrinsic::x86_mmx_pslli_d:
|
||||
case Intrinsic::x86_mmx_pslli_q:
|
||||
case Intrinsic::x86_mmx_psrli_w:
|
||||
case Intrinsic::x86_mmx_psrli_d:
|
||||
case Intrinsic::x86_mmx_psrli_q:
|
||||
case Intrinsic::x86_mmx_psrai_w:
|
||||
case Intrinsic::x86_mmx_psrai_d: {
|
||||
SDOperand ShAmt = Op.getOperand(2);
|
||||
if (isa<ConstantSDNode>(ShAmt))
|
||||
return SDOperand();
|
||||
|
||||
unsigned NewIntNo = 0;
|
||||
MVT::ValueType ShAmtVT = MVT::v4i32;
|
||||
switch (IntNo) {
|
||||
case Intrinsic::x86_sse2_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_d;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_sse2_psll_q;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_d;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_sse2_psrl_q;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_sse2_psra_w;
|
||||
break;
|
||||
case Intrinsic::x86_sse2_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_sse2_psra_d;
|
||||
break;
|
||||
default: {
|
||||
ShAmtVT = MVT::v2i32;
|
||||
switch (IntNo) {
|
||||
case Intrinsic::x86_mmx_pslli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_pslli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_d;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_pslli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psll_q;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_d;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrli_q:
|
||||
NewIntNo = Intrinsic::x86_mmx_psrl_q;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrai_w:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_w;
|
||||
break;
|
||||
case Intrinsic::x86_mmx_psrai_d:
|
||||
NewIntNo = Intrinsic::x86_mmx_psra_d;
|
||||
break;
|
||||
default: abort(); // Can't reach here.
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT,
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt));
|
||||
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||
DAG.getConstant(NewIntNo, MVT::i32),
|
||||
Op.getOperand(1), ShAmt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
|
||||
|
||||
define i64 @t1(<1 x i64> %mm1) nounwind {
|
||||
entry:
|
||||
|
@ -19,3 +20,13 @@ entry:
|
|||
}
|
||||
|
||||
declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
|
||||
define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
|
||||
entry:
|
||||
%tmp6 = bitcast <1 x i64> %mm1 to <4 x i16> ; <<4 x i16>> [#uses=1]
|
||||
%tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone ; <<4 x i16>> [#uses=1]
|
||||
%retval1314 = bitcast <4 x i16> %tmp8 to i64 ; <i64> [#uses=1]
|
||||
ret i64 %retval1314
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
|
||||
|
||||
define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
|
||||
entry:
|
||||
%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 %bits ) nounwind readnone ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i64> @t2(<2 x i64> %x1) nounwind {
|
||||
entry:
|
||||
%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind {
|
||||
entry:
|
||||
%tmp2 = bitcast <2 x i64> %x1 to <8 x i16> ; <<8 x i16>> [#uses=1]
|
||||
%tmp4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w( <8 x i16> %tmp2, i32 %bits ) nounwind readnone ; <<8 x i16>> [#uses=1]
|
||||
%tmp5 = bitcast <8 x i16> %tmp4 to <2 x i64> ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %tmp5
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
|
||||
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
|
Loading…
Reference in New Issue