forked from OSchip/llvm-project
[X86] Add custom type legalization for extending v4i8/v4i16->v4i64.
Pre-SSE4.1 sext_invec for v2i64 is complicated because we don't have a v2i64 sra instruction. So instead we sign extend to i32 using unpack and sra, then copy the elements and do a v4i32 sra to fill with sign bits, then interleave the i32 sign extend and the sign bits. So really we're doing to two sign extends but only using half of the v4i32 intermediate result. When the result is more than 128 bits, default type legalization would prefer to split the destination type all the way down to v2i64 with shuffles followed by v16i8/v8i16->v2i64 sext_inreg operations. This results in more instructions than necessary because we are only utilizing the lower 2 elements of the v4i32 intermediate result. Instead we can custom split a v4i8/v4i16->v4i64 sign_extend. Then we can sign extend v4i8/v4i16->v4i32 invec producing a full v4i32 result. Create the sign bit vector as a v4i32 then split and interleave with the sign bits using an punpackldq and punpackhdq. llvm-svn: 347176
This commit is contained in:
parent
950f3842cc
commit
0468c860b7
|
@ -949,12 +949,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
|
||||
|
||||
if (ExperimentalVectorWideningLegalization) {
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
|
||||
}
|
||||
|
||||
// In the customized shift lowering, the legal v4i32/v2i64 cases
|
||||
|
@ -26349,10 +26351,34 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
return;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
assert((VT == MVT::v16i32 || VT == MVT::v8i64) && "Unexpected VT!");
|
||||
SDValue In = N->getOperand(0);
|
||||
EVT InVT = In.getValueType();
|
||||
if (InVT.is128BitVector()) {
|
||||
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
|
||||
(InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
|
||||
// Custom split this so we can extend i8/i16->i32 invec. This is better
|
||||
// since sign_extend_inreg i8/i16->i64 requires two sra operations. So
|
||||
// this allows the first to be shared.
|
||||
In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
|
||||
|
||||
// Fill a vector with sign bits for each element.
|
||||
SDValue SignBits = DAG.getNode(ISD::SRA, dl, MVT::v4i32, In,
|
||||
DAG.getConstant(31, dl, MVT::v4i32));
|
||||
|
||||
// Create an unpackl and unpackh to interleave the sign bits then bitcast
|
||||
// to v2i64.
|
||||
SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
|
||||
{0, 4, 1, 5});
|
||||
Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
|
||||
SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
|
||||
{2, 6, 3, 7});
|
||||
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
|
||||
|
||||
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
|
||||
Results.push_back(Res);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
|
||||
// Perform custom splitting instead of the two stage extend we would get
|
||||
// by default.
|
||||
EVT LoVT, HiVT;
|
||||
|
|
|
@ -496,35 +496,25 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
|||
; SSE2-LABEL: sext_16i8_to_4i64:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $24, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_16i8_to_4i64:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $24, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: psrad $24, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_16i8_to_4i64:
|
||||
|
@ -556,18 +546,13 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
|||
; X32-SSE2-LABEL: sext_16i8_to_4i64:
|
||||
; X32-SSE2: # %bb.0: # %entry
|
||||
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm1
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm2
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_16i8_to_4i64:
|
||||
|
@ -587,56 +572,40 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
|||
; SSE2-LABEL: sext_16i8_to_8i64:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $24, %xmm4
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $24, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE2-NEXT: psrad $31, %xmm3
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; SSE2-NEXT: psrad $24, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: psrad $24, %xmm3
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_16i8_to_8i64:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $24, %xmm4
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $24, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSSE3-NEXT: psrad $31, %xmm3
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: psrad $24, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; SSSE3-NEXT: psrad $24, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: psrad $24, %xmm3
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
|
@ -683,28 +652,20 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
|||
; X32-SSE2-LABEL: sext_16i8_to_8i64:
|
||||
; X32-SSE2: # %bb.0: # %entry
|
||||
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm4
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm3
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm1
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm2
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm3
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm3
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
|
@ -980,32 +941,24 @@ entry:
|
|||
define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
|
||||
; SSE2-LABEL: sext_8i16_to_4i64:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_8i16_to_4i64:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: psrad $16, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_8i16_to_4i64:
|
||||
|
@ -1036,17 +989,13 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp
|
|||
;
|
||||
; X32-SSE2-LABEL: sext_8i16_to_4i64:
|
||||
; X32-SSE2: # %bb.0: # %entry
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm1
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm2
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_8i16_to_4i64:
|
||||
|
@ -1065,53 +1014,39 @@ entry:
|
|||
define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
|
||||
; SSE2-LABEL: sext_8i16_to_8i64:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm4
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE2-NEXT: psrad $31, %xmm3
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; SSE2-NEXT: psrad $16, %xmm3
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm3
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm3, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_8i16_to_8i64:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm4
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSSE3-NEXT: psrad $31, %xmm3
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: psrad $16, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm4
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; SSSE3-NEXT: psrad $16, %xmm3
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: psrad $16, %xmm3
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm3, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm4, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
|
@ -1156,27 +1091,20 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp
|
|||
;
|
||||
; X32-SSE2-LABEL: sext_8i16_to_8i64:
|
||||
; X32-SSE2: # %bb.0: # %entry
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm4
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm3
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm1
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||
; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm2
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm3
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $16, %xmm3
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
|
@ -5880,35 +5808,25 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
|
|||
; SSE2-LABEL: sext_4i8_to_4i64:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: psrad $24, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $31, %xmm0
|
||||
; SSE2-NEXT: psrad $24, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $31, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: sext_4i8_to_4i64:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: psrad $24, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: psrad $31, %xmm0
|
||||
; SSSE3-NEXT: psrad $24, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSSE3-NEXT: psrad $31, %xmm2
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: sext_4i8_to_4i64:
|
||||
|
@ -5940,18 +5858,13 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
|
|||
; X32-SSE2-LABEL: sext_4i8_to_4i64:
|
||||
; X32-SSE2: # %bb.0:
|
||||
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm1
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm2
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm0
|
||||
; X32-SSE2-NEXT: psrad $24, %xmm1
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; X32-SSE2-NEXT: psrad $31, %xmm2
|
||||
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X32-SSE41-LABEL: sext_4i8_to_4i64:
|
||||
|
|
Loading…
Reference in New Issue