[X86] Add vector shift by immediate to SimplifyDemandedBitsForTargetNode.
Summary:
This also enables some constant folding from KnownBits propagation. It helps some vXi64 cases in 32-bit mode where constant vectors appear as vXi32 values behind a bitcast, which can prevent getNode from constant folding sra/shl/srl.

Reviewers: RKSimon, spatel

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D54069

llvm-svn: 346102
parent bc5c3f5727
commit ed6a0a817f
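As background for the hunks below, here is a minimal standalone sketch of how a demanded-bits mask on the result of an immediate shift translates into a demanded-bits mask on the shift's source, which is the rule the new VSHLI/VSRLI/VSRAI cases implement. This is not LLVM code: it assumes a fixed 32-bit element width and a shift amount below the bit width, and the helper names (demandedForShl, demandedForLshr, demandedForAshr) are invented for illustration.

// Standalone illustration of the demanded-bits rules for immediate shifts.
#include <cassert>
#include <cstdint>

// For Res = Src << ShAmt, result bit i comes from source bit i - ShAmt,
// so the source only needs the demanded bits shifted right.
uint32_t demandedForShl(uint32_t demanded, unsigned shAmt) {
  return demanded >> shAmt;
}

// For Res = Src >> ShAmt (logical), result bit i comes from source bit
// i + ShAmt, so shift the demanded mask left.
uint32_t demandedForLshr(uint32_t demanded, unsigned shAmt) {
  return demanded << shAmt;
}

// For an arithmetic shift right, the top shAmt result bits are copies of the
// source sign bit; if any of them are demanded, demand the sign bit too.
uint32_t demandedForAshr(uint32_t demanded, unsigned shAmt) {
  uint32_t mask = demanded << shAmt;
  if (shAmt && (demanded >> (32 - shAmt)) != 0) // any of the top shAmt bits demanded?
    mask |= 0x80000000u;
  return mask;
}

int main() {
  // Only bits 8..15 of (x << 8) are demanded -> only bits 0..7 of x matter.
  assert(demandedForShl(0x0000FF00u, 8) == 0x000000FFu);
  // Demanding only the low 8 bits of (x >> 4) needs source bits 4..11.
  assert(demandedForLshr(0x000000FFu, 4) == 0x00000FF0u);
  // Demanding bit 31 of an ashr by 8 pulls in the source sign bit.
  assert(demandedForAshr(0x80000000u, 8) == 0x80000000u);
  return 0;
}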
@@ -31817,6 +31817,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits, KnownBits &Known,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  unsigned Opc = Op.getOpcode();
  switch(Opc) {
  case X86ISD::PMULDQ:
@@ -31833,6 +31834,42 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
      return true;
    break;
  }
  case X86ISD::VSHLI: {
    if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      if (ShiftImm->getAPIntValue().uge(BitWidth))
        break;

      KnownBits KnownOp;
      unsigned ShAmt = ShiftImm->getZExtValue();
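      // Result bit i of a left shift comes from source bit i - ShAmt, so the
      // source only needs the demanded bits shifted right by ShAmt.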
      APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case X86ISD::VSRAI:
  case X86ISD::VSRLI: {
    if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      if (ShiftImm->getAPIntValue().uge(BitWidth))
        break;

      KnownBits KnownOp;
      unsigned ShAmt = ShiftImm->getZExtValue();
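      // Result bit i of a right shift comes from source bit i + ShAmt, so the
      // source needs the demanded bits shifted left by ShAmt.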
      APInt DemandedMask = OriginalDemandedBits << ShAmt;

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
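      // e.g. for 32-bit elements shifted right by 8, result bits 24-31 are
      // copies of the source sign bit, so demanding any of them
      // (countLeadingZeros() < ShAmt) keeps the source sign bit live even
      // though the left shift above pushes those bits out of DemandedMask.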
      if (Opc == X86ISD::VSRAI &&
          OriginalDemandedBits.countLeadingZeros() < ShAmt)
        DemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  }

  return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -34861,6 +34898,11 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
    return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(SDValue(N, 0),
                               APInt::getAllOnesValue(NumBitsPerElt), DCI))
    return SDValue(N, 0);

  return SDValue();
}
@@ -63,17 +63,7 @@ define <4 x i32> @combine_vec_lshr_known_zero0(<4 x i32> %x) {
define <4 x i32> @combine_vec_lshr_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero1:
; SSE: # %bb.0:
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $11, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $9, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $10, %xmm1
; SSE-NEXT: psrld $8, %xmm0
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero1:
@@ -669,20 +669,15 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmullw %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmullw %xmm3, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: packuswb %xmm0, %xmm3
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: psllw $1, %xmm3
; SSE41-NEXT: psllw $8, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psllw $8, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; SSE41-NEXT: psllw $1, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -693,21 +688,16 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; AVX1-NEXT: movl $171, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpmullw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; AVX1-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsllw $8, %xmm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT: vpsllw $1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
@@ -91,17 +91,14 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_1:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT: vpsrlq $63, %xmm1, %xmm2
; X32-NEXT: vpsrlq $32, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vpsrlq $63, %xmm0, %xmm2
; X32-NEXT: vpsrlq $63, %xmm0, %xmm1
; X32-NEXT: vpsrlq $32, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,32768,0,0,1,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -128,18 +125,15 @@ define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_shl_extract_sitofp:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT: vpsrlq $60, %xmm1, %xmm2
; X32-NEXT: vpsrlq $61, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $60, %xmm0, %xmm1
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsllq $20, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -263,13 +257,10 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT: vpsrlq $60, %xmm2, %xmm3
; X32-NEXT: vpsrlq $61, %xmm2, %xmm2
; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT: vpsrlq $60, %xmm0, %xmm3
; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
@@ -281,7 +272,7 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -320,13 +311,10 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT: vpsrlq $60, %xmm2, %xmm3
; X32-NEXT: vpsrlq $61, %xmm2, %xmm2
; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT: vpsrlq $60, %xmm0, %xmm3
; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -334,7 +322,7 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
; X32-NEXT: vpor %xmm1, %xmm2, %xmm1
; X32-NEXT: vpxor %xmm0, %xmm1, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -375,22 +363,19 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X32-NEXT: subl $16, %esp
; X32-NEXT: vpmovsxdq 16(%ebp), %xmm3
; X32-NEXT: vpmovsxdq 8(%ebp), %xmm4
; X32-NEXT: vmovdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648]
; X32-NEXT: vextractf128 $1, %ymm2, %xmm5
; X32-NEXT: vpsrlq $63, %xmm5, %xmm6
; X32-NEXT: vpsrlq $33, %xmm5, %xmm5
; X32-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
; X32-NEXT: vextractf128 $1, %ymm2, %xmm6
; X32-NEXT: vpsrlq $63, %xmm6, %xmm7
; X32-NEXT: vpsrlq $33, %xmm6, %xmm6
; X32-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3],xmm7[4,5,6,7]
; X32-NEXT: vpxor %xmm5, %xmm6, %xmm6
; X32-NEXT: vpsubq %xmm5, %xmm6, %xmm6
; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [0,16384,0,0,1,0,0,0]
; X32-NEXT: vpxor %xmm6, %xmm5, %xmm5
; X32-NEXT: vpsubq %xmm6, %xmm5, %xmm5
; X32-NEXT: vpsrlq $63, %xmm2, %xmm7
; X32-NEXT: vpsrlq $33, %xmm2, %xmm2
; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
; X32-NEXT: vpxor %xmm5, %xmm2, %xmm2
; X32-NEXT: vpsubq %xmm5, %xmm2, %xmm2
; X32-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
; X32-NEXT: vpxor %xmm6, %xmm2, %xmm2
; X32-NEXT: vpsubq %xmm6, %xmm2, %xmm2
; X32-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT: vextractf128 $1, %ymm1, %xmm4
; X32-NEXT: vextractf128 $1, %ymm0, %xmm5
@@ -11,9 +11,9 @@ define void @fetch_r16g16_snorm_unorm8(<4 x i8>*, i8*, i32, i32, { [2048 x i32],
; X86-SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SKYLAKE-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SKYLAKE-NEXT: vpsrad $16, %xmm0, %xmm0
; X86-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; X86-SKYLAKE-NEXT: vpsrld $7, %xmm0, %xmm0
@@ -29,7 +29,7 @@ define void @fetch_r16g16_snorm_unorm8(<4 x i8>*, i8*, i32, i32, { [2048 x i32],
; X86-SKX-NEXT: subl $8, %esp
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SKX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
; X86-SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
; X86-SKX-NEXT: vpsrad $16, %xmm0, %xmm0
; X86-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
@@ -50,9 +50,9 @@ define void @fetch_r16g16_snorm_unorm8(<4 x i8>*, i8*, i32, i32, { [2048 x i32],
; X64-SKYLAKE-LABEL: fetch_r16g16_snorm_unorm8:
; X64-SKYLAKE: # %bb.0: # %entry
; X64-SKYLAKE-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X64-SKYLAKE-NEXT: vpsrad $16, %xmm0, %xmm0
; X64-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; X64-SKYLAKE-NEXT: vpsrld $7, %xmm0, %xmm0
@@ -65,7 +65,7 @@ define void @fetch_r16g16_snorm_unorm8(<4 x i8>*, i8*, i32, i32, { [2048 x i32],
; X64-SKX-LABEL: fetch_r16g16_snorm_unorm8:
; X64-SKX: # %bb.0: # %entry
; X64-SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SKX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
; X64-SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
; X64-SKX-NEXT: vpsrad $16, %xmm0, %xmm0
; X64-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
@@ -990,15 +990,11 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
;
; X32-SSE-LABEL: constant_shift_v2i64:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-SSE-NEXT: movdqa %xmm1, %xmm2
; X32-SSE-NEXT: psrlq $1, %xmm2
; X32-SSE-NEXT: psrlq $7, %xmm1
; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: psrlq $1, %xmm2
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: psrlq $1, %xmm1
; X32-SSE-NEXT: psrlq $7, %xmm0
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-SSE-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
; X32-SSE-NEXT: xorpd %xmm1, %xmm0
; X32-SSE-NEXT: psubq %xmm1, %xmm0
; X32-SSE-NEXT: retl
@@ -1066,25 +1066,20 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
;
; X32-AVX1-LABEL: constant_shift_v4i64:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X32-AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2
; X32-AVX1-NEXT: vpsrlq $31, %xmm1, %xmm3
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X32-AVX1-NEXT: vpsrlq $62, %xmm3, %xmm4
; X32-AVX1-NEXT: vpsrlq $31, %xmm3, %xmm3
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
; X32-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
; X32-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; X32-AVX1-NEXT: vpsrlq $7, %xmm1, %xmm3
; X32-AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm3
; X32-AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,0,2,0,0,0]
; X32-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; X32-AVX1-NEXT: vpsubq %xmm2, %xmm1, %xmm1
; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2
; X32-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,16384,0,0,0,256]
; X32-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v4i64:
@@ -716,26 +716,26 @@ define <8 x i16> @trunc_usat_v8i64_v8i16(<8 x i64> %a0) {
define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v8i32_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm6
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: por %xmm1, %xmm5
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: pand %xmm1, %xmm4
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pslld $16, %xmm4
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm5, %xmm0
; SSE2-NEXT: packssdw %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v8i32_v8i16:
@@ -826,36 +826,36 @@ define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) {
define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v16i32_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm7, %xmm6
; SSE2-NEXT: movdqa %xmm1, %xmm8
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm7
; SSE2-NEXT: pxor %xmm6, %xmm7
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: pcmpgtd %xmm6, %xmm1
; SSE2-NEXT: pcmpgtd %xmm7, %xmm1
; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pandn %xmm8, %xmm1
; SSE2-NEXT: pxor %xmm7, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm7, %xmm6
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pxor %xmm6, %xmm4
; SSE2-NEXT: movdqa %xmm5, %xmm2
; SSE2-NEXT: pcmpgtd %xmm6, %xmm2
; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pandn %xmm8, %xmm2
; SSE2-NEXT: pxor %xmm7, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm7, %xmm3
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pandn %xmm8, %xmm6
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm7
; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm4
; SSE2-NEXT: pandn %xmm8, %xmm5
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pxor %xmm6, %xmm3
; SSE2-NEXT: movdqa %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm7, %xmm4
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm6
; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-NEXT: pxor %xmm5, %xmm7
; SSE2-NEXT: pand %xmm8, %xmm5
; SSE2-NEXT: por %xmm7, %xmm5
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: pslld $16, %xmm0
@@ -870,36 +870,36 @@ define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32> %a0) {
;
; SSSE3-LABEL: trunc_usat_v16i32_v16i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: movdqa %xmm2, %xmm6
; SSSE3-NEXT: pxor %xmm7, %xmm6
; SSSE3-NEXT: movdqa %xmm1, %xmm8
; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: movdqa %xmm2, %xmm7
; SSSE3-NEXT: pxor %xmm6, %xmm7
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSSE3-NEXT: movdqa %xmm5, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm1
; SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm7, %xmm7
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pandn %xmm8, %xmm1
; SSSE3-NEXT: pxor %xmm7, %xmm1
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm3, %xmm6
; SSSE3-NEXT: pxor %xmm7, %xmm6
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pxor %xmm6, %xmm4
; SSSE3-NEXT: movdqa %xmm5, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm3
; SSSE3-NEXT: pandn %xmm8, %xmm2
; SSSE3-NEXT: pxor %xmm7, %xmm2
; SSSE3-NEXT: por %xmm3, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pxor %xmm7, %xmm3
; SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
; SSSE3-NEXT: pand %xmm6, %xmm0
; SSSE3-NEXT: pandn %xmm8, %xmm6
; SSSE3-NEXT: por %xmm6, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm7
; SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
; SSSE3-NEXT: pand %xmm5, %xmm4
; SSSE3-NEXT: pandn %xmm8, %xmm5
; SSSE3-NEXT: por %xmm4, %xmm5
; SSSE3-NEXT: pxor %xmm6, %xmm3
; SSSE3-NEXT: movdqa %xmm5, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
; SSSE3-NEXT: pand %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm7, %xmm4
; SSSE3-NEXT: por %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm8, %xmm6
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
; SSSE3-NEXT: pxor %xmm5, %xmm7
; SSSE3-NEXT: pand %xmm8, %xmm5
; SSSE3-NEXT: por %xmm7, %xmm5
; SSSE3-NEXT: pslld $16, %xmm5
; SSSE3-NEXT: psrad $16, %xmm5
; SSSE3-NEXT: pslld $16, %xmm0