[X86][SSE] Attempt to break register dependencies during lowerBuildVector

LowerBuildVectorv16i8/LowerBuildVectorv8i16 insert values into a UNDEF vector if the build vector doesn't contain any zero elements, resulting in register dependencies with a previous use of the register.

This patch attempts to break the register dependency by either always zeroing the vector before hand or (if we're inserting to the 0'th element) by using VZEXT_MOVL(SCALAR_TO_VECTOR(i32 AEXT(Elt))) which lowers to (V)MOVD and performs a similar function. Additionally (V)MOVD is a shorter instruction than PINSRB/PINSRW. We already do something similar for SSE41 PINSRD.

On pre-SSE41 LowerBuildVectorv16i8 we go a little further and use VZEXT_MOVL(SCALAR_TO_VECTOR(i32 ZEXT(Elt))) if the build vector contains zeros to avoid the vector zeroing at the cost of a scalar zero extension, which can probably be brought over to the other cases in a future patch in some cases (load folding etc.)

Differential Revision: https://reviews.llvm.org/D29720

llvm-svn: 294581
This commit is contained in:
Simon Pilgrim 2017-02-09 11:50:19 +00:00
parent 4200948c5a
commit 563e23e66e
12 changed files with 111 additions and 82 deletions

View File

@ -5942,12 +5942,21 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
for (unsigned i = 0; i < 16; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index then insert into zero vector to break any register
// dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
if (NumZero)
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v16i8);
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v16i8, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
@ -5969,6 +5978,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
if ((i & 1) != 0) {
// FIXME: Investigate extending to i32 instead of just i16.
// FIXME: Investigate combining the first 4 bytes as a i32 instead.
SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
@ -5984,9 +5995,18 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
} else
ThisElt = LastElt;
if (ThisElt)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i / 2, dl));
if (ThisElt) {
if (1 == i) {
V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
: DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
} else {
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i / 2, dl));
}
}
}
}
@ -6007,12 +6027,21 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
for (unsigned i = 0; i < 8; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index then insert into zero vector to break any register
// dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));

View File

@ -1062,7 +1062,7 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k4, %eax
; CHECK-NEXT: kmovw %k3, %ecx
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@ -1110,7 +1110,7 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3}
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: kmovw %k4, %ecx
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k6, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@ -1159,7 +1159,7 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k4, %eax
; CHECK-NEXT: kmovw %k3, %ecx
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@ -1207,7 +1207,7 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3}
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: kmovw %k4, %ecx
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k6, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0

View File

@ -696,7 +696,7 @@ define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -744,7 +744,7 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -793,7 +793,7 @@ define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -841,7 +841,7 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]

View File

@ -16,7 +16,7 @@ define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -64,7 +64,7 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -113,7 +113,7 @@ define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -161,7 +161,7 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -210,7 +210,7 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -258,7 +258,7 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -307,7 +307,7 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -355,7 +355,7 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -406,7 +406,7 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -454,7 +454,7 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -503,7 +503,7 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -551,7 +551,7 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -600,7 +600,7 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -648,7 +648,7 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -697,7 +697,7 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@ -745,7 +745,7 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]

View File

@ -270,6 +270,7 @@ define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16
define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
; CHECK-LABEL: test_buildvector_v8i16_partial:
; CHECK: # BB#0:
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: pinsrw $1, %edi, %xmm0
; CHECK-NEXT: pinsrw $3, %esi, %xmm0
; CHECK-NEXT: pinsrw $4, %edx, %xmm0
@ -419,6 +420,7 @@ define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11
;
; SSE41-LABEL: test_buildvector_v16i8_partial:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pinsrb $2, %edi, %xmm0
; SSE41-NEXT: pinsrb $6, %esi, %xmm0
; SSE41-NEXT: pinsrb $8, %edx, %xmm0
@ -448,10 +450,9 @@ define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11
define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
; SSE2-LABEL: test_buildvector_v16i8_register_zero:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
; SSE2-NEXT: movzbl %sil, %eax
; SSE2-NEXT: movzbl %dil, %esi
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
; SSE2-NEXT: movzbl %dl, %eax
; SSE2-NEXT: pinsrw $3, %eax, %xmm0

View File

@ -9,17 +9,16 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-LABEL: zext_i8:
; SSE3: # BB#0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: pinsrw $0, %eax, %xmm1
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $1, %eax, %xmm1
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $2, %eax, %xmm1
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pextrw $2, %xmm1, %edx
; SSE3-NEXT: pextrw $4, %xmm1, %ecx
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pextrw $2, %xmm0, %edx
; SSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSE3-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE3-NEXT: # kill: %DX<def> %DX<kill> %EDX<kill>
; SSE3-NEXT: # kill: %CX<def> %CX<kill> %ECX<kill>
@ -74,7 +73,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
; SSE3-LABEL: sext_i8:
; SSE3: # BB#0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@ -93,7 +92,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
;
; SSE41-LABEL: sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pslld $24, %xmm0
@ -108,7 +107,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
;
; AVX-32-LABEL: sext_i8:
; AVX-32: # BB#0:
; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0

View File

@ -482,7 +482,7 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
; AVX512BW-NEXT: vpextrb $0, %xmm0, %edi
; AVX512BW-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %edi, %xmm0
; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; AVX512BW-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
@ -496,9 +496,9 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu8 (%rdi), %zmm0
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BWVL-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512BWVL-NEXT: vpextrb $8, %xmm0, %eax
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %ecx
; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; AVX512BWVL-NEXT: vpextrb $0, %xmm2, %eax

View File

@ -97,10 +97,10 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_8:
; CHECK-WIDE: ## BB#0:
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
@ -134,10 +134,10 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_4:
; CHECK-WIDE: ## BB#0:
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax

View File

@ -1718,17 +1718,17 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b)
; SSE2-NEXT: movzbl (%rsi), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: movzwl %cx, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pinsrw $0, %ecx, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,5,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,7]
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR31364:
@ -1737,8 +1737,8 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b)
; SSSE3-NEXT: movzbl (%rsi), %ecx
; SSSE3-NEXT: shll $8, %ecx
; SSSE3-NEXT: orl %eax, %ecx
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: pinsrw $0, %ecx, %xmm0
; SSSE3-NEXT: movzwl %cx, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
; SSSE3-NEXT: retq
;

View File

@ -131,10 +131,10 @@ define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pand %xmm0, %xmm1
@ -172,10 +172,10 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pxor %xmm0, %xmm1
@ -213,10 +213,10 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: por %xmm0, %xmm1

View File

@ -65,7 +65,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
; X86-SSE2-NEXT: movd %esi, %xmm0
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@ -115,7 +115,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE2-NEXT: shll $8, %eax
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE2-NEXT: orl %eax, %ecx
; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]

View File

@ -91,7 +91,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
; X86-SSE2-NEXT: movd %esi, %xmm0
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
@ -140,7 +140,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X64-SSE2-NEXT: shll $8, %eax
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE2-NEXT: orl %eax, %ecx
; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1