forked from OSchip/llvm-project
[X86][SSE] Attempt to break register dependencies during lowerBuildVector
LowerBuildVectorv16i8/LowerBuildVectorv8i16 insert values into an UNDEF vector if the build vector doesn't contain any zero elements, resulting in register dependencies with a previous use of the register. This patch attempts to break the register dependency by either always zeroing the vector beforehand or (if we're inserting to the 0'th element) by using VZEXT_MOVL(SCALAR_TO_VECTOR(i32 AEXT(Elt))), which lowers to (V)MOVD and performs a similar function. Additionally, (V)MOVD is a shorter instruction than PINSRB/PINSRW. We already do something similar for SSE41 PINSRD. On pre-SSE41 LowerBuildVectorv16i8 we go a little further and use VZEXT_MOVL(SCALAR_TO_VECTOR(i32 ZEXT(Elt))) if the build vector contains zeros, to avoid the vector zeroing at the cost of a scalar zero extension; this can probably be brought over to the other cases in a future patch where it is beneficial (load folding etc.). Differential Revision: https://reviews.llvm.org/D29720 llvm-svn: 294581
This commit is contained in:
parent
4200948c5a
commit
563e23e66e
|
@ -5942,12 +5942,21 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||
for (unsigned i = 0; i < 16; ++i) {
|
||||
bool IsNonZero = (NonZeros & (1 << i)) != 0;
|
||||
if (IsNonZero) {
|
||||
// If the build vector contains zeros or our first insertion is not the
|
||||
// first index then insert into zero vector to break any register
|
||||
// dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
|
||||
if (First) {
|
||||
if (NumZero)
|
||||
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
|
||||
else
|
||||
V = DAG.getUNDEF(MVT::v16i8);
|
||||
First = false;
|
||||
if (NumZero || 0 != i)
|
||||
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
|
||||
else {
|
||||
assert(0 == i && "Expected insertion into zero-index");
|
||||
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
|
||||
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
|
||||
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
|
||||
V = DAG.getBitcast(MVT::v16i8, V);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
|
||||
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
|
||||
|
@ -5969,6 +5978,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||
}
|
||||
|
||||
if ((i & 1) != 0) {
|
||||
// FIXME: Investigate extending to i32 instead of just i16.
|
||||
// FIXME: Investigate combining the first 4 bytes as a i32 instead.
|
||||
SDValue ThisElt, LastElt;
|
||||
bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
|
||||
if (LastIsNonZero) {
|
||||
|
@ -5984,9 +5995,18 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
|||
} else
|
||||
ThisElt = LastElt;
|
||||
|
||||
if (ThisElt)
|
||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
|
||||
DAG.getIntPtrConstant(i / 2, dl));
|
||||
if (ThisElt) {
|
||||
if (1 == i) {
|
||||
V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
|
||||
: DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
|
||||
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
|
||||
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
|
||||
V = DAG.getBitcast(MVT::v8i16, V);
|
||||
} else {
|
||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
|
||||
DAG.getIntPtrConstant(i / 2, dl));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6007,12 +6027,21 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
|
|||
for (unsigned i = 0; i < 8; ++i) {
|
||||
bool IsNonZero = (NonZeros & (1 << i)) != 0;
|
||||
if (IsNonZero) {
|
||||
// If the build vector contains zeros or our first insertion is not the
|
||||
// first index then insert into zero vector to break any register
|
||||
// dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
|
||||
if (First) {
|
||||
if (NumZero)
|
||||
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
|
||||
else
|
||||
V = DAG.getUNDEF(MVT::v8i16);
|
||||
First = false;
|
||||
if (NumZero || 0 != i)
|
||||
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
|
||||
else {
|
||||
assert(0 == i && "Expected insertion into zero-index");
|
||||
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
|
||||
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
|
||||
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
|
||||
V = DAG.getBitcast(MVT::v8i16, V);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
|
||||
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
|
||||
|
|
|
@ -1062,7 +1062,7 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k4, %eax
|
||||
; CHECK-NEXT: kmovw %k3, %ecx
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: kmovw %k5, %eax
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
|
@ -1110,7 +1110,7 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3}
|
||||
; CHECK-NEXT: kmovw %k5, %eax
|
||||
; CHECK-NEXT: kmovw %k4, %ecx
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: kmovw %k6, %eax
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
|
@ -1159,7 +1159,7 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k4, %eax
|
||||
; CHECK-NEXT: kmovw %k3, %ecx
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: kmovw %k5, %eax
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
|
@ -1207,7 +1207,7 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3}
|
||||
; CHECK-NEXT: kmovw %k5, %eax
|
||||
; CHECK-NEXT: kmovw %k4, %ecx
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; CHECK-NEXT: kmovw %k6, %eax
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
|
|
|
@ -696,7 +696,7 @@ define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
|
|||
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -744,7 +744,7 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -793,7 +793,7 @@ define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
|
|||
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -841,7 +841,7 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
|
|
@ -16,7 +16,7 @@ define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -64,7 +64,7 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -113,7 +113,7 @@ define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
|
||||
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -161,7 +161,7 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07]
|
||||
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
|
||||
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -210,7 +210,7 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -258,7 +258,7 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -307,7 +307,7 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -355,7 +355,7 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -406,7 +406,7 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -454,7 +454,7 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -503,7 +503,7 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -551,7 +551,7 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -600,7 +600,7 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -648,7 +648,7 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -697,7 +697,7 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
|
|||
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07]
|
||||
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
@ -745,7 +745,7 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
|
|||
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07]
|
||||
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
|
||||
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
|
||||
; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
|
||||
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
|
||||
|
|
|
@ -270,6 +270,7 @@ define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16
|
|||
define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
|
||||
; CHECK-LABEL: test_buildvector_v8i16_partial:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pxor %xmm0, %xmm0
|
||||
; CHECK-NEXT: pinsrw $1, %edi, %xmm0
|
||||
; CHECK-NEXT: pinsrw $3, %esi, %xmm0
|
||||
; CHECK-NEXT: pinsrw $4, %edx, %xmm0
|
||||
|
@ -419,6 +420,7 @@ define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11
|
|||
;
|
||||
; SSE41-LABEL: test_buildvector_v16i8_partial:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE41-NEXT: pinsrb $2, %edi, %xmm0
|
||||
; SSE41-NEXT: pinsrb $6, %esi, %xmm0
|
||||
; SSE41-NEXT: pinsrb $8, %edx, %xmm0
|
||||
|
@ -448,10 +450,9 @@ define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11
|
|||
define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
|
||||
; SSE2-LABEL: test_buildvector_v16i8_register_zero:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzbl %dil, %eax
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
|
||||
; SSE2-NEXT: movzbl %sil, %eax
|
||||
; SSE2-NEXT: movzbl %dil, %esi
|
||||
; SSE2-NEXT: movd %esi, %xmm0
|
||||
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
|
||||
; SSE2-NEXT: movzbl %dl, %eax
|
||||
; SSE2-NEXT: pinsrw $3, %eax, %xmm0
|
||||
|
|
|
@ -9,17 +9,16 @@ define <3 x i16> @zext_i8(<3 x i8>) {
|
|||
; SSE3-LABEL: zext_i8:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE3-NEXT: movd %eax, %xmm0
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
|
||||
; SSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE3-NEXT: pinsrw $0, %eax, %xmm1
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $1, %eax, %xmm1
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $2, %eax, %xmm1
|
||||
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSE3-NEXT: movd %xmm1, %eax
|
||||
; SSE3-NEXT: pextrw $2, %xmm1, %edx
|
||||
; SSE3-NEXT: pextrw $4, %xmm1, %ecx
|
||||
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; SSE3-NEXT: movd %xmm0, %eax
|
||||
; SSE3-NEXT: pextrw $2, %xmm0, %edx
|
||||
; SSE3-NEXT: pextrw $4, %xmm0, %ecx
|
||||
; SSE3-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; SSE3-NEXT: # kill: %DX<def> %DX<kill> %EDX<kill>
|
||||
; SSE3-NEXT: # kill: %CX<def> %CX<kill> %ECX<kill>
|
||||
|
@ -74,7 +73,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
|
|||
; SSE3-LABEL: sext_i8:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
|
||||
; SSE3-NEXT: movd %eax, %xmm0
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
|
@ -93,7 +92,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
|
|||
;
|
||||
; SSE41-LABEL: sext_i8:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
|
||||
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
|
||||
; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
|
||||
; SSE41-NEXT: pslld $24, %xmm0
|
||||
|
@ -108,7 +107,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
|
|||
;
|
||||
; AVX-32-LABEL: sext_i8:
|
||||
; AVX-32: # BB#0:
|
||||
; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0
|
||||
|
|
|
@ -482,7 +482,7 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
|
||||
; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
|
||||
; AVX512BW-NEXT: vpextrb $0, %xmm0, %edi
|
||||
; AVX512BW-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vmovd %edi, %xmm0
|
||||
; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
||||
|
@ -496,9 +496,9 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
|
|||
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8:
|
||||
; AVX512BWVL: # BB#0:
|
||||
; AVX512BWVL-NEXT: vmovdqu8 (%rdi), %zmm0
|
||||
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512BWVL-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; AVX512BWVL-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %ecx
|
||||
; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||
; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
|
||||
; AVX512BWVL-NEXT: vpextrb $0, %xmm2, %eax
|
||||
|
|
|
@ -97,10 +97,10 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: foo3_8:
|
||||
; CHECK-WIDE: ## BB#0:
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
|
@ -134,10 +134,10 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
|
|||
;
|
||||
; CHECK-WIDE-LABEL: foo3_4:
|
||||
; CHECK-WIDE: ## BB#0:
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
|
||||
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
|
|
|
@ -1718,17 +1718,17 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b)
|
|||
; SSE2-NEXT: movzbl (%rsi), %ecx
|
||||
; SSE2-NEXT: shll $8, %ecx
|
||||
; SSE2-NEXT: orl %eax, %ecx
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: movzwl %cx, %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: pinsrw $0, %ecx, %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,5,4,4,4]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,7]
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: PR31364:
|
||||
|
@ -1737,8 +1737,8 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b)
|
|||
; SSSE3-NEXT: movzbl (%rsi), %ecx
|
||||
; SSSE3-NEXT: shll $8, %ecx
|
||||
; SSSE3-NEXT: orl %eax, %ecx
|
||||
; SSSE3-NEXT: pxor %xmm0, %xmm0
|
||||
; SSSE3-NEXT: pinsrw $0, %ecx, %xmm0
|
||||
; SSSE3-NEXT: movzwl %cx, %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -131,10 +131,10 @@ define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
|
|||
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
|
||||
; X32-SSE-LABEL: and_v3i8_as_i24:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: pand %xmm0, %xmm1
|
||||
|
@ -172,10 +172,10 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
|
|||
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
|
||||
; X32-SSE-LABEL: xor_v3i8_as_i24:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: pxor %xmm0, %xmm1
|
||||
|
@ -213,10 +213,10 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
|
|||
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
|
||||
; X32-SSE-LABEL: or_v3i8_as_i24:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
|
||||
; X32-SSE-NEXT: por %xmm0, %xmm1
|
||||
|
|
|
@ -65,7 +65,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
|
|||
; X86-SSE2-NEXT: shll $8, %edx
|
||||
; X86-SSE2-NEXT: movzbl (%esp), %esi
|
||||
; X86-SSE2-NEXT: orl %edx, %esi
|
||||
; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
|
||||
; X86-SSE2-NEXT: movd %esi, %xmm0
|
||||
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
|
||||
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
|
@ -115,7 +115,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
|
|||
; X64-SSE2-NEXT: shll $8, %eax
|
||||
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
|
||||
; X64-SSE2-NEXT: orl %eax, %ecx
|
||||
; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
|
||||
; X64-SSE2-NEXT: movd %ecx, %xmm0
|
||||
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
|
||||
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
|
|
|
@ -91,7 +91,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
|
|||
; X86-SSE2-NEXT: shll $8, %edx
|
||||
; X86-SSE2-NEXT: movzbl (%esp), %esi
|
||||
; X86-SSE2-NEXT: orl %edx, %esi
|
||||
; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
|
||||
; X86-SSE2-NEXT: movd %esi, %xmm0
|
||||
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
|
||||
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
|
||||
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
|
@ -140,7 +140,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
|
|||
; X64-SSE2-NEXT: shll $8, %eax
|
||||
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
|
||||
; X64-SSE2-NEXT: orl %eax, %ecx
|
||||
; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
|
||||
; X64-SSE2-NEXT: movd %ecx, %xmm0
|
||||
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
|
||||
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
|
|
Loading…
Reference in New Issue