[DAGCombiner] reduce buildvec of zexted extracted element to shuffle

The motivating case for this is shown in the first regression test. We are 
transferring to scalar and back rather than just zero-extending with 'vpmovzxdq'.

That's a special case of the more general pattern matched here. In all tests,
we avoid the vector-scalar-vector moves in favor of vector ops.
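
As a sketch of the motivating case (zext of element 0 extracted from a v4i32,
with the other v2i64 element undef), the combine forms:

  buildvec (zext (extractelt V, 0)), undef
    --> bitcast (shuffle V, ZeroVec, <0, 4, undef, undef>)

Mask element 4 selects element 0 of the all-zeros second shuffle operand to
supply the high half of the zero-extended lane.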

We aren't producing optimal shuffle code in some cases though, so the patch is
deliberately limited in order to avoid regressions.

Differential Revision: https://reviews.llvm.org/D56281

llvm-svn: 351198
commit fad5bdaf95 (parent 2b46d30fc7)
Author: Sanjay Patel
Date:   2019-01-15 16:11:05 +00:00
2 files changed, 236 insertions(+), 155 deletions(-)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -16195,6 +16195,78 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
  return Shuffle;
}

static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");

  // First, determine where the build vector is not undef.
  // TODO: We could extend this to handle zero elements as well as undefs.
  int NumBVOps = BV->getNumOperands();
  int ZextElt = -1;
  for (int i = 0; i != NumBVOps; ++i) {
    SDValue Op = BV->getOperand(i);
    if (Op.isUndef())
      continue;
    if (ZextElt == -1)
      ZextElt = i;
    else
      return SDValue();
  }
  // Bail out if there's no non-undef element.
  if (ZextElt == -1)
    return SDValue();

  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
    return SDValue();

  // The zero-extend must be a multiple of the source size.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
  if (DestSize % SrcSize != 0)
    return SDValue();

  // Create a shuffle mask that will combine the extracted element with zeros
  // and undefs.
  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio == ZextElt) {
      // The low bits of the (potentially translated) extracted element map to
      // the source vector. The high bits map to zero. We will use a zero vector
      // as the 2nd source operand of the shuffle, so use the 1st element of
      // that vector (mask value is number-of-elements) for the high bits.
      if (i % ZextRatio == 0)
        ShufMask[i] = Extract.getConstantOperandVal(1);
      else
        ShufMask[i] = NumMaskElts;
    }

    // Undef elements of the build vector remain undef because we initialize
    // the shuffle mask with -1.
  }

  // Turn this into a shuffle with zero if that's legal.
  EVT VecVT = Extract.getOperand(0).getValueType();
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
    return SDValue();

  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
  // bitcast (shuffle V, ZeroVec, VectorMask)
  SDLoc DL(BV);
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                      ShufMask);
  return DAG.getBitcast(BV->getValueType(0), Shuf);
}
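
As a cross-check of the mask logic, here is a minimal standalone sketch of just
the mask construction (plain C++; buildShuffleMask and its parameters are
illustrative names for this sketch, not LLVM APIs), verified against the
motivating case:

#include <cassert>
#include <cstdio>
#include <vector>

// Standalone model of the ShufMask loop above: NumBVOps build-vector lanes,
// one zero-extended element at BVIndex, extracted from the source vector at
// ExtractIndex, with DestSize / SrcSize == ZextRatio. -1 marks an undef lane.
static std::vector<int> buildShuffleMask(int NumBVOps, int BVIndex,
                                         int ExtractIndex, int ZextRatio) {
  int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> Mask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio != BVIndex)
      continue; // undef build-vector lanes keep the -1 initializer
    // The low subelement comes from the source vector; the remaining
    // subelements come from element 0 of the all-zeros second operand,
    // which the mask addresses as index NumMaskElts.
    Mask[i] = (i % ZextRatio == 0) ? ExtractIndex : NumMaskElts;
  }
  return Mask;
}

int main() {
  // Motivating case: v2i64 buildvec of (zext i64 (extractelt v4i32 %x, 0))
  // and undef. ZextRatio = 64 / 32 = 2, so the mask has 4 elements.
  std::vector<int> Mask = buildShuffleMask(2, 0, 0, 2);
  assert(Mask == (std::vector<int>{0, 4, -1, -1}));
  for (int Elt : Mask)
    printf("%d ", Elt); // prints: 0 4 -1 -1
  printf("\n");
  return 0;
}
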
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
@@ -16206,6 +16278,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  if (!isTypeLegal(VT))
    return SDValue();

  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

llvm/test/CodeGen/X86/buildvec-extract.ll

@@ -4,16 +4,20 @@
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ANY,AVX
define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; SSE-LABEL: extract0_i32_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: extract0_i32_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract0_i32_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i32_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
%z = zext i32 %e to i64
@@ -40,23 +44,14 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract1_i32_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i32_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $1, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: extract1_i32_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract1_i32_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
@@ -90,23 +85,16 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract2_i32_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i32_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $2, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: extract2_i32_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: extract2_i32_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $2, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 2
%z = zext i32 %e to i64
@@ -140,23 +128,14 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract3_i32_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_i32_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: extract3_i32_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i32_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $3, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
%z = zext i32 %e to i64
@@ -190,18 +169,25 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; SSE-LABEL: extract0_i32_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
; SSE2-LABEL: extract0_i32_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract0_i32_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i32_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
%z = zext i32 %e to i64
@@ -232,24 +218,18 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract1_i32_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i32_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $1, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i32_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
@@ -288,24 +268,19 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract2_i32_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i32_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $2, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract2_i32_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $2, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 2
%z = zext i32 %e to i64
@@ -342,26 +317,14 @@ define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; SSE2-LABEL: extract3_i32_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movq %rax, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_i32_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: retq
; SSE-LABEL: extract3_i32_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i32_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $3, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
%z = zext i32 %e to i64
@@ -398,16 +361,21 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
}
define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract0_i16_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $0, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: extract0_i16_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract0_i16_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $0, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 0
%z = zext i16 %e to i64
@@ -434,16 +402,25 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract1_i16_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: extract1_i16_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,0,1,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i16_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $1, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 1
%z = zext i16 %e to i64
@@ -470,16 +447,26 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract2_i16_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $2, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
; SSE2-LABEL: extract2_i16_zext_insert0_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i16_zext_insert0_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract2_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $2, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 2
%z = zext i16 %e to i64
@@ -508,14 +495,12 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract3_i16_zext_insert0_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $3, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: psrlq $48, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i16_zext_insert0_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $3, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 3
%z = zext i16 %e to i64
@@ -542,18 +527,24 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract0_i16_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $0, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
; SSE2-LABEL: extract0_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract0_i16_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i16_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $0, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 0
%z = zext i16 %e to i64
@@ -582,18 +573,21 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract1_i16_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
; SSE2-LABEL: extract1_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i16_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i16_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $1, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 1
%z = zext i16 %e to i64
@@ -622,18 +616,24 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract2_i16_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $2, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
; SSE2-LABEL: extract2_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i16_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract2_i16_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $2, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 2
%z = zext i16 %e to i64
@@ -662,18 +662,24 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
}
define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; SSE-LABEL: extract3_i16_zext_insert1_i64_undef:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $3, %xmm0, %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
; SSE2-LABEL: extract3_i16_zext_insert1_i64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_i16_zext_insert1_i64_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract3_i16_zext_insert1_i64_undef:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $3, %xmm0, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 3
%z = zext i16 %e to i64