[SelectionDAG] move splat util functions up from x86 lowering

This was supposed to be NFC, but the change in SDLoc
definitions causes instruction scheduling changes.

There's nothing x86-specific in this code, and it can
likely be used from DAGCombiner's simplifyVBinOp().

llvm-svn: 358930
Sanjay Patel 2019-04-22 22:43:36 +00:00
parent a38b8c8abc
commit bf8aacb715
9 changed files with 101 additions and 93 deletions
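
For a sense of how the relocated helpers are meant to be called, here is a minimal sketch (not part of this commit's diff; VecOp and the surrounding fold are hypothetical, e.g. something simplifyVBinOp() might do):

    // If VecOp splats a single lane, recover that lane as a scalar.
    // getSplatSourceVector() reports the source vector and lane index;
    // getSplatValue() materializes the scalar as an EXTRACT_VECTOR_ELT.
    int SplatIdx;
    if (SDValue Src = DAG.getSplatSourceVector(VecOp, SplatIdx)) {
      if (SDValue Scalar = DAG.getSplatValue(VecOp)) {
        // ... rebuild the operation on Scalar and re-splat the result ...
      }
    }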

View File

@@ -1539,6 +1539,13 @@ public:
   /// Test whether \p V has a splatted value.
   bool isSplatValue(SDValue V, bool AllowUndefs = false);
 
+  /// If V is a splatted value, return the source vector and its splat index.
+  SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
+
+  /// If V is a splat vector, return its scalar source operand by extracting
+  /// that element from the source vector.
+  SDValue getSplatValue(SDValue V);
+
   /// Match a binop + shuffle pyramid that represents a horizontal reduction
   /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
   /// Extract. The reduction must use one of the opcodes listed in \p

View File

@@ -1641,6 +1641,10 @@ SDValue peekThroughBitcasts(SDValue V);
 /// If \p V is not a bitcasted one-use value, it is returned as-is.
 SDValue peekThroughOneUseBitcasts(SDValue V);
 
+/// Return the non-extracted vector source operand of \p V if it exists.
+/// If \p V is not an extracted subvector, it is returned as-is.
+SDValue peekThroughExtractSubvectors(SDValue V);
+
 /// Returns true if \p V is a bitwise not operation. Assumes that an all ones
 /// constant is canonicalized to be operand 1.
 bool isBitwiseNot(SDValue V);

View File

@@ -2300,6 +2300,52 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
          (AllowUndefs || !UndefElts);
 }
 
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+  V = peekThroughExtractSubvectors(V);
+
+  EVT VT = V.getValueType();
+  unsigned Opcode = V.getOpcode();
+  switch (Opcode) {
+  default: {
+    APInt UndefElts;
+    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+    if (isSplatValue(V, DemandedElts, UndefElts)) {
+      // Handle case where all demanded elements are UNDEF.
+      if (DemandedElts.isSubsetOf(UndefElts)) {
+        SplatIdx = 0;
+        return getUNDEF(VT);
+      }
+      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+      return V;
+    }
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    // Check if this is a shuffle node doing a splat.
+    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+    // getTargetVShiftNode currently struggles without the splat source.
+    auto *SVN = cast<ShuffleVectorSDNode>(V);
+    if (!SVN->isSplat())
+      break;
+    int Idx = SVN->getSplatIndex();
+    int NumElts = V.getValueType().getVectorNumElements();
+    SplatIdx = Idx % NumElts;
+    return V.getOperand(Idx / NumElts);
+  }
+  }
+
+  return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+  int SplatIdx;
+  if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+    return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+                   SrcVector.getValueType().getScalarType(), SrcVector,
+                   getIntPtrConstant(SplatIdx, SDLoc(V)));
+  return SDValue();
+}
+
 /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
 /// is less than the element bit-width of the shift node, return it.
 static const APInt *getValidShiftAmountConstant(SDValue V) {
@@ -8585,6 +8631,12 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
   return V;
 }
 
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+    V = V.getOperand(0);
+  return V;
+}
+
 bool llvm::isBitwiseNot(SDValue V) {
   if (V.getOpcode() != ISD::XOR)
     return false;

View File

@@ -5711,13 +5711,6 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
 }
 
-// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
-static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
-  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
-    V = V.getOperand(0);
-  return V;
-}
-
 static const Constant *getTargetConstantFromNode(SDValue Op) {
   Op = peekThroughBitcasts(Op);
@@ -24722,54 +24715,6 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-// If V is a splat value, return the source vector and splat index;
-static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
-  V = peekThroughEXTRACT_SUBVECTORs(V);
-
-  EVT VT = V.getValueType();
-  unsigned Opcode = V.getOpcode();
-  switch (Opcode) {
-  default: {
-    APInt UndefElts;
-    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
-    if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
-      // Handle case where all demanded elements are UNDEF.
-      if (DemandedElts.isSubsetOf(UndefElts)) {
-        SplatIdx = 0;
-        return DAG.getUNDEF(VT);
-      }
-      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
-      return V;
-    }
-    break;
-  }
-  case ISD::VECTOR_SHUFFLE: {
-    // Check if this is a shuffle node doing a splat.
-    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
-    // getTargetVShiftNode currently struggles without the splat source.
-    auto *SVN = cast<ShuffleVectorSDNode>(V);
-    if (!SVN->isSplat())
-      break;
-    int Idx = SVN->getSplatIndex();
-    int NumElts = V.getValueType().getVectorNumElements();
-    SplatIdx = Idx % NumElts;
-    return V.getOperand(Idx / NumElts);
-  }
-  }
-
-  return SDValue();
-}
-
-static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
-                             SelectionDAG &DAG) {
-  int SplatIdx;
-  if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                       SrcVector.getValueType().getScalarType(), SrcVector,
-                       DAG.getIntPtrConstant(SplatIdx, dl));
-  return SDValue();
-}
-
 static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -24780,7 +24725,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
   unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
   unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
 
-  if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
     if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
       assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");

View File

@@ -527,11 +527,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT:    vpsubq %xmm1, %xmm3, %xmm1
-; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v4i64:

View File

@@ -344,10 +344,10 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
 ; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -355,10 +355,10 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
 ; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512VLBW-NEXT:    retq
@@ -426,14 +426,14 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-LABEL: splatvar_rotate_v64i8:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512BW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
 ; AVX512BW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
 ; AVX512BW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
@@ -445,14 +445,14 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
 ; AVX512VLBW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
 ; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512VLBW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
-; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
 ; AVX512VLBW-NEXT:    vpsrlw $8, %zmm1, %zmm1

View File

@@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -861,8 +861,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
@@ -968,8 +968,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3

View File

@@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -704,8 +704,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
@@ -793,8 +793,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3

View File

@@ -512,8 +512,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -527,8 +527,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -554,8 +554,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -574,8 +574,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -589,8 +589,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -616,8 +616,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -636,8 +636,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
@@ -719,8 +719,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3