[X86][SSE] Add support for combining VZEXT_MOVL target shuffles
Includes adding more general support for the pattern: VZEXT_MOVL(VZEXT_LOAD(ptr)) -> VZEXT_LOAD(ptr).

This has unearthed a couple of latent poor-codegen issues (MINSS/MAXSS scalar load folding and MOVDDUP/BROADCAST load folding patterns), which will be fixed shortly. It has also reduced a couple of tests so that they no longer reach the instruction threshold necessary for them to be combined to PSHUFB (see PR26183).

llvm-svn: 279646
commit 941bd6bbae (parent 26d9c41ff6)
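The motivating fold, VZEXT_MOVL(VZEXT_LOAD(ptr)) -> VZEXT_LOAD(ptr), is a no-op rewrite: a VZEXT_LOAD already zeroes every lane above the loaded scalar, so a following VZEXT_MOVL (keep lane 0, zero the rest) changes nothing when the element widths match. A minimal standalone C++ sketch of that reasoning, using plain arrays instead of SelectionDAG nodes (all names below are illustrative, not LLVM APIs):

#include <array>
#include <cassert>
#include <cstdint>

// Model a 4-lane 32-bit vector register.
using Vec4 = std::array<uint32_t, 4>;

// VZEXT_LOAD: load one scalar into lane 0 and zero the remaining lanes.
static Vec4 vzextLoad(uint32_t Scalar) { return {Scalar, 0, 0, 0}; }

// VZEXT_MOVL: keep lane 0 of the input and zero the upper lanes.
static Vec4 vzextMovl(const Vec4 &V) { return {V[0], 0, 0, 0}; }

int main() {
  uint32_t Scalar = 0xDEADBEEF;
  // VZEXT_MOVL(VZEXT_LOAD(p)) produces exactly VZEXT_LOAD(p), so the
  // combine can drop the VZEXT_MOVL node entirely.
  assert(vzextMovl(vzextLoad(Scalar)) == vzextLoad(Scalar));
  return 0;
}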
@@ -4236,6 +4236,21 @@ static bool isUndefOrInRange(ArrayRef<int> Mask,
   return true;
 }
 
+/// Return true if Val is undef, zero or if its value falls within the
+/// specified range (L, H].
+static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
+  return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
+}
+
+/// Return true if every element in Mask is undef, zero or if its value
+/// falls within the specified range (L, H].
+static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
+  for (int M : Mask)
+    if (!isUndefOrZeroOrInRange(M, Low, Hi))
+      return false;
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size, falls within the specified
 /// sequential range (Low, Low+Size]. or is undef.
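The two isUndefOrZeroOrInRange helpers added above only inspect shuffle-mask integers, so they are easy to model outside the DAG. A standalone sketch using the same convention as X86's shuffle masks (negative sentinels for undef and forced-zero elements); the sentinel constants and test values here are assumptions for illustration, not the in-tree definitions:

#include <cassert>
#include <vector>

// Shuffle-mask sentinels: -1 marks an undef element, -2 an element that
// must be zero (assumed values for this sketch).
constexpr int SM_SentinelUndef = -1;
constexpr int SM_SentinelZero = -2;

static bool isUndefOrZero(int Val) {
  return Val == SM_SentinelUndef || Val == SM_SentinelZero;
}

// True if Val is undef, zero, or falls in the half-open range [Low, Hi).
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
  return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
}

static bool isUndefOrZeroOrInRange(const std::vector<int> &Mask, int Low, int Hi) {
  for (int M : Mask)
    if (!isUndefOrZeroOrInRange(M, Low, Hi))
      return false;
  return true;
}

int main() {
  std::vector<int> HiMask = {SM_SentinelUndef, SM_SentinelZero, 2, 3};
  assert(isUndefOrZeroOrInRange(HiMask, 2, 4));   // undef/zero/in-range only
  std::vector<int> BadMask = {0, 1, 2, 3};
  assert(!isUndefOrZeroOrInRange(BadMask, 2, 4)); // 0 and 1 fall outside [2, 4)
  return 0;
}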
@@ -25347,6 +25362,21 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   unsigned Shuffle, PermuteImm;
 
   if (UnaryShuffle) {
+    // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
+    // directly if we don't shuffle the lower element and we shuffle the upper
+    // (zero) elements within themselves.
+    if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
+        (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
+      unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+      ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
+      if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
+          isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
+        DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
+                      /*AddTo*/ true);
+        return true;
+      }
+    }
+
     if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return false; // Nothing to do!
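The new guard in combineX86ShuffleChain can be read as: the VZEXT_LOAD's scalar spans Scale mask elements, the low Scale elements must be the identity sequence 0..Scale-1 (or undef), and every remaining element must be undef, forced zero, or point at one of the already-zeroed upper lanes. A self-contained sketch of that condition (simplified and renamed; not the LLVM implementation):

#include <cassert>
#include <vector>

constexpr int SM_SentinelUndef = -1; // assumed sentinel values, as above
constexpr int SM_SentinelZero = -2;

// Can a shuffle of a VZEXT_LOAD be replaced by the load itself?
// LoadScalarBits is the width of the loaded scalar, MaskEltBits the width
// of one shuffle-mask element.
static bool canUseVZextLoadDirectly(const std::vector<int> &Mask,
                                    unsigned LoadScalarBits,
                                    unsigned MaskEltBits) {
  if (LoadScalarBits % MaskEltBits != 0)
    return false;
  unsigned Scale = LoadScalarBits / MaskEltBits;
  unsigned NumElts = Mask.size();
  if (Scale > NumElts)
    return false;
  // Low part: must read the loaded scalar in order (or be undef).
  for (unsigned I = 0; I != Scale; ++I)
    if (Mask[I] != SM_SentinelUndef && Mask[I] != (int)I)
      return false;
  // High part: may only produce zeros, i.e. undef, forced zero, or a lane
  // the VZEXT_LOAD already zeroed.
  for (unsigned I = Scale; I != NumElts; ++I)
    if (!(Mask[I] == SM_SentinelUndef || Mask[I] == SM_SentinelZero ||
          (Mask[I] >= (int)Scale && Mask[I] < (int)NumElts)))
      return false;
  return true;
}

int main() {
  // v4i32 shuffle of a 64-bit VZEXT_LOAD: lanes 0-1 hold the scalar, 2-3 are zero.
  std::vector<int> Ok = {0, 1, SM_SentinelZero, 3};
  std::vector<int> Bad = {1, 0, SM_SentinelZero, 3}; // low lanes reordered
  assert(canUseVZextLoadDirectly(Ok, 64, 32));
  assert(!canUseVZextLoadDirectly(Bad, 64, 32));
  return 0;
}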
@@ -30502,17 +30532,6 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue combineVZextMovl(SDNode *N, SelectionDAG &DAG) {
-  SDValue Op = peekThroughBitcasts(N->getOperand(0));
-  EVT VT = N->getValueType(0), OpVT = Op.getValueType();
-  if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
-      VT.getVectorElementType().getSizeInBits() ==
-      OpVT.getVectorElementType().getSizeInBits()) {
-    return DAG.getBitcast(VT, Op);
-  }
-  return SDValue();
-}
-
 static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
@@ -31498,7 +31517,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FAND:        return combineFAnd(N, DAG, Subtarget);
   case X86ISD::FANDN:       return combineFAndn(N, DAG, Subtarget);
   case X86ISD::BT:          return combineBT(N, DAG, DCI);
-  case X86ISD::VZEXT_MOVL:  return combineVZextMovl(N, DAG);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:    return combineZext(N, DAG, DCI, Subtarget);
   case ISD::SIGN_EXTEND:    return combineSext(N, DAG, DCI, Subtarget);
@@ -31534,6 +31552,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
   case X86ISD::VPERM2X128:
+  case X86ISD::VZEXT_MOVL:
   case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
   case ISD::FMA:            return combineFMA(N, DAG, Subtarget);
   case ISD::MGATHER:
@@ -112,14 +112,12 @@ define <4 x float> @shuffle_v4f32_z06z(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @shuffle_v4f32_05zz(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: shuffle_v4f32_05zz:
 ; SSE:       # BB#0:
-; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4f32_05zz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
   %shuffle1 = shufflevector <4 x float> %shuffle, <4 x float> <float undef, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -9,7 +9,8 @@ define i16 @test1(float %f) nounwind {
 ; X32-NEXT:    xorps %xmm1, %xmm1
 ; X32-NEXT:    subss LCPI0_0, %xmm0
 ; X32-NEXT:    mulss LCPI0_1, %xmm0
-; X32-NEXT:    minss LCPI0_2, %xmm0
+; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT:    minss %xmm2, %xmm0
 ; X32-NEXT:    maxss %xmm1, %xmm0
 ; X32-NEXT:    cvttss2si %xmm0, %eax
 ; X32-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
@@ -21,7 +22,8 @@ define i16 @test1(float %f) nounwind {
 ; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; X64-NEXT:    subss {{.*}}(%rip), %xmm0
 ; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
-; X64-NEXT:    minss {{.*}}(%rip), %xmm0
+; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT:    minss %xmm2, %xmm0
 ; X64-NEXT:    maxss %xmm1, %xmm0
 ; X64-NEXT:    cvttss2si %xmm0, %eax
 ; X64-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
@@ -2176,29 +2176,18 @@ define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
 }
 
 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
-; SSE2-LABEL: insert_dup_mem_v8i16_i32:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: insert_dup_mem_v8i16_i32:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; SSE41-NEXT:    retq
+; SSE-LABEL: insert_dup_mem_v8i16_i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: insert_dup_mem_v8i16_i32:
@@ -2257,29 +2246,18 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
 }
 
 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
-; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
-; SSE41-NEXT:    retq
+; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
@@ -3564,7 +3564,9 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
 ; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -2814,31 +2814,12 @@ define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %d
 }
 
-; FIXME: Failed to recognise that the VMOVSD has already zero'd the upper element
 define void @combine_scalar_load_with_blend_with_zero(double* %a0, <4 x float>* %a1) {
-; SSE2-LABEL: combine_scalar_load_with_blend_with_zero:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
-; SSE2-NEXT:    movaps %xmm0, (%rsi)
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: combine_scalar_load_with_blend_with_zero:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT:    xorps %xmm1, %xmm1
-; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
-; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
-; SSSE3-NEXT:    movaps %xmm0, (%rsi)
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: combine_scalar_load_with_blend_with_zero:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT:    movapd %xmm0, (%rsi)
-; SSE41-NEXT:    retq
+; SSE-LABEL: combine_scalar_load_with_blend_with_zero:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    movapd %xmm0, (%rsi)
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_scalar_load_with_blend_with_zero:
 ; AVX:       # BB#0:
@@ -41,7 +41,8 @@ define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0,
 ; ALL-NEXT:    andq $-32, %rsp
 ; ALL-NEXT:    subq $64, %rsp
 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
-; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; ALL-NEXT:    movq %rbp, %rsp