forked from OSchip/llvm-project
[X86] Revert r299387 due to AVX legalization infinite loop.
llvm-svn: 299720
This commit is contained in:
parent
813de71329
commit
6129887d21
|
@ -6120,54 +6120,6 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Attempt to lower a build vector of repeated elts as a build vector of unique
|
||||
// ops followed by a shuffle.
// Returns an empty SDValue (no lowering performed) when the scalar element is
// narrower than 16 bits, or when no operand that is neither undef nor accepted
// by X86::isZeroNode appears more than once.
// NOTE(review): Subtarget is not referenced anywhere in this body.
|
||||
static SDValue
|
||||
lowerBuildVectorWithRepeatedEltsUsingShuffle(SDValue V, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
MVT VT = V.getSimpleValueType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
// TODO - vXi8 insertions+shuffles often cause PSHUFBs which can lead to
|
||||
// excessive/bulky shuffle mask creation.
|
||||
if (VT.getScalarSizeInBits() < 16)
|
||||
return SDValue();
|
||||
|
||||
// Create list of unique operands to be passed to a build vector and a shuffle
|
||||
// mask describing the repetitions.
|
||||
// Mask starts out all-undef; Uniques starts out as a copy of V's operands and
|
||||
// has repeated entries overwritten with undef as they are discovered.
|
||||
|
||||
// TODO - we currently insert the first occurrences in place - sometimes it
|
||||
// might be better to insert them in other locations for shuffle efficiency.
|
||||
bool HasRepeatedElts = false;
|
||||
SmallVector<int, 16> Mask(NumElts, SM_SentinelUndef);
|
||||
SmallVector<SDValue, 16> Uniques(V->op_begin(), V->op_end());
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
SDValue Op = Uniques[i];
|
||||
// Undef operands are skipped without touching Mask[i]. This also covers
|
||||
// entries the inner loop replaced with undef on an earlier iteration; their
|
||||
// mask entry already refers to the first occurrence.
|
||||
|
||||
if (Op.isUndef())
|
||||
continue;
|
||||
// Identity mapping: lane i is read from its own build vector slot.
|
||||
|
||||
Mask[i] = i;
|
||||
|
||||
// Zeros can be efficiently repeated, so don't shuffle these.
|
||||
if (X86::isZeroNode(Op))
|
||||
continue;
|
||||
|
||||
// If any repeated operands are found then mark the build vector entry as
|
||||
// undef and setup a copy in the shuffle mask.
|
||||
for (unsigned j = i + 1; j != NumElts; ++j)
|
||||
if (Op == Uniques[j]) {
|
||||
HasRepeatedElts = true;
|
||||
Mask[j] = i;
|
||||
Uniques[j] = DAG.getUNDEF(VT.getScalarType());
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing was de-duplicated, so there is no shuffle to emit.
|
||||
if (!HasRepeatedElts)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(V);
|
||||
// Shuffle the de-duplicated build vector against an undef second operand
|
||||
// using the mask built above.
|
||||
|
||||
return DAG.getVectorShuffle(VT, DL, DAG.getBuildVector(VT, DL, Uniques),
|
||||
DAG.getUNDEF(VT), Mask);
|
||||
}
|
||||
|
||||
/// Custom lower build_vector of v16i8.
|
||||
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
|
||||
unsigned NumNonZero, unsigned NumZero,
|
||||
|
@ -7800,17 +7752,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||
if (IsAllConstants)
|
||||
return SDValue();
|
||||
|
||||
// See if we can use a vector load to get all of the elements.
|
||||
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
|
||||
// See if we can use a vector load to get all of the elements.
|
||||
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
|
||||
if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false))
|
||||
return LD;
|
||||
|
||||
// Attempt to lower a build vector of repeated elts as single insertions
|
||||
// followed by a shuffle.
|
||||
if (SDValue V =
|
||||
lowerBuildVectorWithRepeatedEltsUsingShuffle(Op, DAG, Subtarget))
|
||||
return V;
|
||||
}
|
||||
|
||||
// For AVX-length vectors, build the individual 128-bit pieces and use
|
||||
|
|
|
@ -2425,9 +2425,12 @@ define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
|
|||
define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {
|
||||
; X32-LABEL: test_mm256_set1_epi64x:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
|
|
@ -6,8 +6,12 @@ define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
|
|||
; X32-LABEL: A:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
@ -27,21 +31,17 @@ entry:
|
|||
define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: A2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: Lcfi0:
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: Lcfi1:
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl (%ecx), %edx
|
||||
; X32-NEXT: movl 4(%ecx), %esi
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: movl %esi, 4(%eax)
|
||||
; X32-NEXT: movl 4(%ecx), %ecx
|
||||
; X32-NEXT: movl %ecx, 4(%eax)
|
||||
; X32-NEXT: movl %edx, (%eax)
|
||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X32-NEXT: vmovd %edx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: A2:
|
||||
|
@ -592,8 +592,12 @@ define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
|
|||
; X32-LABEL: G:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: G:
|
||||
|
@ -611,20 +615,16 @@ entry:
|
|||
define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: G2:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: Lcfi2:
|
||||
; X32-NEXT: .cfi_def_cfa_offset 8
|
||||
; X32-NEXT: Lcfi3:
|
||||
; X32-NEXT: .cfi_offset %esi, -8
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl (%ecx), %edx
|
||||
; X32-NEXT: movl 4(%ecx), %esi
|
||||
; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: movl %esi, 4(%eax)
|
||||
; X32-NEXT: movl 4(%ecx), %ecx
|
||||
; X32-NEXT: movl %ecx, 4(%eax)
|
||||
; X32-NEXT: movl %edx, (%eax)
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: vmovd %edx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: G2:
|
||||
|
|
|
@ -189,7 +189,12 @@ define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
|
|||
; X32-LABEL: Q64:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vpbroadcastq (%eax), %xmm0
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: Q64:
|
||||
|
@ -207,8 +212,13 @@ define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
|
|||
; X32-LABEL: QQ64:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: QQ64:
|
||||
|
@ -1430,8 +1440,12 @@ define void @isel_crash_2q(i64* %cV_R.addr) {
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; X32-NEXT: vmovaps %xmm0, (%esp)
|
||||
; X32-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-NEXT: vpbroadcastq %xmm1, %xmm1
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm1
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
|
||||
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $60, %esp
|
||||
|
@ -1487,10 +1501,15 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
|
|||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: vmovaps %ymm0, (%esp)
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-NEXT: vbroadcastsd %xmm1, %ymm1
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: vmovd %ecx, %xmm1
|
||||
; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
||||
; X32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
||||
; X32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
|
||||
; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: vzeroupper
|
||||
|
|
|
@ -1102,44 +1102,28 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
|
|||
;
|
||||
|
||||
define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp {
|
||||
; SSE2-LABEL: merge_4f32_f32_X0YY:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: merge_4f32_f32_X0YY:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: merge_4f32_f32_X0YY:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: merge_4f32_f32_X0YY:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2]
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE1-LABEL: merge_4f32_f32_X0YY:
|
||||
; X32-SSE1: # BB#0:
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
|
||||
; X32-SSE1-NEXT: retl
|
||||
;
|
||||
; X32-SSE41-LABEL: merge_4f32_f32_X0YY:
|
||||
; X32-SSE41: # BB#0:
|
||||
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,mem[0],zero
|
||||
; X32-SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
|
||||
; X32-SSE41-NEXT: retl
|
||||
; X32-SSE-LABEL: merge_4f32_f32_X0YY:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
|
||||
; X32-SSE-NEXT: retl
|
||||
%val0 = load float, float* %ptr0, align 4
|
||||
%val1 = load float, float* %ptr1, align 4
|
||||
%res0 = insertelement <4 x float> undef, float %val0, i32 0
|
||||
|
|
|
@ -2425,9 +2425,10 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
|
|||
; X32-LABEL: test_mm_set1_epi64x:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
|
||||
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
|
||||
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_set1_epi64x:
|
||||
|
|
|
@ -537,7 +537,7 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
|
|||
; VEX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; VEX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
|
||||
; VEX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptoui_4f64_to_2i32:
|
||||
|
|
|
@ -1177,8 +1177,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
|
|||
; SSE-NEXT: movd %xmm0, %rax
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,2]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
|
@ -1879,8 +1879,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
|
|||
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
|
||||
; SSE-NEXT: addss %xmm1, %xmm1
|
||||
; SSE-NEXT: .LBB41_8:
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,2,2]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
|
||||
|
|
|
@ -1263,13 +1263,14 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
|
|||
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE41-NEXT: movzbl (%eax), %eax
|
||||
; X32-SSE41-NEXT: movl %eax, %ecx
|
||||
; X32-SSE41-NEXT: shll $30, %ecx
|
||||
; X32-SSE41-NEXT: shll $31, %ecx
|
||||
; X32-SSE41-NEXT: sarl $31, %ecx
|
||||
; X32-SSE41-NEXT: shll $31, %eax
|
||||
; X32-SSE41-NEXT: movd %ecx, %xmm0
|
||||
; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
|
||||
; X32-SSE41-NEXT: shll $30, %eax
|
||||
; X32-SSE41-NEXT: sarl $31, %eax
|
||||
; X32-SSE41-NEXT: movd %eax, %xmm0
|
||||
; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
|
||||
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
|
||||
; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
|
||||
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
entry:
|
||||
%X = load <2 x i1>, <2 x i1>* %ptr
|
||||
|
|
|
@ -318,20 +318,21 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
ret <4 x i32> %res3
|
||||
}
|
||||
|
||||
; FIXME: Duplicated load in i686
|
||||
define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
|
||||
; X32-LABEL: buildvector_v4f32_0404:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X32-NEXT: vmovapd %xmm0, (%eax)
|
||||
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
||||
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
||||
; X32-NEXT: vmovaps %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: buildvector_v4f32_0404:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; X64-NEXT: vmovapd %xmm0, (%rdi)
|
||||
; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
|
||||
; X64-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%v0 = insertelement <4 x float> undef, float %a, i32 0
|
||||
%v1 = insertelement <4 x float> %v0, float %b, i32 1
|
||||
|
|
|
@ -28,9 +28,12 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
|
|||
; X32-LABEL: shift1b:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
|
||||
; X32-NEXT: psllq %xmm1, %xmm0
|
||||
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
|
||||
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
|
||||
; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-NEXT: psllq %xmm2, %xmm0
|
||||
; X32-NEXT: movdqa %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
|
|
@ -28,9 +28,12 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
|
|||
; X32-LABEL: shift1b:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
|
||||
; X32-NEXT: psrlq %xmm1, %xmm0
|
||||
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
|
||||
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
|
||||
; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X32-NEXT: psrlq %xmm2, %xmm0
|
||||
; X32-NEXT: movdqa %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue