forked from OSchip/llvm-project
[X86][SSE] Don't replace an existing 32-bit load with its duplicate
If we are already loading a single 32-bit float/integer, then just reuse it. Fix for a regression introduced in D16729. llvm-svn: 259991
This commit is contained in:
parent
11e4d1146f
commit
9e369f2a51
|
@@ -5677,7 +5677,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
|||
MVT VecSVT = VT.isFloatingPoint() ? MVT::f32 : MVT::i32;
|
||||
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / 32);
|
||||
if (TLI.isTypeLegal(VecVT)) {
|
||||
SDValue V = CreateLoad(VecSVT, LDBase);
|
||||
SDValue V = LastLoadedElt != 0 ? CreateLoad(VecSVT, LDBase)
|
||||
: DAG.getBitcast(VecSVT, EltBase);
|
||||
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, V);
|
||||
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, V);
|
||||
return DAG.getBitcast(VT, V);
|
||||
|
|
|
@@ -602,6 +602,48 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noin
|
|||
%res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
|
||||
%resD = insertelement <16 x i8> %res7, i8 0, i32 13
|
||||
%resE = insertelement <16 x i8> %resD, i8 0, i32 14
|
||||
%resF = insertelement <16 x i8> %resE, i8 0, i32 15
|
||||
ret <16 x i8> %resF
|
||||
}
|
||||
%resF = insertelement <16 x i8> %resE, i8 0, i32 15
|
||||
ret <16 x i8> %resF
|
||||
}
|
||||
|
||||
define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
|
||||
; SSE-LABEL: merge_4i32_i32_combine:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: movaps %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: merge_4i32_i32_combine:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: merge_4i32_i32_combine:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: merge_4i32_i32_combine:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-NEXT: vmovdqa %xmm0, (%rdi)
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: merge_4i32_i32_combine:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: movaps %xmm0, (%eax)
|
||||
; X32-SSE-NEXT: retl
|
||||
%1 = getelementptr i32, i32* %src, i32 0
|
||||
%2 = load i32, i32* %1
|
||||
%3 = insertelement <4 x i32> undef, i32 %2, i32 0
|
||||
%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%5 = lshr <4 x i32> %4, <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
%6 = and <4 x i32> %5, <i32 -1, i32 0, i32 0, i32 0>
|
||||
store <4 x i32> %6, <4 x i32>* %dst
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue