forked from OSchip/llvm-project
[DAGCombiner] fix load narrowing transform to exclude loads with extension
The extending load possibility was missed in: https://reviews.llvm.org/rL304072. We might want to handle this case as a follow-up, but bail out for now to avoid miscompiling. llvm-svn: 304153
This commit is contained in:
parent
fe0c0935c8
commit
51152a3727
|
@ -14567,7 +14567,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
|||
// extract instead or remove that condition entirely.
|
||||
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
|
||||
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
|
||||
if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
|
||||
if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
|
||||
!ExtIdx)
|
||||
return SDValue();
|
||||
|
||||
// The narrow load will be offset from the base address of the old load if
|
||||
|
|
|
@ -1749,6 +1749,62 @@ entry:
|
|||
ret <4 x i64> %Y
|
||||
}
|
||||
|
||||
define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
|
||||
; SSE2-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsbq 3(%rdi), %rax
|
||||
; SSE2-NEXT: movq %rax, %xmm1
|
||||
; SSE2-NEXT: movsbq 2(%rdi), %rax
|
||||
; SSE2-NEXT: movq %rax, %xmm0
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsbq 3(%rdi), %rax
|
||||
; SSSE3-NEXT: movq %rax, %xmm1
|
||||
; SSSE3-NEXT: movsbq 2(%rdi), %rax
|
||||
; SSSE3-NEXT: movq %rax, %xmm0
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
|
||||
; X32-SSE41: # BB#0:
|
||||
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0
|
||||
; X32-SSE41-NEXT: retl
|
||||
%ld = load <4 x i8>, <4 x i8>* %ptr
|
||||
%sext = sext <4 x i8> %ld to <4 x i64>
|
||||
%extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
|
||||
ret <2 x i64> %extract
|
||||
}
|
||||
|
||||
define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
|
||||
; SSE2-LABEL: load_sext_8i1_to_8i16:
|
||||
; SSE2: # BB#0: # %entry
|
||||
|
|
Loading…
Reference in New Issue