forked from OSchip/llvm-project
[DAGCombiner] Discard pointer info when combining extract_vector_elt of a vector load when the index isn't constant
Summary: If the index isn't constant, this transform inserts a multiply and an add on the index to calculating the base pointer for a scalar load. But we still create a memory operand with an offset of 0 and the size of the scalar access. But the access is really to an unknown offset within the original access size. This can cause the machine scheduler to incorrectly calculate dependencies between this load and other accesses. In the case we saw, there was a 32 byte vector store that was split into two 16 byte stores, one with offset 0 and one with offset 16. The size of the memory operand for both was 16. The scheduler correctly detected the alias with the offset 0 store, but not the offset 16 store. This patch discards the pointer info so we don't incorrectly detect aliasing. I wasn't sure if we could keep using the original offset and size without risking some other transform on the load changing the size. I tried to reduce a test case, but there's still a lot of memory operations needed to get the scheduler to do the bad reordering. So it looked pretty fragile to maintain. Reviewers: efriedma Reviewed By: efriedma Subscribers: arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57616 llvm-svn: 353124
This commit is contained in:
parent
4bdf82ce79
commit
d4e37afe45
|
@ -15656,7 +15656,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
|
|||
Offset = DAG.getNode(
|
||||
ISD::MUL, DL, PtrType, Offset,
|
||||
DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
|
||||
MPI = OriginalLoad->getPointerInfo();
|
||||
// Discard the pointer info except the address space because the memory
|
||||
// operand can't represent this new access since the offset is variable.
|
||||
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
|
||||
}
|
||||
NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
;RUN: llc < %s -mtriple=i686 -mattr=sse4.1 -stop-after=expand-isel-pseudos 2>&1 | FileCheck %s
|
||||
|
||||
; This test makes sure we discard pointer info when we combine a vector load
|
||||
; and a variable extractelement into a scalar load using an index. There's also
|
||||
; a test to ensure we don't discard it for the constant index case.
|
||||
|
||||
; CHECK: name: const_index
|
||||
; CHECK: bb.0 (%ir-block.0):
|
||||
; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
|
||||
; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 1, $noreg, 4, $noreg :: (load 4 from %ir.v + 4)
|
||||
; CHECK: $eax = COPY [[LOAD]]
|
||||
; CHECK: RET 0, $eax
|
||||
define i32 @const_index(<8 x i32>* %v) {
|
||||
%a = load <8 x i32>, <8 x i32>* %v
|
||||
%b = extractelement <8 x i32> %a, i32 1
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; CHECK: name: variable_index
|
||||
; CHECK: bb.0 (%ir-block.0):
|
||||
; CHECK: [[INDEX:%[0-9]+]]:gr32_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
|
||||
; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1)
|
||||
; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[INDEX]], 0, $noreg :: (load 4)
|
||||
; CHECK: $eax = COPY [[LOAD]]
|
||||
; CHECK: RET 0, $eax
|
||||
define i32 @variable_index(<8 x i32>* %v, i32 %i) {
|
||||
%a = load <8 x i32>, <8 x i32>* %v
|
||||
%b = extractelement <8 x i32> %a, i32 %i
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; CHECK: name: variable_index_with_addrspace
|
||||
; CHECK: bb.0 (%ir-block.0):
|
||||
; CHECK: [[INDEX:%[0-9]+]]:gr32_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
|
||||
; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1)
|
||||
; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[INDEX]], 0, $noreg :: (load 4, addrspace 1)
|
||||
; CHECK: $eax = COPY [[LOAD]]
|
||||
; CHECK: RET 0, $eax
|
||||
define i32 @variable_index_with_addrspace(<8 x i32> addrspace(1)* %v, i32 %i) {
|
||||
%a = load <8 x i32>, <8 x i32> addrspace(1)* %v
|
||||
%b = extractelement <8 x i32> %a, i32 %i
|
||||
ret i32 %b
|
||||
}
|
Loading…
Reference in New Issue