forked from OSchip/llvm-project
Fix alignment checks in MergeConsecutiveStores.
1) check whether the alignment of the memory is sufficient for the *merged* store or load to be efficient. Not doing so can result in some ridiculously poor code generation, if merging creates a vector operation which must be aligned but isn't. 2) DON'T check that the alignment of each load/store is equal. If you're merging 2 4-byte stores, the first *might* have 8-byte alignment, but the second certainly will have 4-byte alignment. We do want to allow those to be merged. llvm-svn: 236850
This commit is contained in:
parent
9d3932bf96
commit
284e7b3d6c
|
@ -10653,6 +10653,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool allowableAlignment(const SelectionDAG &DAG,
|
||||||
|
const TargetLowering &TLI, EVT EVTTy,
|
||||||
|
unsigned AS, unsigned Align) {
|
||||||
|
if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
|
||||||
|
unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
|
||||||
|
return (Align >= ABIAlignment);
|
||||||
|
}
|
||||||
|
|
||||||
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (OptLevel == CodeGenOpt::None)
|
if (OptLevel == CodeGenOpt::None)
|
||||||
return false;
|
return false;
|
||||||
|
@ -10719,10 +10730,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (!Ptr.equalBaseIndex(BasePtr))
|
if (!Ptr.equalBaseIndex(BasePtr))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Check that the alignment is the same.
|
|
||||||
if (Index->getAlignment() != St->getAlignment())
|
|
||||||
break;
|
|
||||||
|
|
||||||
// The memory operands must not be volatile.
|
// The memory operands must not be volatile.
|
||||||
if (Index->isVolatile() || Index->isIndexed())
|
if (Index->isVolatile() || Index->isIndexed())
|
||||||
break;
|
break;
|
||||||
|
@ -10736,11 +10743,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (Index->getMemoryVT() != MemVT)
|
if (Index->getMemoryVT() != MemVT)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// We do not allow unaligned stores because we want to prevent overriding
|
|
||||||
// stores.
|
|
||||||
if (Index->getAlignment()*8 != MemVT.getSizeInBits())
|
|
||||||
break;
|
|
||||||
|
|
||||||
// We found a potential memory operand to merge.
|
// We found a potential memory operand to merge.
|
||||||
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
|
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
|
||||||
|
|
||||||
|
@ -10814,6 +10816,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
|
|
||||||
// The node with the lowest store address.
|
// The node with the lowest store address.
|
||||||
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
||||||
|
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
|
||||||
|
unsigned FirstStoreAlign = FirstInChain->getAlignment();
|
||||||
|
|
||||||
// Store the constants into memory as one consecutive store.
|
// Store the constants into memory as one consecutive store.
|
||||||
if (IsConstantSrc) {
|
if (IsConstantSrc) {
|
||||||
|
@ -10836,21 +10840,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
// Find a legal type for the constant store.
|
// Find a legal type for the constant store.
|
||||||
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
|
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
|
||||||
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
||||||
if (TLI.isTypeLegal(StoreTy))
|
if (TLI.isTypeLegal(StoreTy) &&
|
||||||
|
allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
|
||||||
|
FirstStoreAlign)) {
|
||||||
LastLegalType = i+1;
|
LastLegalType = i+1;
|
||||||
// Or check whether a truncstore is legal.
|
// Or check whether a truncstore is legal.
|
||||||
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
|
} else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
|
||||||
TargetLowering::TypePromoteInteger) {
|
TargetLowering::TypePromoteInteger) {
|
||||||
EVT LegalizedStoredValueTy =
|
EVT LegalizedStoredValueTy =
|
||||||
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
|
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
|
||||||
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
|
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
|
||||||
LastLegalType = i+1;
|
allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
|
||||||
|
FirstStoreAlign)) {
|
||||||
|
LastLegalType = i + 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find a legal type for the vector store.
|
// Find a legal type for the vector store.
|
||||||
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
||||||
if (TLI.isTypeLegal(Ty))
|
if (TLI.isTypeLegal(Ty) &&
|
||||||
|
allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
|
||||||
LastLegalVectorType = i + 1;
|
LastLegalVectorType = i + 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We only use vectors if the constant is known to be zero and the
|
// We only use vectors if the constant is known to be zero and the
|
||||||
|
@ -10886,7 +10897,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
|
|
||||||
// Find a legal type for the vector store.
|
// Find a legal type for the vector store.
|
||||||
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
||||||
if (TLI.isTypeLegal(Ty))
|
if (TLI.isTypeLegal(Ty) &&
|
||||||
|
allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
|
||||||
NumElem = i + 1;
|
NumElem = i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10913,10 +10925,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (!Ld->hasNUsesOfValue(1, 0))
|
if (!Ld->hasNUsesOfValue(1, 0))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Check that the alignment is the same as the stores.
|
|
||||||
if (Ld->getAlignment() != St->getAlignment())
|
|
||||||
break;
|
|
||||||
|
|
||||||
// The memory operands must not be volatile.
|
// The memory operands must not be volatile.
|
||||||
if (Ld->isVolatile() || Ld->isIndexed())
|
if (Ld->isVolatile() || Ld->isIndexed())
|
||||||
break;
|
break;
|
||||||
|
@ -10954,6 +10962,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
St->getAlignment() >= RequiredAlignment)
|
St->getAlignment() >= RequiredAlignment)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
|
||||||
|
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
|
||||||
|
unsigned FirstLoadAlign = FirstLoad->getAlignment();
|
||||||
|
|
||||||
// Scan the memory operations on the chain and find the first non-consecutive
|
// Scan the memory operations on the chain and find the first non-consecutive
|
||||||
// load memory address. These variables hold the index in the store node
|
// load memory address. These variables hold the index in the store node
|
||||||
// array.
|
// array.
|
||||||
|
@ -10962,7 +10974,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
unsigned LastLegalVectorType = 0;
|
unsigned LastLegalVectorType = 0;
|
||||||
unsigned LastLegalIntegerType = 0;
|
unsigned LastLegalIntegerType = 0;
|
||||||
StartAddress = LoadNodes[0].OffsetFromBase;
|
StartAddress = LoadNodes[0].OffsetFromBase;
|
||||||
SDValue FirstChain = LoadNodes[0].MemNode->getChain();
|
SDValue FirstChain = FirstLoad->getChain();
|
||||||
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
|
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
|
||||||
// All loads much share the same chain.
|
// All loads much share the same chain.
|
||||||
if (LoadNodes[i].MemNode->getChain() != FirstChain)
|
if (LoadNodes[i].MemNode->getChain() != FirstChain)
|
||||||
|
@ -10975,13 +10987,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
|
|
||||||
// Find a legal type for the vector store.
|
// Find a legal type for the vector store.
|
||||||
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
||||||
if (TLI.isTypeLegal(StoreTy))
|
if (TLI.isTypeLegal(StoreTy) &&
|
||||||
|
allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
|
||||||
|
allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
|
||||||
LastLegalVectorType = i + 1;
|
LastLegalVectorType = i + 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Find a legal type for the integer store.
|
// Find a legal type for the integer store.
|
||||||
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
|
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
|
||||||
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
||||||
if (TLI.isTypeLegal(StoreTy))
|
if (TLI.isTypeLegal(StoreTy) &&
|
||||||
|
allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
|
||||||
|
allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
|
||||||
LastLegalIntegerType = i + 1;
|
LastLegalIntegerType = i + 1;
|
||||||
// Or check whether a truncstore and extload is legal.
|
// Or check whether a truncstore and extload is legal.
|
||||||
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
|
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
|
||||||
|
@ -10991,7 +11008,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
|
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
|
||||||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
||||||
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
||||||
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
|
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
||||||
|
allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
|
||||||
|
FirstStoreAlign) &&
|
||||||
|
allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
|
||||||
|
FirstLoadAlign))
|
||||||
LastLegalIntegerType = i+1;
|
LastLegalIntegerType = i+1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11035,18 +11056,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
SDLoc LoadDL(LoadNodes[0].MemNode);
|
SDLoc LoadDL(LoadNodes[0].MemNode);
|
||||||
SDLoc StoreDL(StoreNodes[0].MemNode);
|
SDLoc StoreDL(StoreNodes[0].MemNode);
|
||||||
|
|
||||||
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
|
SDValue NewLoad = DAG.getLoad(
|
||||||
SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
|
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
|
||||||
FirstLoad->getChain(),
|
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
|
||||||
FirstLoad->getBasePtr(),
|
|
||||||
FirstLoad->getPointerInfo(),
|
|
||||||
false, false, false,
|
|
||||||
FirstLoad->getAlignment());
|
|
||||||
|
|
||||||
SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad,
|
SDValue NewStore = DAG.getStore(
|
||||||
FirstInChain->getBasePtr(),
|
LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
|
||||||
FirstInChain->getPointerInfo(), false, false,
|
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
|
||||||
FirstInChain->getAlignment());
|
|
||||||
|
|
||||||
// Replace one of the loads with the new load.
|
// Replace one of the loads with the new load.
|
||||||
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
|
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
; RUN: llc -march=ppc32 -mattr=+altivec < %s | FileCheck %s
|
||||||
|
|
||||||
|
;; This test ensures that MergeConsecutiveStores does not attempt to
|
||||||
|
;; merge stores or loads when doing so would result in unaligned
|
||||||
|
;; memory operations (unless the target supports those, e.g. X86).
|
||||||
|
|
||||||
|
;; This issue happen in other situations for other targets, but PPC
|
||||||
|
;; with Altivec extensions was chosen for the test because it does not
|
||||||
|
;; support unaligned access with AltiVec instructions. If the 4
|
||||||
|
;; load/stores get merged to an v4i32 vector type severely bad code
|
||||||
|
;; gets generated: it painstakingly copies the values to a temporary
|
||||||
|
;; location on the stack, with vector ops, in order to then use
|
||||||
|
;; integer ops to load from the temporary stack location and store to
|
||||||
|
;; the final location. Yuck!
|
||||||
|
|
||||||
|
%struct.X = type { i32, i32, i32, i32 }
|
||||||
|
|
||||||
|
@fx = common global %struct.X zeroinitializer, align 4
|
||||||
|
@fy = common global %struct.X zeroinitializer, align 4
|
||||||
|
|
||||||
|
;; In this test case, lvx and stvx instructions should NOT be
|
||||||
|
;; generated, as the alignment is not sufficient for it to be
|
||||||
|
;; worthwhile.
|
||||||
|
|
||||||
|
;; CHECK-LABEL: f:
|
||||||
|
;; CHECK: lwzu
|
||||||
|
;; CHECK-NEXT: lwz
|
||||||
|
;; CHECK-NEXT: lwz
|
||||||
|
;; CHECK-NEXT: lwz
|
||||||
|
;; CHECK-NEXT: stwu
|
||||||
|
;; CHECK-NEXT: stw
|
||||||
|
;; CHECK-NEXT: stw
|
||||||
|
;; CHECK-NEXT: stw
|
||||||
|
;; CHECK-NEXT: blr
|
||||||
|
define void @f() {
|
||||||
|
entry:
|
||||||
|
%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
|
||||||
|
%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
|
||||||
|
%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
|
||||||
|
%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
|
||||||
|
store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
|
||||||
|
store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
|
||||||
|
store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
|
||||||
|
store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
@gx = common global %struct.X zeroinitializer, align 16
|
||||||
|
@gy = common global %struct.X zeroinitializer, align 16
|
||||||
|
|
||||||
|
;; In this test, lvx and stvx instructions SHOULD be generated, as
|
||||||
|
;; the 16-byte alignment of the new load/store is acceptable.
|
||||||
|
;; CHECK-LABEL: g:
|
||||||
|
;; CHECK: lvx
|
||||||
|
;; CHECK: stvx
|
||||||
|
;; CHECK: blr
|
||||||
|
define void @g() {
|
||||||
|
entry:
|
||||||
|
%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16
|
||||||
|
%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
|
||||||
|
%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
|
||||||
|
%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
|
||||||
|
store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16
|
||||||
|
store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
|
||||||
|
store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
|
||||||
|
store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -291,17 +291,12 @@ block4: ; preds = %4, %.lr.ph
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; On x86, even unaligned copies can be merged to vector ops.
|
||||||
; CHECK-LABEL: merge_loads_no_align:
|
; CHECK-LABEL: merge_loads_no_align:
|
||||||
; load:
|
; load:
|
||||||
; CHECK: movl
|
; CHECK: vmovups
|
||||||
; CHECK: movl
|
|
||||||
; CHECK: movl
|
|
||||||
; CHECK: movl
|
|
||||||
; store:
|
; store:
|
||||||
; CHECK: movl
|
; CHECK: vmovups
|
||||||
; CHECK: movl
|
|
||||||
; CHECK: movl
|
|
||||||
; CHECK: movl
|
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
|
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
|
||||||
%a1 = icmp sgt i32 %count, 0
|
%a1 = icmp sgt i32 %count, 0
|
||||||
|
|
Loading…
Reference in New Issue