GVN-hoist: improve code generation for recursive GEPs
When loading or storing a field of a struct such as "a.b.c", GVN is able to detect the equivalent expressions, but GVN-hoist would fail in the code generation. This is because the GEPs are not hoisted as scalar operations, so as to avoid moving the GEPs too far from their load/store instruction when the load/store itself is not movable. As a result, we have to generate code for the GEP of a load/store when we move that load/store.

In the case of a GEP referring to another GEP, as in "a.b.c", we need to generate code for all the GEPs required to make their operands available at the new location of the load/store. With this patch we recursively walk through the GEP operands, checking whether all operands are available; when an operand is itself a GEP, its own operands are recursively made available. Code generation proceeds from the innermost GEPs outwards, until reaching the GEP that appears as an operand of the load/store.

Differential Revision: https://reviews.llvm.org/D22599

llvm-svn: 276841
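To illustrate the pattern this addresses, here is a minimal sketch in LLVM IR (hypothetical types and value names, unrelated to the test added below). Both branches compute the same gep-of-gep address and load from it, so the load is a hoisting candidate, but its address is a GEP whose pointer operand is itself a GEP defined in the branch:

    %Inner = type { double, double, double }
    %Outer = type { i32, %Inner }

    define double @example(%Outer* %a, i1 %c) {
    entry:
      br i1 %c, label %then, label %else

    then:                                   ; gep-of-gep for the nested field ("a.b.c")
      %g0 = getelementptr inbounds %Outer, %Outer* %a, i32 0, i32 1
      %g1 = getelementptr inbounds %Inner, %Inner* %g0, i32 0, i32 0
      %v0 = load double, double* %g1, align 8
      br label %end

    else:                                   ; the same address, computed again
      %g2 = getelementptr inbounds %Outer, %Outer* %a, i32 0, i32 1
      %g3 = getelementptr inbounds %Inner, %Inner* %g2, i32 0, i32 0
      %v1 = load double, double* %g3, align 8
      br label %end

    end:
      %r = phi double [ %v0, %then ], [ %v1, %else ]
      ret double %r
    }

To hoist the load into %entry, the pass now clones the inner GEP (%g0) first and then the outer GEP (%g1) before the terminator of %entry, and only then moves the load; previously the outer GEP's operand was found unavailable at the hoisting point and the hoist was given up.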
commit 55c3007b88 (parent 586d3eaeb5)
llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -186,7 +186,8 @@ class GVNHoist {
 public:
   GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
            bool OptForMinSize)
-      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
+      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize),
+        HoistingGeps(OptForMinSize), HoistedCtr(0) {}
   bool run(Function &F) {
     VN.setDomTree(DT);
     VN.setAliasAnalysis(AA);
@@ -230,6 +231,7 @@ private:
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
   const bool OptForMinSize;
+  const bool HoistingGeps;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
   MemorySSA *MSSA;
@@ -609,21 +611,86 @@ private:
     return true;
   }

-  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
-                             const SmallVecInsn &InstructionsToHoist) const {
+  // Same as allOperandsAvailable with recursive check for GEP operands.
+  bool allGepOperandsAvailable(const Instruction *I,
+                               const BasicBlock *HoistPt) const {
+    for (const Use &Op : I->operands())
+      if (const auto *Inst = dyn_cast<Instruction>(&Op))
+        if (!DT->dominates(Inst->getParent(), HoistPt)) {
+          if (const GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Inst)) {
+            if (!allGepOperandsAvailable(GepOp, HoistPt))
+              return false;
+            // Gep is available if all operands of GepOp are available.
+          } else {
+            // Gep is not available if it has operands other than GEPs that are
+            // defined in blocks not dominating HoistPt.
+            return false;
+          }
+        }
+    return true;
+  }
+
+  // Make all operands of the GEP available.
+  void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                         const SmallVecInsn &InstructionsToHoist,
+                         Instruction *Gep) const {
+    assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
+
+    Instruction *ClonedGep = Gep->clone();
+    for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+      if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+
+        // Check whether the operand is already available.
+        if (DT->dominates(Op->getParent(), HoistPt))
+          continue;
+
+        // As a GEP can refer to other GEPs, recursively make all the operands
+        // of this GEP available at HoistPt.
+        if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+          makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+      }
+
+    // Copy Gep and replace its uses in Repl with ClonedGep.
+    ClonedGep->insertBefore(HoistPt->getTerminator());
+
+    // Conservatively discard any optimization hints, they may differ on the
+    // other paths.
+    ClonedGep->dropUnknownNonDebugMetadata();
+
+    // If we have optimization hints which agree with each other along different
+    // paths, preserve them.
+    for (const Instruction *OtherInst : InstructionsToHoist) {
+      const GetElementPtrInst *OtherGep;
+      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+      else
+        OtherGep = cast<GetElementPtrInst>(
+            cast<StoreInst>(OtherInst)->getPointerOperand());
+      ClonedGep->intersectOptionalDataWith(OtherGep);
+    }
+
+    // Replace uses of Gep with ClonedGep in Repl.
+    Repl->replaceUsesOfWith(Gep, ClonedGep);
+  }
+
+  // In the case Repl is a load or a store, we make all their GEPs
+  // available: GEPs are not hoisted by default to avoid the address
+  // computations to be hoisted without the associated load or store.
+  bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                                const SmallVecInsn &InstructionsToHoist) const {
     // Check whether the GEP of a ld/st can be synthesized at HoistPt.
     GetElementPtrInst *Gep = nullptr;
     Instruction *Val = nullptr;
-    if (auto *Ld = dyn_cast<LoadInst>(Repl))
+    if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
       Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
-    if (auto *St = dyn_cast<StoreInst>(Repl)) {
+    } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
       Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
       Val = dyn_cast<Instruction>(St->getValueOperand());
       // Check that the stored value is available.
       if (Val) {
         if (isa<GetElementPtrInst>(Val)) {
           // Check whether we can compute the GEP at HoistPt.
-          if (!allOperandsAvailable(Val, HoistPt))
+          if (!allGepOperandsAvailable(Val, HoistPt))
             return false;
         } else if (!DT->dominates(Val->getParent(), HoistPt))
           return false;
@@ -631,40 +698,13 @@ private:
     }

     // Check whether we can compute the Gep at HoistPt.
-    if (!Gep || !allOperandsAvailable(Gep, HoistPt))
+    if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
       return false;

-    // Copy the gep before moving the ld/st.
-    Instruction *ClonedGep = Gep->clone();
-    ClonedGep->insertBefore(HoistPt->getTerminator());
-    // Conservatively discard any optimization hints, they may differ on the
-    // other paths.
-    for (Instruction *OtherInst : InstructionsToHoist) {
-      GetElementPtrInst *OtherGep;
-      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
-        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
-      else
-        OtherGep = cast<GetElementPtrInst>(
-            cast<StoreInst>(OtherInst)->getPointerOperand());
-      ClonedGep->intersectOptionalDataWith(OtherGep);
-      combineKnownMetadata(ClonedGep, OtherGep);
-    }
-    Repl->replaceUsesOfWith(Gep, ClonedGep);
+    makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);

     // Also copy Val when it is a GEP.
-    if (Val && isa<GetElementPtrInst>(Val)) {
-      Instruction *ClonedVal = Val->clone();
-      ClonedVal->insertBefore(HoistPt->getTerminator());
-      // Conservatively discard any optimization hints, they may differ on the
-      // other paths.
-      for (Instruction *OtherInst : InstructionsToHoist) {
-        auto *OtherVal =
-            cast<Instruction>(cast<StoreInst>(OtherInst)->getValueOperand());
-        ClonedVal->intersectOptionalDataWith(OtherVal);
-        combineKnownMetadata(ClonedVal, OtherVal);
-      }
-      Repl->replaceUsesOfWith(Val, ClonedVal);
-    }
+    if (Val && isa<GetElementPtrInst>(Val))
+      makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);

     return true;
   }
@@ -695,14 +735,14 @@ private:
         Repl = InstructionsToHoist.front();

         // We can move Repl in HoistPt only when all operands are available.
+        // When not HoistingGeps we need to copy the GEPs now.
         // The order in which hoistings are done may influence the availability
         // of operands.
-        if (!allOperandsAvailable(Repl, HoistPt) &&
-            !makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
+        if (!allOperandsAvailable(Repl, HoistPt) && !HoistingGeps &&
+            !makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
           continue;

         Repl->moveBefore(HoistPt->getTerminator());
         // TBAA may differ on one of the other paths, we need to get rid of
         // anything which might conflict.
       }

       if (isa<LoadInst>(Repl))
@@ -783,7 +823,7 @@ private:
               break;
           }
           CI.insert(Call, VN);
-        } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+        } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
           // Do not hoist scalars past calls that may write to memory because
           // that could result in spills later. geps are handled separately.
           // TODO: We can relax this for targets like AArch64 as they have more
New test file:
@@ -0,0 +1,103 @@
; RUN: opt -gvn-hoist -S < %s | FileCheck %s

; Check that recursive GEPs are hoisted.
; CHECK-LABEL: @fun
; CHECK: fdiv
; CHECK: load
; CHECK: load
; CHECK: load
; CHECK: load
; CHECK: fsub
; CHECK: fmul
; CHECK: fsub
; CHECK: fmul
; CHECK-NOT: fsub
; CHECK-NOT: fmul

%0 = type { double, double, double }
%1 = type { double, double, double }
%2 = type { %3, %1, %1 }
%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 }
%4 = type { %5 }
%5 = type { %6 }
%6 = type { %7 }
%7 = type { %8 }
%8 = type { %9 }
%9 = type { i64, i64, i8* }
%10 = type <{ i32 (...)**, i32, [4 x i8] }>
%11 = type { [4 x [4 x double]] }
%12 = type <{ %1, %0, i32, [4 x i8] }>
%13 = type { %1, %0, %12, %3*, %14* }
%14 = type opaque

@d = external global %0, align 8
@p = external global %1, align 8

define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) {
  %5 = alloca %2*, align 8
  %6 = alloca %12*, align 8
  %7 = alloca double*, align 8
  %8 = alloca %13*, align 8
  %9 = alloca double, align 8
  %10 = alloca double, align 8
  %11 = alloca double, align 8
  %12 = alloca double, align 8
  %13 = alloca double, align 8
  %14 = alloca double, align 8
  %15 = alloca double, align 8
  store %2* %0, %2** %5, align 8
  store %12* %1, %12** %6, align 8
  store double* %2, double** %7, align 8
  store %13* %3, %13** %8, align 8
  %16 = load %2*, %2** %5, align 8
  %17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8
  %18 = fdiv double 1.000000e+00, %17
  store double %18, double* %15, align 8
  %19 = load double, double* %15, align 8
  %20 = fcmp oge double %19, 0.000000e+00
  br i1 %20, label %21, label %36

; <label>:21:                                     ; preds = %4
  %22 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
  %23 = getelementptr inbounds %1, %1* %22, i32 0, i32 0
  %24 = load double, double* %23, align 8
  %25 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
  %26 = fsub double %24, %25
  %27 = load double, double* %15, align 8
  %28 = fmul double %26, %27
  store double %28, double* %9, align 8
  %29 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
  %30 = getelementptr inbounds %1, %1* %29, i32 0, i32 0
  %31 = load double, double* %30, align 8
  %32 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
  %33 = fsub double %31, %32
  %34 = load double, double* %15, align 8
  %35 = fmul double %33, %34
  store double %35, double* %12, align 8
  br label %51

; <label>:36:                                     ; preds = %4
  %37 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
  %38 = getelementptr inbounds %1, %1* %37, i32 0, i32 0
  %39 = load double, double* %38, align 8
  %40 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
  %41 = fsub double %39, %40
  %42 = load double, double* %15, align 8
  %43 = fmul double %41, %42
  store double %43, double* %9, align 8
  %44 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
  %45 = getelementptr inbounds %1, %1* %44, i32 0, i32 0
  %46 = load double, double* %45, align 8
  %47 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
  %48 = fsub double %46, %47
  %49 = load double, double* %15, align 8
  %50 = fmul double %48, %49
  store double %50, double* %12, align 8
  br label %51

; <label>:51:                                     ; preds = %36, %21
  %52 = load double, double* %12, align 8
  %53 = load double, double* %9, align 8
  %54 = fcmp olt double %52, %53
  ret i1 %54
}
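For orientation, here is a sketch of what the tail of the entry block of @fun should roughly look like after opt -gvn-hoist (value names are illustrative and the exact instruction order may differ; this is essentially what the CHECK lines above encode): the GEP chains for both fields, the loads they feed, and the fsub/fmul computations end up above the conditional branch, leaving only the stores in the two successor blocks.

      %gep1 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
      %gep1f = getelementptr inbounds %1, %1* %gep1, i32 0, i32 0
      %a = load double, double* %gep1f, align 8
      %gep2 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
      %gep2f = getelementptr inbounds %1, %1* %gep2, i32 0, i32 0
      %b = load double, double* %gep2f, align 8
      %pv = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
      %q = load double, double* %15, align 8
      %s1 = fsub double %a, %pv
      %m1 = fmul double %s1, %q
      %s2 = fsub double %b, %pv
      %m2 = fmul double %s2, %q
      br i1 %20, label %21, label %36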