GVN-hoist: improve code generation for recursive GEPs

When loading from or storing to a field of a struct such as "a.b.c", GVN is able
to detect the equivalent expressions, but GVN-hoist would fail in code
generation.  This is because GEPs are not hoisted as scalar operations, to avoid
moving a GEP too far from its ld/st instruction when the ld/st itself is not
movable.  So we end up having to generate code for the GEP of a ld/st when we
move the ld/st.  In the case of a GEP referring to another GEP, as in "a.b.c",
we need to generate code for all the GEPs necessary to make all the operands
available at the new location of the ld/st.  With this patch we recursively walk
through the GEP operands, checking whether all operands are available; when an
operand is itself a GEP, we recursively make all of its operands available.
Code generation proceeds from the innermost GEPs outward, until reaching the GEP
that appears as an operand of the ld/st.

Differential Revision: https://reviews.llvm.org/D22599

llvm-svn: 276841
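
As an illustration of the case described above, here is a hypothetical C++
source pattern (not part of this commit) which, when lowered without prior GEP
folding, produces a GEP whose pointer operand is itself a GEP:

struct C { double c; };
struct B { C b; };
struct A { B a; };

// Both branches read the same nested field, so GVN gives the two loads the
// same value number and GVN-hoist wants to hoist one of them above the branch.
// To do so it must rematerialize the whole GEP chain computing &p->a.b.c at
// the hoisting point, generating the innermost GEP first.
double read(A *p, bool flag) {
  double x;
  if (flag)
    x = p->a.b.c + 1.0;
  else
    x = p->a.b.c + 2.0;
  return x;
}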
Sebastian Pop 2016-07-27 05:48:12 +00:00
parent 586d3eaeb5
commit 55c3007b88
2 changed files with 185 additions and 42 deletions


@@ -186,7 +186,8 @@ class GVNHoist {
public:
GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
bool OptForMinSize)
: DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
: DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize),
HoistingGeps(OptForMinSize), HoistedCtr(0) {}
bool run(Function &F) {
VN.setDomTree(DT);
VN.setAliasAnalysis(AA);
@@ -230,6 +231,7 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
const bool OptForMinSize;
const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
MemorySSA *MSSA;
@@ -609,21 +611,86 @@ private:
return true;
}
bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
const SmallVecInsn &InstructionsToHoist) const {
// Same as allOperandsAvailable with recursive check for GEP operands.
bool allGepOperandsAvailable(const Instruction *I,
const BasicBlock *HoistPt) const {
for (const Use &Op : I->operands())
if (const auto *Inst = dyn_cast<Instruction>(&Op))
if (!DT->dominates(Inst->getParent(), HoistPt)) {
if (const GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Inst)) {
if (!allGepOperandsAvailable(GepOp, HoistPt))
return false;
// Gep is available if all operands of GepOp are available.
} else {
// Gep is not available if it has operands other than GEPs that are
// defined in blocks not dominating HoistPt.
return false;
}
}
return true;
}
// Make all operands of the GEP available.
void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
const SmallVecInsn &InstructionsToHoist,
Instruction *Gep) const {
assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
Instruction *ClonedGep = Gep->clone();
for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
// Check whether the operand is already available.
if (DT->dominates(Op->getParent(), HoistPt))
continue;
// As a GEP can refer to other GEPs, recursively make all the operands
// of this GEP available at HoistPt.
if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
}
// Copy Gep and replace its uses in Repl with ClonedGep.
ClonedGep->insertBefore(HoistPt->getTerminator());
// Conservatively discard any optimization hints, they may differ on the
// other paths.
ClonedGep->dropUnknownNonDebugMetadata();
// If we have optimization hints which agree with each other along different
// paths, preserve them.
for (const Instruction *OtherInst : InstructionsToHoist) {
const GetElementPtrInst *OtherGep;
if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
else
OtherGep = cast<GetElementPtrInst>(
cast<StoreInst>(OtherInst)->getPointerOperand());
ClonedGep->intersectOptionalDataWith(OtherGep);
}
// Replace uses of Gep with ClonedGep in Repl.
Repl->replaceUsesOfWith(Gep, ClonedGep);
}
// In the case Repl is a load or a store, we make all their GEPs
// available: GEPs are not hoisted by default to avoid the address
// computations to be hoisted without the associated load or store.
bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
const SmallVecInsn &InstructionsToHoist) const {
// Check whether the GEP of a ld/st can be synthesized at HoistPt.
GetElementPtrInst *Gep = nullptr;
Instruction *Val = nullptr;
if (auto *Ld = dyn_cast<LoadInst>(Repl))
if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
if (auto *St = dyn_cast<StoreInst>(Repl)) {
} else if (auto *St = dyn_cast<StoreInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
Val = dyn_cast<Instruction>(St->getValueOperand());
// Check that the stored value is available.
if (Val) {
if (isa<GetElementPtrInst>(Val)) {
// Check whether we can compute the GEP at HoistPt.
if (!allOperandsAvailable(Val, HoistPt))
if (!allGepOperandsAvailable(Val, HoistPt))
return false;
} else if (!DT->dominates(Val->getParent(), HoistPt))
return false;
@@ -631,40 +698,13 @@ private:
}
// Check whether we can compute the Gep at HoistPt.
if (!Gep || !allOperandsAvailable(Gep, HoistPt))
if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
return false;
// Copy the gep before moving the ld/st.
Instruction *ClonedGep = Gep->clone();
ClonedGep->insertBefore(HoistPt->getTerminator());
// Conservatively discard any optimization hints, they may differ on the
// other paths.
for (Instruction *OtherInst : InstructionsToHoist) {
GetElementPtrInst *OtherGep;
if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
else
OtherGep = cast<GetElementPtrInst>(
cast<StoreInst>(OtherInst)->getPointerOperand());
ClonedGep->intersectOptionalDataWith(OtherGep);
combineKnownMetadata(ClonedGep, OtherGep);
}
Repl->replaceUsesOfWith(Gep, ClonedGep);
makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
// Also copy Val when it is a GEP.
if (Val && isa<GetElementPtrInst>(Val)) {
Instruction *ClonedVal = Val->clone();
ClonedVal->insertBefore(HoistPt->getTerminator());
// Conservatively discard any optimization hints, they may differ on the
// other paths.
for (Instruction *OtherInst : InstructionsToHoist) {
auto *OtherVal =
cast<Instruction>(cast<StoreInst>(OtherInst)->getValueOperand());
ClonedVal->intersectOptionalDataWith(OtherVal);
combineKnownMetadata(ClonedVal, OtherVal);
}
Repl->replaceUsesOfWith(Val, ClonedVal);
}
if (Val && isa<GetElementPtrInst>(Val))
makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
return true;
}
@@ -695,14 +735,14 @@ private:
Repl = InstructionsToHoist.front();
// We can move Repl in HoistPt only when all operands are available.
// When not HoistingGeps we need to copy the GEPs now.
// The order in which hoistings are done may influence the availability
// of operands.
if (!allOperandsAvailable(Repl, HoistPt) &&
!makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
if (!allOperandsAvailable(Repl, HoistPt) && !HoistingGeps &&
!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
continue;
Repl->moveBefore(HoistPt->getTerminator());
// TBAA may differ on one of the other paths, we need to get rid of
// anything which might conflict.
}
if (isa<LoadInst>(Repl))
@@ -783,7 +823,7 @@ private:
break;
}
CI.insert(Call, VN);
} else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
} else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
// Do not hoist scalars past calls that may write to memory because
// that could result in spills later. geps are handled separately.
// TODO: We can relax this for targets like AArch64 as they have more


@@ -0,0 +1,103 @@
; RUN: opt -gvn-hoist -S < %s | FileCheck %s
; Check that recursive GEPs are hoisted.
; CHECK-LABEL: @fun
; CHECK: fdiv
; CHECK: load
; CHECK: load
; CHECK: load
; CHECK: load
; CHECK: fsub
; CHECK: fmul
; CHECK: fsub
; CHECK: fmul
; CHECK-NOT: fsub
; CHECK-NOT: fmul
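; Both arms of the diamond below load the same two nested struct fields through
; recursive GEPs and compute the same fsub/fmul values from them, so GVN-hoist
; is expected to rematerialize the GEP chains and hoist the loads and arithmetic
; above the branch; the CHECK-NOT lines verify that no fsub/fmul is left behind.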
%0 = type { double, double, double }
%1 = type { double, double, double }
%2 = type { %3, %1, %1 }
%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 }
%4 = type { %5 }
%5 = type { %6 }
%6 = type { %7 }
%7 = type { %8 }
%8 = type { %9 }
%9 = type { i64, i64, i8* }
%10 = type <{ i32 (...)**, i32, [4 x i8] }>
%11 = type { [4 x [4 x double]] }
%12 = type <{ %1, %0, i32, [4 x i8] }>
%13 = type { %1, %0, %12, %3*, %14* }
%14 = type opaque
@d = external global %0, align 8
@p = external global %1, align 8
define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) {
%5 = alloca %2*, align 8
%6 = alloca %12*, align 8
%7 = alloca double*, align 8
%8 = alloca %13*, align 8
%9 = alloca double, align 8
%10 = alloca double, align 8
%11 = alloca double, align 8
%12 = alloca double, align 8
%13 = alloca double, align 8
%14 = alloca double, align 8
%15 = alloca double, align 8
store %2* %0, %2** %5, align 8
store %12* %1, %12** %6, align 8
store double* %2, double** %7, align 8
store %13* %3, %13** %8, align 8
%16 = load %2*, %2** %5, align 8
%17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8
%18 = fdiv double 1.000000e+00, %17
store double %18, double* %15, align 8
%19 = load double, double* %15, align 8
%20 = fcmp oge double %19, 0.000000e+00
br i1 %20, label %21, label %36
; <label>:21: ; preds = %4
%22 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
%23 = getelementptr inbounds %1, %1* %22, i32 0, i32 0
%24 = load double, double* %23, align 8
%25 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
%26 = fsub double %24, %25
%27 = load double, double* %15, align 8
%28 = fmul double %26, %27
store double %28, double* %9, align 8
%29 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
%30 = getelementptr inbounds %1, %1* %29, i32 0, i32 0
%31 = load double, double* %30, align 8
%32 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
%33 = fsub double %31, %32
%34 = load double, double* %15, align 8
%35 = fmul double %33, %34
store double %35, double* %12, align 8
br label %51
; <label>:36: ; preds = %4
%37 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
%38 = getelementptr inbounds %1, %1* %37, i32 0, i32 0
%39 = load double, double* %38, align 8
%40 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
%41 = fsub double %39, %40
%42 = load double, double* %15, align 8
%43 = fmul double %41, %42
store double %43, double* %9, align 8
%44 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
%45 = getelementptr inbounds %1, %1* %44, i32 0, i32 0
%46 = load double, double* %45, align 8
%47 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
%48 = fsub double %46, %47
%49 = load double, double* %15, align 8
%50 = fmul double %48, %49
store double %50, double* %12, align 8
br label %51
; <label>:51: ; preds = %36, %21
%52 = load double, double* %12, align 8
%53 = load double, double* %9, align 8
%54 = fcmp olt double %52, %53
ret i1 %54
}