forked from OSchip/llvm-project
[GlobalOpt] Perform store->dominated load forwarding for stored once globals
The initial land incorrectly optimized forwarding non-Constants in non-nosync/norecurse functions. Bail on non-Constants since norecurse should cause global -> alloca promotion anyway. The initial land also incorrectly assumed that StoredOnceStore was the only store to the global, but it actually means that only one value other than the global initializer is stored. Add a check that there's only one store. Compile time tracker: https://llvm-compile-time-tracker.com/compare.php?from=c80b88ee29f34078d2149de94e27600093e6c7c0&to=ef2c2b7772424b6861a75e794f3c31b45167304a&stat=instructions Reviewed By: nikic, asbirlea, jdoerfert Differential Revision: https://reviews.llvm.org/D128128
This commit is contained in:
parent
d3cbcc4e89
commit
e422c0d3b2
|
@ -35,6 +35,9 @@ struct GlobalStatus {
|
|||
/// can be deleted.
|
||||
bool IsLoaded = false;
|
||||
|
||||
/// Number of stores to the global.
|
||||
unsigned NumStores = 0;
|
||||
|
||||
/// Keep track of what stores to the global look like.
|
||||
enum StoredType {
|
||||
/// There is no store to this global. It can thus be marked constant.
|
||||
|
|
|
@ -1444,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) {
|
|||
}
|
||||
}
|
||||
|
||||
// For a global variable with one store, if the store dominates any loads,
|
||||
// those loads will always load the stored value (as opposed to the
|
||||
// initializer), even in the presence of recursion.
|
||||
static bool forwardStoredOnceStore(
|
||||
GlobalVariable *GV, const StoreInst *StoredOnceStore,
|
||||
function_ref<DominatorTree &(Function &)> LookupDomTree) {
|
||||
const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
|
||||
// We can do this optimization for non-constants in nosync + norecurse
|
||||
// functions, but globals used in exactly one norecurse functions are already
|
||||
// promoted to an alloca.
|
||||
if (!isa<Constant>(StoredOnceValue))
|
||||
return false;
|
||||
const Function *F = StoredOnceStore->getFunction();
|
||||
SmallVector<LoadInst *> Loads;
|
||||
for (User *U : GV->users()) {
|
||||
if (auto *LI = dyn_cast<LoadInst>(U)) {
|
||||
if (LI->getFunction() == F &&
|
||||
LI->getType() == StoredOnceValue->getType() && LI->isSimple())
|
||||
Loads.push_back(LI);
|
||||
}
|
||||
}
|
||||
// Only compute DT if we have any loads to examine.
|
||||
bool MadeChange = false;
|
||||
if (!Loads.empty()) {
|
||||
auto &DT = LookupDomTree(*const_cast<Function *>(F));
|
||||
for (auto *LI : Loads) {
|
||||
if (DT.dominates(StoredOnceStore, LI)) {
|
||||
LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
|
||||
LI->eraseFromParent();
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
/// Analyze the specified global variable and optimize
|
||||
/// it if possible. If we make a change, return true.
|
||||
static bool
|
||||
|
@ -1603,6 +1639,12 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
|
|||
if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
|
||||
return true;
|
||||
|
||||
// Try to forward the store to any loads. If we have more than one store, we
|
||||
// may have a store of the initializer between StoredOnceStore and a load.
|
||||
if (GS.NumStores == 1)
|
||||
if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
|
||||
return true;
|
||||
|
||||
// Otherwise, if the global was not a boolean, we can shrink it to be a
|
||||
// boolean. Skip this optimization for AS that doesn't allow an initializer.
|
||||
if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
|
||||
|
|
|
@ -104,6 +104,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
|
|||
if (SI->isVolatile())
|
||||
return true;
|
||||
|
||||
++GS.NumStores;
|
||||
|
||||
GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
|
||||
|
||||
// If this is a direct store to the global (i.e., the global is a scalar
|
||||
|
|
|
@ -2,21 +2,24 @@
|
|||
|
||||
@foo = internal global i32 0, align 4
|
||||
|
||||
define dso_local i32 @bar() {
|
||||
define void @store() {
|
||||
entry:
|
||||
store i32 5, i32* @foo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @bar() {
|
||||
entry:
|
||||
%0 = load i32, i32* @foo, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
;CHECK: @bar
|
||||
;CHECK-NEXT: entry:
|
||||
;CHECK-NEXT: store i1 true, i1* @foo, align 1, !dbg ![[DbgLocStore:[0-9]+]]
|
||||
;CHECK-NEXT: %.b = load i1, i1* @foo, align 1, !dbg ![[DbgLocLoadSel:[0-9]+]]
|
||||
;CHECK-NEXT: %0 = select i1 %.b, i32 5, i32 0, !dbg ![[DbgLocLoadSel]]
|
||||
;CHECK-NEXT: call void @llvm.dbg.value({{.*}}), !dbg ![[DbgLocLoadSel]]
|
||||
;CHECK-NEXT: ret i32 %0, !dbg ![[DbgLocRet:[0-9]+]]
|
||||
|
||||
;CHECK: ![[DbgLocStore]] = !DILocation(line: 1,
|
||||
;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 2,
|
||||
;CHECK: ![[DbgLocRet]] = !DILocation(line: 3,
|
||||
;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 3,
|
||||
;CHECK: ![[DbgLocRet]] = !DILocation(line: 4,
|
||||
|
|
|
@ -10,11 +10,13 @@
|
|||
; Negative test for AS(3). Skip shrink global to bool optimization.
|
||||
; CHECK: @lvar = internal unnamed_addr addrspace(3) global i32 undef
|
||||
|
||||
define void @test_global_var() {
|
||||
define void @test_global_var(i1 %i) {
|
||||
; CHECK-LABEL: @test_global_var(
|
||||
; CHECK: store volatile i32 10, i32* undef, align 4
|
||||
;
|
||||
entry:
|
||||
br i1 %i, label %bb1, label %exit
|
||||
bb1:
|
||||
store i32 10, i32* @gvar
|
||||
br label %exit
|
||||
exit:
|
||||
|
@ -23,13 +25,15 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @test_lds_var() {
|
||||
define void @test_lds_var(i1 %i) {
|
||||
; CHECK-LABEL: @test_lds_var(
|
||||
; CHECK: store i32 10, i32 addrspace(3)* @lvar, align 4
|
||||
; CHECK: [[LD:%.*]] = load i32, i32 addrspace(3)* @lvar, align 4
|
||||
; CHECK: store volatile i32 [[LD]], i32* undef, align 4
|
||||
;
|
||||
entry:
|
||||
br i1 %i, label %bb1, label %exit
|
||||
bb1:
|
||||
store i32 10, i32 addrspace(3)* @lvar
|
||||
br label %exit
|
||||
exit:
|
||||
|
|
|
@ -15,10 +15,8 @@ declare void @b()
|
|||
|
||||
define i1 @dom_const() {
|
||||
; CHECK-LABEL: @dom_const(
|
||||
; CHECK-NEXT: store i1 true, ptr @g1, align 1
|
||||
; CHECK-NEXT: call void @b()
|
||||
; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g1, align 1
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
; CHECK-NEXT: ret i1 true
|
||||
;
|
||||
store i1 true, ptr @g1
|
||||
call void @b()
|
||||
|
@ -90,8 +88,7 @@ define i1 @dom_multiple_function_loads() {
|
|||
; CHECK-LABEL: @dom_multiple_function_loads(
|
||||
; CHECK-NEXT: store i1 true, ptr @g6, align 1
|
||||
; CHECK-NEXT: call void @b()
|
||||
; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g6, align 1
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
; CHECK-NEXT: ret i1 true
|
||||
;
|
||||
store i1 true, ptr @g6
|
||||
call void @b()
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
define i32 @main() {
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: store i1 true, i1* @a, align 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** @e, align 8
|
||||
; CHECK-NEXT: store i32 0, i32* [[TMP0]], align 4
|
||||
; CHECK-NEXT: store i32* null, i32** @e, align 8
|
||||
|
|
Loading…
Reference in New Issue