[LICM] Allow load-only scalar promotion in the presence of unwinding

Currently, we bail out of scalar promotion if the loop may unwind
and the memory may be visible on unwind. This is because we can't
insert stores of the promoted value on unwind edges.

However, nowadays scalar promotion also has support for only
promoting loads, while leaving stores in place. This kind of
promotion is safe even in the presence of unwinding.
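
For illustration, consider a loop of roughly the following shape (a hand-written sketch, not one of the tests updated below; @may_throw and all value names are made up). Because @may_throw is not nounwind and %p is caller-visible memory, the store has to stay inside the loop, but the load can still be promoted:

  ; Sketch: the store must remain in the loop because it would be observable
  ; if @may_throw unwinds, but the load can be hoisted into the preheader.
  define void @promote_load_only(i32* noalias nocapture %p, i1 %c) uwtable {
  entry:
    br label %loop

  loop:
    %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
    %v = load i32, i32* %p, align 4      ; becomes a preheader load plus a phi
    %add = add i32 %v, 1
    store i32 %add, i32* %p, align 4     ; left in place by load-only promotion
    br i1 %c, label %do_call, label %latch

  do_call:
    call void @may_throw()               ; may unwind; %p is visible to the caller
    br label %latch

  latch:
    %i.next = add i32 %i, 1
    %cmp = icmp ult i32 %i.next, 1000
    br i1 %cmp, label %loop, label %exit

  exit:
    ret void
  }

  declare void @may_throw()

The expected result is the pattern visible in the updated CHECK lines below: a single load in the preheader, a phi carrying the running value through the loop, and the store left untouched inside the loop.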

Differential Revision: https://reviews.llvm.org/D133111
Nikita Popov 2022-09-01 14:33:55 +02:00
parent cd8f3e7581
commit 639d912282
3 changed files with 45 additions and 34 deletions


@@ -1957,9 +1957,14 @@ bool llvm::promoteLoopAccessesToScalars(
   // store is never executed, but the exit blocks are not executed either.
 
   bool DereferenceableInPH = false;
-  bool SafeToInsertStore = false;
   bool StoreIsGuanteedToExecute = false;
   bool FoundLoadToPromote = false;
+  // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
+  enum {
+    StoreSafe,
+    StoreUnsafe,
+    StoreSafetyUnknown,
+  } StoreSafety = StoreSafetyUnknown;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -1981,7 +1986,7 @@ bool llvm::promoteLoopAccessesToScalars(
     // after return and thus can't possibly load from the object.
     Value *Object = getUnderlyingObject(SomePtr);
     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
-      return false;
+      StoreSafety = StoreUnsafe;
   }
 
   // Check that all accesses to pointers in the alias set use the same type.
@@ -2040,7 +2045,8 @@ bool llvm::promoteLoopAccessesToScalars(
         StoreIsGuanteedToExecute |= GuaranteedToExecute;
         if (GuaranteedToExecute) {
           DereferenceableInPH = true;
-          SafeToInsertStore = true;
+          if (StoreSafety == StoreSafetyUnknown)
+            StoreSafety = StoreSafe;
           Alignment = std::max(Alignment, InstAlignment);
         }
 
@@ -2050,10 +2056,11 @@ bool llvm::promoteLoopAccessesToScalars(
         // introducing stores on paths that did not have them.
         // Note that this only looks at explicit exit blocks. If we ever
        // start sinking stores into unwind edges (see above), this will break.
-        if (!SafeToInsertStore)
-          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
-            return DT->dominates(Store->getParent(), Exit);
-          });
+        if (StoreSafety == StoreSafetyUnknown &&
+            llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+              return DT->dominates(Store->getParent(), Exit);
+            }))
+          StoreSafety = StoreSafe;
 
         // If the store is not guaranteed to execute, we may still get
         // deref info through it.
@@ -2105,22 +2112,22 @@ bool llvm::promoteLoopAccessesToScalars(
   // Check whether the location is thread-local. If it is, then we can insert
   // stores along paths which originally didn't have them without violating the
   // memory model.
-  if (!SafeToInsertStore) {
+  if (StoreSafety == StoreSafetyUnknown) {
     Value *Object = getUnderlyingObject(SomePtr);
-    SafeToInsertStore =
-        (isNoAliasCall(Object) || isa<AllocaInst>(Object) ||
+    if ((isNoAliasCall(Object) || isa<AllocaInst>(Object) ||
          (isa<Argument>(Object) && cast<Argument>(Object)->hasByValAttr())) &&
-        isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
+        isNotCapturedBeforeOrInLoop(Object, CurLoop, DT))
+      StoreSafety = StoreSafe;
   }
 
   // If we've still failed to prove we can sink the store, hoist the load
   // only, if possible.
-  if (!SafeToInsertStore && !FoundLoadToPromote)
+  if (StoreSafety != StoreSafe && !FoundLoadToPromote)
     // If we cannot hoist the load either, give up.
     return false;
 
   // Lets do the promotion!
-  if (SafeToInsertStore)
+  if (StoreSafety == StoreSafe)
     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
                       << '\n');
   else
@@ -2146,7 +2153,7 @@ bool llvm::promoteLoopAccessesToScalars(
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
                         Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
-                        SafeToInsertStore);
+                        StoreSafety == StoreSafe);
 
   // Set up the preheader to have a definition of the value. It is the live-out
   // value from the preheader that uses in the loop will use.


@@ -109,17 +109,18 @@ loop:
   br label %loop
 }
 
-; Hoist guard. Cannot hoist load because of aliasing.
+; Hoist guard. Cannot hoist load because of aliasing, but can promote.
 define void @test3(i1 %cond, i32* %ptr) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ]
+; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ]
+; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[X_INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
-; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[TMP0]]
 ; CHECK-NEXT: br label [[LOOP]]
 ;


@@ -10,11 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4
 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK: if.then:
@@ -150,11 +151,12 @@ for.cond.cleanup:
 define void @test_sret(i32* noalias sret(i32) %a, i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test_sret(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4
 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK: if.then:
@@ -466,17 +468,18 @@ fun.ret:
   ret void
 }
 
-; The malloc'ed memory can be captured and therefore not promoted.
+; The malloc'ed memory can be captured and therefore only loads can be promoted.
 define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; CHECK-LABEL: @malloc_capture(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 4)
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
+; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* [[TMP0]], align 4
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_LATCH:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[DOTPROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_LATCH:%.*]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4
 ; CHECK-NEXT: br label [[FOR_CALL:%.*]]
 ; CHECK: for.call:
@@ -492,15 +495,15 @@ define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)
 ; CHECK: for.end:
 ; CHECK-NEXT: br label [[FUN_RET:%.*]]
 ; CHECK: lpad:
-; CHECK-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
 ; CHECK-NEXT: br label [[CATCH:%.*]]
 ; CHECK: catch:
-; CHECK-NEXT: [[TMP5:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP3]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8*
-; CHECK-NEXT: call void @free(i8* [[TMP6]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8*
+; CHECK-NEXT: call void @free(i8* [[TMP5]])
 ; CHECK-NEXT: call void @__cxa_end_catch()
 ; CHECK-NEXT: br label [[FUN_RET]]
 ; CHECK: fun.ret: