From 639d9122825d2f697d2dadc396c2f872f2a97921 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Sep 2022 14:33:55 +0200 Subject: [PATCH] [LICM] Allow load-only scalar promotion in the presence of unwinding Currently, we bail out of scalar promotion if the loop may unwind and the memory may be visible on unwind. This is because we can't insert stores of the promoted value on unwind edges. However, nowadays scalar promotion also has support for only promoting loads, while leaving stores in place. This kind of promotion is safe even in the presence of unwinding. Differential Revision: https://reviews.llvm.org/D133111 --- llvm/lib/Transforms/Scalar/LICM.cpp | 35 +++++++++++-------- llvm/test/Transforms/LICM/guards.ll | 9 ++--- .../Transforms/LICM/scalar-promote-unwind.ll | 35 ++++++++++--------- 3 files changed, 45 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 1fd66be06b5a..9d98beaa19e6 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1957,9 +1957,14 @@ bool llvm::promoteLoopAccessesToScalars( // store is never executed, but the exit blocks are not executed either. bool DereferenceableInPH = false; - bool SafeToInsertStore = false; bool StoreIsGuanteedToExecute = false; bool FoundLoadToPromote = false; + // Goes from Unknown to either Safe or Unsafe, but can't switch between them. + enum { + StoreSafe, + StoreUnsafe, + StoreSafetyUnknown, + } StoreSafety = StoreSafetyUnknown; SmallVector LoopUses; @@ -1981,7 +1986,7 @@ bool llvm::promoteLoopAccessesToScalars( // after return and thus can't possibly load from the object. Value *Object = getUnderlyingObject(SomePtr); if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT)) - return false; + StoreSafety = StoreUnsafe; } // Check that all accesses to pointers in the alias set use the same type. @@ -2040,7 +2045,8 @@ bool llvm::promoteLoopAccessesToScalars( StoreIsGuanteedToExecute |= GuaranteedToExecute; if (GuaranteedToExecute) { DereferenceableInPH = true; - SafeToInsertStore = true; + if (StoreSafety == StoreSafetyUnknown) + StoreSafety = StoreSafe; Alignment = std::max(Alignment, InstAlignment); } @@ -2050,10 +2056,11 @@ bool llvm::promoteLoopAccessesToScalars( // introducing stores on paths that did not have them. // Note that this only looks at explicit exit blocks. If we ever // start sinking stores into unwind edges (see above), this will break. - if (!SafeToInsertStore) - SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { - return DT->dominates(Store->getParent(), Exit); - }); + if (StoreSafety == StoreSafetyUnknown && + llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { + return DT->dominates(Store->getParent(), Exit); + })) + StoreSafety = StoreSafe; // If the store is not guaranteed to execute, we may still get // deref info through it. @@ -2105,22 +2112,22 @@ bool llvm::promoteLoopAccessesToScalars( // Check whether the location is thread-local. If it is, then we can insert // stores along paths which originally didn't have them without violating the // memory model. - if (!SafeToInsertStore) { + if (StoreSafety == StoreSafetyUnknown) { Value *Object = getUnderlyingObject(SomePtr); - SafeToInsertStore = - (isNoAliasCall(Object) || isa(Object) || + if ((isNoAliasCall(Object) || isa(Object) || (isa(Object) && cast(Object)->hasByValAttr())) && - isNotCapturedBeforeOrInLoop(Object, CurLoop, DT); + isNotCapturedBeforeOrInLoop(Object, CurLoop, DT)) + StoreSafety = StoreSafe; } // If we've still failed to prove we can sink the store, hoist the load // only, if possible. - if (!SafeToInsertStore && !FoundLoadToPromote) + if (StoreSafety != StoreSafe && !FoundLoadToPromote) // If we cannot hoist the load either, give up. return false; // Lets do the promotion! - if (SafeToInsertStore) + if (StoreSafety == StoreSafe) LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr << '\n'); else @@ -2146,7 +2153,7 @@ bool llvm::promoteLoopAccessesToScalars( LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, - SafeToInsertStore); + StoreSafety == StoreSafe); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll index 919f4151ea83..7feb1291483d 100644 --- a/llvm/test/Transforms/LICM/guards.ll +++ b/llvm/test/Transforms/LICM/guards.ll @@ -109,17 +109,18 @@ loop: br label %loop } -; Hoist guard. Cannot hoist load because of aliasing. +; Hoist guard. Cannot hoist load because of aliasing, but can promote. define void @test3(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ] +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[X_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 -; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[TMP0]] ; CHECK-NEXT: br label [[LOOP]] ; diff --git a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll index 506b5baaa3f7..4b57e4029fbf 100644 --- a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll +++ b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll @@ -10,11 +10,12 @@ target triple = "x86_64-unknown-linux-gnu" define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: @@ -150,11 +151,12 @@ for.cond.cleanup: define void @test_sret(i32* noalias sret(i32) %a, i1 zeroext %y) uwtable { ; CHECK-LABEL: @test_sret( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: @@ -466,17 +468,18 @@ fun.ret: ret void } -; The malloc'ed memory can be captured and therefore not promoted. +; The malloc'ed memory can be captured and therefore only loads can be promoted. define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @malloc_capture( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[DOTPROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_LATCH:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_LATCH]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 ; CHECK-NEXT: br label [[FOR_CALL:%.*]] ; CHECK: for.call: @@ -492,15 +495,15 @@ define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...) ; CHECK: for.end: ; CHECK-NEXT: br label [[FUN_RET:%.*]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1 ; CHECK-NEXT: br label [[CATCH:%.*]] ; CHECK: catch: -; CHECK-NEXT: [[TMP5:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP3]]) -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* -; CHECK-NEXT: call void @free(i8* [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: call void @free(i8* [[TMP5]]) ; CHECK-NEXT: call void @__cxa_end_catch() ; CHECK-NEXT: br label [[FUN_RET]] ; CHECK: fun.ret: