[GlobalOpt] Simplify CleanupConstantGlobalUsers()

This bases the CleanupConstantGlobalUsers() implementation around
the ConstantFoldLoadFromConst() API. The general approach is that
we discover all users while looking through casts, and then
constant fold loads and drop stores and memintrinsics.

This avoids special cases and limitations in the previous
implementation, which is also incompatible with opaque pointers.
The result is a bit more powerful than before, because we now use
more general load folding logic which can for example look through
pointer bitcasts between different sizes. This is where the test
changes come from, as we now fold more loads and can thus remove
more globals.

Differential Revision: https://reviews.llvm.org/D114889
This commit is contained in:
Nikita Popov 2021-10-23 17:23:56 +02:00
parent 0efd9a03fa
commit 8d1759c404
4 changed files with 56 additions and 87 deletions

View File

@ -25,6 +25,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@ -275,94 +276,64 @@ CleanupPointerRootUsers(GlobalVariable *GV,
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(
Value *V, Constant *Init, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
const DataLayout &DL) {
Constant *Init = GV->getInitializer();
SmallVector<User *, 8> WorkList(GV->users());
SmallPtrSet<User *, 8> Visited;
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
SmallVector<WeakTrackingVH, 8> WorkList(V->users());
SmallVector<WeakTrackingVH> MaybeDeadInsts;
auto EraseFromParent = [&](Instruction *I) {
for (Value *Op : I->operands())
if (auto *OpI = dyn_cast<Instruction>(Op))
MaybeDeadInsts.push_back(OpI);
I->eraseFromParent();
Changed = true;
};
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
User *U = WorkList.pop_back_val();
if (!Visited.insert(U).second)
continue;
User *U = cast<User>(UV);
if (auto *BO = dyn_cast<BitCastOperator>(U))
append_range(WorkList, BO->users());
if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
append_range(WorkList, ASC->users());
else if (auto *GEP = dyn_cast<GEPOperator>(U))
append_range(WorkList, GEP->users());
else if (auto *LI = dyn_cast<LoadInst>(U)) {
// A load from zeroinitializer is always zeroinitializer, regardless of
// any applied offset.
if (Init->isNullValue()) {
LI->replaceAllUsesWith(Constant::getNullValue(LI->getType()));
EraseFromParent(LI);
continue;
}
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
if (auto *Casted =
ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) {
// Replace the load with the initializer.
LI->replaceAllUsesWith(Casted);
LI->eraseFromParent();
Changed = true;
Value *PtrOp = LI->getPointerOperand();
APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
if (PtrOp == GV) {
if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(),
Offset, DL)) {
LI->replaceAllUsesWith(Value);
EraseFromParent(LI);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Store must be unreachable or storing Init into the global.
SI->eraseFromParent();
Changed = true;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
Constant *SubInit = nullptr;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(
Init, CE, V->getType()->getPointerElementType(), DL);
Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
} else if ((CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) ||
CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
}
if (CE->use_empty()) {
CE->destroyConstant();
Changed = true;
}
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// Do not transform "gepinst (gep constexpr (GV))" here, because forming
// "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
// and will invalidate our notion of what Init is.
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(
Init, CE, V->getType()->getPointerElementType(), DL);
// If the initializer is an all-null value and we have an inbounds GEP,
// we already know what the result of any load from that GEP is.
// TODO: Handle splats.
if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
SubInit = Constant::getNullValue(GEP->getResultElementType());
}
Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
Changed = true;
}
EraseFromParent(SI);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
if (MI->getRawDest() == V) {
MI->eraseFromParent();
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
// If we have a chain of dead constantexprs or other things dangling from
// us, and if they are all dead, nuke them without remorse.
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
return true;
}
if (getUnderlyingObject(MI->getRawDest()) == GV)
EraseFromParent(MI);
}
}
Changed |=
RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
GV->removeDeadConstantUsers();
return Changed;
}
@ -889,7 +860,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
CleanupConstantGlobalUsers(GV, DL);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@ -1557,8 +1528,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
Changed =
CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
Changed = CleanupConstantGlobalUsers(GV, DL);
}
// If the global is dead now, delete it.
@ -1583,7 +1553,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
// Clean up any obviously simplifiable users now.
Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
Changed |= CleanupConstantGlobalUsers(GV, DL);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@ -1628,7 +1598,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
CleanupConstantGlobalUsers(GV, DL);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "

View File

@ -8,12 +8,12 @@
@g0 = internal global i16 undef
@g1 = internal addrspace(3) global i16 undef
@g2 = internal addrspace(1) global i16 undef
; CHECK: internal unnamed_addr constant i16 3
; CHECK-NOT: @g0 =
; CHECK: internal unnamed_addr addrspace(3) global i16 undef
; CHECK: internal unnamed_addr addrspace(1) global i16 undef
; GPU: internal unnamed_addr constant i16 3
; GPU-NOT: @g0 =
; GPU: internal unnamed_addr addrspace(3) global i16 undef
; GPU: internal unnamed_addr addrspace(1) constant i16 7
; GPU-NOT: @g2 =
define void @a() {
store i16 3, i16* @g0, align 8

View File

@ -3,7 +3,7 @@
@GV1 = internal global i64 1, align 8
@GV2 = internal global i32 0, align 4
; CHECK: @GV1 = internal unnamed_addr global i64 1, align 8
; CHECK-NOT: @GV1 =
; CHECK: @GV2 = internal unnamed_addr global i32 0, align 4
define void @test1() {

View File

@ -7,11 +7,10 @@
@GV1 = internal unnamed_addr global i64 1, align 8
; CHECK: @GV1 = internal unnamed_addr global i64 1, align 8
; CHECK-NOT: @GV1 =
define void @test1() local_unnamed_addr {
; CHECK-LABEL: @test1
; CHECK-NEXT: %val = load atomic i8
; CHECK-NEXT: ret void
%val = load atomic i8, i8* bitcast (i64* @GV1 to i8*) acquire, align 8