forked from OSchip/llvm-project
[CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.
As a follow-up to D95982, this patch continues unblocking optimizations that are blocked by pseudo probe instrumentation. The optimizations unblocked are: - In-block load propagation. - In-block dead store elimination - Memory copy optimization that turns stores to consecutive memory locations into a memset. These optimizations are local to a block, so they shouldn't affect the profile quality. Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D100075
This commit is contained in:
parent
18839be9c5
commit
30bb5be389
|
@ -532,7 +532,7 @@ Value *llvm::findAvailablePtrLoadStore(
|
||||||
// We must ignore debug info directives when counting (otherwise they
|
// We must ignore debug info directives when counting (otherwise they
|
||||||
// would affect codegen).
|
// would affect codegen).
|
||||||
Instruction *Inst = &*--ScanFrom;
|
Instruction *Inst = &*--ScanFrom;
|
||||||
if (isa<DbgInfoIntrinsic>(Inst))
|
if (Inst->isDebugOrPseudoInst())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Restore ScanFrom to expected value in case next test succeeds
|
// Restore ScanFrom to expected value in case next test succeeds
|
||||||
|
@ -620,7 +620,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
|
||||||
SmallVector<Instruction *> MustNotAliasInsts;
|
SmallVector<Instruction *> MustNotAliasInsts;
|
||||||
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
|
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
|
||||||
ScanBB->rend())) {
|
ScanBB->rend())) {
|
||||||
if (isa<DbgInfoIntrinsic>(&Inst))
|
if (Inst.isDebugOrPseudoInst())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (MaxInstsToScan-- == 0)
|
if (MaxInstsToScan-- == 0)
|
||||||
|
|
|
@ -1396,7 +1396,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
|
||||||
--BBI;
|
--BBI;
|
||||||
// Don't count debug info directives, lest they affect codegen,
|
// Don't count debug info directives, lest they affect codegen,
|
||||||
// and we skip pointer-to-pointer bitcasts, which are NOPs.
|
// and we skip pointer-to-pointer bitcasts, which are NOPs.
|
||||||
if (isa<DbgInfoIntrinsic>(BBI) ||
|
if (BBI->isDebugOrPseudoInst() ||
|
||||||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
|
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
|
||||||
ScanInsts++;
|
ScanInsts++;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -399,6 +399,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calls that only access inaccessible memory do not block merging
|
||||||
|
// accessible stores.
|
||||||
|
if (auto *CB = dyn_cast<CallBase>(BI)) {
|
||||||
|
if (CB->onlyAccessesInaccessibleMemory())
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
|
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
|
||||||
// If the instruction is readnone, ignore it, otherwise bail out. We
|
// If the instruction is readnone, ignore it, otherwise bail out. We
|
||||||
// don't even allow readonly here because we don't want something like:
|
// don't even allow readonly here because we don't want something like:
|
||||||
|
|
|
@ -202,7 +202,7 @@ static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
|
||||||
if (!ProcessedBegin)
|
if (!ProcessedBegin)
|
||||||
--I;
|
--I;
|
||||||
|
|
||||||
if (isa<DbgInfoIntrinsic>(Inst))
|
if (Inst->isDebugOrPseudoInst())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
|
if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
|
; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s
|
||||||
|
|
||||||
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
|
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
|
||||||
%struct.CompAtomExt = type { i32 }
|
%struct.CompAtomExt = type { i32 }
|
||||||
|
@ -13,11 +13,11 @@
|
||||||
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
|
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
|
||||||
%class.Pairlists = type { i16*, i32, i32 }
|
%class.Pairlists = type { i16*, i32, i32 }
|
||||||
|
|
||||||
|
define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
|
||||||
;; Check the minPart4 and minPart assignments are merged.
|
;; Check the minPart4 and minPart assignments are merged.
|
||||||
|
; CHECK-LABEL: @merge(
|
||||||
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
||||||
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
|
||||||
|
|
||||||
define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
|
|
||||||
entry:
|
entry:
|
||||||
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
|
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
|
||||||
%0 = load i32, i32* %savePairlists3, align 8
|
%0 = load i32, i32* %savePairlists3, align 8
|
||||||
|
@ -58,7 +58,36 @@ if.else147: ; preds = %if.then138
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
|
define i32 @load(i32* nocapture %a, i32* nocapture %b) {
|
||||||
|
;; Check the last store is deleted.
|
||||||
|
; CHECK-LABEL: @load(
|
||||||
|
; CHECK-NEXT: %1 = getelementptr inbounds i32, i32* %a, i64 1
|
||||||
|
; CHECK-NEXT: %2 = load i32, i32* %1, align 8
|
||||||
|
; CHECK-NEXT: %3 = getelementptr inbounds i32, i32* %b, i64 1
|
||||||
|
; CHECK-NEXT: store i32 %2, i32* %3, align 8
|
||||||
|
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||||
|
; CHECK-NEXT: ret i32 %[[#]]
|
||||||
|
%1 = getelementptr inbounds i32, i32* %a, i32 1
|
||||||
|
%2 = load i32, i32* %1, align 8
|
||||||
|
%3 = getelementptr inbounds i32, i32* %b, i32 1
|
||||||
|
store i32 %2, i32* %3, align 8
|
||||||
|
%4 = getelementptr inbounds i32, i32* %b, i32 1
|
||||||
|
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||||
|
%5 = load i32, i32* %4, align 8
|
||||||
|
ret i32 %5
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @dse(i32* %p) {
|
||||||
|
;; Check the first store is deleted.
|
||||||
|
; CHECK-LABEL: @dse(
|
||||||
|
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||||
|
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
store i32 0, i32* %p
|
||||||
|
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||||
|
store i32 0, i32* %p
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; Function Attrs: inaccessiblememonly nounwind willreturn
|
; Function Attrs: inaccessiblememonly nounwind willreturn
|
||||||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||||
|
|
||||||
|
%struct.MV = type { i16, i16 }
|
||||||
|
|
||||||
|
define void @test(i32* nocapture %c) nounwind optsize {
|
||||||
|
; All the stores in this example should be merged into a single memset.
|
||||||
|
; CHECK-NOT: store i32 -1
|
||||||
|
; CHECK: call void @llvm.memset.p0i8.i64
|
||||||
|
store i32 -1, i32* %c, align 4
|
||||||
|
%1 = getelementptr inbounds i32, i32* %c, i32 1
|
||||||
|
store i32 -1, i32* %1, align 4
|
||||||
|
%2 = getelementptr inbounds i32, i32* %c, i32 2
|
||||||
|
store i32 -1, i32* %2, align 4
|
||||||
|
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
|
||||||
|
%3 = getelementptr inbounds i32, i32* %c, i32 3
|
||||||
|
store i32 -1, i32* %3, align 4
|
||||||
|
%4 = getelementptr inbounds i32, i32* %c, i32 4
|
||||||
|
store i32 -1, i32* %4, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: inaccessiblememonly nounwind willreturn
|
||||||
|
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
|
||||||
|
|
||||||
|
attributes #0 = { inaccessiblememonly nounwind willreturn }
|
Loading…
Reference in New Issue