forked from OSchip/llvm-project
[DSE] Support looking through memory phis at end of function.
Update isWriteAtEndOfFunction to look through MemoryPhis. The reason MemoryPhis were skipped so far was the known AliasAnalysis issue with it missing loop-carried dependences. This problem is already addressed in other parts of the code by skipping MemoryDefs that may be in different loops. I think the same logic can be applied here. This can have a substantial impact on the number of stores removed in some cases. For MultiSource/SPEC2006/SPEC2017 with -O3: ``` Metric: dse.NumFastStores Program dse.NumFastStores base patch diff External/S...CINT2017rate/557.xz_r/557.xz_r 14.00 45.00 221.4% External/S...te/538.imagick_r/538.imagick_r 439.00 1267.00 188.6% MultiSourc...e/Applications/SIBsim4/SIBsim4 6.00 15.00 150.0% MultiSourc...Prolangs-C/simulator/simulator 3.00 7.00 133.3% MultiSource/Applications/siod/siod 3.00 7.00 133.3% MultiSourc...arks/FreeBench/distray/distray 6.00 9.00 50.0% MultiSourc...e/Applications/obsequi/Obsequi 22.00 30.00 36.4% MultiSource/Benchmarks/Ptrdist/bc/bc 23.00 28.00 21.7% External/S...NT2017rate/502.gcc_r/502.gcc_r 1258.00 1512.00 20.2% External/S...te/520.omnetpp_r/520.omnetpp_r 954.00 1143.00 19.8% External/S...rate/510.parest_r/510.parest_r 5961.00 7122.00 19.5% External/S...C/CINT2006/445.gobmk/445.gobmk 47.00 56.00 19.1% External/S...00.perlbench_r/500.perlbench_r 241.00 286.00 18.7% External/S...NT2006/471.omnetpp/471.omnetpp 36.00 42.00 16.7% External/S...06/400.perlbench/400.perlbench 183.00 210.00 14.8% MultiSource/Applications/SPASS/SPASS 72.00 81.00 12.5% External/S...17rate/541.leela_r/541.leela_r 72.00 80.00 11.1% External/SPEC/CINT2006/403.gcc/403.gcc 585.00 642.00 9.7% MultiSourc...e/Applications/sqlite3/sqlite3 120.00 131.00 9.2% MultiSourc...Applications/hexxagon/hexxagon 11.00 12.00 9.1% External/S.../CFP2006/453.povray/453.povray 566.00 615.00 8.7% External/S...rate/511.povray_r/511.povray_r 578.00 627.00 8.5% External/S...FP2006/482.sphinx3/482.sphinx3 12.00 13.00 8.3% MultiSource/Applications/oggenc/oggenc 130.00 
140.00 7.7% MultiSourc...e/Applications/ClamAV/clamscan 250.00 268.00 7.2% MultiSourc.../mediabench/jpeg/jpeg-6a/cjpeg 19.00 20.00 5.3% MultiSourc...ch/consumer-jpeg/consumer-jpeg 19.00 20.00 5.3% External/S...te/526.blender_r/526.blender_r 3747.00 3928.00 4.8% MultiSourc...OE-ProxyApps-C++/miniFE/miniFE 104.00 108.00 3.8% MultiSourc...ch/consumer-lame/consumer-lame 54.00 56.00 3.7% MultiSource/Benchmarks/Bullet/bullet 1222.00 1264.00 3.4% MultiSourc...nchmarks/tramp3d-v4/tramp3d-v4 973.00 1005.00 3.3% External/S.../CFP2006/447.dealII/447.dealII 2699.00 2780.00 3.0% External/S...06/483.xalancbmk/483.xalancbmk 788.00 810.00 2.8% External/S.../CFP2006/450.soplex/450.soplex 180.00 185.00 2.8% MultiSourc.../DOE-ProxyApps-C++/CLAMR/CLAMR 338.00 345.00 2.1% MultiSourc...Benchmarks/7zip/7zip-benchmark 685.00 699.00 2.0% External/S...FP2017rate/544.nab_r/544.nab_r 158.00 160.00 1.3% MultiSourc...sumer-typeset/consumer-typeset 772.00 781.00 1.2% External/S...2017rate/525.x264_r/525.x264_r 410.00 414.00 1.0% External/S...23.xalancbmk_r/523.xalancbmk_r 998.00 1002.00 0.4% ``` Compile-time is almost neutral: https://llvm-compile-time-tracker.com/compare.php?from=b3125ad3d60531a97eea20009cc9629a87755862&to=84007eee59004f43464eda7f5ba8263ed5158df8&stat=instructions NewPM-O3: +0.03% NewPM-ReleaseThinLTO: -0.01% NewPM-ReleaseLTO-g: +0.03% Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D132365
This commit is contained in:
parent
23dec4a352
commit
b5e208fcba
|
@ -1075,13 +1075,16 @@ struct DSEState {
|
|||
}
|
||||
|
||||
MemoryAccess *UseAccess = WorkList[I];
|
||||
// Simply adding the users of MemoryPhi to the worklist is not enough,
|
||||
// because we might miss read clobbers in different iterations of a loop,
|
||||
// for example.
|
||||
// TODO: Add support for phi translation to handle the loop case.
|
||||
if (isa<MemoryPhi>(UseAccess))
|
||||
return false;
|
||||
if (isa<MemoryPhi>(UseAccess)) {
|
||||
// AliasAnalysis does not account for loops. Limit elimination to
|
||||
// candidates for which we can guarantee they always store to the same
|
||||
// memory location.
|
||||
if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
|
||||
return false;
|
||||
|
||||
PushMemUses(cast<MemoryPhi>(UseAccess));
|
||||
continue;
|
||||
}
|
||||
// TODO: Checking for aliasing is expensive. Consider reducing the amount
|
||||
// of times this is called and/or caching it.
|
||||
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
|
||||
|
|
|
@ -138,8 +138,6 @@ define void @alloca_1(i1 %c) {
|
|||
; CHECK: bb1:
|
||||
; CHECK-NEXT: br label [[BB3:%.*]]
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
|
||||
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: ret void
|
||||
|
@ -177,12 +175,8 @@ define void @alloca_2(i1 %c) {
|
|||
; CHECK-NEXT: call void @readonly_use(i32* [[P]])
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
|
||||
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
|
||||
; CHECK-NEXT: br label [[BB3:%.*]]
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
|
||||
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: ret void
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -dse -S %s | FileCheck %s
|
||||
|
||||
; TODO: Both the stores in %then and %else can be eliminated by translating %p
|
||||
; Both the stores in %then and %else can be eliminated by translating %p
|
||||
; through the phi.
|
||||
define void @memoryphi_translate_1(i1 %c) {
|
||||
; CHECK-LABEL: @memoryphi_translate_1(
|
||||
|
@ -10,10 +10,8 @@ define void @memoryphi_translate_1(i1 %c) {
|
|||
; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
||||
; CHECK: then:
|
||||
; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
|
||||
; CHECK-NEXT: br label [[END:%.*]]
|
||||
; CHECK: else:
|
||||
; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ]
|
||||
|
@ -39,7 +37,7 @@ end:
|
|||
ret void
|
||||
}
|
||||
|
||||
; TODO: The store in %else can be eliminated by translating %p through the phi.
|
||||
; The store in %else can be eliminated by translating %p through the phi.
|
||||
; The store in %then cannot be eliminated, because %a.1 is read before the final
|
||||
; store.
|
||||
define i8 @memoryphi_translate_2(i1 %c) {
|
||||
|
@ -52,7 +50,6 @@ define i8 @memoryphi_translate_2(i1 %c) {
|
|||
; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
|
||||
; CHECK-NEXT: br label [[END:%.*]]
|
||||
; CHECK: else:
|
||||
; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[A_1]], [[THEN]] ], [ [[A_2]], [[ELSE]] ]
|
||||
|
@ -80,7 +77,7 @@ end:
|
|||
ret i8 %l
|
||||
}
|
||||
|
||||
; TODO: The store in %then can be eliminated by translating %p through the phi.
|
||||
; The store in %then can be eliminated by translating %p through the phi.
|
||||
; The store in %else cannot be eliminated, because %a.2 is read before the final
|
||||
; store.
|
||||
define i8 @memoryphi_translate_3(i1 %c) {
|
||||
|
@ -90,7 +87,6 @@ define i8 @memoryphi_translate_3(i1 %c) {
|
|||
; CHECK-NEXT: [[A_2:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
||||
; CHECK: then:
|
||||
; CHECK-NEXT: store i8 0, i8* [[A_1]], align 1
|
||||
; CHECK-NEXT: br label [[END:%.*]]
|
||||
; CHECK: else:
|
||||
; CHECK-NEXT: store i8 9, i8* [[A_2]], align 1
|
||||
|
@ -166,11 +162,9 @@ define void @memoryphi_translate_5(i1 %cond) {
|
|||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[C:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: store i8 0, i8* [[A]], align 1
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
|
||||
; CHECK: cond.true:
|
||||
; CHECK-NEXT: store i8 0, i8* [[C]], align 1
|
||||
; CHECK-NEXT: br label [[COND_END]]
|
||||
; CHECK: cond.end:
|
||||
; CHECK-NEXT: [[P:%.*]] = phi i8* [ [[B]], [[COND_TRUE]] ], [ [[A]], [[ENTRY:%.*]] ]
|
||||
|
|
|
@ -225,12 +225,9 @@ define i8 @test4_read_between(i8 *%P) {
|
|||
|
||||
define void @test4_non_local(i8 *%P, i1 %c) {
|
||||
; CHECK-LABEL: @test4_non_local(
|
||||
; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
|
||||
; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
|
||||
; CHECK-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: call:
|
||||
; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P]])
|
||||
; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[P:%.*]])
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
|
|
Loading…
Reference in New Issue