From 8d5b289a4681d89f4dedcd8926225333e0772967 Mon Sep 17 00:00:00 2001 From: Tyker Date: Sat, 26 Sep 2020 12:31:12 +0200 Subject: [PATCH] [LoopDelete][Assume] Allow deleting loops with assumes This prevents very poor optimization caused by a single assume like https://godbolt.org/z/EK3oMh baseline flags: -O3 patched flags: -O3 -mllvm --enable-knowledge-retention Before the patch ``` Metric: compile_time Program baseline patched diff test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 20.72 29.74 43.5% test-suite :: CTMark/Bullet/bullet.test 24.39 24.91 2.2% test-suite :: CTMark/7zip/7zip-benchmark.test 37.39 38.06 1.8% test-suite :: CTMark/kimwitu++/kc.test 11.76 11.94 1.5% test-suite :: CTMark/sqlite3/sqlite3.test 12.94 12.91 -0.3% test-suite :: CTMark/SPASS/SPASS.test 11.72 11.70 -0.2% test-suite :: CTMark/lencod/lencod.test 16.12 16.10 -0.1% test-suite :: CTMark/ClamAV/clamscan.test 13.31 13.30 -0.1% test-suite :: CTMark/mafft/pairlocalalign.test 9.12 9.12 -0.1% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 9.34 9.34 -0.1% Geomean difference 4.2% Metric: compiler_Kinst_count Program baseline patched diff test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 107576069.87 172886418.90 60.7% test-suite :: CTMark/Bullet/bullet.test 123291865.66 125457117.96 1.8% test-suite :: CTMark/kimwitu++/kc.test 56347884.64 57298544.14 1.7% test-suite :: CTMark/7zip/7zip-benchmark.test 180637699.58 183341656.57 1.5% test-suite :: CTMark/sqlite3/sqlite3.test 66723788.85 66664692.80 -0.1% test-suite :: CTMark/ClamAV/clamscan.test 69581500.56 69597863.92 0.0% test-suite :: CTMark/lencod/lencod.test 94236501.48 94216545.32 -0.0% test-suite :: CTMark/SPASS/SPASS.test 58516756.95 58505089.07 -0.0% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 48832815.53 48841989.39 0.0% test-suite :: CTMark/mafft/pairlocalalign.test 49682720.53 49686324.34 0.0% Geomean difference 5.4% ``` After the patch ``` Metric: compile_time Program baseline patched diff test-suite :: 
CTMark/tramp3d-v4/tramp3d-v4.test 20.70 22.40 8.2% test-suite :: CTMark/7zip/7zip-benchmark.test 37.13 38.05 2.5% test-suite :: CTMark/Bullet/bullet.test 24.25 24.83 2.4% test-suite :: CTMark/kimwitu++/kc.test 11.69 11.94 2.2% test-suite :: CTMark/ClamAV/clamscan.test 13.19 13.36 1.3% test-suite :: CTMark/lencod/lencod.test 16.02 16.19 1.1% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 9.29 9.36 0.7% test-suite :: CTMark/SPASS/SPASS.test 11.64 11.73 0.7% test-suite :: CTMark/mafft/pairlocalalign.test 9.10 9.15 0.5% test-suite :: CTMark/sqlite3/sqlite3.test 12.95 12.96 0.0% Geomean difference 1.9% Metric: compiler_Kinst_count Program baseline patched diff test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 107590933.61 114044834.72 6.0% test-suite :: CTMark/kimwitu++/kc.test 56344526.77 57235806.29 1.6% test-suite :: CTMark/Bullet/bullet.test 123291285.10 125128334.97 1.5% test-suite :: CTMark/7zip/7zip-benchmark.test 180641540.10 183155706.39 1.4% test-suite :: CTMark/sqlite3/sqlite3.test 66725619.22 66668713.92 -0.1% test-suite :: CTMark/SPASS/SPASS.test 58509029.85 58478704.75 -0.1% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 48843711.23 48826894.68 -0.0% test-suite :: CTMark/lencod/lencod.test 94233305.79 94207544.23 -0.0% test-suite :: CTMark/ClamAV/clamscan.test 69587887.66 69603549.90 0.0% test-suite :: CTMark/mafft/pairlocalalign.test 49686968.65 49689291.04 0.0% Geomean difference 1.0% ``` Reviewed By: jdoerfert, efriedma Differential Revision: https://reviews.llvm.org/D86816 --- llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 4 ++- llvm/test/Transforms/LoopDeletion/assume.ll | 40 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopDeletion/assume.ll diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index be209d34be42..d9cde031cc60 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ 
b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -85,7 +85,9 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, // This includes instructions that could write to memory, and loads that are // marked volatile. for (auto &I : L->blocks()) - if (any_of(*I, [](Instruction &I) { return I.mayHaveSideEffects(); })) + if (any_of(*I, [](Instruction &I) { + return I.mayHaveSideEffects() && !I.isDroppable(); + })) return false; return true; } diff --git a/llvm/test/Transforms/LoopDeletion/assume.ll b/llvm/test/Transforms/LoopDeletion/assume.ll new file mode 100644 index 000000000000..124188cd5b65 --- /dev/null +++ b/llvm/test/Transforms/LoopDeletion/assume.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -passes=loop-deletion -S | FileCheck %s --check-prefixes=CHECK + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.Loc.95 = type { %class.Domain.96 } +%class.Domain.96 = type { %class.DomainBase.97 } +%class.DomainBase.97 = type { [3 x %struct.WrapNoInit] } +%struct.WrapNoInit = type { %class.Loc } +%class.Loc = type { %class.Domain.67 } +%class.Domain.67 = type { %class.DomainBase.68 } +%class.DomainBase.68 = type { i32 } + +define dso_local void @_ZSt8_DestroyIP3LocILi3EES1_EvT_S3_RSaIT0_E(%class.Loc.95* %0) #0 { +; CHECK-LABEL: @_ZSt8_DestroyIP3LocILi3EES1_EvT_S3_RSaIT0_E( +; CHECK-NEXT: br label [[_ZST8_DESTROYIP3LOCILI3EEEVT_S3__EXIT:%.*]] +; CHECK: _ZSt8_DestroyIP3LocILi3EEEvT_S3_.exit: +; CHECK-NEXT: ret void +; + br label %2 + +2: ; preds = %4, %1 + %.0.i.i = phi %class.Loc.95* [ undef, %1 ], [ %5, %4 ] + %3 = icmp ne %class.Loc.95* %.0.i.i, %0 + br i1 %3, label %4, label %_ZSt8_DestroyIP3LocILi3EEEvT_S3_.exit + +4: ; preds = %2 + call void @llvm.assume(i1 true) [ "align"(%class.Loc.95* %.0.i.i, i64 4) ] + %5 = getelementptr inbounds %class.Loc.95, %class.Loc.95* %.0.i.i, i32 1 + br label %2 + 
+_ZSt8_DestroyIP3LocILi3EEEvT_S3_.exit: ; preds = %2 + ret void +} + +declare void @llvm.assume(i1) #1 + +attributes #0 = { "target-cpu"="x86-64" } +attributes #1 = { willreturn readnone norecurse nocapture nofree }