From 94c495295135dd45a0b67a5b6d6c3c318f4f8628 Mon Sep 17 00:00:00 2001 From: Jingu Kang Date: Wed, 14 Jul 2021 11:43:29 +0100 Subject: [PATCH] [AArch64] Enable Upper bound unrolling universally Differential Revision: https://reviews.llvm.org/D105996 --- .../AArch64/AArch64TargetTransformInfo.cpp | 3 +- .../LoopUnroll/AArch64/unroll-upperbound.ll | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 2fd1229f1826..b479b4808ec2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1754,6 +1754,8 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // Enable partial unrolling and runtime unrolling. BaseT::getUnrollingPreferences(L, SE, UP, ORE); + UP.UpperBound = true; + // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try // a larger threshold to unroll more loops. @@ -1794,7 +1796,6 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, !ST->getSchedModel().isOutOfOrder()) { UP.Runtime = true; UP.Partial = true; - UP.UpperBound = true; UP.UnrollRemainder = true; UP.DefaultUnrollRuntimeCount = 4; diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll b/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll new file mode 100644 index 000000000000..3b82365d1a6e --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 | FileCheck %s + +; The trip count of the loop below is not a compile-time constant, which would otherwise prevent the loop from being unrolled. 
+; With `UP.UpperBound = true` set in `getUnrollingPreferences`, the loop should +; be fully unrolled using its maximum trip count of 3 (the exit bound is `select i1 %cond, i32 2, i32 3`). + +define void @test(i1 %cond) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: switch i32 0, label [[SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 2, label [[LATCH:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.default: +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_BODY_1:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; CHECK: for.body.1: +; CHECK-NEXT: switch i32 1, label [[SW_DEFAULT_1:%.*]] [ +; CHECK-NEXT: i32 2, label [[LATCH_1:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.default.1: +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: br label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_END]], label [[FOR_BODY_2:%.*]] +; CHECK: for.body.2: +; CHECK-NEXT: switch i32 2, label [[SW_DEFAULT_2:%.*]] [ +; CHECK-NEXT: i32 2, label [[LATCH_2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw.default.2: +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: br label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: br label [[FOR_END]] +; +entry: + %0 = select i1 %cond, i32 2, i32 3 + br label %for.body + +for.body: + %i.017 = phi i32 [ 0, %entry ], [ %inc, %latch ] + switch i32 %i.017, label %sw.default [ + i32 2, label %latch + ] + +sw.default: + tail call void @foo() + br label %latch + +latch: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond.not = icmp eq i32 %inc, %0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +declare void @foo()