[mlir][affine] Option to unroll cleanup loop if smaller trip count.

Add an option (cleanUpUnroll) to unroll the cleanup loop even if the trip count is smaller than the unroll factor.

Differential Revision: https://reviews.llvm.org/D129171
This commit is contained in:
lewuathe 2022-08-18 14:28:38 +09:00 committed by Kai Sasaki
parent fba0367e03
commit cdc8d0fcd7
6 changed files with 106 additions and 6 deletions
mlir
include/mlir/Dialect/Affine
lib/Dialect/Affine
test/Dialect

View File

@ -45,9 +45,12 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
/// if the loop cannot be unrolled either due to restrictions or due to invalid
/// unroll factors. Requires positive loop bounds and step. If specified,
/// annotates the Ops in each unrolled iteration by applying `annotateFn`.
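/// When `cleanUpUnroll` is true, the loop is fully unrolled (via
/// `loopUnrollFull`) instead of leaving a cleanup loop, i.e. when the trip
/// count is smaller than the unroll factor or is not a multiple of it.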
LogicalResult loopUnrollByFactor(
AffineForOp forOp, uint64_t unrollFactor,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
bool cleanUpUnroll = false);
/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.

View File

@ -212,6 +212,8 @@ def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
/*default=*/"1",
"Unroll all loops with trip count less than or equal to this">,
Option<"cleanUpUnroll", "cleanup-unroll", "bool", /*default=*/"false",
"Fully unroll the cleanup loop when possible.">,
];
}

View File

@ -122,14 +122,16 @@ void LoopUnroll::runOnOperation() {
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
// Use the function callback if one was provided.
if (getUnrollFactor)
return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
return loopUnrollByFactor(forOp, getUnrollFactor(forOp),
/*annotateFn=*/nullptr, cleanUpUnroll);
// Unroll completely if full loop unroll was specified.
if (unrollFull)
return loopUnrollFull(forOp);
// Otherwise, unroll by the given unroll factor.
if (unrollUpToFactor)
return loopUnrollUpToFactor(forOp, unrollFactor);
return loopUnrollByFactor(forOp, unrollFactor);
return loopUnrollByFactor(forOp, unrollFactor, /*annotateFn=*/nullptr,
cleanUpUnroll);
}
std::unique_ptr<OperationPass<func::FuncOp>> mlir::createLoopUnrollPass(

View File

@ -1090,7 +1090,8 @@ static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
/// is successfully unrolled.
LogicalResult mlir::loopUnrollByFactor(
AffineForOp forOp, uint64_t unrollFactor,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
bool cleanUpUnroll) {
assert(unrollFactor > 0 && "unroll factor should be positive");
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
@ -1106,9 +1107,14 @@ LogicalResult mlir::loopUnrollByFactor(
return success();
// If the trip count is lower than the unroll factor, no unrolled body.
// TODO: option to specify cleanup loop unrolling.
if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor)
if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
if (cleanUpUnroll) {
// Unroll the cleanup loop if cleanUpUnroll is specified.
return loopUnrollFull(forOp);
}
return failure();
}
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
@ -1119,6 +1125,9 @@ LogicalResult mlir::loopUnrollByFactor(
if (forOp.getLowerBoundMap().getNumResults() != 1 ||
forOp.getUpperBoundMap().getNumResults() != 1)
return failure();
if (cleanUpUnroll)
// Force unroll including cleanup loop
return loopUnrollFull(forOp);
if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
assert(false && "cleanup loop lower bound map for single result lower "
"and upper bound maps can always be determined");

View File

@ -2,6 +2,7 @@
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
@ -689,3 +690,60 @@ func.func @unroll_zero_trip_count_case() {
}
return
}
// Trip count (3) is smaller than the unroll factor (5) used together with
// cleanup-unroll=true; the checks expect the loop to be replaced by three
// consecutive "foo" calls followed directly by the return.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_larger_unroll_factor()
func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
affine.for %i = 0 to 3 {
%x = "foo"(%i) : (index) -> i32
}
return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}
// Trip count (7) is larger than the unroll factor (5) but not a multiple of
// it; with cleanup-unroll=true the checks expect seven consecutive "foo"
// calls followed directly by the return, i.e. no remaining loop.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
%x = "foo"(%i) : (index) -> i32
}
return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V5:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V5]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V6:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V6]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}
// Trip count (5) equals the unroll factor (5); the checks expect five
// consecutive "foo" calls followed directly by the return.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_identical_unroll_factor()
func.func @unroll_cleanup_loop_with_identical_unroll_factor() {
affine.for %i = 0 to 5 {
%x = "foo"(%i) : (index) -> i32
}
return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}

View File

@ -4,6 +4,7 @@
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3: memref<?xf32>) {
@ -314,3 +315,28 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
// Test that the loop is fully unrolled, leaving no cleanup loop, when
// `cleanup-unroll` is set and the trip count is less than the unroll factor.
func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
%0 = arith.constant 7.0 : f32
%lb = arith.constant 0 : index
%ub = arith.constant 3 : index
// Bounds 0 to 3 give three iterations, fewer than the unroll factor of 5
// requested together with cleanup-unroll=true.
affine.for %i0 = %lb to %ub {
memref.store %0, %arg0[%i0] : memref<?xf32>
}
return
}
// The checks below expect three consecutive stores into the memref argument
// followed directly by the return, with no loop left in between.
// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
// CLEANUP-UNROLL-BY-5-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// CLEANUP-UNROLL-BY-5-DAG: %[[C0:.*]] = arith.constant 0 : index
// CLEANUP-UNROLL-BY-5-DAG: %[[C3:.*]] = arith.constant 3 : index
// CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: return