forked from OSchip/llvm-project
[mlir][affine] Option to unroll cleanup loop if smaller trip count.
Add an option (cleanUpUnroll) to unroll the cleanup loop even if the trip count is smaller than the unroll factor. Differential Revision: https://reviews.llvm.org/D129171
This commit is contained in:
parent
fba0367e03
commit
cdc8d0fcd7
mlir
include/mlir/Dialect/Affine
lib/Dialect/Affine
test/Dialect
|
@ -45,9 +45,12 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
|
|||
/// if the loop cannot be unrolled either due to restrictions or due to invalid
|
||||
/// unroll factors. Requires positive loop bounds and step. If specified,
|
||||
/// annotates the Ops in each unrolled iteration by applying `annotateFn`.
|
||||
/// When `cleanUpUnroll` is true, the loop is fully unrolled whenever the trip
|
||||
/// count is smaller than, or not a multiple of, the unroll factor.
|
||||
LogicalResult loopUnrollByFactor(
|
||||
AffineForOp forOp, uint64_t unrollFactor,
|
||||
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
|
||||
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
|
||||
bool cleanUpUnroll = false);
|
||||
|
||||
/// Unrolls this loop by the specified unroll factor or its trip count,
|
||||
/// whichever is lower.
|
||||
|
|
|
@ -212,6 +212,8 @@ def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
|
|||
Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
|
||||
/*default=*/"1",
|
||||
"Unroll all loops with trip count less than or equal to this">,
|
||||
Option<"cleanUpUnroll", "cleanup-unroll", "bool", /*default=*/"false",
|
||||
"Fully unroll the cleanup loop when possible.">,
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
@ -122,14 +122,16 @@ void LoopUnroll::runOnOperation() {
|
|||
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
|
||||
// Use the function callback if one was provided.
|
||||
if (getUnrollFactor)
|
||||
return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
|
||||
return loopUnrollByFactor(forOp, getUnrollFactor(forOp),
|
||||
/*annotateFn=*/nullptr, cleanUpUnroll);
|
||||
// Unroll completely if full loop unroll was specified.
|
||||
if (unrollFull)
|
||||
return loopUnrollFull(forOp);
|
||||
// Otherwise, unroll by the given unroll factor.
|
||||
if (unrollUpToFactor)
|
||||
return loopUnrollUpToFactor(forOp, unrollFactor);
|
||||
return loopUnrollByFactor(forOp, unrollFactor);
|
||||
return loopUnrollByFactor(forOp, unrollFactor, /*annotateFn=*/nullptr,
|
||||
cleanUpUnroll);
|
||||
}
|
||||
|
||||
std::unique_ptr<OperationPass<func::FuncOp>> mlir::createLoopUnrollPass(
|
||||
|
|
|
@ -1090,7 +1090,8 @@ static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
|
|||
/// is successfully unrolled.
|
||||
LogicalResult mlir::loopUnrollByFactor(
|
||||
AffineForOp forOp, uint64_t unrollFactor,
|
||||
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
|
||||
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
|
||||
bool cleanUpUnroll) {
|
||||
assert(unrollFactor > 0 && "unroll factor should be positive");
|
||||
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
@ -1106,9 +1107,14 @@ LogicalResult mlir::loopUnrollByFactor(
|
|||
return success();
|
||||
|
||||
// If the trip count is lower than the unroll factor, no unrolled body.
|
||||
// TODO: option to specify cleanup loop unrolling.
|
||||
if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor)
|
||||
if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
|
||||
if (cleanUpUnroll) {
|
||||
// Unroll the cleanup loop if cleanUpUnroll is specified.
|
||||
return loopUnrollFull(forOp);
|
||||
}
|
||||
|
||||
return failure();
|
||||
}
|
||||
|
||||
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
|
||||
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
|
||||
|
@ -1119,6 +1125,9 @@ LogicalResult mlir::loopUnrollByFactor(
|
|||
if (forOp.getLowerBoundMap().getNumResults() != 1 ||
|
||||
forOp.getUpperBoundMap().getNumResults() != 1)
|
||||
return failure();
|
||||
if (cleanUpUnroll)
|
||||
// Force unroll including cleanup loop
|
||||
return loopUnrollFull(forOp);
|
||||
if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
|
||||
assert(false && "cleanup loop lower bound map for single result lower "
|
||||
"and upper bound maps can always be determined");
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
|
||||
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
|
||||
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
|
||||
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
|
||||
|
||||
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
|
||||
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
|
||||
|
@ -689,3 +690,60 @@ func.func @unroll_zero_trip_count_case() {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_larger_unroll_factor()
|
||||
func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
|
||||
affine.for %i = 0 to 3 {
|
||||
%x = "foo"(%i) : (index) -> i32
|
||||
}
|
||||
return
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: return
|
||||
}
|
||||
|
||||
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
|
||||
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
|
||||
affine.for %i = 0 to 7 {
|
||||
%x = "foo"(%i) : (index) -> i32
|
||||
}
|
||||
return
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V5:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V5]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V6:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V6]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: return
|
||||
}
|
||||
|
||||
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_identical_unroll_factor()
|
||||
func.func @unroll_cleanup_loop_with_identical_unroll_factor() {
|
||||
affine.for %i = 0 to 5 {
|
||||
%x = "foo"(%i) : (index) -> i32
|
||||
}
|
||||
return
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
|
||||
// UNROLL-CLEANUP-LOOP-NEXT: return
|
||||
}
|
|
@ -4,6 +4,7 @@
|
|||
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
|
||||
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
|
||||
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
|
||||
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
|
||||
|
||||
func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
|
||||
%arg3: memref<?xf32>) {
|
||||
|
@ -314,3 +315,28 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
|
|||
// UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32
|
||||
// UNROLL-BY-3-NEXT: }
|
||||
// UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
|
||||
|
||||
// Test that the loop is fully unrolled when cleanup-unroll is set and the
|
||||
// trip count is less than the unroll factor.
|
||||
func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
|
||||
%0 = arith.constant 7.0 : f32
|
||||
%lb = arith.constant 0 : index
|
||||
%ub = arith.constant 3 : index
|
||||
affine.for %i0 = %lb to %ub {
|
||||
memref.store %0, %arg0[%i0] : memref<?xf32>
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
|
||||
// CLEANUP-UNROLL-BY-5-SAME: %[[MEM:.*0]]: memref<?xf32>
|
||||
//
|
||||
// CLEANUP-UNROLL-BY-5-DAG: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CLEANUP-UNROLL-BY-5-DAG: %[[C3:.*]] = arith.constant 3 : index
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
|
||||
// CLEANUP-UNROLL-BY-5-NEXT: return
|
Loading…
Reference in New Issue