forked from OSchip/llvm-project
[MLIR] Update affine.for unroll utility for iter_args support
Update affine.for loop unroll utility for iteration arguments support. Fix promoteIfSingleIteration as well. Fixes PR49084: https://bugs.llvm.org/show_bug.cgi?id=49084 Differential Revision: https://reviews.llvm.org/D96383
This commit is contained in:
parent
8caf835337
commit
5400f602cd
|
@ -147,6 +147,26 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
|
||||||
return builder.create<SignedDivIOp>(loc, sum, divisor);
|
return builder.create<SignedDivIOp>(loc, sum, divisor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper to replace uses of loop carried values (iter_args) and loop
|
||||||
|
/// yield values while promoting single iteration affine.for and scf.for ops.
|
||||||
|
template <typename AffineOrSCFForOp>
|
||||||
|
static void replaceIterArgsAndYieldResults(AffineOrSCFForOp forOp) {
|
||||||
|
static_assert(
|
||||||
|
llvm::is_one_of<AffineOrSCFForOp, AffineForOp, scf::ForOp>::value,
|
||||||
|
"only for affine.for and scf.for ops");
|
||||||
|
// Replace uses of iter arguments with iter operands (initial values).
|
||||||
|
auto iterOperands = forOp.getIterOperands();
|
||||||
|
auto iterArgs = forOp.getRegionIterArgs();
|
||||||
|
for (auto e : llvm::zip(iterOperands, iterArgs))
|
||||||
|
std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
|
||||||
|
|
||||||
|
// Replace uses of loop results with the values yielded by the loop.
|
||||||
|
auto outerResults = forOp.getResults();
|
||||||
|
auto innerResults = forOp.getBody()->getTerminator()->getOperands();
|
||||||
|
for (auto e : llvm::zip(outerResults, innerResults))
|
||||||
|
std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
|
||||||
|
}
|
||||||
|
|
||||||
/// Promotes the loop body of a forOp to its containing block if the forOp
|
/// Promotes the loop body of a forOp to its containing block if the forOp
|
||||||
/// was known to have a single iteration.
|
/// was known to have a single iteration.
|
||||||
// TODO: extend this for arbitrary affine bounds.
|
// TODO: extend this for arbitrary affine bounds.
|
||||||
|
@ -181,6 +201,9 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
replaceIterArgsAndYieldResults(forOp);
|
||||||
|
|
||||||
// Move the loop body operations, except for its terminator, to the loop's
|
// Move the loop body operations, except for its terminator, to the loop's
|
||||||
// containing block.
|
// containing block.
|
||||||
forOp.getBody()->back().erase();
|
forOp.getBody()->back().erase();
|
||||||
|
@ -206,17 +229,7 @@ LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) {
|
||||||
auto iv = forOp.getInductionVar();
|
auto iv = forOp.getInductionVar();
|
||||||
iv.replaceAllUsesWith(lbCstOp);
|
iv.replaceAllUsesWith(lbCstOp);
|
||||||
|
|
||||||
// Replace uses of iterArgs with iterOperands.
|
replaceIterArgsAndYieldResults(forOp);
|
||||||
auto iterOperands = forOp.getIterOperands();
|
|
||||||
auto iterArgs = forOp.getRegionIterArgs();
|
|
||||||
for (auto e : llvm::zip(iterOperands, iterArgs))
|
|
||||||
std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
|
|
||||||
|
|
||||||
// Replace uses of loop results with the values yielded by the loop.
|
|
||||||
auto outerResults = forOp.getResults();
|
|
||||||
auto innerResults = forOp.getBody()->getTerminator()->getOperands();
|
|
||||||
for (auto e : llvm::zip(outerResults, innerResults))
|
|
||||||
std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
|
|
||||||
|
|
||||||
// Move the loop body operations, except for its terminator, to the loop's
|
// Move the loop body operations, except for its terminator, to the loop's
|
||||||
// containing block.
|
// containing block.
|
||||||
|
@ -1127,6 +1140,17 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||||
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
|
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
|
||||||
OpBuilder builder(forOp->getBlock(), std::next(Block::iterator(forOp)));
|
OpBuilder builder(forOp->getBlock(), std::next(Block::iterator(forOp)));
|
||||||
auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
|
auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
|
||||||
|
|
||||||
|
// Update users of loop results.
|
||||||
|
auto results = forOp.getResults();
|
||||||
|
auto cleanupResults = cleanupForOp.getResults();
|
||||||
|
auto cleanupIterOperands = cleanupForOp.getIterOperands();
|
||||||
|
|
||||||
|
for (auto e : llvm::zip(results, cleanupResults, cleanupIterOperands)) {
|
||||||
|
std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
|
||||||
|
cleanupForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
|
||||||
|
}
|
||||||
|
|
||||||
AffineMap cleanupMap;
|
AffineMap cleanupMap;
|
||||||
SmallVector<Value, 4> cleanupOperands;
|
SmallVector<Value, 4> cleanupOperands;
|
||||||
getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
|
getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
|
||||||
|
@ -1142,18 +1166,21 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||||
forOp.setUpperBound(cleanupOperands, cleanupMap);
|
forOp.setUpperBound(cleanupOperands, cleanupMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ValueRange iterArgs(forOp.getRegionIterArgs());
|
||||||
|
auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
|
||||||
|
|
||||||
// Scale the step of loop being unrolled by unroll factor.
|
// Scale the step of loop being unrolled by unroll factor.
|
||||||
int64_t step = forOp.getStep();
|
int64_t step = forOp.getStep();
|
||||||
forOp.setStep(step * unrollFactor);
|
forOp.setStep(step * unrollFactor);
|
||||||
generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
|
generateUnrolledLoop(
|
||||||
[&](unsigned i, Value iv, OpBuilder b) {
|
forOp.getBody(), forOp.getInductionVar(), unrollFactor,
|
||||||
// iv' = iv + i * step
|
[&](unsigned i, Value iv, OpBuilder b) {
|
||||||
auto d0 = b.getAffineDimExpr(0);
|
// iv' = iv + i * step
|
||||||
auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
|
auto d0 = b.getAffineDimExpr(0);
|
||||||
return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap,
|
auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
|
||||||
iv);
|
return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, iv);
|
||||||
},
|
},
|
||||||
/*iterArgs=*/{}, /*yieldedValues=*/{});
|
/*iterArgs=*/iterArgs, /*yieldedValues=*/yieldedValues);
|
||||||
|
|
||||||
// Promote the loop body up if this has turned into a single iteration loop.
|
// Promote the loop body up if this has turned into a single iteration loop.
|
||||||
(void)promoteIfSingleIteration(forOp);
|
(void)promoteIfSingleIteration(forOp);
|
||||||
|
|
|
@ -590,3 +590,54 @@ func @unroll_by_one_should_promote_single_iteration_loop() {
|
||||||
// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
|
// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
|
||||||
// UNROLL-BY-1-NEXT: return
|
// UNROLL-BY-1-NEXT: return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test unrolling with affine.for iter_args.
|
||||||
|
|
||||||
|
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
|
||||||
|
func @loop_unroll_with_iter_args_and_cleanup(%arg0 : f32, %arg1 : f32, %n : index) -> (f32,f32) {
|
||||||
|
%cf1 = constant 1.0 : f32
|
||||||
|
%cf2 = constant 2.0 : f32
|
||||||
|
%sum:2 = affine.for %iv = 0 to 10 iter_args(%i0 = %arg0, %i1 = %arg1) -> (f32, f32) {
|
||||||
|
%sum0 = addf %i0, %cf1 : f32
|
||||||
|
%sum1 = addf %i1, %cf2 : f32
|
||||||
|
affine.yield %sum0, %sum1 : f32, f32
|
||||||
|
}
|
||||||
|
return %sum#0, %sum#1 : f32, f32
|
||||||
|
// UNROLL-BY-4: %[[SUM:.*]]:2 = affine.for {{.*}} = 0 to 8 step 4 iter_args
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: addf
|
||||||
|
// UNROLL-BY-4-NEXT: %[[Y1:.*]] = addf
|
||||||
|
// UNROLL-BY-4-NEXT: %[[Y2:.*]] = addf
|
||||||
|
// UNROLL-BY-4-NEXT: affine.yield %[[Y1]], %[[Y2]]
|
||||||
|
// UNROLL-BY-4-NEXT: }
|
||||||
|
// UNROLL-BY-4-NEXT: %[[SUM1:.*]]:2 = affine.for {{.*}} = 8 to 10 iter_args(%[[V1:.*]] = %[[SUM]]#0, %[[V2:.*]] = %[[SUM]]#1)
|
||||||
|
// UNROLL-BY-4: }
|
||||||
|
// UNROLL-BY-4-NEXT: return %[[SUM1]]#0, %[[SUM1]]#1
|
||||||
|
}
|
||||||
|
|
||||||
|
// The epilogue being a single iteration loop gets promoted here.
|
||||||
|
|
||||||
|
// UNROLL-BY-4-LABEL: unroll_with_iter_args_and_promotion
|
||||||
|
func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||||
|
%from = constant 0 : index
|
||||||
|
%to = constant 10 : index
|
||||||
|
%step = constant 1 : index
|
||||||
|
%sum = affine.for %iv = 0 to 9 iter_args(%sum_iter = %arg0) -> (f32) {
|
||||||
|
%next = addf %sum_iter, %arg1 : f32
|
||||||
|
affine.yield %next : f32
|
||||||
|
}
|
||||||
|
// UNROLL-BY-4: %[[SUM:.*]] = affine.for %{{.*}} = 0 to 8 step 4 iter_args(%[[V0:.*]] =
|
||||||
|
// UNROLL-BY-4-NEXT: %[[V1:.*]] = addf %[[V0]]
|
||||||
|
// UNROLL-BY-4-NEXT: %[[V2:.*]] = addf %[[V1]]
|
||||||
|
// UNROLL-BY-4-NEXT: %[[V3:.*]] = addf %[[V2]]
|
||||||
|
// UNROLL-BY-4-NEXT: %[[V4:.*]] = addf %[[V3]]
|
||||||
|
// UNROLL-BY-4-NEXT: affine.yield %[[V4]]
|
||||||
|
// UNROLL-BY-4-NEXT: }
|
||||||
|
// UNROLL-BY-4-NEXT: %[[RES:.*]] = addf %[[SUM]],
|
||||||
|
// UNROLL-BY-4-NEXT: return %[[RES]]
|
||||||
|
return %sum : f32
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue