forked from OSchip/llvm-project
Introduce loop coalescing utility and a simple pass
Multiple (perfectly) nested loops with independent bounds can be combined into a single loop and then subdivided into blocks of arbitrary size for load balancing or more efficient parallelism exploitation. However, MLIR wants to preserve the multi-dimensional multi-loop structure at higher levels of abstraction. Introduce a transformation that coalesces nested loops with independent bounds so that they can be further subdivided by tiling. PiperOrigin-RevId: 258151016
This commit is contained in:
parent
4de019901b
commit
fc044e8929
|
@ -86,6 +86,7 @@ def ForOp : Loop_Op<"for"> {
|
|||
}
|
||||
void setLowerBound(Value *bound) { getOperation()->setOperand(0, bound); }
|
||||
void setUpperBound(Value *bound) { getOperation()->setOperand(1, bound); }
|
||||
void setStep(Value *step) { getOperation()->setOperand(2, step); }
|
||||
}];
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,8 @@ LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor);
|
|||
/// AffineForOp, and the second op is a terminator).
|
||||
void getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
|
||||
AffineForOp root);
|
||||
void getPerfectlyNestedLoops(SmallVectorImpl<loop::ForOp> &nestedLoops,
|
||||
loop::ForOp root);
|
||||
|
||||
/// Unrolls and jams this loop by the specified factor. Returns success if the
|
||||
/// loop is successfully unroll-jammed.
|
||||
|
@ -154,6 +156,11 @@ void tile(loop::ForOp rootForOp, ArrayRef<Value *> sizes);
|
|||
/// as defined in `sizes`.
|
||||
void extractFixedOuterLoops(loop::ForOp rootFOrOp, ArrayRef<int64_t> sizes);
|
||||
|
||||
/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
|
||||
/// `loops` contains a list of perfectly nested loops with bounds and steps
|
||||
/// independent of any loop induction variable involved in the nest.
|
||||
void coalesceLoops(MutableArrayRef<loop::ForOp> loops);
|
||||
|
||||
} // end namespace mlir
|
||||
|
||||
#endif // MLIR_TRANSFORMS_LOOP_UTILS_H
|
||||
|
|
|
@ -105,6 +105,10 @@ FunctionPassBase *createLoopTilingPass(uint64_t cacheSizeBytes);
|
|||
FunctionPassBase *
|
||||
createSimpleParametricTilingPass(ArrayRef<int64_t> outerLoopSizes);
|
||||
|
||||
/// Creates a pass that transforms perfectly nested loops with independent
|
||||
/// bounds into a single loop.
|
||||
FunctionPassBase *createLoopCoalescingPass();
|
||||
|
||||
/// Promotes all accessed memref regions to the specified faster memory space
|
||||
/// while generating DMAs to move data.
|
||||
FunctionPassBase *createDmaGenerationPass(
|
||||
|
|
|
@ -5,6 +5,7 @@ add_llvm_library(MLIRTransforms
|
|||
CSE.cpp
|
||||
DialectConversion.cpp
|
||||
DmaGeneration.cpp
|
||||
LoopCoalescing.cpp
|
||||
LoopFusion.cpp
|
||||
LoopInvariantCodeMotion.cpp
|
||||
LoopParametricTiling.cpp
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
//===- LoopCoalescing.cpp - Pass transforming loop nests into single loops-===//
|
||||
//
|
||||
// Copyright 2019 The MLIR Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// =============================================================================
|
||||
|
||||
#include "mlir/Dialect/LoopOps/LoopOps.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/StandardOps/Ops.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "mlir/Transforms/RegionUtils.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#define PASS_NAME "loop-coalescing"
|
||||
#define DEBUG_TYPE PASS_NAME
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace {
|
||||
/// Function pass that looks at every outermost `loop.for` in the function,
/// collects the perfect nest rooted at it, and coalesces each band of loops
/// whose bounds and steps are all defined above the first loop of the band.
class LoopCoalescingPass : public FunctionPass<LoopCoalescingPass> {
public:
  void runOnFunction() override {
    FuncOp func = getFunction();

    func.walk<loop::ForOp>([](loop::ForOp op) {
      // Only nest roots are processed; loops that have a `loop.for` ancestor
      // are handled as part of the nest of that ancestor.
      if (op.getParentOfType<loop::ForOp>())
        return;

      SmallVector<loop::ForOp, 4> loops;
      getPerfectlyNestedLoops(loops, op);
      LLVM_DEBUG(llvm::dbgs()
                 << "found a perfect nest of depth " << loops.size() << '\n');

      // Step 1: for every loop in the nest, record the position of the
      // outermost loop above whose region all of its operands are defined.
      // When no enclosing loop qualifies, the loop's own position is stored.
      SmallVector<unsigned, 4> operandsDefinedAbove(loops.size());
      for (unsigned i = 0, e = loops.size(); i < e; ++i) {
        unsigned depth = 0;
        while (depth < i && !areValuesDefinedAbove(loops[i].getOperands(),
                                                   loops[depth].region()))
          ++depth;
        operandsDefinedAbove[i] = depth;
        LLVM_DEBUG(llvm::dbgs()
                   << " bounds of loop " << i << " are known above depth "
                   << operandsDefinedAbove[i] << '\n');
      }

      // Step 2: find bands [start, end) in which every loop has its operands
      // defined above loop `start`. The nest is scanned from the innermost
      // loop outwards so that coalescing a band never touches loops that are
      // still to be inspected.
      unsigned end = loops.size();
      while (end > 0) {
        const unsigned last = end - 1;
        unsigned start = 0;
        for (; start < last; ++start) {
          const auto first = operandsDefinedAbove.begin();
          const unsigned maxPos = *std::max_element(first + start, first + end);
          if (maxPos <= start) {
            assert(maxPos == start &&
                   "expected loop bounds to be known at the start of the band");
            LLVM_DEBUG(llvm::dbgs() << " found coalesceable band from " << start
                                    << " to " << end << '\n');

            coalesceLoops(
                llvm::makeMutableArrayRef(loops.data() + start, end - start));
            break;
          }
        }
        // If a band was coalesced (start < last), continue with the loops
        // strictly above its first loop; otherwise just drop the innermost
        // candidate and retry with a shorter nest.
        end = (start != last) ? start : last;
      }
    });
  }
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Factory for the loop coalescing pass; returns a newly allocated pass
/// instance.
FunctionPassBase *mlir::createLoopCoalescingPass() {
  return new LoopCoalescingPass();
}
|
||||
|
||||
// Static registration of the pass under the PASS_NAME command-line name.
static PassRegistration<LoopCoalescingPass> reg(
    PASS_NAME,
    "coalesce nested loops with independent bounds into a single loop");
|
|
@ -33,9 +33,13 @@
|
|||
#include "mlir/IR/Function.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/StandardOps/Ops.h"
|
||||
#include "mlir/Transforms/RegionUtils.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#include "mlir/IR/Module.h"
|
||||
|
||||
#define DEBUG_TYPE "LoopUtils"
|
||||
|
||||
using namespace mlir;
|
||||
|
@ -385,6 +389,11 @@ void mlir::getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
|
|||
getPerfectlyNestedLoopsImpl(nestedLoops, root);
|
||||
}
|
||||
|
||||
// `loop::ForOp` overload: delegates to the implementation shared with the
// `AffineForOp` overload, filling `nestedLoops` starting from `root`.
void mlir::getPerfectlyNestedLoops(SmallVectorImpl<loop::ForOp> &nestedLoops,
                                   loop::ForOp root) {
  getPerfectlyNestedLoopsImpl(nestedLoops, root);
}
|
||||
|
||||
/// Unrolls this loop completely.
|
||||
LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
@ -870,6 +879,10 @@ static Value *ceilDivPositive(OpBuilder &builder, Location loc, Value *dividend,
|
|||
return builder.create<DivISOp>(loc, sum, divisorCst);
|
||||
}
|
||||
|
||||
// Build the IR that performs ceil division of a positive value by another
|
||||
// positive value:
|
||||
// ceildiv(a, b) = divis(a + (b - 1), b)
|
||||
// where divis is rounding-to-zero division.
|
||||
static Value *ceilDivPositive(OpBuilder &builder, Location loc, Value *dividend,
|
||||
Value *divisor) {
|
||||
assert(dividend->getType().isIndex() && "expected index-typed value");
|
||||
|
@ -914,3 +927,134 @@ void mlir::extractFixedOuterLoops(loop::ForOp rootForOp,
|
|||
// Call parametric tiling with the given sizes.
|
||||
return ::tile(forOps, tileSizes);
|
||||
}
|
||||
|
||||
// Replaces all uses of `orig` with `replacement` except if the user is listed
|
||||
// in `exceptions`.
|
||||
static void
|
||||
replaceAllUsesExcept(Value *orig, Value *replacement,
|
||||
const SmallPtrSetImpl<Operation *> &exceptions) {
|
||||
for (auto &use : orig->getUses()) {
|
||||
if (exceptions.count(use.getOwner()) == 0)
|
||||
use.set(replacement);
|
||||
}
|
||||
}
|
||||
|
||||
// Transform a loop with a strictly positive step
|
||||
// for %i = %lb to %ub step %s
|
||||
// into a 0-based loop with step 1
|
||||
// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
|
||||
// %i = %ii * %s + %lb
|
||||
// Insert the induction variable remapping in the body of `inner`, which is
|
||||
// expected to be either `loop` or another loop perfectly nested under `loop`.
|
||||
// Insert the definition of new bounds immediate before `outer`, which is
|
||||
// expected to be either `loop` or its parent in the loop nest.
|
||||
static void normalizeLoop(loop::ForOp loop, loop::ForOp outer,
|
||||
loop::ForOp inner) {
|
||||
OpBuilder builder(outer);
|
||||
Location loc = loop.getLoc();
|
||||
|
||||
// Check if the loop is already known to have a constant zero lower bound or
|
||||
// a constant one step.
|
||||
bool isZeroBased = false;
|
||||
if (auto ubCst =
|
||||
dyn_cast_or_null<ConstantIndexOp>(loop.lowerBound()->getDefiningOp()))
|
||||
isZeroBased = ubCst.getValue() == 0;
|
||||
|
||||
bool isStepOne = false;
|
||||
if (auto stepCst =
|
||||
dyn_cast_or_null<ConstantIndexOp>(loop.step()->getDefiningOp()))
|
||||
isStepOne = stepCst.getValue() == 1;
|
||||
|
||||
if (isZeroBased && isStepOne)
|
||||
return;
|
||||
|
||||
// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
|
||||
// assuming the step is strictly positive. Update the bounds and the step
|
||||
// of the loop to go from 0 to the number of iterations, if necessary.
|
||||
// TODO(zinenko): introduce support for negative steps or emit dynamic asserts
|
||||
// on step positivity, whatever gets implemented first.
|
||||
Value *diff =
|
||||
builder.create<SubIOp>(loc, loop.upperBound(), loop.lowerBound());
|
||||
Value *numIterations = ceilDivPositive(builder, loc, diff, loop.step());
|
||||
loop.setUpperBound(numIterations);
|
||||
|
||||
Value *lb = loop.lowerBound();
|
||||
if (!isZeroBased) {
|
||||
Value *cst0 = builder.create<ConstantIndexOp>(loc, 0);
|
||||
loop.setLowerBound(cst0);
|
||||
}
|
||||
|
||||
Value *step = loop.step();
|
||||
if (!isStepOne) {
|
||||
Value *cst1 = builder.create<ConstantIndexOp>(loc, 1);
|
||||
loop.setStep(cst1);
|
||||
}
|
||||
|
||||
// Insert code computing the value of the original loop induction variable
|
||||
// from the "normalized" one.
|
||||
builder.setInsertionPointToStart(inner.body());
|
||||
Value *scaled =
|
||||
isStepOne ? loop.getInductionVar()
|
||||
: builder.create<MulIOp>(loc, loop.getInductionVar(), step);
|
||||
Value *shifted =
|
||||
isZeroBased ? scaled : builder.create<AddIOp>(loc, scaled, lb);
|
||||
|
||||
SmallPtrSet<Operation *, 2> preserve{scaled->getDefiningOp(),
|
||||
shifted->getDefiningOp()};
|
||||
replaceAllUsesExcept(loop.getInductionVar(), shifted, preserve);
|
||||
}
|
||||
|
||||
// Replaces the perfect nest `loops` (outermost first) with a single loop that
// reuses the outermost `loop.for` operation. Does nothing when fewer than two
// loops are given.
void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) {
  const unsigned depth = loops.size();
  if (depth < 2)
    return;

  loop::ForOp outermost = loops.front();
  loop::ForOp innermost = loops.back();

  // 1. Normalize every loop to iterate from 0 to upperBound with step 1, so
  // that each upperBound below can be read as a number of iterations.
  for (loop::ForOp forOp : loops)
    normalizeLoop(forOp, outermost, innermost);

  // 2. The coalesced loop runs for the product of the iteration counts of all
  // loops; the multiplications are emitted just before the outermost loop.
  OpBuilder builder(outermost);
  Location loc = outermost.getLoc();
  Value *totalIterations = outermost.upperBound();
  for (unsigned i = 1; i < depth; ++i)
    totalIterations =
        builder.create<MulIOp>(loc, totalIterations, loops[i].upperBound());
  outermost.setUpperBound(totalIterations);

  // 3. Recover the induction variable of each original loop from the
  // linearized one:
  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
  // Walk from the innermost loop outwards, maintaining a running quotient:
  // each step divides by the range of the loop handled just before, and the
  // remainder by the current range is the current induction variable. The
  // outermost loop needs no remainder since nothing is left to strip off.
  builder.setInsertionPointToStart(outermost.body());
  Value *quotient = outermost.getInductionVar();
  for (unsigned idx = depth; idx-- > 0;) {
    if (idx != depth - 1)
      quotient =
          builder.create<DivISOp>(loc, quotient, loops[idx + 1].upperBound());

    Value *iv = quotient;
    if (idx != 0)
      iv = builder.create<RemISOp>(loc, quotient, loops[idx].upperBound());

    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
                               innermost.region());
  }

  // 4. Drop the terminator of the innermost body, move the remaining
  // operations into the outermost body right before the second-outermost
  // loop, then erase that loop (and with it all the loops nested inside).
  loop::ForOp second = loops[1];
  Block *innerBody = innermost.body();
  innerBody->back().erase();
  outermost.body()->getOperations().splice(
      Block::iterator(second.getOperation()), innerBody->getOperations());
  second.erase();
}
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
// RUN: mlir-opt -loop-coalescing %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: @one_3d_nest
|
||||
func @one_3d_nest() {
|
||||
// Capture original bounds. Note that for zero-based step-one loops, the
|
||||
// upper bound is also the number of iterations.
|
||||
// CHECK: %[[orig_lb:.*]] = constant 0
|
||||
// CHECK: %[[orig_step:.*]] = constant 1
|
||||
// CHECK: %[[orig_ub_k:.*]] = constant 3
|
||||
// CHECK: %[[orig_ub_i:.*]] = constant 42
|
||||
// CHECK: %[[orig_ub_j:.*]] = constant 56
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%c2 = constant 2 : index
|
||||
%c3 = constant 3 : index
|
||||
%c42 = constant 42 : index
|
||||
%c56 = constant 56 : index
|
||||
// The range of the new loop.
|
||||
// CHECK: %[[partial_range:.*]] = muli %[[orig_ub_i]], %[[orig_ub_j]]
|
||||
// CHECK-NEXT:%[[range:.*]] = muli %[[partial_range]], %[[orig_ub_k]]
|
||||
|
||||
// Updated loop bounds.
|
||||
// CHECK: loop.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
|
||||
loop.for %i = %c0 to %c42 step %c1 {
|
||||
// Inner loops must have been removed.
|
||||
// CHECK-NOT: loop.for
|
||||
|
||||
// Reconstruct original IVs from the linearized one.
|
||||
// CHECK: %[[orig_k:.*]] = remis %[[i]], %[[orig_ub_k]]
|
||||
// CHECK: %[[div:.*]] = divis %[[i]], %[[orig_ub_k]]
|
||||
// CHECK: %[[orig_j:.*]] = remis %[[div]], %[[orig_ub_j]]
|
||||
// CHECK: %[[orig_i:.*]] = divis %[[div]], %[[orig_ub_j]]
|
||||
loop.for %j = %c0 to %c56 step %c1 {
|
||||
loop.for %k = %c0 to %c3 step %c1 {
|
||||
// CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
|
||||
"use"(%i, %j, %k) : (index, index, index) -> ()
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func @unnormalized_loops() {
|
||||
// CHECK: %[[orig_step_i:.*]] = constant 2
|
||||
// CHECK: %[[orig_step_j:.*]] = constant 3
|
||||
// CHECK: %[[orig_lb_i:.*]] = constant 5
|
||||
// CHECK: %[[orig_lb_j:.*]] = constant 7
|
||||
// CHECK: %[[orig_ub_i:.*]] = constant 10
|
||||
// CHECK: %[[orig_ub_j:.*]] = constant 17
|
||||
%c2 = constant 2 : index
|
||||
%c3 = constant 3 : index
|
||||
%c5 = constant 5 : index
|
||||
%c7 = constant 7 : index
|
||||
%c10 = constant 10 : index
|
||||
%c17 = constant 17 : index
|
||||
|
||||
// Number of iterations in the outer loop.
|
||||
// CHECK: %[[diff_i:.*]] = subi %[[orig_ub_i]], %[[orig_lb_i]]
|
||||
// CHECK: %[[c1:.*]] = constant 1
|
||||
// CHECK: %[[step_minus_c1:.*]] = subi %[[orig_step_i]], %[[c1]]
|
||||
// CHECK: %[[dividend:.*]] = addi %[[diff_i]], %[[step_minus_c1]]
|
||||
// CHECK: %[[numiter_i:.*]] = divis %[[dividend]], %[[orig_step_i]]
|
||||
|
||||
// Normalized lower bound and step for the outer loop.
|
||||
// CHECK: %[[lb_i:.*]] = constant 0
|
||||
// CHECK: %[[step_i:.*]] = constant 1
|
||||
|
||||
// Number of iterations in the inner loop, the pattern is the same as above,
|
||||
// only capture the final result.
|
||||
// CHECK: %[[numiter_j:.*]] = divis {{.*}}, %[[orig_step_j]]
|
||||
|
||||
// New bounds of the outer loop.
|
||||
// CHECK: %[[range:.*]] = muli %[[numiter_i]], %[[numiter_j]]
|
||||
// CHECK: loop.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
|
||||
loop.for %i = %c5 to %c10 step %c2 {
|
||||
// The inner loop has been removed.
|
||||
// CHECK-NOT: loop.for
|
||||
loop.for %j = %c7 to %c17 step %c3 {
|
||||
// The IVs are rewritten.
|
||||
// CHECK: %[[normalized_j:.*]] = remis %[[i]], %[[numiter_j]]
|
||||
// CHECK: %[[normalized_i:.*]] = divis %[[i]], %[[numiter_j]]
|
||||
// CHECK: %[[scaled_j:.*]] = muli %[[normalized_j]], %[[orig_step_j]]
|
||||
// CHECK: %[[orig_j:.*]] = addi %[[scaled_j]], %[[orig_lb_j]]
|
||||
// CHECK: %[[scaled_i:.*]] = muli %[[normalized_i]], %[[orig_step_i]]
|
||||
// CHECK: %[[orig_i:.*]] = addi %[[scaled_i]], %[[orig_lb_i]]
|
||||
// CHECK: "use"(%[[orig_i]], %[[orig_j]])
|
||||
"use"(%i, %j) : (index, index) -> ()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Check with parametric loop bounds and steps, capture the bounds here.
|
||||
// CHECK-LABEL: @parametric
|
||||
// CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]:
|
||||
// CHECK-SAME: %[[orig_ub1:[A-Za-z0-9]+]]:
|
||||
// CHECK-SAME: %[[orig_step1:[A-Za-z0-9]+]]:
|
||||
// CHECK-SAME: %[[orig_lb2:[A-Za-z0-9]+]]:
|
||||
// CHECK-SAME: %[[orig_ub2:[A-Za-z0-9]+]]:
|
||||
// CHECK-SAME: %[[orig_step2:[A-Za-z0-9]+]]:
|
||||
func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
|
||||
%lb2 : index, %ub2 : index, %step2 : index) {
|
||||
// Compute the number of iterations for each of the loops and the total
|
||||
// number of iterations.
|
||||
// CHECK: %[[range1:.*]] = subi %[[orig_ub1]], %[[orig_lb1]]
|
||||
// CHECK: %[[orig_step1_minus_1:.*]] = subi %[[orig_step1]], %c1
|
||||
// CHECK: %[[dividend1:.*]] = addi %[[range1]], %[[orig_step1_minus_1]]
|
||||
// CHECK: %[[numiter1:.*]] = divis %[[dividend1]], %[[orig_step1]]
|
||||
// CHECK: %[[range2:.*]] = subi %[[orig_ub2]], %[[orig_lb2]]
|
||||
// CHECK: %[[orig_step2_minus_1:.*]] = subi %[[orig_step2]], %c1
|
||||
// CHECK: %[[dividend2:.*]] = addi %[[range2]], %[[orig_step2_minus_1]]
|
||||
// CHECK: %[[numiter2:.*]] = divis %[[dividend2]], %[[orig_step2]]
|
||||
// CHECK: %[[range:.*]] = muli %[[numiter1]], %[[numiter2]] : index
|
||||
|
||||
// Check that the outer loop is updated.
|
||||
// CHECK: loop.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
|
||||
loop.for %i = %lb1 to %ub1 step %step1 {
|
||||
// Check that the inner loop is removed.
|
||||
// CHECK-NOT: loop.for
|
||||
loop.for %j = %lb2 to %ub2 step %step2 {
|
||||
// Remapping of the induction variables.
|
||||
// CHECK: %[[normalized_j:.*]] = remis %[[i]], %[[numiter2]] : index
|
||||
// CHECK: %[[normalized_i:.*]] = divis %[[i]], %[[numiter2]] : index
|
||||
// CHECK: %[[scaled_j:.*]] = muli %[[normalized_j]], %[[orig_step2]]
|
||||
// CHECK: %[[orig_j:.*]] = addi %[[scaled_j]], %[[orig_lb2]]
|
||||
// CHECK: %[[scaled_i:.*]] = muli %[[normalized_i]], %[[orig_step1]]
|
||||
// CHECK: %[[orig_i:.*]] = addi %[[scaled_i]], %[[orig_lb1]]
|
||||
|
||||
// CHECK: "foo"(%[[orig_i]], %[[orig_j]])
|
||||
"foo"(%i, %j) : (index, index) -> ()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @two_bands
|
||||
func @two_bands() {
|
||||
%c0 = constant 0 : index
|
||||
%c1 = constant 1 : index
|
||||
%c10 = constant 10 : index
|
||||
// CHECK: %[[outer_range:.*]] = muli
|
||||
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[outer_range]]
|
||||
loop.for %i = %c0 to %c10 step %c1 {
|
||||
// Check that the "j" loop was removed and that the inner loops were
|
||||
// coalesced as well. The preparation step for coalescing will inject the
|
||||
// subtraction operation unlike the IV remapping.
|
||||
// CHECK-NOT: loop.for
|
||||
// CHECK: subi
|
||||
loop.for %j = %c0 to %c10 step %c1 {
|
||||
// The inner pair of loops is coalesced separately.
|
||||
// CHECK: loop.for
|
||||
loop.for %k = %i to %j step %c1 {
|
||||
// CHECK-NOT: loop.for
|
||||
loop.for %l = %i to %j step %c1 {
|
||||
"foo"() : () -> ()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
Loading…
Reference in New Issue