forked from OSchip/llvm-project
[MLIR] Add affine.parallel folder and normalizer
Add a folder to the affine.parallel op so that loop bounds expressions are canonicalized. Additionally, a new AffineParallelNormalizePass is added to adjust affine.parallel ops so that the lower bound is always 0 and the upper bound always represents a range with a step size of 1. Differential Revision: https://reviews.llvm.org/D84998
This commit is contained in:
parent
4e266eaf13
commit
cca3f3dd26
|
@ -621,11 +621,6 @@ def AffineParallelOp : Affine_Op<"parallel",
|
|||
/// Get the number of dimensions.
|
||||
unsigned getNumDims();
|
||||
|
||||
operand_range getLowerBoundsOperands();
|
||||
operand_range getUpperBoundsOperands();
|
||||
|
||||
AffineValueMap getLowerBoundsValueMap();
|
||||
AffineValueMap getUpperBoundsValueMap();
|
||||
AffineValueMap getRangesValueMap();
|
||||
|
||||
/// Get ranges as constants, may fail in dynamic case.
|
||||
|
@ -636,6 +631,18 @@ def AffineParallelOp : Affine_Op<"parallel",
|
|||
/// Returns the arguments of the op's body block, i.e. the induction
/// variables of the parallel loop.
MutableArrayRef<BlockArgument> getIVs() {
  Block *body = getBody();
  return body->getArguments();
}
|
||||
|
||||
operand_range getLowerBoundsOperands();
|
||||
AffineValueMap getLowerBoundsValueMap();
|
||||
void setLowerBounds(ValueRange operands, AffineMap map);
|
||||
void setLowerBoundsMap(AffineMap map);
|
||||
|
||||
operand_range getUpperBoundsOperands();
|
||||
AffineValueMap getUpperBoundsValueMap();
|
||||
void setUpperBounds(ValueRange operands, AffineMap map);
|
||||
void setUpperBoundsMap(AffineMap map);
|
||||
|
||||
SmallVector<int64_t, 8> getSteps();
|
||||
void setSteps(ArrayRef<int64_t> newSteps);
|
||||
|
||||
static StringRef getReductionsAttrName() { return "reductions"; }
|
||||
|
@ -643,6 +650,8 @@ def AffineParallelOp : Affine_Op<"parallel",
|
|||
static StringRef getUpperBoundsMapAttrName() { return "upperBoundsMap"; }
|
||||
static StringRef getStepsAttrName() { return "steps"; }
|
||||
}];
|
||||
|
||||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
def AffinePrefetchOp : Affine_Op<"prefetch"> {
|
||||
|
|
|
@ -74,6 +74,10 @@ public:
|
|||
ArrayRef<Value> getOperands() const;
|
||||
AffineMap getAffineMap() const;
|
||||
|
||||
/// Attempts to canonicalize the map and operands. Return success if the map
|
||||
/// and/or operands have been modified.
|
||||
LogicalResult canonicalize();
|
||||
|
||||
private:
|
||||
// A mutable affine map.
|
||||
MutableAffineMap map;
|
||||
|
|
|
@ -35,6 +35,9 @@ createAffineLoopInvariantCodeMotionPass();
|
|||
/// ops.
|
||||
std::unique_ptr<OperationPass<FuncOp>> createAffineParallelizePass();
|
||||
|
||||
/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
|
||||
std::unique_ptr<OperationPass<FuncOp>> createAffineParallelNormalizePass();
|
||||
|
||||
/// Performs packing (or explicit copying) of accessed memref regions into
|
||||
/// buffers in the specified faster memory space through either pointwise copies
|
||||
/// or DMA operations.
|
||||
|
|
|
@ -118,6 +118,12 @@ def AffineParallelize : FunctionPass<"affine-parallelize"> {
|
|||
let constructor = "mlir::createAffineParallelizePass()";
|
||||
}
|
||||
|
||||
def AffineParallelNormalize : FunctionPass<"affine-parallel-normalize"> {
|
||||
let summary = "Normalize affine.parallel ops so that lower bounds are 0 and "
|
||||
"steps are 1";
|
||||
let constructor = "mlir::createAffineParallelNormalizePass()";
|
||||
}
|
||||
|
||||
def SimplifyAffineStructures : FunctionPass<"simplify-affine-structures"> {
|
||||
let summary = "Simplify affine expressions in maps/sets and normalize "
|
||||
"memrefs";
|
||||
|
|
|
@ -43,6 +43,11 @@ void vectorizeAffineLoops(
|
|||
llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
|
||||
ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern);
|
||||
|
||||
/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
|
||||
/// As currently implemented, this transformation cannot fail and will return
|
||||
/// early if the op is already in a normalized form.
|
||||
void normalizeAffineParallel(AffineParallelOp op);
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
#endif // MLIR_DIALECT_AFFINE_UTILS_H
|
||||
|
|
|
@ -2505,9 +2505,58 @@ OpBuilder AffineParallelOp::getBodyBuilder() {
|
|||
return OpBuilder(getBody(), std::prev(getBody()->end()));
|
||||
}
|
||||
|
||||
/// Replaces the lower-bound operands and the lower-bounds map of this op.
///
/// The op's operand list is rebuilt as the new lower-bound operands followed
/// by the current upper-bound operands. `lbOperands` must supply exactly one
/// value per input of `map`, and `map` must have at least one result.
void AffineParallelOp::setLowerBounds(ValueRange lbOperands, AffineMap map) {
  assert(lbOperands.size() == map.getNumInputs() &&
         "operands to map must match number of inputs");
  assert(map.getNumResults() >= 1 && "bounds map has at least one result");

  // Read the current upper-bound operands before the operand list is
  // replaced below.
  operand_range currentUbOperands = getUpperBoundsOperands();

  SmallVector<Value, 4> combinedOperands;
  combinedOperands.reserve(lbOperands.size() + currentUbOperands.size());
  combinedOperands.append(lbOperands.begin(), lbOperands.end());
  combinedOperands.append(currentUbOperands.begin(), currentUbOperands.end());
  getOperation()->setOperands(combinedOperands);

  lowerBoundsMapAttr(AffineMapAttr::get(map));
}
|
||||
|
||||
/// Replaces the upper-bound operands and the upper-bounds map of this op.
///
/// The op's operand list is rebuilt as the current lower-bound operands
/// followed by the new upper-bound operands. `ubOperands` must supply exactly
/// one value per input of `map`, and `map` must have at least one result.
void AffineParallelOp::setUpperBounds(ValueRange ubOperands, AffineMap map) {
  assert(ubOperands.size() == map.getNumInputs() &&
         "operands to map must match number of inputs");
  assert(map.getNumResults() >= 1 && "bounds map has at least one result");

  // Copy the existing lower-bound operands first; the new upper-bound
  // operands are placed after them.
  operand_range currentLbOperands = getLowerBoundsOperands();

  SmallVector<Value, 4> combinedOperands;
  combinedOperands.reserve(currentLbOperands.size() + ubOperands.size());
  combinedOperands.append(currentLbOperands.begin(), currentLbOperands.end());
  combinedOperands.append(ubOperands.begin(), ubOperands.end());
  getOperation()->setOperands(combinedOperands);

  upperBoundsMapAttr(AffineMapAttr::get(map));
}
|
||||
|
||||
/// Replaces only the lower-bounds map; the op's operands are left untouched.
///
/// Because the operands are not changed, the new map must take the same
/// number of dims and symbols as the map currently attached to the op. This
/// is checked by an assertion.
void AffineParallelOp::setLowerBoundsMap(AffineMap map) {
  AffineMap lbMap = lowerBoundsMap();
  assert(lbMap.getNumDims() == map.getNumDims() &&
         lbMap.getNumSymbols() == map.getNumSymbols() &&
         "new lower bounds map must keep the dim and symbol counts");
  // Only referenced by the assertion above; silence unused-variable warnings
  // in builds where assertions are compiled out.
  (void)lbMap;
  lowerBoundsMapAttr(AffineMapAttr::get(map));
}
|
||||
|
||||
/// Replaces only the upper-bounds map; the op's operands are left untouched.
///
/// Because the operands are not changed, the new map must take the same
/// number of dims and symbols as the map currently attached to the op. This
/// is checked by an assertion.
void AffineParallelOp::setUpperBoundsMap(AffineMap map) {
  AffineMap ubMap = upperBoundsMap();
  assert(ubMap.getNumDims() == map.getNumDims() &&
         ubMap.getNumSymbols() == map.getNumSymbols() &&
         "new upper bounds map must keep the dim and symbol counts");
  // Only referenced by the assertion above; silence unused-variable warnings
  // in builds where assertions are compiled out.
  (void)ubMap;
  upperBoundsMapAttr(AffineMapAttr::get(map));
}
|
||||
|
||||
/// Returns the loop steps as plain integers, one per entry of the `steps`
/// attribute and in the same order. Every entry is cast to IntegerAttr.
SmallVector<int64_t, 8> AffineParallelOp::getSteps() {
  ArrayAttr stepAttrs = steps();
  SmallVector<int64_t, 8> stepValues;
  stepValues.reserve(stepAttrs.size());
  for (Attribute stepAttr : stepAttrs)
    stepValues.push_back(stepAttr.cast<IntegerAttr>().getInt());
  return stepValues;
}
|
||||
|
||||
/// Replaces the `steps` attribute with `newSteps`, stored as an i64 array
/// attribute. `newSteps` must contain exactly one entry per loop dimension.
void AffineParallelOp::setSteps(ArrayRef<int64_t> newSteps) {
  assert(newSteps.size() == getNumDims() && "steps & num dims mismatch");
  // Write the attribute once through the generated setter, matching how the
  // bounds maps are updated elsewhere in this file. Setting it a second time
  // by name would store the same value again.
  stepsAttr(getBodyBuilder().getI64ArrayAttr(newSteps));
}
|
||||
|
||||
static LogicalResult verify(AffineParallelOp op) {
|
||||
|
@ -2541,6 +2590,41 @@ static LogicalResult verify(AffineParallelOp op) {
|
|||
return success();
|
||||
}
|
||||
|
||||
/// Composes the map with its operands via composeAffineMapAndOperands and
/// stores the result back into this value map.
///
/// Returns failure (and leaves this object untouched) when composition yields
/// the same map and the same operands; returns success after resetting the
/// value map otherwise.
LogicalResult AffineValueMap::canonicalize() {
  AffineMap composedMap = getAffineMap();
  SmallVector<Value, 4> composedOperands(operands.begin(), operands.end());
  composeAffineMapAndOperands(&composedMap, &composedOperands);

  const bool changed =
      composedMap != getAffineMap() || composedOperands != operands;
  if (!changed)
    return failure();

  reset(composedMap, composedOperands);
  return success();
}
|
||||
|
||||
/// Canonicalize the bounds of the given loop.
|
||||
static LogicalResult canonicalizeLoopBounds(AffineParallelOp op) {
|
||||
AffineValueMap lb = op.getLowerBoundsValueMap();
|
||||
bool lbCanonicalized = succeeded(lb.canonicalize());
|
||||
|
||||
AffineValueMap ub = op.getUpperBoundsValueMap();
|
||||
bool ubCanonicalized = succeeded(ub.canonicalize());
|
||||
|
||||
// Any canonicalization change always leads to updated map(s).
|
||||
if (!lbCanonicalized && !ubCanonicalized)
|
||||
return failure();
|
||||
|
||||
if (lbCanonicalized)
|
||||
op.setLowerBounds(lb.getOperands(), lb.getAffineMap());
|
||||
if (ubCanonicalized)
|
||||
op.setUpperBounds(ub.getOperands(), ub.getAffineMap());
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Folds the op by canonicalizing its loop bounds in place.
///
/// Neither `operands` nor `results` is used: the op is modified directly, and
/// the result of canonicalizeLoopBounds is returned unchanged.
LogicalResult AffineParallelOp::fold(ArrayRef<Attribute> operands,
                                     SmallVectorImpl<OpFoldResult> &results) {
  AffineParallelOp parallelOp = *this;
  return canonicalizeLoopBounds(parallelOp);
}
|
||||
|
||||
static void print(OpAsmPrinter &p, AffineParallelOp op) {
|
||||
p << op.getOperationName() << " (" << op.getBody()->getArguments() << ") = (";
|
||||
p.printAffineMapOfSSAIds(op.lowerBoundsMapAttr(),
|
||||
|
@ -2549,13 +2633,8 @@ static void print(OpAsmPrinter &p, AffineParallelOp op) {
|
|||
p.printAffineMapOfSSAIds(op.upperBoundsMapAttr(),
|
||||
op.getUpperBoundsOperands());
|
||||
p << ')';
|
||||
SmallVector<int64_t, 4> steps;
|
||||
bool elideSteps = true;
|
||||
for (auto attr : op.steps()) {
|
||||
auto step = attr.cast<IntegerAttr>().getInt();
|
||||
elideSteps &= (step == 1);
|
||||
steps.push_back(step);
|
||||
}
|
||||
SmallVector<int64_t, 8> steps = op.getSteps();
|
||||
bool elideSteps = llvm::all_of(steps, [](int64_t step) { return step == 1; });
|
||||
if (!elideSteps) {
|
||||
p << " step (";
|
||||
llvm::interleaveComma(steps, p);
|
||||
|
@ -2641,7 +2720,7 @@ static ParseResult parseAffineParallelOp(OpAsmParser &parser,
|
|||
}
|
||||
|
||||
// Parse optional clause of the form: `reduce ("addf", "maxf")`, where the
|
||||
// quoted strings a member of the enum AtomicRMWKind.
|
||||
// quoted strings are a member of the enum AtomicRMWKind.
|
||||
SmallVector<Attribute, 4> reductions;
|
||||
if (succeeded(parser.parseOptionalKeyword("reduce"))) {
|
||||
if (parser.parseLParen())
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
//===- AffineParallelNormalize.cpp - AffineParallelNormalize Pass ---------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements a normalizer for affine parallel loops.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
|
||||
#include "mlir/Dialect/Affine/Passes.h"
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
// The definition lives inside `namespace mlir` so that it defines the
// `mlir::normalizeAffineParallel` declared in the Affine utilities header. A
// plain definition following `using namespace mlir;` would instead introduce
// an unrelated function in the global namespace.
namespace mlir {

/// Rewrites `op` in place so that every lower bound is the constant 0 and
/// every step is 1.
///
/// For each dimension the new upper bound is `ceildiv(range, step)`, and an
/// `affine.apply` computing `lb + iv * step` is inserted at the start of the
/// body; all other uses of the induction variable are redirected to it.
/// Returns without touching the op when it is already in normalized form.
void normalizeAffineParallel(AffineParallelOp op) {
  AffineMap lbMap = op.lowerBoundsMap();
  SmallVector<int64_t, 8> steps = op.getSteps();

  // No work is needed if every lower bound is already the constant 0 and
  // every step is already 1.
  bool isAlreadyNormalized =
      llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
        int64_t step = std::get<0>(tuple);
        auto lbExpr =
            std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
        return lbExpr && lbExpr.getValue() == 0 && step == 1;
      });
  if (isAlreadyNormalized)
    return;

  AffineValueMap ranges = op.getRangesValueMap();
  auto builder = OpBuilder::atBlockBegin(op.getBody());
  auto zeroExpr = builder.getAffineConstantExpr(0);

  // These values are identical for every dimension, so compute them once.
  // The loop below does not modify the operands of `op`, so the operand
  // ranges stay valid throughout.
  const unsigned nDims = lbMap.getNumDims();
  const unsigned nSymbols = lbMap.getNumSymbols();
  OperandRange lbOperands = op.getLowerBoundsOperands();
  OperandRange dimOperands = lbOperands.take_front(nDims);
  OperandRange symbolOperands = lbOperands.drop_front(nDims);
  // The induction variable is passed as one extra dim placed after the
  // existing dims of the lower-bounds map.
  AffineExpr ivExpr = builder.getAffineDimExpr(nDims);

  SmallVector<AffineExpr, 8> lbExprs;
  SmallVector<AffineExpr, 8> ubExprs;
  lbExprs.reserve(steps.size());
  ubExprs.reserve(steps.size());
  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
    int64_t step = steps[i];

    // Adjust the lower bound to be 0.
    lbExprs.push_back(zeroExpr);

    // Adjust the upper bound expression: 'ceildiv(range, step)'.
    ubExprs.push_back(ranges.getResult(i).ceilDiv(step));

    // Adjust the corresponding IV: 'lb + iv * step'.
    BlockArgument iv = op.getBody()->getArgument(i);
    AffineExpr expr = lbMap.getResult(i) + ivExpr * step;
    auto map = AffineMap::get(/*dimCount=*/nDims + 1,
                              /*symbolCount=*/nSymbols, expr);

    // Use an 'affine.apply' op that will be simplified later in subsequent
    // canonicalizations. Operand order: original dims, the IV, then symbols.
    SmallVector<Value, 8> applyOperands(dimOperands.begin(),
                                        dimOperands.end());
    applyOperands.push_back(iv);
    applyOperands.append(symbolOperands.begin(), symbolOperands.end());
    auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
    // The apply op itself must keep reading the raw IV.
    iv.replaceAllUsesExcept(apply, SmallPtrSet<Operation *, 1>{apply});
  }

  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
  op.setSteps(newSteps);
  auto newLowerMap = AffineMap::get(
      /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
  op.setLowerBounds({}, newLowerMap);
  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
                                    ubExprs, op.getContext());
  op.setUpperBounds(ranges.getOperands(), newUpperMap);
}

} // namespace mlir
|
||||
|
||||
namespace {
|
||||
|
||||
/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
|
||||
/// As currently implemented, this pass cannot fail, but it might skip over ops
|
||||
/// that are already in a normalized form.
|
||||
struct AffineParallelNormalizePass
|
||||
: public AffineParallelNormalizeBase<AffineParallelNormalizePass> {
|
||||
|
||||
void runOnFunction() override { getFunction().walk(normalizeAffineParallel); }
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Creates a new instance of the affine.parallel normalization pass.
std::unique_ptr<OperationPass<FuncOp>>
mlir::createAffineParallelNormalizePass() {
  std::unique_ptr<OperationPass<FuncOp>> pass =
      std::make_unique<AffineParallelNormalizePass>();
  return pass;
}
|
|
@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIRAffineTransforms
|
|||
AffineDataCopyGeneration.cpp
|
||||
AffineLoopInvariantCodeMotion.cpp
|
||||
AffineParallelize.cpp
|
||||
AffineParallelNormalize.cpp
|
||||
LoopTiling.cpp
|
||||
LoopUnroll.cpp
|
||||
LoopUnrollAndJam.cpp
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// RUN: mlir-opt %s -affine-parallel-normalize -split-input-file | FileCheck %s
|
||||
|
||||
// Normalize steps to 1 and lower bounds to 0.
|
||||
|
||||
// CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 * 3)>
|
||||
// CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 * 2 + 1)>
|
||||
// CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
|
||||
|
||||
// CHECK-LABEL: func @normalize_parallel()
|
||||
func @normalize_parallel() {
|
||||
%cst = constant 1.0 : f32
|
||||
%0 = alloc() : memref<2x4xf32>
|
||||
// CHECK: affine.parallel (%[[i0:.*]], %[[j0:.*]]) = (0, 0) to (4, 2)
|
||||
affine.parallel (%i, %j) = (0, 1) to (10, 5) step (3, 2) {
|
||||
// CHECK: %[[i1:.*]] = affine.apply [[$MAP0]](%[[i0]])
|
||||
// CHECK: %[[j1:.*]] = affine.apply [[$MAP1]](%[[j0]])
|
||||
// CHECK: affine.parallel (%[[k0:.*]]) = (0) to (%[[j1]] - %[[i1]])
|
||||
affine.parallel (%k) = (%i) to (%j) {
|
||||
// CHECK: %[[k1:.*]] = affine.apply [[$MAP2]](%[[i1]], %[[k0]])
|
||||
// CHECK: affine.store %{{.*}}, %{{.*}}[%[[i1]], %[[k1]]] : memref<2x4xf32>
|
||||
affine.store %cst, %0[%i, %k] : memref<2x4xf32>
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
|
@ -604,3 +604,26 @@ func @drop_duplicate_bounds(%N : index) {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Ensure affine.parallel bounds expressions are canonicalized.
|
||||
|
||||
#map3 = affine_map<(d0) -> (d0 * 5)>
|
||||
|
||||
// CHECK-LABEL: func @affine_parallel_const_bounds
|
||||
func @affine_parallel_const_bounds() {
|
||||
%cst = constant 1.0 : f32
|
||||
%c0 = constant 0 : index
|
||||
%c4 = constant 4 : index
|
||||
%0 = alloc() : memref<4xf32>
|
||||
// CHECK: affine.parallel (%{{.*}}) = (0) to (4)
|
||||
affine.parallel (%i) = (%c0) to (%c0 + %c4) {
|
||||
%1 = affine.apply #map3(%i)
|
||||
// CHECK: affine.parallel (%{{.*}}) = (0) to (%{{.*}} * 5)
|
||||
affine.parallel (%j) = (%c0) to (%1) {
|
||||
affine.store %cst, %0[%j] : memref<4xf32>
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue