[mlir][sparse] Add rewriting rules for concatenate using foreach operator.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D134895
commit 00ad065548
parent 550288cbc3
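When the runtime library is disabled (see the pattern registration and the RUN line of the new test below), sparse_tensor.concatenate is now rewritten through the foreach operator instead of calling into the runtime. For context, the op being rewritten, copied from the test added by this commit:

```mlir
// Concatenate three DCSR matrices along dimension 0 (shapes taken from the test).
%0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
     : tensor<2x4xf64, #DCSR>,
       tensor<3x4xf64, #DCSR>,
       tensor<4x4xf64, #DCSR> to tensor<9x4xf64, #DCSR>
```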
@@ -815,6 +815,12 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
    ```
  }];

  let builders = [
    OpBuilder<(
      ins "Value":$tensor,
      "function_ref<void(OpBuilder &, Location, ValueRange)>")>
  ];

  let regions = (region AnyRegion:$region);
  let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region";
  let hasVerifier = 1;
@@ -597,6 +597,32 @@ LogicalResult CompressOp::verify() {
  return success();
}

void ForeachOp::build(
    OpBuilder &builder, OperationState &result, Value tensor,
    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
  build(builder, result, tensor);
  if (!bodyBuilder)
    return;

  auto rtp = tensor.getType().cast<RankedTensorType>();
  int64_t rank = rtp.getRank();

  SmallVector<Type, 4> blockArgTypes;
  // Starts with `rank` index arguments.
  std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType());
  // Followed by one value argument.
  blockArgTypes.push_back(rtp.getElementType());

  SmallVector<Location, 4> blockArgLocs;
  std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc());

  OpBuilder::InsertionGuard guard(builder);
  auto &region = *result.regions.front();
  Block *bodyBlock =
      builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
  bodyBuilder(builder, result.location, bodyBlock->getArguments());
}

LogicalResult ForeachOp::verify() {
  auto t = getTensor().getType().cast<RankedTensorType>();
  auto args = getBody()->getArguments();
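Given the assembly format declared above and the block argument types set up by this builder, the op constructed for a rank-2 input carries two index block arguments followed by the element value. A minimal sketch of its textual form (value names and the #DCSR encoding are illustrative; the body comes from the bodyBuilder callback):

```mlir
sparse_tensor.foreach in %t : tensor<2x4xf64, #DCSR> do {
^bb0(%i: index, %j: index, %v: f64):
  // Body emitted by the bodyBuilder callback, terminated with a yield.
  sparse_tensor.yield
}
```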
@@ -111,6 +111,32 @@ static bool isZeroYield(GenericOp op) {
  return isZeroValue(yieldOp.getOperand(0));
}

// TODO: The dim level property of the COO type relies on the input tensors;
// the shape relies on the output tensor.
// Helper to set up a COO type.
static RankedTensorType getUnorderedCOOFromType(RankedTensorType src) {
  auto *ctx = src.getContext();
  auto rank = src.getRank();
  SmallVector<SparseTensorEncodingAttr::DimLevelType, 4> dims;

  // An unordered and non-unique compressed dim at the beginning.
  dims.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNuNo);
  // TODO: it is actually ordered at this level for ordered input.
  // Followed by n - 2 unordered non-unique singleton levels.
  std::fill_n(std::back_inserter(dims), rank - 2,
              SparseTensorEncodingAttr::DimLevelType::SingletonNuNo);
  // TODO: only if all the inputs (for concatenate) are unique at the last
  // level should the COO have a unique level at the end. Ends with an
  // unordered unique singleton level.
  dims.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNo);
  // TODO: Maybe pick the bitwidth based on the input/output tensors (probably
  // the largest one among them) in the original operation instead of using
  // the default value.
  auto enc = SparseTensorEncodingAttr::get(
      ctx, dims, AffineMap::getMultiDimIdentityMap(rank, ctx), 0, 0);
  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
}

//===---------------------------------------------------------------------===//
// The actual sparse tensor rewriting rules.
//===---------------------------------------------------------------------===//
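For a rank-2 output such as the tensor<9x4xf64> in the test below, the helper builds an encoding with levels [CompressedNuNo, SingletonNo], an identity dimension ordering, and default bit widths. A rough sketch of the corresponding attribute in textual form, written with the mnemonics the enum names suggest (the exact printed spelling may differ in this revision):

```mlir
#UnorderedCOO = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed-nu-no", "singleton-no" ]
}>
```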
@@ -296,6 +322,61 @@ public:
  }
};

struct ConcatenateRewriter : public OpRewritePattern<ConcatenateOp> {
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(ConcatenateOp op,
                                PatternRewriter &rewriter) const override {
    auto loc = op.getLoc();
    auto rtp = op.getType().cast<RankedTensorType>();
    // TODO: Build the output shape if needed.
    assert(rtp.hasStaticShape());
    auto rank = rtp.getRank();
    size_t conDim = op.getDimension().getZExtValue();
    // %t = concatenate %s1, %s2, %s3 {dim = 1}
    // ==>
    // %tmp = bufferization.alloc_tensor : unordered COO
    // foreach in %s1 : insert d0, d1, %tmp
    // foreach in %s2 : insert d0, d1 + size(s1), %tmp
    // foreach in %s3 : insert d0, d1 + size(s1) + size(s2), %tmp
    // %t = sparse_tensor.cast %tmp
    auto cooTp = getUnorderedCOOFromType(rtp);
    auto cooBuffer =
        rewriter.create<AllocTensorOp>(loc, cooTp, ValueRange()).getResult();

    Value offset = constantIndex(rewriter, loc, 0);
    for (Value input : op.getInputs()) {
      // Builds the indexing map.

      // Build a foreach op for each input tensor to append new values into
      // the output tensor.
      rewriter.create<ForeachOp>(
          loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) {
            SmallVector<Value, 4> indices;
            for (int64_t i = 0; i < rank; i++) {
              uint64_t dim =
                  toStoredDim(getSparseTensorEncoding(input.getType()), i);
              Value idx = args[dim];
              if (i == static_cast<int64_t>(conDim))
                // Transform coordinates on the matching dimension.
                idx = builder.create<arith::AddIOp>(loc, idx, offset);
              indices.push_back(idx);
            }
            builder.create<InsertOp>(loc, args.back(), cooBuffer, indices);
            builder.create<sparse_tensor::YieldOp>(loc);
          });
      // Accumulates the offset. Note that only static-shaped inputs are
      // allowed by the concatenate op verifier, which saves us from computing
      // the offset dynamically.
      auto d = input.getType().cast<RankedTensorType>().getShape()[conDim];
      assert(!ShapedType::isDynamic(d));
      offset = rewriter.create<arith::AddIOp>(loc, offset,
                                              constantIndex(rewriter, loc, d));
    }
    rewriter.replaceOpWithNewOp<ConvertOp>(op, rtp, cooBuffer);
    return success();
  }
};

/// Sparse rewriting rule for the foreach operator.
struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
public:
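Rendering the strategy comment above as IR for the dimension-0 concatenation in the test below (accumulated offsets 0, 2, and 5 for the 2-, 3-, and 4-row inputs): a conceptual sketch only, since the foreach ops are immediately lowered further by ForeachRewriter into the scf.for loops the FileCheck lines expect, and #UnorderedCOO stands in for the encoding produced by getUnorderedCOOFromType:

```mlir
%c2  = arith.constant 2 : index
%tmp = bufferization.alloc_tensor() : tensor<9x4xf64, #UnorderedCOO>
// First input: rows keep their original position (offset 0).
sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do {
^bb0(%i: index, %j: index, %v: f64):
  sparse_tensor.insert %v into %tmp[%i, %j] : tensor<9x4xf64, #UnorderedCOO>
  sparse_tensor.yield
}
// Second input: shift the concatenation dimension by size(%arg0) = 2;
// the third input is handled the same way with offset 2 + 3 = 5.
sparse_tensor.foreach in %arg1 : tensor<3x4xf64, #DCSR> do {
^bb0(%i: index, %j: index, %v: f64):
  %ii = arith.addi %i, %c2 : index
  sparse_tensor.insert %v into %tmp[%ii, %j] : tensor<9x4xf64, #UnorderedCOO>
  sparse_tensor.yield
}
%t = sparse_tensor.convert %tmp : tensor<9x4xf64, #UnorderedCOO> to tensor<9x4xf64, #DCSR>
```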
@@ -363,4 +444,6 @@ void mlir::populateSparseTensorRewriting(RewritePatternSet &patterns,
               ReshapeRewriter<tensor::CollapseShapeOp>, ForeachRewriter>(
      patterns.getContext());
  // TODO: If RT not enabled, rewrite concatenate ops, etc. here.
  if (!enableRT)
    patterns.add<ConcatenateRewriter>(patterns.getContext());
}
@@ -0,0 +1,81 @@
// RUN: mlir-opt %s --sparsification=enable-runtime-library=false | FileCheck %s

#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>

// CHECK-LABEL: @concat_sparse_sparse(
// CHECK-SAME:  %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor
// CHECK-SAME:  %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor
// CHECK-SAME:  %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_c0:.*]] = arith.constant 0 : index
// CHECK: %[[TMP_c1:.*]] = arith.constant 1 : index
// CHECK: %[[TMP_c5:.*]] = arith.constant 5 : index
// CHECK: %[[TMP_c2:.*]] = arith.constant 2 : index
// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse_tensor
// CHECK: %[[TMP_1:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 0 : index} : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_2:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 0 : index} : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_3:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_4:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor
// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK: }
// CHECK: }
// CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_10:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 1 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_11:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 1 : index} : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor
// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK: }
// CHECK: }
// CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_17:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 1 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_18:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 1 : index} : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor
// CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref<?xindex>
// CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] {
// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
// CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref<?xindex>
// CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref<?xindex>
// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] {
// CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref<?xindex>
// CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref<?xf64>
// CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index
// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor
// CHECK: }
// CHECK: }
// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor
// CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor
func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>,
                                %arg1: tensor<3x4xf64, #DCSR>,
                                %arg2: tensor<4x4xf64, #DCSR>)
    -> tensor<9x4xf64, #DCSR> {
  %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
       : tensor<2x4xf64, #DCSR>,
         tensor<3x4xf64, #DCSR>,
         tensor<4x4xf64, #DCSR> to tensor<9x4xf64, #DCSR>
  return %0 : tensor<9x4xf64, #DCSR>
}