//===- Sparsification.cpp - Implementation of sparsification --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements converting sparse tensor types to actual sparse code.
//
//===----------------------------------------------------------------------===//

#include "CodegenUtils.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Transforms.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/TensorEncoding.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace mlir;
using namespace mlir::sparse_tensor;

//===----------------------------------------------------------------------===//
// Declarations of data structures.
//===----------------------------------------------------------------------===//

namespace {

// Iteration graph sorting.
enum SortMask { kSparseOnly = 0x0, kIncludeDense = 0x1, kIncludeUndef = 0x2 };
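
// For example (illustrative only), a caller that wants ordering constraints
// from both dense tensors and still-undefined dimensions can combine masks as
//   kIncludeDense | kIncludeUndef
// when requesting an iteration graph, whereas kSparseOnly keeps only the
// constraints that are strictly required by the sparse storage formats.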

// Reduction kinds.
enum Reduction { kNoReduc, kSum, kProduct, kAnd, kOr, kXor };

// Code generation.
struct CodeGen {
  CodeGen(SparsificationOptions o, unsigned numTensors, unsigned numLoops,
          OpOperand *op, unsigned nest)
      : options(o), loops(numLoops), sizes(numLoops), buffers(numTensors),
        pointers(numTensors, std::vector<Value>(numLoops)),
        indices(numTensors, std::vector<Value>(numLoops)),
        highs(numTensors, std::vector<Value>(numLoops)),
        pidxs(numTensors, std::vector<Value>(numLoops)),
        idxs(numTensors, std::vector<Value>(numLoops)), redVal(), sparseOut(op),
        outerParNest(nest), lexIdx(), expValues(), expFilled(), expAdded(),
        expCount(), curVecMask() {}
  /// Sparsification options.
  SparsificationOptions options;
  /// Universal dense indices and upper bounds (by index). The loops array
  /// is updated with the value of the universal dense index in the current
  /// loop. The sizes array is set once with the inferred dimension sizes.
  std::vector<Value> loops;
  std::vector<Value> sizes;
  /// Buffers for storing dense and sparse numerical values (by tensor).
  /// This array is set once during bufferization of all tensors.
  std::vector<Value> buffers;
  /// Sparse storage schemes (1-D): pointers and indices (by tensor and index).
  /// This array is set once during bufferization of all sparse tensors.
  std::vector<std::vector<Value>> pointers;
  std::vector<std::vector<Value>> indices;
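
  // As an illustrative (hypothetical) example, a 4x4 CSR matrix with nonzeros
  // at (0,1), (1,0), (1,3), and (3,2) would populate the entries for its
  // compressed innermost dimension roughly as
  //   pointers[t][1] -> memref holding [0, 1, 3, 3, 4]
  //   indices[t][1]  -> memref holding [1, 0, 3, 2]
  // where t denotes the operand number of that tensor.
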
  /// Sparse iteration information (by tensor and index). These arrays
  /// are updated to remain current within the current loop.
  std::vector<std::vector<Value>> highs;
  std::vector<std::vector<Value>> pidxs;
  std::vector<std::vector<Value>> idxs;
  /// Current reduction, updated during code generation. When indices of a
  /// reduction are exhausted, all inner loops can use a scalarized reduction.
  unsigned redExp = -1u;
  Value redVal;
  Reduction redKind = kNoReduc;
  // Sparse tensor as output. Implemented either through direct injective
  // insertion in lexicographic index order (where indices are updated
  // in the temporary array `lexIdx`) or through access pattern expansion
  // in the innermost loop nest (`expValues` through `expCount`).
  OpOperand *sparseOut;
  unsigned outerParNest;
  Value lexIdx;
  Value expValues;
  Value expFilled;
  Value expAdded;
  Value expCount;
  // Current vector length and mask.
  unsigned curVecLength = 1;
  Value curVecMask;
};

} // namespace

//===----------------------------------------------------------------------===//
// Sparse compiler analysis methods.
//===----------------------------------------------------------------------===//

/// Helper method to apply dimension ordering permutation.
static unsigned perm(const SparseTensorEncodingAttr &enc, unsigned d) {
  if (enc) {
    auto order = enc.getDimOrdering();
    if (order) {
      assert(order.isPermutation());
      return order.getDimPosition(d);
    }
  }
  return d;
}
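
// For instance (illustrative only), with a column-major ordering on a matrix,
//   dimOrdering = affine_map<(i,j) -> (j,i)>
// perm(enc, 0) yields 1 and perm(enc, 1) yields 0, so dimension d of the
// annotated type is reached through result perm(enc, d) of the indexing map.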

/// Helper method to translate dim level type to internal representation.
static Dim toDim(const SparseTensorEncodingAttr &enc, unsigned d) {
  if (enc) {
    SparseTensorEncodingAttr::DimLevelType tp = enc.getDimLevelType()[d];
    if (tp == SparseTensorEncodingAttr::DimLevelType::Compressed)
      return Dim::kSparse;
    if (tp == SparseTensorEncodingAttr::DimLevelType::Singleton)
      return Dim::kSingle;
  }
  return Dim::kDense;
}

/// Helper method to inspect affine expressions. Rejects cases where the
/// same index is used more than once. Also rejects affine expressions
/// that are not a direct index for annotated tensors.
// TODO: accept more affine cases for sparse tensors
static bool findAffine(Merger &merger, unsigned tensor, AffineExpr a, Dim dim,
                       bool isDense) {
  switch (a.getKind()) {
  case AffineExprKind::DimId: {
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    if (!merger.isDim(tensor, idx, Dim::kUndef))
      return false; // used more than once
    merger.setDim(tensor, idx, dim);
    return true;
  }
  case AffineExprKind::Add:
  case AffineExprKind::Mul: {
    if (!isDense)
      return false;
    auto binOp = a.cast<AffineBinaryOpExpr>();
    return findAffine(merger, tensor, binOp.getLHS(), dim, isDense) &&
           findAffine(merger, tensor, binOp.getRHS(), dim, isDense);
  }
  case AffineExprKind::Constant:
    return isDense;
  default:
    return false;
  }
}
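
// As an illustrative example, a subscript such as A[i][j] is accepted for both
// dense and sparse tensors, whereas a compound subscript like B[i+j][j] is
// only accepted when B is a dense (non-annotated) tensor; for an annotated
// sparse tensor it is rejected as an inadmissible affine expression.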

/// Helper method to inspect sparse encodings in the tensor types.
/// Fills the per-dimension sparsity information for all tensors.
/// Returns true if the sparse annotations and affine subscript
/// expressions of all tensors are admissible. Returns false if
/// no annotations are found or inadmissible constructs occur.
static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {
  bool annotated = false;
  for (OpOperand *t : op.getInputAndOutputOperands()) {
    auto map = op.getTiedIndexingMap(t);
    auto enc = getSparseTensorEncoding(t->get().getType());
    if (enc)
      annotated = true;
    assert(map.getNumResults() == op.getRank(t));
    for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
      unsigned tensor = t->getOperandNumber();
      AffineExpr a = map.getResult(perm(enc, d));
      if (!findAffine(merger, tensor, a, toDim(enc, d), !enc))
        return false; // inadmissible affine expression
    }
  }
  return annotated;
}

/// A DFS helper to compute a topological sort. Note that recursion is
/// bounded by the number of implicit loops, which is always small.
/// Returns false when a cycle is detected.
static bool topSortDFS(unsigned i, std::vector<unsigned> &visit,
                       std::vector<unsigned> &topSort,
                       std::vector<std::vector<bool>> &adjM) {
  if (visit[i] != 0)
    return visit[i] != 1; // 1 denotes cycle!
  visit[i] = 1;
  for (unsigned j = 0, e = visit.size(); j < e; j++)
    if (adjM[i][j])
      if (!topSortDFS(j, visit, topSort, adjM))
        return false;
  visit[i] = 2;
  topSort.push_back(i);
  return true;
}

/// Helper method to add all constraints from the indices in one affine
/// expression before all indices in the other affine expression. For
/// example i0+i1 < i2+i3+1 yields i0<i2, i0<i3, i1<i2, and i1<i3.
static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,
                               AffineExpr a, AffineExpr b, unsigned fidx) {
  switch (a.getKind()) {
  case AffineExprKind::DimId: {
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    if (b)
      addAffineOrderings(adjM, b, AffineExpr(), idx);
    else
      adjM[fidx][idx] = true;
    break;
  }
  case AffineExprKind::Add:
  case AffineExprKind::Mul: {
    auto binOp = a.cast<AffineBinaryOpExpr>();
    addAffineOrderings(adjM, binOp.getLHS(), b, fidx);
    addAffineOrderings(adjM, binOp.getRHS(), b, fidx);
    break;
  }
  default:
    break;
  }
}

/// Computes a topologically sorted iteration graph for the linalg operation.
/// Ensures all tensors are visited in natural index order. This is essential
/// for sparse storage formats since these only support access along fixed
/// dimensions. Even for dense storage formats, however, the natural index
/// order yields innermost unit-stride access with better spatial locality.
static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
                                  std::vector<unsigned> &topSort,
                                  unsigned mask) {
  // Set up an n x n from/to adjacency matrix of the iteration graph
  // for the implicit loop indices i_0 .. i_n-1.
  unsigned n = op.getNumLoops();
  std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));

  // Iterate over the indexing maps of every tensor in the tensor expression.
  for (OpOperand *t : op.getInputAndOutputOperands()) {
    auto map = op.getTiedIndexingMap(t);
    auto enc = getSparseTensorEncoding(t->get().getType());
    assert(map.getNumDims() == n);
    // Skip dense tensor constraints when not requested.
    if (!(mask & SortMask::kIncludeDense) && !enc)
      continue;
    // Each tensor expression and optional dimension ordering (row-major
    // by default) puts an ordering constraint on the loop indices. For
    // example, the tensor expression A_ijk forces the ordering i < j < k
    // on the loop indices if no explicit dimension ordering is given.
    for (unsigned d = 1, rank = map.getNumResults(); d < rank; d++) {
      AffineExpr f = map.getResult(perm(enc, d - 1));
      AffineExpr t = map.getResult(perm(enc, d));
      addAffineOrderings(adjM, f, t, 0);
    }
    // Push unrelated loops into sparse iteration space, so these
    // will be skipped more often.
    if (mask & SortMask::kIncludeUndef) {
      unsigned tensor = t->getOperandNumber();
      for (unsigned i = 0; i < n; i++)
        if (merger.isDim(tensor, i, Dim::kSparse))
          for (unsigned j = 0; j < n; j++)
            if (merger.isDim(tensor, j, Dim::kUndef))
              adjM[i][j] = true;
    }
  }

  // Topologically sort the iteration graph to determine loop order.
  // Report failure for a cyclic iteration graph.
  topSort.clear();
  topSort.reserve(n);
  std::vector<unsigned> visit(n, 0);
  for (unsigned i = 0; i < n; i++)
    if (visit[i] == 0)
      if (!topSortDFS(i, visit, topSort, adjM))
        return false; // cycle!
  std::reverse(std::begin(topSort), std::end(topSort));
  return true;
}
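
// As a small illustrative example, for a matrix multiplication
//   C(i,j) = sum_k A(i,k) * B(k,j)
// with default row-major orderings, A contributes the constraint i < k,
// B contributes k < j, and C contributes i < j, so the topological sort
// of the iteration graph yields the loop order i, k, j.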

/// Returns true if tensor has an in-place annotation.
static bool isInPlace(Value val) {
  if (auto arg = val.dyn_cast<BlockArgument>())
    if (auto funcOp = dyn_cast<func::FuncOp>(arg.getOwner()->getParentOp()))
      if (auto attr = funcOp.getArgAttrOfType<BoolAttr>(
              arg.getArgNumber(),
              bufferization::BufferizableOpInterface::kInplaceableAttrName))
        return attr.getValue();
  return false;
}

/// Returns true if tensor materializes uninitialized into the computation.
static bool isMaterializing(Value val) {
  return val.getDefiningOp<linalg::InitTensorOp>() ||
         val.getDefiningOp<bufferization::AllocTensorOp>();
}

/// Returns true when the tensor expression is admissible for codegen.
/// Since all sparse input tensors are admissible, we just need to check
/// whether the out tensor in the tensor expression codegen is admissible.
/// Sets `sparseOut` to the tensor and `outerParNest` to the outer injective
/// nesting depth when a "truly dynamic" sparse tensor output occurs.
static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op,
                                  std::vector<unsigned> &topSort, unsigned exp,
                                  OpOperand **sparseOut,
                                  unsigned &outerParNest) {
  OpOperand *lhs = op.getOutputOperand(0);
  unsigned tensor = lhs->getOperandNumber();
  auto enc = getSparseTensorEncoding(lhs->get().getType());
  // A non-annotated output tensor is assumed dense, and becomes a random
  // access n-dim memref. Admissible since insertions cannot occur.
  if (!enc)
    return true;
  // An all-dense annotated "sparse" output tensor becomes a linearized random
  // access 1-dim memref. Also admissible since insertions cannot occur.
  bool allDense = true;
  auto iteratorTypes = op.iterator_types().getValue();
  unsigned numLoops = iteratorTypes.size();
  for (unsigned i = 0; i < numLoops; i++)
    if (merger.isDim(tensor, i, Dim::kSparse)) {
      allDense = false;
      break;
    }
  if (allDense)
    return true;
  // A tensor expression with a sparse output tensor that changes its values
  // but not its nonzero structure, an operation called "simply dynamic" in
  // [Bik96,Ch9], is also admissible without special codegen, provided
  // the tensor's underlying sparse storage scheme can be modified in place.
  if (merger.isSingleCondition(tensor, exp) && isInPlace(lhs->get()))
    return true;
  // Accept "truly dynamic" if the output tensor materializes uninitialized
  // into the computation and insertions occur in lexicographic index order.
  if (isMaterializing(lhs->get())) {
    unsigned nest = 0;
    for (unsigned i = 0; i < numLoops; i++) {
      if (isReductionIterator(iteratorTypes[topSort[i]]))
        break; // terminate at first reduction
      nest++;
    }
    // Determine admissible dynamic insertion situations:
    // (1) fully injective, since there are no reductions,
    // (2) admissible 1-d expansion in innermost dimension.
    if (nest >= op.getRank(lhs) - 1) {
      *sparseOut = lhs;
      outerParNest = nest;
      return true;
    }
  }
  return false;
}
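
// As an illustrative example of the "simply dynamic" case above, a kernel
// such as x(i) = x(i) * 2.0 with a sparse, in-place x only scales values that
// are already stored and never inserts new nonzeros, so it is accepted
// without the special insertion codegen; storing x(i) = y(i) + z(i) into an
// annotated sparse x, in contrast, requires the "truly dynamic" path.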

//===----------------------------------------------------------------------===//
// Sparse compiler synthesis methods (reductions).
//===----------------------------------------------------------------------===//

/// Maps reduction kind to vector::CombiningKind.
static vector::CombiningKind getCombiningKind(Reduction kind) {
  switch (kind) {
  case kNoReduc:
    break;
  case kSum:
    return vector::CombiningKind::ADD;
  case kProduct:
    return vector::CombiningKind::MUL;
  case kAnd:
    return vector::CombiningKind::AND;
  case kOr:
    return vector::CombiningKind::OR;
  case kXor:
    return vector::CombiningKind::XOR;
  }
  llvm_unreachable("unknown reduction kind");
}

/// Maps operation to reduction.
static Reduction getReduction(Kind kind) {
  switch (kind) {
  case Kind::kAddF:
  case Kind::kAddC:
  case Kind::kAddI:
  case Kind::kSubF:
  case Kind::kSubC:
  case Kind::kSubI:
    return kSum;
  case Kind::kMulF:
  case Kind::kMulC:
  case Kind::kMulI:
    return kProduct;
  case Kind::kAndI:
    return kAnd;
  case Kind::kOrI:
    return kOr;
  case Kind::kXorI:
    return kXor;
  default:
    llvm_unreachable("unexpected reduction operator");
  }
}

/// Generates an initial value for a vector reduction, following the scheme
/// given in Chapter 5 of "The Software Vectorization Handbook", where the
/// initial scalar value is correctly embedded in the vector reduction value,
/// and a straightforward horizontal reduction will complete the operation.
static Value genVectorReducInit(CodeGen &codegen, OpBuilder &builder,
                                Location loc, VectorType vtp) {
  Value r = codegen.redVal;
  switch (codegen.redKind) {
  case kNoReduc:
    break;
  case kSum:
  case kXor:
    // Initialize reduction vector to: | 0 | .. | 0 | r |
    return builder.create<vector::InsertElementOp>(
        loc, r, constantZero(builder, loc, vtp),
        constantIndex(builder, loc, 0));
  case kProduct:
    // Initialize reduction vector to: | 1 | .. | 1 | r |
    return builder.create<vector::InsertElementOp>(
        loc, r, constantOne(builder, loc, vtp), constantIndex(builder, loc, 0));
  case kAnd:
  case kOr:
    // Initialize reduction vector to: | r | .. | r | r |
    return builder.create<vector::BroadcastOp>(loc, vtp, r);
  }
  llvm_unreachable("unknown reduction kind");
}

/// Generates final value for a vector reduction.
static Value genVectorReducEnd(CodeGen &codegen, OpBuilder &builder,
                               Location loc, VectorType vtp) {
  vector::CombiningKind kind = getCombiningKind(codegen.redKind);
  return builder.create<vector::ReductionOp>(loc, kind, codegen.redVal);
}

/// Updates scalarized reduction value.
static void updateReduc(Merger &merger, CodeGen &codegen, Value reduc) {
  assert(codegen.redKind != kNoReduc);
  codegen.redVal = merger.exp(codegen.redExp).val = reduc;
}

//===----------------------------------------------------------------------===//
// Sparse compiler synthesis methods (statements and expressions).
//===----------------------------------------------------------------------===//

/// Generates buffer for the output tensor. Note that all sparse kernels
/// assume that when all elements are written to (viz. x(i) = y(i) * z(i)),
/// the output buffer is already initialized to all zeroes and only nonzero
/// values are computed and written out. For updates (viz. x(i) += y(i) * z(i)),
/// only nonzero values are used for the updates and no assumption on the
/// original contents of the output buffer is necessary.
static Value genOutputBuffer(CodeGen &codegen, OpBuilder &builder,
                             linalg::GenericOp op, MemRefType denseTp,
                             ArrayRef<Value> args) {
  Location loc = op.getLoc();
  Value tensor = op.getOutputOperand(0)->get();
  // The output tensor could simply materialize from the buffer that will
  // be generated for the tensor present in the outs() clause. This has
  // the major advantage that the sparse kernel only updates the nonzero
  // positions for the output tensor.
  if (isInPlace(tensor))
    return builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
  // By default, a new buffer is allocated which is initialized to the
  // tensor defined in the outs() clause. This is always correct but
  // introduces a dense initialization component that may negatively
  // impact the running complexity of the sparse kernel. If the tensor
  // materializes into the computation, we need to preserve the zero
  // initialization assumption of all sparse output buffers.
  Value alloc = builder.create<memref::AllocOp>(loc, denseTp, args);
  if (isMaterializing(tensor)) {
    Value zero = constantZero(builder, loc, denseTp.getElementType());
    builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{alloc});
  } else {
    Value init =
        builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
    builder.create<memref::CopyOp>(loc, init, alloc);
  }
  return alloc;
}

/// Local bufferization of all dense and sparse data structures.
/// This code enables testing the first prototype sparse compiler.
// TODO: replace this with a proliferated bufferization strategy
static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                       linalg::GenericOp op) {
  Location loc = op.getLoc();
  assert(op.getNumInputsAndOutputs() == op.getNumInputs() + 1);
  // For every tensor, find lower and upper bound on dimensions, set the
  // same bounds on loop indices, and obtain dense or sparse buffer(s).
  SmallVector<Value, 4> args;
  for (OpOperand *t : op.getInputAndOutputOperands()) {
    unsigned tensor = t->getOperandNumber();
    auto shape = op.getShape(t);
    auto map = op.getTiedIndexingMap(t);
    auto enc = getSparseTensorEncoding(t->get().getType());
    // Scan all dimensions of current tensor.
    args.clear();
    for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
      AffineExpr a = map.getResult(perm(enc, d));
      if (a.getKind() != AffineExprKind::DimId)
        continue; // compound
      unsigned idx = a.cast<AffineDimExpr>().getPosition();
      // Handle sparse storage schemes.
      if (merger.isDim(tensor, idx, Dim::kSparse)) {
        auto dynShape = {ShapedType::kDynamicSize};
        auto ptrTp =
            MemRefType::get(dynShape, getPointerOverheadType(builder, enc));
        auto indTp =
            MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
        Value dim = constantIndex(builder, loc, d);
        // Generate sparse primitives to obtain pointer and indices.
        codegen.pointers[tensor][idx] =
            builder.create<ToPointersOp>(loc, ptrTp, t->get(), dim);
        codegen.indices[tensor][idx] =
            builder.create<ToIndicesOp>(loc, indTp, t->get(), dim);
      }
      // Find upper bound in current dimension.
      unsigned p = perm(enc, d);
      Value up = linalg::createOrFoldDimOp(builder, loc, t->get(), p);
      if (ShapedType::isDynamic(shape[p]))
        args.push_back(up);
      assert(codegen.highs[tensor][idx] == nullptr);
      codegen.sizes[idx] = codegen.highs[tensor][idx] = up;
    }
    // Perform the required bufferization. Dense inputs materialize
    // from the input tensors. Dense outputs need special handling.
    // Sparse inputs use sparse primitives to obtain the values.
    // We also accept in-place all-dense annotated "sparse" outputs.
    Type elementType = getElementTypeOrSelf(t->get().getType());
    if (!enc) {
      // Non-annotated dense tensors.
      auto denseTp = MemRefType::get(shape, elementType);
      if (tensor < op.getNumInputs())
        codegen.buffers[tensor] =
            builder.create<bufferization::ToMemrefOp>(loc, denseTp, t->get());
      else
        codegen.buffers[tensor] =
            genOutputBuffer(codegen, builder, op, denseTp, args);
    } else if (t == codegen.sparseOut) {
      // True sparse output needs a lexIdx array.
      Value rank = constantIndex(builder, loc, op.getRank(t));
      auto dynShape = {ShapedType::kDynamicSize};
      auto memTp = MemRefType::get(dynShape, builder.getIndexType());
      codegen.lexIdx = builder.create<memref::AllocaOp>(loc, memTp, rank);
    } else {
      // Annotated sparse tensors.
      auto dynShape = {ShapedType::kDynamicSize};
      auto sparseTp = MemRefType::get(dynShape, elementType);
      codegen.buffers[tensor] =
          builder.create<ToValuesOp>(loc, sparseTp, t->get());
    }
  }
}

/// Constructs vector type.
static VectorType vectorType(CodeGen &codegen, Type etp) {
  unsigned numScalableDims = codegen.options.enableVLAVectorization;
  return VectorType::get(codegen.curVecLength, etp, numScalableDims);
}

/// Constructs vector type from pointer.
static VectorType vectorType(CodeGen &codegen, Value ptr) {
  return vectorType(codegen, ptr.getType().cast<MemRefType>().getElementType());
}

/// Constructs vector iteration mask.
static Value genVectorMask(CodeGen &codegen, OpBuilder &builder, Value iv,
                           Value lo, Value hi, Value step) {
  Location loc = iv.getLoc();
  VectorType mtp = vectorType(codegen, builder.getI1Type());
  // Special case if the vector length evenly divides the trip count (for
  // example, "for i = 0, 128, 16"). A constant all-true mask is generated
  // so that all subsequent masked memory operations are immediately folded
  // into unconditional memory operations.
  IntegerAttr loInt, hiInt, stepInt;
  if (matchPattern(lo, m_Constant(&loInt)) &&
      matchPattern(hi, m_Constant(&hiInt)) &&
      matchPattern(step, m_Constant(&stepInt))) {
    if (((hiInt.getInt() - loInt.getInt()) % stepInt.getInt()) == 0)
      return builder.create<vector::BroadcastOp>(
          loc, mtp, constantI1(builder, loc, true));
  }
  // Otherwise, generate a vector mask that avoids overrunning the upper bound
  // during vector execution. Here we rely on subsequent loop optimizations to
  // avoid executing the mask in all iterations, for example, by splitting the
  // loop into an unconditional vector loop and a scalar cleanup loop.
  auto minMap = AffineMap::get(
      /*dimCount=*/2, /*symbolCount=*/1,
      {builder.getAffineSymbolExpr(0),
       builder.getAffineDimExpr(0) - builder.getAffineDimExpr(1)},
      builder.getContext());
  Value end =
      builder.createOrFold<AffineMinOp>(loc, minMap, ValueRange{hi, iv, step});
  return builder.create<vector::CreateMaskOp>(loc, mtp, end);
}
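
// As an illustrative example, for a loop "for i = 0, 1000, 16" the final
// vector iteration starts at i = 992, and the mask then covers only
// min(step, hi - iv) = min(16, 1000 - 992) = 8 active lanes, so the trailing
// elements past the upper bound are never touched.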

/// Generates a vectorized load lhs = a[ind[lo:hi]] or lhs = a[lo:hi].
static Value genVectorLoad(CodeGen &codegen, OpBuilder &builder, Value ptr,
                           ArrayRef<Value> args) {
  Location loc = ptr.getLoc();
  VectorType vtp = vectorType(codegen, ptr);
  Value pass = constantZero(builder, loc, vtp);
  if (args.back().getType().isa<VectorType>()) {
    SmallVector<Value, 4> scalarArgs(args.begin(), args.end());
    Value indexVec = args.back();
    scalarArgs.back() = constantIndex(builder, loc, 0);
    return builder.create<vector::GatherOp>(loc, vtp, ptr, scalarArgs, indexVec,
                                            codegen.curVecMask, pass);
  }
  return builder.create<vector::MaskedLoadOp>(loc, vtp, ptr, args,
                                              codegen.curVecMask, pass);
}

/// Generates a vectorized store a[ind[lo:hi]] = rhs or a[lo:hi] = rhs.
static void genVectorStore(CodeGen &codegen, OpBuilder &builder, Value rhs,
                           Value ptr, ArrayRef<Value> args) {
  Location loc = ptr.getLoc();
  if (args.back().getType().isa<VectorType>()) {
    SmallVector<Value, 4> scalarArgs(args.begin(), args.end());
    Value indexVec = args.back();
    scalarArgs.back() = constantIndex(builder, loc, 0);
    builder.create<vector::ScatterOp>(loc, ptr, scalarArgs, indexVec,
                                      codegen.curVecMask, rhs);
    return;
  }
  builder.create<vector::MaskedStoreOp>(loc, ptr, args, codegen.curVecMask,
                                        rhs);
}

/// Generates a vectorized invariant. Here we rely on subsequent loop
/// optimizations to hoist the invariant broadcast out of the vector loop.
static Value genVectorInvariantValue(CodeGen &codegen, OpBuilder &builder,
                                     Value val) {
  VectorType vtp = vectorType(codegen, val.getType());
  return builder.create<vector::BroadcastOp>(val.getLoc(), vtp, val);
}

/// Generates an affine expression.
//
// TODO: generalize for sparse tensor subscripts
//
static Value genAffine(CodeGen &codegen, OpBuilder &builder, AffineExpr a,
                       Location loc) {
  switch (a.getKind()) {
  case AffineExprKind::DimId: {
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    return codegen.loops[idx]; // universal dense index
  }
  case AffineExprKind::Add: {
    auto binOp = a.cast<AffineBinaryOpExpr>();
    return builder.create<arith::AddIOp>(
        loc, genAffine(codegen, builder, binOp.getLHS(), loc),
        genAffine(codegen, builder, binOp.getRHS(), loc));
  }
  case AffineExprKind::Mul: {
    auto binOp = a.cast<AffineBinaryOpExpr>();
    return builder.create<arith::MulIOp>(
        loc, genAffine(codegen, builder, binOp.getLHS(), loc),
        genAffine(codegen, builder, binOp.getRHS(), loc));
  }
  case AffineExprKind::Constant: {
    int64_t c = a.cast<AffineConstantExpr>().getValue();
    return constantIndex(builder, loc, c);
  }
  default:
    llvm_unreachable("unexpected affine subscript");
  }
}

/// Generates index for load/store on sparse tensor.
static Value genIndex(CodeGen &codegen, linalg::GenericOp op, OpOperand *t) {
  auto map = op.getTiedIndexingMap(t);
  auto enc = getSparseTensorEncoding(t->get().getType());
  AffineExpr a = map.getResult(perm(enc, map.getNumResults() - 1));
  assert(a.getKind() == AffineExprKind::DimId);
  unsigned idx = a.cast<AffineDimExpr>().getPosition();
  return codegen.loops[idx];
}

/// Generates subscript for load/store on a dense or sparse tensor.
static Value genSubscript(CodeGen &codegen, OpBuilder &builder,
                          linalg::GenericOp op, OpOperand *t,
                          SmallVector<Value, 4> &args) {
  unsigned tensor = t->getOperandNumber();
  auto map = op.getTiedIndexingMap(t);
  auto enc = getSparseTensorEncoding(t->get().getType());
  unsigned rank = map.getNumResults();
  if (enc) {
    // Note that currently, all sparse subscripts are simple.
    // TODO: accept affine too?
    AffineExpr a = map.getResult(perm(enc, rank - 1));
    assert(a.getKind() == AffineExprKind::DimId);
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    assert(codegen.pidxs[tensor][idx] != nullptr);
    args.push_back(codegen.pidxs[tensor][idx]); // position index
  } else {
    for (unsigned d = 0; d < rank; d++) {
      AffineExpr a = map.getResult(perm(enc, d));
      args.push_back(genAffine(codegen, builder, a, op.getLoc()));
    }
  }
  return codegen.buffers[tensor];
}

/// Generates insertion code to implement dynamic tensor load.
static Value genInsertionLoad(CodeGen &codegen, OpBuilder &builder,
                              linalg::GenericOp op, OpOperand *t) {
  Location loc = op.getLoc();
  // Direct lexicographic index order, tensor loads as zero.
  if (!codegen.expValues) {
    Type tp = getElementTypeOrSelf(t->get().getType());
    return constantZero(builder, loc, tp);
  }
  // Load from expanded access pattern.
  Value index = genIndex(codegen, op, t);
  return builder.create<memref::LoadOp>(loc, codegen.expValues, index);
}

/// Generates insertion code to implement dynamic tensor store.
static void genInsertionStore(CodeGen &codegen, OpBuilder &builder,
                              linalg::GenericOp op, OpOperand *t, Value rhs) {
  Location loc = op.getLoc();
  // Direct insertion in lexicographic index order.
  if (!codegen.expValues) {
    builder.create<LexInsertOp>(loc, t->get(), codegen.lexIdx, rhs);
    return;
  }
  // Generates insertion code along expanded access pattern.
  //   if (!expFilled[i]) then
  //     expFilled[i] = true
  //     expAdded[inserts++] = i
  //   endif
  //   values[i] = rhs
  Value index = genIndex(codegen, op, t);
  Value fval = constantI1(builder, loc, false);
  Value tval = constantI1(builder, loc, true);
  // If statement.
  Value filled = builder.create<memref::LoadOp>(loc, codegen.expFilled, index);
  Value cond = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
                                             filled, fval);
  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, builder.getIndexType(), cond,
                                             /*else=*/true);
  // True branch.
  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
  builder.create<memref::StoreOp>(loc, tval, codegen.expFilled, index);
  builder.create<memref::StoreOp>(loc, index, codegen.expAdded,
                                  codegen.expCount);
  Value one = constantIndex(builder, loc, 1);
  Value add = builder.create<arith::AddIOp>(loc, codegen.expCount, one);
  builder.create<scf::YieldOp>(loc, add);
  // False branch.
  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
  builder.create<scf::YieldOp>(loc, codegen.expCount);
  builder.setInsertionPointAfter(ifOp);
  // Value assignment.
  codegen.expCount = ifOp.getResult(0);
  builder.create<memref::StoreOp>(loc, rhs, codegen.expValues, index);
}

/// Generates a load on a dense or sparse tensor.
static Value genTensorLoad(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                           linalg::GenericOp op, unsigned exp) {
  // Test if the load was hoisted to a higher loop nest.
  Value val = merger.exp(exp).val;
  if (val) {
    if (codegen.curVecLength > 1 && !val.getType().isa<VectorType>())
      return genVectorInvariantValue(codegen, builder, val);
    return val;
  }
  // Load during insertion.
  OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor];
  if (t == codegen.sparseOut)
    return genInsertionLoad(codegen, builder, op, t);
  // Actual load.
  SmallVector<Value, 4> args;
  Value ptr = genSubscript(codegen, builder, op, t, args);
  if (codegen.curVecLength > 1)
    return genVectorLoad(codegen, builder, ptr, args);
  return builder.create<memref::LoadOp>(op.getLoc(), ptr, args);
}

/// Generates a store on a dense or sparse tensor.
static void genTensorStore(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                           linalg::GenericOp op, unsigned exp, Value rhs) {
  Location loc = op.getLoc();
  // Test if this is a scalarized reduction.
  if (codegen.redVal) {
    if (codegen.curVecLength > 1)
      rhs = builder.create<arith::SelectOp>(loc, codegen.curVecMask, rhs,
                                            codegen.redVal);
    updateReduc(merger, codegen, rhs);
    return;
  }
  // Store during insertion.
  OpOperand *t = op.getOutputOperand(0);
  if (t == codegen.sparseOut) {
    if (!rhs) {
      // Only unary and binary are allowed to return uninitialized rhs
      // to indicate missing output.
      assert(merger.exp(exp).kind == kUnary || merger.exp(exp).kind == kBinary);
    } else {
      genInsertionStore(codegen, builder, op, t, rhs);
    }
    return;
  }
  // Actual store.
  SmallVector<Value, 4> args;
  Value ptr = genSubscript(codegen, builder, op, t, args);
  if (codegen.curVecLength > 1)
    genVectorStore(codegen, builder, rhs, ptr, args);
  else
    builder.create<memref::StoreOp>(loc, rhs, ptr, args);
}

/// Generates a pointer/index load from the sparse storage scheme. Narrower
/// data types need to be zero extended before casting the value into the
/// index type used for looping and indexing.
static Value genLoad(CodeGen &codegen, OpBuilder &builder, Location loc,
                     Value ptr, Value s) {
  // See https://llvm.org/docs/GetElementPtr.html for some background on
  // the complications described below.
  if (codegen.curVecLength > 1) {
    // Since the index vector is used in subsequent gather/scatter operations,
    // which effectively define an unsigned pointer + signed index, we must
    // zero extend the vector to an index width. For 8-bit and 16-bit values,
    // a 32-bit index width suffices. For 32-bit values, zero extending the
    // elements into 64-bit loses some performance since the 32-bit indexed
    // gather/scatter is more efficient than the 64-bit index variant (if the
    // negative 32-bit index space is unused, the enableSIMDIndex32 flag can
    // preserve this performance). For 64-bit values, there is no good way
    // to state that the indices are unsigned, which creates the potential of
    // incorrect address calculations in the unlikely case we need such
    // extremely large offsets.
    Type etp = ptr.getType().cast<MemRefType>().getElementType();
    Value vload = genVectorLoad(codegen, builder, ptr, {s});
    if (!etp.isa<IndexType>()) {
      if (etp.getIntOrFloatBitWidth() < 32)
        vload = builder.create<arith::ExtUIOp>(
            loc, vectorType(codegen, builder.getI32Type()), vload);
      else if (etp.getIntOrFloatBitWidth() < 64 &&
               !codegen.options.enableSIMDIndex32)
        vload = builder.create<arith::ExtUIOp>(
            loc, vectorType(codegen, builder.getI64Type()), vload);
    }
    return vload;
  }
  // For the scalar case, we simply zero extend narrower indices into 64-bit
  // values before casting to index without a performance penalty. Here too,
  // however, indices that already are 64-bit, in theory, cannot express the
  // full range as explained above.
  Value load = builder.create<memref::LoadOp>(loc, ptr, s);
  if (!load.getType().isa<IndexType>()) {
    if (load.getType().getIntOrFloatBitWidth() < 64)
      load = builder.create<arith::ExtUIOp>(loc, builder.getI64Type(), load);
    load =
        builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), load);
  }
  return load;
}
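
// As an illustrative example, an 8-bit pointer entry holding the value 255 is
// zero extended to 255 (and not sign extended to -1) before the cast to the
// index type, so positions up to the full unsigned range of the narrower
// overhead type remain addressable.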

/// Generates an invariant value.
static Value genInvariantValue(Merger &merger, CodeGen &codegen,
                               OpBuilder &builder, unsigned exp) {
  Value val = merger.exp(exp).val;
  if (codegen.curVecLength > 1)
    return genVectorInvariantValue(codegen, builder, val);
  return val;
}

/// Generates an address computation "sz * p + i".
static Value genAddress(CodeGen &codegen, OpBuilder &builder, Location loc,
                        Value size, Value p, Value i) {
  Value mul = builder.create<arith::MulIOp>(loc, size, p);
  if (auto vtp = i.getType().dyn_cast<VectorType>()) {
    Value inv =
        builder.create<arith::IndexCastOp>(loc, vtp.getElementType(), mul);
    mul = genVectorInvariantValue(codegen, builder, inv);
  }
  return builder.create<arith::AddIOp>(loc, mul, i);
}
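
// As an illustrative example, for a dense inner dimension of size sz = 10, an
// outer position p = 3 and inner index i = 7 yield the linearized address
// 10 * 3 + 7 = 37 into the corresponding values buffer.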

/// Generates an index value.
static Value genIndexValue(CodeGen &codegen, OpBuilder &builder, unsigned idx,
                           unsigned ldx) {
  Value ival = codegen.loops[idx];
  Type itype = ival.getType();
  // During vectorization, we either encounter:
  // (1) indices already in vector form, as in ... = ind[lo:hi], good to go, or
  // (2) single index, as in ... = i, must convert to [i, i+1, ...] for inner i.
  unsigned vl = codegen.curVecLength;
  if (vl > 1 && !itype.isa<VectorType>()) {
    Location loc = ival.getLoc();
    VectorType vtp = vectorType(codegen, itype);
    ival = builder.create<vector::BroadcastOp>(loc, vtp, ival);
    if (idx == ldx) {
      Value incr;
      if (vtp.isScalable()) {
        Type stepvty = vectorType(codegen, builder.getI64Type());
        Value stepv = builder.create<LLVM::StepVectorOp>(loc, stepvty);
        incr = builder.create<arith::IndexCastOp>(loc, vtp, stepv);
      } else {
        SmallVector<APInt, 4> integers;
        for (unsigned i = 0; i < vl; i++)
          integers.push_back(APInt(/*width=*/64, i));
        auto values = DenseElementsAttr::get(vtp, integers);
        incr = builder.create<arith::ConstantOp>(loc, vtp, values);
      }
      ival = builder.create<arith::AddIOp>(loc, ival, incr);
    }
  }
  return ival;
}
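
// As an illustrative example, with vector length vl = 4 and innermost loop
// index i = 8, the broadcast followed by the increment above produces the
// index vector [8, 9, 10, 11] for case (2).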

/// Semi-ring branches are simply inlined by the sparse compiler. Prior
/// analysis has verified that all computations are "local" to the inlined
/// branch or otherwise invariantly defined outside the loop nest, with the
/// exception of index computations, which need to be relinked to actual
/// inlined cloned code.
static Value relinkBranch(CodeGen &codegen, RewriterBase &rewriter,
                          Block *block, Value e, unsigned ldx) {
  if (Operation *def = e.getDefiningOp()) {
    if (auto indexOp = dyn_cast<linalg::IndexOp>(def))
      return genIndexValue(codegen, rewriter, indexOp.dim(), ldx);
    if (def->getBlock() == block) {
      for (unsigned i = 0, n = def->getNumOperands(); i < n; i++)
        def->setOperand(
            i, relinkBranch(codegen, rewriter, block, def->getOperand(i), ldx));
    }
  }
  return e;
}
|
|
|
|
|
2020-11-18 04:13:18 +08:00
|
|
|
/// Recursively generates tensor expression.
|
2022-05-13 17:32:14 +08:00
|
|
|
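/// For instance, for a kernel x(i) = a(i) * b(i) with a sparse and b dense,
/// the expression tree mul(tensor(a), tensor(b)) recursively yields roughly
/// (a sketch, assuming f64 elements and illustrative buffer names):
///   %0 = memref.load %a_values[%pa] : memref<?xf64>
///   %1 = memref.load %b_values[%i] : memref<?xf64>
///   %2 = arith.mulf %0, %1 : f64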
static Value genExp(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
                    linalg::GenericOp op, unsigned exp, unsigned ldx) {
  Location loc = op.getLoc();
  if (exp == -1u)
    return Value();
  if (merger.exp(exp).kind == Kind::kTensor)
    return genTensorLoad(merger, codegen, rewriter, op, exp);
  if (merger.exp(exp).kind == Kind::kInvariant)
    return genInvariantValue(merger, codegen, rewriter, exp);
  if (merger.exp(exp).kind == Kind::kIndex)
    return genIndexValue(codegen, rewriter, merger.exp(exp).index, ldx);
  Value v0 =
      genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0, ldx);
  Value v1 =
      genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1, ldx);
  Value ee = merger.buildExp(rewriter, loc, exp, v0, v1);
  if (ee && (merger.exp(exp).kind == Kind::kUnary ||
             merger.exp(exp).kind == Kind::kBinary ||
             merger.exp(exp).kind == Kind::kBinaryBranch))
    ee = relinkBranch(codegen, rewriter, ee.getParentBlock(), ee, ldx);
  return ee;
}

/// Determines if affine expression is invariant.
static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a,
                              unsigned ldx, bool &atLevel) {
  switch (a.getKind()) {
  case AffineExprKind::DimId: {
    unsigned idx = a.cast<AffineDimExpr>().getPosition();
    if (idx == ldx)
      atLevel = true;
    return codegen.loops[idx] != nullptr; // no longer in play?
  }
  case AffineExprKind::Add:
  case AffineExprKind::Mul: {
    auto binOp = a.cast<AffineBinaryOpExpr>();
    return isInvariantAffine(codegen, binOp.getLHS(), ldx, atLevel) &&
           isInvariantAffine(codegen, binOp.getRHS(), ldx, atLevel);
  }
  default:
    return true;
  }
}

/// Hoists loop invariant tensor loads for which indices have been exhausted.
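/// For example, in a kernel x(i,j) = a(i,j) * b(i), the load of b(i) becomes
/// exhausted once the i-loop is in place, so it is hoisted out of the inner
/// j-loop instead of being reloaded at every (i,j) iteration.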
static void genInvariants(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                          linalg::GenericOp op, unsigned exp, unsigned ldx,
                          bool atStart, Kind last = Kind::kTensor) {
  if (exp == -1u)
    return;
  if (merger.exp(exp).kind == Kind::kTensor) {
    // Inspect tensor indices.
    bool atLevel = ldx == -1u;
    OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor];
    auto map = op.getTiedIndexingMap(t);
    auto enc = getSparseTensorEncoding(t->get().getType());
    for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
      AffineExpr a = map.getResult(perm(enc, d));
      if (!isInvariantAffine(codegen, a, ldx, atLevel))
        return; // still in play
    }
    // All exhausted at this level (atLevel denotes exactly at this level).
    if (!atLevel)
      return;
    OpOperand *lhs = op.getOutputOperand(0);
    if (lhs == t) {
      // Start or end a scalarized reduction.
      if (atStart) {
        Value load = genTensorLoad(merger, codegen, builder, op, exp);
        codegen.redKind = getReduction(last);
        codegen.redExp = exp;
        updateReduc(merger, codegen, load);
      } else {
        Value redVal = codegen.redVal;
        updateReduc(merger, codegen, Value());
        codegen.redExp = -1u;
        codegen.redKind = kNoReduc;
        genTensorStore(merger, codegen, builder, op, exp, redVal);
      }
    } else {
      // Start or end loop invariant hoisting of a tensor load.
      merger.exp(exp).val =
          atStart ? genTensorLoad(merger, codegen, builder, op, exp) : Value();
    }
  } else if (merger.exp(exp).kind != Kind::kInvariant &&
             merger.exp(exp).kind != Kind::kIndex) {
    // Traverse into the binary operations. Note that we only hoist
    // tensor loads, since subsequent MLIR/LLVM passes know how to
    // deal with all other kinds of derived loop invariants.
    Kind last = merger.exp(exp).kind;
    unsigned e0 = merger.exp(exp).children.e0;
    unsigned e1 = merger.exp(exp).children.e1;
    genInvariants(merger, codegen, builder, op, e0, ldx, atStart, last);
    genInvariants(merger, codegen, builder, op, e1, ldx, atStart, last);
  }
}

/// Generates an expanded access pattern in innermost dimension.
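/// At the start of the innermost loop nest this emits an access pattern
/// expansion of the sparse output, and at the end a matching compression,
/// roughly (a sketch with illustrative names, not the exact printed form):
///   %values, %filled, %added, %count = sparse_tensor.expand %argx ...
///   ...
///   sparse_tensor.compress %argx, %lexIdx, %values, %filled, %added, %count ...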
static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                         linalg::GenericOp op, unsigned at, bool atStart) {
  OpOperand *lhs = codegen.sparseOut;
  if (!lhs || codegen.outerParNest != op.getRank(lhs) - 1 ||
      at != codegen.outerParNest)
    return; // not needed at this level
  // Generate start or end of an expanded access pattern.
  Value tensor = lhs->get();
  Location loc = op.getLoc();
  if (atStart) {
    auto dynShape = {ShapedType::kDynamicSize};
    Type etp = tensor.getType().cast<ShapedType>().getElementType();
    Type t1 = MemRefType::get(dynShape, etp);
    Type t2 = MemRefType::get(dynShape, builder.getI1Type());
    Type t3 = MemRefType::get(dynShape, builder.getIndexType());
    Type t4 = builder.getIndexType();
    auto res =
        builder.create<ExpandOp>(loc, TypeRange({t1, t2, t3, t4}), tensor);
    assert(res.getNumResults() == 4);
    assert(!codegen.expValues);
    codegen.expValues = res.getResult(0);
    codegen.expFilled = res.getResult(1);
    codegen.expAdded = res.getResult(2);
    codegen.expCount = res.getResult(3);
  } else {
    assert(codegen.expValues);
    builder.create<CompressOp>(loc, tensor, codegen.lexIdx, codegen.expValues,
                               codegen.expFilled, codegen.expAdded,
                               codegen.expCount);
    codegen.expValues = codegen.expFilled = codegen.expAdded =
        codegen.expCount = Value();
  }
}

/// Generates initialization code for the subsequent loop sequence at
/// current index level. Returns true if the loop sequence needs to
/// maintain the universal index.
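/// For a tensor dimension stored sparsely at this index, for example, the
/// pair of positions delimiting its stored entries is loaded roughly as
/// (a sketch with illustrative names):
///   %p1 = arith.addi %p0, %c1 : index
///   %lo = memref.load %pointers[%p0] : memref<?xindex>
///   %hi = memref.load %pointers[%p1] : memref<?xindex>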
static bool genInit(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                    linalg::GenericOp op, std::vector<unsigned> &topSort,
                    unsigned at, BitVector &inits) {
  bool needsUniv = false;
  Location loc = op.getLoc();
  unsigned idx = topSort[at];

  // Initialize sparse positions.
  for (unsigned b = 0, be = inits.size(); b < be; b++) {
    if (inits[b]) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      if (merger.isDim(b, Dim::kSparse)) {
        // Initialize sparse index.
        unsigned pat = at;
        for (; pat != 0; pat--) {
          if (codegen.pidxs[tensor][topSort[pat - 1]])
            break;
        }
        Value ptr = codegen.pointers[tensor][idx];
        Value one = constantIndex(builder, loc, 1);
        Value p0 = (pat == 0) ? constantIndex(builder, loc, 0)
                              : codegen.pidxs[tensor][topSort[pat - 1]];
        codegen.pidxs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p0);
        Value p1 = builder.create<arith::AddIOp>(loc, p0, one);
        codegen.highs[tensor][idx] = genLoad(codegen, builder, loc, ptr, p1);
      } else {
        // Dense index still in play.
        needsUniv = true;
      }
    }
  }

  // Initialize the universal dense index.
  codegen.loops[idx] = constantIndex(builder, loc, 0);
  return needsUniv;
}

/// Returns vectorization strategy. Any implicit inner loop in the Linalg
/// operation is a candidate. Whether it is actually converted to SIMD code
/// depends on the requested strategy.
static bool isVectorFor(CodeGen &codegen, bool isInner, bool isReduction,
                        bool isSparse) {
  // Reject vectorization of sparse output, unless innermost is reduction.
  if (codegen.sparseOut && !isReduction)
    return false;
  // Inspect strategy.
  switch (codegen.options.vectorizationStrategy) {
  case SparseVectorizationStrategy::kNone:
    return false;
  case SparseVectorizationStrategy::kDenseInnerLoop:
    return isInner && !isSparse;
  case SparseVectorizationStrategy::kAnyStorageInnerLoop:
    return isInner;
  }
  llvm_unreachable("unexpected vectorization strategy");
}

/// Returns parallelization strategy. Any implicit loop in the Linalg operation
/// that is marked "parallel" is a candidate. Whether it is actually converted
/// to a parallel operation depends on the requested strategy.
static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction,
                          bool isSparse, bool isVector) {
  // Reject parallelization of sparse output.
  if (codegen.sparseOut)
    return false;
  // Inspect strategy.
  switch (codegen.options.parallelizationStrategy) {
  case SparseParallelizationStrategy::kNone:
    return false;
  case SparseParallelizationStrategy::kDenseOuterLoop:
    return isOuter && !isSparse && !isReduction && !isVector;
  case SparseParallelizationStrategy::kAnyStorageOuterLoop:
    return isOuter && !isReduction && !isVector;
  case SparseParallelizationStrategy::kDenseAnyLoop:
    return !isSparse && !isReduction && !isVector;
  case SparseParallelizationStrategy::kAnyStorageAnyLoop:
    return !isReduction && !isVector;
  }
  llvm_unreachable("unexpected parallelization strategy");
}

/// Checks unit stride for dense tensors. The iteration graph may have ignored
/// dense access patterns in order to avoid cycles (sparse access patterns are
/// always placed innermost), but that means dense access has become strided.
/// This prevents effective vectorization.
static bool denseUnitStrides(Merger &merger, linalg::GenericOp op,
                             unsigned idx) {
  for (OpOperand *t : op.getInputAndOutputOperands()) {
    if (!getSparseTensorEncoding(t->get().getType())) {
      auto map = op.getTiedIndexingMap(t);
      for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
        AffineExpr a = map.getResult(d);
        // Report non-unit stride if innermost index appears at an outer
        // dimension (true non-unit stride) or if the innermost index appears
        // in a compound subscript in the innermost dimension. Even if the
        // latter is unit stride, it does not play well with scatter/gather.
        // TODO: accept unit stride affine innermost like a[i,j+k+1]?
        if (a.isFunctionOfDim(idx) &&
            ((d != rank - 1) || (a.getKind() != AffineExprKind::DimId)))
          return false;
      }
    }
  }
  return true;
}

/// Generates a for-loop on a single index.
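/// Depending on the chosen strategies, the loop is emitted as a sequential,
/// parallel, or vectorized loop, e.g. roughly for the sequential case with a
/// scalarized reduction (a sketch):
///   scf.for %i = %lo to %hi step %c1 iter_args(%red = %init) -> (f64) {
///     ...
///     scf.yield %newred : f64
///   }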
static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                         linalg::GenericOp op, bool isOuter, bool isInner,
                         unsigned idx, BitVector &indices) {
  unsigned fb = indices.find_first();
  unsigned tensor = merger.tensor(fb);
  assert(idx == merger.index(fb));
  auto iteratorTypes = op.iterator_types().getValue();
  bool isReduction = isReductionIterator(iteratorTypes[idx]);
  bool isSparse = merger.isDim(fb, Dim::kSparse);
  bool isVector = isVectorFor(codegen, isInner, isReduction, isSparse) &&
                  denseUnitStrides(merger, op, idx);
  bool isParallel =
      isParallelFor(codegen, isOuter, isReduction, isSparse, isVector);

  // Prepare vector length.
  if (isVector)
    codegen.curVecLength = codegen.options.vectorLength;

  // Loop bounds and increment.
  Location loc = op.getLoc();
  Value lo = isSparse ? codegen.pidxs[tensor][idx] : codegen.loops[idx];
  Value hi = isSparse ? codegen.highs[tensor][idx] : codegen.sizes[idx];
  Value step = constantIndex(builder, loc, codegen.curVecLength);
  if (isVector && codegen.options.enableVLAVectorization) {
    Value vscale = builder.create<vector::VectorScaleOp>(
        loc, IndexType::get(builder.getContext()));
    step = builder.create<arith::MulIOp>(loc, vscale, step);
  }

  // Emit a parallel loop.
  if (isParallel) {
    assert(!isVector);
    scf::ParallelOp parOp = builder.create<scf::ParallelOp>(loc, lo, hi, step);
    if (isSparse)
      codegen.pidxs[tensor][idx] = parOp.getInductionVars()[0];
    else
      codegen.loops[idx] = parOp.getInductionVars()[0];
    builder.setInsertionPointToStart(parOp.getBody());
    return parOp;
  }

  // Emit a sequential or vector loop.
  SmallVector<Value, 4> operands;
  if (codegen.redVal) {
    // In a vector loop, bring reduction into SIMD form, if not already.
    if (isVector && !codegen.redVal.getType().isa<VectorType>()) {
      VectorType vtp = vectorType(codegen, codegen.redVal.getType());
      Value vred = genVectorReducInit(codegen, builder, loc, vtp);
      updateReduc(merger, codegen, vred);
    }
    operands.push_back(codegen.redVal);
  }
  if (codegen.expValues)
    operands.push_back(codegen.expCount);
  scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, operands);
  if (codegen.redVal)
    updateReduc(merger, codegen, forOp.getRegionIterArgs().front());
  if (codegen.expValues)
    codegen.expCount = forOp.getRegionIterArgs().back();
  // Assign induction variable to sparse or dense index.
  Value iv = forOp.getInductionVar();
  if (isSparse)
    codegen.pidxs[tensor][idx] = iv;
  else
    codegen.loops[idx] = iv;
  builder.setInsertionPointToStart(forOp.getBody());
  // Share vector iteration mask between all subsequent loads/stores.
  if (isVector)
    codegen.curVecMask = genVectorMask(codegen, builder, iv, lo, hi, step);
  return forOp;
}

/// Emits a while-loop for co-iteration over multiple indices.
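/// The while-loop carries one position per co-iterated sparse tensor (plus
/// an optional reduction, expansion count, and universal index), e.g. roughly
/// for two sparse operands (a sketch):
///   %res:2 = scf.while (%pa = %a_lo, %pb = %b_lo)
///       : (index, index) -> (index, index) {
///     %ca = arith.cmpi ult, %pa, %a_hi : index
///     %cb = arith.cmpi ult, %pb, %b_hi : index
///     %c = arith.andi %ca, %cb : i1
///     scf.condition(%c) %pa, %pb : index, index
///   } do {
///   ^bb0(%pa: index, %pb: index):
///     ...
///     scf.yield %pa_next, %pb_next : index, index
///   }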
static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                           linalg::GenericOp op, unsigned idx, bool needsUniv,
                           BitVector &indices) {
  SmallVector<Type, 4> types;
  SmallVector<Value, 4> operands;
  // Construct the while-loop with a parameter for each index.
  Type indexType = builder.getIndexType();
  for (unsigned b = 0, be = indices.size(); b < be; b++) {
    if (indices[b] && merger.isDim(b, Dim::kSparse)) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      types.push_back(indexType);
      operands.push_back(codegen.pidxs[tensor][idx]);
    }
  }
  if (codegen.redVal) {
    types.push_back(codegen.redVal.getType());
    operands.push_back(codegen.redVal);
  }
  if (codegen.expValues) {
    types.push_back(indexType);
    operands.push_back(codegen.expCount);
  }
  if (needsUniv) {
    types.push_back(indexType);
    operands.push_back(codegen.loops[idx]);
  }
  assert(types.size() == operands.size());
  Location loc = op.getLoc();
  scf::WhileOp whileOp = builder.create<scf::WhileOp>(loc, types, operands);

  SmallVector<Location> locs(types.size(), loc);
  Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs);
  Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs);

  // Build the "before" region, which effectively consists
  // of a conjunction of "i < upper" tests on all induction variables.
  builder.setInsertionPointToStart(&whileOp.getBefore().front());
  Value cond;
  unsigned o = 0;
  for (unsigned b = 0, be = indices.size(); b < be; b++) {
    if (indices[b] && merger.isDim(b, Dim::kSparse)) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      Value op1 = before->getArgument(o);
      Value op2 = codegen.highs[tensor][idx];
      Value opc = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult,
                                                op1, op2);
      cond = cond ? builder.create<arith::AndIOp>(loc, cond, opc) : opc;
      codegen.pidxs[tensor][idx] = after->getArgument(o++);
    }
  }
  if (codegen.redVal)
    updateReduc(merger, codegen, after->getArgument(o++));
  if (codegen.expValues)
    codegen.expCount = after->getArgument(o++);
  if (needsUniv)
    codegen.loops[idx] = after->getArgument(o++);
  assert(o == operands.size());
  builder.create<scf::ConditionOp>(loc, cond, before->getArguments());
  builder.setInsertionPointToStart(&whileOp.getAfter().front());
  return whileOp;
}

/// Generates a for-loop or a while-loop, depending on whether it implements
/// singleton iteration or co-iteration over the given conjunction.
static Operation *genLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                          linalg::GenericOp op, std::vector<unsigned> &topSort,
                          unsigned at, bool needsUniv, BitVector &indices) {
  unsigned idx = topSort[at];
  if (indices.count() == 1) {
    bool isOuter = at == 0;
    bool isInner = at == topSort.size() - 1;
    return genFor(merger, codegen, builder, op, isOuter, isInner, idx, indices);
  }
  return genWhile(merger, codegen, builder, op, idx, needsUniv, indices);
}

/// Generates the local variables for this loop, consisting of the sparse
/// indices, restored universal dense index, and dense positions.
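/// Within a co-iterating while-loop, for example, each sparse index is
/// loaded from its indices array and the loop index is restored as their
/// minimum, roughly (a sketch with illustrative names):
///   %ia = memref.load %a_indices[%pa] : memref<?xindex>
///   %ib = memref.load %b_indices[%pb] : memref<?xindex>
///   %c = arith.cmpi ult, %ib, %ia : index
///   %i = arith.select %c, %ib, %ia : index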
static void genLocals(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                      linalg::GenericOp op, std::vector<unsigned> &topSort,
                      unsigned at, bool needsUniv, BitVector &locals) {
  Location loc = op.getLoc();
  unsigned idx = topSort[at];

  // Initialize sparse indices.
  Value min;
  for (unsigned b = 0, be = locals.size(); b < be; b++) {
    if (locals[b] && merger.isDim(b, Dim::kSparse)) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      Value ptr = codegen.indices[tensor][idx];
      Value s = codegen.pidxs[tensor][idx];
      Value load = genLoad(codegen, builder, loc, ptr, s);
      codegen.idxs[tensor][idx] = load;
      if (!needsUniv) {
        if (min) {
          Value cmp = builder.create<arith::CmpIOp>(
              loc, arith::CmpIPredicate::ult, load, min);
          min = builder.create<arith::SelectOp>(loc, cmp, load, min);
        } else {
          min = load;
        }
      }
    }
  }

  // Merge dense universal index over minimum.
  if (min) {
    assert(!needsUniv);
    codegen.loops[idx] = min;
  }

  // Initialize dense positions. Note that we generate dense indices of the
  // output tensor unconditionally, since they may not appear in the lattice,
  // but may be needed for linearized codegen.
  for (unsigned b = 0, be = locals.size(); b < be; b++) {
    if ((locals[b] || merger.isOutTensor(b, idx)) &&
        merger.isDim(b, Dim::kDense)) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      unsigned pat = at;
      for (; pat != 0; pat--)
        if (codegen.pidxs[tensor][topSort[pat - 1]])
          break;
      Value p = (pat == 0) ? constantIndex(builder, loc, 0)
                           : codegen.pidxs[tensor][topSort[pat - 1]];
      codegen.pidxs[tensor][idx] = genAddress(
          codegen, builder, loc, codegen.sizes[idx], p, codegen.loops[idx]);
    }
  }

  // Move the insertion indices in lexicographic index order. During access
  // pattern expansion, we can skip setting the innermost dimension.
  if (codegen.sparseOut && !codegen.expValues) {
    Value pos = constantIndex(builder, loc, at);
    builder.create<memref::StoreOp>(loc, codegen.loops[idx], codegen.lexIdx,
                                    pos);
  }
}

/// Generates the induction structure for a while-loop.
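/// Each sparse position is advanced only when its index coincided with the
/// current loop index, roughly (a sketch):
///   %c = arith.cmpi eq, %ia, %i : index
///   %pa1 = arith.addi %pa, %c1 : index
///   %pa2 = arith.select %c, %pa1, %pa : index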
static void genWhileInduction(Merger &merger, CodeGen &codegen,
                              OpBuilder &builder, linalg::GenericOp op,
                              unsigned idx, bool needsUniv,
                              BitVector &induction, scf::WhileOp whileOp) {
  Location loc = op.getLoc();
  // Finalize each else branch of all if statements.
  if (codegen.redVal || codegen.expValues) {
    while (auto ifOp = dyn_cast_or_null<scf::IfOp>(
               builder.getInsertionBlock()->getParentOp())) {
      unsigned y = 0;
      SmallVector<Value, 4> yields;
      if (codegen.redVal) {
        yields.push_back(codegen.redVal);
        updateReduc(merger, codegen, ifOp.getResult(y++));
      }
      if (codegen.expValues) {
        yields.push_back(codegen.expCount);
        codegen.expCount = ifOp->getResult(y++);
      }
      assert(y == yields.size());
      builder.create<scf::YieldOp>(loc, yields);
      builder.setInsertionPointAfter(ifOp);
    }
  }
  builder.setInsertionPointToEnd(&whileOp.getAfter().front());
  // Finalize the induction. Note that the induction could be performed
  // in the individual if-branches to avoid re-evaluating the conditions.
  // However, that would result in a rather elaborate forest of yield
  // instructions during code generation. Moreover, performing the induction
  // after the if-statements more closely resembles code generated by TACO.
  unsigned o = 0;
  SmallVector<Value, 4> operands;
  Value one = constantIndex(builder, loc, 1);
  for (unsigned b = 0, be = induction.size(); b < be; b++) {
    if (induction[b] && merger.isDim(b, Dim::kSparse)) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      Value op1 = codegen.idxs[tensor][idx];
      Value op2 = codegen.loops[idx];
      Value op3 = codegen.pidxs[tensor][idx];
      Value cmp = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
                                                op1, op2);
      Value add = builder.create<arith::AddIOp>(loc, op3, one);
      operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, op3));
      codegen.pidxs[tensor][idx] = whileOp->getResult(o++);
    }
  }
  if (codegen.redVal) {
    operands.push_back(codegen.redVal);
    updateReduc(merger, codegen, whileOp->getResult(o++));
  }
  if (codegen.expValues) {
    operands.push_back(codegen.expCount);
    codegen.expCount = whileOp->getResult(o++);
  }
  if (needsUniv) {
    operands.push_back(
        builder.create<arith::AddIOp>(loc, codegen.loops[idx], one));
    codegen.loops[idx] = whileOp->getResult(o++);
  }
  assert(o == operands.size());
  builder.create<scf::YieldOp>(loc, operands);
  builder.setInsertionPointAfter(whileOp);
}

/// Generates the induction structure for a for-loop.
static void genForInduction(Merger &merger, CodeGen &codegen,
                            OpBuilder &builder, linalg::GenericOp op,
                            Operation *loop) {
  Location loc = op.getLoc();
  unsigned o = 0;
  SmallVector<Value, 4> operands;
  if (codegen.redVal) {
    operands.push_back(codegen.redVal);
    updateReduc(merger, codegen, loop->getResult(o++));
  }
  if (codegen.expValues) {
    operands.push_back(codegen.expCount);
    codegen.expCount = loop->getResult(o++);
  }
  assert(o == operands.size());
  if (o > 0)
    builder.create<scf::YieldOp>(loc, operands);
  builder.setInsertionPointAfter(loop);
}

/// Generates a single if-statement within a while-loop.
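/// The guard tests which sparse indices coincide with the current loop
/// index, e.g. roughly for one sparse operand and a scalarized reduction
/// (a sketch):
///   %c = arith.cmpi eq, %ia, %i : index
///   %r = scf.if %c -> (f64) {
///     ...
///     scf.yield %then : f64
///   } else {
///     scf.yield %else : f64
///   }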
static scf::IfOp genIf(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                       linalg::GenericOp op, unsigned idx,
                       BitVector &conditions) {
  Location loc = op.getLoc();
  SmallVector<Type, 4> types;
  Value cond;
  for (unsigned b = 0, be = conditions.size(); b < be; b++) {
    if (conditions[b]) {
      unsigned tensor = merger.tensor(b);
      assert(idx == merger.index(b));
      Value clause;
      if (merger.isDim(b, Dim::kSparse)) {
        Value op1 = codegen.idxs[tensor][idx];
        Value op2 = codegen.loops[idx];
        clause = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
                                               op1, op2);
      } else {
        clause = constantI1(builder, loc, true);
      }
      cond = cond ? builder.create<arith::AndIOp>(loc, cond, clause) : clause;
    }
  }
  if (codegen.redVal)
    types.push_back(codegen.redVal.getType());
  if (codegen.expValues)
    types.push_back(builder.getIndexType());
  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, types, cond, /*else=*/true);
  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
  return ifOp;
}

/// Generates end of true branch of if-statement within a while-loop.
static void endIf(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                  linalg::GenericOp op, scf::IfOp ifOp, Operation *loop,
                  Value redInput, Value cntInput) {
  SmallVector<Value, 4> operands;
  if (codegen.redVal) {
    operands.push_back(codegen.redVal);
    updateReduc(merger, codegen, redInput);
  }
  if (codegen.expValues) {
    operands.push_back(codegen.expCount);
    codegen.expCount = cntInput;
  }
  if (!operands.empty())
    builder.create<scf::YieldOp>(op.getLoc(), operands);
  builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
}

//===----------------------------------------------------------------------===//
// Sparse compiler synthesis methods (loop sequence).
//===----------------------------------------------------------------------===//

/// Starts a loop sequence at given level. Returns true if
/// the universal loop index must be maintained at this level.
static bool startLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                         linalg::GenericOp op, std::vector<unsigned> &topSort,
                         unsigned exp, unsigned at, unsigned idx, unsigned ldx,
                         unsigned lts) {
  assert(codegen.curVecLength == 1);
  assert(!codegen.loops[idx]);
  // Emit invariants at this loop sequence level.
  genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/true);
  // Emit access pattern expansion for sparse tensor output.
  genExpansion(merger, codegen, builder, op, at, /*atStart=*/true);
  // Emit further initialization at this loop sequence level.
  unsigned l0 = merger.set(lts)[0];
  bool needsUniv =
      genInit(merger, codegen, builder, op, topSort, at, merger.lat(l0).bits);
  // Maintain the universal index only if it is actually
  // consumed by a subsequent lattice point.
  if (needsUniv) {
    unsigned lsize = merger.set(lts).size();
    for (unsigned i = 1; i < lsize; i++) {
      unsigned li = merger.set(lts)[i];
      if (!merger.hasAnyDimOf(merger.lat(li).simple, Dim::kSparse))
        return true;
    }
  }
  return false;
}

/// Starts a single loop in current sequence.
static Operation *startLoop(Merger &merger, CodeGen &codegen,
                            OpBuilder &builder, linalg::GenericOp op,
                            std::vector<unsigned> &topSort, unsigned at,
                            unsigned li, bool needsUniv) {
  assert(codegen.curVecLength == 1);
  // Emit the for/while-loop control.
  Operation *loop = genLoop(merger, codegen, builder, op, topSort, at,
                            needsUniv, merger.lat(li).simple);
  // Emit the locals for this loop.
  genLocals(merger, codegen, builder, op, topSort, at, needsUniv,
            merger.lat(li).bits);
  return loop;
}

/// Ends a single loop in current sequence. Returns the new value of needsUniv.
static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                    linalg::GenericOp op, Operation *loop, unsigned idx,
                    unsigned li, bool needsUniv) {
  codegen.curVecLength = 1;
  // End a while-loop.
  if (auto whileOp = dyn_cast<scf::WhileOp>(loop)) {
    genWhileInduction(merger, codegen, builder, op, idx, needsUniv,
                      merger.lat(li).bits, whileOp);
    return needsUniv;
  }
  // End a for-loop.
  genForInduction(merger, codegen, builder, op, loop);
  return false;
}

/// Ends a loop sequence at given level.
static void endLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder,
                       linalg::GenericOp op, unsigned exp, unsigned at,
                       unsigned idx, unsigned ldx) {
  assert(codegen.curVecLength == 1);
  codegen.loops[idx] = Value();
  // Bring a pending reduction back from SIMD form when sequence ends.
  if (codegen.redVal)
    if (auto vtp = codegen.redVal.getType().dyn_cast<VectorType>())
      updateReduc(merger, codegen,
                  genVectorReducEnd(codegen, builder, op.getLoc(), vtp));
  // Unmark bookkeeping of invariants and loop index.
  genInvariants(merger, codegen, builder, op, exp, ldx, /*atStart=*/false);
  // Finalize access pattern expansion for sparse tensor output.
  genExpansion(merger, codegen, builder, op, at, /*atStart=*/false);
}

/// Recursively generates code while computing iteration lattices in order
/// to manage the complexity of implementing co-iteration over unions
/// and intersections of sparse iteration spaces.
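/// For a simple elementwise kernel x(i) = a(i) + b(i) with both operands
/// sparse, for instance, the lattice points yield a co-iterating while-loop
/// over the overlapping entries, followed by two for-loops over the
/// remaining entries of a and b, respectively (a sketch of the intended
/// structure rather than the exact IR).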
static void genStmt(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
                    linalg::GenericOp op, std::vector<unsigned> &topSort,
                    unsigned exp, unsigned at) {
  // At each leaf, assign remaining tensor (sub)expression to output tensor.
  if (at == topSort.size()) {
    unsigned ldx = topSort[at - 1];
    Value rhs = genExp(merger, codegen, rewriter, op, exp, ldx);
    genTensorStore(merger, codegen, rewriter, op, exp, rhs);
    return;
  }

  // Construct iteration lattices for current loop index, with L0 at top.
  unsigned idx = topSort[at];
  unsigned ldx = at == 0 ? -1u : topSort[at - 1];
  unsigned lts = merger.optimizeSet(merger.buildLattices(exp, idx));

  // Start a loop sequence.
  bool needsUniv = startLoopSeq(merger, codegen, rewriter, op, topSort, exp, at,
                                idx, ldx, lts);

  // Emit a loop for every lattice point L0 >= Li in this loop sequence.
  unsigned lsize = merger.set(lts).size();
  for (unsigned i = 0; i < lsize; i++) {
    // Start a loop.
    unsigned li = merger.set(lts)[i];
    Operation *loop =
        startLoop(merger, codegen, rewriter, op, topSort, at, li, needsUniv);

    // Visit all lattice points with Li >= Lj to generate the
    // loop-body, possibly with if statements for co-iteration.
    Value redInput = codegen.redVal;
    Value cntInput = codegen.expCount;
    bool isWhile = dyn_cast<scf::WhileOp>(loop) != nullptr;
    for (unsigned j = 0; j < lsize; j++) {
      unsigned lj = merger.set(lts)[j];
      unsigned ej = merger.lat(lj).exp;
      if (li == lj || merger.latGT(li, lj)) {
        // Recurse into body of each branch.
        if (isWhile) {
          scf::IfOp ifOp =
              genIf(merger, codegen, rewriter, op, idx, merger.lat(lj).simple);
          genStmt(merger, codegen, rewriter, op, topSort, ej, at + 1);
          endIf(merger, codegen, rewriter, op, ifOp, loop, redInput, cntInput);
        } else {
          genStmt(merger, codegen, rewriter, op, topSort, ej, at + 1);
        }
      }
    }

    // End a loop.
    needsUniv =
        endLoop(merger, codegen, rewriter, op, loop, idx, li, needsUniv);
  }

  // End a loop sequence.
  endLoopSeq(merger, codegen, rewriter, op, exp, at, idx, ldx);
}

/// Converts the result computed by the sparse kernel into the required form.
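/// Depending on the output annotation, this is either a sparse_tensor.load
/// of the updated sparse storage or a rematerialization of the dense value
/// buffer, e.g. roughly for the dense case (a sketch):
///   %r = bufferization.to_tensor %values : memref<?xf64>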
static void genResult(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
                      linalg::GenericOp op) {
  OpOperand *lhs = op.getOutputOperand(0);
  Type resType = lhs->get().getType();
  if (getSparseTensorEncoding(resType)) {
    // The sparse tensor rematerializes from the original sparse tensor's
    // underlying sparse storage format.
    rewriter.replaceOpWithNewOp<LoadOp>(op, resType, lhs->get(),
                                        codegen.sparseOut == lhs);
  } else {
    // To rematerialize a non-annotated tensor, simply load it
    // from the bufferized value.
    Value val = codegen.buffers.back(); // value array
    rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, val);
  }
}

//===----------------------------------------------------------------------===//
// Sparse compiler rewriting methods.
//===----------------------------------------------------------------------===//

namespace {

/// Sparse rewriting rule for generic Linalg operation.
struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
public:
  GenericOpSparsifier(MLIRContext *context, SparsificationOptions o)
      : OpRewritePattern<linalg::GenericOp>(context), options(o) {}

  LogicalResult matchAndRewrite(linalg::GenericOp op,
                                PatternRewriter &rewriter) const override {
    // Detects sparse annotations and translates the per-dimension sparsity
    // information for all tensors to loop indices in the kernel.
    assert(op.getNumOutputs() == 1);
    unsigned numTensors = op.getNumInputsAndOutputs();
    unsigned numLoops = op.iterator_types().getValue().size();
    Merger merger(numTensors, numLoops);
    if (!findSparseAnnotations(merger, op))
      return failure();

    // Computes a topologically sorted iteration graph to ensure
    // tensors are visited in natural index order. Fails on cycles.
    // This assumes that higher-level passes have already put the
    // tensors in each tensor expression in a feasible order.
    std::vector<unsigned> topSort;
    if (!computeIterationGraph(merger, op, topSort,
                               SortMask::kIncludeUndef |
                                   SortMask::kIncludeDense) &&
        !computeIterationGraph(merger, op, topSort, SortMask::kIncludeUndef) &&
        !computeIterationGraph(merger, op, topSort, SortMask::kIncludeDense) &&
        !computeIterationGraph(merger, op, topSort, SortMask::kSparseOnly))
      return failure();

    // Builds the tensor expression for the Linalg operation in SSA form.
    Optional<unsigned> optExp = merger.buildTensorExpFromLinalg(op);
    if (!optExp.hasValue())
      return failure();
    unsigned exp = optExp.getValue();

    // Rejects an inadmissible tensor expression.
    OpOperand *sparseOut = nullptr;
    unsigned outerParNest = 0;
    if (!isAdmissableTensorExp(merger, op, topSort, exp, &sparseOut,
                               outerParNest))
      return failure();

    // Recursively generates code.
    merger.setHasSparseOut(sparseOut != nullptr);
    CodeGen codegen(options, numTensors, numLoops, sparseOut, outerParNest);
    genBuffers(merger, codegen, rewriter, op);
    genStmt(merger, codegen, rewriter, op, topSort, exp, 0);
    genResult(merger, codegen, rewriter, op);
    return success();
  }

private:
  /// Options to control sparse code generation.
  SparsificationOptions options;
};

} // namespace

/// Populates the given patterns list with rewriting rules required for
/// the sparsification of linear algebra operations.
void mlir::populateSparsificationPatterns(
    RewritePatternSet &patterns, const SparsificationOptions &options) {
  patterns.add<GenericOpSparsifier>(patterns.getContext(), options);
}