[mlir][sparse] support affine expression on sparse dimensions (analysis implementation)
Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D138171
commit 372e7939d7 (parent d23b63cecc)
@@ -148,14 +148,29 @@ struct LatPoint {
 /// independently from the basic algorithm if bottlenecks are identified.
 class Merger {
 public:
-  /// Constructs a merger for the given number of tensors and loops. The
-  /// user supplies the number of tensors involved in the kernel, with the
-  /// last tensor in this set denoting the output tensor. The merger adds an
-  /// additional synthetic tensor at the end of this set to represent all
-  /// invariant expressions in the kernel.
-  Merger(unsigned t, unsigned l)
-      : outTensor(t - 1), syntheticTensor(t), numTensors(t + 1), numLoops(l),
-        hasSparseOut(false),
+  /// Constructs a merger for the given number of tensors, native loops, and
+  /// filter loops. The user supplies the number of tensors involved in the
+  /// kernel, with the last tensor in this set denoting the output tensor. The
+  /// merger adds an additional synthetic tensor at the end of this set to
+  /// represent all invariant expressions in the kernel.
+  ///
+  /// In addition to native loops (which are specified by the GenericOp),
+  /// extra filter loops are needed in order to handle affine expressions on
+  /// sparse dimensions. E.g., for (d0, d1, d2) => (d0 + d1, d2), a naive
+  /// implementation of the filter loop could be generated as
+  ///
+  ///   for (coord : sparse_dim[0])
+  ///     if (coord == d0 + d1) {
+  ///       generated_code;
+  ///     }
+  ///
+  /// to filter out coordinates that are not equal to the affine expression
+  /// result.
+  /// TODO: make the filter loop more efficient in the future, e.g., by
+  /// avoiding scanning the full sparse index storage (keeping the last
+  /// position in the ordered list) or even applying binary search to find
+  /// the coordinate.
+  Merger(unsigned t, unsigned l, unsigned fl)
+      : outTensor(t - 1), syntheticTensor(t), numTensors(t + 1),
+        numNativeLoops(l), numLoops(l + fl), hasSparseOut(false),
+        dimTypes(numTensors,
+                 std::vector<DimLevelType>(numLoops, DimLevelType::Undef)),
+        loopIdxToDim(numTensors,

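To make the naive filter-loop strategy from the comment above concrete, here is a minimal standalone C++ sketch (not the MLIR-generated code itself; the names storedCoords, d0, and d1 are invented for illustration): scan every stored coordinate of the sparse level and execute the body only for the coordinate matching d0 + d1.

#include <cstdio>
#include <vector>

// Toy model of one compressed (sparse) level: the coordinates that are
// actually stored, in sorted order. All names here are illustrative only.
int main() {
  std::vector<int> storedCoords = {0, 2, 3, 7}; // stored nonzero coordinates
  int d0 = 1, d1 = 2;                           // current native loop indices

  // Naive filter loop: scan the whole stored coordinate list and only
  // execute the body for the (at most one) coordinate equal to d0 + d1.
  for (int coord : storedCoords) {
    if (coord == d0 + d1) {
      std::printf("visiting stored coordinate %d\n", coord); // generated_code;
    }
  }
  return 0;
}
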
@@ -231,6 +246,15 @@ public:
   /// Bit translation (get loop index).
   unsigned index(unsigned b) const { return b / numTensors; }
 
+  /// Gets the total number of loops (native loops + filter loops).
+  unsigned getNumLoops() const { return numLoops; }
+  /// Gets the number of native loops.
+  unsigned getNumNativeLoops() const { return numNativeLoops; }
+  /// Gets the number of filter loops.
+  unsigned getNumFilterLoops() const { return numLoops - numNativeLoops; }
+  /// Gets the starting index of the filter loops.
+  unsigned getFilterLoopStartingIdx() const { return getNumNativeLoops(); }
+
   /// Returns true if bit corresponds to index of output tensor.
   bool isOutTensor(unsigned b, unsigned i) const {
     return tensor(b) == outTensor && index(b) == i;

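The accessors above partition a single loop-id space: native loops occupy ids [0, numNativeLoops) and filter loops occupy [numNativeLoops, numLoops). A minimal standalone mirror of that bookkeeping (illustrative only, not the actual Merger class):

#include <cassert>

// Illustrative mirror of the Merger's loop-index layout: native loops take
// ids [0, l) and filter loops take ids [l, l + fl), so a single comparison
// classifies a loop id.
struct LoopIds {
  unsigned numNativeLoops, numLoops;
  LoopIds(unsigned l, unsigned fl) : numNativeLoops(l), numLoops(l + fl) {}
  unsigned getNumFilterLoops() const { return numLoops - numNativeLoops; }
  unsigned getFilterLoopStartingIdx() const { return numNativeLoops; }
  bool isFilterLoop(unsigned ldx) const {
    assert(ldx < numLoops);
    return ldx >= numNativeLoops;
  }
};

int main() {
  LoopIds ids(/*l=*/3, /*fl=*/1); // e.g., (d0, d1, d2) => (d0 + d1, d2)
  assert(!ids.isFilterLoop(2));   // d2 is a native loop
  assert(ids.isFilterLoop(3));    // the filter loop for d0 + d1
  return 0;
}
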
@@ -242,6 +266,11 @@ public:
   /// expressions).
   unsigned getSynTensorID() const { return syntheticTensor; }
 
+  bool isFilterLoop(unsigned ldx) const {
+    assert(ldx < numLoops);
+    return ldx >= numNativeLoops;
+  }
+
   /// Returns true if given tensor iterates *only* in the given tensor
   /// expression. For the output tensor, this defines a "simply dynamic"
   /// operation [Bik96]. For instance: a(i) *= 2.0 or a(i) += a(i) for

@@ -345,6 +374,7 @@ private:
   const unsigned outTensor;
   const unsigned syntheticTensor;
   const unsigned numTensors;
+  const unsigned numNativeLoops;
   const unsigned numLoops;
   bool hasSparseOut;
   // Map that converts pair<tensor id, loop id> to the corresponding dimension

@@ -329,6 +329,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
   return loop;
 }
 
+Operation *SparseTensorLoopEmitter::enterFilterLoopOverTensorAtDim(
+    OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine,
+    MutableArrayRef<Value> reduc) {
+  llvm_unreachable("need to be implemented");
+}
+
 void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel(
     OpBuilder &builder, Location loc, size_t tid, size_t dim,
     AffineExpr affine) {

@@ -406,6 +406,11 @@ public:
                                       ArrayRef<size_t> extraTids = {},
                                       ArrayRef<size_t> extraDims = {});
 
+  Operation *enterFilterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
+                                            size_t tid, size_t dim,
+                                            AffineExpr affine,
+                                            MutableArrayRef<Value> reduc = {});
+
   void genDenseAffineAddressAtCurLevel(OpBuilder &builder, Location loc,
                                        size_t tid, size_t dim,
                                        AffineExpr affine);

@@ -155,8 +155,11 @@ static AffineMap permute(MLIRContext *context, AffineMap m,
 /// Helper method to inspect affine expressions. Rejects cases where the
 /// same index is used more than once. Also rejects compound affine
 /// expressions in sparse dimensions.
+/// filterLdx stores the filter loop index that should be used for the next
+/// compound affine expression on a sparse level; it is incremented by one
+/// each time it is used.
 static bool findAffine(Merger &merger, unsigned tensor, unsigned dim,
-                       AffineExpr a, DimLevelType dlt,
+                       AffineExpr a, DimLevelType dlt, unsigned &filterLdx,
                        bool setLvlFormat = true) {
   switch (a.getKind()) {
   case AffineExprKind::DimId: {

@@ -169,22 +172,68 @@ static bool findAffine(Merger &merger, unsigned tensor, unsigned dim,
     return true;
   }
   case AffineExprKind::Add:
-  case AffineExprKind::Mul: {
-    if (!isDenseDLT(dlt))
-      return false; // compound only in dense dim
-    auto binOp = a.cast<AffineBinaryOpExpr>();
-    // We do not set dim level format for an affine expression like d0 + d1 on
-    // both loop indices at d0 and d1,
-    return findAffine(merger, tensor, dim, binOp.getLHS(), dlt, false) &&
-           findAffine(merger, tensor, dim, binOp.getRHS(), dlt, false);
+  case AffineExprKind::Mul:
+  case AffineExprKind::Constant: {
+    if (!isDenseDLT(dlt) && setLvlFormat) {
+      assert(isUndefDLT(merger.getDimLevelType(tensor, filterLdx)));
+      // Use a filter loop for sparse affine expression.
+      merger.setDimAndDimLevelType(tensor, filterLdx++, dim, dlt);
+    }
+
+    if (auto binOp = a.dyn_cast<AffineBinaryOpExpr>()) {
+      // We do not set the dim level format for an affine expression like
+      // d0 + d1 on either loop index d0 or d1; we continue the recursion
+      // merely to check whether the current affine expression is admissible.
+      return findAffine(merger, tensor, dim, binOp.getLHS(), dlt, filterLdx,
+                        false) &&
+             findAffine(merger, tensor, dim, binOp.getRHS(), dlt, filterLdx,
+                        false);
+    }
+    // Falls through when it is a constant affine expression.
+    return true;
   }
-  case AffineExprKind::Constant:
-    return isDenseDLT(dlt); // const only in dense dim
   default:
     return false;
   }
 }
 
+/// Gets the total number of compound affine expressions in affineMap that
+/// are attached to the given tensor. For the following inputs:
+///
+///   affineMap = (d0, d1, d2) => (d0 + d1, d2)
+///   tensor    = ["compressed", "compressed"]
+///
+/// returns 1 (because the first level is compressed and its corresponding
+/// affine expression is d0 + d1).
+static unsigned getNumCompoundAffineOnSparseDims(AffineMap affineMap,
+                                                 Value tensor) {
+  unsigned num = 0;
+  auto enc = getSparseTensorEncoding(tensor.getType());
+  if (enc) {
+    ArrayRef<AffineExpr> exps = affineMap.getResults();
+    for (unsigned rank = 0; rank < exps.size(); rank++) {
+      auto aidx = toOrigDim(enc, rank);
+      auto affine = exps[aidx];
+      if (!affine.isa<AffineDimExpr>())
+        if (!isDenseDLT(getDimLevelType(enc, rank)))
+          num++;
+    }
+  }
+  return num;
+}
+
+/// Gets the total number of compound affine expressions attached to a
+/// sparse level in the given GenericOp.
+static unsigned getNumCompoundAffineOnSparseDims(linalg::GenericOp op) {
+  unsigned num = 0;
+  for (OpOperand &t : op->getOpOperands())
+    num += getNumCompoundAffineOnSparseDims(op.getMatchingIndexingMap(&t),
+                                            t.get());
+  return num;
+}
+
 /// Helper method to inspect sparse encodings in the tensor types.
 /// Fills the per-dimension sparsity information for all tensors.
 /// Returns true if the sparse annotations and affine subscript

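A standalone sketch of the counting rule implemented by getNumCompoundAffineOnSparseDims above (the toy Subscript and Lvl types are invented; the real code walks mlir::AffineMap results and the sparse tensor encoding): a level counts iff its subscript is not a plain dimension and the level is not dense.

#include <cstddef>
#include <cstdio>
#include <vector>

enum class Lvl { Dense, Compressed };
struct Subscript { bool isPlainDim; }; // d0 vs. compound like d0 + d1

static unsigned countCompoundOnSparse(const std::vector<Subscript> &subs,
                                      const std::vector<Lvl> &lvls) {
  unsigned num = 0;
  for (std::size_t i = 0; i < subs.size(); i++)
    if (!subs[i].isPlainDim && lvls[i] != Lvl::Dense)
      num++; // compound affine expression on a non-dense level
  return num;
}

int main() {
  // affineMap = (d0, d1, d2) => (d0 + d1, d2); both levels compressed.
  std::vector<Subscript> subs = {{false}, {true}};
  std::vector<Lvl> lvls = {Lvl::Compressed, Lvl::Compressed};
  std::printf("%u\n", countCompoundOnSparse(subs, lvls)); // prints 1
  return 0;
}
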
@@ -192,19 +241,22 @@ static bool findAffine(Merger &merger, unsigned tensor, unsigned dim,
 /// no annotations are found or inadmissible constructs occur.
 static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {
   bool annotated = false;
+  unsigned filterLdx = merger.getFilterLoopStartingIdx();
   for (OpOperand &t : op->getOpOperands()) {
     auto map = op.getMatchingIndexingMap(&t);
     auto enc = getSparseTensorEncoding(t.get().getType());
     if (enc)
       annotated = true;
     assert(map.getNumResults() == op.getRank(&t));
 
     for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
       unsigned tensor = t.getOperandNumber();
       AffineExpr a = map.getResult(toOrigDim(enc, d));
-      if (!findAffine(merger, tensor, d, a, getDimLevelType(enc, d)))
+      if (!findAffine(merger, tensor, d, a, getDimLevelType(enc, d), filterLdx))
         return false; // inadmissible affine expression
     }
   }
+  assert(filterLdx == merger.getNumLoops());
   return annotated;
 }

@@ -214,34 +266,58 @@ static bool findSparseAnnotations(Merger &merger, linalg::GenericOp op) {
 /// latest possible index.
 static bool topSortOptimal(unsigned n,
                            ArrayRef<utils::IteratorType> iteratorTypes,
-                           std::vector<unsigned> &topSort,
+                           const Merger &merger, std::vector<unsigned> &topSort,
                            std::vector<unsigned> &inDegree,
                            std::vector<std::vector<bool>> &adjM) {
-  std::vector<unsigned> redIt; // reduction iterator with 0 degree
-  std::vector<unsigned> parIt; // parallel iterator with 0 degree
+  std::vector<unsigned> redIt;    // reduction iterator with 0 degree
+  std::vector<unsigned> parIt;    // parallel iterator with 0 degree
+  std::vector<unsigned> filterIt; // filter loop with 0 degree
   for (unsigned i = 0; i < n; i++) {
     if (inDegree[i] == 0) {
-      if (linalg::isReductionIterator(iteratorTypes[i]))
+      if (merger.isFilterLoop(i))
+        filterIt.push_back(i);
+      else if (linalg::isReductionIterator(iteratorTypes[i]))
         redIt.push_back(i);
       else
         parIt.push_back(i);
     }
   }
 
-  while (!redIt.empty() || !parIt.empty()) {
-    // We always choose a parallel iterator if there is any.
-    auto &it = !parIt.empty() ? parIt : redIt;
+  while (!redIt.empty() || !parIt.empty() || !filterIt.empty()) {
+    // We always choose in the order filter loop -> parallel loop -> reduction
+    // loop, because
+    // 1. Putting a reduction loop early might make the loop sequence
+    //    inadmissible.
+    // 2. Filter loops should be placed as early as possible for better
+    //    performance, since only one (if any) iteration will carry the
+    //    computation. E.g.,
+    //      for (1 to N)
+    //        for (1 to M)
+    //          for (1 to K)
+    //            if (xxx)
+    //              O(X) computation  => O(NMK+NMX) time complexity
+    //
+    //    By putting the filter loop one level up, we get
+    //
+    //      for (1 to N)
+    //        for (1 to K)
+    //          if (xxx)
+    //            for (1 to M)
+    //              O(X) computation  => O(NK+NMX) time complexity
+    auto &it = !filterIt.empty() ? filterIt : (!parIt.empty() ? parIt : redIt);
     auto src = it.back();
     topSort.push_back(src);
     it.pop_back();
     // Update in-degree, and push 0-degree node into worklist.
-    for (unsigned dst = 0; dst < n; dst++)
+    for (unsigned dst = 0; dst < n; dst++) {
       if (adjM[src][dst] && --inDegree[dst] == 0) {
-        if (linalg::isReductionIterator(iteratorTypes[dst]))
+        if (merger.isFilterLoop(dst))
+          filterIt.push_back(dst);
+        else if (linalg::isReductionIterator(iteratorTypes[dst]))
          redIt.push_back(dst);
         else
           parIt.push_back(dst);
       }
+    }
   }
   return topSort.size() == n;
 }

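The worklist scheme in topSortOptimal is ordinary Kahn's algorithm with three priority buckets. A self-contained sketch on the (d0, d1, d2) => (d0 + d1, d2) example (loop kinds and edges are made up for illustration; loop 3 plays the filter loop and depends on d0 and d1):

#include <cstdio>
#include <vector>

enum class Kind { Parallel, Reduction, Filter };

int main() {
  unsigned n = 4;
  std::vector<Kind> kind = {Kind::Parallel, Kind::Parallel, Kind::Reduction,
                            Kind::Filter};
  std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));
  adjM[0][3] = adjM[1][3] = true; // d0 < f3 and d1 < f3
  std::vector<unsigned> inDegree(n, 0);
  for (unsigned i = 0; i < n; i++)
    for (unsigned j = 0; j < n; j++)
      if (adjM[i][j])
        inDegree[j]++;

  std::vector<unsigned> redIt, parIt, filterIt, topSort;
  auto push = [&](unsigned i) {
    if (kind[i] == Kind::Filter)
      filterIt.push_back(i);
    else if (kind[i] == Kind::Reduction)
      redIt.push_back(i);
    else
      parIt.push_back(i);
  };
  for (unsigned i = 0; i < n; i++)
    if (inDegree[i] == 0)
      push(i);

  while (!redIt.empty() || !parIt.empty() || !filterIt.empty()) {
    // Preference order: filter -> parallel -> reduction.
    auto &it = !filterIt.empty() ? filterIt : (!parIt.empty() ? parIt : redIt);
    unsigned src = it.back();
    it.pop_back();
    topSort.push_back(src);
    for (unsigned dst = 0; dst < n; dst++)
      if (adjM[src][dst] && --inDegree[dst] == 0)
        push(dst);
  }
  for (unsigned i : topSort)
    std::printf("%u ", i); // prints "1 0 3 2": filter as early as deps allow
  return 0;
}
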
@@ -340,7 +416,7 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
                                   OpOperand *skip = nullptr) {
   // Set up an n x n from/to adjacency matrix of the iteration graph
   // for the implicit loop indices i_0 .. i_n-1.
-  unsigned n = op.getNumLoops();
+  unsigned n = merger.getNumLoops();
   std::vector<std::vector<bool>> adjM(n, std::vector<bool>(n, false));
   std::vector<unsigned> inDegree(n, 0); // in-degree of each node.
   auto iteratorTypes = op.getIteratorTypesArray();

@@ -352,7 +428,7 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
     // Get map and encoding.
     auto map = op.getMatchingIndexingMap(&t);
     auto enc = getSparseTensorEncoding(t.get().getType());
-    assert(map.getNumDims() == n);
+    assert(map.getNumDims() + getNumCompoundAffineOnSparseDims(op) == n);
     // Skip dense tensor constraints when not requested.
     if (!(mask & SortMask::kIncludeDense) && !enc)
       continue;

@@ -364,6 +440,19 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
       AffineExpr ta = map.getResult(toOrigDim(enc, d));
       Optional<unsigned> tldx = merger.getLoopIdx(t.getOperandNumber(), d);
 
+      // Filter loops should be constructed after all the dependent loops,
+      // i.e., d0 + d1 < filter_loop(d0 + d1).
+      if (tldx && merger.isFilterLoop(tldx.value())) {
+        assert(!ta.isa<AffineDimExpr>() &&
+               !isDenseDLT(getDimLevelType(enc, d)));
+        addAffineOrderings(adjM, inDegree, ta, AffineExpr(), llvm::None, tldx);
+        // Now that the ordering of the affine expression is captured by the
+        // filter loop idx, we only need to ensure the affine ordering against
+        // the filter loop. Thus, we reset the affine expression to nil here
+        // to mark it as resolved.
+        ta = AffineExpr();
+      }
+
       if (d > 0) {
         AffineExpr fa = map.getResult(toOrigDim(enc, d - 1));
         Optional<unsigned> fldx =

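A sketch of the ordering rule above on a toy expression tree (the Expr struct is invented; the real code recurses over mlir::AffineExpr via addAffineOrderings): every dimension appearing in the compound subscript is ordered before its filter loop, after which the subscript counts as resolved.

#include <vector>

struct Expr {
  bool isDim;
  unsigned dim;          // valid when isDim
  const Expr *lhs, *rhs; // valid when !isDim (binary op)
};

// Add an edge d_i -> filterLoop for every dimension in the expression.
static void addDimToFilterEdges(const Expr &e, unsigned filterLoop,
                                std::vector<std::vector<bool>> &adjM,
                                std::vector<unsigned> &inDegree) {
  if (e.isDim) {
    if (!adjM[e.dim][filterLoop]) {
      adjM[e.dim][filterLoop] = true; // d_i < filter_loop(...)
      inDegree[filterLoop]++;
    }
    return;
  }
  addDimToFilterEdges(*e.lhs, filterLoop, adjM, inDegree);
  addDimToFilterEdges(*e.rhs, filterLoop, adjM, inDegree);
}

int main() {
  // d0 + d1, guarded by filter loop 3.
  Expr d0{true, 0, nullptr, nullptr}, d1{true, 1, nullptr, nullptr};
  Expr add{false, 0, &d0, &d1};
  std::vector<std::vector<bool>> adjM(4, std::vector<bool>(4, false));
  std::vector<unsigned> inDegree(4, 0);
  addDimToFilterEdges(add, /*filterLoop=*/3, adjM, inDegree);
  // adjM now orders d0 and d1 before loop 3; the subscript itself is then
  // considered resolved (the real code resets `ta` to a null AffineExpr).
  return 0;
}
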
@@ -377,6 +466,11 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
       if (!(mask & SortMask::kIncludeDense))
         tryLoosenAffineDenseConstraints(op, fldx, fa, tldx, ta);
 
+      // (d0 + d1) < (d2 + d3), or
+      // filter_loop_d-1 < (d2 + d3), or
+      // (d0 + d1) < filter_loop_d, or
+      // filter_loop_d-1 < filter_loop_d, depending on whether fa/ta were
+      // reset above.
       addAffineOrderings(adjM, inDegree, fa, ta, fldx, tldx);
     }
   }

@@ -402,7 +496,7 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
   // Report failure for a cyclic iteration graph.
   topSort.clear();
   topSort.reserve(n);
-  return topSortOptimal(n, iteratorTypes, topSort, inDegree, adjM);
+  return topSortOptimal(n, iteratorTypes, merger, topSort, inDegree, adjM);
 }
 
 /// Returns true if tensor materializes uninitialized into the computation.

@@ -430,9 +524,8 @@ static bool isAdmissibleTensorExp(Merger &merger, linalg::GenericOp op,
   // An all-dense annotated "sparse" output tensor becomes a linearized random
   // access 1-dim memref. Also admissible since insertions cannot occur.
   bool allDense = true;
-  auto iteratorTypes = op.getIteratorTypesArray();
-  unsigned numLoops = iteratorTypes.size();
-  for (unsigned i = 0; i < numLoops; i++)
+  unsigned numLoops = merger.getNumLoops(); // numNativeLoops + numFilterLoops
+  for (unsigned i = 0; i < merger.getNumLoops(); i++)
     if (isCompressedDLT(merger.getDimLevelType(tensor, i)) ||
         isSingletonDLT(merger.getDimLevelType(tensor, i))) {
       allDense = false;

@@ -443,19 +536,31 @@ static bool isAdmissibleTensorExp(Merger &merger, linalg::GenericOp op,
   }
   if (allDense)
     return true;
 
+  // TODO: support compound affine expression on sparse output.
+  if (getNumCompoundAffineOnSparseDims(op.getMatchingIndexingMap(lhs),
+                                       lhs->get()) != 0)
+    return false;
+
   // A tensor expression with a sparse output tensor that changes its values
   // but not its nonzero structure, an operation called "simply dynamic" in
   // [Bik96,Ch9], is also admissible without special codegen.
   if (merger.isSingleCondition(tensor, exp))
     return true;
 
   // Accept "truly dynamic" if the output tensor materializes uninitialized
   // into the computation and insertions occur in lexicographic index order.
   if (isMaterializing(lhs->get())) {
+    auto iteratorTypes = op.getIteratorTypesArray();
     unsigned nest = 0;
     for (unsigned i = 0; i < numLoops; i++) {
-      if (linalg::isReductionIterator(iteratorTypes[topSort[i]]))
-        break; // terminate at first reduction
-      nest++;
+      if (!merger.isFilterLoop(topSort[i])) {
+        // We only count non-filter loops; filter loops should be treated as
+        // a special type of parallel loop.
+        if (linalg::isReductionIterator(iteratorTypes[topSort[i]]))
+          break; // terminate at first reduction
+        nest++;
+      }
     }
     // Determine admissible dynamic insertion situations:
     // (1) fully injective, since there are no reductions,

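A standalone sketch of the nest-counting logic above (loop kinds are made up for illustration): filter loops are skipped as if they were parallel loops, and counting stops at the first reduction among the remaining loops.

#include <cstdio>
#include <vector>

enum class Kind { Parallel, Reduction, Filter };

int main() {
  // Loop kinds in topSort order, e.g. a filter loop scheduled second.
  std::vector<Kind> kinds = {Kind::Parallel, Kind::Filter, Kind::Parallel,
                             Kind::Reduction};
  unsigned nest = 0;
  for (Kind k : kinds) {
    if (k == Kind::Filter)
      continue; // not counted toward the insertion nest
    if (k == Kind::Reduction)
      break; // terminate at first reduction
    nest++;
  }
  std::printf("nest = %u\n", nest); // prints 2
  return 0;
}
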
@@ -878,7 +983,14 @@ static void genInvariants(Merger &merger, CodeGen &codegen, OpBuilder &builder,
     auto enc = getSparseTensorEncoding(t.get().getType());
     for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
       AffineExpr a = map.getResult(toOrigDim(enc, d));
-      if (!isInvariantAffine(codegen, a, ldx, atLevel))
+      Optional<unsigned> sldx = merger.getLoopIdx(t.getOperandNumber(), d);
+      if (sldx && merger.isFilterLoop(sldx.value())) {
+        if (!codegen.getLoopIdxValue(sldx.value()))
+          // The filter loop has not been constructed yet.
+          return;
+        if (sldx.value() == ldx)
+          atLevel = true;
+      } else if (!isInvariantAffine(codegen, a, ldx, atLevel))
         return; // still in play
     }
     // All exhausted at this level (atLevel denotes exactly at this level).

@@ -1003,6 +1115,16 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
 
   Operation *loop =
       genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
+        if (merger.isFilterLoop(idx)) {
+          assert(isSparse);
+          OpOperand *t = &op->getOpOperand(tid);
+          auto enc = getSparseTensorEncoding(t->get().getType());
+          // Retrieves the affine expression for the filter loop.
+          AffineExpr a =
+              op.getMatchingIndexingMap(t).getResult(toOrigDim(enc, dim));
+          return codegen.loopEmitter.enterFilterLoopOverTensorAtDim(
+              builder, loc, tid, dim, a, reduc);
+        }
         return codegen.loopEmitter.enterLoopOverTensorAtDim(
             builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims);
       }).value();

@@ -1488,7 +1610,8 @@ public:
       return failure();
     unsigned numTensors = op->getNumOperands();
     unsigned numLoops = op.getNumLoops();
-    Merger merger(numTensors, numLoops);
+    unsigned numFilterLoops = getNumCompoundAffineOnSparseDims(op);
+    Merger merger(numTensors, numLoops, numFilterLoops);
     if (!findSparseAnnotations(merger, op))
       return failure();

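The driver change above wires the analysis together: the number of filter loops is computed once from the indexing maps and sizes the Merger's total loop-id space. A tiny numeric walkthrough for the running (d0, d1, d2) => (d0 + d1, d2) example (values are illustrative):

#include <cassert>

int main() {
  unsigned numLoops = 3;       // native loops d0, d1, d2 from the GenericOp
  unsigned numFilterLoops = 1; // one compound affine subscript on a sparse
                               // level, as counted by the analysis
  // Merger(numTensors, numLoops, numFilterLoops) then manages
  // numLoops + numFilterLoops loop ids, with id 3 being the filter loop.
  assert(numLoops + numFilterLoops == 4);
  return 0;
}
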
@@ -127,7 +127,7 @@ class MergerTestBase : public ::testing::Test {
 protected:
   MergerTestBase(unsigned numTensors, unsigned numLoops)
       : numTensors(numTensors), numLoops(numLoops),
-        merger(numTensors, numLoops) {}
+        merger(numTensors, numLoops, /*numFilterLoops=*/0) {}
 
   ///
   /// Expression construction helpers.