[mlir][sparse] refine lexicographic insertion to any tensor

The first version supported vectors only. With some clever "path" insertion,
we now support any d-dimensional tensor. Up next: reductions too.

Reviewed By: bixia, wrengr

Differential Revision: https://reviews.llvm.org/D114024
Aart Bik 2021-11-16 12:46:55 -08:00
parent a68ccda203
commit 1ce77b562d
5 changed files with 418 additions and 33 deletions


@@ -325,9 +325,6 @@ static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op,
for (auto attr : op.iterator_types())
if (isReductionIterator(attr))
return false;
// TODO: generalize support lib beyond vectors
if (op.iterator_types().size() != 1)
return false;
*sparseOut = lhs;
return true;
}


@@ -247,12 +247,9 @@ public:
if (tensor) {
uint64_t nnz = tensor->getElements().size();
values.reserve(nnz);
fromCOO(tensor, sparsity, 0, nnz, 0);
} else {
if (allDense)
values.resize(sz, 0);
for (uint64_t r = 0; r < rank; r++)
idx[r] = -1u;
fromCOO(tensor, 0, nnz, 0);
} else if (allDense) {
values.resize(sz, 0);
}
}
@@ -279,16 +276,26 @@ public:
void getValues(std::vector<V> **out) override { *out = &values; }
/// Partially specialize lexicographic insertions based on template types.
// TODO: 1-dim tensors only for now, generalize soon
void lexInsert(uint64_t *cursor, V val) override {
assert((idx[0] == -1u || idx[0] < cursor[0]) && "not lexicographic");
indices[0].push_back(cursor[0]);
values.push_back(val);
idx[0] = cursor[0];
// First, wrap up pending insertion path.
uint64_t diff = 0;
uint64_t top = 0;
if (!values.empty()) {
diff = lexDiff(cursor);
endPath(diff + 1);
top = idx[diff] + 1;
}
// Then continue with insertion path.
insPath(cursor, diff, top, val);
}
/// Finalizes lexicographic insertions.
void endInsert() override { pointers[0].push_back(indices[0].size()); }
void endInsert() override {
if (values.empty())
endDim(0);
else
endPath(0);
}
/// Returns this sparse tensor storage scheme as a new memory-resident
/// sparse tensor in coordinate scheme with the given dimension order.
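As a rough illustration of the ordering contract the generalized lexInsert relies on: where the old code only compared idx[0] against cursor[0], each call must now present a cursor that is strictly greater, in lexicographic order, than the previously inserted one. A minimal standalone sketch of that check (illustrative names, not the library's own helpers):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Returns the first dimension where the new cursor exceeds the previous
// coordinates, asserting that the new cursor is strictly greater in
// lexicographic order (a sketch of the contract checked by lexDiff).
static uint64_t firstDifferingDim(const std::vector<uint64_t> &prev,
                                  const std::vector<uint64_t> &cursor) {
  for (uint64_t r = 0, rank = prev.size(); r < rank; r++) {
    if (cursor[r] > prev[r])
      return r;
    assert(cursor[r] == prev[r] && "non-lexicographic insertion");
  }
  assert(0 && "duplicate insertion");
  return -1u;
}

int main() {
  // Previous insertion at (1, 3): inserting (1, 7) differs at dimension 1,
  // while inserting (2, 0) differs at dimension 0, so dimension 1 of the
  // pending path must be closed before the new path continues.
  std::printf("%llu\n", (unsigned long long)firstDifferingDim({1, 3}, {1, 7})); // 1
  std::printf("%llu\n", (unsigned long long)firstDifferingDim({1, 3}, {2, 0})); // 0
  return 0;
}

lexDiff in the patch performs the same scan and additionally tells lexInsert at which dimension the pending insertion path must be wrapped up.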
@@ -342,14 +349,14 @@ private:
/// Initializes sparse tensor storage scheme from a memory-resident sparse
/// tensor in coordinate scheme. This method prepares the pointers and
/// indices arrays under the given per-dimension dense/sparse annotations.
void fromCOO(SparseTensorCOO<V> *tensor, const DimLevelType *sparsity,
uint64_t lo, uint64_t hi, uint64_t d) {
void fromCOO(SparseTensorCOO<V> *tensor, uint64_t lo, uint64_t hi,
uint64_t d) {
const std::vector<Element<V>> &elements = tensor->getElements();
// Once dimensions are exhausted, insert the numerical values.
assert(d <= getRank());
if (d == getRank()) {
assert(lo >= hi || lo < elements.size());
values.push_back(lo < hi ? elements[lo].value : 0);
assert(lo < hi && hi <= elements.size());
values.push_back(elements[lo].value);
return;
}
// Visit all elements in this interval.
@@ -362,28 +369,28 @@ private:
while (seg < hi && elements[seg].indices[d] == i)
seg++;
// Handle segment in interval for sparse or dense dimension.
if (sparsity[d] == DimLevelType::kCompressed) {
if (isCompressedDim(d)) {
indices[d].push_back(i);
} else {
// For dense storage we must fill in all the zero values between
// the previous element (when last we ran this for-loop) and the
// current element.
for (; full < i; full++)
fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty
endDim(d + 1);
full++;
}
fromCOO(tensor, sparsity, lo, seg, d + 1);
fromCOO(tensor, lo, seg, d + 1);
// And move on to next segment in interval.
lo = seg;
}
// Finalize the sparse pointer structure at this dimension.
if (sparsity[d] == DimLevelType::kCompressed) {
if (isCompressedDim(d)) {
pointers[d].push_back(indices[d].size());
} else {
// For dense storage we must fill in all the zero values after
// the last element.
for (uint64_t sz = sizes[d]; full < sz; full++)
fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty
endDim(d + 1);
}
}
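To see what the switch from the old "pass empty" recursion to endDim means for a dense dimension, consider a single dense dimension of size 4 whose sorted COO elements sit at positions 1 and 3: every skipped position before, between, and after the stored elements receives an explicit zero. A simplified, self-contained sketch of just that padding loop (one dense dimension only; the names are illustrative, not the library's):

#include <cstdint>
#include <cstdio>
#include <vector>

struct Element {
  uint64_t index;
  double value;
};

// Simplified sketch of the dense-dimension case in fromCOO: walk the sorted
// elements of one dense dimension of size `size` and emit a value for every
// position, padding skipped positions with zeros (the role endDim plays at
// the innermost level).
static std::vector<double> densify(const std::vector<Element> &elements,
                                   uint64_t size) {
  std::vector<double> values;
  uint64_t full = 0;
  for (const Element &e : elements) {
    for (; full < e.index; full++)
      values.push_back(0); // "endDim" for each skipped position
    values.push_back(e.value);
    full++;
  }
  for (; full < size; full++)
    values.push_back(0); // trailing padding after the last element
  return values;
}

int main() {
  std::vector<double> v = densify({{1, 5.0}, {3, 7.0}}, 4);
  for (double d : v)
    std::printf("%g ", d); // 0 5 0 7
  std::printf("\n");
  return 0;
}

In the actual class the same role is played by the endDim(d + 1) calls above, which recurse until the innermost level pushes the zero.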
@@ -395,21 +402,83 @@ private:
if (d == getRank()) {
assert(pos < values.size());
tensor->add(idx, values[pos]);
} else if (pointers[d].empty()) {
// Dense dimension.
for (uint64_t i = 0, sz = sizes[d], off = pos * sz; i < sz; i++) {
idx[reord[d]] = i;
toCOO(tensor, reord, off + i, d + 1);
}
} else {
} else if (isCompressedDim(d)) {
// Sparse dimension.
for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) {
idx[reord[d]] = indices[d][ii];
toCOO(tensor, reord, ii, d + 1);
}
} else {
// Dense dimension.
for (uint64_t i = 0, sz = sizes[d], off = pos * sz; i < sz; i++) {
idx[reord[d]] = i;
toCOO(tensor, reord, off + i, d + 1);
}
}
}
/// Ends a deeper, never seen before dimension.
void endDim(uint64_t d) {
assert(d <= getRank());
if (d == getRank()) {
values.push_back(0);
} else if (isCompressedDim(d)) {
pointers[d].push_back(indices[d].size());
} else {
for (uint64_t full = 0, sz = sizes[d]; full < sz; full++)
endDim(d + 1);
}
}
/// Wraps up a single insertion path, inner to outer.
void endPath(uint64_t diff) {
uint64_t rank = getRank();
assert(diff <= rank);
for (uint64_t i = 0; i < rank - diff; i++) {
uint64_t d = rank - i - 1;
if (isCompressedDim(d)) {
pointers[d].push_back(indices[d].size());
} else {
for (uint64_t full = idx[d] + 1, sz = sizes[d]; full < sz; full++)
endDim(d + 1);
}
}
}
/// Continues a single insertion path, outer to inner.
void insPath(uint64_t *cursor, uint64_t diff, uint64_t top, V val) {
uint64_t rank = getRank();
assert(diff < rank);
for (uint64_t d = diff; d < rank; d++) {
uint64_t i = cursor[d];
if (isCompressedDim(d)) {
indices[d].push_back(i);
} else {
for (uint64_t full = top; full < i; full++)
endDim(d + 1);
}
top = 0;
idx[d] = i;
}
values.push_back(val);
}
/// Finds the lexicographic differing dimension.
uint64_t lexDiff(uint64_t *cursor) {
for (uint64_t r = 0, rank = getRank(); r < rank; r++)
if (cursor[r] > idx[r])
return r;
else
assert(cursor[r] == idx[r] && "non-lexicographic insertion");
assert(0 && "duplication insertion");
return -1u;
}
/// Returns true if dimension is compressed.
inline bool isCompressedDim(uint64_t d) const {
return (!pointers[d].empty());
}
private:
std::vector<uint64_t> sizes; // per-dimension sizes
std::vector<uint64_t> rev; // "reverse" permutation
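A worked example helps to see how lexInsert, endPath, insPath, and lexDiff cooperate. The sketch below re-implements only the all-compressed (DCSR-like) case in isolation, so the dense branches and the "top" bookkeeping are omitted, and inserts the nonzeros of a 4x8 matrix at (0,0), (0,7), and (2,1) in lexicographic order. It is a simplified stand-in for the library class, not the library code itself.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified all-compressed storage to illustrate the insertion "path"
// logic; every dimension behaves as if isCompressedDim(d) were true.
struct CompressedStorage {
  uint64_t rank;
  std::vector<std::vector<uint64_t>> pointers, indices;
  std::vector<double> values;
  std::vector<uint64_t> idx; // last inserted coordinate per dimension

  explicit CompressedStorage(uint64_t r)
      : rank(r), pointers(r), indices(r), idx(r) {
    for (uint64_t d = 0; d < r; d++)
      pointers[d].push_back(0); // initial pointer entry per compressed dim
  }

  // Finds the first dimension where the new cursor differs (and exceeds)
  // the previous insertion.
  uint64_t lexDiff(const uint64_t *cursor) const {
    for (uint64_t r = 0; r < rank; r++)
      if (cursor[r] > idx[r])
        return r;
      else
        assert(cursor[r] == idx[r] && "non-lexicographic insertion");
    assert(0 && "duplicate insertion");
    return -1u;
  }

  // Closes all dimensions at depth `diff` and deeper, inner to outer.
  void endPath(uint64_t diff) {
    for (uint64_t i = 0; i < rank - diff; i++) {
      uint64_t d = rank - i - 1;
      pointers[d].push_back(indices[d].size());
    }
  }

  // Continues the insertion path from dimension `diff` down to the values.
  void insPath(const uint64_t *cursor, uint64_t diff, double val) {
    for (uint64_t d = diff; d < rank; d++) {
      indices[d].push_back(cursor[d]);
      idx[d] = cursor[d];
    }
    values.push_back(val);
  }

  void lexInsert(const uint64_t *cursor, double val) {
    uint64_t diff = 0;
    if (!values.empty()) {
      diff = lexDiff(cursor);
      endPath(diff + 1); // wrap up the previous path below the split point
    }
    insPath(cursor, diff, val);
  }

  void endInsert() { endPath(0); } // assumes at least one insertion happened
};

int main() {
  CompressedStorage dcsr(2);
  uint64_t c0[] = {0, 0}, c1[] = {0, 7}, c2[] = {2, 1};
  dcsr.lexInsert(c0, 1.0);
  dcsr.lexInsert(c1, 2.0);
  dcsr.lexInsert(c2, 3.0);
  dcsr.endInsert();
  // Expected DCSR arrays:
  //   pointers[0] = [0, 2]      indices[0] = [0, 2]
  //   pointers[1] = [0, 2, 3]   indices[1] = [0, 7, 1]
  //   values      = [1, 2, 3]
  for (uint64_t d = 0; d < 2; d++) {
    std::printf("pointers[%llu]:", (unsigned long long)d);
    for (uint64_t p : dcsr.pointers[d])
      std::printf(" %llu", (unsigned long long)p);
    std::printf("\nindices[%llu]:", (unsigned long long)d);
    for (uint64_t i : dcsr.indices[d])
      std::printf(" %llu", (unsigned long long)i);
    std::printf("\n");
  }
  for (double v : dcsr.values)
    std::printf("%g ", v);
  std::printf("\n");
  return 0;
}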


@@ -11,7 +11,7 @@
dimOrdering = affine_map<(i,j) -> (i,j)>
}>
#trait_scale = {
#trait_scale_inpl = {
indexing_maps = [
affine_map<(i,j) -> (i,j)> // X (out)
],
@@ -44,7 +44,7 @@
// CHECK: }
func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR> {linalg.inplaceable = true}) -> tensor<32x16xf32, #DCSR> {
%c = arith.constant 2.0 : f32
%0 = linalg.generic #trait_scale
%0 = linalg.generic #trait_scale_inpl
outs(%argx: tensor<32x16xf32, #DCSR>) {
^bb(%x: f32):
%1 = arith.mulf %x, %c : f32
@@ -129,3 +129,56 @@ func @sparse_simply_dynamic2(%arga: tensor<32x16xf32, #CSR>,
} -> tensor<32x16xf32, #DCSR>
return %0 : tensor<32x16xf32, #DCSR>
}
#trait_scale = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A
affine_map<(i,j) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i,j) = A(i,j) * 2.0"
}
// CHECK-LABEL: func @sparse_truly_dynamic(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2.000000e+00 : f32
// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 20 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_7:.*]] = sparse_tensor.init{{\[}}%[[VAL_2]], %[[VAL_3]]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: %[[VAL_11:.*]] = memref.alloca(%[[VAL_5]]) : memref<?xindex>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_2]] step %[[VAL_4]] {
// CHECK: memref.store %[[VAL_12]], %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_4]] {
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref<?xf32>
// CHECK: %[[VAL_19:.*]] = arith.mulf %[[VAL_18]], %[[VAL_1]] : f32
// CHECK: sparse_tensor.lex_insert %[[VAL_7]], %[[VAL_11]], %[[VAL_19]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_7]] hasInserts : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK: return %[[VAL_20]] : tensor<10x20xf32, #sparse_tensor.encoding<{
// CHECK: }
func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20xf32, #DCSR> {
%s = arith.constant 2.0 : f32
%d10 = arith.constant 10 : index
%d20 = arith.constant 20 : index
%xm = sparse_tensor.init [%d10, %d20] : tensor<10x20xf32, #DCSR>
%0 = linalg.generic #trait_scale
ins(%arga: tensor<10x20xf32, #CSR>)
outs(%xm: tensor<10x20xf32, #DCSR>) {
^bb(%a: f32, %x: f32):
%1 = arith.mulf %a, %s : f32
linalg.yield %1 : f32
} -> tensor<10x20xf32, #DCSR>
return %0 : tensor<10x20xf32, #DCSR>
}


@@ -0,0 +1,176 @@
// RUN: mlir-opt %s \
// RUN: --sparsification --sparse-tensor-conversion \
// RUN: --linalg-bufferize --convert-linalg-to-loops \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN: --std-bufferize --finalizing-bufferize --lower-affine \
// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-math-to-llvm \
// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
//
// Traits for 2-d tensor (aka matrix) operations.
//
#trait_scale = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A (in)
affine_map<(i,j) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i,j) = A(i,j) * 2.0"
}
#trait_scale_inpl = {
indexing_maps = [
affine_map<(i,j) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i,j) *= 2.0"
}
#trait_op = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // A (in)
affine_map<(i,j) -> (i,j)>, // B (in)
affine_map<(i,j) -> (i,j)> // X (out)
],
iterator_types = ["parallel", "parallel"],
doc = "X(i,j) = A(i,j) OP B(i,j)"
}
module {
// Scales a sparse matrix into a new sparse matrix.
func @matrix_scale(%arga: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
%s = arith.constant 2.0 : f64
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #DCSR>
%d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #DCSR>
%xm = sparse_tensor.init [%d0, %d1] : tensor<?x?xf64, #DCSR>
%0 = linalg.generic #trait_scale
ins(%arga: tensor<?x?xf64, #DCSR>)
outs(%xm: tensor<?x?xf64, #DCSR>) {
^bb(%a: f64, %x: f64):
%1 = arith.mulf %a, %s : f64
linalg.yield %1 : f64
} -> tensor<?x?xf64, #DCSR>
return %0 : tensor<?x?xf64, #DCSR>
}
// Scales a sparse matrix in place.
func @matrix_scale_inplace(%argx: tensor<?x?xf64, #DCSR>
{linalg.inplaceable = true}) -> tensor<?x?xf64, #DCSR> {
%s = arith.constant 2.0 : f64
%0 = linalg.generic #trait_scale_inpl
outs(%argx: tensor<?x?xf64, #DCSR>) {
^bb(%x: f64):
%1 = arith.mulf %x, %s : f64
linalg.yield %1 : f64
} -> tensor<?x?xf64, #DCSR>
return %0 : tensor<?x?xf64, #DCSR>
}
// Adds two sparse matrices element-wise into a new sparse matrix.
func @matrix_add(%arga: tensor<?x?xf64, #DCSR>,
%argb: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #DCSR>
%d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #DCSR>
%xv = sparse_tensor.init [%d0, %d1] : tensor<?x?xf64, #DCSR>
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>)
outs(%xv: tensor<?x?xf64, #DCSR>) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.addf %a, %b : f64
linalg.yield %1 : f64
} -> tensor<?x?xf64, #DCSR>
return %0 : tensor<?x?xf64, #DCSR>
}
// Multiplies two sparse matrices element-wise into a new sparse matrix.
func @matrix_mul(%arga: tensor<?x?xf64, #DCSR>,
%argb: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #DCSR>
%d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #DCSR>
%xv = sparse_tensor.init [%d0, %d1] : tensor<?x?xf64, #DCSR>
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>)
outs(%xv: tensor<?x?xf64, #DCSR>) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.mulf %a, %b : f64
linalg.yield %1 : f64
} -> tensor<?x?xf64, #DCSR>
return %0 : tensor<?x?xf64, #DCSR>
}
// Dump a sparse matrix.
func @dump(%arg0: tensor<?x?xf64, #DCSR>) {
%d0 = arith.constant 0.0 : f64
%c0 = arith.constant 0 : index
%dm = sparse_tensor.convert %arg0 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64>
%0 = memref.buffer_cast %dm : memref<?x?xf64>
%1 = vector.transfer_read %0[%c0, %c0], %d0: memref<?x?xf64>, vector<4x8xf64>
vector.print %1 : vector<4x8xf64>
memref.dealloc %0 : memref<?x?xf64>
return
}
// Driver method to call and verify matrix kernels.
func @entry() {
%c0 = arith.constant 0 : index
%d1 = arith.constant 1.1 : f64
// Setup sparse matrices.
%m1 = arith.constant sparse<
[ [0,0], [0,1], [1,7], [2,2], [2,4], [2,7], [3,0], [3,2], [3,3] ],
[ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ]
> : tensor<4x8xf64>
%m2 = arith.constant sparse<
[ [0,0], [0,7], [1,0], [1,6], [2,1], [2,7] ],
[6.0, 5.0, 4.0, 3.0, 2.0, 1.0 ]
> : tensor<4x8xf64>
%sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
%sm2 = sparse_tensor.convert %m2 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
// Call sparse matrix kernels.
%0 = call @matrix_scale(%sm1)
: (tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
%1 = call @matrix_scale_inplace(%sm1)
: (tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
%2 = call @matrix_add(%sm1, %sm2)
: (tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
%3 = call @matrix_mul(%sm1, %sm2)
: (tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
//
// Verify the results.
//
// CHECK: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
// CHECK-NEXT: ( ( 6, 0, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 0 ), ( 0, 2, 0, 0, 0, 0, 0, 1 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
// CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
// CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
// CHECK-NEXT: ( ( 8, 4, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 6 ), ( 0, 2, 8, 0, 10, 0, 0, 13 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
// CHECK-NEXT: ( ( 12, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 12 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
//
call @dump(%sm1) : (tensor<?x?xf64, #DCSR>) -> ()
call @dump(%sm2) : (tensor<?x?xf64, #DCSR>) -> ()
call @dump(%0) : (tensor<?x?xf64, #DCSR>) -> ()
call @dump(%1) : (tensor<?x?xf64, #DCSR>) -> ()
call @dump(%2) : (tensor<?x?xf64, #DCSR>) -> ()
call @dump(%3) : (tensor<?x?xf64, #DCSR>) -> ()
// Release the resources.
sparse_tensor.release %sm1 : tensor<?x?xf64, #DCSR>
sparse_tensor.release %sm2 : tensor<?x?xf64, #DCSR>
sparse_tensor.release %0 : tensor<?x?xf64, #DCSR>
sparse_tensor.release %2 : tensor<?x?xf64, #DCSR>
sparse_tensor.release %3 : tensor<?x?xf64, #DCSR>
return
}
}


@@ -0,0 +1,90 @@
// RUN: mlir-opt %s \
// RUN: --sparsification --sparse-tensor-conversion \
// RUN: --linalg-bufferize --convert-linalg-to-loops \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN: --std-bufferize --finalizing-bufferize --lower-affine \
// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-math-to-llvm \
// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
#ST1 = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed", "compressed"]}>
#ST2 = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed", "dense"]}>
//
// Trait for 3-d tensor operation.
//
#trait_scale = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A (in)
affine_map<(i,j,k) -> (i,j,k)> // X (out)
],
iterator_types = ["parallel", "parallel", "parallel"],
doc = "X(i,j,k) = A(i,j,k) * 2.0"
}
module {
// Scales a sparse tensor into a new sparse tensor.
func @tensor_scale(%arga: tensor<?x?x?xf64, #ST1>) -> tensor<?x?x?xf64, #ST2> {
%s = arith.constant 2.0 : f64
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%d0 = tensor.dim %arga, %c0 : tensor<?x?x?xf64, #ST1>
%d1 = tensor.dim %arga, %c1 : tensor<?x?x?xf64, #ST1>
%d2 = tensor.dim %arga, %c2 : tensor<?x?x?xf64, #ST1>
%xm = sparse_tensor.init [%d0, %d1, %d2] : tensor<?x?x?xf64, #ST2>
%0 = linalg.generic #trait_scale
ins(%arga: tensor<?x?x?xf64, #ST1>)
outs(%xm: tensor<?x?x?xf64, #ST2>) {
^bb(%a: f64, %x: f64):
%1 = arith.mulf %a, %s : f64
linalg.yield %1 : f64
} -> tensor<?x?x?xf64, #ST2>
return %0 : tensor<?x?x?xf64, #ST2>
}
// Driver method to call and verify tensor kernel.
func @entry() {
%c0 = arith.constant 0 : index
%d1 = arith.constant -1.0 : f64
// Setup sparse tensor.
%t = arith.constant dense<
[ [ [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0 ] ],
[ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] ],
[ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
[0.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 5.0 ],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] ] ]> : tensor<3x4x8xf64>
%st = sparse_tensor.convert %t : tensor<3x4x8xf64> to tensor<?x?x?xf64, #ST1>
// Call the sparse tensor kernel.
%0 = call @tensor_scale(%st) : (tensor<?x?x?xf64, #ST1>) -> tensor<?x?x?xf64, #ST2>
// Sanity check on stored values.
//
// CHECK: ( 1, 2, 3, 4, 5, -1, -1, -1 )
// CHECK-NEXT: ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10, -1, -1, -1, -1, -1, -1, -1, -1 )
%m1 = sparse_tensor.values %st : tensor<?x?x?xf64, #ST1> to memref<?xf64>
%m2 = sparse_tensor.values %0 : tensor<?x?x?xf64, #ST2> to memref<?xf64>
%v1 = vector.transfer_read %m1[%c0], %d1: memref<?xf64>, vector<8xf64>
%v2 = vector.transfer_read %m2[%c0], %d1: memref<?xf64>, vector<32xf64>
vector.print %v1 : vector<8xf64>
vector.print %v2 : vector<32xf64>
// Release the resources.
sparse_tensor.release %st : tensor<?x?x?xf64, #ST1>
sparse_tensor.release %0 : tensor<?x?x?xf64, #ST2>
return
}
}