From 1ce77b562de4d9a6fc703d39c9242f9786082ef1 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Tue, 16 Nov 2021 12:46:55 -0800 Subject: [PATCH] [mlir][sparse] refine lexicographic insertion to any tensor First version was vectors only. With some clever "path" insertion, we now support any d-dimensional tensor. Up next: reductions too Reviewed By: bixia, wrengr Differential Revision: https://reviews.llvm.org/D114024 --- .../Transforms/Sparsification.cpp | 3 - .../lib/ExecutionEngine/SparseTensorUtils.cpp | 125 ++++++++++--- .../test/Dialect/SparseTensor/sparse_out.mlir | 57 +++++- .../SparseTensor/CPU/sparse_matrix_ops.mlir | 176 ++++++++++++++++++ .../SparseTensor/CPU/sparse_tensor_ops.mlir | 90 +++++++++ 5 files changed, 418 insertions(+), 33 deletions(-) create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index cdcbfc2a54ad..31d3ee520fbd 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -325,9 +325,6 @@ static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op, for (auto attr : op.iterator_types()) if (isReductionIterator(attr)) return false; - // TODO: generalize support lib beyond vectors - if (op.iterator_types().size() != 1) - return false; *sparseOut = lhs; return true; } diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 69e678ce3657..75664ecabd07 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -247,12 +247,9 @@ public: if (tensor) { uint64_t nnz = tensor->getElements().size(); values.reserve(nnz); - fromCOO(tensor, sparsity, 0, nnz, 0); - } else { - if (allDense) - values.resize(sz, 0); - for (uint64_t r = 0; r < rank; r++) - idx[r] = -1u; + fromCOO(tensor, 0, nnz, 0); + } else if (allDense) { + values.resize(sz, 0); } } @@ -279,16 +276,26 @@ public: void getValues(std::vector **out) override { *out = &values; } /// Partially specialize lexicographic insertions based on template types. - // TODO: 1-dim tensors only for now, generalize soon void lexInsert(uint64_t *cursor, V val) override { - assert((idx[0] == -1u || idx[0] < cursor[0]) && "not lexicographic"); - indices[0].push_back(cursor[0]); - values.push_back(val); - idx[0] = cursor[0]; + // First, wrap up pending insertion path. + uint64_t diff = 0; + uint64_t top = 0; + if (!values.empty()) { + diff = lexDiff(cursor); + endPath(diff + 1); + top = idx[diff] + 1; + } + // Then continue with insertion path. + insPath(cursor, diff, top, val); } /// Finalizes lexicographic insertions. - void endInsert() override { pointers[0].push_back(indices[0].size()); } + void endInsert() override { + if (values.empty()) + endDim(0); + else + endPath(0); + } /// Returns this sparse tensor storage scheme as a new memory-resident /// sparse tensor in coordinate scheme with the given dimension order. @@ -342,14 +349,14 @@ private: /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the pointers and /// indices arrays under the given per-dimension dense/sparse annotations. 
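
To make that concrete, here is a minimal standalone sketch that mirrors the recursion of fromCOO below on the nonzeros of %m1 from the new sparse_matrix_ops.mlir test. Assumptions: a fixed 4x8 shape, a dense dim 0 and a compressed dim 1 (so both annotation paths are exercised), and a hypothetical SketchCSR struct that is not the library's SparseTensorStorage. The expected pointers/indices/values are noted in the comments.

#include <cstdint>
#include <iostream>
#include <vector>

// One element of a lexicographically sorted 2-d COO list.
struct Element {
  uint64_t indices[2];
  double value;
};

// Illustrative only: 4x8 storage with a dense dim 0 and a compressed dim 1
// (i.e. CSR), so both annotation paths of the recursion are exercised.
struct SketchCSR {
  static constexpr uint64_t kRows = 4;
  std::vector<uint64_t> pointers1{0}; // dim 1 is compressed
  std::vector<uint64_t> indices1;
  std::vector<double> values;

  // Ends a dimension that received no elements (an empty row here).
  void endDim(uint64_t d) {
    if (d == 1)
      pointers1.push_back(indices1.size());
    // A dense innermost dimension would push zero values here instead.
  }

  // Visits elements [lo, hi) that agree on all dimensions before d.
  void fromCOO(const std::vector<Element> &el, uint64_t lo, uint64_t hi,
               uint64_t d) {
    if (d == 2) {                           // innermost: emit the value
      values.push_back(el[lo].value);
      return;
    }
    uint64_t full = 0;
    while (lo < hi) {
      uint64_t i = el[lo].indices[d];
      uint64_t seg = lo + 1;                // segment sharing index i at dim d
      while (seg < hi && el[seg].indices[d] == i)
        seg++;
      if (d == 1) {                         // compressed: record the index
        indices1.push_back(i);
      } else {                              // dense: close skipped rows first
        for (; full < i; full++)
          endDim(d + 1);
        full++;
      }
      fromCOO(el, lo, seg, d + 1);
      lo = seg;
    }
    if (d == 1)                             // finalize this compressed dim
      pointers1.push_back(indices1.size());
    else
      for (; full < kRows; full++)          // dense: close trailing rows
        endDim(d + 1);
  }
};

int main() {
  // Nonzeros of %m1 from sparse_matrix_ops.mlir, already sorted.
  std::vector<Element> el = {
      {{0, 0}, 1.0}, {{0, 1}, 2.0}, {{1, 7}, 3.0},
      {{2, 2}, 4.0}, {{2, 4}, 5.0}, {{2, 7}, 6.0},
      {{3, 0}, 7.0}, {{3, 2}, 8.0}, {{3, 3}, 9.0}};
  SketchCSR s;
  s.fromCOO(el, 0, el.size(), 0);
  // Expected: pointers1 = 0 2 3 6 9
  //           indices1  = 0 1 7 2 4 7 0 2 3
  //           values    = 1 2 3 4 5 6 7 8 9
  std::cout << "pointers:";
  for (uint64_t p : s.pointers1) std::cout << ' ' << p;
  std::cout << "\nindices:";
  for (uint64_t i : s.indices1) std::cout << ' ' << i;
  std::cout << "\nvalues:";
  for (double v : s.values) std::cout << ' ' << v;
  std::cout << '\n';
}
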
- void fromCOO(SparseTensorCOO *tensor, const DimLevelType *sparsity, - uint64_t lo, uint64_t hi, uint64_t d) { + void fromCOO(SparseTensorCOO *tensor, uint64_t lo, uint64_t hi, + uint64_t d) { const std::vector> &elements = tensor->getElements(); // Once dimensions are exhausted, insert the numerical values. assert(d <= getRank()); if (d == getRank()) { - assert(lo >= hi || lo < elements.size()); - values.push_back(lo < hi ? elements[lo].value : 0); + assert(lo < hi && hi <= elements.size()); + values.push_back(elements[lo].value); return; } // Visit all elements in this interval. @@ -362,28 +369,28 @@ private: while (seg < hi && elements[seg].indices[d] == i) seg++; // Handle segment in interval for sparse or dense dimension. - if (sparsity[d] == DimLevelType::kCompressed) { + if (isCompressedDim(d)) { indices[d].push_back(i); } else { // For dense storage we must fill in all the zero values between // the previous element (when last we ran this for-loop) and the // current element. for (; full < i; full++) - fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty + endDim(d + 1); full++; } - fromCOO(tensor, sparsity, lo, seg, d + 1); + fromCOO(tensor, lo, seg, d + 1); // And move on to next segment in interval. lo = seg; } // Finalize the sparse pointer structure at this dimension. - if (sparsity[d] == DimLevelType::kCompressed) { + if (isCompressedDim(d)) { pointers[d].push_back(indices[d].size()); } else { // For dense storage we must fill in all the zero values after // the last element. for (uint64_t sz = sizes[d]; full < sz; full++) - fromCOO(tensor, sparsity, 0, 0, d + 1); // pass empty + endDim(d + 1); } } @@ -395,21 +402,83 @@ private: if (d == getRank()) { assert(pos < values.size()); tensor->add(idx, values[pos]); - } else if (pointers[d].empty()) { - // Dense dimension. - for (uint64_t i = 0, sz = sizes[d], off = pos * sz; i < sz; i++) { - idx[reord[d]] = i; - toCOO(tensor, reord, off + i, d + 1); - } - } else { + } else if (isCompressedDim(d)) { // Sparse dimension. for (uint64_t ii = pointers[d][pos]; ii < pointers[d][pos + 1]; ii++) { idx[reord[d]] = indices[d][ii]; toCOO(tensor, reord, ii, d + 1); } + } else { + // Dense dimension. + for (uint64_t i = 0, sz = sizes[d], off = pos * sz; i < sz; i++) { + idx[reord[d]] = i; + toCOO(tensor, reord, off + i, d + 1); + } } } + /// Ends a deeper, never seen before dimension. + void endDim(uint64_t d) { + assert(d <= getRank()); + if (d == getRank()) { + values.push_back(0); + } else if (isCompressedDim(d)) { + pointers[d].push_back(indices[d].size()); + } else { + for (uint64_t full = 0, sz = sizes[d]; full < sz; full++) + endDim(d + 1); + } + } + + /// Wraps up a single insertion path, inner to outer. + void endPath(uint64_t diff) { + uint64_t rank = getRank(); + assert(diff <= rank); + for (uint64_t i = 0; i < rank - diff; i++) { + uint64_t d = rank - i - 1; + if (isCompressedDim(d)) { + pointers[d].push_back(indices[d].size()); + } else { + for (uint64_t full = idx[d] + 1, sz = sizes[d]; full < sz; full++) + endDim(d + 1); + } + } + } + + /// Continues a single insertion path, outer to inner. + void insPath(uint64_t *cursor, uint64_t diff, uint64_t top, V val) { + uint64_t rank = getRank(); + assert(diff < rank); + for (uint64_t d = diff; d < rank; d++) { + uint64_t i = cursor[d]; + if (isCompressedDim(d)) { + indices[d].push_back(i); + } else { + for (uint64_t full = top; full < i; full++) + endDim(d + 1); + } + top = 0; + idx[d] = i; + } + values.push_back(val); + } + + /// Finds the lexicographic differing dimension. 
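
Taken together with lexDiff below, endPath and insPath maintain a single open insertion path from one lexInsert to the next. Here is a minimal standalone sketch of that interplay, assuming a 2-d tensor with both dimensions compressed (DCSR, as in the new integration tests) and a hypothetical SketchDCSR struct rather than the library's SparseTensorStorage; dense dimensions, which would additionally be zero-padded through endDim, are not modeled. The expected result is noted in the comments.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative only: 2-d storage with both dimensions compressed (DCSR).
struct SketchDCSR {
  std::vector<uint64_t> pointers[2] = {{0}, {0}};
  std::vector<uint64_t> indices[2];
  std::vector<double> values;
  uint64_t idx[2] = {0, 0}; // last inserted coordinate per dimension

  // First dimension where the new coordinate differs from the last one.
  uint64_t lexDiff(const uint64_t *cursor) const {
    for (uint64_t d = 0; d < 2; d++)
      if (cursor[d] > idx[d])
        return d;
      else
        assert(cursor[d] == idx[d] && "non-lexicographic insertion");
    assert(0 && "duplicate insertion");
    return -1u;
  }

  // Closes the compressed dimensions inner to outer, down to dimension diff.
  void endPath(uint64_t diff) {
    for (uint64_t d = 2; d-- > diff;)
      pointers[d].push_back(indices[d].size());
  }

  // Extends the insertion path outer to inner from dimension diff.
  void insPath(const uint64_t *cursor, uint64_t diff, double val) {
    for (uint64_t d = diff; d < 2; d++) {
      indices[d].push_back(cursor[d]);
      idx[d] = cursor[d];
    }
    values.push_back(val);
  }

  void lexInsert(const uint64_t *cursor, double val) {
    if (values.empty())             // first insertion: open a fresh path
      return insPath(cursor, 0, val);
    uint64_t diff = lexDiff(cursor);
    endPath(diff + 1);              // wrap up the pending path first
    insPath(cursor, diff, val);     // then continue from where it differs
  }

  void endInsert() { endPath(0); }  // assumes at least one insertion happened
};

int main() {
  SketchDCSR t;
  uint64_t a[] = {0, 2}, b[] = {0, 5}, c[] = {1, 3};
  t.lexInsert(a, 1.0);
  t.lexInsert(b, 2.0);
  t.lexInsert(c, 3.0);
  t.endInsert();
  // Expected: dim 0 pointers: 0 2    indices: 0 1
  //           dim 1 pointers: 0 2 3  indices: 2 5 3
  //           values: 1 2 3
  for (uint64_t d = 0; d < 2; d++) {
    std::cout << "dim " << d << " pointers:";
    for (uint64_t p : t.pointers[d]) std::cout << ' ' << p;
    std::cout << " indices:";
    for (uint64_t i : t.indices[d]) std::cout << ' ' << i;
    std::cout << '\n';
  }
  std::cout << "values:";
  for (double v : t.values) std::cout << ' ' << v;
  std::cout << '\n';
}
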
+ uint64_t lexDiff(uint64_t *cursor) { + for (uint64_t r = 0, rank = getRank(); r < rank; r++) + if (cursor[r] > idx[r]) + return r; + else + assert(cursor[r] == idx[r] && "non-lexicographic insertion"); + assert(0 && "duplication insertion"); + return -1u; + } + + /// Returns true if dimension is compressed. + inline bool isCompressedDim(uint64_t d) const { + return (!pointers[d].empty()); + } + private: std::vector sizes; // per-dimension sizes std::vector rev; // "reverse" permutation diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir index e17e3e89bef1..90ba2ff4d6df 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -11,7 +11,7 @@ dimOrdering = affine_map<(i,j) -> (i,j)> }> -#trait_scale = { +#trait_scale_inpl = { indexing_maps = [ affine_map<(i,j) -> (i,j)> // X (out) ], @@ -44,7 +44,7 @@ // CHECK: } func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR> {linalg.inplaceable = true}) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 - %0 = linalg.generic #trait_scale + %0 = linalg.generic #trait_scale_inpl outs(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 @@ -129,3 +129,56 @@ func @sparse_simply_dynamic2(%arga: tensor<32x16xf32, #CSR>, } -> tensor<32x16xf32, #DCSR> return %0 : tensor<32x16xf32, #DCSR> } + +#trait_scale = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) = A(i,j) * 2.0" +} + +// CHECK-LABEL: func @sparse_truly_dynamic( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 20 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = sparse_tensor.init{{\[}}%[[VAL_2]], %[[VAL_3]]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_11:.*]] = memref.alloca(%[[VAL_5]]) : memref +// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_2]] step %[[VAL_4]] { +// CHECK: memref.store %[[VAL_12]], %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref +// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_4]] { +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref +// CHECK: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_19:.*]] = arith.mulf %[[VAL_18]], %[[VAL_1]] : f32 +// CHECK: sparse_tensor.lex_insert %[[VAL_7]], %[[VAL_11]], %[[VAL_19]] : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: } +// CHECK: } +// 
CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_7]] hasInserts : tensor<10x20xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: return %[[VAL_20]] : tensor<10x20xf32, #sparse_tensor.encoding<{ +// CHECK: } +func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20xf32, #DCSR> { + %s = arith.constant 2.0 : f32 + %d10 = arith.constant 10 : index + %d20 = arith.constant 20 : index + %xm = sparse_tensor.init [%d10, %d20] : tensor<10x20xf32, #DCSR> + %0 = linalg.generic #trait_scale + ins(%arga: tensor<10x20xf32, #CSR>) + outs(%xm: tensor<10x20xf32, #DCSR>) { + ^bb(%a: f32, %x: f32): + %1 = arith.mulf %a, %s : f32 + linalg.yield %1 : f32 + } -> tensor<10x20xf32, #DCSR> + return %0 : tensor<10x20xf32, #DCSR> +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir new file mode 100644 index 000000000000..318f99aa20e5 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir @@ -0,0 +1,176 @@ +// RUN: mlir-opt %s \ +// RUN: --sparsification --sparse-tensor-conversion \ +// RUN: --linalg-bufferize --convert-linalg-to-loops \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-math-to-llvm \ +// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +// +// Traits for 2-d tensor (aka matrix) operations. +// +#trait_scale = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A (in) + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) = A(i,j) * 2.0" +} +#trait_scale_inpl = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) *= 2.0" +} +#trait_op = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A (in) + affine_map<(i,j) -> (i,j)>, // B (in) + affine_map<(i,j) -> (i,j)> // X (out) + ], + iterator_types = ["parallel", "parallel"], + doc = "X(i,j) = A(i,j) OP B(i,j)" +} + +module { + // Scales a sparse matrix into a new sparse matrix. + func @matrix_scale(%arga: tensor) -> tensor { + %s = arith.constant 2.0 : f64 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %xm = sparse_tensor.init [%d0, %d1] : tensor + %0 = linalg.generic #trait_scale + ins(%arga: tensor) + outs(%xm: tensor) { + ^bb(%a: f64, %x: f64): + %1 = arith.mulf %a, %s : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + // Scales a sparse matrix in place. + func @matrix_scale_inplace(%argx: tensor + {linalg.inplaceable = true}) -> tensor { + %s = arith.constant 2.0 : f64 + %0 = linalg.generic #trait_scale_inpl + outs(%argx: tensor) { + ^bb(%x: f64): + %1 = arith.mulf %x, %s : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + // Adds two sparse matrices element-wise into a new sparse matrix. 
+ func @matrix_add(%arga: tensor, + %argb: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %xv = sparse_tensor.init [%d0, %d1] : tensor + %0 = linalg.generic #trait_op + ins(%arga, %argb: tensor, tensor) + outs(%xv: tensor) { + ^bb(%a: f64, %b: f64, %x: f64): + %1 = arith.addf %a, %b : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + // Multiplies two sparse matrices element-wise into a new sparse matrix. + func @matrix_mul(%arga: tensor, + %argb: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %xv = sparse_tensor.init [%d0, %d1] : tensor + %0 = linalg.generic #trait_op + ins(%arga, %argb: tensor, tensor) + outs(%xv: tensor) { + ^bb(%a: f64, %b: f64, %x: f64): + %1 = arith.mulf %a, %b : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + // Dump a sparse matrix. + func @dump(%arg0: tensor) { + %d0 = arith.constant 0.0 : f64 + %c0 = arith.constant 0 : index + %dm = sparse_tensor.convert %arg0 : tensor to tensor + %0 = memref.buffer_cast %dm : memref + %1 = vector.transfer_read %0[%c0, %c0], %d0: memref, vector<4x8xf64> + vector.print %1 : vector<4x8xf64> + memref.dealloc %0 : memref + return + } + + // Driver method to call and verify matrix kernels. + func @entry() { + %c0 = arith.constant 0 : index + %d1 = arith.constant 1.1 : f64 + + // Setup sparse matrices. + %m1 = arith.constant sparse< + [ [0,0], [0,1], [1,7], [2,2], [2,4], [2,7], [3,0], [3,2], [3,3] ], + [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ] + > : tensor<4x8xf64> + %m2 = arith.constant sparse< + [ [0,0], [0,7], [1,0], [1,6], [2,1], [2,7] ], + [6.0, 5.0, 4.0, 3.0, 2.0, 1.0 ] + > : tensor<4x8xf64> + %sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor + %sm2 = sparse_tensor.convert %m2 : tensor<4x8xf64> to tensor + + // Call sparse vector kernels. + %0 = call @matrix_scale(%sm1) + : (tensor) -> tensor + %1 = call @matrix_scale_inplace(%sm1) + : (tensor) -> tensor + %2 = call @matrix_add(%sm1, %sm2) + : (tensor, tensor) -> tensor + %3 = call @matrix_mul(%sm1, %sm2) + : (tensor, tensor) -> tensor + + // + // Verify the results. + // + // CHECK: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) ) + // CHECK-NEXT: ( ( 6, 0, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 0 ), ( 0, 2, 0, 0, 0, 0, 0, 1 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) ) + // CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) ) + // CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) ) + // CHECK-NEXT: ( ( 8, 4, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 6 ), ( 0, 2, 8, 0, 10, 0, 0, 13 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) ) + // CHECK-NEXT: ( ( 12, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 12 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) ) + // + call @dump(%sm1) : (tensor) -> () + call @dump(%sm2) : (tensor) -> () + call @dump(%0) : (tensor) -> () + call @dump(%1) : (tensor) -> () + call @dump(%2) : (tensor) -> () + call @dump(%3) : (tensor) -> () + + // Release the resources. 
+ sparse_tensor.release %sm1 : tensor + sparse_tensor.release %sm2 : tensor + sparse_tensor.release %0 : tensor + sparse_tensor.release %2 : tensor + sparse_tensor.release %3 : tensor + return + } +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir new file mode 100644 index 000000000000..2156ada00ba3 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir @@ -0,0 +1,90 @@ +// RUN: mlir-opt %s \ +// RUN: --sparsification --sparse-tensor-conversion \ +// RUN: --linalg-bufferize --convert-linalg-to-loops \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-math-to-llvm \ +// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#ST1 = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed", "compressed"]}> +#ST2 = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed", "dense"]}> + +// +// Trait for 3-d tensor operation. +// +#trait_scale = { + indexing_maps = [ + affine_map<(i,j,k) -> (i,j,k)>, // A (in) + affine_map<(i,j,k) -> (i,j,k)> // X (out) + ], + iterator_types = ["parallel", "parallel", "parallel"], + doc = "X(i,j,k) = A(i,j,k) * 2.0" +} + +module { + // Scales a sparse tensor into a new sparse tensor. + func @tensor_scale(%arga: tensor) -> tensor { + %s = arith.constant 2.0 : f64 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %d0 = tensor.dim %arga, %c0 : tensor + %d1 = tensor.dim %arga, %c1 : tensor + %d2 = tensor.dim %arga, %c2 : tensor + %xm = sparse_tensor.init [%d0, %d1, %d2] : tensor + %0 = linalg.generic #trait_scale + ins(%arga: tensor) + outs(%xm: tensor) { + ^bb(%a: f64, %x: f64): + %1 = arith.mulf %a, %s : f64 + linalg.yield %1 : f64 + } -> tensor + return %0 : tensor + } + + // Driver method to call and verify tensor kernel. + func @entry() { + %c0 = arith.constant 0 : index + %d1 = arith.constant -1.0 : f64 + + // Setup sparse tensor. + %t = arith.constant dense< + [ [ [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0 ] ], + [ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] ], + [ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [0.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 5.0 ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] ] ]> : tensor<3x4x8xf64> + %st = sparse_tensor.convert %t : tensor<3x4x8xf64> to tensor + + // Call sparse vector kernels. + %0 = call @tensor_scale(%st) : (tensor) -> tensor + + // Sanity check on stored values. 
+ // + // CHECK: ( 1, 2, 3, 4, 5, -1, -1, -1 ) + // CHECK-NEXT: ( 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 8, 0, 0, 0, 0, 10, -1, -1, -1, -1, -1, -1, -1, -1 ) + %m1 = sparse_tensor.values %st : tensor to memref + %m2 = sparse_tensor.values %0 : tensor to memref + %v1 = vector.transfer_read %m1[%c0], %d1: memref, vector<8xf64> + %v2 = vector.transfer_read %m2[%c0], %d1: memref, vector<32xf64> + vector.print %v1 : vector<8xf64> + vector.print %v2 : vector<32xf64> + + // Release the resources. + sparse_tensor.release %st : tensor + sparse_tensor.release %0 : tensor + return + } +}
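
As a cross-check on the second vector printed above, here is a minimal standalone sketch (not the MLIR runtime; the names below are illustrative) of how the #ST2 encoding lays out the scaled values: only the (i,j) pairs that hold a nonzero are materialized, each as a dense run of 8 values along the innermost dimension, giving the 24 stored values at the front of the CHECK-NEXT vector; the trailing -1 entries are just the vector.transfer_read padding past the end of the values memref.

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Nonzeros of the 3x4x8 tensor above, already scaled by 2.0 as in
  // @tensor_scale.
  struct NZ { uint64_t i, j, k; double v; };
  const std::vector<NZ> nonzeros = {
      {0, 0, 0, 2.0}, {0, 3, 7, 4.0},
      {2, 2, 1, 6.0}, {2, 2, 2, 8.0}, {2, 2, 7, 10.0}};
  // Under ["compressed", "compressed", "dense"], only (i,j) pairs containing
  // a nonzero are stored, each as a dense run of 8 values along k.
  std::map<std::pair<uint64_t, uint64_t>, std::vector<double>> runs;
  for (const NZ &nz : nonzeros) {
    std::vector<double> &run = runs[{nz.i, nz.j}];
    run.resize(8, 0.0);
    run[nz.k] = nz.v;
  }
  std::vector<double> values; // what sparse_tensor.values %0 reads
  for (const auto &entry : runs)
    values.insert(values.end(), entry.second.begin(), entry.second.end());
  // Expected 24 stored values, matching the CHECK-NEXT prefix:
  // 2 0 0 0 0 0 0 0  0 0 0 0 0 0 0 4  0 6 8 0 0 0 0 10
  for (double v : values)
    std::cout << v << ' ';
  std::cout << '\n';
}
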