[mlir][sparse] prepare runtime support lib for multiple dim level types

We are moving from just dense/compressed to more general dim level types,
so we need more than just an "i1" array for annotations.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D102520
commit 56fd4c1cf8 (parent fcd12fed41)
@@ -41,6 +41,19 @@ static unsigned getOverheadTypeEncoding(unsigned width) {
   }
 }
 
+/// Returns internal dimension level type encoding.
+static unsigned
+getDimLevelTypeEncoding(SparseTensorEncodingAttr::DimLevelType dlt) {
+  switch (dlt) {
+  case SparseTensorEncodingAttr::DimLevelType::Dense:
+    return 0;
+  case SparseTensorEncodingAttr::DimLevelType::Compressed:
+    return 1;
+  case SparseTensorEncodingAttr::DimLevelType::Singleton:
+    return 2;
+  }
+}
+
 /// Returns function reference (first hit also inserts into module).
 static FlatSymbolRefAttr getFunc(Operation *op, StringRef name, Type result,
                                  ValueRange operands) {
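As an aside, the three numeric values returned above are the contract between the conversion pass and the runtime support library: they must match the DimLevelType enum added to SparseTensorStorageBase further down in this change. A minimal standalone sketch of that correspondence (names outside the diff are hypothetical):

#include <cassert>
#include <cstdint>

// Mirror of the runtime-side enum introduced by this patch.
enum class DimLevelType : uint8_t { kDense = 0, kCompressed = 1, kSingleton = 2 };

// Hypothetical runtime-side counterpart of getDimLevelTypeEncoding.
static uint8_t encodeDimLevelType(DimLevelType dlt) {
  return static_cast<uint8_t>(dlt);
}

int main() {
  // Dense dims encode as 0, compressed as 1, singleton as 2,
  // matching the switch in getDimLevelTypeEncoding above.
  assert(encodeDimLevelType(DimLevelType::kDense) == 0);
  assert(encodeDimLevelType(DimLevelType::kCompressed) == 1);
  assert(encodeDimLevelType(DimLevelType::kSingleton) == 2);
  return 0;
}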
@@ -107,12 +120,12 @@ class SparseTensorNewConverter : public OpConversionPattern<NewOp> {
     // Sparsity annotations in tensor constant form. Note that we cast
     // the static shape into a dynamic shape to ensure that the method
     // signature remains uniform accross different tensor dimensions.
-    SmallVector<bool, 4> attrs;
+    SmallVector<APInt, 4> attrs;
     unsigned sz = enc.getDimLevelType().size();
     for (unsigned i = 0; i < sz; i++)
-      attrs.push_back(enc.getDimLevelType()[i] ==
-                      SparseTensorEncodingAttr::DimLevelType::Compressed);
-    Type etp = rewriter.getIntegerType(1);
+      attrs.push_back(
+          APInt(8, getDimLevelTypeEncoding(enc.getDimLevelType()[i])));
+    Type etp = rewriter.getIntegerType(8);
     RankedTensorType tt1 = RankedTensorType::get({sz}, etp);
     RankedTensorType tt2 =
         RankedTensorType::get({ShapedType::kDynamicSize}, etp);
@@ -18,6 +18,8 @@
 
 #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
 
+#define AART
+
 #include <algorithm>
 #include <cassert>
 #include <cctype>
@@ -112,6 +114,8 @@ private:
 /// function overloading to implement "partial" method specialization.
 class SparseTensorStorageBase {
 public:
+  enum DimLevelType : uint8_t { kDense = 0, kCompressed = 1, kSingleton = 2 };
+
   virtual uint64_t getDimSize(uint64_t) = 0;
 
   // Overhead storage.
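To make the new annotation array concrete, here is a small standalone sketch (not part of the patch) of the per-dimension annotations a caller would assemble for a CSR-style matrix, one uint8_t entry per tensor dimension:

#include <cstdint>
#include <vector>

// Same values as the DimLevelType enum added above.
enum DimLevelType : uint8_t { kDense = 0, kCompressed = 1, kSingleton = 2 };

int main() {
  // One entry per dimension; the templated reader in this diff asserts
  // that the array length equals the tensor rank.
  std::vector<uint8_t> sparsity = {kDense, kCompressed}; // dense rows, compressed cols
  // sparsity.data() is the kind of uint8_t* annotation pointer the updated
  // runtime entry points now expect instead of a bool*.
  return 0;
}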
@@ -152,7 +156,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 public:
   /// Constructs sparse tensor storage scheme following the given
   /// per-rank dimension dense/sparse annotations.
-  SparseTensorStorage(SparseTensor *tensor, bool *sparsity)
+  SparseTensorStorage(SparseTensor *tensor, uint8_t *sparsity)
       : sizes(tensor->getSizes()), pointers(getRank()), indices(getRank()) {
     // Provide hints on capacity.
     // TODO: needs fine-tuning based on sparsity
@@ -160,16 +164,92 @@ public:
     values.reserve(nnz);
     for (uint64_t d = 0, s = 1, rank = getRank(); d < rank; d++) {
       s *= sizes[d];
-      if (sparsity[d]) {
+      if (sparsity[d] == kCompressed) {
         pointers[d].reserve(s + 1);
         indices[d].reserve(s);
         s = 1;
+      } else {
+        assert(sparsity[d] == kDense && "singleton not yet supported");
       }
     }
     // Then setup the tensor.
     traverse(tensor, sparsity, 0, nnz, 0);
+#ifdef AART
+    dump();
+#endif
   }
 
+#ifdef AART
+  void dump() {
+    fprintf(stderr, "++++++++++ rank=%lu +++++++++++\n", sizes.size());
+    if constexpr (std::is_same_v<P, uint64_t>)
+      fprintf(stderr, "PTR64 ");
+    else if constexpr (std::is_same_v<P, uint32_t>)
+      fprintf(stderr, "PTR32 ");
+    else if constexpr (std::is_same_v<P, uint16_t>)
+      fprintf(stderr, "PTR16 ");
+    else if constexpr (std::is_same_v<P, uint8_t>)
+      fprintf(stderr, "PTR8 ");
+    if constexpr (std::is_same_v<I, uint64_t>)
+      fprintf(stderr, "INDX64 ");
+    else if constexpr (std::is_same_v<I, uint32_t>)
+      fprintf(stderr, "INDX32 ");
+    else if constexpr (std::is_same_v<I, uint16_t>)
+      fprintf(stderr, "INDX16 ");
+    else if constexpr (std::is_same_v<I, uint8_t>)
+      fprintf(stderr, "INDX8 ");
+    if constexpr (std::is_same_v<V, double>)
+      fprintf(stderr, "VALF64\n");
+    else if constexpr (std::is_same_v<V, float>)
+      fprintf(stderr, "VALF32\n");
+    else if constexpr (std::is_same_v<V, int64_t>)
+      fprintf(stderr, "VALI64\n");
+    else if constexpr (std::is_same_v<V, int32_t>)
+      fprintf(stderr, "VALI32\n");
+    else if constexpr (std::is_same_v<V, int16_t>)
+      fprintf(stderr, "VALI16\n");
+    else if constexpr (std::is_same_v<V, int8_t>)
+      fprintf(stderr, "VALI8\n");
+    for (uint64_t r = 0; r < sizes.size(); r++) {
+      fprintf(stderr, "dim %lu #%lu\n", r, sizes[r]);
+      fprintf(stderr, " positions[%lu] #%lu :", r, pointers[r].size());
+      for (uint64_t i = 0; i < pointers[r].size(); i++)
+        if constexpr (std::is_same_v<P, uint64_t>)
+          fprintf(stderr, " %lu", pointers[r][i]);
+        else if constexpr (std::is_same_v<P, uint32_t>)
+          fprintf(stderr, " %u", pointers[r][i]);
+        else if constexpr (std::is_same_v<P, uint16_t>)
+          fprintf(stderr, " %u", pointers[r][i]);
+        else if constexpr (std::is_same_v<P, uint8_t>)
+          fprintf(stderr, " %u", pointers[r][i]);
+      fprintf(stderr, "\n indices[%lu] #%lu :", r, indices[r].size());
+      for (uint64_t i = 0; i < indices[r].size(); i++)
+        if constexpr (std::is_same_v<I, uint64_t>)
+          fprintf(stderr, " %lu", indices[r][i]);
+        else if constexpr (std::is_same_v<I, uint32_t>)
+          fprintf(stderr, " %u", indices[r][i]);
+        else if constexpr (std::is_same_v<I, uint16_t>)
+          fprintf(stderr, " %u", indices[r][i]);
+        else if constexpr (std::is_same_v<I, uint8_t>)
+          fprintf(stderr, " %u", indices[r][i]);
+      fprintf(stderr, "\n");
+    }
+    fprintf(stderr, "values #%lu :", values.size());
+    for (uint64_t i = 0; i < values.size(); i++)
+      if constexpr (std::is_same_v<V, double>)
+        fprintf(stderr, " %lf", values[i]);
+      else if constexpr (std::is_same_v<V, float>)
+        fprintf(stderr, " %f", values[i]);
+      else if constexpr (std::is_same_v<V, int32_t>)
+        fprintf(stderr, " %d", values[i]);
+      else if constexpr (std::is_same_v<V, int16_t>)
+        fprintf(stderr, " %d", values[i]);
+      else if constexpr (std::is_same_v<V, int8_t>)
+        fprintf(stderr, " %d", values[i]);
+    fprintf(stderr, "\n+++++++++++++++++++++++++++++\n");
+  }
+#endif
+
   virtual ~SparseTensorStorage() {}
 
   uint64_t getRank() const { return sizes.size(); }
@@ -190,8 +270,8 @@ private:
   /// representation of an external sparse tensor. This method prepares
   /// the pointers and indices arrays under the given per-rank dimension
   /// dense/sparse annotations.
-  void traverse(SparseTensor *tensor, bool *sparsity, uint64_t lo, uint64_t hi,
-                uint64_t d) {
+  void traverse(SparseTensor *tensor, uint8_t *sparsity, uint64_t lo,
+                uint64_t hi, uint64_t d) {
     const std::vector<Element> &elements = tensor->getElements();
     // Once dimensions are exhausted, insert the numerical values.
     if (d == getRank()) {
@@ -199,7 +279,7 @@ private:
       return;
     }
     // Prepare a sparse pointer structure at this dimension.
-    if (sparsity[d] && pointers[d].empty())
+    if (sparsity[d] == kCompressed && pointers[d].empty())
       pointers[d].push_back(0);
     // Visit all elements in this interval.
     uint64_t full = 0;
@@ -210,7 +290,7 @@ private:
       while (seg < hi && elements[seg].indices[d] == idx)
         seg++;
       // Handle segment in interval for sparse or dense dimension.
-      if (sparsity[d]) {
+      if (sparsity[d] == kCompressed) {
         indices[d].push_back(idx);
       } else {
         for (; full < idx; full++)
@@ -222,7 +302,7 @@ private:
       lo = seg;
     }
     // Finalize the sparse pointer structure at this dimension.
-    if (sparsity[d]) {
+    if (sparsity[d] == kCompressed) {
       pointers[d].push_back(indices[d].size());
     } else {
       for (uint64_t sz = tensor->getSizes()[d]; full < sz; full++)
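For a concrete picture of what traverse() produces for a compressed dimension, a small standalone sketch (not part of the patch; toy data) of the resulting pointers/indices layout for a 3x4 matrix with nonzeros at (0,1), (0,3), (2,2) and annotations {kDense, kCompressed}, i.e. CSR:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Dense dim 0: all 3 rows are materialized.
  // Compressed dim 1: pointers has rows+1 entries, indices lists column ids.
  std::vector<uint64_t> pointers1 = {0, 2, 2, 3}; // row r spans [ptr[r], ptr[r+1])
  std::vector<uint64_t> indices1 = {1, 3, 2};     // column of each stored value
  std::vector<double> values = {1.0, 2.0, 3.0};   // stored in lexicographic order
  for (uint64_t r = 0; r < 3; r++)
    for (uint64_t k = pointers1[r]; k < pointers1[r + 1]; k++)
      std::printf("(%lu,%lu) = %f\n", r, indices1[k], values[k]);
  return 0;
}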
@@ -239,7 +319,7 @@ private:
 
 /// Templated reader.
 template <typename P, typename I, typename V>
-void *newSparseTensor(char *filename, bool *sparsity, uint64_t size) {
+void *newSparseTensor(char *filename, uint8_t *sparsity, uint64_t size) {
   uint64_t idata[64];
   SparseTensor *t = static_cast<SparseTensor *>(openTensorC(filename, idata));
   assert(size == t->getRank()); // sparsity array must match rank
@@ -410,6 +490,16 @@ void *openTensorC(char *filename, uint64_t *idata) {
   // Close the file and return sorted tensor.
   fclose(file);
   tensor->sort(); // sort lexicographically
+#if 1
+  const std::vector<Element> &elements = tensor->getElements();
+  for (uint64_t k = 1; k < nnz; k++) {
+    uint64_t same = 0;
+    for (uint64_t r = 0; r < rank; r++)
+      if (elements[k].indices[r] == elements[k - 1].indices[r])
+        same++;
+    assert(same < rank && "duplicate element");
+  }
+#endif
   return tensor;
 }
 
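The added sanity check relies on the preceding lexicographic sort: two consecutive elements are duplicates exactly when they agree in every dimension, so "same < rank" must hold for each adjacent pair. A standalone sketch of the same logic (not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

using Coords = std::vector<uint64_t>;

// Checks lexicographically sorted coordinate lists for exact duplicates.
static void assertNoDuplicates(const std::vector<Coords> &elements, uint64_t rank) {
  for (uint64_t k = 1; k < elements.size(); k++) {
    uint64_t same = 0;
    for (uint64_t r = 0; r < rank; r++)
      if (elements[k][r] == elements[k - 1][r])
        same++;
    assert(same < rank && "duplicate element");
  }
}

int main() {
  assertNoDuplicates({{0, 1}, {0, 3}, {2, 2}}, 2); // ok: all coordinates distinct
  // assertNoDuplicates({{0, 1}, {0, 1}}, 2);      // would trip the assert
  return 0;
}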
@@ -509,11 +599,11 @@ enum PrimaryTypeEnum : uint64_t {
   kI8 = 5
 };
 
-void *newSparseTensor(char *filename, bool *abase, bool *adata, uint64_t aoff,
-                      uint64_t asize, uint64_t astride, uint64_t ptrTp,
-                      uint64_t indTp, uint64_t valTp) {
+void *newSparseTensor(char *filename, uint8_t *abase, uint8_t *adata,
+                      uint64_t aoff, uint64_t asize, uint64_t astride,
+                      uint64_t ptrTp, uint64_t indTp, uint64_t valTp) {
   assert(astride == 1);
-  bool *sparsity = adata + aoff;
+  uint8_t *sparsity = adata + aoff;
 
   // The most common cases: 64-bit or 32-bit overhead, double/float values.
   CASE(kU64, kU64, kF64, uint64_t, uint64_t, double);
@@ -33,9 +33,9 @@ func @sparse_dim(%arg0: tensor<?xf64, #SparseVector>) -> index {
 
 // CHECK-LABEL: func @sparse_new1d(
 // CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK: %[[D:.*]] = constant dense<true> : tensor<1xi1>
-// CHECK: %[[C:.*]] = tensor.cast %[[D]] : tensor<1xi1> to tensor<?xi1>
-// CHECK: %[[T:.*]] = call @newSparseTensor(%[[A]], %[[C]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, tensor<?xi1>, i64, i64, i64) -> !llvm.ptr<i8>
+// CHECK: %[[D:.*]] = constant dense<1> : tensor<1xi8>
+// CHECK: %[[C:.*]] = tensor.cast %[[D]] : tensor<1xi8> to tensor<?xi8>
+// CHECK: %[[T:.*]] = call @newSparseTensor(%[[A]], %[[C]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, tensor<?xi8>, i64, i64, i64) -> !llvm.ptr<i8>
 // CHECK: return %[[T]] : !llvm.ptr<i8>
 func @sparse_new1d(%arg0: !llvm.ptr<i8>) -> tensor<128xf64, #SparseVector> {
   %0 = sparse_tensor.new %arg0 : !llvm.ptr<i8> to tensor<128xf64, #SparseVector>
@@ -44,9 +44,9 @@ func @sparse_new1d(%arg0: !llvm.ptr<i8>) -> tensor<128xf64, #SparseVector> {
 
 // CHECK-LABEL: func @sparse_new2d(
 // CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK: %[[D:.*]] = constant dense<[false, true]> : tensor<2xi1>
-// CHECK: %[[C:.*]] = tensor.cast %[[D]] : tensor<2xi1> to tensor<?xi1>
-// CHECK: %[[T:.*]] = call @newSparseTensor(%[[A]], %[[C]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, tensor<?xi1>, i64, i64, i64) -> !llvm.ptr<i8>
+// CHECK: %[[D:.*]] = constant dense<[0, 1]> : tensor<2xi8>
+// CHECK: %[[C:.*]] = tensor.cast %[[D]] : tensor<2xi8> to tensor<?xi8>
+// CHECK: %[[T:.*]] = call @newSparseTensor(%[[A]], %[[C]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, tensor<?xi8>, i64, i64, i64) -> !llvm.ptr<i8>
 // CHECK: return %[[T]] : !llvm.ptr<i8>
 func @sparse_new2d(%arg0: !llvm.ptr<i8>) -> tensor<?x?xf32, #SparseMatrix> {
   %0 = sparse_tensor.new %arg0 : !llvm.ptr<i8> to tensor<?x?xf32, #SparseMatrix>