// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s

// Test to demonstrate the difference between non-annotated dense tensors
// and all-dense-annotated "sparse" tensors. The former class remains as
// multi-dimensional tensors that are bufferized by subsequent passes. The
// latter class is linearized into one-dimensional buffers that are backed
// by the runtime support library.

#DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>
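
// An all-dense annotation still routes the tensor through the sparse
// runtime, but as a single linearized values array: element (i,j) of a
// 32x16 matrix resides at offset i * 16 + j. This is exactly the
// muli/addi index arithmetic that appears in the CHECK lines below.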

#trait_2d = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,  // A
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) + 1"
}

#trait_3d = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i,j,k)>,  // A
    affine_map<(i,j,k) -> (i,j)>     // X (out)
  ],
  iterator_types = ["parallel", "parallel", "reduction"],
  doc = "X(i,j) += A(i,j,k)"
}
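
// Note that #trait_3d omits the innermost index k from the output map,
// so the k-loop is a reduction; @dense4 below shows how the sparse
// compiler scalarizes it.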

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is not in-placeable.
// This results in an explicit allocation and copy to materialize the output.
//
// CHECK-LABEL:   func @dense1(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = false}) -> tensor<32x16xf32> {
// CHECK:           %[[VAL_2:.*]] = constant 1.000000e+00 : f32
// CHECK:           %[[VAL_3:.*]] = constant 32 : index
// CHECK:           %[[VAL_4:.*]] = constant 16 : index
// CHECK:           %[[VAL_5:.*]] = constant 0 : index
// CHECK:           %[[VAL_6:.*]] = constant 1 : index
// CHECK:           %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
// CHECK:           %[[VAL_9:.*]] = memref.alloc() : memref<32x16xf32>
// CHECK:           linalg.copy(%[[VAL_8]], %[[VAL_9]]) : memref<32x16xf32>, memref<32x16xf32>
// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_12:.*]] = muli %[[VAL_10]], %[[VAL_4]] : index
// CHECK:               %[[VAL_13:.*]] = addi %[[VAL_12]], %[[VAL_11]] : index
// CHECK:               %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK:               %[[VAL_15:.*]] = addf %[[VAL_14]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_16:.*]] = memref.tensor_load %[[VAL_9]] : memref<32x16xf32>
// CHECK:           return %[[VAL_16]] : tensor<32x16xf32>
// CHECK:         }
func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
             %argx: tensor<32x16xf32> {linalg.inplaceable = false})
  -> tensor<32x16xf32> {
  %c = constant 1.0 : f32
  %0 = linalg.generic #trait_2d
    ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      ^bb(%a: f32, %x: f32):
        %1 = addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}
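
// For @dense1 above, the output is not in-placeable, so the generated
// code allocates a fresh buffer (memref.alloc), copies the incoming
// %argx into it (linalg.copy), and writes all updates into that copy.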

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is in-placeable.
// This allows updating the dense output in place.
//
// CHECK-LABEL:   func @dense2(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = true}) -> tensor<32x16xf32> {
// CHECK:           %[[VAL_2:.*]] = constant 1.000000e+00 : f32
// CHECK:           %[[VAL_3:.*]] = constant 32 : index
// CHECK:           %[[VAL_4:.*]] = constant 16 : index
// CHECK:           %[[VAL_5:.*]] = constant 0 : index
// CHECK:           %[[VAL_6:.*]] = constant 1 : index
// CHECK:           %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32x16xf32>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32>
// CHECK:         }
func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
             %argx: tensor<32x16xf32> {linalg.inplaceable = true})
  -> tensor<32x16xf32> {
  %c = constant 1.0 : f32
  %0 = linalg.generic #trait_2d
    ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      ^bb(%a: f32, %x: f32):
        %1 = addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}
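
// For @dense2 above, the in-placeable output makes the alloc/copy pair
// from @dense1 unnecessary: the stores go directly into the buffer
// obtained by memref.buffer_cast of %argx.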

//
// Test with a non-annotated dense matrix as input and
// an all-dense-annotated "sparse" matrix as output.
// The rewriting would fail if %argx were not in-placeable.
//
// CHECK-LABEL:   func @dense3(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK:           %[[VAL_2:.*]] = constant 1.000000e+00 : f32
// CHECK:           %[[VAL_3:.*]] = constant 32 : index
// CHECK:           %[[VAL_4:.*]] = constant 16 : index
// CHECK:           %[[VAL_5:.*]] = constant 0 : index
// CHECK:           %[[VAL_6:.*]] = constant 1 : index
// CHECK:           %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:               %[[VAL_14:.*]] = addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = sparse_tensor.tensor %[[VAL_8]] : memref<?xf32> to tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func @dense3(%arga: tensor<32x16xf32>,
             %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
  -> tensor<32x16xf32, #DenseMatrix> {
  %c = constant 1.0 : f32
  %0 = linalg.generic #trait_2d
    ins(%arga: tensor<32x16xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      ^bb(%a: f32, %x: f32):
        %1 = addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}
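
// For @dense3 above, the roles are reversed: the loads use the 2-d
// indices of the dense input, while the stores use the linearized
// offset i * 16 + j into the values array of the annotated output,
// which sparse_tensor.tensor then rematerializes as a tensor.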

//
// Test with a non-annotated dense matrix as input and
// an all-dense-annotated "sparse" matrix as output.
// The rewriting would fail if %argx were not in-placeable.
// The missing innermost "k" index (due to a reduction) is accounted
// for by scalarizing the reduction operation for the output tensor.
//
// CHECK-LABEL:   func @dense4(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK:           %[[VAL_2:.*]] = constant 8 : index
// CHECK:           %[[VAL_3:.*]] = constant 32 : index
// CHECK:           %[[VAL_4:.*]] = constant 16 : index
// CHECK:           %[[VAL_5:.*]] = constant 0 : index
// CHECK:           %[[VAL_6:.*]] = constant 1 : index
// CHECK:           %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16x8xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) {
// CHECK:                 %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]], %[[VAL_15]]] : memref<32x16x8xf32>
// CHECK:                 %[[VAL_18:.*]] = addf %[[VAL_16]], %[[VAL_17]] : f32
// CHECK:                 scf.yield %[[VAL_18]] : f32
// CHECK:               }
// CHECK:               memref.store %[[VAL_19:.*]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_20:.*]] = sparse_tensor.tensor %[[VAL_8]] : memref<?xf32> to tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_20]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func @dense4(%arga: tensor<32x16x8xf32>,
             %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
  -> tensor<32x16xf32, #DenseMatrix> {
  %0 = linalg.generic #trait_3d
    ins(%arga: tensor<32x16x8xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      ^bb(%a: f32, %x: f32):
        %1 = addf %x, %a : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}
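
// For @dense4 above, the reduction over k is scalarized: the output
// element is loaded once before the k-loop, carried through the loop
// as an iter_args value, and stored back once after the loop, so the
// linearized output buffer is touched only twice per (i,j).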