forked from OSchip/llvm-project
115 lines
4.2 KiB
MLIR
115 lines
4.2 KiB
MLIR
// RUN: mlir-opt %s -pass-pipeline="func.func(test-match-reduction)" -verify-diagnostics -split-input-file
|
|
|
|
// Verify that the generic reduction detection utility works on different
|
|
// dialects.
|
|
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @linalg_red_add(%in0t : tensor<?xf32>, %out0t : tensor<1xf32>) {
|
|
// expected-remark@below {{Reduction found in output #0!}}
|
|
// expected-remark@below {{Reduced Value: <block argument> of type 'f32' at index: 0}}
|
|
// expected-remark@below {{Combiner Op: %1 = arith.addf %arg2, %arg3 : f32}}
|
|
%red = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
|
|
affine_map<(d0) -> (0)>],
|
|
iterator_types = ["reduction"]}
|
|
ins(%in0t : tensor<?xf32>)
|
|
outs(%out0t : tensor<1xf32>) {
|
|
^bb0(%in0: f32, %out0: f32):
|
|
%add = arith.addf %in0, %out0 : f32
|
|
linalg.yield %add : f32
|
|
} -> tensor<1xf32>
|
|
return
|
|
}
|
|
|
|
// -----
|
|
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @affine_red_add(%in: memref<256x512xf32>, %out: memref<256xf32>) {
|
|
%cst = arith.constant 0.000000e+00 : f32
|
|
affine.for %i = 0 to 256 {
|
|
// expected-remark@below {{Reduction found in output #0!}}
|
|
// expected-remark@below {{Reduced Value: %1 = affine.load %arg0[%arg2, %arg3] : memref<256x512xf32>}}
|
|
// expected-remark@below {{Combiner Op: %2 = arith.addf %arg4, %1 : f32}}
|
|
%final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
|
|
%ld = affine.load %in[%i, %j] : memref<256x512xf32>
|
|
%add = arith.addf %red_iter, %ld : f32
|
|
affine.yield %add : f32
|
|
}
|
|
affine.store %final_red, %out[%i] : memref<256xf32>
|
|
}
|
|
return
|
|
}
|
|
|
|
// -----
|
|
|
|
// TODO: Iteration-carried values with multiple uses are not supported yet.
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @linalg_red_max(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
|
|
// expected-remark@below {{Reduction NOT found in output #0!}}
|
|
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
|
|
affine_map<(d0, d1) -> (d0)>],
|
|
iterator_types = ["parallel", "reduction"]}
|
|
ins(%in0t : tensor<4x4xf32>)
|
|
outs(%out0t : tensor<4xf32>) {
|
|
^bb0(%in0: f32, %out0: f32):
|
|
%cmp = arith.cmpf ogt, %in0, %out0 : f32
|
|
%sel = arith.select %cmp, %in0, %out0 : f32
|
|
linalg.yield %sel : f32
|
|
} -> tensor<4xf32>
|
|
return
|
|
}
|
|
|
|
// -----
|
|
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @linalg_fused_red_add(%in0t: tensor<4x4xf32>, %out0t: tensor<4xf32>) {
|
|
// expected-remark@below {{Reduction found in output #0!}}
|
|
// expected-remark@below {{Reduced Value: %2 = arith.subf %1, %arg2 : f32}}
|
|
// expected-remark@below {{Combiner Op: %3 = arith.addf %2, %arg3 : f32}}
|
|
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
|
|
affine_map<(d0, d1) -> (d0)>],
|
|
iterator_types = ["parallel", "reduction"]}
|
|
ins(%in0t : tensor<4x4xf32>)
|
|
outs(%out0t : tensor<4xf32>) {
|
|
^bb0(%in0: f32, %out0: f32):
|
|
%mul = arith.mulf %in0, %in0 : f32
|
|
%sub = arith.subf %mul, %in0 : f32
|
|
%add = arith.addf %sub, %out0 : f32
|
|
linalg.yield %add : f32
|
|
} -> tensor<4xf32>
|
|
return
|
|
}
|
|
|
|
// -----
|
|
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @affine_no_red_rec(%in: memref<512xf32>) {
|
|
%cst = arith.constant 0.000000e+00 : f32
|
|
// %rec is the value loaded in the previous iteration.
|
|
// expected-remark@below {{Reduction NOT found in output #0!}}
|
|
%final_val = affine.for %j = 0 to 512 iter_args(%rec = %cst) -> (f32) {
|
|
%ld = affine.load %in[%j] : memref<512xf32>
|
|
%add = arith.addf %ld, %rec : f32
|
|
affine.yield %ld : f32
|
|
}
|
|
return
|
|
}
|
|
|
|
// -----
|
|
|
|
// expected-remark@below {{Testing function}}
|
|
func.func @affine_output_dep(%in: memref<512xf32>) {
|
|
%cst = arith.constant 0.000000e+00 : f32
|
|
// Reduction %red is not supported because it depends on another
|
|
// loop-carried dependence.
|
|
// expected-remark@below {{Reduction NOT found in output #0!}}
|
|
// expected-remark@below {{Reduction NOT found in output #1!}}
|
|
%final_red, %final_dep = affine.for %j = 0 to 512
|
|
iter_args(%red = %cst, %dep = %cst) -> (f32, f32) {
|
|
%ld = affine.load %in[%j] : memref<512xf32>
|
|
%add = arith.addf %dep, %red : f32
|
|
affine.yield %add, %ld : f32, f32
|
|
}
|
|
return
|
|
}
|
|
|