[mlir][scf] Fix bug in pipelining prologue emission

Induction variable calculation was ignoring scf.for step value. Fix it to get
the correct induction variable value in the prologue.

Differential Revision: https://reviews.llvm.org/D118932
This commit is contained in:
Thomas Raoux 2022-02-03 11:42:49 -08:00
parent 502f14d6f2
commit c3c1c5c695
2 changed files with 41 additions and 1 deletions

View File

@ -138,7 +138,8 @@ void LoopPipelinerInternal::emitPrologue(PatternRewriter &rewriter) {
auto yield = cast<scf::YieldOp>(forOp.getBody()->getTerminator());
for (int64_t i = 0; i < maxStage; i++) {
// special handling for induction variable as the increment is implicit.
Value iv = rewriter.create<arith::ConstantIndexOp>(forOp.getLoc(), lb + i);
Value iv =
rewriter.create<arith::ConstantIndexOp>(forOp.getLoc(), lb + i * step);
setValueMapping(forOp.getInductionVar(), iv, i);
for (Operation *op : opOrder) {
if (stages[op] > i)

View File

@ -34,6 +34,45 @@ func @simple_pipeline(%A: memref<?xf32>, %result: memref<?xf32>) {
// -----
// CHECK-LABEL: simple_pipeline_step(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK: %[[L1:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[L2:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C5]]
// CHECK-SAME: step %[[C3]] iter_args(%[[LARG0:.*]] = %[[L0]], %[[LARG1:.*]] = %[[L1]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG0]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C6]] : index
// CHECK-NEXT: %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LARG1]], %[[LR]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L2]]#0, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[L2]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C9]]] : memref<?xf32>
func @simple_pipeline_step(%A: memref<?xf32>, %result: memref<?xf32>) {
%c0 = arith.constant 0 : index
%c3 = arith.constant 3 : index
%c11 = arith.constant 11 : index
%cf = arith.constant 1.0 : f32
scf.for %i0 = %c0 to %c11 step %c3 {
%A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
%A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : memref<?xf32>
} { __test_pipelining_loop__ }
return
}
// -----
// CHECK-LABEL: three_stage(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index