[MLIR] Fix affine parallelize pass.

To control the number of outer parallel loops, the outer loops must be
processed first; walking the function in pre-order fixes the issue.

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D104361
This commit is contained in:
Prashant Kumar 2021-06-17 01:15:35 +05:30 committed by Uday Bondhugula
parent 0e760a0870
commit 51d43bbc46
2 changed files with 26 additions and 6 deletions

View File

@ -50,14 +50,13 @@ struct ParallelizationCandidate {
void AffineParallelize::runOnFunction() {
FuncOp f = getFunction();
// The walker proceeds in post-order, but we need to process outer loops first
// to control the number of outer parallel loops, so push candidate loops to
// the front of a deque.
std::deque<ParallelizationCandidate> parallelizableLoops;
f.walk([&](AffineForOp loop) {
// The walker proceeds in pre-order to process the outer loops first
// and control the number of outer parallel loops.
std::vector<ParallelizationCandidate> parallelizableLoops;
f.walk<WalkOrder::PreOrder>([&](AffineForOp loop) {
SmallVector<LoopReduction> reductions;
if (isLoopParallel(loop, parallelReductions ? &reductions : nullptr))
parallelizableLoops.emplace_back(loop, std::move(reductions));
parallelizableLoops.push_back({loop, std::move(reductions)});
});
for (const ParallelizationCandidate &candidate : parallelizableLoops) {

View File

@ -155,6 +155,27 @@ func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
return
}
// MAX-NESTED-LABEL: @max_nested_1
// Matrix-multiply-style triple loop nest: each iteration loads
// %arg0[%arg3, %arg5] and %arg1[%arg5, %arg4], multiplies them, and
// accumulates the product into the locally allocated buffer %0[%arg3, %arg4].
// The expectations below require that only the outermost loop is rewritten to
// affine.parallel while the two inner loops stay as affine.for.
// NOTE(review): presumably this prefix is run with the nesting limit set to 1;
// the RUN line is not visible in this hunk, so confirm against the file header.
func @max_nested_1(%arg0: memref<4096x4096xf32>, %arg1: memref<4096x4096xf32>, %arg2: memref<4096x4096xf32>) {
%0 = memref.alloc() : memref<4096x4096xf32>
// MAX-NESTED: affine.parallel
affine.for %arg3 = 0 to 4096 {
// MAX-NESTED-NEXT: affine.for
affine.for %arg4 = 0 to 4096 {
// MAX-NESTED-NEXT: affine.for
affine.for %arg5 = 0 to 4096 {
%1 = affine.load %arg0[%arg3, %arg5] : memref<4096x4096xf32>
%2 = affine.load %arg1[%arg5, %arg4] : memref<4096x4096xf32>
%3 = affine.load %0[%arg3, %arg4] : memref<4096x4096xf32>
%4 = mulf %1, %2 : f32
%5 = addf %3, %4 : f32
affine.store %5, %0[%arg3, %arg4] : memref<4096x4096xf32>
}
}
}
return
}
// CHECK-LABEL: @iter_args
// REDUCE-LABEL: @iter_args
func @iter_args(%in: memref<10xf32>) {