[mlir] Support hoisting whole affine for loops in LICM

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D100512
This commit is contained in:
Amy Zhuang 2021-04-20 17:09:15 -07:00 committed by Sergei Grechanik
parent 79b5280a6c
commit 9194071626
2 changed files with 68 additions and 32 deletions

View File

@ -71,10 +71,11 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
if (!checkInvarianceOfNestedIfOps(&op, indVar, opsWithUsers, opsToHoist)) {
return false;
}
} else if (isa<AffineForOp>(op)) {
// If the body of a predicated region has a for loop, we don't hoist the
// 'affine.if'.
return false;
} else if (auto forOp = dyn_cast<AffineForOp>(op)) {
if (!areAllOpsInTheBlockListInvariant(forOp.getLoopBody(), indVar,
opsWithUsers, opsToHoist)) {
return false;
}
} else if (isa<AffineDmaStartOp, AffineDmaWaitOp>(op)) {
// TODO: Support DMA ops.
return false;
@ -113,30 +114,30 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
return false;
}
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i).getDefiningOp();
}
LLVM_DEBUG(
op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
// Check operands.
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i).getDefiningOp();
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
LLVM_DEBUG(
op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs() << *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (opsWithUsers.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs()
<< *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (opsWithUsers.count(operandSrc) &&
opsToHoist.count(operandSrc) == 0) {
return false;
}
}
}
}
@ -198,12 +199,9 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
// not being hoisted.
if (!op.use_empty())
opsWithUsers.insert(&op);
// We don't hoist for loops.
if (!isa<AffineForOp>(op)) {
if (!isa<AffineYieldOp>(op)) {
if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
opsToMove.push_back(&op);
}
if (!isa<AffineYieldOp>(op)) {
if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
opsToMove.push_back(&op);
}
}
}

View File

@ -17,6 +17,8 @@ func @nested_loops_both_having_invariant_code() {
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
@ -67,6 +69,33 @@ func @nested_loops_code_invariant_to_both() {
// -----
// CHECK-LABEL: func @nested_loops_inner_loops_invariant_to_outermost_loop
// The middle loop (%arg1) and the innermost loop it contains never reference
// the outermost induction variable %arg0. The expectations below therefore
// require the whole 0-to-30 loop nest to be printed ahead of the 0-to-20 loop,
// which is left with an empty body.
func @nested_loops_inner_loops_invariant_to_outermost_loop(%m : memref<10xindex>) {
affine.for %arg0 = 0 to 20 {
affine.for %arg1 = 0 to 30 {
// The accumulator is seeded with %arg1, so this loop depends on the middle
// loop's induction variable but not on %arg0.
%v0 = affine.for %arg2 = 0 to 10 iter_args (%prevAccum = %arg1) -> index {
%v1 = affine.load %m[%arg2] : memref<10xindex>
%newAccum = addi %prevAccum, %v1 : index
affine.yield %newAccum : index
}
}
}
// CHECK: affine.for %{{.*}} = 0 to 30 {
// CHECK-NEXT: %{{.*}} = affine.for %{{.*}} = 0 to 10 iter_args(%{{.*}} = %{{.*}}) -> (index) {
// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xindex>
// CHECK-NEXT: %{{.*}} = addi %{{.*}}, %{{.*}} : index
// CHECK-NEXT: affine.yield %{{.*}} : index
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 20 {
// CHECK-NEXT: }
return
}
// -----
func @single_loop_nothing_invariant() {
%m1 = memref.alloc() : memref<10xf32>
%m2 = memref.alloc() : memref<10xf32>
@ -228,8 +257,9 @@ func @load_after_load() {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
return
}
@ -252,6 +282,8 @@ func @invariant_affine_if() {
// CHECK: %0 = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
@ -386,6 +418,8 @@ func @invariant_affine_nested_if_else2() {
// CHECK-NEXT: %1 = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
@ -420,6 +454,8 @@ func @invariant_affine_nested_if2() {
// CHECK: %0 = memref.alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
@ -530,6 +566,8 @@ func @nested_load_store_same_memref2() {
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>