forked from OSchip/llvm-project
[mlir] Support hoisting whole affine for loops in LICM
Reviewed By: bondhugula
Differential Revision: https://reviews.llvm.org/D100512
This commit is contained in:
parent
79b5280a6c
commit
9194071626
|
@ -71,10 +71,11 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
|
|||
if (!checkInvarianceOfNestedIfOps(&op, indVar, opsWithUsers, opsToHoist)) {
|
||||
return false;
|
||||
}
|
||||
} else if (isa<AffineForOp>(op)) {
|
||||
// If the body of a predicated region has a for loop, we don't hoist the
|
||||
// 'affine.if'.
|
||||
return false;
|
||||
} else if (auto forOp = dyn_cast<AffineForOp>(op)) {
|
||||
if (!areAllOpsInTheBlockListInvariant(forOp.getLoopBody(), indVar,
|
||||
opsWithUsers, opsToHoist)) {
|
||||
return false;
|
||||
}
|
||||
} else if (isa<AffineDmaStartOp, AffineDmaWaitOp>(op)) {
|
||||
// TODO: Support DMA ops.
|
||||
return false;
|
||||
|
@ -113,30 +114,30 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
|
|||
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
|
||||
return false;
|
||||
}
|
||||
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
|
||||
auto *operandSrc = op.getOperand(i).getDefiningOp();
|
||||
}
|
||||
|
||||
LLVM_DEBUG(
|
||||
op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
|
||||
// Check operands.
|
||||
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
|
||||
auto *operandSrc = op.getOperand(i).getDefiningOp();
|
||||
|
||||
// If the loop IV is the operand, this op isn't loop invariant.
|
||||
if (indVar == op.getOperand(i)) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
|
||||
LLVM_DEBUG(
|
||||
op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
|
||||
|
||||
// If the loop IV is the operand, this op isn't loop invariant.
|
||||
if (indVar == op.getOperand(i)) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (operandSrc != nullptr) {
|
||||
LLVM_DEBUG(llvm::dbgs() << *operandSrc << "\nIterating on operand src\n");
|
||||
|
||||
// If the value was defined in the loop (outside of the
|
||||
// if/else region), and that operation itself wasn't meant to
|
||||
// be hoisted, then mark this operation loop dependent.
|
||||
if (opsWithUsers.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (operandSrc != nullptr) {
|
||||
LLVM_DEBUG(llvm::dbgs()
|
||||
<< *operandSrc << "\nIterating on operand src\n");
|
||||
|
||||
// If the value was defined in the loop (outside of the
|
||||
// if/else region), and that operation itself wasn't meant to
|
||||
// be hoisted, then mark this operation loop dependent.
|
||||
if (opsWithUsers.count(operandSrc) &&
|
||||
opsToHoist.count(operandSrc) == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -198,12 +199,9 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
|
|||
// not being hoisted.
|
||||
if (!op.use_empty())
|
||||
opsWithUsers.insert(&op);
|
||||
// We don't hoist for loops.
|
||||
if (!isa<AffineForOp>(op)) {
|
||||
if (!isa<AffineYieldOp>(op)) {
|
||||
if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
|
||||
opsToMove.push_back(&op);
|
||||
}
|
||||
if (!isa<AffineYieldOp>(op)) {
|
||||
if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
|
||||
opsToMove.push_back(&op);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ func @nested_loops_both_having_invariant_code() {
|
|||
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
|
||||
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
|
||||
|
||||
return
|
||||
|
@ -67,6 +69,33 @@ func @nested_loops_code_invariant_to_both() {
|
|||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @nested_loops_inner_loops_invariant_to_outermost_loop
|
||||
func @nested_loops_inner_loops_invariant_to_outermost_loop(%m : memref<10xindex>) {
|
||||
affine.for %arg0 = 0 to 20 {
|
||||
affine.for %arg1 = 0 to 30 {
|
||||
%v0 = affine.for %arg2 = 0 to 10 iter_args (%prevAccum = %arg1) -> index {
|
||||
%v1 = affine.load %m[%arg2] : memref<10xindex>
|
||||
%newAccum = addi %prevAccum, %v1 : index
|
||||
affine.yield %newAccum : index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: affine.for %{{.*}} = 0 to 30 {
|
||||
// CHECK-NEXT: %{{.*}} = affine.for %{{.*}} = 0 to 10 iter_args(%{{.*}} = %{{.*}}) -> (index) {
|
||||
// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xindex>
|
||||
// CHECK-NEXT: %{{.*}} = addi %{{.*}}, %{{.*}} : index
|
||||
// CHECK-NEXT: affine.yield %{{.*}} : index
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 20 {
|
||||
// CHECK-NEXT: }
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @single_loop_nothing_invariant() {
|
||||
%m1 = memref.alloc() : memref<10xf32>
|
||||
%m2 = memref.alloc() : memref<10xf32>
|
||||
|
@ -228,8 +257,9 @@ func @load_after_load() {
|
|||
// CHECK-NEXT: %2 = addf %cst, %cst : f32
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
|
||||
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
|
||||
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -252,6 +282,8 @@ func @invariant_affine_if() {
|
|||
// CHECK: %0 = memref.alloc() : memref<10xf32>
|
||||
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
|
||||
// CHECK-NEXT: %1 = addf %cst, %cst : f32
|
||||
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
|
||||
|
@ -386,6 +418,8 @@ func @invariant_affine_nested_if_else2() {
|
|||
// CHECK-NEXT: %1 = memref.alloc() : memref<10xf32>
|
||||
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
|
||||
// CHECK-NEXT: %2 = addf %cst, %cst : f32
|
||||
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
|
||||
|
@ -420,6 +454,8 @@ func @invariant_affine_nested_if2() {
|
|||
// CHECK: %0 = memref.alloc() : memref<10xf32>
|
||||
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: affine.if #set(%arg0, %arg0) {
|
||||
// CHECK-NEXT: %1 = addf %cst, %cst : f32
|
||||
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
|
||||
|
@ -530,6 +566,8 @@ func @nested_load_store_same_memref2() {
|
|||
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
|
||||
// CHECK-NEXT: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
||||
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
|
||||
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>
|
||||
|
||||
|
|
Loading…
Reference in New Issue