[Flang] Lower Unstructured do loops

The FIR `do_loop` is designed as a structured operation with a single
block inside it. The presence of unstructured constructs such as jumps
or exits inside the loop causes the loop to be marked as unstructured.
Such loops are lowered using the `control-flow` dialect branch operations.

Fortran semantics do not allow the loop variable to be modified inside
the loop. To prevent accidental modification, the iteration of the
loop is modeled by two variables, trip-count and loop-variable.
-> The trip-count and loop-variable are initialized in the pre-header.
The trip-count is set as (end-start+step)/step where end, start and
step have the usual meanings. The loop-variable is initialized to start.
-> The header block contains a conditional branch instruction which
selects between branching to the body of the loop or the exit block
depending on whether the trip-count is greater than zero.
-> Inside the body, the trip-count is decremented by one and the
loop-variable is incremented by the step value. Finally, the body
branches back to the header of the loop.

This patch is part of the upstreaming effort to move LLVM Flang from the
fir-dev branch of
https://github.com/flang-compiler/f18-llvm-project to the LLVM Project.

Reviewed By: awarzynski

Differential Revision: https://reviews.llvm.org/D124837

Co-authored-by: Val Donaldson <vdonaldson@nvidia.com>
Co-authored-by: Eric Schweitz <eschweitz@nvidia.com>
Co-authored-by: Jean Perier <jperier@nvidia.com>
Co-authored-by: Peter Klausler <pklausler@nvidia.com>
This commit is contained in:
Kiran Chandramohan 2022-05-06 09:09:01 +00:00
parent db7a87ed4f
commit aa0e167fab
2 changed files with 287 additions and 10 deletions

View File

@ -75,8 +75,7 @@ struct IncrementLoopInfo {
IncrementLoopInfo(IncrementLoopInfo &&) = default;
IncrementLoopInfo &operator=(IncrementLoopInfo &&x) { return x; }
// TODO: change when unstructured loops are also supported
bool isStructured() const { return true; }
bool isStructured() const { return !headerBlock; }
mlir::Type getLoopVariableType() const {
assert(loopVariable && "must be set");
@ -96,7 +95,10 @@ struct IncrementLoopInfo {
fir::DoLoopOp doLoop = nullptr;
// Data members for unstructured loops.
// TODO:
mlir::Value tripVariable = nullptr;
mlir::Block *headerBlock = nullptr; // loop entry and test block
mlir::Block *bodyBlock = nullptr; // first loop body block
mlir::Block *exitBlock = nullptr; // loop exit target block
};
/// Helper class to generate the runtime type info global data. This data
@ -964,19 +966,37 @@ private:
// Collect loop nest information.
// Generate begin loop code directly for infinite and while loops.
Fortran::lower::pft::Evaluation &eval = getEval();
bool unstructuredContext = eval.lowerAsUnstructured();
Fortran::lower::pft::Evaluation &doStmtEval =
eval.getFirstNestedEvaluation();
auto *doStmt = doStmtEval.getIf<Fortran::parser::NonLabelDoStmt>();
const auto &loopControl =
std::get<std::optional<Fortran::parser::LoopControl>>(doStmt->t);
mlir::Block *preheaderBlock = doStmtEval.block;
mlir::Block *beginBlock =
preheaderBlock ? preheaderBlock : builder->getBlock();
auto createNextBeginBlock = [&]() {
// Step beginBlock through unstructured preheader, header, and mask
// blocks, created in outermost to innermost order.
return beginBlock = beginBlock->splitBlock(beginBlock->end());
};
mlir::Block *headerBlock =
unstructuredContext ? createNextBeginBlock() : nullptr;
mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block;
mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block;
IncrementLoopNestInfo incrementLoopNestInfo;
if (const auto *bounds = std::get_if<Fortran::parser::LoopControl::Bounds>(
&loopControl->u)) {
// Non-concurrent increment loop.
incrementLoopNestInfo.emplace_back(*bounds->name.thing.symbol,
bounds->lower, bounds->upper,
bounds->step);
// TODO: unstructured loop
IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back(
*bounds->name.thing.symbol, bounds->lower, bounds->upper,
bounds->step);
if (unstructuredContext) {
maybeStartBlock(preheaderBlock);
info.headerBlock = headerBlock;
info.bodyBlock = bodyBlock;
info.exitBlock = exitBlock;
}
} else {
TODO(toLocation(), "infinite/unstructured loop/concurrent loop");
}
@ -988,7 +1008,7 @@ private:
// Loop body code - NonLabelDoStmt and EndDoStmt code is generated here.
// Their genFIR calls are nops except for block management in some cases.
for (Fortran::lower::pft::Evaluation &e : eval.getNestedEvaluations())
genFIR(e, /*unstructuredContext=*/false);
genFIR(e, unstructuredContext);
// Loop end code. (TODO: infinite/while loop)
genFIRIncrementLoopEnd(incrementLoopNestInfo);
@ -1028,7 +1048,30 @@ private:
// TODO: handle Locality Spec
continue;
}
// TODO: Unstructured loop handling
// Unstructured loop preheader - initialize tripVariable and loopVariable.
mlir::Value tripCount;
auto diff1 =
builder->create<mlir::arith::SubIOp>(loc, upperValue, lowerValue);
auto diff2 =
builder->create<mlir::arith::AddIOp>(loc, diff1, info.stepValue);
tripCount =
builder->create<mlir::arith::DivSIOp>(loc, diff2, info.stepValue);
info.tripVariable = builder->createTemporary(loc, tripCount.getType());
builder->create<fir::StoreOp>(loc, tripCount, info.tripVariable);
builder->create<fir::StoreOp>(loc, lowerValue, info.loopVariable);
// Unstructured loop header - generate loop condition and mask.
startBlock(info.headerBlock);
tripCount = builder->create<fir::LoadOp>(loc, info.tripVariable);
mlir::Value zero =
builder->createIntegerConstant(loc, tripCount.getType(), 0);
auto cond = builder->create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero);
// TODO: mask expression
genFIRConditionalBranch(cond, info.bodyBlock, info.exitBlock);
if (&info != &incrementLoopNestInfo.back()) // not innermost
startBlock(info.bodyBlock); // preheader block of enclosed dimension
}
}
@ -1058,7 +1101,20 @@ private:
continue;
}
// TODO: Unstructured loop
// Unstructured loop - decrement tripVariable and step loopVariable.
mlir::Value tripCount =
builder->create<fir::LoadOp>(loc, info.tripVariable);
mlir::Value one =
builder->createIntegerConstant(loc, tripCount.getType(), 1);
tripCount = builder->create<mlir::arith::SubIOp>(loc, tripCount, one);
builder->create<fir::StoreOp>(loc, tripCount, info.tripVariable);
mlir::Value value = builder->create<fir::LoadOp>(loc, info.loopVariable);
value = builder->create<mlir::arith::AddIOp>(loc, value, info.stepValue);
builder->create<fir::StoreOp>(loc, value, info.loopVariable);
genFIRBranch(info.headerBlock);
if (&info != &incrementLoopNestInfo.front()) // not outermost
startBlock(info.exitBlock); // latch block of enclosing dimension
}
}

View File

@ -0,0 +1,221 @@
! RUN: bbc -emit-fir -o - %s | FileCheck %s
! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s
! Tests for unstructured loops.
! Test a simple unstructured loop. Test for the existence of,
! -> The initialization of the trip-count and loop-variable
! -> The branch to the body or the exit inside the header
! -> The decrement of the trip-count and the increment of the loop-variable inside the body
subroutine simple_unstructured()
integer :: i
do i=1,100
! The jump inside the loop body is what makes this loop unstructured.
goto 404
404 continue
end do
end subroutine
! CHECK-LABEL: simple_unstructured
! CHECK: %[[TRIP_VAR_REF:.*]] = fir.alloca i32
! CHECK: %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructuredEi"}
! CHECK: %[[ONE:.*]] = arith.constant 1 : i32
! CHECK: %[[HUNDRED:.*]] = arith.constant 100 : i32
! CHECK: %[[STEP_ONE:.*]] = arith.constant 1 : i32
! CHECK: %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
! CHECK: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP_ONE]] : i32
! CHECK: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP_ONE]] : i32
! CHECK: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER:.*]]
! CHECK: ^[[HEADER]]:
! CHECK: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO:.*]] = arith.constant 0 : i32
! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
! CHECK: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
! CHECK: ^[[BODY]]:
! CHECK: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[ONE_1:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
! CHECK: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE]] : i32
! CHECK: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER]]
! CHECK: ^[[EXIT]]:
! CHECK: return
! Test an unstructured loop with a step. Mostly similar to the previous one.
! Only difference is a non-unit step.
subroutine simple_unstructured_with_step()
integer :: i
! Same shape as the unit-step case, but with an explicit step of 2.
do i=1,100,2
! The jump inside the loop body is what makes this loop unstructured.
goto 404
404 continue
end do
end subroutine
! CHECK-LABEL: simple_unstructured_with_step
! CHECK: %[[TRIP_VAR_REF:.*]] = fir.alloca i32
! CHECK: %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructured_with_stepEi"}
! CHECK: %[[ONE:.*]] = arith.constant 1 : i32
! CHECK: %[[HUNDRED:.*]] = arith.constant 100 : i32
! CHECK: %[[STEP:.*]] = arith.constant 2 : i32
! CHECK: %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
! CHECK: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP]] : i32
! CHECK: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP]] : i32
! CHECK: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER:.*]]
! CHECK: ^[[HEADER]]:
! CHECK: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO:.*]] = arith.constant 0 : i32
! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
! CHECK: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
! CHECK: ^[[BODY]]:
! CHECK: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[ONE_1:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
! CHECK: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP]] : i32
! CHECK: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER]]
! CHECK: ^[[EXIT]]:
! CHECK: return
! Test a triply nested unstructured loop. Three levels of nesting is the basic
! case where we have a loop that is neither innermost nor outermost.
subroutine nested_unstructured()
integer :: i, j, k
! i is the outermost loop, k the innermost; j is neither.
do i=1,100
do j=1,200
do k=1,300
! The jump sits in the innermost loop body.
goto 404
404 continue
end do
end do
end do
end subroutine
! CHECK-LABEL: nested_unstructured
! CHECK: %[[TRIP_VAR_K_REF:.*]] = fir.alloca i32
! CHECK: %[[TRIP_VAR_J_REF:.*]] = fir.alloca i32
! CHECK: %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
! CHECK: %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_unstructuredEi"}
! CHECK: %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_unstructuredEj"}
! CHECK: %[[LOOP_VAR_K_REF:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_unstructuredEk"}
! CHECK: %[[I_START:.*]] = arith.constant 1 : i32
! CHECK: %[[I_END:.*]] = arith.constant 100 : i32
! CHECK: %[[I_STEP:.*]] = arith.constant 1 : i32
! CHECK: %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
! CHECK: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
! CHECK: %[[TRIP_COUNT_I:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
! CHECK: fir.store %[[TRIP_COUNT_I]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_I:.*]]
! CHECK: ^[[HEADER_I]]:
! CHECK: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO_1:.*]] = arith.constant 0 : i32
! CHECK: %[[COND_I:.*]] = arith.cmpi sgt, %[[TRIP_VAR_I]], %[[ZERO_1]] : i32
! CHECK: cf.cond_br %[[COND_I]], ^[[BODY_I:.*]], ^[[EXIT_I:.*]]
! CHECK: ^[[BODY_I]]:
! CHECK: %[[J_START:.*]] = arith.constant 1 : i32
! CHECK: %[[J_END:.*]] = arith.constant 200 : i32
! CHECK: %[[J_STEP:.*]] = arith.constant 1 : i32
! CHECK: %[[TMP3:.*]] = arith.subi %[[J_END]], %[[J_START]] : i32
! CHECK: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[J_STEP]] : i32
! CHECK: %[[TRIP_COUNT_J:.*]] = arith.divsi %[[TMP4]], %[[J_STEP]] : i32
! CHECK: fir.store %[[TRIP_COUNT_J]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[J_START]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_J:.*]]
! CHECK: ^[[HEADER_J]]:
! CHECK: %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO_2:.*]] = arith.constant 0 : i32
! CHECK: %[[COND_J:.*]] = arith.cmpi sgt, %[[TRIP_VAR_J]], %[[ZERO_2]] : i32
! CHECK: cf.cond_br %[[COND_J]], ^[[BODY_J:.*]], ^[[EXIT_J:.*]]
! CHECK: ^[[BODY_J]]:
! CHECK: %[[K_START:.*]] = arith.constant 1 : i32
! CHECK: %[[K_END:.*]] = arith.constant 300 : i32
! CHECK: %[[K_STEP:.*]] = arith.constant 1 : i32
! CHECK: %[[TMP3:.*]] = arith.subi %[[K_END]], %[[K_START]] : i32
! CHECK: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[K_STEP]] : i32
! CHECK: %[[TRIP_COUNT_K:.*]] = arith.divsi %[[TMP4]], %[[K_STEP]] : i32
! CHECK: fir.store %[[TRIP_COUNT_K]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[K_START]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_K:.*]]
! CHECK: ^[[HEADER_K]]:
! CHECK: %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO_2:.*]] = arith.constant 0 : i32
! CHECK: %[[COND_K:.*]] = arith.cmpi sgt, %[[TRIP_VAR_K]], %[[ZERO_2]] : i32
! CHECK: cf.cond_br %[[COND_K]], ^[[BODY_K:.*]], ^[[EXIT_K:.*]]
! CHECK: ^[[BODY_K]]:
! CHECK: %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: %[[ONE_1:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_K_NEXT:.*]] = arith.subi %[[TRIP_VAR_K]], %[[ONE_1]] : i32
! CHECK: fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP]] : i32
! CHECK: fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_K]]
! CHECK: ^[[EXIT_K]]:
! CHECK: %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: %[[ONE_1:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_J_NEXT:.*]] = arith.subi %[[TRIP_VAR_J]], %[[ONE_1]] : i32
! CHECK: fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP]] : i32
! CHECK: fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_J]]
! CHECK: ^[[EXIT_J]]:
! CHECK: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[ONE_1:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[ONE_1]] : i32
! CHECK: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP]] : i32
! CHECK: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER_I]]
! CHECK: ^[[EXIT_I]]:
! CHECK: return
! Test the existence of a structured loop inside an unstructured loop.
! Only minimal checks are inserted for the structured loop.
subroutine nested_structured_in_unstructured()
integer :: i, j
do i=1,100
! The inner j loop contains no jumps; the expected output below shows it
! lowered to a fir.do_loop.
do j=1,100
end do
! The jump belongs to the body of the outer i loop only.
goto 404
404 continue
end do
end subroutine
! CHECK-LABEL: nested_structured_in_unstructured
! CHECK: %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
! CHECK: %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_structured_in_unstructuredEi"}
! CHECK: %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_structured_in_unstructuredEj"}
! CHECK: %[[I_START:.*]] = arith.constant 1 : i32
! CHECK: %[[I_END:.*]] = arith.constant 100 : i32
! CHECK: %[[I_STEP:.*]] = arith.constant 1 : i32
! CHECK: %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
! CHECK: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
! CHECK: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
! CHECK: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER:.*]]
! CHECK: ^[[HEADER]]:
! CHECK: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[ZERO:.*]] = arith.constant 0 : i32
! CHECK: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
! CHECK: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
! CHECK: ^[[BODY]]:
! CHECK: %{{.*}} = fir.do_loop %[[J_INDEX:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
! CHECK: %[[J_INDEX_CVT:.*]] = fir.convert %[[J_INDEX]] : (index) -> i32
! CHECK: fir.store %[[J_INDEX_CVT]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
! CHECK: }
! CHECK: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[C1_3:.*]] = arith.constant 1 : i32
! CHECK: %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[C1_3]] : i32
! CHECK: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP]] : i32
! CHECK: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
! CHECK: cf.br ^[[HEADER]]
! CHECK: ^[[EXIT]]:
! CHECK: return