[flang] Lower where statement
This patch lowers the WHERE statement to FIR. The WHERE statement is lowered to a combination of loops and if conditions.

This patch is part of the upstreaming effort from the fir-dev branch.

Reviewed By: PeteSteinfeld

Differential Revision: https://reviews.llvm.org/D121385

Co-authored-by: Jean Perier <jperier@nvidia.com>
Co-authored-by: Eric Schweitz <eschweitz@nvidia.com>
commit 7a6a1655d8 (parent 13d3307176)
@@ -140,6 +140,40 @@ void createSomeArrayAssignment(AbstractConverter &converter,
                               const fir::ExtendedValue &rhs, SymMap &symMap,
                               StatementContext &stmtCtx);

/// Common entry point for both explicit iteration spaces and implicit iteration
/// spaces with masks.
///
/// For an implicit iteration space with masking, lowers an array assignment
/// expression with masking expression(s).
///
/// 1. Evaluate the lhs to determine the rank and how to form the ArrayLoad
/// (e.g., if there is a slicing op).
/// 2. Scan the rhs, creating the ArrayLoads and evaluate the scalar subparts to
/// be added to the map.
/// 3. Create the loop nest.
/// 4. Create the masking condition. Step 5 is conditionally executed only when
/// the mask condition evaluates to true.
/// 5. Evaluate the elemental expression, threading the results.
/// 6. Copy the resulting array back with ArrayMergeStore to the lhs as
/// determined per step 1.
///
/// For an explicit iteration space, lower a scalar or array assignment
/// expression with a user-defined iteration space and possibly with masking
/// expression(s).
///
/// If the expression is scalar, then the assignment is an array assignment but
/// the array accesses are explicitly defined by the user and not implied for
/// each element in the array. Mask expressions are optional.
///
/// If the expression has rank, then the assignment has a combined user-defined
/// iteration space as well as an inner (subordinate) implied iteration
/// space. The implied iteration space may include WHERE conditions (`masks`).
void createAnyMaskedArrayAssignment(AbstractConverter &converter,
                                    const SomeExpr &lhs, const SomeExpr &rhs,
                                    ExplicitIterSpace &explicitIterSpace,
                                    ImplicitIterSpace &implicitIterSpace,
                                    SymMap &symMap, StatementContext &stmtCtx);
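To make the numbered steps concrete, here is a minimal Fortran sketch (illustration only, not part of the patch) of what a masked, implied-space assignment means operationally: the mask is materialized first, then the elemental assignment executes only where the mask is true.

program where_sketch
  integer :: i
  real :: a(10), b(10)
  logical :: mask(10)
  a = [(real(i), i = 1, 10)]
  b = 0.0
  ! The one-line WHERE statement ...
  where (a > 4.0) b = -a
  ! ... is equivalent to this scalarized form, mirroring the lowering:
  ! loop nest (step 3), mask condition (step 4), guarded elemental
  ! evaluation (step 5).
  mask = a > 4.0
  do i = 1, 10
    if (mask(i)) b(i) = -a(i)
  end do
  print *, b
end program where_sketch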
/// Lower an assignment to an allocatable array, allocating the array if
/// it is not allocated yet or reallocating it if it does not conform
/// with the right hand side.
@@ -157,6 +191,19 @@ fir::ExtendedValue createSomeArrayTempValue(AbstractConverter &converter,
                                            SymMap &symMap,
                                            StatementContext &stmtCtx);

/// Somewhat similar to createSomeArrayTempValue, but the temporary buffer is
/// allocated lazily (inside the loops instead of before the loops) to
/// accommodate buffers with shapes that cannot be precomputed. In fact, the
/// buffer need not even be hyperrectangular. The buffer may be created as an
/// instance of a ragged array, which may be useful if an array's extents are
/// functions of other loop indices. The ragged array structure is built with \p
/// raggedHeader being the root header variable. The header is a tuple of
/// `{rank, data-is-headers, [data]*, [extents]*}`, which is built recursively.
/// The base header, \p raggedHeader, must be initialized to zeros.
void createLazyArrayTempValue(AbstractConverter &converter,
                              const SomeExpr &expr, mlir::Value raggedHeader,
                              SymMap &symMap, StatementContext &stmtCtx);
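As an illustration (mine, not from the patch) of when a ragged buffer is needed: a WHERE nested in a FORALL whose mask extent depends on the outer index, so the mask temporary's shape cannot be precomputed as a single rectangle.

program ragged_demo
  integer, parameter :: n = 3
  integer :: i
  integer :: sizes(n) = [1, 2, 3]
  real :: c(n, n), d(n, n)
  c = 1.0
  d = 0.0
  ! The mask section c(i, 1:sizes(i)) has a different extent for each i,
  ! so the buffered masks form a ragged, non-hyperrectangular structure.
  forall (i = 1:n)
    where (c(i, 1:sizes(i)) > 0.0) d(i, 1:sizes(i)) = 1.0
  end forall
  print *, d
end program ragged_demo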
// Attribute for an alloca that is a trivial adaptor for converting a value to
// pass-by-ref semantics for a VALUE parameter. The optimizer may be able to
// eliminate these.
@@ -1165,6 +1165,12 @@ private:
    TODO(toLocation(), "CaseConstruct lowering");
  }

  template <typename A>
  void genNestedStatement(const Fortran::parser::Statement<A> &stmt) {
    setCurrentPosition(stmt.source);
    genFIR(stmt.statement);
  }

  void genFIR(const Fortran::parser::ConcurrentHeader &header) {
    TODO(toLocation(), "ConcurrentHeader lowering");
  }
@@ -1461,6 +1467,15 @@ private:
    TODO(toLocation(), "LockStmt lowering");
  }

  /// Return true if the current context is a conditionalized and implied
  /// iteration space.
  bool implicitIterationSpace() { return !implicitIterSpace.empty(); }

  /// Return true if context is currently an explicit iteration space. A scalar
  /// assignment expression may be contextually within a user-defined iteration
  /// space, transforming it into an array expression.
  bool explicitIterationSpace() { return explicitIterSpace.isActive(); }

  /// Generate an array assignment.
  /// This is an assignment expression with rank > 0. The assignment may or may
  /// not be in a WHERE and/or FORALL context.
@@ -1475,46 +1490,106 @@ private:
      return;
    }

    if (!implicitIterationSpace() && !explicitIterationSpace()) {
      // No masks and the iteration space is implied by the array, so create a
      // simple array assignment.
      Fortran::lower::createSomeArrayAssignment(*this, assign.lhs, assign.rhs,
                                                localSymbols, stmtCtx);
      return;
    }

    // If there is an explicit iteration space, generate an array assignment
    // with a user-specified iteration space and possibly with masks. These
    // assignments may *appear* to be scalar expressions, but the scalar
    // expression is evaluated at all points in the user-defined space, much
    // like an ordinary array assignment. More specifically, the semantics
    // inside the FORALL more closely resemble those of WHERE than those of a
    // scalar assignment.
    // Otherwise, generate a masked array assignment. The iteration space is
    // implied by the lhs array expression.
    Fortran::lower::createAnyMaskedArrayAssignment(
        *this, assign.lhs, assign.rhs, explicitIterSpace, implicitIterSpace,
        localSymbols,
        explicitIterationSpace() ? explicitIterSpace.stmtContext()
                                 : implicitIterSpace.stmtContext());
  }

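A short Fortran example (not from the patch) of the "scalar-looking" case described above: the assignment below has a scalar left-hand side element, yet it is evaluated at every point of the user-defined FORALL space, much like an array assignment.

program forall_scalar_demo
  integer :: i
  real :: a(5), b(5)
  b = [10.0, 20.0, 30.0, 40.0, 50.0]
  ! a(i) = b(i) + 1.0 is syntactically a scalar assignment, but it runs
  ! over the whole explicit iteration space i = 1..5.
  forall (i = 1:5) a(i) = b(i) + 1.0
  print *, a
end program forall_scalar_demo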
  void genFIR(const Fortran::parser::WhereConstruct &c) {
-   TODO(toLocation(), "WhereConstruct lowering");
    implicitIterSpace.growStack();
    genNestedStatement(
        std::get<
            Fortran::parser::Statement<Fortran::parser::WhereConstructStmt>>(
            c.t));
    for (const auto &body :
         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(c.t))
      genFIR(body);
    for (const auto &e :
         std::get<std::list<Fortran::parser::WhereConstruct::MaskedElsewhere>>(
             c.t))
      genFIR(e);
    if (const auto &e =
            std::get<std::optional<Fortran::parser::WhereConstruct::Elsewhere>>(
                c.t);
        e.has_value())
      genFIR(*e);
    genNestedStatement(
        std::get<Fortran::parser::Statement<Fortran::parser::EndWhereStmt>>(
            c.t));
  }

  void genFIR(const Fortran::parser::WhereBodyConstruct &body) {
-   TODO(toLocation(), "WhereBodyConstruct lowering");
    std::visit(
        Fortran::common::visitors{
            [&](const Fortran::parser::Statement<
                Fortran::parser::AssignmentStmt> &stmt) {
              genNestedStatement(stmt);
            },
            [&](const Fortran::parser::Statement<Fortran::parser::WhereStmt>
                    &stmt) { genNestedStatement(stmt); },
            [&](const Fortran::common::Indirection<
                Fortran::parser::WhereConstruct> &c) { genFIR(c.value()); },
        },
        body.u);
  }

  void genFIR(const Fortran::parser::WhereConstructStmt &stmt) {
-   TODO(toLocation(), "WhereConstructStmt lowering");
    implicitIterSpace.append(Fortran::semantics::GetExpr(
        std::get<Fortran::parser::LogicalExpr>(stmt.t)));
  }

  void genFIR(const Fortran::parser::WhereConstruct::MaskedElsewhere &ew) {
-   TODO(toLocation(), "MaskedElsewhere lowering");
    genNestedStatement(
        std::get<
            Fortran::parser::Statement<Fortran::parser::MaskedElsewhereStmt>>(
            ew.t));
    for (const auto &body :
         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(ew.t))
      genFIR(body);
  }

  void genFIR(const Fortran::parser::MaskedElsewhereStmt &stmt) {
-   TODO(toLocation(), "MaskedElsewhereStmt lowering");
    implicitIterSpace.append(Fortran::semantics::GetExpr(
        std::get<Fortran::parser::LogicalExpr>(stmt.t)));
  }

  void genFIR(const Fortran::parser::WhereConstruct::Elsewhere &ew) {
-   TODO(toLocation(), "Elsewhere lowering");
    genNestedStatement(
        std::get<Fortran::parser::Statement<Fortran::parser::ElsewhereStmt>>(
            ew.t));
    for (const auto &body :
         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(ew.t))
      genFIR(body);
  }

  void genFIR(const Fortran::parser::ElsewhereStmt &stmt) {
-   TODO(toLocation(), "ElsewhereStmt lowering");
    implicitIterSpace.append(nullptr);
  }

  void genFIR(const Fortran::parser::EndWhereStmt &) {
-   TODO(toLocation(), "EndWhereStmt lowering");
    implicitIterSpace.shrinkStack();
  }

  void genFIR(const Fortran::parser::WhereStmt &stmt) {
-   TODO(toLocation(), "WhereStmt lowering");
    Fortran::lower::StatementContext stmtCtx;
    const auto &assign = std::get<Fortran::parser::AssignmentStmt>(stmt.t);
    implicitIterSpace.growStack();
    implicitIterSpace.append(Fortran::semantics::GetExpr(
        std::get<Fortran::parser::LogicalExpr>(stmt.t)));
    genAssignment(*assign.typedAssignment->v);
    implicitIterSpace.shrinkStack();
  }

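The handlers above only grow, append to, and shrink the mask stack; the effective condition of each branch is composed during lowering from the negation of all prior masks. A small Fortran example (illustration only) of that stack discipline:

program elsewhere_masks
  real :: a(4), b(4)
  a = [1.0, 60.0, 200.0, 3.0]
  b = 0.0
  where (a > 100.0)     ! WhereConstructStmt: append mask m1
    b = 2.0 * a         ! active where m1
  elsewhere (a > 50.0)  ! MaskedElsewhereStmt: append m2; active where .not. m1 .and. m2
    b = 3.0 + a
  elsewhere             ! ElsewhereStmt: append nullptr; active where no prior mask held
    b = -1.0
  end where             ! EndWhereStmt: shrinkStack()
  print *, b            ! expect -1.0 63.0 400.0 -1.0
end program elsewhere_masks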
  void genFIR(const Fortran::parser::PointerAssignmentStmt &stmt) {
@@ -30,7 +30,9 @@
#include "flang/Optimizer/Builder/Factory.h"
#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Builder/Runtime/Character.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Runtime/Ragged.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Semantics/expression.h"
#include "flang/Semantics/symbol.h"
@@ -2425,6 +2427,36 @@ public:
    }
  }

  //===--------------------------------------------------------------------===//
  // WHERE array assignment, FORALL assignment, and FORALL+WHERE array
  // assignment
  //===--------------------------------------------------------------------===//

  /// Entry point for array assignment when the iteration space is explicitly
  /// defined (Fortran's FORALL) with or without masks, and/or the implied
  /// iteration space involves masks (Fortran's WHERE). Both contexts (explicit
  /// space and implicit space with masks) may be present.
  static void lowerAnyMaskedArrayAssignment(
      Fortran::lower::AbstractConverter &converter,
      Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx,
      const Fortran::lower::SomeExpr &lhs, const Fortran::lower::SomeExpr &rhs,
      Fortran::lower::ExplicitIterSpace &explicitSpace,
      Fortran::lower::ImplicitIterSpace &implicitSpace) {
    if (explicitSpace.isActive() && lhs.Rank() == 0) {
      // Scalar assignment expression in a FORALL context.
      ArrayExprLowering ael(converter, stmtCtx, symMap,
                            ConstituentSemantics::RefTransparent,
                            &explicitSpace, &implicitSpace);
      ael.lowerScalarAssignment(lhs, rhs);
      return;
    }
    // Array assignment expression in a FORALL and/or WHERE context.
    ArrayExprLowering ael(converter, stmtCtx, symMap,
                          ConstituentSemantics::CopyInCopyOut, &explicitSpace,
                          &implicitSpace);
    ael.lowerArrayAssignment(lhs, rhs);
  }

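For reference (not part of the patch), two inputs that would reach the two branches of lowerAnyMaskedArrayAssignment: a rank-0 left-hand side element inside a FORALL, and a ranked left-hand side under a WHERE mask.

program masked_paths
  integer, parameter :: n = 4
  integer :: i
  real :: x(n), y(n)
  logical :: m(n)
  y = [1.0, 2.0, 3.0, 4.0]
  x = 0.0
  m = y > 2.0
  forall (i = 1:n) x(i) = y(i)  ! lhs.Rank() == 0: scalar-assignment path
  where (m) x = 2.0 * y         ! ranked lhs: masked array-assignment path
  print *, x
end program masked_paths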
  //===--------------------------------------------------------------------===//
  // Array assignment to allocatable array
  //===--------------------------------------------------------------------===//
@@ -2568,6 +2600,291 @@ public:
    return fir::ArrayBoxValue(tempRes, dest.getExtents());
  }

  static void lowerLazyArrayExpression(
      Fortran::lower::AbstractConverter &converter,
      Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx,
      const Fortran::lower::SomeExpr &expr, mlir::Value raggedHeader) {
    ArrayExprLowering ael(converter, stmtCtx, symMap);
    ael.lowerLazyArrayExpression(expr, raggedHeader);
  }

  /// Lower the expression \p expr into a buffer that is created on demand. The
  /// pointer to the buffer and the buffer's shape are stored in and retrieved
  /// from the ragged array header \p header.
  void lowerLazyArrayExpression(const Fortran::lower::SomeExpr &expr,
                                mlir::Value header) {
    mlir::Location loc = getLoc();
    mlir::TupleType hdrTy = fir::factory::getRaggedArrayHeaderType(builder);
    mlir::IntegerType i32Ty = builder.getIntegerType(32);

    // Once the loop extents have been computed, which may require being inside
    // some explicit loops, lazily allocate the expression on the heap. The
    // following continuation creates the buffer as needed.
    ccPrelude = [=](llvm::ArrayRef<mlir::Value> shape) {
      mlir::IntegerType i64Ty = builder.getIntegerType(64);
      mlir::Value byteSize = builder.createIntegerConstant(loc, i64Ty, 1);
      fir::runtime::genRaggedArrayAllocate(
          loc, builder, header, /*asHeaders=*/false, byteSize, shape);
    };

    // Create a dummy array_load before the loop. We're storing to a lazy
    // temporary, so there will be no conflict and no copy-in. TODO: skip this
    // as there isn't any necessity for it.
    ccLoadDest = [=](llvm::ArrayRef<mlir::Value> shape) -> fir::ArrayLoadOp {
      mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
      auto var = builder.create<fir::CoordinateOp>(
          loc, builder.getRefType(hdrTy.getType(1)), header, one);
      auto load = builder.create<fir::LoadOp>(loc, var);
      mlir::Type eleTy =
          fir::unwrapSequenceType(fir::unwrapRefType(load.getType()));
      auto seqTy = fir::SequenceType::get(eleTy, shape.size());
      mlir::Value castTo =
          builder.createConvert(loc, fir::HeapType::get(seqTy), load);
      mlir::Value shapeOp = builder.genShape(loc, shape);
      return builder.create<fir::ArrayLoadOp>(
          loc, seqTy, castTo, shapeOp, /*slice=*/mlir::Value{}, llvm::None);
    };
    // Custom lowering of the element store to deal with the extra indirection
    // to the lazily allocated buffer.
    ccStoreToDest = [=](IterSpace iters) {
      mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
      auto var = builder.create<fir::CoordinateOp>(
          loc, builder.getRefType(hdrTy.getType(1)), header, one);
      auto load = builder.create<fir::LoadOp>(loc, var);
      mlir::Type eleTy =
          fir::unwrapSequenceType(fir::unwrapRefType(load.getType()));
      auto seqTy = fir::SequenceType::get(eleTy, iters.iterVec().size());
      auto toTy = fir::HeapType::get(seqTy);
      mlir::Value castTo = builder.createConvert(loc, toTy, load);
      mlir::Value shape = builder.genShape(loc, genIterationShape());
      llvm::SmallVector<mlir::Value> indices = fir::factory::originateIndices(
          loc, builder, castTo.getType(), shape, iters.iterVec());
      auto eleAddr = builder.create<fir::ArrayCoorOp>(
          loc, builder.getRefType(eleTy), castTo, shape,
          /*slice=*/mlir::Value{}, indices, destination.getTypeparams());
      mlir::Value eleVal =
          builder.createConvert(loc, eleTy, iters.getElement());
      builder.create<fir::StoreOp>(loc, eleVal, eleAddr);
      return iters.innerArgument();
    };

    // Lower the array expression now. Clean up any temps that may have
    // been generated when lowering `expr` right after the lowered value
    // was stored to the ragged array temporary. The local temps will not
    // be needed afterwards.
    stmtCtx.pushScope();
    [[maybe_unused]] ExtValue loopRes = lowerArrayExpression(expr);
    stmtCtx.finalize(/*popScope=*/true);
    assert(fir::getBase(loopRes));
  }

  template <typename A, typename B>
  ExtValue lowerScalarAssignment(const A &lhs, const B &rhs) {
    // 1) Lower the rhs expression with array_fetch op(s).
    IterationSpace iters;
    iters.setElement(genarr(rhs)(iters));
    fir::ExtendedValue elementalExv = iters.elementExv();
    // 2) Lower the lhs expression to an array_update.
    semant = ConstituentSemantics::ProjectedCopyInCopyOut;
    auto lexv = genarr(lhs)(iters);
    // 3) Finalize the inner context.
    explicitSpace->finalizeContext();
    // 4) Thread the updated array value forward. Note: the lhs might be
    // ill-formed (performing scalar assignment in an array context),
    // in which case there is no array to thread.
    auto createResult = [&](auto op) {
      mlir::Value oldInnerArg = op.getSequence();
      std::size_t offset = explicitSpace->argPosition(oldInnerArg);
      explicitSpace->setInnerArg(offset, fir::getBase(lexv));
      builder.create<fir::ResultOp>(getLoc(), fir::getBase(lexv));
    };
    if (auto updateOp = mlir::dyn_cast<fir::ArrayUpdateOp>(
            fir::getBase(lexv).getDefiningOp()))
      createResult(updateOp);
    else if (auto amend = mlir::dyn_cast<fir::ArrayAmendOp>(
                 fir::getBase(lexv).getDefiningOp()))
      createResult(amend);
    else if (auto modifyOp = mlir::dyn_cast<fir::ArrayModifyOp>(
                 fir::getBase(lexv).getDefiningOp()))
      createResult(modifyOp);
    return lexv;
  }

  bool explicitSpaceIsActive() const {
    return explicitSpace && explicitSpace->isActive();
  }

  bool implicitSpaceHasMasks() const {
    return implicitSpace && !implicitSpace->empty();
  }

  CC genMaskAccess(mlir::Value tmp, mlir::Value shape) {
    mlir::Location loc = getLoc();
    return [=, builder = &converter.getFirOpBuilder()](IterSpace iters) {
      mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(tmp.getType());
      auto eleTy = arrTy.cast<fir::SequenceType>().getEleTy();
      mlir::Type eleRefTy = builder->getRefType(eleTy);
      mlir::IntegerType i1Ty = builder->getI1Type();
      // Adjust indices for any shift of the origin of the array.
      llvm::SmallVector<mlir::Value> indices = fir::factory::originateIndices(
          loc, *builder, tmp.getType(), shape, iters.iterVec());
      auto addr = builder->create<fir::ArrayCoorOp>(
          loc, eleRefTy, tmp, shape, /*slice=*/mlir::Value{}, indices,
          /*typeParams=*/llvm::None);
      auto load = builder->create<fir::LoadOp>(loc, addr);
      return builder->createConvert(loc, i1Ty, load);
    };
  }

  /// Construct the incremental instantiations of the ragged array structure.
  /// Rebind the lazy buffer variable, etc. as we go.
  template <bool withAllocation = false>
  mlir::Value prepareRaggedArrays(Fortran::lower::FrontEndExpr expr) {
    assert(explicitSpaceIsActive());
    mlir::Location loc = getLoc();
    mlir::TupleType raggedTy = fir::factory::getRaggedArrayHeaderType(builder);
    llvm::SmallVector<llvm::SmallVector<fir::DoLoopOp>> loopStack =
        explicitSpace->getLoopStack();
    const std::size_t depth = loopStack.size();
    mlir::IntegerType i64Ty = builder.getIntegerType(64);
    [[maybe_unused]] mlir::Value byteSize =
        builder.createIntegerConstant(loc, i64Ty, 1);
    mlir::Value header = implicitSpace->lookupMaskHeader(expr);
    for (std::remove_const_t<decltype(depth)> i = 0; i < depth; ++i) {
      auto insPt = builder.saveInsertionPoint();
      if (i < depth - 1)
        builder.setInsertionPoint(loopStack[i + 1][0]);

      // Compute and gather the extents.
      llvm::SmallVector<mlir::Value> extents;
      for (auto doLoop : loopStack[i])
        extents.push_back(builder.genExtentFromTriplet(
            loc, doLoop.getLowerBound(), doLoop.getUpperBound(),
            doLoop.getStep(), i64Ty));
      if constexpr (withAllocation) {
        fir::runtime::genRaggedArrayAllocate(
            loc, builder, header, /*asHeaders=*/true, byteSize, extents);
      }

      // Compute the dynamic position into the header.
      llvm::SmallVector<mlir::Value> offsets;
      for (auto doLoop : loopStack[i]) {
        auto m = builder.create<mlir::arith::SubIOp>(
            loc, doLoop.getInductionVar(), doLoop.getLowerBound());
        auto n = builder.create<mlir::arith::DivSIOp>(loc, m, doLoop.getStep());
        mlir::Value one = builder.createIntegerConstant(loc, n.getType(), 1);
        offsets.push_back(builder.create<mlir::arith::AddIOp>(loc, n, one));
      }
      mlir::IntegerType i32Ty = builder.getIntegerType(32);
      mlir::Value uno = builder.createIntegerConstant(loc, i32Ty, 1);
      mlir::Type coorTy = builder.getRefType(raggedTy.getType(1));
      auto hdOff = builder.create<fir::CoordinateOp>(loc, coorTy, header, uno);
      auto toTy = fir::SequenceType::get(raggedTy, offsets.size());
      mlir::Type toRefTy = builder.getRefType(toTy);
      auto ldHdr = builder.create<fir::LoadOp>(loc, hdOff);
      mlir::Value hdArr = builder.createConvert(loc, toRefTy, ldHdr);
      auto shapeOp = builder.genShape(loc, extents);
      header = builder.create<fir::ArrayCoorOp>(
          loc, builder.getRefType(raggedTy), hdArr, shapeOp,
          /*slice=*/mlir::Value{}, offsets,
          /*typeparams=*/mlir::ValueRange{});
      auto hdrVar = builder.create<fir::CoordinateOp>(loc, coorTy, header, uno);
      auto inVar = builder.create<fir::LoadOp>(loc, hdrVar);
      mlir::Value two = builder.createIntegerConstant(loc, i32Ty, 2);
      mlir::Type coorTy2 = builder.getRefType(raggedTy.getType(2));
      auto hdrSh = builder.create<fir::CoordinateOp>(loc, coorTy2, header, two);
      auto shapePtr = builder.create<fir::LoadOp>(loc, hdrSh);
      // Replace the binding.
      implicitSpace->rebind(expr, genMaskAccess(inVar, shapePtr));
      if (i < depth - 1)
        builder.restoreInsertionPoint(insPt);
    }
    return header;
  }

  /// Lower mask expressions with implied iteration spaces from the variants of
  /// WHERE syntax. Since it is legal for mask expressions to have side effects
  /// and modify values that will be used for the lhs, rhs, or both of
  /// subsequent assignments, the mask must be evaluated before the assignment
  /// is processed.
  /// Mask expressions are array expressions too.
  void genMasks() {
    // Lower the mask expressions, if any.
    if (implicitSpaceHasMasks()) {
      mlir::Location loc = getLoc();
      // Mask expressions are array expressions too.
      for (const auto *e : implicitSpace->getExprs())
        if (e && !implicitSpace->isLowered(e)) {
          if (mlir::Value var = implicitSpace->lookupMaskVariable(e)) {
            // Allocate the mask buffer lazily.
            assert(explicitSpaceIsActive());
            mlir::Value header =
                prepareRaggedArrays</*withAllocation=*/true>(e);
            Fortran::lower::createLazyArrayTempValue(converter, *e, header,
                                                     symMap, stmtCtx);
            // Close the explicit loops.
            builder.create<fir::ResultOp>(loc, explicitSpace->getInnerArgs());
            builder.setInsertionPointAfter(explicitSpace->getOuterLoop());
            // Open a new copy of the explicit loop nest.
            explicitSpace->genLoopNest();
            continue;
          }
          fir::ExtendedValue tmp = Fortran::lower::createSomeArrayTempValue(
              converter, *e, symMap, stmtCtx);
          mlir::Value shape = builder.createShape(loc, tmp);
          implicitSpace->bind(e, genMaskAccess(fir::getBase(tmp), shape));
        }

      // Set buffer from the header.
      for (const auto *e : implicitSpace->getExprs()) {
        if (!e)
          continue;
        if (implicitSpace->lookupMaskVariable(e)) {
          // Index into the ragged buffer to retrieve cached results.
          const int rank = e->Rank();
          assert(destShape.empty() ||
                 static_cast<std::size_t>(rank) == destShape.size());
          mlir::Value header = prepareRaggedArrays(e);
          mlir::TupleType raggedTy =
              fir::factory::getRaggedArrayHeaderType(builder);
          mlir::IntegerType i32Ty = builder.getIntegerType(32);
          mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
          auto coor1 = builder.create<fir::CoordinateOp>(
              loc, builder.getRefType(raggedTy.getType(1)), header, one);
          auto db = builder.create<fir::LoadOp>(loc, coor1);
          mlir::Type eleTy =
              fir::unwrapSequenceType(fir::unwrapRefType(db.getType()));
          mlir::Type buffTy =
              builder.getRefType(fir::SequenceType::get(eleTy, rank));
          // Address of ragged buffer data.
          mlir::Value buff = builder.createConvert(loc, buffTy, db);

          mlir::Value two = builder.createIntegerConstant(loc, i32Ty, 2);
          auto coor2 = builder.create<fir::CoordinateOp>(
              loc, builder.getRefType(raggedTy.getType(2)), header, two);
          auto shBuff = builder.create<fir::LoadOp>(loc, coor2);
          mlir::IntegerType i64Ty = builder.getIntegerType(64);
          mlir::IndexType idxTy = builder.getIndexType();
          llvm::SmallVector<mlir::Value> extents;
          for (std::remove_const_t<decltype(rank)> i = 0; i < rank; ++i) {
            mlir::Value off = builder.createIntegerConstant(loc, i32Ty, i);
            auto coor = builder.create<fir::CoordinateOp>(
                loc, builder.getRefType(i64Ty), shBuff, off);
            auto ldExt = builder.create<fir::LoadOp>(loc, coor);
            extents.push_back(builder.createConvert(loc, idxTy, ldExt));
          }
          if (destShape.empty())
            destShape = extents;
          // Construct shape of buffer.
          mlir::Value shapeOp = builder.genShape(loc, extents);

          // Replace binding with the local result.
          implicitSpace->rebind(e, genMaskAccess(buff, shapeOp));
        }
      }
    }
  }

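Why genMasks evaluates masks before any assignment, per 10.2.3.2: the mask may depend on values the guarded assignment then modifies. A small Fortran example (not from the patch):

program mask_first
  real :: a(4)
  a = [1.0, 2.0, 3.0, 10.0]
  ! The mask for every element depends on all of a (through sum), so it
  ! must be computed up front from the original values, before any
  ! element of a is assigned.
  where (a > sum(a) / size(a)) a = 0.0
  print *, a   ! expect 1.0 2.0 3.0 0.0
end program mask_first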
  // FIXME: should take multiple inner arguments.
  std::pair<IterationSpace, mlir::OpBuilder::InsertPoint>
  genImplicitLoops(mlir::ValueRange shape, mlir::Value innerArg) {
@@ -2688,7 +3005,7 @@ public:
      builder.create<fir::ResultOp>(loc, innerArg);
      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
    };
-   for (std::remove_const_t<decltype(size)> i = 0; i < size; ++i)
+   for (std::size_t i = 0; i < size; ++i)
      if (const auto *e = maskExprs[i])
        genFalseBlock(e, genCond(e, iters));

@@ -3048,7 +3365,11 @@ public:
  template <int KIND>
  CC genarr(const Fortran::evaluate::Negate<Fortran::evaluate::Type<
                Fortran::common::TypeCategory::Real, KIND>> &x) {
-   TODO(getLoc(), "");
    mlir::Location loc = getLoc();
    auto f = genarr(x.left());
    return [=](IterSpace iters) -> ExtValue {
      return builder.create<mlir::arith::NegFOp>(loc, fir::getBase(f(iters)));
    };
  }
  template <int KIND>
  CC genarr(const Fortran::evaluate::Negate<Fortran::evaluate::Type<
@@ -3629,29 +3950,56 @@ public:
    TODO(getLoc(), "genarr LogicalOperation");
  }

  //===--------------------------------------------------------------------===//
  // Relational operators (<, <=, ==, etc.)
  //===--------------------------------------------------------------------===//

  template <typename OP, typename PRED, typename A>
  CC createCompareOp(PRED pred, const A &x) {
    mlir::Location loc = getLoc();
    auto lf = genarr(x.left());
    auto rf = genarr(x.right());
    return [=](IterSpace iters) -> ExtValue {
      mlir::Value lhs = fir::getBase(lf(iters));
      mlir::Value rhs = fir::getBase(rf(iters));
      return builder.create<OP>(loc, pred, lhs, rhs);
    };
  }
  template <typename A>
  CC createCompareCharOp(mlir::arith::CmpIPredicate pred, const A &x) {
    mlir::Location loc = getLoc();
    auto lf = genarr(x.left());
    auto rf = genarr(x.right());
    return [=](IterSpace iters) -> ExtValue {
      auto lhs = lf(iters);
      auto rhs = rf(iters);
      return fir::runtime::genCharCompare(builder, loc, pred, lhs, rhs);
    };
  }
  template <int KIND>
  CC genarr(const Fortran::evaluate::Relational<Fortran::evaluate::Type<
                Fortran::common::TypeCategory::Integer, KIND>> &x) {
-   TODO(getLoc(), "genarr Relational Integer");
    return createCompareOp<mlir::arith::CmpIOp>(translateRelational(x.opr), x);
  }
  template <int KIND>
  CC genarr(const Fortran::evaluate::Relational<Fortran::evaluate::Type<
                Fortran::common::TypeCategory::Character, KIND>> &x) {
-   TODO(getLoc(), "genarr Relational Character");
    return createCompareCharOp(translateRelational(x.opr), x);
  }
  template <int KIND>
  CC genarr(const Fortran::evaluate::Relational<Fortran::evaluate::Type<
                Fortran::common::TypeCategory::Real, KIND>> &x) {
-   TODO(getLoc(), "genarr Relational Real");
    return createCompareOp<mlir::arith::CmpFOp>(translateFloatRelational(x.opr),
                                                x);
  }
  template <int KIND>
  CC genarr(const Fortran::evaluate::Relational<Fortran::evaluate::Type<
                Fortran::common::TypeCategory::Complex, KIND>> &x) {
-   TODO(getLoc(), "genarr Relational Complex");
    return createCompareOp<fir::CmpcOp>(translateFloatRelational(x.opr), x);
  }
  CC genarr(
      const Fortran::evaluate::Relational<Fortran::evaluate::SomeType> &r) {
-   TODO(getLoc(), "genarr Relational SomeType");
    return std::visit([&](const auto &x) { return genarr(x); }, r.u);
  }

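For illustration (mine, not from the patch), an elementwise character relational of the kind that would flow through createCompareCharOp rather than a single integer compare:

program char_relational
  character(3) :: names(3)
  logical :: m(3)
  names = ["abc", "abd", "abb"]
  ! Elementwise character comparison; each element comparison is
  ! delegated to the character-compare runtime helper.
  m = names > "abb"
  print *, m   ! expect T T F
end program char_relational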
  template <typename A>
@@ -4322,14 +4670,6 @@ private:
           "failed to compute the array expression shape");
  }

- bool explicitSpaceIsActive() const {
-   return explicitSpace && explicitSpace->isActive();
- }
-
- bool implicitSpaceHasMasks() const {
-   return implicitSpace && !implicitSpace->empty();
- }
-
  explicit ArrayExprLowering(Fortran::lower::AbstractConverter &converter,
                             Fortran::lower::StatementContext &stmtCtx,
                             Fortran::lower::SymMap &symMap)
@@ -4355,7 +4695,7 @@ private:
        implicitSpace(impSpace->empty() ? nullptr : impSpace), semant{sem} {
    // Generate any mask expressions, as necessary. This is the compute step
    // that creates the effective masks. See 10.2.3.2 in particular.
-   // genMasks();
+   genMasks();
  }

  mlir::Location getLoc() { return converter.getCurrentLocation(); }
@@ -4552,6 +4892,21 @@ void Fortran::lower::createSomeArrayAssignment(
  ArrayExprLowering::lowerArrayAssignment(converter, symMap, stmtCtx, lhs, rhs);
}

void Fortran::lower::createAnyMaskedArrayAssignment(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::lower::SomeExpr &lhs, const Fortran::lower::SomeExpr &rhs,
    Fortran::lower::ExplicitIterSpace &explicitSpace,
    Fortran::lower::ImplicitIterSpace &implicitSpace,
    Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx) {
  LLVM_DEBUG(lhs.AsFortran(llvm::dbgs() << "onto array: ") << '\n';
             rhs.AsFortran(llvm::dbgs() << "assign expression: ")
             << " given the explicit iteration space:\n"
             << explicitSpace << "\n and implied mask conditions:\n"
             << implicitSpace << '\n';);
  ArrayExprLowering::lowerAnyMaskedArrayAssignment(
      converter, symMap, stmtCtx, lhs, rhs, explicitSpace, implicitSpace);
}

void Fortran::lower::createAllocatableArrayAssignment(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::lower::SomeExpr &lhs, const Fortran::lower::SomeExpr &rhs,

@@ -4576,6 +4931,15 @@ fir::ExtendedValue Fortran::lower::createSomeArrayTempValue(
      expr);
}

void Fortran::lower::createLazyArrayTempValue(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::lower::SomeExpr &expr, mlir::Value raggedHeader,
    Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx) {
  LLVM_DEBUG(expr.AsFortran(llvm::dbgs() << "array value: ") << '\n');
  ArrayExprLowering::lowerLazyArrayExpression(converter, symMap, stmtCtx, expr,
                                              raggedHeader);
}

mlir::Value Fortran::lower::genMaxWithZero(fir::FirOpBuilder &builder,
                                           mlir::Location loc,
                                           mlir::Value value) {
@@ -0,0 +1,239 @@
! RUN: bbc -emit-fir %s -o - | FileCheck %s

! CHECK-LABEL: func @_QQmain() {
! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEa) : !fir.ref<!fir.array<10xf32>>
! CHECK: %[[VAL_1:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_2:.*]] = fir.address_of(@_QFEb) : !fir.ref<!fir.array<10xf32>>
! CHECK: %[[VAL_3:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_5:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_7:.*]] = fir.array_load %[[VAL_0]](%[[VAL_6]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_8:.*]] = arith.constant 4.000000e+00 : f32
! CHECK: %[[VAL_9:.*]] = fir.allocmem !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_11:.*]] = fir.array_load %[[VAL_9]](%[[VAL_10]]) : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_13:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_5]], %[[VAL_12]] : index
! CHECK: %[[VAL_15:.*]] = fir.do_loop %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_12]] unordered iter_args(%[[VAL_17:.*]] = %[[VAL_11]]) -> (!fir.array<10x!fir.logical<4>>) {
! CHECK: %[[VAL_18:.*]] = fir.array_fetch %[[VAL_7]], %[[VAL_16]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_19:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_8]] : f32
! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i1) -> !fir.logical<4>
! CHECK: %[[VAL_21:.*]] = fir.array_update %[[VAL_17]], %[[VAL_20]], %[[VAL_16]] : (!fir.array<10x!fir.logical<4>>, !fir.logical<4>, index) -> !fir.array<10x!fir.logical<4>>
! CHECK: fir.result %[[VAL_21]] : !fir.array<10x!fir.logical<4>>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_11]], %[[VAL_22:.*]] to %[[VAL_9]] : !fir.array<10x!fir.logical<4>>, !fir.array<10x!fir.logical<4>>, !fir.heap<!fir.array<10x!fir.logical<4>>>
! CHECK: %[[VAL_23:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_24:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_25:.*]] = fir.array_load %[[VAL_2]](%[[VAL_24]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_0]](%[[VAL_26]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_29:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_3]], %[[VAL_28]] : index
! CHECK: %[[VAL_31:.*]] = fir.do_loop %[[VAL_32:.*]] = %[[VAL_29]] to %[[VAL_30]] step %[[VAL_28]] unordered iter_args(%[[VAL_33:.*]] = %[[VAL_25]]) -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_34:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_32]], %[[VAL_34]] : index
! CHECK: %[[VAL_36:.*]] = fir.array_coor %[[VAL_9]](%[[VAL_23]]) %[[VAL_35]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_37:.*]] = fir.load %[[VAL_36]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_39:.*]] = fir.if %[[VAL_38]] -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_40:.*]] = fir.array_fetch %[[VAL_27]], %[[VAL_32]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_41:.*]] = arith.negf %[[VAL_40]] : f32
! CHECK: %[[VAL_42:.*]] = fir.array_update %[[VAL_33]], %[[VAL_41]], %[[VAL_32]] : (!fir.array<10xf32>, f32, index) -> !fir.array<10xf32>
! CHECK: fir.result %[[VAL_42]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: fir.result %[[VAL_33]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_43:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_25]], %[[VAL_44:.*]] to %[[VAL_2]] : !fir.array<10xf32>, !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>
! CHECK: fir.freemem %[[VAL_9]]
! CHECK: %[[VAL_46:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_47:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_48:.*]] = fir.array_load %[[VAL_0]](%[[VAL_47]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_49:.*]] = arith.constant 1.000000e+02 : f32
! CHECK: %[[VAL_50:.*]] = fir.allocmem !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_51:.*]] = fir.shape %[[VAL_46]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_52:.*]] = fir.array_load %[[VAL_50]](%[[VAL_51]]) : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_53:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_54:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_55:.*]] = arith.subi %[[VAL_46]], %[[VAL_53]] : index
! CHECK: %[[VAL_56:.*]] = fir.do_loop %[[VAL_57:.*]] = %[[VAL_54]] to %[[VAL_55]] step %[[VAL_53]] unordered iter_args(%[[VAL_58:.*]] = %[[VAL_52]]) -> (!fir.array<10x!fir.logical<4>>) {
! CHECK: %[[VAL_59:.*]] = fir.array_fetch %[[VAL_48]], %[[VAL_57]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_60:.*]] = arith.cmpf ogt, %[[VAL_59]], %[[VAL_49]] : f32
! CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (i1) -> !fir.logical<4>
! CHECK: %[[VAL_62:.*]] = fir.array_update %[[VAL_58]], %[[VAL_61]], %[[VAL_57]] : (!fir.array<10x!fir.logical<4>>, !fir.logical<4>, index) -> !fir.array<10x!fir.logical<4>>
! CHECK: fir.result %[[VAL_62]] : !fir.array<10x!fir.logical<4>>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_52]], %[[VAL_63:.*]] to %[[VAL_50]] : !fir.array<10x!fir.logical<4>>, !fir.array<10x!fir.logical<4>>, !fir.heap<!fir.array<10x!fir.logical<4>>>
! CHECK: %[[VAL_64:.*]] = fir.shape %[[VAL_46]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_65:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_66:.*]] = fir.array_load %[[VAL_2]](%[[VAL_65]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_67:.*]] = arith.constant 2.000000e+00 : f32
! CHECK: %[[VAL_68:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_69:.*]] = fir.array_load %[[VAL_0]](%[[VAL_68]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_70:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_71:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_72:.*]] = arith.subi %[[VAL_3]], %[[VAL_70]] : index
! CHECK: %[[VAL_73:.*]] = fir.do_loop %[[VAL_74:.*]] = %[[VAL_71]] to %[[VAL_72]] step %[[VAL_70]] unordered iter_args(%[[VAL_75:.*]] = %[[VAL_66]]) -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_76:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_77:.*]] = arith.addi %[[VAL_74]], %[[VAL_76]] : index
! CHECK: %[[VAL_78:.*]] = fir.array_coor %[[VAL_50]](%[[VAL_64]]) %[[VAL_77]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_79:.*]] = fir.load %[[VAL_78]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_80:.*]] = fir.convert %[[VAL_79]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_81:.*]] = fir.if %[[VAL_80]] -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_82:.*]] = fir.array_fetch %[[VAL_69]], %[[VAL_74]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_83:.*]] = arith.mulf %[[VAL_67]], %[[VAL_82]] : f32
! CHECK: %[[VAL_84:.*]] = fir.array_update %[[VAL_75]], %[[VAL_83]], %[[VAL_74]] : (!fir.array<10xf32>, f32, index) -> !fir.array<10xf32>
! CHECK: fir.result %[[VAL_84]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: fir.result %[[VAL_75]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_85:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_66]], %[[VAL_86:.*]] to %[[VAL_2]] : !fir.array<10xf32>, !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>
! CHECK: %[[VAL_88:.*]] = arith.constant 10 : index
! CHECK: %[[VAL_89:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_90:.*]] = fir.array_load %[[VAL_0]](%[[VAL_89]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_91:.*]] = arith.constant 5.000000e+01 : f32
! CHECK: %[[VAL_92:.*]] = fir.allocmem !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_93:.*]] = fir.shape %[[VAL_88]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_94:.*]] = fir.array_load %[[VAL_92]](%[[VAL_93]]) : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.array<10x!fir.logical<4>>
! CHECK: %[[VAL_95:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_96:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_97:.*]] = arith.subi %[[VAL_88]], %[[VAL_95]] : index
! CHECK: %[[VAL_98:.*]] = fir.do_loop %[[VAL_99:.*]] = %[[VAL_96]] to %[[VAL_97]] step %[[VAL_95]] unordered iter_args(%[[VAL_100:.*]] = %[[VAL_94]]) -> (!fir.array<10x!fir.logical<4>>) {
! CHECK: %[[VAL_101:.*]] = fir.array_fetch %[[VAL_90]], %[[VAL_99]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_102:.*]] = arith.cmpf ogt, %[[VAL_101]], %[[VAL_91]] : f32
! CHECK: %[[VAL_103:.*]] = fir.convert %[[VAL_102]] : (i1) -> !fir.logical<4>
! CHECK: %[[VAL_104:.*]] = fir.array_update %[[VAL_100]], %[[VAL_103]], %[[VAL_99]] : (!fir.array<10x!fir.logical<4>>, !fir.logical<4>, index) -> !fir.array<10x!fir.logical<4>>
! CHECK: fir.result %[[VAL_104]] : !fir.array<10x!fir.logical<4>>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_94]], %[[VAL_105:.*]] to %[[VAL_92]] : !fir.array<10x!fir.logical<4>>, !fir.array<10x!fir.logical<4>>, !fir.heap<!fir.array<10x!fir.logical<4>>>
! CHECK: %[[VAL_106:.*]] = fir.shape %[[VAL_88]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_107:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_108:.*]] = fir.array_load %[[VAL_2]](%[[VAL_107]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_109:.*]] = arith.constant 3.000000e+00 : f32
! CHECK: %[[VAL_110:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_111:.*]] = fir.array_load %[[VAL_0]](%[[VAL_110]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_112:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_113:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_114:.*]] = arith.subi %[[VAL_3]], %[[VAL_112]] : index
! CHECK: %[[VAL_115:.*]] = fir.do_loop %[[VAL_116:.*]] = %[[VAL_113]] to %[[VAL_114]] step %[[VAL_112]] unordered iter_args(%[[VAL_117:.*]] = %[[VAL_108]]) -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_118:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_116]], %[[VAL_118]] : index
! CHECK: %[[VAL_120:.*]] = fir.array_coor %[[VAL_50]](%[[VAL_64]]) %[[VAL_119]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_121:.*]] = fir.load %[[VAL_120]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_122:.*]] = fir.convert %[[VAL_121]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_123:.*]] = fir.if %[[VAL_122]] -> (!fir.array<10xf32>) {
! CHECK: fir.result %[[VAL_117]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: %[[VAL_124:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_125:.*]] = arith.addi %[[VAL_116]], %[[VAL_124]] : index
! CHECK: %[[VAL_126:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_106]]) %[[VAL_125]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_127:.*]] = fir.load %[[VAL_126]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_128:.*]] = fir.convert %[[VAL_127]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_129:.*]] = fir.if %[[VAL_128]] -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_130:.*]] = fir.array_fetch %[[VAL_111]], %[[VAL_116]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_131:.*]] = arith.addf %[[VAL_109]], %[[VAL_130]] : f32
! CHECK: %[[VAL_132:.*]] = fir.array_update %[[VAL_117]], %[[VAL_131]], %[[VAL_116]] : (!fir.array<10xf32>, f32, index) -> !fir.array<10xf32>
! CHECK: fir.result %[[VAL_132]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: fir.result %[[VAL_117]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_133:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_134:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_108]], %[[VAL_135:.*]] to %[[VAL_2]] : !fir.array<10xf32>, !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>
! CHECK: %[[VAL_136:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_137:.*]] = fir.array_load %[[VAL_0]](%[[VAL_136]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_138:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_139:.*]] = fir.array_load %[[VAL_0]](%[[VAL_138]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_140:.*]] = arith.constant 1.000000e+00 : f32
! CHECK: %[[VAL_141:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_142:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_143:.*]] = arith.subi %[[VAL_1]], %[[VAL_141]] : index
! CHECK: %[[VAL_144:.*]] = fir.do_loop %[[VAL_145:.*]] = %[[VAL_142]] to %[[VAL_143]] step %[[VAL_141]] unordered iter_args(%[[VAL_146:.*]] = %[[VAL_137]]) -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_147:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_148:.*]] = arith.addi %[[VAL_145]], %[[VAL_147]] : index
! CHECK: %[[VAL_149:.*]] = fir.array_coor %[[VAL_50]](%[[VAL_64]]) %[[VAL_148]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_150:.*]] = fir.load %[[VAL_149]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_151:.*]] = fir.convert %[[VAL_150]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_152:.*]] = fir.if %[[VAL_151]] -> (!fir.array<10xf32>) {
! CHECK: fir.result %[[VAL_146]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: %[[VAL_153:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_154:.*]] = arith.addi %[[VAL_145]], %[[VAL_153]] : index
! CHECK: %[[VAL_155:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_106]]) %[[VAL_154]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_156:.*]] = fir.load %[[VAL_155]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_157:.*]] = fir.convert %[[VAL_156]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_158:.*]] = fir.if %[[VAL_157]] -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_159:.*]] = fir.array_fetch %[[VAL_139]], %[[VAL_145]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_160:.*]] = arith.subf %[[VAL_159]], %[[VAL_140]] : f32
! CHECK: %[[VAL_161:.*]] = fir.array_update %[[VAL_146]], %[[VAL_160]], %[[VAL_145]] : (!fir.array<10xf32>, f32, index) -> !fir.array<10xf32>
! CHECK: fir.result %[[VAL_161]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: fir.result %[[VAL_146]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_162:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_163:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_137]], %[[VAL_164:.*]] to %[[VAL_0]] : !fir.array<10xf32>, !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>
! CHECK: %[[VAL_165:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_166:.*]] = fir.array_load %[[VAL_0]](%[[VAL_165]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_167:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_168:.*]] = fir.array_load %[[VAL_0]](%[[VAL_167]]) : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> !fir.array<10xf32>
! CHECK: %[[VAL_169:.*]] = arith.constant 2.000000e+00 : f32
! CHECK: %[[VAL_170:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_171:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_172:.*]] = arith.subi %[[VAL_1]], %[[VAL_170]] : index
! CHECK: %[[VAL_173:.*]] = fir.do_loop %[[VAL_174:.*]] = %[[VAL_171]] to %[[VAL_172]] step %[[VAL_170]] unordered iter_args(%[[VAL_175:.*]] = %[[VAL_166]]) -> (!fir.array<10xf32>) {
! CHECK: %[[VAL_176:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_177:.*]] = arith.addi %[[VAL_174]], %[[VAL_176]] : index
! CHECK: %[[VAL_178:.*]] = fir.array_coor %[[VAL_50]](%[[VAL_64]]) %[[VAL_177]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_179:.*]] = fir.load %[[VAL_178]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_180:.*]] = fir.convert %[[VAL_179]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_181:.*]] = fir.if %[[VAL_180]] -> (!fir.array<10xf32>) {
! CHECK: fir.result %[[VAL_175]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: %[[VAL_182:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_183:.*]] = arith.addi %[[VAL_174]], %[[VAL_182]] : index
! CHECK: %[[VAL_184:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_106]]) %[[VAL_183]] : (!fir.heap<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_185:.*]] = fir.load %[[VAL_184]] : !fir.ref<!fir.logical<4>>
! CHECK: %[[VAL_186:.*]] = fir.convert %[[VAL_185]] : (!fir.logical<4>) -> i1
! CHECK: %[[VAL_187:.*]] = fir.if %[[VAL_186]] -> (!fir.array<10xf32>) {
! CHECK: fir.result %[[VAL_175]] : !fir.array<10xf32>
! CHECK: } else {
! CHECK: %[[VAL_188:.*]] = fir.array_fetch %[[VAL_168]], %[[VAL_174]] : (!fir.array<10xf32>, index) -> f32
! CHECK: %[[VAL_189:.*]] = arith.divf %[[VAL_188]], %[[VAL_169]] : f32
! CHECK: %[[VAL_190:.*]] = fir.array_update %[[VAL_175]], %[[VAL_189]], %[[VAL_174]] : (!fir.array<10xf32>, f32, index) -> !fir.array<10xf32>
! CHECK: fir.result %[[VAL_190]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_191:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.result %[[VAL_192:.*]] : !fir.array<10xf32>
! CHECK: }
! CHECK: fir.array_merge_store %[[VAL_166]], %[[VAL_193:.*]] to %[[VAL_0]] : !fir.array<10xf32>, !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>
! CHECK: fir.freemem %[[VAL_92]]
! CHECK: fir.freemem %[[VAL_50]]
! CHECK: return
! CHECK: }

real :: a(10), b(10)

! Statement
where (a > 4.0) b = -a

! Construct
where (a > 100.0)
  b = 2.0 * a
elsewhere (a > 50.0)
  b = 3.0 + a
  a = a - 1.0
elsewhere
  a = a / 2.0
end where
end
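For reference (not part of the test, which only checks the emitted FIR): the first WHERE statement with concrete data, showing the values the lowered loop-plus-mask form produces.

program where_values
  integer :: i
  real :: a(10), b(10)
  a = [(real(i), i = 1, 10)]
  b = 0.0
  where (a > 4.0) b = -a
  print *, b   ! 0 0 0 0 -5 -6 -7 -8 -9 -10
end program where_values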