[Flang][OpenMP] Add support for real typed reductions in worksharing-loop

Allows addition and multiplication reductions to be used with real types by adding getReductionOperation() to OpenMP.cpp, which selects either the integer or the floating-point operation depending on the reduction type.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D132459
2022-09-01 10:24:32 +00:00 · 2022-09-01 10:24:32 +00:00 · e5caa0f229
parent fa154a9170
commit e5caa0f229
6 changed files with 574 additions and 310 deletions
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@ -855,11 +855,28 @@ static int getOperationIdentity(llvm::StringRef reductionOpName,
 /// Builds the initial value for a reduction over element type `type`: an
 /// arith.constant holding whatever getOperationIdentity() reports for
 /// `reductionOpName`. Float types get a float attribute; every other accepted
 /// type gets an integer attribute.
 static Value getReductionInitValue(mlir::Location loc, mlir::Type type,
                                   llvm::StringRef reductionOpName,
                                   fir::FirOpBuilder &builder) {
+  assert(type.isIntOrIndexOrFloat() && "only integer and float types are currently supported");
+  if (type.isa<FloatType>()) // float path: the constant needs a FloatAttr
+    return builder.create<mlir::arith::ConstantOp>(
+        loc, type,
+        builder.getFloatAttr( // getOperationIdentity() returns an int, hence the cast below
+            type, (double)getOperationIdentity(reductionOpName, loc)));
+
  return builder.create<mlir::arith::ConstantOp>(
      loc, type,
      builder.getIntegerAttr(type, getOperationIdentity(reductionOpName, loc)));
 }

+template <typename FloatOp, typename IntegerOp> // float / integer op types for one combiner
+static Value getReductionOperation(fir::FirOpBuilder &builder, mlir::Type type,
+                                   mlir::Location loc, mlir::Value op1,
+                                   mlir::Value op2) {
+  assert(type.isIntOrIndexOrFloat() && "only integer and float types are currently supported");
+  if (type.isIntOrIndex()) // integer and index types are combined with IntegerOp
+    return builder.create<IntegerOp>(loc, op1, op2);
+  return builder.create<FloatOp>(loc, op1, op2); // when the assert holds, only float types reach here
+}
+
 /// Creates an OpenMP reduction declaration and inserts it into the provided
 /// symbol table. The declaration has a constant initializer with the neutral
 /// value `initValue`, and the reduction combiner carried over from `reduce`.
@ -891,19 +908,23 @@ static omp::ReductionDeclareOp createReductionDecl(
  mlir::Value op1 = decl.reductionRegion().front().getArgument(0);
  mlir::Value op2 = decl.reductionRegion().front().getArgument(1);

-  Value res;
+  Value reductionOp;
  switch (intrinsicOp) {
  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
-    res = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
+    reductionOp =
+        getReductionOperation<mlir::arith::AddFOp, mlir::arith::AddIOp>(
+            builder, type, loc, op1, op2);
    break;
  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
-    res = builder.create<mlir::arith::MulIOp>(loc, op1, op2);
+    reductionOp =
+        getReductionOperation<mlir::arith::MulFOp, mlir::arith::MulIOp>(
+            builder, type, loc, op1, op2);
    break;
  default:
    TODO(loc, "Reduction of some intrinsic operators is not supported");
  }

-  builder.create<omp::YieldOp>(loc, res);
+  builder.create<omp::YieldOp>(loc, reductionOp);
  return decl;
 }

@ -1106,7 +1127,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
              mlir::Type redType =
                  symVal.getType().cast<fir::ReferenceType>().getEleTy();
              reductionVars.push_back(symVal);
-              if (redType.isIntOrIndex()) {
+              if (redType.isIntOrIndexOrFloat()) {
                decl = createReductionDecl(
                    firOpBuilder, getReductionName(intrinsicOp, redType),
                    intrinsicOp, redType, currentLocation);
@ -1785,7 +1806,7 @@ void Fortran::lower::genOpenMPReduction(
              mlir::Value reductionVal = converter.getSymbolAddress(*symbol);
              mlir::Type reductionType =
                  reductionVal.getType().cast<fir::ReferenceType>().getEleTy();
-              if (!reductionType.isIntOrIndex())
+              if (!reductionType.isIntOrIndexOrFloat())
                continue;

              for (mlir::OpOperand &reductionValUse : reductionVal.getUses()) {
--- a/flang/test/Lower/OpenMP/Todo/reduction-real.f90
+++ b/flang/test/Lower/OpenMP/Todo/reduction-real.f90
@ -1,16 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some types is not supported
-subroutine reduction_real
-  real :: x
-  x = 0.0
-  !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = x + 1.0
-  end do
-  !$omp end do
-  !$omp end parallel
-  print *, x
-end subroutine
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90
@ -0,0 +1,273 @@
+! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init {
+!CHECK: ^bb0(%{{.*}}: f64):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64
+!CHECK:  omp.yield(%[[C0_1]] : f64)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64):
+!CHECK:  %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f64
+!CHECK:  omp.yield(%[[RES]] : f64)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
+!CHECK: ^bb0(%{{.*}}: i64):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i64
+!CHECK:  omp.yield(%[[C0_1]] : i64)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
+!CHECK:  %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64
+!CHECK:  omp.yield(%[[RES]] : i64)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init {
+!CHECK: ^bb0(%{{.*}}: f32):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  omp.yield(%[[C0_1]] : f32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
+!CHECK:  %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
+!CHECK:  omp.yield(%[[RES]] : f32)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
+!CHECK: ^bb0(%{{.*}}: i32):
+!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i32
+!CHECK:  omp.yield(%[[C0_1]] : i32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
+!CHECK:  %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32
+!CHECK:  omp.yield(%[[RES]] : i32)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_reduction
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+!CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
+!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_int_reduction ! '+' reduction over a default integer
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i ! reduction variable is the left operand
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_reduction
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+!CHECK:  %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_real_reduction ! '+' reduction over a default real
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = x + i ! integer loop index is added to the real reduction variable
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+!CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
+!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_int_reduction_switch_order ! integer '+' reduction, operands swapped
+  integer :: x
+  x = 0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x ! reduction variable is the right operand here
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+!CHECK:  %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32
+!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_real_reduction_switch_order ! real '+' reduction, operands swapped
+  real :: x
+  x = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x)
+  do i=1, 100
+    x = i + x ! reduction variable is the right operand here
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! Three default-integer variables listed in one reduction(+:...) clause; all
+! three are expected to reference the same i32 reduction declaration.
+!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_int_reductions_same_type
+  integer :: x,y,z
+  x = 0
+  y = 0
+  z = 0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+! Three default-real variables listed in one reduction(+:...) clause; all
+! three are expected to reference the same f32 reduction declaration.
+!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_real_reductions_same_type
+  real :: x,y,z
+  x = 0.0
+  y = 0.0
+  z = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z)
+  do i=1, 100
+    x = x + i
+    y = y + i
+    z = z + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type
+!CHECK:  %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]]  : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>, @[[RED_F32_NAME]] -> %[[ZREF]]  : !fir.ref<f32>, @[[RED_F64_NAME]] -> %[[WREF]]  : !fir.ref<f64>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64
+!CHECK:      omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : !fir.ref<i64>
+!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64
+!CHECK:      omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : !fir.ref<f64>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_reductions_different_type ! one clause, four reduction variables of different types
+  integer :: x
+  integer(kind=8) :: y ! 64-bit integer
+  real :: z
+  real(kind=8) :: w ! 64-bit real
+  x = 0
+  y = 0
+  z = 0.0
+  w = 0.0
+  !$omp parallel
+  !$omp do reduction(+:x,y,z,w)
+  do i=1, 100
+    x = x + i
+    y = y + i ! i is a default integer, so a conversion is expected for y, z and w
+    z = z + i
+    w = w + i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
--- a/flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
@ -1,144 +0,0 @@
-! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
-! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
-
-!CHECK-LABEL: omp.reduction.declare
-!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
-!CHECK: ^bb0(%{{.*}}: i64):
-!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i64
-!CHECK:  omp.yield(%[[C0_1]] : i64)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
-!CHECK:  %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64
-!CHECK:  omp.yield(%[[RES]] : i64)
-!CHECK: }
-
-!CHECK-LABEL: omp.reduction.declare
-!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
-!CHECK: ^bb0(%{{.*}}: i32):
-!CHECK:  %[[C0_1:.*]] = arith.constant 0 : i32
-!CHECK:  omp.yield(%[[C0_1]] : i32)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-!CHECK:  %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32
-!CHECK:  omp.yield(%[[RES]] : i32)
-!CHECK: }
-
-!CHECK-LABEL: func.func @_QPsimple_reduction
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
-!CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
-!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
-!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine simple_reduction
-  integer :: x
-  x = 0
-  !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = x + i
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
-!CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
-!CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK:    %[[C100:.*]] = arith.constant 100 : i32
-!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine simple_reduction_switch_order
-  integer :: x
-  x = 0
-  !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = i + x
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPmultiple_reductions_same_type
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_same_typeEx"}
-!CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_same_typeEy"}
-!CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_same_typeEz"}
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine multiple_reductions_same_type
-  integer :: x,y,z
-  x = 0
-  y = 0
-  z = 0
-  !$omp parallel
-  !$omp do reduction(+:x,y,z)
-  do i=1, 100
-    x = x + i
-    y = y + i
-    z = z + i
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
-!CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>) for  (%[[IVAL:.*]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[C1_32:.*]] = arith.constant 1 : i32
-!CHECK:      omp.reduction %[[C1_32]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
-!CHECK:      omp.reduction %[[C1_64]], %[[YREF]] : !fir.ref<i64>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine multiple_reductions_different_type
-  integer :: x
-  integer(kind=8) :: y
-  !$omp parallel
-  !$omp do reduction(+:x,y)
-  do i=1, 100
-    x = x + 1_4
-    y = y + 1_8
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
--- a/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
@ -1,144 +0,0 @@
-! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
-! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
-
-!CHECK-LABEL: omp.reduction.declare
-!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
-!CHECK: ^bb0(%{{.*}}: i64):
-!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i64
-!CHECK:  omp.yield(%[[C1_1]] : i64)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
-!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64
-!CHECK:  omp.yield(%[[RES]] : i64)
-!CHECK: }
-
-!CHECK-LABEL: omp.reduction.declare
-!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
-!CHECK: ^bb0(%{{.*}}: i32):
-!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK:  omp.yield(%[[C1_1]] : i32)
-!CHECK: } combiner {
-!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
-!CHECK:  omp.yield(%[[RES]] : i32)
-!CHECK: }
-
-!CHECK-LABEL: func.func @_QPsimple_reduction
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
-!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
-!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine simple_reduction
-  integer :: x
-  x = 1
-  !$omp parallel
-  !$omp do reduction(*:x)
-  do i=1, 10
-    x = x * i
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
-!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
-!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine simple_reduction_switch_order
-  integer :: x
-  x = 1
-  !$omp parallel
-  !$omp do reduction(*:x)
-  do i=1, 10
-  x = i * x
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPmultiple_reductions_same_type
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_same_typeEx"}
-!CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_same_typeEy"}
-!CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_same_typeEz"}
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine multiple_reductions_same_type
-  integer :: x,y,z
-  x = 1
-  y = 1
-  z = 1
-  !$omp parallel
-  !$omp do reduction(*:x,y,z)
-  do i=1, 10
-  x = x * i
-  y = y * i
-  z = z * i
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
-
-!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type
-!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
-!CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
-!CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>) for  (%[[IVAL:.*]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[C2_32:.*]] = arith.constant 2 : i32
-!CHECK:      omp.reduction %[[C2_32]], %[[XREF]] : !fir.ref<i32>
-!CHECK:      %[[C2_64:.*]] = arith.constant 2 : i64
-!CHECK:      omp.reduction %[[C2_64]], %[[YREF]] : !fir.ref<i64>
-!CHECK:      omp.yield
-!CHECK:    omp.terminator
-!CHECK:  return
-
-subroutine multiple_reductions_different_type
-  integer :: x
-  integer(kind=8) :: y
-  !$omp parallel
-  !$omp do reduction(*:x,y)
-  do i=1, 10
-    x = x * 2_4
-    y = y * 2_8
-  end do
-  !$omp end do
-  !$omp end parallel
-end subroutine
--- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90
@ -0,0 +1,274 @@
+! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init {
+!CHECK: ^bb0(%{{.*}}: f64):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1.000000e+00 : f64
+!CHECK:  omp.yield(%[[C1_1]] : f64)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64):
+!CHECK:  %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f64
+!CHECK:  omp.yield(%[[RES]] : f64)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
+!CHECK: ^bb0(%{{.*}}: i64):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i64
+!CHECK:  omp.yield(%[[C1_1]] : i64)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
+!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64
+!CHECK:  omp.yield(%[[RES]] : i64)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init {
+!CHECK: ^bb0(%{{.*}}: f32):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1.000000e+00 : f32
+!CHECK:  omp.yield(%[[C1_1]] : f32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
+!CHECK:  %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
+!CHECK:  omp.yield(%[[RES]] : f32)
+!CHECK: }
+
+!CHECK-LABEL: omp.reduction.declare
+!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
+!CHECK: ^bb0(%{{.*}}: i32):
+!CHECK:  %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:  omp.yield(%[[C1_1]] : i32)
+!CHECK: } combiner {
+!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
+!CHECK:  %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
+!CHECK:  omp.yield(%[[RES]] : i32)
+!CHECK: }
+
+!CHECK-LABEL: func.func @_QPsimple_int_reduction
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"}
+!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+
+subroutine simple_int_reduction ! baseline: one i32 accumulator in reduction(*:x), written as x = x * i
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_reduction
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"}
+!CHECK:  %[[C1_INIT:.*]] = arith.constant 1.000000e+00 : f32
+!CHECK:  fir.store %[[C1_INIT]] to %[[XREF]] : !fir.ref<f32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_real_reduction ! f32 accumulator in reduction(*:x); the i32 loop index is converted to f32 before omp.reduction
+  real :: x
+  x = 1.0
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"}
+!CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_int_reduction_switch_order ! i32 accumulator as the right operand (x = i * x); same omp.reduction expected
+  integer :: x
+  x = 1
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+  x = i * x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"}
+!CHECK:  %[[C1_INIT:.*]] = arith.constant 1.000000e+00 : f32
+!CHECK:  fir.store %[[C1_INIT]] to %[[XREF]] : !fir.ref<f32>
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK:    %[[C10:.*]] = arith.constant 10 : i32
+!CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine simple_real_reduction_switch_order ! f32 accumulator as the right operand (x = i * x); same omp.reduction expected
+  real :: x
+  x = 1.0
+  !$omp parallel
+  !$omp do reduction(*:x)
+  do i=1, 10
+  x = i * x
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_int_reductions_same_type ! three i32 accumulators in one clause, all using the same i32 reduction declaration
+  integer :: x,y,z
+  x = 1
+  y = 1
+  z = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+  x = x * i
+  y = y * i
+  z = z * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type
+!CHECK:  %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : !fir.ref<f32>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_real_reductions_same_type ! three f32 accumulators in one clause, all using the same f32 reduction declaration
+  real :: x,y,z
+  x = 1
+  y = 1
+  z = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+    x = x * i
+    y = y * i
+    z = z * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine
+
+
+!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type
+!CHECK:  %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"}
+!CHECK:  %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
+!CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
+!CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
+!CHECK:  omp.parallel
+!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>, @[[RED_F64_NAME]] -> %[[WREF]] : !fir.ref<f64>) for  (%[[IVAL:.*]]) : i32
+!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64
+!CHECK:      omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : !fir.ref<i64>
+!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : !fir.ref<f32>
+!CHECK:      %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64
+!CHECK:      omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : !fir.ref<f64>
+!CHECK:      omp.yield
+!CHECK:    omp.terminator
+!CHECK:  return
+subroutine multiple_reductions_different_type ! one clause mixing i32, i64, f32 and f64 accumulators, each with its own declaration
+  integer :: x
+  integer(kind=8) :: y
+  real :: z
+  real(kind=8) :: w
+  x = 1
+  y = 1
+  z = 1
+  w = 1
+  !$omp parallel
+  !$omp do reduction(*:x,y,z,w)
+  do i=1, 10
+    x = x * i
+    y = y * i
+    z = z * i
+    w = w * i
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine