forked from OSchip/llvm-project
[Flang][OpenMP] Add support for integer multiplication reduction in worksharing-loop
Adds support for reduction of multiplication by extending OpenMP.cpp::genOpenMPReduction() and altering the identity constant emitted in OpenMP.cpp::createReductionDecl(). This patch builds on D130077 and as such, only supports reductions for integer types in worksharing loops. Reviewed By: awarzynski Differential Revision: https://reviews.llvm.org/D130767
This commit is contained in:
parent
46c49e66d8
commit
9893b26dfa
|
@ -783,28 +783,50 @@ genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
}
|
||||
}
|
||||
|
||||
/// This function returns the identity value of the operator \p reductionOpName.
|
||||
/// For example:
|
||||
/// 0 + x = x,
|
||||
/// 1 * x = x
|
||||
// Picks the identity element for a reduction by substring match on
// \p reductionOpName: a name containing "add" gives 0, one containing
// "multiply" gives 1. \p loc is only forwarded to the TODO diagnostic below.
static int getOperationIdentity(llvm::StringRef reductionOpName,
|
||||
mlir::Location loc) {
|
||||
// "add" is tested first, so a name containing both substrings yields 0.
if (reductionOpName.contains("add"))
|
||||
return 0;
|
||||
else if (reductionOpName.contains("multiply"))
|
||||
return 1;
|
||||
// No other operator name is handled. NOTE(review): presumably TODO() reports
// "not yet implemented" and does not return, since nothing is returned after
// it -- confirm against the TODO macro definition.
TODO(loc, "Reduction of some intrinsic operators is not supported");
|
||||
}
|
||||
|
||||
// Creates an arith constant of \p type at \p loc holding the identity value
// that getOperationIdentity() returns for \p reductionOpName, and returns the
// resulting value. The attribute is built with getIntegerAttr.
// NOTE(review): this assumes \p type is an integer or index type -- confirm
// that every caller guards for that.
static Value getReductionInitValue(mlir::Location loc, mlir::Type type,
|
||||
llvm::StringRef reductionOpName,
|
||||
fir::FirOpBuilder &builder) {
|
||||
return builder.create<mlir::arith::ConstantOp>(
|
||||
loc, type,
|
||||
builder.getIntegerAttr(type, getOperationIdentity(reductionOpName, loc)));
|
||||
}
|
||||
|
||||
/// Creates an OpenMP reduction declaration and inserts it into the provided
|
||||
/// symbol table. The declaration has a constant initializer with the neutral
|
||||
/// value for `reductionOpName`, and a combiner selected by `intrinsicOp`.
|
||||
/// TODO: Generalize this for non-integer types, add atomic region.
|
||||
static omp::ReductionDeclareOp createReductionDecl(fir::FirOpBuilder &builder,
|
||||
llvm::StringRef name,
|
||||
mlir::Type type,
|
||||
mlir::Location loc) {
|
||||
static omp::ReductionDeclareOp createReductionDecl(
|
||||
fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
|
||||
Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
|
||||
mlir::Type type, mlir::Location loc) {
|
||||
OpBuilder::InsertionGuard guard(builder);
|
||||
mlir::ModuleOp module = builder.getModule();
|
||||
mlir::OpBuilder modBuilder(module.getBodyRegion());
|
||||
auto decl = module.lookupSymbol<mlir::omp::ReductionDeclareOp>(name);
|
||||
auto decl =
|
||||
module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
|
||||
if (!decl)
|
||||
decl = modBuilder.create<omp::ReductionDeclareOp>(loc, name, type);
|
||||
decl =
|
||||
modBuilder.create<omp::ReductionDeclareOp>(loc, reductionOpName, type);
|
||||
else
|
||||
return decl;
|
||||
|
||||
builder.createBlock(&decl.initializerRegion(), decl.initializerRegion().end(),
|
||||
{type}, {loc});
|
||||
builder.setInsertionPointToEnd(&decl.initializerRegion().back());
|
||||
Value init = builder.create<mlir::arith::ConstantOp>(
|
||||
loc, type, builder.getIntegerAttr(type, 0));
|
||||
Value init = getReductionInitValue(loc, type, reductionOpName, builder);
|
||||
builder.create<omp::YieldOp>(loc, init);
|
||||
|
||||
builder.createBlock(&decl.reductionRegion(), decl.reductionRegion().end(),
|
||||
|
@ -812,8 +834,20 @@ static omp::ReductionDeclareOp createReductionDecl(fir::FirOpBuilder &builder,
|
|||
builder.setInsertionPointToEnd(&decl.reductionRegion().back());
|
||||
mlir::Value op1 = decl.reductionRegion().front().getArgument(0);
|
||||
mlir::Value op2 = decl.reductionRegion().front().getArgument(1);
|
||||
Value addRes = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
|
||||
builder.create<omp::YieldOp>(loc, addRes);
|
||||
|
||||
Value res;
|
||||
switch (intrinsicOp) {
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
|
||||
res = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
|
||||
break;
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
|
||||
res = builder.create<mlir::arith::MulIOp>(loc, op1, op2);
|
||||
break;
|
||||
default:
|
||||
TODO(loc, "Reduction of some intrinsic operators is not supported");
|
||||
}
|
||||
|
||||
builder.create<omp::YieldOp>(loc, res);
|
||||
return decl;
|
||||
}
|
||||
|
||||
|
@ -885,10 +919,18 @@ static std::string getReductionName(
|
|||
Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
|
||||
mlir::Type ty) {
|
||||
std::string reductionName;
|
||||
if (intrinsicOp == Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
|
||||
|
||||
switch (intrinsicOp) {
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
|
||||
reductionName = "add_reduction";
|
||||
else
|
||||
break;
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
|
||||
reductionName = "multiply_reduction";
|
||||
break;
|
||||
default:
|
||||
reductionName = "other_reduction";
|
||||
break;
|
||||
}
|
||||
|
||||
return (llvm::Twine(reductionName) +
|
||||
(ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
|
||||
|
@ -990,10 +1032,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
const auto &intrinsicOp{
|
||||
std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
|
||||
redDefinedOp->u)};
|
||||
if (intrinsicOp !=
|
||||
Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
|
||||
switch (intrinsicOp) {
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
|
||||
break;
|
||||
|
||||
default:
|
||||
TODO(currentLocation,
|
||||
"Reduction of some intrinsic operators is not supported");
|
||||
break;
|
||||
}
|
||||
for (const auto &ompObject : objectList.v) {
|
||||
if (const auto *name{
|
||||
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
|
||||
|
@ -1005,7 +1053,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
|
|||
if (redType.isIntOrIndex()) {
|
||||
decl = createReductionDecl(
|
||||
firOpBuilder, getReductionName(intrinsicOp, redType),
|
||||
redType, currentLocation);
|
||||
intrinsicOp, redType, currentLocation);
|
||||
} else {
|
||||
TODO(currentLocation,
|
||||
"Reduction of some types is not supported");
|
||||
|
@ -1604,8 +1652,8 @@ void Fortran::lower::genOpenMPDeclarativeConstruct(
|
|||
// Generate an OpenMP reduction operation. This implementation finds the chain :
|
||||
// load reduction var -> reduction_operation -> store reduction var and replaces
|
||||
// it with the reduction operation.
|
||||
// TODO: Currently assumes it is an integer addition reduction. Generalize this
|
||||
// for various reduction operation types.
|
||||
// TODO: Currently assumes it is an integer addition/multiplication reduction.
|
||||
// Generalize this for various reduction operation types.
|
||||
// TODO: Generate the reduction operation during lowering instead of creating
|
||||
// and removing operations since this is not a robust approach. Also, removing
|
||||
// ops in the builder (instead of a rewriter) is probably not the best approach.
|
||||
|
@ -1626,9 +1674,14 @@ void Fortran::lower::genOpenMPReduction(
|
|||
const auto &intrinsicOp{
|
||||
std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
|
||||
reductionOp->u)};
|
||||
if (intrinsicOp !=
|
||||
Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
|
||||
|
||||
switch (intrinsicOp) {
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
|
||||
case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
for (const auto &ompObject : objectList.v) {
|
||||
if (const auto *name{
|
||||
Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
|
||||
! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
|
||||
|
||||
! CHECK: not yet implemented: Reduction of some intrinsic operators is not supported
|
||||
! Test input: multiplies x by the loop index for i = 1..100 inside an OpenMP
! parallel region, using a worksharing `do` with reduction(*:x), then prints x.
! x is not assigned before the loop, and i is not declared in this subroutine.
subroutine reduction_multiply
|
||||
integer :: x
|
||||
!$omp parallel
|
||||
!$omp do reduction(*:x)
|
||||
do i=1, 100
|
||||
x = x * i
|
||||
end do
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
print *, x
|
||||
end subroutine
|
|
@ -0,0 +1,144 @@
|
|||
! RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s
|
||||
! RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s
|
||||
|
||||
!CHECK-LABEL: omp.reduction.declare
|
||||
!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
|
||||
!CHECK: ^bb0(%{{.*}}: i64):
|
||||
!CHECK: %[[C1_1:.*]] = arith.constant 1 : i64
|
||||
!CHECK: omp.yield(%[[C1_1]] : i64)
|
||||
!CHECK: } combiner {
|
||||
!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
|
||||
!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64
|
||||
!CHECK: omp.yield(%[[RES]] : i64)
|
||||
!CHECK: }
|
||||
|
||||
!CHECK-LABEL: omp.reduction.declare
|
||||
!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
|
||||
!CHECK: ^bb0(%{{.*}}: i32):
|
||||
!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
|
||||
!CHECK: omp.yield(%[[C1_1]] : i32)
|
||||
!CHECK: } combiner {
|
||||
!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
|
||||
!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
|
||||
!CHECK: omp.yield(%[[RES]] : i32)
|
||||
!CHECK: }
|
||||
|
||||
!CHECK-LABEL: func.func @_QPsimple_reduction
|
||||
!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
|
||||
!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
|
||||
!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: omp.parallel
|
||||
!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
||||
!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
|
||||
!CHECK: %[[C10:.*]] = arith.constant 10 : i32
|
||||
!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
|
||||
!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
|
||||
!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: omp.yield
|
||||
!CHECK: omp.terminator
|
||||
!CHECK: return
|
||||
|
||||
! Test input: a single default-kind integer multiplication reduction.
! x is set to 1 before the parallel region, then multiplied by the loop index
! for i = 1..10 with the reduction variable as the left operand (x * i).
subroutine simple_reduction
|
||||
integer :: x
|
||||
x = 1
|
||||
!$omp parallel
|
||||
!$omp do reduction(*:x)
|
||||
do i=1, 10
|
||||
x = x * i
|
||||
end do
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
end subroutine
|
||||
|
||||
!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order
|
||||
!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
|
||||
!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
|
||||
!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: omp.parallel
|
||||
!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
||||
!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
|
||||
!CHECK: %[[C10:.*]] = arith.constant 10 : i32
|
||||
!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
|
||||
!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
|
||||
!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: omp.yield
|
||||
!CHECK: omp.terminator
|
||||
!CHECK: return
|
||||
|
||||
! Test input: same loop as a plain x = x * i reduction over i = 1..10, but with
! the operands swapped so the reduction variable is the right operand (i * x).
subroutine simple_reduction_switch_order
|
||||
integer :: x
|
||||
x = 1
|
||||
!$omp parallel
|
||||
!$omp do reduction(*:x)
|
||||
do i=1, 10
|
||||
x = i * x
|
||||
end do
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
end subroutine
|
||||
|
||||
!CHECK-LABEL: func.func @_QPmultiple_reductions_same_type
|
||||
!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_same_typeEx"}
|
||||
!CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_same_typeEy"}
|
||||
!CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_same_typeEz"}
|
||||
!CHECK: omp.parallel
|
||||
!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
||||
!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32
|
||||
!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : !fir.ref<i32>
|
||||
!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : !fir.ref<i32>
|
||||
!CHECK: omp.yield
|
||||
!CHECK: omp.terminator
|
||||
!CHECK: return
|
||||
|
||||
! Test input: three default-kind integers (x, y, z) listed in one
! reduction(*:...) clause. Each is set to 1 and then multiplied by the loop
! index for i = 1..10 in the same worksharing loop.
subroutine multiple_reductions_same_type
|
||||
integer :: x,y,z
|
||||
x = 1
|
||||
|
||||
y = 1
|
||||
z = 1
|
||||
!$omp parallel
|
||||
!$omp do reduction(*:x,y,z)
|
||||
do i=1, 10
|
||||
x = x * i
|
||||
y = y * i
|
||||
z = z * i
|
||||
end do
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
end subroutine
|
||||
|
||||
!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type
|
||||
!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"}
|
||||
!CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
|
||||
!CHECK: omp.parallel
|
||||
!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
|
||||
!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>) for (%[[IVAL:.*]]) : i32
|
||||
!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
|
||||
!CHECK: %[[C2_32:.*]] = arith.constant 2 : i32
|
||||
!CHECK: omp.reduction %[[C2_32]], %[[XREF]] : !fir.ref<i32>
|
||||
!CHECK: %[[C2_64:.*]] = arith.constant 2 : i64
|
||||
!CHECK: omp.reduction %[[C2_64]], %[[YREF]] : !fir.ref<i64>
|
||||
!CHECK: omp.yield
|
||||
!CHECK: omp.terminator
|
||||
!CHECK: return
|
||||
|
||||
! Test input: one reduction(*:x,y) clause over two integer kinds, a default-kind
! x and a kind=8 y. Each is multiplied by a literal 2 of a matching kind suffix
! (2_4 and 2_8) on every iteration; neither is assigned before the loop.
subroutine multiple_reductions_different_type
|
||||
integer :: x
|
||||
integer(kind=8) :: y
|
||||
!$omp parallel
|
||||
!$omp do reduction(*:x,y)
|
||||
do i=1, 10
|
||||
x = x * 2_4
|
||||
y = y * 2_8
|
||||
end do
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
end subroutine
|
Loading…
Reference in New Issue