// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Only check the overall shape of the code and the presence of relevant
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
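
// The test cases below are separated by "// -----" markers; with
// -split-input-file, each one is translated as an independent module.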

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
  %2 = llvm.load %arg3 : !llvm.ptr<f32>
  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
  omp.yield
}
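
// The declaration above consists of three regions: "init" yields the value
// used to initialize each thread's private copy (0.0 for addition),
// "combiner" folds two partial values into one, and the optional "atomic"
// region updates an accumulator in place.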

// CHECK-LABEL: @simple_reduction
llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  omp.parallel {
    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
    reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
      %1 = llvm.mlir.constant(2.0 : f32) : f32
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.yield
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
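// (The CHECK-SAME line captures the outlined function's symbol name into the
// FileCheck variable OUTLINED so the same symbol can be matched again below.)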

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
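// Each omp.reduction use lowers to a load of the private copy, an inlined
// application of the combiner region, and a store of the result back.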
// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
  %2 = llvm.load %arg3 : !llvm.ptr<f32>
  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
  omp.yield
}

// When the same reduction declaration is used several times, its regions
// are translated several times, which shouldn't lead to value/block
// remapping assertions.
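// Each reduction clause below gets its own inlined copy of the init and
// combiner regions in the translated IR.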
// CHECK-LABEL: @reuse_declaration
llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  %2 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  omp.parallel {
    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
      %1 = llvm.mlir.constant(2.0 : f32) : f32
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.reduction %1, %2 : !llvm.ptr<f32>
      omp.yield
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variables and their initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variables using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float

// -----

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
  %2 = llvm.load %arg3 : !llvm.ptr<f32>
  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
  omp.yield
}

// It's okay not to reference the reduction variable in the body.
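// The unreferenced variable still gets a private copy, an initialization and
// a slot in the reduction function; only the in-body update is absent (see
// the CHECK-NOT lines below).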
// CHECK-LABEL: @missing_omp_reduction
llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  %2 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  omp.parallel {
    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
      %1 = llvm.mlir.constant(2.0 : f32) : f32
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.yield
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variables and their initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, float* %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the first private variable using the reduction region; the
// unreferenced second variable must not be updated
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
// CHECK-NOT: %{{.*}} = load float, float* %[[PRIVATE2]]
// CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float

// -----

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
  %2 = llvm.load %arg3 : !llvm.ptr<f32>
  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
  omp.yield
}

// It's okay to refer to the same reduction variable more than once in the
// body.
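// Both omp.reduction ops below target %0, so the translated body updates the
// same private copy twice.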
// CHECK-LABEL: @double_reference
llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  omp.parallel {
    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
    reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
      %1 = llvm.mlir.constant(2.0 : f32) : f32
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.yield
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, float* %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
// CHECK: atomicrmw fadd float* %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Two updates of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]
// CHECK: %[[PARTIAL:.+]] = load float, float* %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], float* %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.reduction.declare @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr<f32>, %arg3: !llvm.ptr<f32>):
  %2 = llvm.load %arg3 : !llvm.ptr<f32>
  llvm.atomicrmw fadd %arg2, %2 monotonic : f32
  omp.yield
}

omp.reduction.declare @mul_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(1.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fmul %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
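
// Unlike @add_f32, @mul_f32 declares no "atomic" region, so the atomic path
// of the translated reduction cannot be used (see the checks below).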

// CHECK-LABEL: @no_atomic
llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  %2 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr<f32>
  omp.parallel {
    omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
    reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>) {
      %1 = llvm.mlir.constant(2.0 : f32) : f32
      omp.reduction %1, %0 : !llvm.ptr<f32>
      omp.reduction %1, %2 : !llvm.ptr<f32>
      omp.yield
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variables and their initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, float* %[[PRIVATE1]]
// CHECK: store float 1.000000e+00, float* %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction is not provided for @mul_f32, so the atomic branch ends
// in an unreachable terminator.
// CHECK: unreachable

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fmul float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variables using the reduction regions
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, float* %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], float* %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, float* %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], float* %[[PRIVATE2]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fmul float