forked from OSchip/llvm-project
[mlir][OpenMP] omp.task translation to LLVM IR
This patch adds translation for omp.task from OpenMPDialect to LLVM IR Dialect and adds tests for the same. Depends on D71989. Reviewed By: ftynse, kiranchandramohan, peixin, Meinersbur. Differential Revision: https://reviews.llvm.org/D123919
This commit is contained in:
parent
10ebaf7686
commit
fdf505f3f2
|
@ -1260,6 +1260,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
|
|||
if (!updateToLocation(Loc))
|
||||
return InsertPointTy();
|
||||
|
||||
uint32_t SrcLocStrSize;
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
|
||||
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
|
||||
// The current basic block is split into four basic blocks. After outlining,
|
||||
// they will be mapped as follows:
|
||||
// ```
|
||||
|
@ -1285,7 +1288,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
|
|||
OI.EntryBB = TaskAllocaBB;
|
||||
OI.OuterAllocaBB = AllocaIP.getBlock();
|
||||
OI.ExitBB = TaskExitBB;
|
||||
OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) {
|
||||
OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) {
|
||||
// The input IR here looks like the following-
|
||||
// ```
|
||||
// func @current_fn() {
|
||||
|
@ -1324,9 +1327,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
|
|||
|
||||
// Arguments - `loc_ref` (Ident) and `gtid` (ThreadID)
|
||||
// call.
|
||||
uint32_t SrcLocStrSize;
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
|
||||
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
|
||||
Value *ThreadID = getOrCreateThreadID(Ident);
|
||||
|
||||
// Argument - `flags`
|
||||
|
|
|
@ -677,6 +677,30 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
|
|||
return bodyGenStatus;
|
||||
}
|
||||
|
||||
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
|
||||
static LogicalResult
|
||||
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
|
||||
LLVM::ModuleTranslation &moduleTranslation) {
|
||||
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
|
||||
LogicalResult bodyGenStatus = success();
|
||||
if (taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() ||
|
||||
taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() ||
|
||||
!taskOp.allocate_vars().empty()) {
|
||||
return taskOp.emitError("unhandled clauses for translation to LLVM IR");
|
||||
}
|
||||
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
|
||||
builder.restoreIP(codegenIP);
|
||||
convertOmpOpRegions(taskOp.region(), "omp.task.region", builder,
|
||||
moduleTranslation, bodyGenStatus);
|
||||
};
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
|
||||
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
|
||||
ompLoc, allocaIP, bodyCB, !taskOp.untied()));
|
||||
return bodyGenStatus;
|
||||
}
|
||||
|
||||
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
|
||||
static LogicalResult
|
||||
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
|
@ -1367,6 +1391,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
|
|||
.Case([&](omp::SingleOp op) {
|
||||
return convertOmpSingle(op, builder, moduleTranslation);
|
||||
})
|
||||
.Case([&](omp::TaskOp op) {
|
||||
return convertOmpTaskOp(op, builder, moduleTranslation);
|
||||
})
|
||||
.Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
|
||||
omp::CriticalDeclareOp>([](auto op) {
|
||||
// `yield` and `terminator` can be just omitted. The block structure
|
||||
|
|
|
@ -2170,3 +2170,91 @@ llvm.func @omp_threadprivate() {
|
|||
}
|
||||
|
||||
llvm.mlir.global internal @_QFsubEx() : i32
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: define void @omp_task
|
||||
// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
|
||||
llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
|
||||
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
|
||||
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
|
||||
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 0,
|
||||
// CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]])
|
||||
// CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]])
|
||||
omp.task {
|
||||
%n = llvm.mlir.constant(1 : i64) : i64
|
||||
%valaddr = llvm.alloca %n x i32 : (i64) -> !llvm.ptr<i32>
|
||||
%val = llvm.load %valaddr : !llvm.ptr<i32>
|
||||
%double = llvm.add %val, %val : i32
|
||||
llvm.store %double, %valaddr : !llvm.ptr<i32>
|
||||
omp.terminator
|
||||
}
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// CHECK: define internal void @[[outlined_fn:.+]]()
|
||||
// CHECK: task.alloca{{.*}}:
|
||||
// CHECK: br label %[[task_body:[^, ]+]]
|
||||
// CHECK: [[task_body]]:
|
||||
// CHECK: br label %[[task_region:[^, ]+]]
|
||||
// CHECK: [[task_region]]:
|
||||
// CHECK: %[[alloca:.+]] = alloca i32, i64 1
|
||||
// CHECK: %[[val:.+]] = load i32, ptr %[[alloca]]
|
||||
// CHECK: %[[newval:.+]] = add i32 %[[val]], %[[val]]
|
||||
// CHECK: store i32 %[[newval]], ptr %{{[^, ]+}}
|
||||
// CHECK: br label %[[exit_stub:[^, ]+]]
|
||||
// CHECK: [[exit_stub]]:
|
||||
// CHECK: ret void
|
||||
|
||||
|
||||
// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) {
|
||||
// CHECK: call void @[[outlined_fn]]()
|
||||
// CHECK: ret i32 0
|
||||
// CHECK: }
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: define void @omp_task
|
||||
// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
|
||||
module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} {
|
||||
llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
|
||||
// CHECK: %[[diff:.+]] = sub i32 %[[x]], %[[y]],
|
||||
%diff = llvm.sub %x, %y : i32
|
||||
// CHECK: store i32 %[[diff]], ptr %[[zaddr]]
|
||||
llvm.store %diff, %zaddr : !llvm.ptr<i32>
|
||||
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
|
||||
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
|
||||
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 16, i64 0,
|
||||
// CHECK-SAME: ptr @[[wrapper_fn:.+]])
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[task_data]], ptr {{.+}}, i64 16, i1 false)
|
||||
// CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]])
|
||||
omp.task {
|
||||
%z = llvm.add %x, %y : i32
|
||||
llvm.store %z, %zaddr : !llvm.ptr<i32>
|
||||
omp.terminator
|
||||
}
|
||||
// CHECK: %[[prod:.+]] = mul i32 %[[x]], %[[y]]
|
||||
%b = llvm.mul %x, %y : i32
|
||||
// CHECK: store i32 %[[prod]], ptr %[[zaddr]]
|
||||
llvm.store %b, %zaddr : !llvm.ptr<i32>
|
||||
llvm.return
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]])
|
||||
// CHECK: task.alloca{{.*}}:
|
||||
// CHECK: br label %[[task_body:[^, ]+]]
|
||||
// CHECK: [[task_body]]:
|
||||
// CHECK: br label %[[task_region:[^, ]+]]
|
||||
// CHECK: [[task_region]]:
|
||||
// CHECK: %[[sum:.+]] = add i32 %{{.+}}, %{{.+}}
|
||||
// CHECK: store i32 %[[sum]], ptr %{{.+}}
|
||||
// CHECK: br label %[[exit_stub:[^, ]+]]
|
||||
// CHECK: [[exit_stub]]:
|
||||
// CHECK: ret void
|
||||
|
||||
|
||||
// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) {
|
||||
// CHECK: call void @[[outlined_fn]](ptr %[[task_data]])
|
||||
// CHECK: ret i32 0
|
||||
// CHECK: }
|
||||
|
|
Loading…
Reference in New Issue