[mlir][OpenMP] omp.task translation to LLVM IR

This patch adds translation of the omp.task operation from the OpenMP
dialect to LLVM IR and adds tests for it.

Depends on D71989

Reviewed By: ftynse, kiranchandramohan, peixin, Meinersbur

Differential Revision: https://reviews.llvm.org/D123919
This commit is contained in:
Shraiysh Vaishay 2022-07-04 13:52:35 +05:30
parent 10ebaf7686
commit fdf505f3f2
3 changed files with 119 additions and 4 deletions

View File

@ -1260,6 +1260,9 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return InsertPointTy();
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// The current basic block is split into four basic blocks. After outlining,
// they will be mapped as follows:
// ```
@ -1285,7 +1288,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
OI.EntryBB = TaskAllocaBB;
OI.OuterAllocaBB = AllocaIP.getBlock();
OI.ExitBB = TaskExitBB;
OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) {
OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) {
// The input IR here looks like the following-
// ```
// func @current_fn() {
@ -1324,9 +1327,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
// Arguments - `loc_ref` (Ident) and `gtid` (ThreadID)
// call.
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Value *ThreadID = getOrCreateThreadID(Ident);
// Argument - `flags`

View File

@ -677,6 +677,30 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
return bodyGenStatus;
}
/// Translates an `omp.task` operation into LLVM IR via OpenMPIRBuilder.
///
/// Translation is refused (an error is emitted on the op and returned) when
/// any of `if_expr`, `final_expr`, `untiedAttr`, `mergeableAttr`,
/// `in_reductions` or `priority` is set, or when `allocate_vars` is non-empty.
/// Otherwise the task is created through `createTask` and the status recorded
/// while translating the task region is returned.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // Bail out early on any clause this translation does not handle.
  const bool hasUnhandledClause =
      taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() ||
      taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() ||
      !taskOp.allocate_vars().empty();
  if (hasUnhandledClause)
    return taskOp.emitError("unhandled clauses for translation to LLVM IR");

  // Updated by the body callback while the region is being translated.
  LogicalResult regionStatus = success();

  // Body callback: move the builder to the code-generation point supplied by
  // the caller and translate the task region there. The alloca insertion
  // point argument is not used by this callback.
  auto genTaskBody = [&](InsertPointTy /*allocaIP*/, InsertPointTy codegenIP) {
    builder.restoreIP(codegenIP);
    convertOmpOpRegions(taskOp.region(), "omp.task.region", builder,
                        moduleTranslation, regionStatus);
  };

  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
  const bool isTied = !taskOp.untied();

  // Continue emitting code at the insertion point returned by createTask.
  InsertPointTy afterTaskIP =
      moduleTranslation.getOpenMPBuilder()->createTask(ompLoc, allocaIP,
                                                       genTaskBody, isTied);
  builder.restoreIP(afterTaskIP);
  return regionStatus;
}
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@ -1367,6 +1391,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::SingleOp op) {
return convertOmpSingle(op, builder, moduleTranslation);
})
.Case([&](omp::TaskOp op) {
return convertOmpTaskOp(op, builder, moduleTranslation);
})
.Case<omp::YieldOp, omp::TerminatorOp, omp::ReductionDeclareOp,
omp::CriticalDeclareOp>([](auto op) {
// `yield` and `terminator` can be just omitted. The block structure

View File

@ -2170,3 +2170,91 @@ llvm.func @omp_threadprivate() {
}
llvm.mlir.global internal @_QFsubEx() : i32
// -----
// CHECK-LABEL: define void @omp_task
// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
// Test case: a task whose region only uses values defined inside the region
// itself (constant, alloca, load, add, store). None of the function arguments
// %x, %y or %zaddr is referenced from within the region. The expectations
// inside the function look for the runtime calls that allocate and launch the
// task.
llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 0,
// CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]])
// CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]])
omp.task {
%n = llvm.mlir.constant(1 : i64) : i64
%valaddr = llvm.alloca %n x i32 : (i64) -> !llvm.ptr<i32>
%val = llvm.load %valaddr : !llvm.ptr<i32>
%double = llvm.add %val, %val : i32
llvm.store %double, %valaddr : !llvm.ptr<i32>
omp.terminator
}
llvm.return
}
// CHECK: define internal void @[[outlined_fn:.+]]()
// CHECK: task.alloca{{.*}}:
// CHECK: br label %[[task_body:[^, ]+]]
// CHECK: [[task_body]]:
// CHECK: br label %[[task_region:[^, ]+]]
// CHECK: [[task_region]]:
// CHECK: %[[alloca:.+]] = alloca i32, i64 1
// CHECK: %[[val:.+]] = load i32, ptr %[[alloca]]
// CHECK: %[[newval:.+]] = add i32 %[[val]], %[[val]]
// CHECK: store i32 %[[newval]], ptr %{{[^, ]+}}
// CHECK: br label %[[exit_stub:[^, ]+]]
// CHECK: [[exit_stub]]:
// CHECK: ret void
// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) {
// CHECK: call void @[[outlined_fn]]()
// CHECK: ret i32 0
// CHECK: }
// -----
// CHECK-LABEL: define void @omp_task
// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
// Test case: a task whose region uses %x, %y and %zaddr, all of which are
// arguments of the enclosing function. The function also computes and stores
// a difference before the task and a product after it, and the expectations
// verify that both stay in the enclosing function around the task runtime
// calls. The module pins the target triple to x86_64-unknown-linux-gnu.
module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} {
llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr<i32>) {
// CHECK: %[[diff:.+]] = sub i32 %[[x]], %[[y]],
%diff = llvm.sub %x, %y : i32
// NOTE(review): the store expectation below spells the pointer as the literal
// `%2` rather than the captured zaddr variable used by the later store
// expectation; consider using the capture so value renumbering cannot break
// it. Confirm the pattern still matches before changing it.
// CHECK: store i32 %[[diff]], ptr %2
llvm.store %diff, %zaddr : !llvm.ptr<i32>
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 16, i64 0,
// CHECK-SAME: ptr @[[wrapper_fn:.+]])
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[task_data]], ptr {{.+}}, i64 16, i1 false)
// CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]])
omp.task {
%z = llvm.add %x, %y : i32
llvm.store %z, %zaddr : !llvm.ptr<i32>
omp.terminator
}
// CHECK: %[[prod:.+]] = mul i32 %[[x]], %[[y]]
%b = llvm.mul %x, %y : i32
// CHECK: store i32 %[[prod]], ptr %[[zaddr]]
llvm.store %b, %zaddr : !llvm.ptr<i32>
llvm.return
}
}
// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]])
// CHECK: task.alloca{{.*}}:
// CHECK: br label %[[task_body:[^, ]+]]
// CHECK: [[task_body]]:
// CHECK: br label %[[task_region:[^, ]+]]
// CHECK: [[task_region]]:
// CHECK: %[[sum:.+]] = add i32 %{{.+}}, %{{.+}}
// CHECK: store i32 %[[sum]], ptr %{{.+}}
// CHECK: br label %[[exit_stub:[^, ]+]]
// CHECK: [[exit_stub]]:
// CHECK: ret void
// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) {
// CHECK: call void @[[outlined_fn]](ptr %[[task_data]])
// CHECK: ret i32 0
// CHECK: }