[Coroutines] Set presplit attribute in Clang and mlir
This fixes bug 49264. Simply put, a coroutine must not be inlined before CoroSplit. Previously the marker for a pre-split coroutine was added in the CoroEarly pass, which runs after the AlwaysInliner pass in the O0 pipeline, so the AlwaysInliner had no way to see that it should not inline a coroutine; hence the bug. This patch sets the presplit attribute in Clang and MLIR instead, so the inliner always sees the attribute before splitting.

Reviewed By: rjmccall, ezhulenev

Differential Revision: https://reviews.llvm.org/D115790
parent f2a43f06dd, commit c75cedc237
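For context, the -O0 failure mode this patch addresses can be reduced to roughly the sketch below. It is illustrative only and not part of the commit; the `task` return type and its promise_type are hypothetical stand-ins for the coroutine_traits machinery the real test uses via Inputs/coroutine.h.

// Sketch of the bug 49264 scenario (illustrative, not from the patch): at -O0
// the AlwaysInliner runs before CoroEarly, so without a frontend-emitted
// "coroutine.presplit" attribute it would inline the still-unsplit coroutine
// `bar` into `baz`, breaking coroutine lowering.
#include <coroutine>

struct task {                       // hypothetical coroutine return type
  struct promise_type {
    task get_return_object() { return {}; }
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

__attribute__((always_inline)) task bar() { co_return; }

task baz() {
  bar();      // must stay a call to bar until CoroSplit has run
  co_return;
}

With the frontend emitting "coroutine.presplit"="0" on `bar`, the AlwaysInliner sees the attribute and leaves the call in `baz` alone until after CoroSplit.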
@@ -707,6 +707,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {

     if (Stmt *Ret = S.getReturnStmt())
       EmitStmt(Ret);
+
+    // LLVM require the frontend to add the function attribute. See
+    // Coroutines.rst.
+    CurFn->addFnAttr("coroutine.presplit", "0");
   }

   // Emit coroutine intrinsic and patch up arguments of the token type.
@@ -48,3 +48,15 @@ struct coroutine_traits {
 // CHECK: [[CAST3:%[0-9]+]] = bitcast %"struct.std::awaitable"* %ref.tmp{{.*}} to i8*
 // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[CAST3]])
 void foo() { co_return; }
+
+// Check that bar is not inlined even it's marked as always_inline.
+
+// CHECK-LABEL: define {{.*}} void @_Z3bazv()
+// CHECK: call void @_Z3barv(
+__attribute__((__always_inline__)) void bar() {
+  co_return;
+}
+void baz() {
+  bar();
+  co_return;
+}
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/coroutine.h"
+
+using namespace std;
+
+struct coro {
+  struct promise_type {
+    coro get_return_object();
+    suspend_never initial_suspend();
+    suspend_never final_suspend() noexcept;
+    void return_void();
+    static void unhandled_exception();
+  };
+};
+
+// CHECK: void @_Z3foov() #[[FOO_ATTR_NUM:[0-9]+]]
+// CHECK: attributes #[[FOO_ATTR_NUM]] = { {{.*}} "coroutine.presplit"="0"
+coro foo() {
+  co_await suspend_always{};
+}
@@ -1175,6 +1175,8 @@ duplicated.

 A frontend should emit exactly one `coro.id` intrinsic per coroutine.

+A frontend should emit function attribute `"coroutine.presplit"` for the coroutine.
+
 .. _coro.id.async:

 'llvm.coro.id.async' Intrinsic
@@ -1214,6 +1216,8 @@ Semantics:

 A frontend should emit exactly one `coro.id.async` intrinsic per coroutine.

+A frontend should emit function attribute `"coroutine.presplit"` for the coroutine.
+
 .. _coro.id.retcon:

 'llvm.coro.id.retcon' Intrinsic
@@ -1266,6 +1270,11 @@ or throwing an exception. It must take an integer and return a pointer.
 The sixth argument must be a reference to a global function that will
 be used to deallocate memory. It must take a pointer and return ``void``.

+Semantics:
+""""""""""
+
+A frontend should emit function attribute `"coroutine.presplit"` for the coroutine.
+
 'llvm.coro.id.retcon.once' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 ::
@@ -1287,6 +1296,11 @@ As for ``llvm.core.id.retcon``, except that the return type of the
 continuation prototype must be `void` instead of matching the
 coroutine's return type.

+Semantics:
+""""""""""
+
+A frontend should emit function attribute `"coroutine.presplit"` for the coroutine.
+
 .. _coro.end:

 'llvm.coro.end' Intrinsic
@@ -176,11 +176,14 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
       lowerCoroNoop(cast<IntrinsicInst>(&I));
       break;
     case Intrinsic::coro_id:
-      // Mark a function that comes out of the frontend that has a coro.id
-      // with a coroutine attribute.
       if (auto *CII = cast<CoroIdInst>(&I)) {
         if (CII->getInfo().isPreSplit()) {
-          F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT);
+          assert(F.hasFnAttribute(CORO_PRESPLIT_ATTR) &&
+                 F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString() ==
+                     UNPREPARED_FOR_SPLIT &&
+                 "The frontend uses Swtich-Resumed ABI should emit "
+                 "\"coroutine.presplit\" attribute with value \"0\" for the "
+                 "coroutine.");
           setCannotDuplicate(CII);
           CII->setCoroutineSelf();
           CoroId = cast<CoroIdInst>(&I);
@@ -190,6 +193,8 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
     case Intrinsic::coro_id_retcon:
     case Intrinsic::coro_id_retcon_once:
     case Intrinsic::coro_id_async:
+      // TODO: Remove the line once we support it in the corresponding
+      // frontend.
       F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
       break;
     case Intrinsic::coro_resume:
@@ -36,6 +36,11 @@ void initializeCoroCleanupLegacyPass(PassRegistry &);
 // adds coroutine subfunctions to the SCC to be processed by IPO pipeline.
 // Async lowering similarily triggers a restart of the pipeline after it has
 // split the coroutine.
+//
+// FIXME: Refactor these attributes as LLVM attributes instead of string
+// attributes since these attributes are already used outside LLVM's
+// coroutine module.
+// FIXME: Remove these values once we remove the Legacy PM.
 #define CORO_PRESPLIT_ATTR "coroutine.presplit"
 #define UNPREPARED_FOR_SPLIT "0"
 #define PREPARED_FOR_SPLIT "1"
@@ -61,7 +61,7 @@ entry:
 }


-define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 {
+define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) "coroutine.presplit"="1" !dbg !1 {
 entry:
   %tmp = alloca { i64, i64 }, align 8
   %vector = alloca <4 x double>, align 16
@@ -203,7 +203,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
   i32 128    ; Initial async context size without space for frame
 }>

-define swiftcc void @my_async_function2(%async.task* %task, %async.actor* %actor, i8* %async.ctxt) "frame-pointer"="all" !dbg !6 {
+define swiftcc void @my_async_function2(%async.task* %task, %async.actor* %actor, i8* %async.ctxt) "coroutine.presplit"="1" "frame-pointer"="all" !dbg !6 {
 entry:

   %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 2, i8* bitcast (<{i32, i32}>* @my_async_function2_fp to i8*))
@@ -325,7 +325,7 @@ is_not_null:
   ret void
 }

-define swiftcc void @dont_crash_on_cf(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) {
+define swiftcc void @dont_crash_on_cf(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
       i8* bitcast (<{i32, i32}>* @dont_crash_on_cf_fp to i8*))
@@ -371,7 +371,7 @@ define swiftcc void @must_tail_call_return(i8* %async.ctxt, %async.task* %task,
   ret void
 }

-define swiftcc void @multiple_coro_end_async(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) {
+define swiftcc void @multiple_coro_end_async(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
       i8* bitcast (<{i32, i32}>* @dont_crash_on_cf_fp to i8*))
@@ -427,7 +427,7 @@ is_not_equal:
   i32 64    ; Initial async context size without space for frame
 }>

-define swiftcc void @polymorphic_suspend_return(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) {
+define swiftcc void @polymorphic_suspend_return(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) "coroutine.presplit"="1" {
 entry:
   %tmp = alloca { i64, i64 }, align 8
   %proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0
@@ -496,7 +496,7 @@ entry:
   i32 128    ; Initial async context size without space for frame
 }>

-define swiftcc void @no_coro_suspend(i8* %async.ctx) {
+define swiftcc void @no_coro_suspend(i8* %async.ctx) "coroutine.presplit"="1" {
 entry:
   %some_alloca = alloca i64
   %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
@@ -523,7 +523,7 @@ entry:

 declare void @do_with_swifterror(i64** swifterror)

-define swiftcc void @no_coro_suspend_swifterror(i8* %async.ctx) {
+define swiftcc void @no_coro_suspend_swifterror(i8* %async.ctx) "coroutine.presplit"="1" {
 entry:
   %some_alloca = alloca swifterror i64*
   %id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
@@ -553,7 +553,7 @@ entry:
 declare void @crash()
 declare void @use(i8*)

-define swiftcc void @undefined_coro_async_resume(i8 *%async.ctx) {
+define swiftcc void @undefined_coro_async_resume(i8 *%async.ctx) "coroutine.presplit"="1" {
 entry:
   %id = call token @llvm.coro.id.async(i32 24, i32 16, i32 0, i8* bitcast (<{i32, i32}>* @undefined_coro_async_resume_fp to i8*))
   %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
@@ -9,7 +9,7 @@
 ; CHECK: ![[PROMISEVAR_RESUME]] = !DILocalVariable(name: "__promise"
 %promise_type = type { i32, i32, double }

-define void @f() !dbg !8 {
+define void @f() "coroutine.presplit"="0" !dbg !8 {
 entry:
   %__promise = alloca %promise_type, align 8
   %0 = bitcast %promise_type* %__promise to i8*
@@ -56,7 +56,7 @@ declare void @pi32(i32*)
 declare void @pi64(i64*)
 declare void @pdouble(double*)

-define void @f(i32 %a, i32 %b, i64 %c, double %d) !dbg !8 {
+define void @f(i32 %a, i32 %b, i64 %c, double %d) "coroutine.presplit"="0" !dbg !8 {
 entry:
   %__promise = alloca %promise_type, align 8
   %0 = bitcast %promise_type* %__promise to i8*
@@ -182,7 +182,7 @@ unreachable: ; preds = %after.coro.free

 }

-define void @bar(i32 %a, i64 %c, double %d) !dbg !19 {
+define void @bar(i32 %a, i64 %c, double %d) "coroutine.presplit"="0" !dbg !19 {
 entry:
   %__promise = alloca %promise_type, align 8
   %0 = bitcast %promise_type* %__promise to i8*
@@ -13,7 +13,7 @@

 source_filename = "../llvm/test/Transforms/Coroutines/coro-debug-dbg.values-O2.ll"

-define void @f(i32 %i, i32 %j) !dbg !8 {
+define void @f(i32 %i, i32 %j) "coroutine.presplit"="0" !dbg !8 {
 entry:
   %__promise = alloca i8, align 8
   %x = alloca [10 x i32], align 16
@@ -19,7 +19,7 @@
 source_filename = "../llvm/test/Transforms/Coroutines/coro-debug-dbg.values-O2.ll"
 declare void @consume(i32)

-define void @f(i32 %i, i32 %j) !dbg !8 {
+define void @f(i32 %i, i32 %j) "coroutine.presplit"="0" !dbg !8 {
 entry:
   %__promise = alloca i8, align 8
   %x = alloca [10 x i32], align 16
@@ -62,7 +62,7 @@
 ; CHECK: ![[IVAR_RESUME]] = !DILocalVariable(name: "i"
 ; CHECK: ![[JVAR_RESUME]] = !DILocalVariable(name: "j"
 ; CHECK: ![[JDBGLOC_RESUME]] = !DILocation(line: 32, column: 7, scope: ![[RESUME_SCOPE]])
-define void @f() {
+define void @f() "coroutine.presplit"="0" {
 entry:
   %__promise = alloca i8, align 8
   %i = alloca i32, align 4
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -S -passes=coro-early | FileCheck %s
 %struct.A = type <{ i64, i64, i32, [4 x i8] }>

-define void @f(%struct.A* nocapture readonly noalias align 8 %a) {
+define void @f(%struct.A* nocapture readonly noalias align 8 %a) "coroutine.presplit"="0" {
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
   %alloc = call i8* @malloc(i32 %size)
@@ -1,7 +1,7 @@
 ; Tests that a coroutine is split, inlined into the caller and devirtualized.
 ; RUN: opt < %s -S -enable-coroutines -passes='default<O2>' | FileCheck %s

-define i8* @f() {
+define i8* @f() "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -13,7 +13,7 @@ declare i8 @llvm.coro.suspend(token, i1)
 ; CHECK: call void @foo()
 ; CHECK-LABEL: define {{.*}}void @foo.destroy(

-define void @foo() {
+define void @foo() "coroutine.presplit"="0" {
 entry:
   %__promise = alloca i32, align 8
   %0 = bitcast i32* %__promise to i8*
@@ -1,7 +1,7 @@
 ; First example from Doc/Coroutines.rst (two block loop)
 ; RUN: opt < %s -enable-coroutines -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -1,7 +1,7 @@
 ; First example from Doc/Coroutines.rst (one block loop)
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -1,7 +1,7 @@
 ; Second example from Doc/Coroutines.rst (custom alloc and free functions)
 ; RUN: opt < %s -passes='default<O2>' -enable-coroutines -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
@@ -1,7 +1,7 @@
 ; Third example from Doc/Coroutines.rst (two suspend points)
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -1,7 +1,7 @@
 ; Fourth example from Doc/Coroutines.rst (coroutine promise)
 ; RUN: opt < %s -passes='default<O2>' -enable-coroutines -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %promise = alloca i32
   %pv = bitcast i32* %promise to i8*
@@ -1,7 +1,7 @@
 ; Fifth example from Doc/Coroutines.rst (final suspend)
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -1,7 +1,7 @@
 ; Verify that we correctly handle suspend when the coro.end block contains phi
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -enable-coroutines -S | FileCheck %s

-define i8* @f(i32 %n) {
+define i8* @f(i32 %n) "coroutine.presplit"="0" {
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -10,7 +10,7 @@
 ; CHECK-NEWPM-NOT: CoroSplit: Processing coroutine 'f' state: 1


-define void @f() {
+define void @f() "coroutine.presplit"="0" {
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
   %alloc = call i8* @malloc(i32 %size)
@@ -190,6 +190,13 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
     }
   }

+  // The switch-resumed API based coroutine should be marked with
+  // "coroutine.presplit" attribute with value "0" to mark the function as a
+  // coroutine.
+  func->setAttr("passthrough", builder.getArrayAttr(builder.getArrayAttr(
+                                   {builder.getStringAttr("coroutine.presplit"),
+                                    builder.getStringAttr("0")})));
+
   CoroMachinery machinery;
   machinery.func = func;
   machinery.asyncToken = retToken;
@@ -0,0 +1,80 @@
+// Check if mlir marks the corresponding function with required coroutine attribute.
+//
+// RUN: mlir-opt %s -async-to-async-runtime \
+// RUN:   -async-runtime-ref-counting \
+// RUN:   -async-runtime-ref-counting-opt \
+// RUN:   -convert-async-to-llvm \
+// RUN:   -convert-linalg-to-loops \
+// RUN:   -convert-scf-to-std \
+// RUN:   -convert-linalg-to-llvm \
+// RUN:   -convert-memref-to-llvm \
+// RUN:   -convert-arith-to-llvm \
+// RUN:   -convert-std-to-llvm \
+// RUN:   -reconcile-unrealized-casts \
+// RUN: | FileCheck %s
+
+// CHECK: llvm.func @async_execute_fn{{.*}}attributes{{.*}}"coroutine.presplit", "0"
+// CHECK: llvm.func @async_execute_fn_0{{.*}}attributes{{.*}}"coroutine.presplit", "0"
+// CHECK: llvm.func @async_execute_fn_1{{.*}}attributes{{.*}}"coroutine.presplit", "0"
+
+func @main() {
+  %i0 = arith.constant 0 : index
+  %i1 = arith.constant 1 : index
+  %i2 = arith.constant 2 : index
+  %i3 = arith.constant 3 : index
+
+  %c0 = arith.constant 0.0 : f32
+  %c1 = arith.constant 1.0 : f32
+  %c2 = arith.constant 2.0 : f32
+  %c3 = arith.constant 3.0 : f32
+  %c4 = arith.constant 4.0 : f32
+
+  %A = memref.alloc() : memref<4xf32>
+  linalg.fill(%c0, %A) : f32, memref<4xf32>
+
+  %U = memref.cast %A : memref<4xf32> to memref<*xf32>
+  call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+  memref.store %c1, %A[%i0]: memref<4xf32>
+  call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+  call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+  %outer = async.execute {
+    memref.store %c2, %A[%i1]: memref<4xf32>
+    call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+    call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+    // No op async region to create a token for testing async dependency.
+    %noop = async.execute {
+      call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+      async.yield
+    }
+
+    %inner = async.execute [%noop] {
+      memref.store %c3, %A[%i2]: memref<4xf32>
+      call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+      call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+      async.yield
+    }
+    async.await %inner : !async.token
+
+    memref.store %c4, %A[%i3]: memref<4xf32>
+    call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+    call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+    async.yield
+  }
+  async.await %outer : !async.token
+
+  call @mlirAsyncRuntimePrintCurrentThreadId(): () -> ()
+  call @print_memref_f32(%U): (memref<*xf32>) -> ()
+
+  memref.dealloc %A : memref<4xf32>
+
+  return
+}
+
+func private @mlirAsyncRuntimePrintCurrentThreadId() -> ()
+
+func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }