[mlir] Async: lowering async.value to LLVM

1. Add new functions to the Async runtime API to support yielding async values
2. Add lowering from `async.yield` with a value payload to the new runtime API calls

`async.value` lowering requires that the payload type be convertible to LLVM and supported by the `llvm.mlir.cast` (DialectCast) operation.
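For illustration, a minimal sketch of the pattern this enables (mirroring the tests added below; the function name and constant are made up):

  func @produce_and_consume() -> f32 {
    %token, %result = async.execute -> !async.value<f32> {
      %cst = constant 42.0 : f32
      async.yield %cst : f32   // stored into the runtime-owned value storage
    }
    %0 = async.await %result : !async.value<f32>  // loads the payload back
    return %0 : f32
  }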

Reviewed By: csigg

Differential Revision: https://reviews.llvm.org/D93592
Eugene Zhulenev 2020-12-24 05:08:09 -08:00
parent a2ca6bbda6
commit 621ad468d9
6 changed files with 620 additions and 151 deletions


@@ -45,6 +45,12 @@ typedef struct AsyncToken AsyncToken;
// Runtime implementation of `async.group` data type.
typedef struct AsyncGroup AsyncGroup;
// Runtime implementation of `async.value` data type.
typedef struct AsyncValue AsyncValue;
// Async value payload is stored in memory owned by the async.value.
using ValueStorage = void *;
// The async runtime uses LLVM coroutines to represent asynchronous tasks. A
// task is a coroutine handle and a resume function that continues coroutine
// execution from a suspension point.
@@ -66,6 +72,13 @@ extern "C" MLIR_ASYNCRUNTIME_EXPORT void
// Create a new `async.token` in not-ready state.
extern "C" MLIR_ASYNCRUNTIME_EXPORT AsyncToken *mlirAsyncRuntimeCreateToken();
// Create a new `async.value` in not-ready state. Size parameter specifies the
// number of bytes that will be allocated for the async value storage. Storage
// is owned by the `async.value` and deallocated when the async value is
// destructed (reference count drops to zero).
extern "C" MLIR_ASYNCRUNTIME_EXPORT AsyncValue *
mlirAsyncRuntimeCreateValue(int32_t);
// Create a new `async.group` in empty state.
extern "C" MLIR_ASYNCRUNTIME_EXPORT AsyncGroup *mlirAsyncRuntimeCreateGroup();
@@ -76,14 +89,26 @@ mlirAsyncRuntimeAddTokenToGroup(AsyncToken *, AsyncGroup *);
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeEmplaceToken(AsyncToken *);
// Switches `async.value` to ready state and runs all awaiters.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeEmplaceValue(AsyncValue *);
// Blocks the caller thread until the token becomes ready.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeAwaitToken(AsyncToken *);
// Blocks the caller thread until the value becomes ready.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeAwaitValue(AsyncValue *);
// Blocks the caller thread until the elements in the group become ready.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeAwaitAllInGroup(AsyncGroup *);
// Returns a pointer to the storage owned by the async value.
extern "C" MLIR_ASYNCRUNTIME_EXPORT ValueStorage
mlirAsyncRuntimeGetValueStorage(AsyncValue *);
// Executes the task (coro handle + resume function) in one of the threads
// managed by the runtime.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void mlirAsyncRuntimeExecute(CoroHandle,
@@ -94,6 +119,11 @@ extern "C" MLIR_ASYNCRUNTIME_EXPORT void mlirAsyncRuntimeExecute(CoroHandle,
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeAwaitTokenAndExecute(AsyncToken *, CoroHandle, CoroResume);
// Executes the task (coro handle + resume function) in one of the threads
// managed by the runtime after the value becomes ready.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void
mlirAsyncRuntimeAwaitValueAndExecute(AsyncValue *, CoroHandle, CoroResume);
// Executes the task (coro handle + resume function) in one of the threads
// managed by the runtime after all members of the group become ready.
extern "C" MLIR_ASYNCRUNTIME_EXPORT void

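A rough usage sketch of the new value API as lowered code is expected to drive it (hypothetical, not part of this change; assumes the mlir/ExecutionEngine/AsyncRuntime.h include path and a float payload):

#include "mlir/ExecutionEngine/AsyncRuntime.h"

// Hypothetical illustration of the producer/consumer protocol for one value.
static float produceAndConsumeSketch() {
  // Allocate storage for a single float payload; the value starts in the
  // not-ready state with a reference count of 2.
  AsyncValue *value = mlirAsyncRuntimeCreateValue(sizeof(float));
  // Producer side: write the payload into the runtime-owned storage, then
  // switch the value to the ready state.
  *static_cast<float *>(mlirAsyncRuntimeGetValueStorage(value)) = 123.0f;
  mlirAsyncRuntimeEmplaceValue(value);
  // Consumer side: block until the value is ready, then load the payload.
  mlirAsyncRuntimeAwaitValue(value);
  float payload = *static_cast<float *>(mlirAsyncRuntimeGetValueStorage(value));
  // The remaining reference is dropped by the lowered code via
  // mlirAsyncRuntimeDropRef.
  return payload;
}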

@@ -9,9 +9,11 @@
#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
#include "../PassDetail.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/TypeUtilities.h"
@@ -36,23 +38,39 @@ static constexpr const char kAsyncFnPrefix[] = "async_execute_fn";
static constexpr const char *kAddRef = "mlirAsyncRuntimeAddRef";
static constexpr const char *kDropRef = "mlirAsyncRuntimeDropRef";
static constexpr const char *kCreateToken = "mlirAsyncRuntimeCreateToken";
static constexpr const char *kCreateValue = "mlirAsyncRuntimeCreateValue";
static constexpr const char *kCreateGroup = "mlirAsyncRuntimeCreateGroup";
static constexpr const char *kEmplaceToken = "mlirAsyncRuntimeEmplaceToken";
static constexpr const char *kEmplaceValue = "mlirAsyncRuntimeEmplaceValue";
static constexpr const char *kAwaitToken = "mlirAsyncRuntimeAwaitToken";
static constexpr const char *kAwaitValue = "mlirAsyncRuntimeAwaitValue";
static constexpr const char *kAwaitGroup = "mlirAsyncRuntimeAwaitAllInGroup";
static constexpr const char *kExecute = "mlirAsyncRuntimeExecute";
static constexpr const char *kGetValueStorage =
"mlirAsyncRuntimeGetValueStorage";
static constexpr const char *kAddTokenToGroup =
"mlirAsyncRuntimeAddTokenToGroup";
static constexpr const char *kAwaitAndExecute =
static constexpr const char *kAwaitTokenAndExecute =
"mlirAsyncRuntimeAwaitTokenAndExecute";
static constexpr const char *kAwaitValueAndExecute =
"mlirAsyncRuntimeAwaitValueAndExecute";
static constexpr const char *kAwaitAllAndExecute =
"mlirAsyncRuntimeAwaitAllInGroupAndExecute";
namespace {
// Async Runtime API function types.
/// Async Runtime API function types.
///
/// Because we can't create an API function signature for the type-parametrized
/// async.value type, we use opaque pointers (!llvm.ptr<i8>) instead. After
/// lowering, all async data types become opaque pointers at runtime.
struct AsyncAPI {
// All async types are lowered to opaque i8* LLVM pointers at runtime.
static LLVM::LLVMPointerType opaquePointerType(MLIRContext *ctx) {
return LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
}
static FunctionType addOrDropRefFunctionType(MLIRContext *ctx) {
auto ref = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
auto ref = opaquePointerType(ctx);
auto count = IntegerType::get(ctx, 32);
return FunctionType::get(ctx, {ref, count}, {});
}
@@ -61,24 +79,46 @@ struct AsyncAPI {
return FunctionType::get(ctx, {}, {TokenType::get(ctx)});
}
static FunctionType createValueFunctionType(MLIRContext *ctx) {
auto i32 = IntegerType::get(ctx, 32);
auto value = opaquePointerType(ctx);
return FunctionType::get(ctx, {i32}, {value});
}
static FunctionType createGroupFunctionType(MLIRContext *ctx) {
return FunctionType::get(ctx, {}, {GroupType::get(ctx)});
}
static FunctionType getValueStorageFunctionType(MLIRContext *ctx) {
auto value = opaquePointerType(ctx);
auto storage = opaquePointerType(ctx);
return FunctionType::get(ctx, {value}, {storage});
}
static FunctionType emplaceTokenFunctionType(MLIRContext *ctx) {
return FunctionType::get(ctx, {TokenType::get(ctx)}, {});
}
static FunctionType emplaceValueFunctionType(MLIRContext *ctx) {
auto value = opaquePointerType(ctx);
return FunctionType::get(ctx, {value}, {});
}
static FunctionType awaitTokenFunctionType(MLIRContext *ctx) {
return FunctionType::get(ctx, {TokenType::get(ctx)}, {});
}
static FunctionType awaitValueFunctionType(MLIRContext *ctx) {
auto value = opaquePointerType(ctx);
return FunctionType::get(ctx, {value}, {});
}
static FunctionType awaitGroupFunctionType(MLIRContext *ctx) {
return FunctionType::get(ctx, {GroupType::get(ctx)}, {});
}
static FunctionType executeFunctionType(MLIRContext *ctx) {
auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
auto hdl = opaquePointerType(ctx);
auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx));
return FunctionType::get(ctx, {hdl, resume}, {});
}
@@ -89,14 +129,21 @@ struct AsyncAPI {
{i64});
}
static FunctionType awaitAndExecuteFunctionType(MLIRContext *ctx) {
auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
static FunctionType awaitTokenAndExecuteFunctionType(MLIRContext *ctx) {
auto hdl = opaquePointerType(ctx);
auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx));
return FunctionType::get(ctx, {TokenType::get(ctx), hdl, resume}, {});
}
static FunctionType awaitValueAndExecuteFunctionType(MLIRContext *ctx) {
auto value = opaquePointerType(ctx);
auto hdl = opaquePointerType(ctx);
auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx));
return FunctionType::get(ctx, {value, hdl, resume}, {});
}
static FunctionType awaitAllAndExecuteFunctionType(MLIRContext *ctx) {
auto hdl = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
auto hdl = opaquePointerType(ctx);
auto resume = LLVM::LLVMPointerType::get(resumeFunctionType(ctx));
return FunctionType::get(ctx, {GroupType::get(ctx), hdl, resume}, {});
}
@@ -104,13 +151,13 @@ struct AsyncAPI {
// Auxiliary coroutine resume intrinsic wrapper.
static LLVM::LLVMType resumeFunctionType(MLIRContext *ctx) {
auto voidTy = LLVM::LLVMVoidType::get(ctx);
auto i8Ptr = LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
auto i8Ptr = opaquePointerType(ctx);
return LLVM::LLVMFunctionType::get(voidTy, {i8Ptr}, false);
}
};
} // namespace
// Adds Async Runtime C API declarations to the module.
/// Adds Async Runtime C API declarations to the module.
static void addAsyncRuntimeApiDeclarations(ModuleOp module) {
auto builder = ImplicitLocOpBuilder::atBlockTerminator(module.getLoc(),
module.getBody());
@@ -125,13 +172,20 @@ static void addAsyncRuntimeApiDeclarations(ModuleOp module) {
addFuncDecl(kAddRef, AsyncAPI::addOrDropRefFunctionType(ctx));
addFuncDecl(kDropRef, AsyncAPI::addOrDropRefFunctionType(ctx));
addFuncDecl(kCreateToken, AsyncAPI::createTokenFunctionType(ctx));
addFuncDecl(kCreateValue, AsyncAPI::createValueFunctionType(ctx));
addFuncDecl(kCreateGroup, AsyncAPI::createGroupFunctionType(ctx));
addFuncDecl(kEmplaceToken, AsyncAPI::emplaceTokenFunctionType(ctx));
addFuncDecl(kEmplaceValue, AsyncAPI::emplaceValueFunctionType(ctx));
addFuncDecl(kAwaitToken, AsyncAPI::awaitTokenFunctionType(ctx));
addFuncDecl(kAwaitValue, AsyncAPI::awaitValueFunctionType(ctx));
addFuncDecl(kAwaitGroup, AsyncAPI::awaitGroupFunctionType(ctx));
addFuncDecl(kExecute, AsyncAPI::executeFunctionType(ctx));
addFuncDecl(kGetValueStorage, AsyncAPI::getValueStorageFunctionType(ctx));
addFuncDecl(kAddTokenToGroup, AsyncAPI::addTokenToGroupFunctionType(ctx));
addFuncDecl(kAwaitAndExecute, AsyncAPI::awaitAndExecuteFunctionType(ctx));
addFuncDecl(kAwaitTokenAndExecute,
AsyncAPI::awaitTokenAndExecuteFunctionType(ctx));
addFuncDecl(kAwaitValueAndExecute,
AsyncAPI::awaitValueAndExecuteFunctionType(ctx));
addFuncDecl(kAwaitAllAndExecute,
AsyncAPI::awaitAllAndExecuteFunctionType(ctx));
}
@@ -215,9 +269,9 @@ static void addCRuntimeDeclarations(ModuleOp module) {
static constexpr const char *kResume = "__resume";
// A function that takes a coroutine handle and calls a `llvm.coro.resume`
// intrinsics. We need this function to be able to pass it to the async
// runtime execute API.
/// A function that takes a coroutine handle and calls the `llvm.coro.resume`
/// intrinsic. We need this function to be able to pass it to the async
/// runtime execute API.
static void addResumeFunction(ModuleOp module) {
MLIRContext *ctx = module.getContext();
@@ -248,49 +302,61 @@ static void addResumeFunction(ModuleOp module) {
// async.execute op outlining to the coroutine functions.
//===----------------------------------------------------------------------===//
// Function targeted for coroutine transformation has two additional blocks at
// the end: coroutine cleanup and coroutine suspension.
//
// async.await op lowering additionaly creates a resume block for each
// operation to enable non-blocking waiting via coroutine suspension.
/// Function targeted for coroutine transformation has two additional blocks at
/// the end: coroutine cleanup and coroutine suspension.
///
/// async.await op lowering additionally creates a resume block for each
/// operation to enable non-blocking waiting via coroutine suspension.
namespace {
struct CoroMachinery {
Value asyncToken;
// Async execute region returns a completion token, and an async value for
// each yielded value.
//
// %token, %result = async.execute -> !async.value<T> {
// %0 = constant ... : T
// async.yield %0 : T
// }
Value asyncToken; // token representing completion of the async region
llvm::SmallVector<Value, 4> returnValues; // returned async values
Value coroHandle;
Block *cleanup;
Block *suspend;
};
} // namespace
// Builds an coroutine template compatible with LLVM coroutines lowering.
//
// - `entry` block sets up the coroutine.
// - `cleanup` block cleans up the coroutine state.
// - `suspend block after the @llvm.coro.end() defines what value will be
// returned to the initial caller of a coroutine. Everything before the
// @llvm.coro.end() will be executed at every suspension point.
//
// Coroutine structure (only the important bits):
//
// func @async_execute_fn(<function-arguments>) -> !async.token {
// ^entryBlock(<function-arguments>):
// %token = <async token> : !async.token // create async runtime token
// %hdl = llvm.call @llvm.coro.id(...) // create a coroutine handle
// br ^cleanup
//
// ^cleanup:
// llvm.call @llvm.coro.free(...) // delete coroutine state
// br ^suspend
//
// ^suspend:
// llvm.call @llvm.coro.end(...) // marks the end of a coroutine
// return %token : !async.token
// }
//
// The actual code for the async.execute operation body region will be inserted
// before the entry block terminator.
//
//
/// Builds a coroutine template compatible with LLVM coroutines lowering.
///
/// - `entry` block sets up the coroutine.
/// - `cleanup` block cleans up the coroutine state.
/// - `suspend` block after the @llvm.coro.end() defines what value will be
/// returned to the initial caller of a coroutine. Everything before the
/// @llvm.coro.end() will be executed at every suspension point.
///
/// Coroutine structure (only the important bits):
///
/// func @async_execute_fn(<function-arguments>)
/// -> (!async.token, !async.value<T>)
/// {
/// ^entryBlock(<function-arguments>):
/// %token = <async token> : !async.token // create async runtime token
/// %value = <async value> : !async.value<T> // create async value
/// %hdl = llvm.call @llvm.coro.id(...) // create a coroutine handle
/// br ^cleanup
///
/// ^cleanup:
/// llvm.call @llvm.coro.free(...) // delete coroutine state
/// br ^suspend
///
/// ^suspend:
/// llvm.call @llvm.coro.end(...) // marks the end of a coroutine
/// return %token, %value : !async.token, !async.value<T>
/// }
///
/// The actual code for the async.execute operation body region will be inserted
/// before the entry block terminator.
///
///
static CoroMachinery setupCoroMachinery(FuncOp func) {
assert(func.getBody().empty() && "Function must have empty body");
@@ -312,6 +378,44 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
// ------------------------------------------------------------------------ //
auto createToken = builder.create<CallOp>(kCreateToken, TokenType::get(ctx));
// Async value operands and results must be convertible to LLVM types. This is
// verified before the function outlining.
LLVMTypeConverter converter(ctx);
// Returns the size requirements for the async value storage.
// http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt
auto sizeOf = [&](ValueType valueType) -> Value {
auto storedType = converter.convertType(valueType.getValueType());
auto storagePtrType =
LLVM::LLVMPointerType::get(storedType.cast<LLVM::LLVMType>());
// %Size = getelementptr %T* null, int 1
// %SizeI = ptrtoint %T* %Size to i32
auto nullPtr = builder.create<LLVM::NullOp>(loc, storagePtrType);
auto one = builder.create<LLVM::ConstantOp>(loc, i32,
builder.getI32IntegerAttr(1));
auto gep = builder.create<LLVM::GEPOp>(loc, storagePtrType, nullPtr,
one.getResult());
auto size = builder.create<LLVM::PtrToIntOp>(loc, i32, gep);
// Cast to a std type because the runtime API is defined using std types.
return builder.create<LLVM::DialectCastOp>(loc, builder.getI32Type(),
size.getResult());
};
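// A hypothetical rendering (invented SSA names) of what the sequence above
// builds for an f32 payload, including the final cast back to a std i32:
//   %null = llvm.mlir.null : !llvm.ptr<float>
//   %c1   = llvm.mlir.constant(1 : i32) : !llvm.i32
//   %gep  = llvm.getelementptr %null[%c1]
//             : (!llvm.ptr<float>, !llvm.i32) -> !llvm.ptr<float>
//   %size = llvm.ptrtoint %gep : !llvm.ptr<float> to !llvm.i32
//   %std  = llvm.mlir.cast %size : !llvm.i32 to i32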
// We use the `async.value` type as a return type although it does not match
// the `kCreateValue` function signature, because it will be later lowered to
// the runtime type (opaque i8* pointer).
llvm::SmallVector<CallOp, 4> createValues;
for (auto resultType : func.getCallableResults().drop_front(1))
createValues.emplace_back(builder.create<CallOp>(
loc, kCreateValue, resultType, sizeOf(resultType.cast<ValueType>())));
auto createdValues = llvm::map_range(
createValues, [](CallOp call) { return call.getResult(0); });
llvm::SmallVector<Value, 4> returnValues(createdValues.begin(),
createdValues.end());
// ------------------------------------------------------------------------ //
// Initialize coroutine: allocate frame, get coroutine handle.
// ------------------------------------------------------------------------ //
@@ -371,9 +475,11 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
builder.create<LLVM::CallOp>(i1, builder.getSymbolRefAttr(kCoroEnd),
ValueRange({coroHdl.getResult(0), constFalse}));
// Return created `async.token` from the suspend block. This will be the
// return value of a coroutine ramp function.
builder.create<ReturnOp>(createToken.getResult(0));
// Return created `async.token` and `async.values` from the suspend block.
// This will be the return value of a coroutine ramp function.
SmallVector<Value, 4> ret{createToken.getResult(0)};
ret.insert(ret.end(), returnValues.begin(), returnValues.end());
builder.create<ReturnOp>(loc, ret);
// Branch from the entry block to the cleanup block to create a valid CFG.
builder.setInsertionPointToEnd(entryBlock);
@@ -383,39 +489,44 @@ static CoroMachinery setupCoroMachinery(FuncOp func) {
// `async.await` op lowering will create resume blocks for async
// continuations, and will conditionally branch to cleanup or suspend blocks.
return {createToken.getResult(0), coroHdl.getResult(0), cleanupBlock,
suspendBlock};
CoroMachinery machinery;
machinery.asyncToken = createToken.getResult(0);
machinery.returnValues = returnValues;
machinery.coroHandle = coroHdl.getResult(0);
machinery.cleanup = cleanupBlock;
machinery.suspend = suspendBlock;
return machinery;
}
// Add a LLVM coroutine suspension point to the end of suspended block, to
// resume execution in resume block. The caller is responsible for creating the
// two suspended/resume blocks with the desired ops contained in each block.
// This function merely provides the required control flow logic.
//
// `coroState` must be a value returned from the call to @llvm.coro.save(...)
// intrinsic (saved coroutine state).
//
// Before:
//
// ^bb0:
// "opBefore"(...)
// "op"(...)
// ^cleanup: ...
// ^suspend: ...
// ^resume:
// "op"(...)
//
// After:
//
// ^bb0:
// "opBefore"(...)
// %suspend = llmv.call @llvm.coro.suspend(...)
// switch %suspend [-1: ^suspend, 0: ^resume, 1: ^cleanup]
// ^resume:
// "op"(...)
// ^cleanup: ...
// ^suspend: ...
//
/// Adds an LLVM coroutine suspension point to the end of the suspended block,
/// to resume execution in the resume block. The caller is responsible for
/// creating the two suspended/resume blocks with the desired ops contained in
/// each block. This function merely provides the required control flow logic.
///
/// `coroState` must be a value returned from the call to @llvm.coro.save(...)
/// intrinsic (saved coroutine state).
///
/// Before:
///
/// ^bb0:
/// "opBefore"(...)
/// "op"(...)
/// ^cleanup: ...
/// ^suspend: ...
/// ^resume:
/// "op"(...)
///
/// After:
///
/// ^bb0:
/// "opBefore"(...)
/// %suspend = llvm.call @llvm.coro.suspend(...)
/// switch %suspend [-1: ^suspend, 0: ^resume, 1: ^cleanup]
/// ^resume:
/// "op"(...)
/// ^cleanup: ...
/// ^suspend: ...
///
static void addSuspensionPoint(CoroMachinery coro, Value coroState,
Operation *op, Block *suspended, Block *resume,
OpBuilder &builder) {
@@ -461,10 +572,10 @@ static void addSuspensionPoint(CoroMachinery coro, Value coroState,
/*falseDest=*/coro.cleanup);
}
// Outline the body region attached to the `async.execute` op into a standalone
// function.
//
// Note that this is not reversible transformation.
/// Outline the body region attached to the `async.execute` op into a standalone
/// function.
///
/// Note that this is not a reversible transformation.
static std::pair<FuncOp, CoroMachinery>
outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) {
ModuleOp module = execute->getParentOfType<ModuleOp>();
@@ -475,6 +586,7 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) {
// Collect all outlined function inputs.
llvm::SetVector<mlir::Value> functionInputs(execute.dependencies().begin(),
execute.dependencies().end());
assert(execute.operands().empty() && "operands are not supported");
getUsedValuesDefinedAbove(execute.body(), functionInputs);
// Collect types for the outlined function inputs and outputs.
@@ -535,15 +647,9 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) {
valueMapping.map(functionInputs, func.getArguments());
// Clone all operations from the execute operation body into the outlined
// function body, and replace all `async.yield` operations with a call
// to async runtime to emplace the result token.
for (Operation &op : execute.body().getOps()) {
if (isa<async::YieldOp>(op)) {
builder.create<CallOp>(kEmplaceToken, TypeRange(), coro.asyncToken);
continue;
}
// function body.
for (Operation &op : execute.body().getOps())
builder.clone(op, valueMapping);
}
// Replace the original `async.execute` with a call to outlined function.
ImplicitLocOpBuilder callBuilder(loc, execute);
@@ -560,42 +666,38 @@ outlineExecuteOp(SymbolTable &symbolTable, ExecuteOp execute) {
//===----------------------------------------------------------------------===//
namespace {
/// AsyncRuntimeTypeConverter only converts types from the Async dialect to
/// their runtime type (opaque pointers) and does not convert any other types.
class AsyncRuntimeTypeConverter : public TypeConverter {
public:
AsyncRuntimeTypeConverter() { addConversion(convertType); }
AsyncRuntimeTypeConverter() {
addConversion([](Type type) { return type; });
addConversion(convertAsyncTypes);
}
static Type convertType(Type type) {
MLIRContext *ctx = type.getContext();
// Convert async tokens and groups to opaque pointers.
if (type.isa<TokenType, GroupType>())
return LLVM::LLVMPointerType::get(LLVM::LLVMIntegerType::get(ctx, 8));
return type;
static Optional<Type> convertAsyncTypes(Type type) {
if (type.isa<TokenType, GroupType, ValueType>())
return AsyncAPI::opaquePointerType(type.getContext());
return llvm::None;
}
};
} // namespace
//===----------------------------------------------------------------------===//
// Convert types for all call operations to lowered async types.
// Convert return operations that return async values from async regions.
//===----------------------------------------------------------------------===//
namespace {
class CallOpOpConversion : public ConversionPattern {
class ReturnOpOpConversion : public ConversionPattern {
public:
explicit CallOpOpConversion(MLIRContext *ctx)
: ConversionPattern(CallOp::getOperationName(), 1, ctx) {}
explicit ReturnOpOpConversion(TypeConverter &converter, MLIRContext *ctx)
: ConversionPattern(ReturnOp::getOperationName(), 1, converter, ctx) {}
LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
AsyncRuntimeTypeConverter converter;
SmallVector<Type, 5> resultTypes;
converter.convertTypes(op->getResultTypes(), resultTypes);
CallOp call = cast<CallOp>(op);
rewriter.replaceOpWithNewOp<CallOp>(op, resultTypes, call.callee(),
operands);
rewriter.replaceOpWithNewOp<ReturnOp>(op, operands);
return success();
}
};
@@ -611,8 +713,9 @@ namespace {
template <typename RefCountingOp>
class RefCountingOpLowering : public ConversionPattern {
public:
explicit RefCountingOpLowering(MLIRContext *ctx, StringRef apiFunctionName)
: ConversionPattern(RefCountingOp::getOperationName(), 1, ctx),
explicit RefCountingOpLowering(TypeConverter &converter, MLIRContext *ctx,
StringRef apiFunctionName)
: ConversionPattern(RefCountingOp::getOperationName(), 1, converter, ctx),
apiFunctionName(apiFunctionName) {}
LogicalResult
@@ -634,18 +737,18 @@ private:
StringRef apiFunctionName;
};
// async.drop_ref op lowering to mlirAsyncRuntimeDropRef function call.
/// async.add_ref op lowering to mlirAsyncRuntimeAddRef function call.
class AddRefOpLowering : public RefCountingOpLowering<AddRefOp> {
public:
explicit AddRefOpLowering(MLIRContext *ctx)
: RefCountingOpLowering(ctx, kAddRef) {}
explicit AddRefOpLowering(TypeConverter &converter, MLIRContext *ctx)
: RefCountingOpLowering(converter, ctx, kAddRef) {}
};
// async.create_group op lowering to mlirAsyncRuntimeCreateGroup function call.
/// async.drop_ref op lowering to mlirAsyncRuntimeDropRef function call.
class DropRefOpLowering : public RefCountingOpLowering<DropRefOp> {
public:
explicit DropRefOpLowering(MLIRContext *ctx)
: RefCountingOpLowering(ctx, kDropRef) {}
explicit DropRefOpLowering(TypeConverter &converter, MLIRContext *ctx)
: RefCountingOpLowering(converter, ctx, kDropRef) {}
};
} // namespace
@@ -657,8 +760,9 @@ public:
namespace {
class CreateGroupOpLowering : public ConversionPattern {
public:
explicit CreateGroupOpLowering(MLIRContext *ctx)
: ConversionPattern(CreateGroupOp::getOperationName(), 1, ctx) {}
explicit CreateGroupOpLowering(TypeConverter &converter, MLIRContext *ctx)
: ConversionPattern(CreateGroupOp::getOperationName(), 1, converter,
ctx) {}
LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
@@ -677,8 +781,9 @@ public:
namespace {
class AddToGroupOpLowering : public ConversionPattern {
public:
explicit AddToGroupOpLowering(MLIRContext *ctx)
: ConversionPattern(AddToGroupOp::getOperationName(), 1, ctx) {}
explicit AddToGroupOpLowering(TypeConverter &converter, MLIRContext *ctx)
: ConversionPattern(AddToGroupOp::getOperationName(), 1, converter, ctx) {
}
LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
@@ -706,10 +811,10 @@ template <typename AwaitType, typename AwaitableType>
class AwaitOpLoweringBase : public ConversionPattern {
protected:
explicit AwaitOpLoweringBase(
MLIRContext *ctx,
TypeConverter &converter, MLIRContext *ctx,
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions,
StringRef blockingAwaitFuncName, StringRef coroAwaitFuncName)
: ConversionPattern(AwaitType::getOperationName(), 1, ctx),
: ConversionPattern(AwaitType::getOperationName(), 1, converter, ctx),
outlinedFunctions(outlinedFunctions),
blockingAwaitFuncName(blockingAwaitFuncName),
coroAwaitFuncName(coroAwaitFuncName) {}
@@ -719,7 +824,7 @@ public:
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
// We can only await on the `AwaitableType` (for `await` it can be
// only a `token`, for `await_all` it is a `group`).
// a `token` or a `value`, for `await_all` it must be a `group`).
auto await = cast<AwaitType>(op);
if (!await.operand().getType().template isa<AwaitableType>())
return failure();
@@ -768,44 +873,163 @@ public:
Block *resume = rewriter.splitBlock(suspended, Block::iterator(op));
addSuspensionPoint(coro, coroSave.getResult(0), op, suspended, resume,
builder);
// Make sure that the replacement value will be constructed in the resume block.
rewriter.setInsertionPointToStart(resume);
}
// Original operation was replaced by function call or suspension point.
rewriter.eraseOp(op);
// Replace or erase the await operation with the new value.
if (Value replaceWith = getReplacementValue(op, operands[0], rewriter))
rewriter.replaceOp(op, replaceWith);
else
rewriter.eraseOp(op);
return success();
}
virtual Value getReplacementValue(Operation *op, Value operand,
ConversionPatternRewriter &rewriter) const {
return Value();
}
private:
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions;
StringRef blockingAwaitFuncName;
StringRef coroAwaitFuncName;
};
// Lowering for `async.await` operation (only token operands are supported).
class AwaitOpLowering : public AwaitOpLoweringBase<AwaitOp, TokenType> {
/// Lowering for `async.await` with a token operand.
class AwaitTokenOpLowering : public AwaitOpLoweringBase<AwaitOp, TokenType> {
using Base = AwaitOpLoweringBase<AwaitOp, TokenType>;
public:
explicit AwaitOpLowering(
MLIRContext *ctx,
explicit AwaitTokenOpLowering(
TypeConverter &converter, MLIRContext *ctx,
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions)
: Base(ctx, outlinedFunctions, kAwaitToken, kAwaitAndExecute) {}
: Base(converter, ctx, outlinedFunctions, kAwaitToken,
kAwaitTokenAndExecute) {}
};
// Lowering for `async.await_all` operation.
/// Lowering for `async.await` with a value operand.
class AwaitValueOpLowering : public AwaitOpLoweringBase<AwaitOp, ValueType> {
using Base = AwaitOpLoweringBase<AwaitOp, ValueType>;
public:
explicit AwaitValueOpLowering(
TypeConverter &converter, MLIRContext *ctx,
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions)
: Base(converter, ctx, outlinedFunctions, kAwaitValue,
kAwaitValueAndExecute) {}
Value
getReplacementValue(Operation *op, Value operand,
ConversionPatternRewriter &rewriter) const override {
Location loc = op->getLoc();
auto i8Ptr = AsyncAPI::opaquePointerType(rewriter.getContext());
// Get the underlying value type from the `async.value`.
auto await = cast<AwaitOp>(op);
auto valueType = await.operand().getType().cast<ValueType>().getValueType();
// Get a pointer to an async value storage from the runtime.
auto storage = rewriter.create<CallOp>(loc, kGetValueStorage,
TypeRange(i8Ptr), operand);
// Cast from i8* to a pointer to the LLVM type.
auto llvmValueType = getTypeConverter()->convertType(valueType);
auto castedStorage = rewriter.create<LLVM::BitcastOp>(
loc, LLVM::LLVMPointerType::get(llvmValueType.cast<LLVM::LLVMType>()),
storage.getResult(0));
// Load from the async value storage.
auto loaded = rewriter.create<LLVM::LoadOp>(loc, castedStorage.getResult());
// Cast from the LLVM type to the expected value type. This cast will become
// a no-op after lowering to LLVM.
return rewriter.create<LLVM::DialectCastOp>(loc, valueType, loaded);
}
};
/// Lowering for `async.await_all` operation.
class AwaitAllOpLowering : public AwaitOpLoweringBase<AwaitAllOp, GroupType> {
using Base = AwaitOpLoweringBase<AwaitAllOp, GroupType>;
public:
explicit AwaitAllOpLowering(
MLIRContext *ctx,
TypeConverter &converter, MLIRContext *ctx,
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions)
: Base(ctx, outlinedFunctions, kAwaitGroup, kAwaitAllAndExecute) {}
: Base(converter, ctx, outlinedFunctions, kAwaitGroup,
kAwaitAllAndExecute) {}
};
} // namespace
//===----------------------------------------------------------------------===//
// async.yield op lowerings to the corresponding async runtime function calls.
//===----------------------------------------------------------------------===//
class YieldOpLowering : public ConversionPattern {
public:
explicit YieldOpLowering(
TypeConverter &converter, MLIRContext *ctx,
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions)
: ConversionPattern(async::YieldOp::getOperationName(), 1, converter,
ctx),
outlinedFunctions(outlinedFunctions) {}
LogicalResult
matchAndRewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
// Check if yield operation is inside the outlined coroutine function.
auto func = op->template getParentOfType<FuncOp>();
auto outlined = outlinedFunctions.find(func);
if (outlined == outlinedFunctions.end())
return op->emitOpError(
"async.yield is not inside the outlined coroutine function");
Location loc = op->getLoc();
const CoroMachinery &coro = outlined->getSecond();
// Store yielded values into the async value storage and emplace them.
auto i8Ptr = AsyncAPI::opaquePointerType(rewriter.getContext());
for (auto tuple : llvm::zip(operands, coro.returnValues)) {
// Store `yieldValue` into the `asyncValue` storage.
Value yieldValue = std::get<0>(tuple);
Value asyncValue = std::get<1>(tuple);
// Get an opaque i8* pointer to an async value storage from the runtime.
auto storage = rewriter.create<CallOp>(loc, kGetValueStorage,
TypeRange(i8Ptr), asyncValue);
// Cast storage pointer to the yielded value type.
auto castedStorage = rewriter.create<LLVM::BitcastOp>(
loc,
LLVM::LLVMPointerType::get(
yieldValue.getType().cast<LLVM::LLVMType>()),
storage.getResult(0));
// Store the yielded value into the async value storage.
rewriter.create<LLVM::StoreOp>(loc, yieldValue,
castedStorage.getResult());
// Emplace the `async.value` to mark it ready.
rewriter.create<CallOp>(loc, kEmplaceValue, TypeRange(), asyncValue);
}
// Emplace the completion token to mark it ready.
rewriter.create<CallOp>(loc, kEmplaceToken, TypeRange(), coro.asyncToken);
// Original operation was replaced by the function call(s).
rewriter.eraseOp(op);
return success();
}
private:
const llvm::DenseMap<FuncOp, CoroMachinery> &outlinedFunctions;
};
//===----------------------------------------------------------------------===//
namespace {
@@ -818,15 +1042,38 @@ void ConvertAsyncToLLVMPass::runOnOperation() {
ModuleOp module = getOperation();
SymbolTable symbolTable(module);
MLIRContext *ctx = &getContext();
// Outline all `async.execute` body regions into async functions (coroutines).
llvm::DenseMap<FuncOp, CoroMachinery> outlinedFunctions;
// We use conversion to LLVM types to ensure that all `async.value` operands
// and results can be lowered to LLVM load and store operations.
LLVMTypeConverter llvmConverter(ctx);
llvmConverter.addConversion(AsyncRuntimeTypeConverter::convertAsyncTypes);
// Returns true if the `async.value` payload is convertible to LLVM.
auto isConvertibleToLlvm = [&](Type type) -> bool {
auto valueType = type.cast<ValueType>().getValueType();
return static_cast<bool>(llvmConverter.convertType(valueType));
};
WalkResult outlineResult = module.walk([&](ExecuteOp execute) {
// All operands and results must be convertible to LLVM.
if (!llvm::all_of(execute.operands().getTypes(), isConvertibleToLlvm)) {
execute.emitOpError("operands payload must be convertible to LLVM type");
return WalkResult::interrupt();
}
if (!llvm::all_of(execute.results().getTypes(), isConvertibleToLlvm)) {
execute.emitOpError("results payload must be convertible to LLVM type");
return WalkResult::interrupt();
}
// We currently do not support execute operations that have async value
// operands or produce async results.
if (!execute.operands().empty() || !execute.results().empty()) {
execute.emitOpError("can't outline async.execute op with async value "
"operands or returned async results");
if (!execute.operands().empty()) {
execute.emitOpError(
"can't outline async.execute op with async value operands");
return WalkResult::interrupt();
}
@@ -852,26 +1099,44 @@ void ConvertAsyncToLLVMPass::runOnOperation() {
addCoroutineIntrinsicsDeclarations(module);
addCRuntimeDeclarations(module);
MLIRContext *ctx = &getContext();
// Convert async dialect types and operations to LLVM dialect.
AsyncRuntimeTypeConverter converter;
OwningRewritePatternList patterns;
// Convert async types in function signatures and function calls.
populateFuncOpTypeConversionPattern(patterns, ctx, converter);
patterns.insert<CallOpOpConversion>(ctx);
patterns.insert<AddRefOpLowering, DropRefOpLowering>(ctx);
patterns.insert<CreateGroupOpLowering, AddToGroupOpLowering>(ctx);
patterns.insert<AwaitOpLowering, AwaitAllOpLowering>(ctx, outlinedFunctions);
populateCallOpTypeConversionPattern(patterns, ctx, converter);
// Convert return operations inside async.execute regions.
patterns.insert<ReturnOpOpConversion>(converter, ctx);
// Lower async operations to async runtime API calls.
patterns.insert<AddRefOpLowering, DropRefOpLowering>(converter, ctx);
patterns.insert<CreateGroupOpLowering, AddToGroupOpLowering>(converter, ctx);
// Use the LLVM type converter to automatically convert between the async
// value payload type and the LLVM type when loading/storing from/to the
// async value storage (an opaque i8* pointer) using LLVM load/store ops.
patterns
.insert<AwaitTokenOpLowering, AwaitValueOpLowering, AwaitAllOpLowering>(
llvmConverter, ctx, outlinedFunctions);
patterns.insert<YieldOpLowering>(llvmConverter, ctx, outlinedFunctions);
ConversionTarget target(*ctx);
target.addLegalOp<ConstantOp>();
target.addLegalDialect<LLVM::LLVMDialect>();
// All operations from Async dialect must be lowered to the runtime API calls.
target.addIllegalDialect<AsyncDialect>();
// Add dynamic legality constraints to apply conversions defined above.
target.addDynamicallyLegalOp<FuncOp>(
[&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
target.addDynamicallyLegalOp<CallOp>(
[&](CallOp op) { return converter.isLegal(op.getResultTypes()); });
target.addDynamicallyLegalOp<ReturnOp>(
[&](ReturnOp op) { return converter.isLegal(op.getOperandTypes()); });
target.addDynamicallyLegalOp<CallOp>([&](CallOp op) {
return converter.isSignatureLegal(op.getCalleeType());
});
if (failed(applyPartialConversion(module, target, std::move(patterns))))
signalPassFailure();


@@ -13,5 +13,7 @@ add_mlir_conversion_library(MLIRAsyncToLLVM
LINK_LIBS PUBLIC
MLIRAsync
MLIRLLVMIR
MLIRStandardOpsTransforms
MLIRStandardToLLVM
MLIRTransforms
)


@@ -114,6 +114,7 @@ static AsyncRuntime *getDefaultAsyncRuntimeInstance() {
return runtime.get();
}
// Async token provides a mechanism to signal asynchronous operation completion.
struct AsyncToken : public RefCounted {
// AsyncToken created with a reference count of 2 because it will be returned
// to the `async.execute` caller and also will be later on emplaced by the
@@ -130,6 +131,28 @@ struct AsyncToken : public RefCounted {
std::vector<std::function<void()>> awaiters;
};
// Async value provides a mechanism to access the result of asynchronous
// operations. It owns the storage that is used to store/load the value of the
// underlying type, and a flag to signal if the value is ready or not.
struct AsyncValue : public RefCounted {
// AsyncValue, similar to an AsyncToken, is created with a reference count of 2.
AsyncValue(AsyncRuntime *runtime, int32_t size)
: RefCounted(runtime, /*count=*/2), storage(size) {}
// Internal state below guarded by a mutex.
std::mutex mu;
std::condition_variable cv;
bool ready = false;
std::vector<std::function<void()>> awaiters;
// Use a vector of bytes to store the async value payload.
std::vector<int8_t> storage;
};
// Async group provides a mechanism to group together multiple async tokens or
// values to await on all of them together (wait for the completion of all
// tokens or values added to the group).
struct AsyncGroup : public RefCounted {
AsyncGroup(AsyncRuntime *runtime)
: RefCounted(runtime), pendingTokens(0), rank(0) {}
@@ -159,12 +182,18 @@ extern "C" void mlirAsyncRuntimeDropRef(RefCountedObjPtr ptr, int32_t count) {
refCounted->dropRef(count);
}
// Create a new `async.token` in not-ready state.
// Creates a new `async.token` in not-ready state.
extern "C" AsyncToken *mlirAsyncRuntimeCreateToken() {
AsyncToken *token = new AsyncToken(getDefaultAsyncRuntimeInstance());
return token;
}
// Creates a new `async.value` in not-ready state.
extern "C" AsyncValue *mlirAsyncRuntimeCreateValue(int32_t size) {
AsyncValue *value = new AsyncValue(getDefaultAsyncRuntimeInstance(), size);
return value;
}
// Create a new `async.group` in empty state.
extern "C" AsyncGroup *mlirAsyncRuntimeCreateGroup() {
AsyncGroup *group = new AsyncGroup(getDefaultAsyncRuntimeInstance());
@@ -228,18 +257,45 @@ extern "C" void mlirAsyncRuntimeEmplaceToken(AsyncToken *token) {
token->dropRef();
}
// Switches `async.value` to ready state and runs all awaiters.
extern "C" void mlirAsyncRuntimeEmplaceValue(AsyncValue *value) {
// Make sure that `dropRef` does not destroy the mutex owned by the lock.
{
std::unique_lock<std::mutex> lock(value->mu);
value->ready = true;
value->cv.notify_all();
for (auto &awaiter : value->awaiters)
awaiter();
}
// Async values are created with a ref count of 2 to keep the value alive
// until the async task completes. Drop this reference explicitly when the
// value is emplaced.
value->dropRef();
}
extern "C" void mlirAsyncRuntimeAwaitToken(AsyncToken *token) {
std::unique_lock<std::mutex> lock(token->mu);
if (!token->ready)
token->cv.wait(lock, [token] { return token->ready; });
}
extern "C" void mlirAsyncRuntimeAwaitValue(AsyncValue *value) {
std::unique_lock<std::mutex> lock(value->mu);
if (!value->ready)
value->cv.wait(lock, [value] { return value->ready; });
}
extern "C" void mlirAsyncRuntimeAwaitAllInGroup(AsyncGroup *group) {
std::unique_lock<std::mutex> lock(group->mu);
if (group->pendingTokens != 0)
group->cv.wait(lock, [group] { return group->pendingTokens == 0; });
}
// Returns a pointer to the storage owned by the async value.
extern "C" ValueStorage mlirAsyncRuntimeGetValueStorage(AsyncValue *value) {
return value->storage.data();
}
extern "C" void mlirAsyncRuntimeExecute(CoroHandle handle, CoroResume resume) {
(*resume)(handle);
}
@@ -255,6 +311,17 @@ extern "C" void mlirAsyncRuntimeAwaitTokenAndExecute(AsyncToken *token,
token->awaiters.push_back([execute]() { execute(); });
}
extern "C" void mlirAsyncRuntimeAwaitValueAndExecute(AsyncValue *value,
CoroHandle handle,
CoroResume resume) {
std::unique_lock<std::mutex> lock(value->mu);
auto execute = [handle, resume]() { (*resume)(handle); };
if (value->ready)
execute();
else
value->awaiters.push_back([execute]() { execute(); });
}
extern "C" void mlirAsyncRuntimeAwaitAllInGroupAndExecute(AsyncGroup *group,
CoroHandle handle,
CoroResume resume) {


@@ -211,3 +211,44 @@ func @async_group_await_all(%arg0: f32, %arg1: memref<1xf32>) {
// Emplace result token.
// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_1]])
// -----
// CHECK-LABEL: execute_and_return_f32
func @execute_and_return_f32() -> f32 {
// CHECK: %[[RET:.*]]:2 = call @async_execute_fn
%token, %result = async.execute -> !async.value<f32> {
%c0 = constant 123.0 : f32
async.yield %c0 : f32
}
// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[RET]]#1)
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: %[[LOADED:.*]] = llvm.load %[[ST_F32]] : !llvm.ptr<float>
// CHECK: %[[CASTED:.*]] = llvm.mlir.cast %[[LOADED]] : !llvm.float to f32
%0 = async.await %result : !async.value<f32>
return %0 : f32
}
// Function outlined from the async.execute operation.
// CHECK-LABEL: func private @async_execute_fn()
// CHECK: %[[TOKEN:.*]] = call @mlirAsyncRuntimeCreateToken()
// CHECK: %[[VALUE:.*]] = call @mlirAsyncRuntimeCreateValue
// CHECK: %[[HDL:.*]] = llvm.call @llvm.coro.begin
// Suspend coroutine in the beginning.
// CHECK: call @mlirAsyncRuntimeExecute(%[[HDL]],
// CHECK: llvm.call @llvm.coro.suspend
// Emplace result value.
// CHECK: %[[CST:.*]] = constant 1.230000e+02 : f32
// CHECK: %[[LLVM_CST:.*]] = llvm.mlir.cast %[[CST]] : f32 to !llvm.float
// CHECK: %[[STORAGE:.*]] = call @mlirAsyncRuntimeGetValueStorage(%[[VALUE]])
// CHECK: %[[ST_F32:.*]] = llvm.bitcast %[[STORAGE]]
// CHECK: llvm.store %[[LLVM_CST]], %[[ST_F32]] : !llvm.ptr<float>
// CHECK: call @mlirAsyncRuntimeEmplaceValue(%[[VALUE]])
// Emplace result token.
// CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[TOKEN]])


@@ -0,0 +1,64 @@
// RUN: mlir-opt %s -async-ref-counting \
// RUN: -convert-async-to-llvm \
// RUN: -convert-vector-to-llvm \
// RUN: -convert-std-to-llvm \
// RUN: | mlir-cpu-runner \
// RUN: -e main -entry-point-result=void -O0 \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
// RUN: | FileCheck %s --dump-input=always
func @main() {
// ------------------------------------------------------------------------ //
// Blocking async.await outside of the async.execute.
// ------------------------------------------------------------------------ //
%token, %result = async.execute -> !async.value<f32> {
%0 = constant 123.456 : f32
async.yield %0 : f32
}
%1 = async.await %result : !async.value<f32>
// CHECK: 123.456
vector.print %1 : f32
// ------------------------------------------------------------------------ //
// Non-blocking async.await inside the async.execute
// ------------------------------------------------------------------------ //
%token0, %result0 = async.execute -> !async.value<f32> {
%token1, %result2 = async.execute -> !async.value<f32> {
%2 = constant 456.789 : f32
async.yield %2 : f32
}
%3 = async.await %result2 : !async.value<f32>
async.yield %3 : f32
}
%4 = async.await %result0 : !async.value<f32>
// CHECK: 456.789
vector.print %4 : f32
// ------------------------------------------------------------------------ //
// Memref allocated inside async.execute region.
// ------------------------------------------------------------------------ //
%token2, %result2 = async.execute[%token0] -> !async.value<memref<f32>> {
%5 = alloc() : memref<f32>
%c0 = constant 987.654 : f32
store %c0, %5[]: memref<f32>
async.yield %5 : memref<f32>
}
%6 = async.await %result2 : !async.value<memref<f32>>
%7 = memref_cast %6 : memref<f32> to memref<*xf32>
// CHECK: Unranked Memref
// CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = []
// CHECK-NEXT: [987.654]
call @print_memref_f32(%7): (memref<*xf32>) -> ()
dealloc %6 : memref<f32>
return
}
func private @print_memref_f32(memref<*xf32>)
attributes { llvm.emit_c_interface }