Use named modules for gpu.launch_func

The kernel function called by `gpu.launch_func` is now placed into an isolated
nested module during the outlining stage to simplify separate compilation.
Until recently, modules did not have names and could not be referenced. This
limitation was circumvented by introducing a stub kernel with the same name at
the same nesting level as the module containing the actual kernel. This
relation is only effective in one direction: from the actual kernel function to
its `gpu.launch_func` "caller".

Leverage the recently introduced symbol name attributes on modules to refer to
a specific nested module from `gpu.launch_func`. This removes the implicit
connection between the identically named stub and kernel functions. It also
allows `gpu.launch_func` operations to target different kernels located in the
same module.
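
With the change, the relation is explicit in the IR. A minimal sketch of the
resulting structure, following the examples added in this change (module and
function names are illustrative):

```mlir
// The outer module is marked as a container of kernel modules.
module attributes {gpu.container_module} {
  // The named nested module holds the outlined kernel.
  module @kernels attributes {gpu.kernel_module} {
    func @kernel_1(%arg0 : f32) attributes { gpu.kernel } {
      return
    }
  }
  func @host(%arg0 : f32) {
    %c1 = constant 1 : index
    // The launch refers to the kernel by module symbol and function name.
    "gpu.launch_func"(%c1, %c1, %c1, %c1, %c1, %c1, %arg0)
        { kernel = "kernel_1", kernel_module = @kernels }
        : (index, index, index, index, index, index, f32) -> ()
    return
  }
}
```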

PiperOrigin-RevId: 273491891
Alex Zinenko 2019-10-08 04:29:58 -07:00 committed by A. Unique TensorFlower
parent 780f107a57
commit 90d65d32d6
15 changed files with 673 additions and 426 deletions

View File

@@ -120,12 +120,17 @@ to have that information by construction.
### `gpu.launch_func`
Launch a kernel given as a function on the specified grid of thread blocks.
`gpu.launch` operations are lowered to `gpu.launch_func` operations by outlining
the kernel body into a function, which is closer to the NVVM model. The
`gpu.launch_func` operation has a function attribute named `kernel` to specify
the kernel function to launch. The kernel function itself has a `nvvm.kernel`
attribute.
Launch a kernel function on the specified grid of thread blocks. `gpu.launch`
operations are lowered to `gpu.launch_func` operations by outlining the kernel
body into a function in a dedicated module, which reflects the separate
compilation process. The kernel function is required to have the `gpu.kernel`
attribute. The module containing the kernel function is required to have the
`gpu.kernel_module` attribute and must be named. And finally, the module
containing the kernel module (which thus cannot be the top-level module) is
required to have the `gpu.container_module` attribute. The `gpu.launch_func`
operation has a string attribute named `kernel` to specify the name of the
kernel function to launch and an attribute named `kernel_module` to specify the
name of the module containing that kernel function.
The operation takes at least six operands, with the first three operands being
grid sizes along x,y,z dimensions and the following three being block sizes
@@ -138,36 +143,43 @@ A custom syntax for this operation is currently not available.
Example:
```mlir {.mlir}
func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
attributes { nvvm.kernel: true } {
module attributes {gpu.container_module} {
// Operations that produce block/thread IDs and dimensions are injected when
// outlining the `gpu.launch` body to a function called by `gpu.launch_func`.
%tIdX = "gpu.thread_id"() {dimension: "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension: "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension: "z"} : () -> (index)
// This module creates a separate compilation unit for the GPU compiler.
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
attributes { nvvm.kernel: true } {
%bDimX = "gpu.block_dim"() {dimension: "x"} : () -> (index)
%bDimY = "gpu.block_dim"() {dimension: "y"} : () -> (index)
%bDimZ = "gpu.block_dim"() {dimension: "z"} : () -> (index)
// Operations that produce block/thread IDs and dimensions are injected when
// outlining the `gpu.launch` body to a function called by `gpu.launch_func`.
%tIdX = "gpu.thread_id"() {dimension: "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension: "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension: "z"} : () -> (index)
%bIdX = "gpu.block_id"() {dimension: "x"} : () -> (index)
%bIdY = "gpu.block_id"() {dimension: "y"} : () -> (index)
%bIdZ = "gpu.block_id"() {dimension: "z"} : () -> (index)
%bDimX = "gpu.block_dim"() {dimension: "x"} : () -> (index)
%bDimY = "gpu.block_dim"() {dimension: "y"} : () -> (index)
%bDimZ = "gpu.block_dim"() {dimension: "z"} : () -> (index)
%gDimX = "gpu.grid_dim"() {dimension: "x"} : () -> (index)
%gDimY = "gpu.grid_dim"() {dimension: "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension: "z"} : () -> (index)
%bIdX = "gpu.block_id"() {dimension: "x"} : () -> (index)
%bIdY = "gpu.block_id"() {dimension: "y"} : () -> (index)
%bIdZ = "gpu.block_id"() {dimension: "z"} : () -> (index)
"some_op"(%bx, %tx) : (index, index) -> ()
%42 = load %arg1[%bx] : memref<?xf32, 1>
%gDimX = "gpu.grid_dim"() {dimension: "x"} : () -> (index)
%gDimY = "gpu.grid_dim"() {dimension: "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension: "z"} : () -> (index)
"some_op"(%bx, %tx) : (index, index) -> ()
%42 = load %arg1[%bx] : memref<?xf32, 1>
}
}
"gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
%cst, %cst, %cst, // Block sizes.
%arg0, %arg1) // Arguments passed to the kernel function.
{ kernel_module = @kernels, // Module containing the kernel function.
kernel = "kernel_1" } // Kernel function.
: (index, index, index, index, index, index, f32, !llvm<"float*">) -> ()
}
"gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
%cst, %cst, %cst, // Block sizes.
%arg0, %arg1) // Arguments passed to the kernel function.
{kernel: @kernel_1 : (f32, !llvm<"float*">) -> ()} // Kernel function.
: (index, index, index, index, index, index, f32, !llvm<"float*">) -> ()
```
### `gpu.thread_id`

View File

@@ -38,6 +38,12 @@ public:
/// Create the dialect in the given `context`.
GPUDialect(MLIRContext *context);
/// Get the name of the attribute used to annotate the modules that contain
/// kernel modules.
static StringRef getContainerModuleAttrName() {
return "gpu.container_module";
}
/// Get the canonical string name of the dialect.
static StringRef getDialectName();
@@ -50,6 +56,9 @@ public:
/// Returns whether the given function is a kernel function, i.e., has the
/// 'gpu.kernel' attribute.
static bool isKernel(FuncOp function);
LogicalResult verifyOperationAttribute(Operation *op,
NamedAttribute attr) override;
};
/// Utility class for the GPU dialect to represent triples of `Value`s
@@ -147,6 +156,9 @@ public:
StringRef kernel();
/// The number of operands passed to the kernel function.
unsigned getNumKernelOperands();
/// The name of the kernel module specified by the operation's `kernel_module`
/// attribute.
StringRef getKernelModuleName();
/// The i-th operand passed to the kernel function.
Value *getKernelOperand(unsigned i);
@@ -164,8 +176,17 @@ public:
static constexpr unsigned kNumConfigOperands = 6;
private:
/// The name of the function attribute specifying the kernel to launch.
// This needs to quietly verify if attributes with names defined below are
// present since it is run before the verifier of this op.
friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
NamedAttribute);
/// The name of the symbolRef attribute specifying the kernel to launch.
static StringRef getKernelAttrName() { return "kernel"; }
/// The name of the symbolRef attribute specifying the name of the module
/// containing the kernel to launch.
static StringRef getKernelModuleAttrName() { return "kernel_module"; }
};
#define GET_OP_CLASSES
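
The accessors above are what lowering passes use to resolve the launched
kernel. A minimal sketch of that lookup, mirroring the CUDA lowering further
down (fragment only; assumes the usual MLIR and GPU dialect headers are
included, and the `lowerLaunch` wrapper is illustrative):

```c++
// Resolve the kernel function referenced by a gpu.launch_func: first find the
// named kernel module via `kernel_module`, then the function via `kernel`.
void lowerLaunch(mlir::ModuleOp containerModule,
                 mlir::gpu::LaunchFuncOp launchOp) {
  auto kernelModule = containerModule.lookupSymbol<mlir::ModuleOp>(
      launchOp.getKernelModuleName());
  assert(kernelModule && "launch_func refers to an undefined kernel module");
  auto kernelFunc =
      kernelModule.lookupSymbol<mlir::FuncOp>(launchOp.kernel());
  assert(kernelFunc && "launch_func refers to an undefined kernel function");
  // ... emit the actual launch (e.g. runtime calls) for `kernelFunc` here ...
  (void)kernelFunc;
}
```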

View File

@@ -135,6 +135,12 @@ public:
func.walk(
[this](mlir::gpu::LaunchFuncOp op) { translateGpuLaunchCalls(op); });
}
// GPU kernel modules are no longer necessary since we have a global
// constant with the CUBIN data.
for (auto m : llvm::make_early_inc_range(getModule().getOps<ModuleOp>()))
if (m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
m.erase();
}
private:
@@ -342,11 +348,12 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
// Emit a call to the cubin getter to retrieve a pointer to the data that
// represents the cubin at runtime.
// TODO(herhut): This should rather be a static global once supported.
auto kernelFunction = getModule().lookupSymbol<FuncOp>(launchOp.kernel());
if (!kernelFunction) {
launchOp.emitError("missing kernel function ") << launchOp.kernel();
return signalPassFailure();
}
auto kernelModule =
getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
assert(kernelModule && "expected a kernel module");
auto kernelFunction = kernelModule.lookupSymbol<FuncOp>(launchOp.kernel());
assert(kernelFunction && "expected a kernel function");
auto cubinGetter =
kernelFunction.getAttrOfType<SymbolRefAttr>(kCubinGetterAnnotation);
if (!cubinGetter) {

View File

@@ -61,26 +61,21 @@ private:
return LLVM::LLVMType::getIntNTy(llvmDialect, bits);
}
// Inserts a global constant string containing `blob` into the parent module
// of `kernelFunc` and generates the function that returns the address of the
// first character of this string.
// Inserts a global constant string containing `blob` into the grand-parent
// module of `kernelFunc` and generates the function that returns the address
// of the first character of this string. Returns the generator function.
// TODO(herhut): consider fusing this pass with launch-func-to-cuda.
void generate(FuncOp kernelFunc, StringAttr blob) {
auto stubFunc = getModule().lookupSymbol<FuncOp>(kernelFunc.getName());
if (!stubFunc) {
kernelFunc.emitError(
"corresponding external function not found in parent module");
return signalPassFailure();
}
Location loc = stubFunc.getLoc();
SmallString<128> nameBuffer(stubFunc.getName());
auto module = stubFunc.getParentOfType<ModuleOp>();
assert(module && "function must belong to a module");
FuncOp generate(FuncOp kernelFunc, StringAttr blob) {
Location loc = kernelFunc.getLoc();
SmallString<128> nameBuffer(kernelFunc.getName());
ModuleOp module = getModule();
assert(kernelFunc.getParentOp() &&
kernelFunc.getParentOp()->getParentOp() == module &&
"expected one level of module nesting");
// Insert the getter function just after the original function.
OpBuilder moduleBuilder(module.getBody(), module.getBody()->begin());
moduleBuilder.setInsertionPointAfter(stubFunc.getOperation());
OpBuilder moduleBuilder(module.getBody());
moduleBuilder.setInsertionPointAfter(kernelFunc.getParentOp());
auto getterType = moduleBuilder.getFunctionType(
llvm::None, LLVM::LLVMType::getInt8PtrTy(llvmDialect));
nameBuffer.append(kCubinGetterSuffix);
@@ -89,7 +84,7 @@ private:
Block *entryBlock = result.addEntryBlock();
// Drop the getter suffix before appending the storage suffix.
nameBuffer.resize(stubFunc.getName().size());
nameBuffer.resize(kernelFunc.getName().size());
nameBuffer.append(kCubinStorageSuffix);
// Obtain the address of the first character of the global string containing
@@ -98,25 +93,29 @@ private:
Value *startPtr = LLVM::createGlobalString(
loc, builder, StringRef(nameBuffer), blob.getValue(), llvmDialect);
builder.create<LLVM::ReturnOp>(loc, startPtr);
// Store the name of the getter on the function for easier lookup.
stubFunc.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result));
return result;
}
public:
void runOnModule() override {
llvmDialect = getContext().getRegisteredDialect<LLVM::LLVMDialect>();
auto modules = getModule().getOps<ModuleOp>();
for (auto module : llvm::make_early_inc_range(modules)) {
for (auto module : getModule().getOps<ModuleOp>()) {
if (!module.getAttrOfType<UnitAttr>(
gpu::GPUDialect::getKernelModuleAttrName()))
continue;
for (auto func : module.getOps<FuncOp>()) {
if (StringAttr blob = func.getAttrOfType<StringAttr>(kCubinAnnotation))
generate(func, blob);
if (StringAttr blob =
func.getAttrOfType<StringAttr>(kCubinAnnotation)) {
FuncOp getter = generate(func, blob);
// Store the name of the getter on the function for easier lookup and
// remove the CUBIN.
func.setAttr(kCubinGetterAnnotation,
SymbolRefAttr::get(getter.getName(), func.getContext()));
func.removeAttr(kCubinAnnotation);
}
}
module.erase();
}
}

View File

@@ -121,7 +121,7 @@ void GPUToSPIRVPass::runOnModule() {
auto module = getModule();
SmallVector<Operation *, 4> spirvModules;
for (auto funcOp : module.getOps<FuncOp>()) {
module.walk([&module, &spirvModules](FuncOp funcOp) {
if (gpu::GPUDialect::isKernel(funcOp)) {
OpBuilder builder(module.getBodyRegion());
// Create a new spirv::ModuleOp for this function, and clone the
@@ -139,7 +139,7 @@ void GPUToSPIRVPass::runOnModule() {
moduleBuilder.clone(*funcOp.getOperation());
spirvModules.push_back(spvModule);
}
}
});
/// Dialect conversion to lower the functions with the spirv::ModuleOps.
SPIRVBasicTypeConverter basicTypeConverter;

View File

@@ -31,6 +31,10 @@
using namespace mlir;
using namespace mlir::gpu;
//===----------------------------------------------------------------------===//
// GPUDialect
//===----------------------------------------------------------------------===//
StringRef GPUDialect::getDialectName() { return "gpu"; }
bool GPUDialect::isKernel(FuncOp function) {
@@ -47,6 +51,78 @@ GPUDialect::GPUDialect(MLIRContext *context)
>();
}
LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
NamedAttribute attr) {
if (!attr.second.isa<UnitAttr>() ||
!attr.first.is(getContainerModuleAttrName()))
return success();
auto module = dyn_cast<ModuleOp>(op);
if (!module)
return op->emitError("expected '")
<< getContainerModuleAttrName() << "' attribute to be attached to '"
<< ModuleOp::getOperationName() << '\'';
auto walkResult = module.walk([&module](LaunchFuncOp launchOp) -> WalkResult {
// Ignore launches that are nested more or less deep than functions in the
// module we are currently checking.
if (!launchOp.getParentOp() ||
launchOp.getParentOp()->getParentOp() != module)
return success();
// Ignore launch ops with missing attributes here. The errors will be
// reported by the verifiers of those ops.
if (!launchOp.getAttrOfType<StringAttr>(
LaunchFuncOp::getKernelAttrName()) ||
!launchOp.getAttrOfType<SymbolRefAttr>(
LaunchFuncOp::getKernelModuleAttrName()))
return success();
// Check that `launch_func` refers to a well-formed GPU kernel module.
StringRef kernelModuleName = launchOp.getKernelModuleName();
auto kernelModule = module.lookupSymbol<ModuleOp>(kernelModuleName);
if (!kernelModule)
return launchOp.emitOpError()
<< "kernel module '" << kernelModuleName << "' is undefined";
if (!kernelModule.getAttrOfType<UnitAttr>(
GPUDialect::getKernelModuleAttrName()))
return launchOp.emitOpError("module '")
<< kernelModuleName << "' is missing the '"
<< GPUDialect::getKernelModuleAttrName() << "' attribute";
// Check that `launch_func` refers to a well-formed kernel function.
StringRef kernelName = launchOp.kernel();
auto kernelFunction = kernelModule.lookupSymbol<FuncOp>(kernelName);
if (!kernelFunction)
return launchOp.emitOpError("kernel function '")
<< kernelName << "' is undefined";
if (!kernelFunction.getAttrOfType<mlir::UnitAttr>(
GPUDialect::getKernelFuncAttrName()))
return launchOp.emitOpError("kernel function is missing the '")
<< GPUDialect::getKernelFuncAttrName() << "' attribute";
if (launchOp.getNumKernelOperands() != kernelFunction.getNumArguments())
return launchOp.emitOpError("got ") << launchOp.getNumKernelOperands()
<< " kernel operands but expected "
<< kernelFunction.getNumArguments();
// Due to the ordering of the current impl of lowering and LLVMLowering,
// type checks need to be temporarily disabled.
// TODO(ntv,zinenko,herhut): reactivate checks once "changing gpu.launchFunc
// to encode target module" has landed.
// auto functionType = kernelFunc.getType();
// for (unsigned i = 0; i < numKernelFuncArgs; ++i) {
// if (getKernelOperand(i)->getType() != functionType.getInput(i)) {
// return emitOpError("type of function argument ")
// << i << " does not match";
// }
// }
return success();
});
return walkResult.wasInterrupted() ? failure() : success();
}
template <typename T> static LogicalResult verifyIndexOp(T op) {
auto dimension = op.dimension();
if (dimension != "x" && dimension != "y" && dimension != "z")
@@ -394,7 +470,11 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
{gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
result.addOperands(kernelOperands);
result.addAttribute(getKernelAttrName(),
builder->getSymbolRefAttr(kernelFunc));
builder->getStringAttr(kernelFunc.getName()));
auto kernelModule = kernelFunc.getParentOfType<ModuleOp>();
if (Optional<StringRef> kernelModuleName = kernelModule.getName())
result.addAttribute(getKernelModuleAttrName(),
builder->getSymbolRefAttr(*kernelModuleName));
}
void LaunchFuncOp::build(Builder *builder, OperationState &result,
@@ -406,13 +486,17 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
}
StringRef LaunchFuncOp::kernel() {
return getAttrOfType<SymbolRefAttr>(getKernelAttrName()).getValue();
return getAttrOfType<StringAttr>(getKernelAttrName()).getValue();
}
unsigned LaunchFuncOp::getNumKernelOperands() {
return getNumOperands() - kNumConfigOperands;
}
StringRef LaunchFuncOp::getKernelModuleName() {
return getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName()).getValue();
}
Value *LaunchFuncOp::getKernelOperand(unsigned i) {
return getOperation()->getOperand(i + kNumConfigOperands);
}
@@ -426,39 +510,25 @@ KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
}
LogicalResult LaunchFuncOp::verify() {
auto kernelAttr = this->getAttr(getKernelAttrName());
if (!kernelAttr) {
return emitOpError("attribute 'kernel' must be specified");
} else if (!kernelAttr.isa<SymbolRefAttr>()) {
return emitOpError("attribute 'kernel' must be a function");
}
auto module = getParentOfType<ModuleOp>();
FuncOp kernelFunc = module.lookupSymbol<FuncOp>(kernel());
if (!kernelFunc)
return emitOpError("kernel function '") << kernelAttr << "' is undefined";
if (!module)
return emitOpError("expected to belong to a module");
if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))
return emitOpError("expected the closest surrounding module to have the '" +
GPUDialect::getContainerModuleAttrName() +
"' attribute");
auto kernelAttr = getAttrOfType<StringAttr>(getKernelAttrName());
if (!kernelAttr)
return emitOpError("string attribute '" + getKernelAttrName() +
"' must be specified");
auto kernelModuleAttr =
getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName());
if (!kernelModuleAttr)
return emitOpError("symbol reference attribute '" +
getKernelModuleAttrName() + "' must be specified");
if (!kernelFunc.getAttrOfType<mlir::UnitAttr>(
GPUDialect::getKernelFuncAttrName())) {
return emitOpError("kernel function is missing the '")
<< GPUDialect::getKernelFuncAttrName() << "' attribute";
}
unsigned numKernelFuncArgs = kernelFunc.getNumArguments();
if (getNumKernelOperands() != numKernelFuncArgs) {
return emitOpError("got ")
<< getNumKernelOperands() << " kernel operands but expected "
<< numKernelFuncArgs;
}
// Due to the ordering of the current impl of lowering and LLVMLowering, type
// checks need to be temporarily disabled.
// TODO(ntv,zinenko,herhut): reactivate checks once "changing gpu.launchFunc
// to encode target module" has landed.
// auto functionType = kernelFunc.getType();
// for (unsigned i = 0; i < numKernelFuncArgs; ++i) {
// if (getKernelOperand(i)->getType() != functionType.getInput(i)) {
// return emitOpError("type of function argument ")
// << i << " does not match";
// }
// }
return success();
}

View File

@@ -144,27 +144,30 @@ class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
public:
void runOnModule() override {
ModuleManager moduleManager(getModule());
bool modified = false;
for (auto func : getModule().getOps<FuncOp>()) {
// Insert just after the function.
Block::iterator insertPt(func.getOperation()->getNextNode());
func.walk([&](gpu::LaunchOp op) {
FuncOp outlinedFunc = outlineKernelFunc(op);
// Potentially renames outlinedFunc to make symbol unique.
moduleManager.insert(insertPt, outlinedFunc);
// Create nested module and insert outlinedFunc. The module will
// originally get the same name as the function, but may be renamed on
// insertion into the parent module.
auto kernelModule = createKernelModule(outlinedFunc, moduleManager);
moduleManager.insert(insertPt, kernelModule);
// Potentially changes signature, pulling in constants.
convertToLaunchFuncOp(op, outlinedFunc);
// Create clone and move body from outlinedFunc.
auto kernelFunc = outlinedFunc.cloneWithoutRegions();
kernelFunc.getBody().takeBody(outlinedFunc.getBody());
// Create nested module and insert kernelFunc.
auto kernelModule = createKernelModule(kernelFunc, moduleManager);
getModule().insert(insertPt, kernelModule);
modified = true;
});
}
// If any new module was inserted in this module, annotate this module as
// a container module.
if (modified)
getModule().setAttr(gpu::GPUDialect::getContainerModuleAttrName(),
UnitAttr::get(&getContext()));
}
private:
@@ -172,9 +175,11 @@ private:
ModuleOp createKernelModule(FuncOp kernelFunc,
const ModuleManager &parentModuleManager) {
auto context = getModule().getContext();
auto kernelModule = ModuleOp::create(UnknownLoc::get(context));
Builder builder(context);
auto kernelModule =
ModuleOp::create(builder.getUnknownLoc(), kernelFunc.getName());
kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(),
UnitAttr::get(context));
builder.getUnitAttr());
ModuleManager moduleManager(kernelModule);
llvm::SmallVector<FuncOp, 8> funcsToInsert = {kernelFunc};

View File

@@ -1,15 +1,15 @@
// RUN: mlir-opt %s --generate-cubin-accessors | FileCheck %s
module attributes {gpu.container_module} {
// CHECK: llvm.mlir.global constant @[[global:.*]]("CUBIN")
module attributes {gpu.kernel_module} {
func @kernel(!llvm.float, !llvm<"float*">)
attributes {nvvm.cubin = "CUBIN"}
}
func @kernel(!llvm.float, !llvm<"float*">)
// CHECK: attributes {gpu.kernel, nvvm.cubingetter = @[[getter:.*]]}
attributes {gpu.kernel}
module attributes {gpu.kernel_module} {
// CHECK-LABEL: func @kernel
func @kernel(!llvm.float, !llvm<"float*">)
// CHECK: attributes {nvvm.cubingetter = @[[getter:.*]]}
attributes {nvvm.cubin = "CUBIN"}
}
// CHECK: func @[[getter]]() -> !llvm<"i8*">
// CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]]
@@ -17,3 +17,4 @@ func @kernel(!llvm.float, !llvm<"float*">)
// CHECK: %[[gep:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]]
// CHECK-SAME: -> !llvm<"i8*">
// CHECK: llvm.return %[[gep]] : !llvm<"i8*">
}

View File

@@ -1,27 +1,33 @@
// RUN: mlir-opt %s --launch-func-to-cuda | FileCheck %s
// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00")
module attributes {gpu.container_module} {
func @cubin_getter() -> !llvm<"i8*">
// CHECK: llvm.mlir.global constant @[[kernel_name:.*]]("kernel\00")
func @kernel(!llvm.float, !llvm<"float*">)
attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter }
func @cubin_getter() -> !llvm<"i8*">
module @kernel_module attributes {gpu.kernel_module} {
func @kernel(!llvm.float, !llvm<"float*">)
attributes { gpu.kernel, nvvm.cubingetter = @cubin_getter }
}
func @foo() {
%0 = "op"() : () -> (!llvm.float)
%1 = "op"() : () -> (!llvm<"float*">)
%cst = constant 8 : index
func @foo() {
%0 = "op"() : () -> (!llvm.float)
%1 = "op"() : () -> (!llvm<"float*">)
%cst = constant 8 : index
// CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
// CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32
// CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
// CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32
// CHECK: llvm.call @mcuGetStreamHelper
// CHECK: llvm.call @mcuLaunchKernel
// CHECK: llvm.call @mcuStreamSynchronize
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel }
: (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> ()
// CHECK: [[module_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
// CHECK: llvm.call @mcuModuleLoad([[module_ptr]], {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32
// CHECK: [[func_ptr:%.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**">
// CHECK: llvm.call @mcuModuleGetFunction([[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32
// CHECK: llvm.call @mcuGetStreamHelper
// CHECK: llvm.call @mcuLaunchKernel
// CHECK: llvm.call @mcuStreamSynchronize
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module }
: (index, index, index, index, index, index, !llvm.float, !llvm<"float*">) -> ()
return
}
return
}

View File

@@ -1,113 +1,137 @@
// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_workgroup_id_x} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
func @builtin_workgroup_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
func @builtin_workgroup_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
return
}
}
}
// -----
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_workgroup_id_y} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_y", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
func @builtin_workgroup_id_y()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "y"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
func @builtin_workgroup_id_y()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "y"} : () -> index
return
}
}
}
// -----
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_workgroup_id_z} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_z", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
func @builtin_workgroup_id_z()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "z"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
module @kernels attributes {gpu.kernel_module} {
func @builtin_workgroup_id_z()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
%0 = "gpu.block_id"() {dimension = "z"} : () -> index
return
}
}
}
// -----
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_workgroup_size_x} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_dim"() {dimension = "x"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
module @kernels attributes {gpu.kernel_module} {
func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.block_dim"() {dimension = "x"} : () -> index
return
}
}
}
// -----
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_local_id_x} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_local_id_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
func @builtin_local_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.thread_id"() {dimension = "x"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
module @kernels attributes {gpu.kernel_module} {
func @builtin_local_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.thread_id"() {dimension = "x"} : () -> index
return
}
}
}
// -----
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @builtin_num_workgroups_x} : (index, index, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @builtin() {
%c0 = constant 1 : index
"gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_num_workgroups_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
func @builtin_num_workgroups_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.grid_dim"() {dimension = "x"} : () -> index
return
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
module @kernels attributes {gpu.kernel_module} {
func @builtin_num_workgroups_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = "gpu.grid_dim"() {dimension = "x"} : () -> index
return
}
}
}

View File

@@ -1,52 +1,56 @@
// RUN: mlir-opt -convert-gpu-to-spirv %s -o - | FileCheck %s
func @load_store(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>) {
%c0 = constant 0 : index
%c12 = constant 12 : index
%0 = subi %c12, %c0 : index
%c1 = constant 1 : index
%c0_0 = constant 0 : index
%c4 = constant 4 : index
%1 = subi %c4, %c0_0 : index
%c1_1 = constant 1 : index
%c1_2 = constant 1 : index
"gpu.launch_func"(%0, %c1_2, %c1_2, %1, %c1_2, %c1_2, %arg0, %arg1, %arg2, %c0, %c0_0, %c1, %c1_1) {kernel = @load_store_kernel} : (index, index, index, index, index, index, memref<12x4xf32>, memref<12x4xf32>, memref<12x4xf32>, index, index, index, index) -> ()
return
}
module attributes {gpu.container_module} {
func @load_store(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>) {
%c0 = constant 0 : index
%c12 = constant 12 : index
%0 = subi %c12, %c0 : index
%c1 = constant 1 : index
%c0_0 = constant 0 : index
%c4 = constant 4 : index
%1 = subi %c4, %c0_0 : index
%c1_1 = constant 1 : index
%c1_2 = constant 1 : index
"gpu.launch_func"(%0, %c1_2, %c1_2, %1, %c1_2, %c1_2, %arg0, %arg1, %arg2, %c0, %c0_0, %c1, %c1_1) {kernel = "load_store_kernel", kernel_module = @kernels} : (index, index, index, index, index, index, memref<12x4xf32>, memref<12x4xf32>, memref<12x4xf32>, index, index, index, index) -> ()
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable {{@.*}} bind(0, 0) : [[TYPE1:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK-NEXT: spv.globalVariable {{@.*}} bind(0, 1) : [[TYPE2:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK-NEXT: spv.globalVariable {{@.*}} bind(0, 2) : [[TYPE3:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK: func @load_store_kernel([[ARG0:%.*]]: [[TYPE1]], [[ARG1:%.*]]: [[TYPE2]], [[ARG2:%.*]]: [[TYPE3]], [[ARG3:%.*]]: i32, [[ARG4:%.*]]: i32, [[ARG5:%.*]]: i32, [[ARG6:%.*]]: i32)
func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
attributes {gpu.kernel} {
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
%1 = "gpu.block_id"() {dimension = "y"} : () -> index
%2 = "gpu.block_id"() {dimension = "z"} : () -> index
%3 = "gpu.thread_id"() {dimension = "x"} : () -> index
%4 = "gpu.thread_id"() {dimension = "y"} : () -> index
%5 = "gpu.thread_id"() {dimension = "z"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%9 = "gpu.block_dim"() {dimension = "x"} : () -> index
%10 = "gpu.block_dim"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "z"} : () -> index
// CHECK: [[INDEX1:%.*]] = spv.IAdd [[ARG3]], {{%.*}}
%12 = addi %arg3, %0 : index
// CHECK: [[INDEX2:%.*]] = spv.IAdd [[ARG4]], {{%.*}}
%13 = addi %arg4, %3 : index
// CHECK: [[PTR1:%.*]] = spv.AccessChain [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: [[VAL1:%.*]] = spv.Load "StorageBuffer" [[PTR1]]
%14 = load %arg0[%12, %13] : memref<12x4xf32>
// CHECK: [[PTR2:%.*]] = spv.AccessChain [[ARG1]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: [[VAL2:%.*]] = spv.Load "StorageBuffer" [[PTR2]]
%15 = load %arg1[%12, %13] : memref<12x4xf32>
// CHECK: [[VAL3:%.*]] = spv.FAdd [[VAL1]], [[VAL2]]
%16 = addf %14, %15 : f32
// CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
store %16, %arg2[%12, %13] : memref<12x4xf32>
return
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable {{@.*}} bind(0, 0) : [[TYPE1:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK-NEXT: spv.globalVariable {{@.*}} bind(0, 1) : [[TYPE2:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK-NEXT: spv.globalVariable {{@.*}} bind(0, 2) : [[TYPE3:!spv.ptr<!spv.array<12 x !spv.array<4 x f32>>, StorageBuffer>]]
// CHECK: func @load_store_kernel([[ARG0:%.*]]: [[TYPE1]], [[ARG1:%.*]]: [[TYPE2]], [[ARG2:%.*]]: [[TYPE3]], [[ARG3:%.*]]: i32, [[ARG4:%.*]]: i32, [[ARG5:%.*]]: i32, [[ARG6:%.*]]: i32)
module @kernels attributes {gpu.kernel_module} {
func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
attributes {gpu.kernel} {
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
%1 = "gpu.block_id"() {dimension = "y"} : () -> index
%2 = "gpu.block_id"() {dimension = "z"} : () -> index
%3 = "gpu.thread_id"() {dimension = "x"} : () -> index
%4 = "gpu.thread_id"() {dimension = "y"} : () -> index
%5 = "gpu.thread_id"() {dimension = "z"} : () -> index
%6 = "gpu.grid_dim"() {dimension = "x"} : () -> index
%7 = "gpu.grid_dim"() {dimension = "y"} : () -> index
%8 = "gpu.grid_dim"() {dimension = "z"} : () -> index
%9 = "gpu.block_dim"() {dimension = "x"} : () -> index
%10 = "gpu.block_dim"() {dimension = "y"} : () -> index
%11 = "gpu.block_dim"() {dimension = "z"} : () -> index
// CHECK: [[INDEX1:%.*]] = spv.IAdd [[ARG3]], {{%.*}}
%12 = addi %arg3, %0 : index
// CHECK: [[INDEX2:%.*]] = spv.IAdd [[ARG4]], {{%.*}}
%13 = addi %arg4, %3 : index
// CHECK: [[PTR1:%.*]] = spv.AccessChain [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: [[VAL1:%.*]] = spv.Load "StorageBuffer" [[PTR1]]
%14 = load %arg0[%12, %13] : memref<12x4xf32>
// CHECK: [[PTR2:%.*]] = spv.AccessChain [[ARG1]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: [[VAL2:%.*]] = spv.Load "StorageBuffer" [[PTR2]]
%15 = load %arg1[%12, %13] : memref<12x4xf32>
// CHECK: [[VAL3:%.*]] = spv.FAdd [[VAL1]], [[VAL2]]
%16 = addf %14, %15 : f32
// CHECK: [[PTR3:%.*]] = spv.AccessChain [[ARG2]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
// CHECK-NEXT: spv.Store "StorageBuffer" [[PTR3]], [[VAL3]]
store %16, %arg2[%12, %13] : memref<12x4xf32>
return
}
}
}

View File

@@ -1,21 +1,27 @@
// RUN: mlir-opt -convert-gpu-to-spirv %s -o - | FileCheck %s
// CHECK: spv.module "Logical" "GLSL450" {
// CHECK-NEXT: spv.globalVariable [[VAR1:@.*]] bind(0, 0) : !spv.ptr<f32, StorageBuffer>
// CHECK-NEXT: spv.globalVariable [[VAR2:@.*]] bind(0, 1) : !spv.ptr<!spv.array<12 x f32>, StorageBuffer>
// CHECK-NEXT: func @kernel_1
// CHECK-NEXT: spv.Return
// CHECK: spv.EntryPoint "GLCompute" @kernel_1, [[VAR1]], [[VAR2]]
func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32, 1>)
attributes { gpu.kernel } {
return
}
module attributes {gpu.container_module} {
func @foo() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<12xf32, 1>)
%cst = constant 1 : index
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_1 }
: (index, index, index, index, index, index, f32, memref<12xf32, 1>) -> ()
return
}
// CHECK: spv.module "Logical" "GLSL450" {
// CHECK-NEXT: spv.globalVariable [[VAR1:@.*]] bind(0, 0) : !spv.ptr<f32, StorageBuffer>
// CHECK-NEXT: spv.globalVariable [[VAR2:@.*]] bind(0, 1) : !spv.ptr<!spv.array<12 x f32>, StorageBuffer>
// CHECK-NEXT: func @kernel_1
// CHECK-NEXT: spv.Return
// CHECK: spv.EntryPoint "GLCompute" @kernel_1, [[VAR1]], [[VAR2]]
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32, 1>)
attributes { gpu.kernel } {
return
}
}
func @foo() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<12xf32, 1>)
%cst = constant 1 : index
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index, f32, memref<12xf32, 1>) -> ()
return
}
}

View File

@@ -96,8 +96,8 @@ func @launch_func_too_few_operands(%sz : index) {
// -----
func @launch_func_missing_callee_attribute(%sz : index) {
// expected-error@+1 {{attribute 'kernel' must be specified}}
func @launch_func_missing_parent_module_attribute(%sz : index) {
// expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
: (index, index, index, index, index, index) -> ()
return
@@ -105,54 +105,134 @@ func @launch_func_missing_callee_attribute(%sz : index) {
// -----
func @launch_func_no_function_attribute(%sz : index) {
// expected-error@+1 {{attribute 'kernel' must be a function}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = 10}
: (index, index, index, index, index, index) -> ()
return
module attributes {gpu.container_module} {
func @launch_func_missing_callee_attribute(%sz : index) {
// expected-error@+1 {{string attribute 'kernel' must be specified}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
func @launch_func_undefined_function(%sz : index) {
// expected-error@+1 {{kernel function '@kernel_1' is undefined}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) { kernel = @kernel_1 }
: (index, index, index, index, index, index) -> ()
return
module attributes {gpu.container_module} {
func @launch_func_missing_module_attribute(%sz : index) {
// expected-error@+1 {{attribute 'kernel_module' must be specified}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = "launch_func_missing_kernel_attr"}
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
func @kernel_1(%arg1 : !llvm<"float*">) {
return
}
func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
// expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) {kernel = @kernel_1}
: (index, index, index, index, index, index, !llvm<"float*">) -> ()
return
module attributes {gpu.container_module} {
func @launch_func_no_function_attribute(%sz : index) {
// expected-error@+1 {{string attribute 'kernel' must be specified}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = 10}
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
return
}
func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm<"float*">) {
// expected-error@+1 {{got 2 kernel operands but expected 1}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg)
{kernel = @kernel_1}
: (index, index, index, index, index, index, !llvm<"float*">,
!llvm<"float*">) -> ()
return
module attributes {gpu.container_module} {
func @launch_func_module_attribute_wrong_type(%sz : index) {
// expected-error@+1 {{symbol reference attribute 'kernel_module' must be specified}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
{kernel = "launch_func_module_attribute_wrong_type", kernel_module = 10}
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
return
module attributes {gpu.container_module} {
func @launch_func_undefined_module(%sz : index) {
// expected-error@+1 {{kernel module 'kernels' is undefined}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
{ kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
module attributes {gpu.container_module} {
module @kernels {
}
func @launch_func_missing_module_attirbute(%sz : index) {
// expected-error@+1 {{module 'kernels' is missing the 'gpu.kernel_module' attribute}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
{ kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
module attributes {gpu.container_module} {
module @kernels attributes {gpu.kernel_module} {
}
func @launch_func_undefined_function(%sz : index) {
// expected-error@+1 {{kernel function 'kernel_1' is undefined}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
{ kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index) -> ()
return
}
}
// -----
module attributes {gpu.container_module} {
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg1 : !llvm<"float*">) {
return
}
}
func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
// expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
{kernel = "kernel_1", kernel_module = @kernels}
: (index, index, index, index, index, index, !llvm<"float*">) -> ()
return
}
}
// -----
module attributes {gpu.container_module} {
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
return
}
}
func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm<"float*">) {
// expected-error@+1 {{got 2 kernel operands but expected 1}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg)
{kernel = "kernel_1", kernel_module = @kernels}
: (index, index, index, index, index, index, !llvm<"float*">,
!llvm<"float*">) -> ()
return
}
}
// -----
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
return
}
}
// Due to the ordering of the current impl of lowering and LLVMLowering, type
@@ -162,7 +242,7 @@ func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
// func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
// // expected-err@+1 {{type of function argument 0 does not match}}
// "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
// {kernel = @kernel_1}
// {kernel = "kernel_1"}
// : (index, index, index, index, index, index, f32) -> ()
// return
// }

View File

@@ -1,105 +1,113 @@
// RUN: mlir-opt %s | FileCheck %s
// CHECK-LABEL:func @no_args(%{{.*}}: index)
func @no_args(%sz : index) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
// CHECK: gpu.return
gpu.return
}
return
}
module attributes {gpu.container_module} {
// CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref<?xf32, 1>) {
func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) args(%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) : f32, memref<?xf32, 1>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd)
args(%kernel_arg0 = %float, %kernel_arg1 = %data) : f32, memref<?xf32, 1> {
// CHECK: gpu.return
gpu.return
// CHECK-LABEL:func @no_args(%{{.*}}: index)
func @no_args(%sz : index) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
// CHECK: gpu.return
gpu.return
}
return
}
return
}
// It is possible to use values passed into the region as arguments.
// CHECK-LABEL: func @passing_values
func @passing_values(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) args(%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) : f32, memref<?xf32, 1>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd)
args(%kernel_arg0 = %float, %kernel_arg1 = %data) : f32, memref<?xf32, 1> {
// CHECK: "use"(%{{.*}})
"use"(%kernel_arg0): (f32) -> ()
// CHECK: gpu.return
gpu.return
// CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref<?xf32, 1>) {
func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) args(%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) : f32, memref<?xf32, 1>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd)
args(%kernel_arg0 = %float, %kernel_arg1 = %data) : f32, memref<?xf32, 1> {
// CHECK: gpu.return
gpu.return
}
return
}
return
}
// It is possible to use values defined in nested regions as long as they don't
// cross kernel launch region boundaries.
// CHECK-LABEL: func @nested_isolation
func @nested_isolation(%sz : index) {
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
"region"() ({
// CHECK: %{{.*}} = "produce"()
%val = "produce"() : () -> (index)
// It is possible to use values passed into the region as arguments.
// CHECK-LABEL: func @passing_values
func @passing_values(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) args(%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) : f32, memref<?xf32, 1>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
threads(%tx, %ty, %tz) in (%block_x = %thrd, %block_y = %thrd, %block_z = %thrd)
args(%kernel_arg0 = %float, %kernel_arg1 = %data) : f32, memref<?xf32, 1> {
// CHECK: "use"(%{{.*}})
"use"(%kernel_arg0): (f32) -> ()
// CHECK: gpu.return
gpu.return
}
return
}
// It is possible to use values defined in nested regions as long as they don't
// cross kernel launch region boundaries.
// CHECK-LABEL: func @nested_isolation
func @nested_isolation(%sz : index) {
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
"region"() ({
// CHECK: "use"(%{{.*}})
"use"(%val) : (index) -> ()
// CHECK: %{{.*}} = "produce"()
%val = "produce"() : () -> (index)
"region"() ({
// CHECK: "use"(%{{.*}})
"use"(%val) : (index) -> ()
}) : () -> ()
}) : () -> ()
}) : () -> ()
// CHECK: gpu.return
gpu.return
// CHECK: gpu.return
gpu.return
}
return
}
return
}
func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>)
attributes { gpu.kernel } {
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
%one = constant 1.0 : f32
%sum = "gpu.all_reduce"(%one) : (f32) -> (f32)
"some_op"(%bIdX, %tIdX) : (index, index) -> ()
%42 = load %arg1[%bIdX] : memref<?xf32, 1>
return
}
func @kernel_2(f32, memref<?xf32, 1>)
attributes { gpu.kernel }
func @foo() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<?xf32, 1>)
// CHECK: %{{.*}} = constant 8
%cst = constant 8 : index
// CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernel_1} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_1 }
: (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
// CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernel_2} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_2 }
: (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
return
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>)
attributes { gpu.kernel } {
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
%one = constant 1.0 : f32
%sum = "gpu.all_reduce"(%one) : (f32) -> (f32)
"some_op"(%bIdX, %tIdX) : (index, index) -> ()
%42 = load %arg1[%bIdX] : memref<?xf32, 1>
return
}
func @kernel_2(f32, memref<?xf32, 1>)
attributes { gpu.kernel }
}
func @foo() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<?xf32, 1>)
// CHECK: %{{.*}} = constant 8
%cst = constant 8 : index
// CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = "kernel_1", kernel_module = @kernels} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
{ kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
// CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = "kernel_2", kernel_module = @kernels} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
{ kernel = "kernel_2", kernel_module = @kernels }
: (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
return
}
}

View File

@@ -1,5 +1,7 @@
// RUN: mlir-opt -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
// CHECK: module attributes {gpu.container_module}
// CHECK-LABEL: func @launch()
func @launch() {
// CHECK: %[[ARG0:.*]] = "op"() : () -> f32
@@ -19,7 +21,7 @@ func @launch() {
// CHECK: %[[BDIMZ:.*]] = constant 28
%bDimZ = constant 28 : index
// CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = @launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
// CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = "launch_kernel", kernel_module = @launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
// CHECK-NOT: gpu.launch blocks
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
%grid_z = %gDimZ)
@@ -34,11 +36,9 @@ func @launch() {
return
}
// CHECK-LABEL: func @launch_kernel
// CHECK-SAME: (f32, memref<?xf32, 1>)
// CHECK-NEXT: attributes {gpu.kernel}
// CHECK-LABEL: func @launch_kernel
// CHECK-LABEL: module @launch_kernel
// CHECK-NEXT: func @launch_kernel
// CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
// CHECK-NEXT: attributes {gpu.kernel}
// CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index
@@ -59,17 +59,19 @@ func @launch()
// -----
// CHECK: module attributes {gpu.container_module}
func @multiple_launches() {
// CHECK: %[[CST:.*]] = constant 8 : index
%cst = constant 8 : index
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = "multiple_launches_kernel", kernel_module = @multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
%grid_z = %cst)
threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
%block_z = %cst) {
gpu.return
}
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel_0} : (index, index, index, index, index, index) -> ()
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = "multiple_launches_kernel", kernel_module = @multiple_launches_kernel_0} : (index, index, index, index, index, index) -> ()
gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
%grid_z2 = %cst)
threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst,
@@ -79,8 +81,10 @@ func @multiple_launches() {
return
}
// CHECK: func @multiple_launches_kernel()
// CHECK: func @multiple_launches_kernel_0()
// CHECK: module @multiple_launches_kernel
// CHECK: func @multiple_launches_kernel
// CHECK: module @multiple_launches_kernel_0
// CHECK: func @multiple_launches_kernel
// -----
@@ -89,7 +93,7 @@ func @extra_constants(%arg0 : memref<?xf32>) {
%cst = constant 8 : index
%cst2 = constant 2 : index
%cst3 = constant 3 : index
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}) {kernel = @extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
// CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}) {kernel = "extra_constants_kernel", kernel_module = @extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
%grid_z = %cst)
threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,