forked from OSchip/llvm-project
Move gpu.launch_func to ODS. NFC
Move the definition of the gpu.launch_func operation from the hand-rolled C++ implementation to the ODS framework. Also move the documentation. This only performs the move and remains a non-functional change; a follow-up will clean up the custom functions that can be auto-generated using ODS. PiperOrigin-RevId: 284842252
This commit is contained in:
parent
995048d7b7
commit
d1213ae51d
|
@ -69,70 +69,6 @@ Example:
|
|||
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
|
||||
```
|
||||
|
||||
### `gpu.launch_func`
|
||||
|
||||
Launch a kernel function on the specified grid of thread blocks. `gpu.launch`
|
||||
operations are lowered to `gpu.launch_func` operations by outlining the kernel
|
||||
body into a function in a dedicated module, which reflects the separate
|
||||
compilation process. The kernel function is required to have the `gpu.kernel`
|
||||
attribute. The module containing the kernel function is required to have the
|
||||
`gpu.kernel_module` attribute and must be named. And finally, the module
|
||||
containing the kernel module (which thus cannot be the top-level module) is
|
||||
required to have the `gpu.container_module` attribute. The `gpu.launch_func`
|
||||
operation has a string attribute named `kernel` to specify the name of the
|
||||
kernel function to launch and an attribute named `kernel_module` to specify the
|
||||
name of the module containing that kernel function.
|
||||
|
||||
The operation takes at least six operands, with the first three operands being
|
||||
grid sizes along x,y,z dimensions and the following three being block sizes
|
||||
along x,y,z dimensions. When a lower-dimensional kernel is required, unused
|
||||
sizes must be explicitly set to `1`. The remaining operands are passed as
|
||||
arguments to the kernel function.
|
||||
|
||||
A custom syntax for this operation is currently not available.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
module attributes {gpu.container_module} {
|
||||
|
||||
// This module creates a separate compilation unit for the GPU compiler.
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
|
||||
attributes { nvvm.kernel = true } {
|
||||
|
||||
// Operations that produce block/thread IDs and dimensions are injected when
|
||||
// outlining the `gpu.launch` body to a function called by `gpu.launch_func`.
|
||||
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
|
||||
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
|
||||
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
|
||||
%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
|
||||
%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
|
||||
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
|
||||
%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
|
||||
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
|
||||
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
|
||||
|
||||
"some_op"(%bx, %tx) : (index, index) -> ()
|
||||
%42 = load %arg1[%bx] : memref<?xf32, 1>
|
||||
}
|
||||
}
|
||||
|
||||
"gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
|
||||
%cst, %cst, %cst, // Block sizes.
|
||||
%arg0, %arg1) // Arguments passed to the kernel function.
|
||||
{ kernel_module = @kernels, // Module containing the kernel function.
|
||||
kernel = "kernel_1" } // Kernel function.
|
||||
: (index, index, index, index, index, index, f32, !llvm<"float*">) -> ()
|
||||
}
|
||||
```
|
||||
|
||||
### `gpu.thread_id`
|
||||
|
||||
Returns the thread id, i.e. the index of the current thread within the block
|
||||
|
|
|
@ -77,58 +77,6 @@ struct KernelDim3 {
|
|||
Value *z;
|
||||
};
|
||||
|
||||
/// Operation to launch a kernel given as outlined function.
|
||||
class LaunchFuncOp : public Op<LaunchFuncOp, OpTrait::AtLeastNOperands<6>::Impl,
|
||||
OpTrait::ZeroResult> {
|
||||
public:
|
||||
using Op::Op;
|
||||
|
||||
static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
|
||||
Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ,
|
||||
Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ,
|
||||
ValueRange kernelOperands);
|
||||
|
||||
static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
|
||||
KernelDim3 gridSize, KernelDim3 blockSize,
|
||||
ValueRange kernelOperands);
|
||||
|
||||
/// The kernel function specified by the operation's `kernel` attribute.
|
||||
StringRef kernel();
|
||||
/// The number of operands passed to the kernel function.
|
||||
unsigned getNumKernelOperands();
|
||||
/// The name of the kernel module specified by the operation's `kernel_module`
|
||||
/// attribute.
|
||||
StringRef getKernelModuleName();
|
||||
/// The i-th operand passed to the kernel function.
|
||||
Value *getKernelOperand(unsigned i);
|
||||
|
||||
/// Get the SSA values passed as operands to specify the grid size.
|
||||
KernelDim3 getGridSizeOperandValues();
|
||||
/// Get the SSA values passed as operands to specify the block size.
|
||||
KernelDim3 getBlockSizeOperandValues();
|
||||
|
||||
LogicalResult verify();
|
||||
|
||||
static StringRef getOperationName() { return "gpu.launch_func"; }
|
||||
|
||||
/// The number of launch configuration operands, placed at the leading
|
||||
/// positions of the operand list.
|
||||
static constexpr unsigned kNumConfigOperands = 6;
|
||||
|
||||
private:
|
||||
// This needs to quietly verify if attributes with names defined below are
|
||||
// present since it is run before the verifier of this op.
|
||||
friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
|
||||
NamedAttribute);
|
||||
|
||||
/// The name of the symbolRef attribute specifying the kernel to launch.
|
||||
static StringRef getKernelAttrName() { return "kernel"; }
|
||||
|
||||
/// The name of the symbolRef attribute specifying the name of the module
|
||||
/// containing the kernel to launch.
|
||||
static StringRef getKernelModuleAttrName() { return "kernel_module"; }
|
||||
};
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/GPU/GPUOps.h.inc"
|
||||
|
||||
|
|
|
@ -23,6 +23,17 @@
|
|||
#define GPU_OPS
|
||||
|
||||
include "mlir/IR/OpBase.td"
|
||||
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
|
||||
|
||||
// Type constraint accepting standard integers, indices and wrapped LLVM integer
|
||||
// types.
|
||||
def IntLikeOrLLVMInt : TypeConstraint<
|
||||
Or<[AnyInteger.predicate, Index.predicate, LLVMInt.predicate]>,
|
||||
"integer, index or LLVM dialect equivalent">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GPU Dialect operations.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def GPU_Dialect : Dialect {
|
||||
let name = "gpu";
|
||||
|
@ -181,6 +192,133 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
|
|||
let parser = [{ return parseGPUFuncOp(parser, result); }];
|
||||
}
|
||||
|
||||
def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
|
||||
Arguments<(ins IntLikeOrLLVMInt:$gridSizeX, IntLikeOrLLVMInt:$gridSizeY,
|
||||
IntLikeOrLLVMInt:$gridSizeZ, IntLikeOrLLVMInt:$blockSizeX,
|
||||
IntLikeOrLLVMInt:$blockSizeY, IntLikeOrLLVMInt:$blockSizeZ,
|
||||
Variadic<AnyType>:$operands)>,
|
||||
Results<(outs)> {
|
||||
let summary = "Launches a function as a GPU kernel";
|
||||
|
||||
let description = [{
|
||||
Launch a kernel function on the specified grid of thread blocks.
|
||||
`gpu.launch` operations are lowered to `gpu.launch_func` operations by
|
||||
outlining the kernel body into a function in a dedicated module, which
|
||||
reflects the separate compilation process. The kernel function is required
|
||||
to have the `gpu.kernel` attribute. The module containing the kernel
|
||||
function is required to have the `gpu.kernel_module` attribute and must be
|
||||
named. And finally, the module containing the kernel module (which thus
|
||||
cannot be the top-level module) is required to have the
|
||||
`gpu.container_module` attribute. The `gpu.launch_func` operation has a
|
||||
string attribute named `kernel` to specify the name of the kernel function
|
||||
to launch and an attribute named `kernel_module` to specify the name of the
|
||||
module containing that kernel function.
|
||||
|
||||
The operation takes at least six operands, with the first three operands
|
||||
being grid sizes along x,y,z dimensions and the following three being block
|
||||
sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
|
||||
unused sizes must be explicitly set to `1`. The remaining operands are
|
||||
passed as arguments to the kernel function.
|
||||
|
||||
A custom syntax for this operation is currently not available.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
module attributes {gpu.container_module} {
|
||||
|
||||
// This module creates a separate compilation unit for the GPU compiler.
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
|
||||
attributes { nvvm.kernel = true } {
|
||||
|
||||
// Operations that produce block/thread IDs and dimensions are
|
||||
// injected when outlining the `gpu.launch` body to a function called
|
||||
// by `gpu.launch_func`.
|
||||
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
|
||||
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
|
||||
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
|
||||
%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
|
||||
%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
|
||||
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
|
||||
%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
|
||||
|
||||
%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
|
||||
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
|
||||
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
|
||||
|
||||
"some_op"(%bx, %tx) : (index, index) -> ()
|
||||
%42 = load %arg1[%bx] : memref<?xf32, 1>
|
||||
}
|
||||
}
|
||||
|
||||
"gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
|
||||
%cst, %cst, %cst, // Block sizes.
|
||||
%arg0, %arg1) // Arguments passed to the kernel.
|
||||
{ kernel_module = @kernels, // Module containing the kernel.
|
||||
kernel = "kernel_1" } // Kernel function.
|
||||
: (index, index, index, index, index, index, f32, !llvm<"float*">)
|
||||
-> ()
|
||||
}
|
||||
```
|
||||
}];
|
||||
|
||||
let skipDefaultBuilders = 1;
|
||||
|
||||
let builders = [
|
||||
OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
|
||||
"Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
|
||||
"Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
|
||||
"ValueRange kernelOperands">,
|
||||
OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
|
||||
"KernelDim3 gridSize, KernelDim3 blockSize, "
|
||||
"ValueRange kernelOperands">
|
||||
];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// The kernel function specified by the operation's `kernel` attribute.
|
||||
StringRef kernel();
|
||||
|
||||
/// The number of operands passed to the kernel function.
|
||||
unsigned getNumKernelOperands();
|
||||
|
||||
/// The name of the kernel module specified by the operation's
|
||||
/// `kernel_module` attribute.
|
||||
StringRef getKernelModuleName();
|
||||
|
||||
/// The i-th operand passed to the kernel function.
|
||||
Value *getKernelOperand(unsigned i);
|
||||
|
||||
/// Get the SSA values passed as operands to specify the grid size.
|
||||
KernelDim3 getGridSizeOperandValues();
|
||||
|
||||
/// Get the SSA values passed as operands to specify the block size.
|
||||
KernelDim3 getBlockSizeOperandValues();
|
||||
|
||||
/// The number of launch configuration operands, placed at the leading
|
||||
/// positions of the operand list.
|
||||
static constexpr unsigned kNumConfigOperands = 6;
|
||||
|
||||
// This needs to quietly verify if attributes with names defined below are
|
||||
// present since it is run before the verifier of this op.
|
||||
friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
|
||||
NamedAttribute);
|
||||
|
||||
/// The name of the string attribute specifying the kernel to launch.
|
||||
static StringRef getKernelAttrName() { return "kernel"; }
|
||||
|
||||
/// The name of the symbolRef attribute specifying the name of the module
|
||||
/// containing the kernel to launch.
|
||||
static StringRef getKernelModuleAttrName() { return "kernel_module"; }
|
||||
}];
|
||||
|
||||
let verifier = [{ return ::verify(*this); }];
|
||||
}
|
||||
|
||||
def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
|
||||
Arguments<(ins Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
|
||||
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
|
||||
|
|
|
@ -34,6 +34,12 @@ def LLVM_Dialect : Dialect {
|
|||
def LLVM_Type : Type<CPred<"$_self.isa<::mlir::LLVM::LLVMType>()">,
|
||||
"LLVM dialect type">;
|
||||
|
||||
// Type constraint accepting only wrapped LLVM integer types.
|
||||
def LLVMInt : TypeConstraint<
|
||||
And<[LLVM_Type.predicate,
|
||||
CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>,
|
||||
"LLVM dialect integer">;
|
||||
|
||||
// Base class for LLVM operations. Defines the interface to the llvm::IRBuilder
|
||||
// used to translate to LLVM IR proper.
|
||||
class LLVM_OpBase<Dialect dialect, string mnemonic, list<OpTrait> traits = []> :
|
||||
|
|
|
@ -46,10 +46,10 @@ bool GPUDialect::isKernel(Operation *op) {
|
|||
|
||||
GPUDialect::GPUDialect(MLIRContext *context)
|
||||
: Dialect(getDialectName(), context) {
|
||||
addOperations<LaunchFuncOp,
|
||||
addOperations<
|
||||
#define GET_OP_LIST
|
||||
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
|
||||
>();
|
||||
>();
|
||||
}
|
||||
|
||||
LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
|
||||
|
@ -545,26 +545,26 @@ KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
|
|||
return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};
|
||||
}
|
||||
|
||||
LogicalResult LaunchFuncOp::verify() {
|
||||
auto module = getParentOfType<ModuleOp>();
|
||||
LogicalResult verify(LaunchFuncOp op) {
|
||||
auto module = op.getParentOfType<ModuleOp>();
|
||||
if (!module)
|
||||
return emitOpError("expected to belong to a module");
|
||||
return op.emitOpError("expected to belong to a module");
|
||||
|
||||
if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))
|
||||
return emitOpError("expected the closest surrounding module to have the '" +
|
||||
GPUDialect::getContainerModuleAttrName() +
|
||||
"' attribute");
|
||||
return op.emitOpError(
|
||||
"expected the closest surrounding module to have the '" +
|
||||
GPUDialect::getContainerModuleAttrName() + "' attribute");
|
||||
|
||||
auto kernelAttr = getAttrOfType<StringAttr>(getKernelAttrName());
|
||||
auto kernelAttr = op.getAttrOfType<StringAttr>(op.getKernelAttrName());
|
||||
if (!kernelAttr)
|
||||
return emitOpError("string attribute '" + getKernelAttrName() +
|
||||
"' must be specified");
|
||||
return op.emitOpError("string attribute '" + op.getKernelAttrName() +
|
||||
"' must be specified");
|
||||
|
||||
auto kernelModuleAttr =
|
||||
getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName());
|
||||
op.getAttrOfType<SymbolRefAttr>(op.getKernelModuleAttrName());
|
||||
if (!kernelModuleAttr)
|
||||
return emitOpError("symbol reference attribute '" +
|
||||
getKernelModuleAttrName() + "' must be specified");
|
||||
return op.emitOpError("symbol reference attribute '" +
|
||||
op.getKernelModuleAttrName() + "' must be specified");
|
||||
|
||||
return success();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue