Move gpu.launch_func to ODS. NFC

Move the definition of gpu.launch_func operation from hand-rolled C++
implementation to the ODS framework. Also move the documentation. This only
performs the move and remains a non-functional change, a follow-up will clean
up the custom functions that can be auto-generated using ODS.

PiperOrigin-RevId: 284842252
This commit is contained in:
Alex Zinenko 2019-12-10 13:54:50 -08:00 committed by A. Unique TensorFlower
parent 995048d7b7
commit d1213ae51d
5 changed files with 158 additions and 130 deletions

View File

@@ -69,70 +69,6 @@ Example:
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
```
### `gpu.launch_func`
Launch a kernel function on the specified grid of thread blocks. `gpu.launch`
operations are lowered to `gpu.launch_func` operations by outlining the kernel
body into a function in a dedicated module, which reflects the separate
compilation process. The kernel function is required to have the `gpu.kernel`
attribute. The module containing the kernel function is required to have the
`gpu.kernel_module` attribute and must be named. And finally, the module
containing the kernel module (which thus cannot be the top-level module) is
required to have the `gpu.container_module` attribute. The `gpu.launch_func`
operation has a string attribute named `kernel` to specify the name of the
kernel function to launch and an attribute named `kernel_module` to specify the
name of the module containing that kernel function.
The operation takes at least six operands, with the first three operands being
grid sizes along x,y,z dimensions and the following three being block sizes
along x,y,z dimensions. When a lower-dimensional kernel is required, unused
sizes must be explicitly set to `1`. The remaining operands are passed as
arguments to the kernel function.
A custom syntax for this operation is currently not available.
Example:
```mlir
module attributes {gpu.container_module} {
// This module creates a separate compilation unit for the GPU compiler.
module @kernels attributes {gpu.kernel_module} {
func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
attributes { nvvm.kernel = true } {
// Operations that produce block/thread IDs and dimensions are injected when
// outlining the `gpu.launch` body to a function called by `gpu.launch_func`.
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
%tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
%bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
%bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
%bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
%bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
%bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
%bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
%gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
"some_op"(%bx, %tx) : (index, index) -> ()
%42 = load %arg1[%bx] : memref<?xf32, 1>
}
}
"gpu.launch_func"(%cst, %cst, %cst, // Grid sizes.
%cst, %cst, %cst, // Block sizes.
%arg0, %arg1) // Arguments passed to the kernel function.
{ kernel_module = @kernels, // Module containing the kernel function.
kernel = "kernel_1" } // Kernel function.
: (index, index, index, index, index, index, f32, !llvm<"float*">) -> ()
}
```
### `gpu.thread_id`
Returns the thread id, i.e. the index of the current thread within the block

View File

@@ -77,58 +77,6 @@ struct KernelDim3 {
Value *z;
};
/// Operation to launch a kernel given as outlined function.
///
/// NOTE(review): per the commit message, this hand-rolled declaration is being
/// replaced by the ODS-generated `GPU_LaunchFuncOp`; the member list below is
/// mirrored verbatim in that op's `extraClassDeclaration`.
class LaunchFuncOp : public Op<LaunchFuncOp, OpTrait::AtLeastNOperands<6>::Impl,
OpTrait::ZeroResult> {
public:
using Op::Op;

/// Builds a launch_func that invokes `kernelFunc`, taking one SSA value per
/// x/y/z dimension for the grid and block sizes, followed by the values
/// passed as kernel arguments.
static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ,
Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ,
ValueRange kernelOperands);

/// Convenience overload of the builder above taking the grid and block
/// sizes as KernelDim3 triples instead of individual values.
static void build(Builder *builder, OperationState &result, FuncOp kernelFunc,
KernelDim3 gridSize, KernelDim3 blockSize,
ValueRange kernelOperands);

/// The kernel function specified by the operation's `kernel` attribute.
StringRef kernel();

/// The number of operands passed to the kernel function.
unsigned getNumKernelOperands();

/// The name of the kernel module specified by the operation's `kernel_module`
/// attribute.
StringRef getKernelModuleName();

/// The i-th operand passed to the kernel function.
Value *getKernelOperand(unsigned i);

/// Get the SSA values passed as operands to specify the grid size.
KernelDim3 getGridSizeOperandValues();

/// Get the SSA values passed as operands to specify the block size.
KernelDim3 getBlockSizeOperandValues();

/// Checks that the op lives (transitively) inside a module carrying the
/// `gpu.container_module` attribute and that the `kernel` (string) and
/// `kernel_module` (symbol-ref) attributes are present.
LogicalResult verify();

static StringRef getOperationName() { return "gpu.launch_func"; }

/// The number of launch configuration operands, placed at the leading
/// positions of the operand list.
static constexpr unsigned kNumConfigOperands = 6;

private:
// This needs to quietly verify if attributes with names defined below are
// present since it is run before the verifier of this op.
friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
NamedAttribute);

/// The name of the symbolRef attribute specifying the kernel to launch.
static StringRef getKernelAttrName() { return "kernel"; }

/// The name of the symbolRef attribute specifying the name of the module
/// containing the kernel to launch.
static StringRef getKernelModuleAttrName() { return "kernel_module"; }
};
#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/GPUOps.h.inc"

View File

@@ -23,6 +23,17 @@
#define GPU_OPS
include "mlir/IR/OpBase.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
// Type constraint accepting standard integers, indices and wrapped LLVM integer
// types. Used for the grid/block size operands of gpu.launch_func, which may
// appear either before or after lowering to the LLVM dialect.
def IntLikeOrLLVMInt : TypeConstraint<
Or<[AnyInteger.predicate, Index.predicate, LLVMInt.predicate]>,
"integer, index or LLVM dialect equivalent">;
//===----------------------------------------------------------------------===//
// GPU Dialect operations.
//===----------------------------------------------------------------------===//
def GPU_Dialect : Dialect {
let name = "gpu";
@@ -181,6 +192,133 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
let parser = [{ return parseGPUFuncOp(parser, result); }];
}
def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
    Arguments<(ins IntLikeOrLLVMInt:$gridSizeX, IntLikeOrLLVMInt:$gridSizeY,
               IntLikeOrLLVMInt:$gridSizeZ, IntLikeOrLLVMInt:$blockSizeX,
               IntLikeOrLLVMInt:$blockSizeY, IntLikeOrLLVMInt:$blockSizeZ,
               Variadic<AnyType>:$operands)>,
    Results<(outs)> {
  // Fixed typo: "kerneel" -> "kernel".
  let summary = "Launches a function as a GPU kernel";

  let description = [{
    Launch a kernel function on the specified grid of thread blocks.
    `gpu.launch` operations are lowered to `gpu.launch_func` operations by
    outlining the kernel body into a function in a dedicated module, which
    reflects the separate compilation process. The kernel function is required
    to have the `gpu.kernel` attribute. The module containing the kernel
    function is required to have the `gpu.kernel_module` attribute and must be
    named. And finally, the module containing the kernel module (which thus
    cannot be the top-level module) is required to have the
    `gpu.container_module` attribute. The `gpu.launch_func` operation has a
    string attribute named `kernel` to specify the name of the kernel function
    to launch and an attribute named `kernel_module` to specify the name of the
    module containing that kernel function.

    The operation takes at least six operands, with the first three operands
    being grid sizes along x,y,z dimensions and the following three being block
    sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
    unused sizes must be explicitly set to `1`. The remaining operands are
    passed as arguments to the kernel function.

    A custom syntax for this operation is currently not available.

    Example:

    ```mlir
    module attributes {gpu.container_module} {

      // This module creates a separate compilation unit for the GPU compiler.
      module @kernels attributes {gpu.kernel_module} {
        func @kernel_1(%arg0 : f32, %arg1 : !llvm<"float*">)
            attributes { nvvm.kernel = true } {

          // Operations that produce block/thread IDs and dimensions are
          // injected when outlining the `gpu.launch` body to a function called
          // by `gpu.launch_func`.
          %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
          %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
          %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)

          %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
          %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
          %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)

          %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
          %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
          %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)

          %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
          %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
          %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)

          // Fixed example: the original referenced undefined values %bx/%tx;
          // use the %bIdX/%tIdX values defined above.
          "some_op"(%bIdX, %tIdX) : (index, index) -> ()
          %42 = load %arg1[%bIdX] : memref<?xf32, 1>
        }
      }

      "gpu.launch_func"(%cst, %cst, %cst,  // Grid sizes.
                        %cst, %cst, %cst,  // Block sizes.
                        %arg0, %arg1)      // Arguments passed to the kernel.
          { kernel_module = @kernels,      // Module containing the kernel.
            kernel = "kernel_1" }          // Kernel function.
          : (index, index, index, index, index, index, f32, !llvm<"float*">)
            -> ()
    }
    ```
  }];

  // Custom builders are kept from the hand-rolled op; suppress the
  // auto-generated ones.
  let skipDefaultBuilders = 1;

  let builders = [
    OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
              "Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
              "Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
              "ValueRange kernelOperands">,
    OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
              "KernelDim3 gridSize, KernelDim3 blockSize, "
              "ValueRange kernelOperands">
  ];

  let extraClassDeclaration = [{
    /// The kernel function specified by the operation's `kernel` attribute.
    StringRef kernel();

    /// The number of operands passed to the kernel function.
    unsigned getNumKernelOperands();

    /// The name of the kernel module specified by the operation's
    /// `kernel_module` attribute.
    StringRef getKernelModuleName();

    /// The i-th operand passed to the kernel function.
    Value *getKernelOperand(unsigned i);

    /// Get the SSA values passed as operands to specify the grid size.
    KernelDim3 getGridSizeOperandValues();

    /// Get the SSA values passed as operands to specify the block size.
    KernelDim3 getBlockSizeOperandValues();

    /// The number of launch configuration operands, placed at the leading
    /// positions of the operand list.
    static constexpr unsigned kNumConfigOperands = 6;

    // This needs to quietly verify if attributes with names defined below are
    // present since it is run before the verifier of this op.
    friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
                                                              NamedAttribute);

    /// The name of the symbolRef attribute specifying the kernel to launch.
    static StringRef getKernelAttrName() { return "kernel"; }

    /// The name of the symbolRef attribute specifying the name of the module
    /// containing the kernel to launch.
    static StringRef getKernelModuleAttrName() { return "kernel_module"; }
  }];

  let verifier = [{ return ::verify(*this); }];
}
def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
Arguments<(ins Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,

View File

@@ -34,6 +34,12 @@ def LLVM_Dialect : Dialect {
def LLVM_Type : Type<CPred<"$_self.isa<::mlir::LLVM::LLVMType>()">,
"LLVM dialect type">;
// Type constraint accepting only wrapped LLVM integer types, i.e. LLVM dialect
// types whose underlying llvm::Type satisfies isIntegerTy().
def LLVMInt : TypeConstraint<
And<[LLVM_Type.predicate,
CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>,
"LLVM dialect integer">;
// Base class for LLVM operations. Defines the interface to the llvm::IRBuilder
// used to translate to LLVM IR proper.
class LLVM_OpBase<Dialect dialect, string mnemonic, list<OpTrait> traits = []> :

View File

@@ -46,10 +46,10 @@ bool GPUDialect::isKernel(Operation *op) {
GPUDialect::GPUDialect(MLIRContext *context)
: Dialect(getDialectName(), context) {
addOperations<LaunchFuncOp,
addOperations<
#define GET_OP_LIST
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
>();
>();
}
LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
@@ -545,26 +545,26 @@ KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};
}
LogicalResult LaunchFuncOp::verify() {
auto module = getParentOfType<ModuleOp>();
LogicalResult verify(LaunchFuncOp op) {
auto module = op.getParentOfType<ModuleOp>();
if (!module)
return emitOpError("expected to belong to a module");
return op.emitOpError("expected to belong to a module");
if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))
return emitOpError("expected the closest surrounding module to have the '" +
GPUDialect::getContainerModuleAttrName() +
"' attribute");
return op.emitOpError(
"expected the closest surrounding module to have the '" +
GPUDialect::getContainerModuleAttrName() + "' attribute");
auto kernelAttr = getAttrOfType<StringAttr>(getKernelAttrName());
auto kernelAttr = op.getAttrOfType<StringAttr>(op.getKernelAttrName());
if (!kernelAttr)
return emitOpError("string attribute '" + getKernelAttrName() +
"' must be specified");
return op.emitOpError("string attribute '" + op.getKernelAttrName() +
"' must be specified");
auto kernelModuleAttr =
getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName());
op.getAttrOfType<SymbolRefAttr>(op.getKernelModuleAttrName());
if (!kernelModuleAttr)
return emitOpError("symbol reference attribute '" +
getKernelModuleAttrName() + "' must be specified");
return op.emitOpError("symbol reference attribute '" +
op.getKernelModuleAttrName() + "' must be specified");
return success();
}