forked from OSchip/llvm-project
Revert "[mlir] Create a gpu.module operation for the GPU Dialect."
This reverts commit 4624a1e8ac
. Causing
problems downstream.
This commit is contained in:
parent
76b92cc7c1
commit
0133cc60e4
|
@ -19,17 +19,12 @@ namespace mlir {
|
|||
class Location;
|
||||
class ModuleOp;
|
||||
|
||||
template <typename T>
|
||||
class OpPassBase;
|
||||
|
||||
namespace gpu {
|
||||
class GPUModuleOp;
|
||||
} // namespace gpu
|
||||
|
||||
namespace LLVM {
|
||||
class LLVMDialect;
|
||||
} // namespace LLVM
|
||||
|
||||
template <typename T> class OpPassBase;
|
||||
|
||||
using OwnedCubin = std::unique_ptr<std::vector<char>>;
|
||||
using CubinGenerator =
|
||||
std::function<OwnedCubin(const std::string &, Location, StringRef)>;
|
||||
|
@ -43,7 +38,7 @@ using CubinGenerator =
|
|||
/// attached as a string attribute named 'nvvm.cubin' to the kernel function.
|
||||
/// After the transformation, the body of the kernel function is removed (i.e.,
|
||||
/// it is turned into a declaration).
|
||||
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
|
||||
std::unique_ptr<OpPassBase<ModuleOp>>
|
||||
createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator);
|
||||
|
||||
/// Creates a pass to convert a gpu.launch_func operation into a sequence of
|
||||
|
|
|
@ -14,19 +14,15 @@ namespace mlir {
|
|||
class LLVMTypeConverter;
|
||||
class OwningRewritePatternList;
|
||||
|
||||
template <typename OpT>
|
||||
class OpPassBase;
|
||||
|
||||
namespace gpu {
|
||||
class GPUModuleOp;
|
||||
}
|
||||
class ModuleOp;
|
||||
template <typename OpT> class OpPassBase;
|
||||
|
||||
/// Collect a set of patterns to convert from the GPU dialect to NVVM.
|
||||
void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
|
||||
OwningRewritePatternList &patterns);
|
||||
|
||||
/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.
|
||||
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass();
|
||||
std::unique_ptr<OpPassBase<ModuleOp>> createLowerGpuOpsToNVVMOpsPass();
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
|
|
|
@ -588,56 +588,4 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
|
|||
let printer = [{ p << getOperationName(); }];
|
||||
}
|
||||
|
||||
def GPU_GPUModuleOp : GPU_Op<"module", [
|
||||
IsolatedFromAbove, SymbolTable, Symbol,
|
||||
SingleBlockImplicitTerminator<"ModuleEndOp">
|
||||
]> {
|
||||
let summary = "A top level compilation unit containing code to be run on a GPU.";
|
||||
let description = [{
|
||||
GPU module contains code that is intended to be run on a GPU. A host device
|
||||
can launch this code through a gpu.launc_func that creates a fully
|
||||
qualified symbol through the gpu.module's symbol and a gpu.func symbol
|
||||
contained in the gpu.module.
|
||||
|
||||
The module's top-level scope is modeled by a single region with a single
|
||||
block. GPU modules are required to have a name that is used for symbol
|
||||
resolution by the gpu.launch_func operation.
|
||||
|
||||
Using an op with a region to define a GPU module enables "embedding" GPU
|
||||
modules with SIMT execution models in other dialects in a clean manner and
|
||||
allows filtering of code regions to execute passes on only code intended to
|
||||
or not intended to be run on the separate device.
|
||||
|
||||
```
|
||||
gpu.module @symbol_name {
|
||||
gpu.func {}
|
||||
...
|
||||
gpu.module_end
|
||||
}
|
||||
|
||||
```
|
||||
}];
|
||||
let builders = [OpBuilder<"Builder *builder, OperationState &result, "
|
||||
"StringRef name">];
|
||||
let parser = [{ return ::parseGPUModuleOp(parser, result); }];
|
||||
let printer = [{ return ::print(p, *this); }];
|
||||
let regions = (region SizedRegion<1>:$body);
|
||||
|
||||
// We need to ensure the block inside the region is properly terminated;
|
||||
// the auto-generated builders do not guarantee that.
|
||||
let skipDefaultBuilders = 1;
|
||||
}
|
||||
|
||||
def GPU_ModuleEndOp : GPU_Op<"module_end", [
|
||||
Terminator, HasParent<"GPUModuleOp">
|
||||
]> {
|
||||
let summary = "A pseudo op that marks the end of a gpu.module.";
|
||||
let description = [{
|
||||
This op terminates the only block inside the only region of a `gpu.module`.
|
||||
}];
|
||||
|
||||
let parser = [{ return success(); }];
|
||||
let printer = [{ p << getOperationName(); }];
|
||||
}
|
||||
|
||||
#endif // GPU_OPS
|
||||
|
|
|
@ -46,15 +46,18 @@ static constexpr const char *kCubinAnnotation = "nvvm.cubin";
|
|||
/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
|
||||
/// GPU binary code, which is then attached as an attribute to the function. The
|
||||
/// function body is erased.
|
||||
class GpuKernelToCubinPass
|
||||
: public OperationPass<GpuKernelToCubinPass, gpu::GPUModuleOp> {
|
||||
class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> {
|
||||
public:
|
||||
GpuKernelToCubinPass(
|
||||
CubinGenerator cubinGenerator = compilePtxToCubinForTesting)
|
||||
: cubinGenerator(cubinGenerator) {}
|
||||
|
||||
void runOnOperation() override {
|
||||
gpu::GPUModuleOp module = getOperation();
|
||||
void runOnModule() override {
|
||||
ModuleOp module = getModule();
|
||||
if (!module.getAttrOfType<UnitAttr>(
|
||||
gpu::GPUDialect::getKernelModuleAttrName()) ||
|
||||
!module.getName())
|
||||
return;
|
||||
|
||||
// Make sure the NVPTX target is initialized.
|
||||
LLVMInitializeNVPTXTarget();
|
||||
|
@ -68,8 +71,8 @@ public:
|
|||
|
||||
// Translate the module to CUBIN and attach the result as attribute to the
|
||||
// module.
|
||||
if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
|
||||
*llvmModule, module.getLoc(), module.getName()))
|
||||
if (auto cubinAttr = translateGpuModuleToCubinAnnotation(
|
||||
*llvmModule, module.getLoc(), *module.getName()))
|
||||
module.setAttr(kCubinAnnotation, cubinAttr);
|
||||
else
|
||||
signalPassFailure();
|
||||
|
@ -89,7 +92,7 @@ private:
|
|||
StringRef name);
|
||||
|
||||
/// Translates llvmModule to cubin and returns the result as attribute.
|
||||
StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
|
||||
StringAttr translateGpuModuleToCubinAnnotation(llvm::Module &llvmModule,
|
||||
Location loc, StringRef name);
|
||||
|
||||
CubinGenerator cubinGenerator;
|
||||
|
@ -146,7 +149,7 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
|
|||
return cubinGenerator(ptx, loc, name);
|
||||
}
|
||||
|
||||
StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
|
||||
StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
|
||||
llvm::Module &llvmModule, Location loc, StringRef name) {
|
||||
auto cubin = convertModuleToCubin(llvmModule, loc, name);
|
||||
if (!cubin)
|
||||
|
@ -154,7 +157,7 @@ StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
|
|||
return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
|
||||
}
|
||||
|
||||
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
|
||||
std::unique_ptr<OpPassBase<ModuleOp>>
|
||||
mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
|
||||
return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
|
||||
}
|
||||
|
|
|
@ -132,8 +132,8 @@ public:
|
|||
|
||||
// GPU kernel modules are no longer necessary since we have a global
|
||||
// constant with the CUBIN data.
|
||||
for (auto m :
|
||||
llvm::make_early_inc_range(getModule().getOps<gpu::GPUModuleOp>()))
|
||||
for (auto m : llvm::make_early_inc_range(getModule().getOps<ModuleOp>()))
|
||||
if (m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
|
||||
m.erase();
|
||||
}
|
||||
|
||||
|
@ -343,8 +343,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
|
|||
builder.getI32IntegerAttr(0));
|
||||
// Create an LLVM global with CUBIN extracted from the kernel annotation and
|
||||
// obtain a pointer to the first byte in it.
|
||||
auto kernelModule = getModule().lookupSymbol<gpu::GPUModuleOp>(
|
||||
launchOp.getKernelModuleName());
|
||||
auto kernelModule =
|
||||
getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
|
||||
assert(kernelModule && "expected a kernel module");
|
||||
|
||||
auto cubinAttr = kernelModule.getAttrOfType<StringAttr>(kCubinAnnotation);
|
||||
|
@ -354,7 +354,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
|
|||
return signalPassFailure();
|
||||
}
|
||||
|
||||
SmallString<128> nameBuffer(kernelModule.getName());
|
||||
assert(kernelModule.getName() && "expected a named module");
|
||||
SmallString<128> nameBuffer(*kernelModule.getName());
|
||||
nameBuffer.append(kCubinStorageSuffix);
|
||||
Value data = LLVM::createGlobalString(
|
||||
loc, builder, nameBuffer.str(), cubinAttr.getValue(),
|
||||
|
|
|
@ -200,7 +200,7 @@ private:
|
|||
auto type = operand.getType().cast<LLVM::LLVMType>();
|
||||
|
||||
// Create shared memory array to store the warp reduction.
|
||||
auto module = operand.getDefiningOp()->getParentOfType<gpu::GPUModuleOp>();
|
||||
auto module = operand.getDefiningOp()->getParentOfType<ModuleOp>();
|
||||
assert(module && "op must belong to a module");
|
||||
Value sharedMemPtr =
|
||||
createSharedMemoryArray(loc, module, type, kWarpSize, rewriter);
|
||||
|
@ -391,10 +391,10 @@ private:
|
|||
}
|
||||
|
||||
/// Creates a global array stored in shared memory.
|
||||
Value createSharedMemoryArray(Location loc, gpu::GPUModuleOp module,
|
||||
Value createSharedMemoryArray(Location loc, ModuleOp module,
|
||||
LLVM::LLVMType elementType, int numElements,
|
||||
ConversionPatternRewriter &rewriter) const {
|
||||
OpBuilder builder(module.body());
|
||||
OpBuilder builder(module.getBodyRegion());
|
||||
|
||||
auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements);
|
||||
StringRef name = "reduce_buffer";
|
||||
|
@ -699,11 +699,13 @@ struct GPUReturnOpLowering : public LLVMOpLowering {
|
|||
///
|
||||
/// This pass only handles device code and is not meant to be run on GPU host
|
||||
/// code.
|
||||
class LowerGpuOpsToNVVMOpsPass
|
||||
: public OperationPass<LowerGpuOpsToNVVMOpsPass, gpu::GPUModuleOp> {
|
||||
class LowerGpuOpsToNVVMOpsPass : public ModulePass<LowerGpuOpsToNVVMOpsPass> {
|
||||
public:
|
||||
void runOnOperation() override {
|
||||
gpu::GPUModuleOp m = getOperation();
|
||||
void runOnModule() override {
|
||||
ModuleOp m = getModule();
|
||||
if (!m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
|
||||
return;
|
||||
|
||||
OwningRewritePatternList patterns;
|
||||
NVVMTypeConverter converter(m.getContext());
|
||||
populateStdToLLVMConversionPatterns(converter, patterns);
|
||||
|
@ -716,7 +718,7 @@ public:
|
|||
target.addLegalDialect<LLVM::LLVMDialect>();
|
||||
target.addLegalDialect<NVVM::NVVMDialect>();
|
||||
// TODO(csigg): Remove once we support replacing non-root ops.
|
||||
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>();
|
||||
target.addLegalOp<gpu::YieldOp>();
|
||||
if (failed(applyPartialConversion(m, target, patterns, &converter)))
|
||||
signalPassFailure();
|
||||
}
|
||||
|
@ -748,8 +750,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
|
|||
"__nv_exp");
|
||||
}
|
||||
|
||||
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
|
||||
mlir::createLowerGpuOpsToNVVMOpsPass() {
|
||||
std::unique_ptr<OpPassBase<ModuleOp>> mlir::createLowerGpuOpsToNVVMOpsPass() {
|
||||
return std::make_unique<LowerGpuOpsToNVVMOpsPass>();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,15 +1,8 @@
|
|||
set(LLVM_TARGET_DEFINITIONS GPUToSPIRV.td)
|
||||
mlir_tablegen(GPUToSPIRV.cpp.inc -gen-rewriters)
|
||||
add_public_tablegen_target(MLIRGPUToSPIRVIncGen)
|
||||
|
||||
add_llvm_library(MLIRGPUtoSPIRVTransforms
|
||||
ConvertGPUToSPIRV.cpp
|
||||
ConvertGPUToSPIRVPass.cpp
|
||||
)
|
||||
|
||||
add_dependencies(MLIRGPUtoSPIRVTransforms
|
||||
MLIRGPUToSPIRVIncGen)
|
||||
|
||||
target_link_libraries(MLIRGPUtoSPIRVTransforms
|
||||
MLIRGPU
|
||||
MLIRIR
|
||||
|
|
|
@ -63,13 +63,27 @@ private:
|
|||
SmallVector<int32_t, 3> workGroupSizeAsInt32;
|
||||
};
|
||||
|
||||
/// Pattern to convert a gpu.module to a spv.module.
|
||||
class GPUModuleConversion final : public SPIRVOpLowering<gpu::GPUModuleOp> {
|
||||
/// Pattern to convert a module with gpu.kernel_module attribute to a
|
||||
/// spv.module.
|
||||
class KernelModuleConversion final : public SPIRVOpLowering<ModuleOp> {
|
||||
public:
|
||||
using SPIRVOpLowering<gpu::GPUModuleOp>::SPIRVOpLowering;
|
||||
using SPIRVOpLowering<ModuleOp>::SPIRVOpLowering;
|
||||
|
||||
PatternMatchResult
|
||||
matchAndRewrite(gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
|
||||
matchAndRewrite(ModuleOp moduleOp, ArrayRef<Value> operands,
|
||||
ConversionPatternRewriter &rewriter) const override;
|
||||
};
|
||||
|
||||
/// Pattern to convert a module terminator op to a terminator of spv.module op.
|
||||
// TODO: Move this into DRR, but that requires ModuleTerminatorOp to be defined
|
||||
// in ODS.
|
||||
class KernelModuleTerminatorConversion final
|
||||
: public SPIRVOpLowering<ModuleTerminatorOp> {
|
||||
public:
|
||||
using SPIRVOpLowering<ModuleTerminatorOp>::SPIRVOpLowering;
|
||||
|
||||
PatternMatchResult
|
||||
matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
|
||||
ConversionPatternRewriter &rewriter) const override;
|
||||
};
|
||||
|
||||
|
@ -270,12 +284,16 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
|
|||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ModuleOp with gpu.module.
|
||||
// ModuleOp with gpu.kernel_module.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
PatternMatchResult GPUModuleConversion::matchAndRewrite(
|
||||
gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
|
||||
PatternMatchResult KernelModuleConversion::matchAndRewrite(
|
||||
ModuleOp moduleOp, ArrayRef<Value> operands,
|
||||
ConversionPatternRewriter &rewriter) const {
|
||||
if (!moduleOp.getAttrOfType<UnitAttr>(
|
||||
gpu::GPUDialect::getKernelModuleAttrName())) {
|
||||
return matchFailure();
|
||||
}
|
||||
// TODO : Generalize this to account for different extensions,
|
||||
// capabilities, extended_instruction_sets, other addressing models
|
||||
// and memory models.
|
||||
|
@ -284,8 +302,8 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
|
|||
spirv::MemoryModel::GLSL450, spirv::Capability::Shader,
|
||||
spirv::Extension::SPV_KHR_storage_buffer_storage_class);
|
||||
// Move the region from the module op into the SPIR-V module.
|
||||
Region &spvModuleRegion = spvModule.body();
|
||||
rewriter.inlineRegionBefore(moduleOp.body(), spvModuleRegion,
|
||||
Region &spvModuleRegion = spvModule.getOperation()->getRegion(0);
|
||||
rewriter.inlineRegionBefore(moduleOp.getBodyRegion(), spvModuleRegion,
|
||||
spvModuleRegion.begin());
|
||||
// The spv.module build method adds a block with a terminator. Remove that
|
||||
// block. The terminator of the module op in the remaining block will be
|
||||
|
@ -295,6 +313,17 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
|
|||
return matchSuccess();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ModuleTerminatorOp for gpu.kernel_module.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
|
||||
ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
|
||||
ConversionPatternRewriter &rewriter) const {
|
||||
rewriter.replaceOpWithNewOp<spirv::ModuleEndOp>(terminatorOp);
|
||||
return matchSuccess();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GPU return inside kernel functions to SPIR-V return.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -313,18 +342,14 @@ PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
|
|||
// GPU To SPIRV Patterns.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
#include "GPUToSPIRV.cpp.inc"
|
||||
}
|
||||
|
||||
void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
|
||||
SPIRVTypeConverter &typeConverter,
|
||||
OwningRewritePatternList &patterns,
|
||||
ArrayRef<int64_t> workGroupSize) {
|
||||
populateWithGenerated(context, &patterns);
|
||||
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
|
||||
patterns.insert<
|
||||
GPUReturnOpConversion, ForOpConversion, GPUModuleConversion,
|
||||
GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
|
||||
KernelModuleTerminatorConversion,
|
||||
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
|
||||
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
|
||||
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
|
||||
|
|
|
@ -60,12 +60,15 @@ void GPUToSPIRVPass::runOnModule() {
|
|||
|
||||
SmallVector<Operation *, 1> kernelModules;
|
||||
OpBuilder builder(context);
|
||||
module.walk([&builder, &kernelModules](gpu::GPUModuleOp moduleOp) {
|
||||
module.walk([&builder, &kernelModules](ModuleOp moduleOp) {
|
||||
if (moduleOp.getAttrOfType<UnitAttr>(
|
||||
gpu::GPUDialect::getKernelModuleAttrName())) {
|
||||
// For each kernel module (should be only 1 for now, but that is not a
|
||||
// requirement here), clone the module for conversion because the
|
||||
// gpu.launch function still needs the kernel module.
|
||||
builder.setInsertionPoint(moduleOp.getOperation());
|
||||
kernelModules.push_back(builder.clone(*moduleOp.getOperation()));
|
||||
}
|
||||
});
|
||||
|
||||
SPIRVTypeConverter typeConverter;
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
//===-- GPUToSPIRV.td - GPU to SPIR-V Dialect Lowerings ----*- tablegen -*-===//
|
||||
//
|
||||
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains patterns to lower GPU dialect ops to to SPIR-V ops.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef CONVERT_GPU_TO_SPIRV
|
||||
#define CONVERT_GPU_TO_SPIRV
|
||||
|
||||
include "mlir/Dialect/GPU/GPUOps.td"
|
||||
include "mlir/Dialect/SPIRV/SPIRVStructureOps.td"
|
||||
|
||||
def : Pat<(GPU_ModuleEndOp), (SPV_ModuleEndOp)>;
|
||||
|
||||
#endif // CONVERT_GPU_TO_SPIRV
|
|
@ -72,10 +72,15 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
|
|||
|
||||
// Check that `launch_func` refers to a well-formed GPU kernel module.
|
||||
StringRef kernelModuleName = launchOp.getKernelModuleName();
|
||||
auto kernelModule = module.lookupSymbol<GPUModuleOp>(kernelModuleName);
|
||||
auto kernelModule = module.lookupSymbol<ModuleOp>(kernelModuleName);
|
||||
if (!kernelModule)
|
||||
return launchOp.emitOpError()
|
||||
<< "kernel module '" << kernelModuleName << "' is undefined";
|
||||
if (!kernelModule.getAttrOfType<UnitAttr>(
|
||||
GPUDialect::getKernelModuleAttrName()))
|
||||
return launchOp.emitOpError("module '")
|
||||
<< kernelModuleName << "' is missing the '"
|
||||
<< GPUDialect::getKernelModuleAttrName() << "' attribute";
|
||||
|
||||
// Check that `launch_func` refers to a well-formed kernel function.
|
||||
StringRef kernelName = launchOp.kernel();
|
||||
|
@ -512,9 +517,10 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
|||
result.addOperands(kernelOperands);
|
||||
result.addAttribute(getKernelAttrName(),
|
||||
builder->getStringAttr(kernelFunc.getName()));
|
||||
auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
|
||||
auto kernelModule = kernelFunc.getParentOfType<ModuleOp>();
|
||||
if (Optional<StringRef> kernelModuleName = kernelModule.getName())
|
||||
result.addAttribute(getKernelModuleAttrName(),
|
||||
builder->getSymbolRefAttr(kernelModule.getName()));
|
||||
builder->getSymbolRefAttr(*kernelModuleName));
|
||||
}
|
||||
|
||||
void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
||||
|
@ -814,47 +820,6 @@ LogicalResult GPUFuncOp::verifyBody() {
|
|||
return success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GPUModuleOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void GPUModuleOp::build(Builder *builder, OperationState &result,
|
||||
StringRef name) {
|
||||
ensureTerminator(*result.addRegion(), *builder, result.location);
|
||||
result.attributes.push_back(builder->getNamedAttr(
|
||||
::mlir::SymbolTable::getSymbolAttrName(), builder->getStringAttr(name)));
|
||||
}
|
||||
|
||||
static ParseResult parseGPUModuleOp(OpAsmParser &parser,
|
||||
OperationState &result) {
|
||||
StringAttr nameAttr;
|
||||
if (parser.parseSymbolName(nameAttr, SymbolTable::getSymbolAttrName(),
|
||||
result.attributes))
|
||||
return failure();
|
||||
|
||||
// If module attributes are present, parse them.
|
||||
if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
|
||||
return failure();
|
||||
|
||||
// Parse the module body.
|
||||
auto *body = result.addRegion();
|
||||
if (parser.parseRegion(*body, None, None))
|
||||
return failure();
|
||||
|
||||
// Ensure that this module has a valid terminator.
|
||||
GPUModuleOp::ensureTerminator(*body, parser.getBuilder(), result.location);
|
||||
return success();
|
||||
}
|
||||
|
||||
static void print(OpAsmPrinter &p, GPUModuleOp op) {
|
||||
p << op.getOperationName() << ' ';
|
||||
p.printSymbolName(op.getName());
|
||||
p.printOptionalAttrDictWithKeyword(op.getAttrs(),
|
||||
{SymbolTable::getSymbolAttrName()});
|
||||
p.printRegion(op.getOperation()->getRegion(0), /*printEntryBlockArgs=*/false,
|
||||
/*printBlockTerminators=*/false);
|
||||
}
|
||||
|
||||
// Namespace avoids ambiguous ReturnOpOperandAdaptor.
|
||||
namespace mlir {
|
||||
namespace gpu {
|
||||
|
|
|
@ -140,8 +140,8 @@ namespace {
|
|||
/// inside a nested module. It also creates an external function of the same
|
||||
/// name in the parent module.
|
||||
///
|
||||
/// The gpu.modules are intended to be compiled to a cubin blob independently in
|
||||
/// a separate pass. The external functions can then be annotated with the
|
||||
/// The kernel modules are intended to be compiled to a cubin blob independently
|
||||
/// in a separate pass. The external functions can then be annotated with the
|
||||
/// symbol of the cubin accessor function.
|
||||
class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
|
||||
public:
|
||||
|
@ -174,19 +174,15 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
// Returns a gpu.module containing kernelFunc and all callees (recursive).
|
||||
gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
|
||||
// Returns a module containing kernelFunc and all callees (recursive).
|
||||
ModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
|
||||
const SymbolTable &parentSymbolTable) {
|
||||
// TODO: This code cannot use an OpBuilder because it must be inserted into
|
||||
// a SymbolTable by the caller. SymbolTable needs to be refactored to
|
||||
// prevent manual building of Ops with symbols in code using SymbolTables
|
||||
// and then this needs to use the OpBuilder.
|
||||
auto context = getModule().getContext();
|
||||
Builder builder(context);
|
||||
OperationState state(kernelFunc.getLoc(),
|
||||
gpu::GPUModuleOp::getOperationName());
|
||||
gpu::GPUModuleOp::build(&builder, state, kernelFunc.getName());
|
||||
auto kernelModule = cast<gpu::GPUModuleOp>(Operation::create(state));
|
||||
auto kernelModule =
|
||||
ModuleOp::create(builder.getUnknownLoc(), kernelFunc.getName());
|
||||
kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(),
|
||||
builder.getUnitAttr());
|
||||
SymbolTable symbolTable(kernelModule);
|
||||
symbolTable.insert(kernelFunc);
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ module attributes {gpu.container_module} {
|
|||
// CHECK: llvm.mlir.global internal constant @[[kernel_name:.*]]("kernel\00")
|
||||
// CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN")
|
||||
|
||||
gpu.module @kernel_module attributes {nvvm.cubin = "CUBIN"} {
|
||||
module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} {
|
||||
gpu.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} {
|
||||
gpu.return
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// RUN: mlir-opt %s --test-kernel-to-cubin -split-input-file | FileCheck %s
|
||||
|
||||
// CHECK: attributes {nvvm.cubin = "CUBIN"}
|
||||
gpu.module @foo {
|
||||
// CHECK: attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"}
|
||||
module @foo attributes {gpu.kernel_module} {
|
||||
llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
|
||||
// CHECK: attributes {gpu.kernel}
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -11,7 +11,7 @@ gpu.module @foo {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @bar {
|
||||
module @bar attributes {gpu.kernel_module} {
|
||||
// CHECK: func @kernel_a
|
||||
llvm.func @kernel_a()
|
||||
attributes { gpu.kernel } {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: func @gpu_index_ops()
|
||||
func @gpu_index_ops()
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -38,7 +38,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: func @gpu_all_reduce_op()
|
||||
func @gpu_all_reduce_op()
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -55,7 +55,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: func @gpu_all_reduce_region()
|
||||
func @gpu_all_reduce_region()
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -74,7 +74,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: func @gpu_shuffle()
|
||||
func @gpu_shuffle()
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -99,7 +99,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: func @gpu_sync()
|
||||
func @gpu_sync()
|
||||
attributes { gpu.kernel } {
|
||||
|
@ -111,7 +111,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_fabs
|
||||
|
@ -126,7 +126,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_ceil
|
||||
|
@ -141,7 +141,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_cos
|
||||
|
@ -156,7 +156,7 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_exp
|
||||
|
@ -174,7 +174,7 @@ gpu.module @test_module {
|
|||
// -----
|
||||
|
||||
// Test that we handled properly operation with SymbolTable other than module op
|
||||
gpu.module @test_module {
|
||||
module attributes {gpu.kernel_module} {
|
||||
"test.symbol_scope"() ({
|
||||
// CHECK: test.symbol_scope
|
||||
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// RUN: mlir-opt --convert-gpu-to-nvvm --split-input-file %s | FileCheck %s
|
||||
|
||||
gpu.module @kernel {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: llvm.func @private
|
||||
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
|
||||
// Allocate private memory inside the function.
|
||||
|
@ -32,7 +32,7 @@ gpu.module @kernel {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @kernel {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// Workgroup buffers are allocated as globals.
|
||||
// CHECK: llvm.mlir.global internal @[[buffer:.*]]()
|
||||
// CHECK-SAME: addr_space = 3
|
||||
|
@ -72,7 +72,7 @@ gpu.module @kernel {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @kernel {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// Check that the total size was computed correctly.
|
||||
// CHECK: llvm.mlir.global internal @[[buffer:.*]]()
|
||||
// CHECK-SAME: addr_space = 3
|
||||
|
@ -113,7 +113,7 @@ gpu.module @kernel {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @kernel {
|
||||
module attributes {gpu.kernel_module} {
|
||||
// Check that several buffers are defined.
|
||||
// CHECK: llvm.mlir.global internal @[[buffer1:.*]]()
|
||||
// CHECK-SAME: !llvm<"[1 x float]">
|
||||
|
|
|
@ -9,7 +9,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_workgroup_id_x()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
|
||||
|
@ -32,7 +32,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_workgroup_id_y()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
|
||||
|
@ -55,7 +55,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_workgroup_id_z()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
|
||||
|
@ -78,7 +78,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_workgroup_size_x()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
|
||||
|
@ -101,7 +101,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_local_id_x()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
|
||||
|
@ -124,7 +124,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @builtin_num_workgroups_x()
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
|
||||
|
|
|
@ -16,7 +16,7 @@ module attributes {gpu.container_module} {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: spv.module "Logical" "GLSL450"
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
// CHECK-DAG: spv.globalVariable [[WORKGROUPSIZEVAR:@.*]] built_in("WorkgroupSize") : !spv.ptr<vector<3xi32>, Input>
|
||||
// CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
|
||||
// CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
|
||||
|
|
|
@ -7,7 +7,7 @@ module attributes {gpu.container_module} {
|
|||
return
|
||||
}
|
||||
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
|
||||
attributes {gpu.kernel} {
|
||||
// CHECK: [[LB:%.*]] = spv.constant 4 : i32
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
module attributes {gpu.container_module} {
|
||||
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
// CHECK: spv.module "Logical" "GLSL450" {
|
||||
// CHECK-LABEL: func @kernel_1
|
||||
// CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
|
||||
|
|
|
@ -167,7 +167,7 @@ module attributes {gpu.container_module} {
|
|||
}
|
||||
|
||||
func @launch_func_missing_module_attribute(%sz : index) {
|
||||
// expected-error@+1 {{kernel module 'kernels' is undefined}}
|
||||
// expected-error@+1 {{module 'kernels' is missing the 'gpu.kernel_module' attribute}}
|
||||
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
|
||||
{ kernel = "kernel_1", kernel_module = @kernels }
|
||||
: (index, index, index, index, index, index) -> ()
|
||||
|
@ -178,7 +178,8 @@ module attributes {gpu.container_module} {
|
|||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
gpu.module @kernels { }
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
}
|
||||
|
||||
func @launch_func_undefined_function(%sz : index) {
|
||||
// expected-error@+1 {{kernel function 'kernel_1' is undefined}}
|
||||
|
@ -192,7 +193,7 @@ module attributes {gpu.container_module} {
|
|||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
|
||||
gpu.return
|
||||
}
|
||||
|
@ -210,7 +211,7 @@ module attributes {gpu.container_module} {
|
|||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
|
||||
gpu.return
|
||||
}
|
||||
|
@ -228,7 +229,7 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// -----
|
||||
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
|
||||
gpu.return
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ module attributes {gpu.container_module} {
|
|||
return
|
||||
}
|
||||
|
||||
gpu.module @kernels {
|
||||
module @kernels attributes {gpu.kernel_module} {
|
||||
gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) attributes {gpu.kernel} {
|
||||
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
|
||||
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
|
||||
|
|
|
@ -136,7 +136,7 @@ func @recursive_device_function() {
|
|||
gpu.return
|
||||
}
|
||||
|
||||
// CHECK: gpu.module @function_call_kernel {
|
||||
// CHECK: module @function_call_kernel attributes {gpu.kernel_module} {
|
||||
// CHECK: gpu.func @function_call_kernel()
|
||||
// CHECK: call @device_function() : () -> ()
|
||||
// CHECK: call @device_function() : () -> ()
|
||||
|
|
|
@ -105,7 +105,7 @@ static LogicalResult runMLIRPasses(ModuleOp m) {
|
|||
applyPassManagerCLOptions(pm);
|
||||
|
||||
pm.addPass(createGpuKernelOutliningPass());
|
||||
auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
|
||||
auto &kernelPm = pm.nest<ModuleOp>();
|
||||
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
|
||||
kernelPm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
|
||||
pm.addPass(createLowerToLLVMPass());
|
||||
|
|
Loading…
Reference in New Issue