forked from OSchip/llvm-project
150 lines
5.3 KiB
C++
150 lines
5.3 KiB
C++
//===- ConvertKernelFuncToCubin.cpp - MLIR GPU lowering passes ------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a pass to convert gpu kernel functions into a
|
|
// corresponding binary blob that can be executed on a CUDA GPU. Currently
|
|
// only translates the function itself but no dependencies.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
|
|
|
|
#include "mlir/Dialect/GPU/GPUDialect.h"
|
|
#include "mlir/IR/Attributes.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/Function.h"
|
|
#include "mlir/IR/Module.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Pass/PassRegistry.h"
|
|
#include "mlir/Support/LogicalResult.h"
|
|
#include "mlir/Target/NVVMIR.h"
|
|
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
// TODO(herhut): Move to shared location.
|
|
static constexpr const char *kCubinAnnotation = "nvvm.cubin";
|
|
|
|
/// A pass converting tagged kernel modules to cubin blobs.
|
|
///
|
|
/// If tagged as a kernel module, each contained function is translated to NVVM
|
|
/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
|
|
/// GPU binary code, which is then attached as an attribute to the function. The
|
|
/// function body is erased.
|
|
class GpuKernelToCubinPass
|
|
: public OperationPass<GpuKernelToCubinPass, gpu::GPUModuleOp> {
|
|
public:
|
|
GpuKernelToCubinPass(CubinGenerator cubinGenerator)
|
|
: cubinGenerator(cubinGenerator) {}
|
|
|
|
void runOnOperation() override {
|
|
gpu::GPUModuleOp module = getOperation();
|
|
|
|
// Make sure the NVPTX target is initialized.
|
|
LLVMInitializeNVPTXTarget();
|
|
LLVMInitializeNVPTXTargetInfo();
|
|
LLVMInitializeNVPTXTargetMC();
|
|
LLVMInitializeNVPTXAsmPrinter();
|
|
|
|
auto llvmModule = translateModuleToNVVMIR(module);
|
|
if (!llvmModule)
|
|
return signalPassFailure();
|
|
|
|
// Translate the module to CUBIN and attach the result as attribute to the
|
|
// module.
|
|
if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
|
|
*llvmModule, module.getLoc(), module.getName()))
|
|
module.setAttr(kCubinAnnotation, cubinAttr);
|
|
else
|
|
signalPassFailure();
|
|
}
|
|
|
|
private:
|
|
std::string translateModuleToPtx(llvm::Module &module,
|
|
llvm::TargetMachine &target_machine);
|
|
|
|
/// Converts llvmModule to cubin using the user-provided generator. Location
|
|
/// is used for error reporting and name is forwarded to the CUBIN generator
|
|
/// to use in its logging mechanisms.
|
|
OwnedCubin convertModuleToCubin(llvm::Module &llvmModule, Location loc,
|
|
StringRef name);
|
|
|
|
/// Translates llvmModule to cubin and returns the result as attribute.
|
|
StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
|
|
Location loc, StringRef name);
|
|
|
|
CubinGenerator cubinGenerator;
|
|
};
|
|
|
|
} // anonymous namespace
|
|
|
|
std::string GpuKernelToCubinPass::translateModuleToPtx(
|
|
llvm::Module &module, llvm::TargetMachine &target_machine) {
|
|
std::string ptx;
|
|
{
|
|
llvm::raw_string_ostream stream(ptx);
|
|
llvm::buffer_ostream pstream(stream);
|
|
llvm::legacy::PassManager codegen_passes;
|
|
target_machine.addPassesToEmitFile(codegen_passes, pstream, nullptr,
|
|
llvm::CGFT_AssemblyFile);
|
|
codegen_passes.run(module);
|
|
}
|
|
|
|
return ptx;
|
|
}
|
|
|
|
OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
|
|
Location loc,
|
|
StringRef name) {
|
|
std::unique_ptr<llvm::TargetMachine> targetMachine;
|
|
{
|
|
std::string error;
|
|
// TODO(herhut): Make triple configurable.
|
|
constexpr const char *cudaTriple = "nvptx64-nvidia-cuda";
|
|
llvm::Triple triple(cudaTriple);
|
|
const llvm::Target *target =
|
|
llvm::TargetRegistry::lookupTarget("", triple, error);
|
|
if (target == nullptr) {
|
|
emitError(loc, "cannot initialize target triple");
|
|
return {};
|
|
}
|
|
targetMachine.reset(
|
|
target->createTargetMachine(triple.str(), "sm_35", "+ptx60", {}, {}));
|
|
}
|
|
|
|
// Set the data layout of the llvm module to match what the ptx target needs.
|
|
llvmModule.setDataLayout(targetMachine->createDataLayout());
|
|
|
|
auto ptx = translateModuleToPtx(llvmModule, *targetMachine);
|
|
|
|
return cubinGenerator(ptx, loc, name);
|
|
}
|
|
|
|
StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
|
|
llvm::Module &llvmModule, Location loc, StringRef name) {
|
|
auto cubin = convertModuleToCubin(llvmModule, loc, name);
|
|
if (!cubin)
|
|
return {};
|
|
return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
|
|
}
|
|
|
|
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
|
|
mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
|
|
return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
|
|
}
|