Unify GPU op definition names with other dialects.

Rename GPU op names from gpu_Foo to GPU_FooOp.

PiperOrigin-RevId: 275882232

commit b74af4aa5c (parent 03d7be2aca)
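Note: the rename is mechanical, but it touches every C++ call site, because in ODS the TableGen def name determines the name of the generated op class. A minimal sketch of how a call site changes (the emitBarrier helper is hypothetical, for illustration only):

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Hypothetical helper: the op itself ("gpu.barrier") and its textual syntax
// are unchanged; only the generated C++ class name gains the Op suffix.
static void emitBarrier(OpBuilder &builder, Location loc) {
  // Before this commit: builder.create<gpu::Barrier>(loc);
  builder.create<gpu::BarrierOp>(loc);
}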
@@ -41,12 +41,12 @@ class GPU_IndexOp<string mnemonic, list<OpTrait> traits = []> :
   let verifier = [{ return ::verifyIndexOp(*this); }];
 }
 
-def gpu_BlockDim : GPU_IndexOp<"block_dim">;
-def gpu_BlockId : GPU_IndexOp<"block_id">;
-def gpu_GridDim : GPU_IndexOp<"grid_dim">;
-def gpu_ThreadId : GPU_IndexOp<"thread_id">;
+def GPU_BlockDimOp : GPU_IndexOp<"block_dim">;
+def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
+def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
 
-def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
+def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
     Results<(outs)> {
   let summary = "Terminator for GPU launch regions.";
   let description = [{
@@ -59,7 +59,7 @@ def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
   let printer = [{ p << getOperationName(); }];
 }
 
-def gpu_Yield : GPU_Op<"yield", [Terminator]>,
+def GPU_YieldOp : GPU_Op<"yield", [Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "GPU yield operation";
   let description = [{
@@ -74,20 +74,20 @@ def gpu_Yield : GPU_Op<"yield", [Terminator]>,
 }
 
 // These mirror the XLA ComparisonDirection enum.
-def gpu_AllReduceOpAdd : StrEnumAttrCase<"add">;
-def gpu_AllReduceOpMul : StrEnumAttrCase<"mul">;
+def GPU_AllReduceOpAdd : StrEnumAttrCase<"add">;
+def GPU_AllReduceOpMul : StrEnumAttrCase<"mul">;
 
-def gpu_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
+def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
     "built-in reduction operations supported by gpu.allreduce.",
     [
-      gpu_AllReduceOpAdd,
-      gpu_AllReduceOpMul,
+      GPU_AllReduceOpAdd,
+      GPU_AllReduceOpMul,
     ]>;
 
-def gpu_AllReduce : GPU_Op<"all_reduce",
+def GPU_AllReduceOp : GPU_Op<"all_reduce",
     [SameOperandsAndResultType, IsolatedFromAbove]>,
     Arguments<(ins AnyType:$value,
-               OptionalAttr<gpu_AllReduceOperationAttr>:$op)>,
+               OptionalAttr<GPU_AllReduceOperationAttr>:$op)>,
     Results<(outs AnyType)> {
   let summary = "Reduce values among workgroup.";
   let description = [{
@@ -115,7 +115,7 @@ def gpu_AllReduce : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
-def gpu_Barrier : GPU_Op<"barrier"> {
+def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
     The "barrier" op synchronizes all work items of a workgroup. It is used
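The op mnemonics ("block_dim", "all_reduce", …) are untouched above; only the def names change, because mlir-tblgen derives the generated C++ class name from the def name by stripping the dialect prefix up to the first underscore. A standalone restatement of that rule (my sketch, not tblgen's actual code):

#include <string>

// Sketch of the ODS naming rule this commit relies on: the C++ class name is
// the TableGen def name with the leading "<DIALECT>_" prefix removed.
std::string opClassName(const std::string &defName) {
  std::string::size_type pos = defName.find('_');
  return pos == std::string::npos ? defName : defName.substr(pos + 1);
}

// opClassName("gpu_BlockDim")   == "BlockDim"   -- old scheme, no Op suffix
// opClassName("GPU_BlockDimOp") == "BlockDimOp" -- new scheme, matches other dialects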
@@ -33,6 +33,6 @@ include "mlir/Dialect/GPU/GPUOps.td"
 include "mlir/Dialect/LLVMIR/NVVMOps.td"
 #endif // NVVMIR_OPS
 
-def : Pat<(gpu_Barrier), (NVVM_Barrier0Op)>;
+def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
 
 #endif // MLIR_CONVERSION_GPUTONVVM_TD
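The renamed Pat line above is a declarative rewrite rule (DRR); the generated pattern is pulled into the lowering pass via populateWithGenerated below. A hedged sketch of a roughly equivalent hand-written pattern in the API style of this era (the class name is hypothetical):

#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical hand-written equivalent of `def : Pat<(GPU_BarrierOp),
// (NVVM_Barrier0Op)>;`: replace each gpu.barrier with nvvm.barrier0.
struct BarrierToNVVMBarrier0 : public OpRewritePattern<gpu::BarrierOp> {
  using OpRewritePattern<gpu::BarrierOp>::OpRewritePattern;

  PatternMatchResult matchAndRewrite(gpu::BarrierOp op,
                                     PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<NVVM::Barrier0Op>(op);
    return matchSuccess();
  }
};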
@@ -41,7 +41,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
       Location, Value *, Value *, ConversionPatternRewriter &)>;
 
   explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_)
-      : LLVMOpLowering(gpu::AllReduce::getOperationName(),
+      : LLVMOpLowering(gpu::AllReduceOp::getOperationName(),
                        lowering_.getDialect()->getContext(), lowering_),
         int32Type(LLVM::LLVMType::getInt32Ty(lowering_.getDialect())) {}
 
@@ -55,7 +55,8 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     assert(op->getOperand(0)->getType().isIntOrFloat());
 
     // Create the reduction using an accumulator factory.
-    AccumulatorFactory factory = getFactory(cast<gpu::AllReduce>(op), operand);
+    AccumulatorFactory factory =
+        getFactory(cast<gpu::AllReduceOp>(op), operand);
     assert(factory && "failed to create accumulator factory");
     Value *result = createBlockReduce(loc, operand, factory, rewriter);
 
@@ -66,7 +67,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
 private:
   /// Returns an accumulator factory using either the op attribute or the body
   /// region.
-  AccumulatorFactory getFactory(gpu::AllReduce allReduce,
+  AccumulatorFactory getFactory(gpu::AllReduceOp allReduce,
                                 Value *operand) const {
     if (!allReduce.body().empty()) {
       return getFactory(allReduce.body());
@@ -103,7 +104,7 @@ private:
     // Replace all gpu.yield ops with branch out of body.
     for (; block != split; block = block->getNextNode()) {
       Operation *terminator = block->getTerminator();
-      if (!llvm::isa<gpu::Yield>(terminator))
+      if (!llvm::isa<gpu::YieldOp>(terminator))
         continue;
       rewriter.setInsertionPointToEnd(block);
       rewriter.replaceOpWithNewOp<LLVM::BrOp>(
@@ -469,13 +470,13 @@ public:
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateWithGenerated(&getContext(), &patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, NVVM::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                     NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, NVVM::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                     NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, NVVM::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                     NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, NVVM::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                     NVVM::GridDimYOp, NVVM::GridDimZOp>,
         GPUAllReduceOpLowering>(converter);
 
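GPUIndexIntrinsicOpLowering maps one gpu index op to three target intrinsics, one per dimension. The dispatch idea, restated as a standalone hedged sketch (assumed from the usage above; the real pattern also rewrites through LLVM types, which is not shown):

#include "llvm/ADT/StringRef.h"

// Each gpu index op carries a "dimension" attribute ("x", "y", or "z");
// the lowering selects the matching target intrinsic.
template <typename XOp, typename YOp, typename ZOp>
llvm::StringRef pickIntrinsicName(llvm::StringRef dim) {
  if (dim == "x")
    return XOp::getOperationName();
  if (dim == "y")
    return YOp::getOperationName();
  return ZOp::getOperationName();
}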
@@ -484,7 +485,7 @@ public:
     target.addLegalDialect<LLVM::LLVMDialect>();
     target.addLegalDialect<NVVM::NVVMDialect>();
     // TODO(csigg): Remove once we support replacing non-root ops.
-    target.addLegalOp<gpu::Yield>();
+    target.addLegalOp<gpu::YieldOp>();
    if (failed(applyPartialConversion(m, target, patterns, &converter)))
      signalPassFailure();
  }

@@ -50,13 +50,13 @@ public:
     LLVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, ROCDL::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                     ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, ROCDL::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                     ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, ROCDL::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, ROCDL::BlockIdXOp,
                                     ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, ROCDL::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
                                     ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
         converter);
 
@@ -149,11 +149,12 @@ void GPUToSPIRVPass::runOnModule() {
   OwningRewritePatternList patterns;
   patterns.insert<
       KernelFnConversion,
-      LaunchConfigConversion<gpu::BlockDim, spirv::BuiltIn::WorkgroupSize>,
-      LaunchConfigConversion<gpu::BlockId, spirv::BuiltIn::WorkgroupId>,
-      LaunchConfigConversion<gpu::GridDim, spirv::BuiltIn::NumWorkgroups>,
-      LaunchConfigConversion<gpu::ThreadId, spirv::BuiltIn::LocalInvocationId>>(
-      context, typeConverter);
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
+      LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
+      LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::ThreadIdOp,
+                             spirv::BuiltIn::LocalInvocationId>>(context,
+                                                                 typeConverter);
   populateStandardToSPIRVPatterns(context, patterns);
 
   ConversionTarget target(*context);

@@ -261,7 +261,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
   Location terminatorLoc = terminator.getLoc();
   terminator.erase();
   builder.setInsertionPointToEnd(innermostForOp.getBody());
-  builder.create<gpu::Return>(terminatorLoc);
+  builder.create<gpu::ReturnOp>(terminatorLoc);
   launchOp.getBody().front().getOperations().splice(
       launchOp.getBody().front().begin(),
       innermostForOp.getBody()->getOperations());

@@ -137,7 +137,7 @@ template <typename T> static LogicalResult verifyIndexOp(T op) {
   return success();
 }
 
-static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
+static LogicalResult verifyAllReduce(gpu::AllReduceOp allReduce) {
   if (allReduce.body().empty() != allReduce.op().hasValue())
     return allReduce.emitError(
         "expected either an op attribute or a non-empty body");
@@ -150,7 +150,7 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   }
   unsigned yieldCount = 0;
   for (Block &block : allReduce.body()) {
-    if (auto yield = dyn_cast<gpu::Yield>(block.getTerminator())) {
+    if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
       if (yield.getNumOperands() != 1)
         return allReduce.emitError("expected one gpu.yield operand");
       if (yield.getOperand(0)->getType() != allReduce.getType())
@@ -164,8 +164,13 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   return success();
 }
 
+// Namespace avoids ambiguous ReturnOpOperandAdaptor.
+namespace mlir {
+namespace gpu {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/GPU/GPUOps.cpp.inc"
+} // namespace gpu
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // LaunchOp
@@ -263,7 +268,7 @@ LogicalResult LaunchOp::verify() {
       continue;
     if (block.back().getNumSuccessors() != 0)
       continue;
-    if (!isa<gpu::Return>(&block.back())) {
+    if (!isa<gpu::ReturnOp>(&block.back())) {
       return block.back()
           .emitError("expected 'gpu.terminator' or a terminator with "
                      "successors")

@@ -43,10 +43,10 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
 static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
   OpBuilder OpBuilder(kernelFunc.getBody());
   SmallVector<Value *, 12> indexOps;
-  createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
   // Replace the leading 12 function args with the respective thread/block index
   // operations. Iterate backwards since args are erased and indices change.
   for (int i = 11; i >= 0; --i) {
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());
   injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](gpu::Return op) {
+  outlinedFunc.walk([](gpu::ReturnOp op) {
     OpBuilder replacer(op);
     replacer.create<ReturnOp>(op.getLoc());
     op.erase();