From b74af4aa5c314f596ad5ed26c00e4b9033dee78f Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Mon, 21 Oct 2019 11:10:13 -0700
Subject: [PATCH] Unify GPU op definition names with other dialects.

Rename GPU op names from gpu_Foo to GPU_FooOp.

PiperOrigin-RevId: 275882232
---
 mlir/include/mlir/Dialect/GPU/GPUOps.td       | 28 +++++++++----------
 mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td    |  2 +-
 .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp        | 19 +++++++------
 .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp      |  8 +++---
 mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 11 ++++----
 mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp |  2 +-
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp        | 11 ++++++--
 .../GPU/Transforms/KernelOutlining.cpp        | 10 +++----
 8 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index f38e1b0386c5..9c0ab8e7f020 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -41,12 +41,12 @@ class GPU_IndexOp<string mnemonic, list<OpTrait> traits = []> :
   let verifier = [{ return ::verifyIndexOp(*this); }];
 }
 
-def gpu_BlockDim : GPU_IndexOp<"block_dim">;
-def gpu_BlockId : GPU_IndexOp<"block_id">;
-def gpu_GridDim : GPU_IndexOp<"grid_dim">;
-def gpu_ThreadId : GPU_IndexOp<"thread_id">;
+def GPU_BlockDimOp : GPU_IndexOp<"block_dim">;
+def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
+def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
 
-def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
+def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
     Results<(outs)> {
   let summary = "Terminator for GPU launch regions.";
   let description = [{
@@ -59,7 +59,7 @@ def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
   let printer = [{ p << getOperationName(); }];
 }
 
-def gpu_Yield : GPU_Op<"yield", [Terminator]>,
+def GPU_YieldOp : GPU_Op<"yield", [Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "GPU yield operation";
   let description = [{
@@ -74,20 +74,20 @@ def gpu_Yield : GPU_Op<"yield", [Terminator]>,
 }
 
 // These mirror the XLA ComparisonDirection enum.
-def gpu_AllReduceOpAdd : StrEnumAttrCase<"add">;
-def gpu_AllReduceOpMul : StrEnumAttrCase<"mul">;
+def GPU_AllReduceOpAdd : StrEnumAttrCase<"add">;
+def GPU_AllReduceOpMul : StrEnumAttrCase<"mul">;
 
-def gpu_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
+def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
     "built-in reduction operations supported by gpu.allreduce.",
     [
-      gpu_AllReduceOpAdd,
-      gpu_AllReduceOpMul,
+      GPU_AllReduceOpAdd,
+      GPU_AllReduceOpMul,
     ]>;
 
-def gpu_AllReduce : GPU_Op<"all_reduce",
+def GPU_AllReduceOp : GPU_Op<"all_reduce",
     [SameOperandsAndResultType, IsolatedFromAbove]>,
     Arguments<(ins AnyType:$value,
-               OptionalAttr<gpu_AllReduceOperationAttr>:$op)>,
+               OptionalAttr<GPU_AllReduceOperationAttr>:$op)>,
     Results<(outs AnyType)> {
   let summary = "Reduce values among workgroup.";
   let description = [{
@@ -115,7 +115,7 @@ def gpu_AllReduce : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
-def gpu_Barrier : GPU_Op<"barrier"> {
+def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
     The "barrier" op synchronizes all work items of a workgroup. It is used
diff --git a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
index d7daf1062f06..2b89b721d830 100644
--- a/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
+++ b/mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
@@ -33,6 +33,6 @@ include "mlir/Dialect/GPU/GPUOps.td"
 include "mlir/Dialect/LLVMIR/NVVMOps.td"
 #endif // NVVMIR_OPS
 
-def : Pat<(gpu_Barrier), (NVVM_Barrier0Op)>;
+def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
 
 #endif // MLIR_CONVERSION_GPUTONVVM_TD
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 462457ccca80..b0515147724c 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -41,7 +41,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
       Location, Value *, Value *, ConversionPatternRewriter &)>;
 
   explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_)
-      : LLVMOpLowering(gpu::AllReduce::getOperationName(),
+      : LLVMOpLowering(gpu::AllReduceOp::getOperationName(),
                        lowering_.getDialect()->getContext(), lowering_),
         int32Type(LLVM::LLVMType::getInt32Ty(lowering_.getDialect())) {}
 
@@ -55,7 +55,8 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     assert(op->getOperand(0)->getType().isIntOrFloat());
 
     // Create the reduction using an accumulator factory.
-    AccumulatorFactory factory = getFactory(cast<gpu::AllReduce>(op), operand);
+    AccumulatorFactory factory =
+        getFactory(cast<gpu::AllReduceOp>(op), operand);
     assert(factory && "failed to create accumulator factory");
 
     Value *result = createBlockReduce(loc, operand, factory, rewriter);
@@ -66,7 +67,7 @@ private:
   /// Returns an accumulator factory using either the op attribute or the body
   /// region.
-  AccumulatorFactory getFactory(gpu::AllReduce allReduce,
+  AccumulatorFactory getFactory(gpu::AllReduceOp allReduce,
                                 Value *operand) const {
     if (!allReduce.body().empty()) {
       return getFactory(allReduce.body());
     }
@@ -103,7 +104,7 @@ private:
     // Replace all gpu.yield ops with branch out of body.
     for (; block != split; block = block->getNextNode()) {
       Operation *terminator = block->getTerminator();
-      if (!llvm::isa<gpu::Yield>(terminator))
+      if (!llvm::isa<gpu::YieldOp>(terminator))
         continue;
       rewriter.setInsertionPointToEnd(block);
       rewriter.replaceOpWithNewOp<LLVM::BrOp>(
@@ -469,13 +470,13 @@ public:
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateWithGenerated(&getContext(), &patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, NVVM::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                     NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, NVVM::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                     NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, NVVM::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                     NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, NVVM::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                     NVVM::GridDimYOp, NVVM::GridDimZOp>,
         GPUAllReduceOpLowering>(converter);
 
@@ -484,7 +485,7 @@ public:
     target.addLegalDialect<LLVM::LLVMDialect>();
     target.addLegalDialect<NVVM::NVVMDialect>();
     // TODO(csigg): Remove once we support replacing non-root ops.
-    target.addLegalOp<gpu::Yield>();
+    target.addLegalOp<gpu::YieldOp>();
     if (failed(applyPartialConversion(m, target, patterns, &converter)))
       signalPassFailure();
   }
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 3b8ad14cfdb8..2ea587ecf477 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -50,13 +50,13 @@ public:
     LLVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, ROCDL::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                     ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, ROCDL::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                     ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, ROCDL::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, ROCDL::BlockIdXOp,
                                     ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, ROCDL::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
                                     ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
         converter);
 
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index 45720fefcc11..4f73b9bfd6a9 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -149,11 +149,12 @@ void GPUToSPIRVPass::runOnModule() {
   OwningRewritePatternList patterns;
   patterns.insert<
       KernelFnConversion,
-      LaunchConfigConversion<gpu::BlockDim, spirv::BuiltIn::WorkgroupSize>,
-      LaunchConfigConversion<gpu::BlockId, spirv::BuiltIn::WorkgroupId>,
-      LaunchConfigConversion<gpu::GridDim, spirv::BuiltIn::NumWorkgroups>,
-      LaunchConfigConversion<gpu::ThreadId, spirv::BuiltIn::LocalInvocationId>>(
-      context, typeConverter);
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
+      LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
+      LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::ThreadIdOp,
+                             spirv::BuiltIn::LocalInvocationId>>(context,
+                                                                 typeConverter);
   populateStandardToSPIRVPatterns(context, patterns);
 
   ConversionTarget target(*context);
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 154a8660bee3..2229455ef336 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -261,7 +261,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
   Location terminatorLoc = terminator.getLoc();
   terminator.erase();
   builder.setInsertionPointToEnd(innermostForOp.getBody());
-  builder.create<gpu::Return>(terminatorLoc);
+  builder.create<gpu::ReturnOp>(terminatorLoc);
   launchOp.getBody().front().getOperations().splice(
       launchOp.getBody().front().begin(),
       innermostForOp.getBody()->getOperations());
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 6e55c0b1212b..d1f223e389ab 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -137,7 +137,7 @@ template <typename T> static LogicalResult verifyIndexOp(T op) {
   return success();
 }
 
-static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
+static LogicalResult verifyAllReduce(gpu::AllReduceOp allReduce) {
   if (allReduce.body().empty() != allReduce.op().hasValue())
     return allReduce.emitError(
         "expected either an op attribute or a non-empty body");
@@ -150,7 +150,7 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   }
   unsigned yieldCount = 0;
   for (Block &block : allReduce.body()) {
-    if (auto yield = dyn_cast<gpu::Yield>(block.getTerminator())) {
+    if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
       if (yield.getNumOperands() != 1)
         return allReduce.emitError("expected one gpu.yield operand");
       if (yield.getOperand(0)->getType() != allReduce.getType())
@@ -164,8 +164,13 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   return success();
 }
 
+// Namespace avoids ambiguous ReturnOpOperandAdaptor.
+namespace mlir {
+namespace gpu {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/GPU/GPUOps.cpp.inc"
+} // namespace gpu
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // LaunchOp
@@ -263,7 +268,7 @@ LogicalResult LaunchOp::verify() {
       continue;
     if (block.back().getNumSuccessors() != 0)
       continue;
-    if (!isa<gpu::Return>(&block.back())) {
+    if (!isa<gpu::ReturnOp>(&block.back())) {
       return block.back()
           .emitError("expected 'gpu.terminator' or a terminator with "
                      "successors")
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 8377ec64e7e6..d9a1106270f4 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -43,10 +43,10 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
 static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
   OpBuilder OpBuilder(kernelFunc.getBody());
   SmallVector<Value *, 12> indexOps;
-  createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
   // Replace the leading 12 function args with the respective thread/block index
   // operations. Iterate backwards since args are erased and indices change.
   for (int i = 11; i >= 0; --i) {
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());
   injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](gpu::Return op) {
+  outlinedFunc.walk([](gpu::ReturnOp op) {
     OpBuilder replacer(op);
     replacer.create<ReturnOp>(op.getLoc());
     op.erase();
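
Note (illustrative, not part of the patch): mlir-tblgen derives the generated
C++ class name from the TableGen def name by stripping the dialect prefix up
to the first underscore, which is why this rename also changes the C++ API, as
the .cpp hunks above show. A minimal sketch of the effect on client code,
assuming a `builder` and `loc` are in scope:

    // Before: def gpu_Return generated the C++ class gpu::Return.
    builder.create<gpu::Return>(loc);
    // After: def GPU_ReturnOp generates gpu::ReturnOp, matching the
    // GPU_FooOp / FooOp convention used by the other dialects.
    builder.create<gpu::ReturnOp>(loc);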