Unify GPU op definition names with other dialects.

Rename GPU op definitions from gpu_Foo to GPU_FooOp.

PiperOrigin-RevId: 275882232
Christian Sigg 2019-10-21 11:10:13 -07:00 committed by A. Unique TensorFlower
parent 03d7be2aca
commit b74af4aa5c
8 changed files with 49 additions and 42 deletions
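
The change is mechanical for downstream code as well: the TableGen def name determines the generated C++ op class, so uses such as gpu::Return or gpu::AllReduce become gpu::ReturnOp and gpu::AllReduceOp. A minimal sketch of a caller after the rename (illustrative only, not part of this commit; it assumes the GPU dialect header of this era and an existing OpBuilder):

    // Illustrative sketch, not part of this commit.
    #include "mlir/Dialect/GPU/GPUDialect.h"
    #include "mlir/IR/Builders.h"

    // Emits the gpu.return terminator for a launch region.
    static void emitLaunchTerminator(mlir::OpBuilder &builder, mlir::Location loc) {
      // Previously spelled gpu::Return; after this commit the generated class
      // is gpu::ReturnOp, matching the Foo -> FooOp convention of other dialects.
      builder.create<mlir::gpu::ReturnOp>(loc);
    }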


@@ -41,12 +41,12 @@ class GPU_IndexOp<string mnemonic, list<OpTrait> traits = []> :
   let verifier = [{ return ::verifyIndexOp(*this); }];
 }
 
-def gpu_BlockDim : GPU_IndexOp<"block_dim">;
-def gpu_BlockId : GPU_IndexOp<"block_id">;
-def gpu_GridDim : GPU_IndexOp<"grid_dim">;
-def gpu_ThreadId : GPU_IndexOp<"thread_id">;
+def GPU_BlockDimOp : GPU_IndexOp<"block_dim">;
+def GPU_BlockIdOp : GPU_IndexOp<"block_id">;
+def GPU_GridDimOp : GPU_IndexOp<"grid_dim">;
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id">;
 
-def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
+def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
     Results<(outs)> {
   let summary = "Terminator for GPU launch regions.";
   let description = [{
@@ -59,7 +59,7 @@ def gpu_Return : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
   let printer = [{ p << getOperationName(); }];
 }
 
-def gpu_Yield : GPU_Op<"yield", [Terminator]>,
+def GPU_YieldOp : GPU_Op<"yield", [Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "GPU yield operation";
   let description = [{
@@ -74,20 +74,20 @@ def gpu_Yield : GPU_Op<"yield", [Terminator]>,
 }
 
 // These mirror the XLA ComparisonDirection enum.
-def gpu_AllReduceOpAdd : StrEnumAttrCase<"add">;
-def gpu_AllReduceOpMul : StrEnumAttrCase<"mul">;
+def GPU_AllReduceOpAdd : StrEnumAttrCase<"add">;
+def GPU_AllReduceOpMul : StrEnumAttrCase<"mul">;
 
-def gpu_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
+def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr",
     "built-in reduction operations supported by gpu.allreduce.",
     [
-      gpu_AllReduceOpAdd,
-      gpu_AllReduceOpMul,
+      GPU_AllReduceOpAdd,
+      GPU_AllReduceOpMul,
     ]>;
 
-def gpu_AllReduce : GPU_Op<"all_reduce",
+def GPU_AllReduceOp : GPU_Op<"all_reduce",
     [SameOperandsAndResultType, IsolatedFromAbove]>,
     Arguments<(ins AnyType:$value,
-               OptionalAttr<gpu_AllReduceOperationAttr>:$op)>,
+               OptionalAttr<GPU_AllReduceOperationAttr>:$op)>,
     Results<(outs AnyType)> {
   let summary = "Reduce values among workgroup.";
   let description = [{
@@ -115,7 +115,7 @@ def gpu_AllReduce : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
-def gpu_Barrier : GPU_Op<"barrier"> {
+def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
     The "barrier" op synchronizes all work items of a workgroup. It is used


@@ -33,6 +33,6 @@ include "mlir/Dialect/GPU/GPUOps.td"
 include "mlir/Dialect/LLVMIR/NVVMOps.td"
 #endif // NVVMIR_OPS
 
-def : Pat<(gpu_Barrier), (NVVM_Barrier0Op)>;
+def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
 
 #endif // MLIR_CONVERSION_GPUTONVVM_TD


@@ -41,7 +41,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
       Location, Value *, Value *, ConversionPatternRewriter &)>;
 
   explicit GPUAllReduceOpLowering(LLVMTypeConverter &lowering_)
-      : LLVMOpLowering(gpu::AllReduce::getOperationName(),
+      : LLVMOpLowering(gpu::AllReduceOp::getOperationName(),
                        lowering_.getDialect()->getContext(), lowering_),
         int32Type(LLVM::LLVMType::getInt32Ty(lowering_.getDialect())) {}
@@ -55,7 +55,8 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     assert(op->getOperand(0)->getType().isIntOrFloat());
 
     // Create the reduction using an accumulator factory.
-    AccumulatorFactory factory = getFactory(cast<gpu::AllReduce>(op), operand);
+    AccumulatorFactory factory =
+        getFactory(cast<gpu::AllReduceOp>(op), operand);
     assert(factory && "failed to create accumulator factory");
 
     Value *result = createBlockReduce(loc, operand, factory, rewriter);
@@ -66,7 +67,7 @@
 private:
   /// Returns an accumulator factory using either the op attribute or the body
   /// region.
-  AccumulatorFactory getFactory(gpu::AllReduce allReduce,
+  AccumulatorFactory getFactory(gpu::AllReduceOp allReduce,
                                 Value *operand) const {
     if (!allReduce.body().empty()) {
       return getFactory(allReduce.body());
@@ -103,7 +104,7 @@ private:
     // Replace all gpu.yield ops with branch out of body.
     for (; block != split; block = block->getNextNode()) {
       Operation *terminator = block->getTerminator();
-      if (!llvm::isa<gpu::Yield>(terminator))
+      if (!llvm::isa<gpu::YieldOp>(terminator))
         continue;
       rewriter.setInsertionPointToEnd(block);
       rewriter.replaceOpWithNewOp<LLVM::BrOp>(
@@ -469,13 +470,13 @@ public:
     populateStdToLLVMConversionPatterns(converter, patterns);
     populateWithGenerated(&getContext(), &patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, NVVM::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, NVVM::ThreadIdXOp,
                                     NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, NVVM::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, NVVM::BlockDimXOp,
                                     NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, NVVM::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
                                     NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, NVVM::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
                                     NVVM::GridDimYOp, NVVM::GridDimZOp>,
         GPUAllReduceOpLowering>(converter);
 
@@ -484,7 +485,7 @@ public:
     target.addLegalDialect<LLVM::LLVMDialect>();
     target.addLegalDialect<NVVM::NVVMDialect>();
     // TODO(csigg): Remove once we support replacing non-root ops.
-    target.addLegalOp<gpu::Yield>();
+    target.addLegalOp<gpu::YieldOp>();
     if (failed(applyPartialConversion(m, target, patterns, &converter)))
       signalPassFailure();
   }


@@ -50,13 +50,13 @@ public:
     LLVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
     patterns.insert<
-        GPUIndexIntrinsicOpLowering<gpu::ThreadId, ROCDL::ThreadIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
                                     ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockDim, ROCDL::BlockDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockDimOp, ROCDL::BlockDimXOp,
                                     ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::BlockId, ROCDL::BlockIdXOp,
+        GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, ROCDL::BlockIdXOp,
                                     ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>,
-        GPUIndexIntrinsicOpLowering<gpu::GridDim, ROCDL::GridDimXOp,
+        GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
                                     ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
         converter);


@@ -149,11 +149,12 @@ void GPUToSPIRVPass::runOnModule() {
   OwningRewritePatternList patterns;
   patterns.insert<
       KernelFnConversion,
-      LaunchConfigConversion<gpu::BlockDim, spirv::BuiltIn::WorkgroupSize>,
-      LaunchConfigConversion<gpu::BlockId, spirv::BuiltIn::WorkgroupId>,
-      LaunchConfigConversion<gpu::GridDim, spirv::BuiltIn::NumWorkgroups>,
-      LaunchConfigConversion<gpu::ThreadId, spirv::BuiltIn::LocalInvocationId>>(
-      context, typeConverter);
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
+      LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
+      LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::ThreadIdOp,
+                             spirv::BuiltIn::LocalInvocationId>>(context,
+                                                                 typeConverter);
 
   populateStandardToSPIRVPatterns(context, patterns);
   ConversionTarget target(*context);


@@ -261,7 +261,7 @@ void LoopToGpuConverter::createLaunch(OpTy rootForOp, OpTy innermostForOp,
   Location terminatorLoc = terminator.getLoc();
   terminator.erase();
   builder.setInsertionPointToEnd(innermostForOp.getBody());
-  builder.create<gpu::Return>(terminatorLoc);
+  builder.create<gpu::ReturnOp>(terminatorLoc);
   launchOp.getBody().front().getOperations().splice(
       launchOp.getBody().front().begin(),
       innermostForOp.getBody()->getOperations());


@@ -137,7 +137,7 @@ template <typename T> static LogicalResult verifyIndexOp(T op) {
   return success();
 }
 
-static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
+static LogicalResult verifyAllReduce(gpu::AllReduceOp allReduce) {
   if (allReduce.body().empty() != allReduce.op().hasValue())
     return allReduce.emitError(
         "expected either an op attribute or a non-empty body");
@@ -150,7 +150,7 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   }
   unsigned yieldCount = 0;
   for (Block &block : allReduce.body()) {
-    if (auto yield = dyn_cast<gpu::Yield>(block.getTerminator())) {
+    if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
       if (yield.getNumOperands() != 1)
         return allReduce.emitError("expected one gpu.yield operand");
       if (yield.getOperand(0)->getType() != allReduce.getType())
@@ -164,8 +164,13 @@ static LogicalResult verifyAllReduce(gpu::AllReduce allReduce) {
   return success();
 }
 
+// Namespace avoids ambiguous ReturnOpOperandAdaptor.
+namespace mlir {
+namespace gpu {
 #define GET_OP_CLASSES
 #include "mlir/Dialect/GPU/GPUOps.cpp.inc"
+} // namespace gpu
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // LaunchOp
@@ -263,7 +268,7 @@ LogicalResult LaunchOp::verify() {
       continue;
     if (block.back().getNumSuccessors() != 0)
       continue;
-    if (!isa<gpu::Return>(&block.back())) {
+    if (!isa<gpu::ReturnOp>(&block.back())) {
       return block.back()
           .emitError("expected 'gpu.terminator' or a terminator with "
                      "successors")


@@ -43,10 +43,10 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
 static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
   OpBuilder OpBuilder(kernelFunc.getBody());
   SmallVector<Value *, 12> indexOps;
-  createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps);
-  createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
+  createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
   // Replace the leading 12 function args with the respective thread/block index
   // operations. Iterate backwards since args are erased and indices change.
   for (int i = 11; i >= 0; --i) {
@@ -107,7 +107,7 @@ static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
   outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());
   injectGpuIndexOperations(loc, outlinedFunc);
-  outlinedFunc.walk([](gpu::Return op) {
+  outlinedFunc.walk([](gpu::ReturnOp op) {
     OpBuilder replacer(op);
     replacer.create<ReturnOp>(op.getLoc());
     op.erase();