diff --git a/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h b/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h new file mode 100644 index 000000000000..1ed03da3d314 --- /dev/null +++ b/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h @@ -0,0 +1,35 @@ +//===- AffineToGPUPass.h - Pass converting loops to GPU kernels -*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +#ifndef MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_ +#define MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_ + +namespace mlir { +class FunctionPassBase; + +/// Create a pass that converts loop nests into GPU kernels. It considers +/// top-level affine.for operations as roots of loop nests and converts them +/// to gpu.launch operations if possible. +/// +/// No check on the size of the block or grid, or on the validity of +/// parallelization, is performed; it is the responsibility of the caller +/// to strip-mine the loops and to perform the dependence analysis before +/// calling the conversion. 
+FunctionPassBase *createSimpleAffineToGPUPass(unsigned numBlockDims, + unsigned numThreadDims); +} // namespace mlir + +#endif // MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_ diff --git a/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp b/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp index 22f254fad3c4..601340b114d3 100644 --- a/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp +++ b/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp @@ -15,6 +15,7 @@ // limitations under the License. // ============================================================================= +#include "mlir/Conversion/AffineToGPU/AffineToGPUPass.h" #include "mlir/AffineOps/AffineOps.h" #include "mlir/Conversion/AffineToGPU/AffineToGPU.h" #include "mlir/Pass/Pass.h" @@ -40,17 +41,33 @@ namespace { // GPU launch operations. Nested launches are not allowed, so this does not // walk the function recursively to avoid considering nested loops. struct AffineForGPUMapper : public FunctionPass { + AffineForGPUMapper(unsigned numBlockDims, unsigned numThreadDims) + : numBlockDims(numBlockDims), numThreadDims(numThreadDims) {} + void runOnFunction() override { for (Block &block : getFunction()) for (Operation &op : llvm::make_early_inc_range(block)) if (auto forOp = dyn_cast(&op)) - if (failed(convertAffineLoopNestToGPULaunch( - forOp, clNumBlockDims.getValue(), - clNumThreadDims.getValue()))) + if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, + numThreadDims))) signalPassFailure(); } + + unsigned numBlockDims; + unsigned numThreadDims; +}; + +struct AffineForGPUMapperCLI : public AffineForGPUMapper { + AffineForGPUMapperCLI() + : AffineForGPUMapper(clNumBlockDims.getValue(), + clNumThreadDims.getValue()) {} }; } // namespace -static PassRegistration +FunctionPassBase *mlir::createSimpleAffineToGPUPass(unsigned numBlockDims, + unsigned numThreadDims) { + return new AffineForGPUMapper(numBlockDims, numThreadDims); +} + +static PassRegistration registration(PASS_NAME, 
"Convert top-level affine loops to GPU kernels");