[MLIR][GPU] Expose GpuParallelLoopMapping as non-test pass.

Reviewed By: bondhugula, herhut

Differential Revision: https://reviews.llvm.org/D126199
2022-05-30 08:32:01 +02:00 · 2022-05-30 08:32:01 +02:00 · bcf3d52486
parent a5ddd4a238
commit bcf3d52486
8 changed files with 46 additions and 80 deletions
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
@ -60,13 +60,5 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
 LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping);
 } // namespace gpu
 /// Maps the parallel loops found in the given function to workgroups. The first
 /// loop encountered will be mapped to the global workgroup and the second loop
 /// encountered to the local workgroup. Within each mapping, the first three
 /// dimensions are mapped to x/y/z hardware ids and all following dimensions are
 /// mapped to sequential loops.
 void greedilyMapParallelSCFToGPU(Region &region);
 } // namespace mlir
 #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@ -39,6 +39,13 @@ createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
 /// Rewrites a function region so that GPU ops execute asynchronously.
 std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
 /// Creates a pass that maps the parallel loops found in the given function to
 /// workgroups. The first loop encountered will be mapped to the global
 /// workgroup and the second loop encountered to the local workgroup. Within
 /// each mapping, the first three dimensions are mapped to x/y/z hardware ids
 /// and all following dimensions are mapped to sequential loops. The pass is
 /// declared in Passes.td under the `gpu-map-parallel-loops` argument.
 std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
 /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
 void populateGpuAllReducePatterns(RewritePatternSet &patterns);
--- a/mlir/include/mlir/Dialect/GPU/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Passes.td
@ -29,4 +29,11 @@ def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
  let dependentDialects = ["async::AsyncDialect"];
 }
 // Function-level pass that greedily maps parallel loops to GPU hardware
 // dimensions; instances are created by mlir::createGpuMapParallelLoopsPass().
 def GpuMapParallelLoopsPass
    : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
  let summary = "Greedily maps loops to GPU hardware dimensions.";
  let constructor = "mlir::createGpuMapParallelLoopsPass()";
  // The long-form description spells out the mapping order instead of
  // repeating the summary.
  let description = [{
    Maps the parallel loops found in the given function to workgroups. The
    first loop encountered will be mapped to the global workgroup and the
    second loop encountered to the local workgroup. Within each mapping, the
    first three dimensions are mapped to x/y/z hardware ids and all following
    dimensions are mapped to sequential loops.
  }];
 }
 #endif // MLIR_DIALECT_GPU_PASSES
--- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
@ -13,26 +13,25 @@
 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
 #include "PassDetail.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/Pass/Pass.h"
 using namespace mlir;
 using namespace mlir::gpu;
 using namespace mlir::scf;
 #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
 #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
 namespace mlir {
 namespace gpu {
-StringRef getMappingAttrName() { return "mapping"; }
+using scf::ParallelOp;
-ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
+StringRef gpu::getMappingAttrName() { return "mapping"; }
-                                                     AffineMap map,
+
-                                                     AffineMap bound) {
+gpu::ParallelLoopDimMapping
 gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map,
                                   AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
@ -40,8 +39,8 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
 }
-LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
+LogicalResult gpu::setMappingAttr(ParallelOp ploopOp,
-                             ArrayRef<ParallelLoopDimMapping> mapping) {
+                                  ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to only once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
@ -56,20 +55,17 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
 }
 } // namespace gpu
 } // namespace mlir
 namespace gpu {
 namespace {
 /// Level at which a parallel loop is mapped. The enumerators are numbered
 /// consecutively so that the bounded `operator++` below can step
 /// MapGrid -> MapBlock -> Sequential and then stay at Sequential.
 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
 } // namespace
 /// Number of loop dimensions that can receive a hardware id per mapping level;
 /// getHardwareIdForMapping returns Processor::Sequential for any dimension at
 /// or beyond this count.
 static constexpr int kNumHardwareIds = 3;
 } // namespace
 /// Bounded increment on MappingLevel. Increments to the next
 /// level unless Sequential was already reached.
-MappingLevel &operator++(MappingLevel &mappingLevel) {
+static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
@ -82,8 +78,7 @@ MappingLevel &operator++(MappingLevel &mappingLevel) {
 /// TODO: Make this use x for the inner-most loop that is
 /// distributed to map to x, the next innermost to y and the next innermost to
 /// z.
-static gpu::Processor getHardwareIdForMapping(MappingLevel level,
+static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
                                              int dimension) {
  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
@ -145,6 +140,21 @@ static void mapParallelOp(ParallelOp parallelOp,
  }
 }
-void mlir::greedilyMapParallelSCFToGPU(Region &region) {
+namespace {
-  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+struct GpuMapParallelLoopsPass
    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
  // Entry point of the pass: walks each region of the operation the pass runs
  // on and calls mapParallelOp on every ParallelOp found by the walk.
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions()) {
      // Only the loop itself is passed; any further mapParallelOp parameters
      // are left at whatever defaults its declaration provides.
      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
    }
  }
 };
 } // namespace
 } // namespace gpu
 } // namespace mlir
 /// Factory for the `gpu-map-parallel-loops` pass: returns a newly allocated
 /// GpuMapParallelLoopsPass owned by the caller through the unique_ptr.
 std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
 mlir::createGpuMapParallelLoopsPass() {
  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
 }
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s
 func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                    %arg3 : index) {
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@ -3,7 +3,6 @@ add_mlir_library(MLIRGPUTestPasses
  TestConvertGPUKernelToCubin.cpp
  TestConvertGPUKernelToHsaco.cpp
  TestGpuMemoryPromotion.cpp
  TestGpuParallelLoopMapping.cpp
  TestGpuRewrite.cpp
  EXCLUDE_FROM_LIBMLIR
--- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
@ -1,47 +0,0 @@
 //===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the pass testing the utilities for mapping parallel
 // loops to gpu hardware ids.
 //
 //===----------------------------------------------------------------------===//
 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
 #include "mlir/Pass/Pass.h"
 using namespace mlir;
 namespace {
 /// Simple pass for testing the mapping of parallel loops to hardware ids using
 /// a greedy mapping strategy. It is declared as `OperationPass<>` with no
 /// operation type given, unlike the func::FuncOp-scoped pass that replaces it.
 struct TestGpuGreedyParallelLoopMappingPass
    : public PassWrapper<TestGpuGreedyParallelLoopMappingPass,
                         OperationPass<>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
      TestGpuGreedyParallelLoopMappingPass)
  // Command-line flag under which the pass is exposed.
  StringRef getArgument() const final {
    return "test-gpu-greedy-parallel-loop-mapping";
  }
  // One-line help text for the pass.
  StringRef getDescription() const final {
    return "Greedily maps all parallel loops to gpu hardware ids.";
  }
  // Applies the greedy mapping utility to each region of the current operation.
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions())
      greedilyMapParallelSCFToGPU(region);
  }
 };
 } // namespace
 namespace mlir {
 namespace test {
 /// Registers TestGpuGreedyParallelLoopMappingPass by instantiating a
 /// PassRegistration for it; called from mlir-opt's registerTestPasses().
 void registerTestGpuParallelLoopMappingPass() {
  PassRegistration<TestGpuGreedyParallelLoopMappingPass>();
 }
 } // namespace test
 } // namespace mlir
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@ -79,7 +79,6 @@ void registerTestDynamicPipelinePass();
 void registerTestExpandTanhPass();
 void registerTestComposeSubView();
 void registerTestMultiBuffering();
 void registerTestGpuParallelLoopMappingPass();
 void registerTestIRVisitorsPass();
 void registerTestGenericIRVisitorsPass();
 void registerTestGenericIRVisitorsInterruptPass();
@ -176,7 +175,6 @@ void registerTestPasses() {
  mlir::test::registerTestExpandTanhPass();
  mlir::test::registerTestComposeSubView();
  mlir::test::registerTestMultiBuffering();
  mlir::test::registerTestGpuParallelLoopMappingPass();
  mlir::test::registerTestIRVisitorsPass();
  mlir::test::registerTestGenericIRVisitorsPass();
  mlir::test::registerTestInterfaces();