diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
index 07d92e82ab5a..9ae3683298f6 100644
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
@@ -60,13 +60,5 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
 LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                              ArrayRef<ParallelLoopDimMapping> mapping);
 } // namespace gpu
-
-/// Maps the parallel loops found in the given function to workgroups. The first
-/// loop encountered will be mapped to the global workgroup and the second loop
-/// encountered to the local workgroup. Within each mapping, the first three
-/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
-/// mapped to sequential loops.
-void greedilyMapParallelSCFToGPU(Region &region);
-
 } // namespace mlir
 #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index b9b127c6f5db..53f3f84efbaa 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -39,6 +39,13 @@ createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
 /// Rewrites a function region so that GPU ops execute asynchronously.
 std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
 
+/// Creates a pass that maps the parallel loops found in a function to
+/// workgroups. The first loop encountered will be mapped to the global
+/// workgroup and the second loop encountered to the local workgroup. Within
+/// each mapping, the first three dimensions are mapped to x/y/z hardware ids
+/// and all following dimensions are mapped to sequential loops.
+std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
+
 /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
 void populateGpuAllReducePatterns(RewritePatternSet &patterns);
 
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td
index 0752839fe515..f5786e877713 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Passes.td
@@ -29,4 +29,11 @@ def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
   let dependentDialects = ["async::AsyncDialect"];
 }
 
+def GpuMapParallelLoopsPass
+    : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
+  let summary = "Greedily maps loops to GPU hardware dimensions.";
+  let constructor = "mlir::createGpuMapParallelLoopsPass()";
+  let description = "Greedily maps loops to GPU hardware dimensions.";
+}
+
 #endif // MLIR_DIALECT_GPU_PASSES
diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
index b032169188bc..c7a1ef3994f5 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
@@ -13,26 +13,25 @@
 
 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"
 
+#include "PassDetail.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/Pass/Pass.h"
 
-using namespace mlir;
-using namespace mlir::gpu;
-using namespace mlir::scf;
-
 #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
 #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
+
 namespace mlir {
-namespace gpu {
 
-StringRef getMappingAttrName() { return "mapping"; }
+using scf::ParallelOp;
 
-ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
-                                                     AffineMap map,
-                                                     AffineMap bound) {
+StringRef gpu::getMappingAttrName() { return "mapping"; }
+
+gpu::ParallelLoopDimMapping
+gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map,
+                                   AffineMap bound) {
   MLIRContext *context = map.getContext();
   OpBuilder builder(context);
   return ParallelLoopDimMapping::get(
@@ -40,8 +39,8 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
       AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
 }
 
-LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
-                             ArrayRef<ParallelLoopDimMapping> mapping) {
+LogicalResult gpu::setMappingAttr(ParallelOp ploopOp,
+                                  ArrayRef<ParallelLoopDimMapping> mapping) {
   // Verify that each processor is mapped to only once.
   llvm::DenseSet<gpu::Processor> specifiedMappings;
   for (auto dimAttr : mapping) {
@@ -56,20 +55,17 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                    ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
   return success();
 }
-} // namespace gpu
-} // namespace mlir
 
+namespace gpu {
 namespace {
-
 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
+} // namespace
 
 static constexpr int kNumHardwareIds = 3;
 
-} // namespace
-
 /// Bounded increment on MappingLevel. Increments to the next
 /// level unless Sequential was already reached.
-MappingLevel &operator++(MappingLevel &mappingLevel) {
+static MappingLevel &operator++(MappingLevel &mappingLevel) {
   if (mappingLevel < Sequential) {
     mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
   }
@@ -82,8 +78,7 @@ MappingLevel &operator++(MappingLevel &mappingLevel) {
 /// TODO: Make this use x for the inner-most loop that is
 /// distributed to map to x, the next innermost to y and the next innermost to
 /// z.
-static gpu::Processor getHardwareIdForMapping(MappingLevel level,
-                                              int dimension) {
+static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
 
   if (dimension >= kNumHardwareIds || level == Sequential)
     return Processor::Sequential;
@@ -145,6 +140,21 @@ static void mapParallelOp(ParallelOp parallelOp,
   }
 }
 
-void mlir::greedilyMapParallelSCFToGPU(Region &region) {
-  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+namespace {
+/// Function pass that runs mapParallelOp on every scf::ParallelOp nested in
+/// the function it is scheduled on.
+struct GpuMapParallelLoopsPass
+    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
+  void runOnOperation() override {
+    getOperation()->walk([](ParallelOp op) { mapParallelOp(op); });
+  }
+};
+} // namespace
+} // namespace gpu
+} // namespace mlir
+
+/// Returns a new instance of the greedy parallel-loop mapping pass.
+std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
+mlir::createGpuMapParallelLoopsPass() {
+  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
 }
diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir
index ff5b07f1844c..8c233648bca9 100644
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s
 
 func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                     %arg3 : index) {
diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
index 97fc6699f6fb..65f4780ea82e 100644
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -3,7 +3,6 @@ add_mlir_library(MLIRGPUTestPasses
   TestConvertGPUKernelToCubin.cpp
   TestConvertGPUKernelToHsaco.cpp
   TestGpuMemoryPromotion.cpp
-  TestGpuParallelLoopMapping.cpp
   TestGpuRewrite.cpp
 
   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
deleted file mode 100644
index f53abba6a21b..000000000000
--- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass testing the utilities for mapping parallel
-// loops to gpu hardware ids.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
-#include "mlir/Pass/Pass.h"
-
-using namespace mlir;
-
-namespace {
-/// Simple pass for testing the mapping of parallel loops to hardware ids using
-/// a greedy mapping strategy.
-struct TestGpuGreedyParallelLoopMappingPass
-    : public PassWrapper<TestGpuGreedyParallelLoopMappingPass,
-                         OperationPass<>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
-      TestGpuGreedyParallelLoopMappingPass)
-
-  StringRef getArgument() const final {
-    return "test-gpu-greedy-parallel-loop-mapping";
-  }
-  StringRef getDescription() const final {
-    return "Greedily maps all parallel loops to gpu hardware ids.";
-  }
-  void runOnOperation() override {
-    for (Region &region : getOperation()->getRegions())
-      greedilyMapParallelSCFToGPU(region);
-  }
-};
-} // namespace
-
-namespace mlir {
-namespace test {
-void registerTestGpuParallelLoopMappingPass() {
-  PassRegistration<TestGpuGreedyParallelLoopMappingPass>();
-}
-} // namespace test
-} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 78814df031e6..dcd8946d9c40 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -79,7 +79,6 @@ void registerTestDynamicPipelinePass();
 void registerTestExpandTanhPass();
 void registerTestComposeSubView();
 void registerTestMultiBuffering();
-void registerTestGpuParallelLoopMappingPass();
 void registerTestIRVisitorsPass();
 void registerTestGenericIRVisitorsPass();
 void registerTestGenericIRVisitorsInterruptPass();
@@ -176,7 +175,6 @@ void registerTestPasses() {
   mlir::test::registerTestExpandTanhPass();
   mlir::test::registerTestComposeSubView();
   mlir::test::registerTestMultiBuffering();
-  mlir::test::registerTestGpuParallelLoopMappingPass();
   mlir::test::registerTestIRVisitorsPass();
   mlir::test::registerTestGenericIRVisitorsPass();
   mlir::test::registerTestInterfaces();