diff --git a/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h b/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h
new file mode 100644
index 000000000000..1ed03da3d314
--- /dev/null
+++ b/mlir/include/mlir/Conversion/AffineToGPU/AffineToGPUPass.h
@@ -0,0 +1,35 @@
+//===- AffineToGPUPass.h - Pass converting loops to GPU kernels -*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+#ifndef MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_
+#define MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_
+
+namespace mlir {
+class FunctionPassBase;
+
+/// Create a pass that converts loop nests into GPU kernels.  It considers
+/// top-level affine.for operations as roots of loop nests and converts them
+/// to gpu.launch operations if possible.
+///
+/// No check on the size of the block or grid, or on the validity of
+/// parallelization, is performed; it is the responsibility of the caller
+/// to strip-mine the loops and to perform the dependence analysis before
+/// calling the conversion.
+FunctionPassBase *createSimpleAffineToGPUPass(unsigned numBlockDims,
+                                              unsigned numThreadDims);
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_AFFINETOGPU_AFFINETOGPUPASS_H_
diff --git a/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp b/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp
index 22f254fad3c4..601340b114d3 100644
--- a/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp
+++ b/mlir/lib/Conversion/AffineToGPU/AffineToGPUPass.cpp
@@ -15,6 +15,7 @@
 // limitations under the License.
 // =============================================================================
 
+#include "mlir/Conversion/AffineToGPU/AffineToGPUPass.h"
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Conversion/AffineToGPU/AffineToGPU.h"
 #include "mlir/Pass/Pass.h"
@@ -40,17 +41,33 @@ namespace {
 // GPU launch operations.  Nested launches are not allowed, so this does not
 // walk the function recursively to avoid considering nested loops.
 struct AffineForGPUMapper : public FunctionPass<AffineForGPUMapper> {
+  AffineForGPUMapper(unsigned numBlockDims, unsigned numThreadDims)
+      : numBlockDims(numBlockDims), numThreadDims(numThreadDims) {}
+
   void runOnFunction() override {
     for (Block &block : getFunction())
       for (Operation &op : llvm::make_early_inc_range(block))
         if (auto forOp = dyn_cast<AffineForOp>(&op))
-          if (failed(convertAffineLoopNestToGPULaunch(
-                  forOp, clNumBlockDims.getValue(),
-                  clNumThreadDims.getValue())))
+          if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
+                                                      numThreadDims)))
             signalPassFailure();
   }
+
+  unsigned numBlockDims;
+  unsigned numThreadDims;
+};
+
+struct AffineForGPUMapperCLI : public AffineForGPUMapper {
+  AffineForGPUMapperCLI()
+      : AffineForGPUMapper(clNumBlockDims.getValue(),
+                           clNumThreadDims.getValue()) {}
 };
 } // namespace
 
-static PassRegistration<AffineForGPUMapper>
+FunctionPassBase *mlir::createSimpleAffineToGPUPass(unsigned numBlockDims,
+                                                    unsigned numThreadDims) {
+  return new AffineForGPUMapper(numBlockDims, numThreadDims);
+}
+
+static PassRegistration<AffineForGPUMapperCLI>
     registration(PASS_NAME, "Convert top-level affine loops to GPU kernels");