[SE] Make Kernel movable

Summary:
Kernel is basically just a smart pointer to the underlying
implementation, so making it movable prevents having to store a
std::unique_ptr to it.

Reviewers: jlebar

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D24150

llvm-svn: 280437
This commit is contained in:
Jason Henline 2016-09-02 00:22:05 +00:00
parent 3bd6d7fb78
commit dc2dff6c68
3 changed files with 12 additions and 72 deletions

View File

@ -18,7 +18,6 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include "streamexecutor/StreamExecutor.h"
@ -111,7 +110,7 @@ int main() {
se::Device *Device = getOrDie(Platform->getDevice(0));
// Load the kernel onto the device.
std::unique_ptr<cg::SaxpyKernel> Kernel =
cg::SaxpyKernel Kernel =
getOrDie(Device->createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
// Allocate memory on the device.
@ -124,7 +123,7 @@ int main() {
se::Stream Stream = getOrDie(Device->createStream());
Stream.thenCopyH2D<float>(HostX, X)
.thenCopyH2D<float>(HostY, Y)
.thenLaunch(ArraySize, 1, *Kernel, A, X, Y)
.thenLaunch(ArraySize, 1, Kernel, A, X, Y)
.thenCopyD2H<float>(X, HostX);
// Wait for the stream to complete.
se::dieIfError(Stream.blockHostUntilDone());

View File

@ -32,22 +32,18 @@ public:
/// Creates a kernel object of type KernelT for this device.
///
/// KernelT must derive from KernelBase; the enable_if in the return type
/// rejects any other instantiation at compile time.
///
/// On success the kernel is returned by value — Kernel is movable, so the
/// caller does not need to hold it behind a std::unique_ptr. On failure the
/// error from the platform device is propagated unchanged.
///
/// See \ref CompilerGeneratedKernelExample "Kernel.h" for an example of how
/// this method is used.
template <typename KernelT>
Expected<typename std::enable_if<std::is_base_of<KernelBase, KernelT>::value,
                                 KernelT>::type>
createKernel(const MultiKernelLoaderSpec &Spec) {
  // Ask the platform-specific device to load the kernel and hand back an
  // owning handle to it.
  Expected<std::unique_ptr<PlatformKernelHandle>> MaybeKernelHandle =
      PDevice->createKernel(Spec);
  if (!MaybeKernelHandle) {
    return MaybeKernelHandle.takeError();
  }
  // Wrap the handle in the typed kernel and move it out to the caller.
  return KernelT(Spec.getKernelName(), std::move(*MaybeKernelHandle));
}
/// Creates a stream object for this device.

View File

@ -11,68 +11,10 @@
/// Types to represent device kernels (code compiled to run on GPU or other
/// accelerator).
///
/// With the kernel parameter types recorded in the Kernel template parameters,
/// type-safe kernel launch functions can be written with signatures like the
/// following:
/// \code
/// template <typename... ParameterTs>
/// void Launch(
/// const Kernel<ParameterTs...> &Kernel, ParameterTs... Arguments);
/// \endcode
/// and the compiler will check that the user passes in arguments with types
/// matching the corresponding kernel parameters.
///
/// A problem is that a Kernel template specialization with the right parameter
/// types must be passed as the first argument to the Launch function, and it's
/// just as hard to get the types right in that template specialization as it is
/// to get them right for the kernel arguments.
///
/// With this problem in mind, it is not recommended for users to specialize the
/// Kernel template class themselves, but instead to let the compiler do it for
/// them. When the compiler encounters a device kernel function, it can create a
/// Kernel template specialization in the host code that has the right parameter
/// types for that kernel and which has a type name based on the name of the
/// kernel function.
///
/// \anchor CompilerGeneratedKernelExample
/// For example, if a CUDA device kernel function with the following signature
/// has been defined:
/// \code
/// void Saxpy(float A, float *X, float *Y);
/// \endcode
/// the compiler can insert the following declaration in the host code:
/// \code
/// namespace compiler_cuda_namespace {
/// namespace se = streamexecutor;
/// using SaxpyKernel =
/// se::Kernel<
/// float,
/// se::GlobalDeviceMemory<float>,
/// se::GlobalDeviceMemory<float>>;
/// } // namespace compiler_cuda_namespace
/// \endcode
/// and then the user can launch the kernel by calling the StreamExecutor launch
/// function as follows:
/// \code
/// namespace ccn = compiler_cuda_namespace;
/// using KernelPtr = std::unique_ptr<ccn::SaxpyKernel>;
/// // Assumes Device is a pointer to the Device on which to launch the
/// // kernel.
/// //
/// // See KernelSpec.h for details on how the compiler can create a
/// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
/// Expected<KernelPtr> MaybeKernel =
/// Device->createKernel<ccn::SaxpyKernel>(ccn::SaxpyKernelLoaderSpec);
/// if (!MaybeKernel) { /* Handle error */ }
/// KernelPtr SaxpyKernel = std::move(*MaybeKernel);
/// Launch(*SaxpyKernel, A, X, Y);
/// \endcode
///
/// With the compiler's help in specializing Kernel for each device kernel
/// function (and generating a MultiKernelLoaderSpec instance for each kernel),
/// the user can safely launch the device kernel from the host and get an error
/// message at compile time if the argument types don't match the kernel
/// parameter types.
/// See the \ref index "main page" for an example of how a compiler-generated
/// specialization of the Kernel class template can be used along with the
/// streamexecutor::Stream::thenLaunch method to create a typesafe interface for
/// kernel launches.
///
//===----------------------------------------------------------------------===//
@ -112,6 +54,9 @@ public:
/// Constructs a kernel with the given name, taking ownership of the
/// platform-specific handle that backs it.
Kernel(llvm::StringRef Name, std::unique_ptr<PlatformKernelHandle> PHandle)
    : KernelBase(Name), PHandle(std::move(PHandle)) {}
/// Kernel wraps a std::unique_ptr to the platform handle, so it is movable
/// (moves transfer ownership of the handle) but not copyable.
Kernel(Kernel &&Other) = default;
Kernel &operator=(Kernel &&Other) = default;
/// Gets the underlying platform-specific handle for this kernel.
/// The returned pointer is non-owning; the Kernel retains ownership.
PlatformKernelHandle *getPlatformHandle() const { return PHandle.get(); }