[SE] Make Kernel movable

Summary:
Kernel is basically just a smart pointer to the underlying
implementation, so making it movable prevents having to store a
std::unique_ptr to it.

Reviewers: jlebar

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D24150

llvm-svn: 280437
This commit is contained in:
Jason Henline 2016-09-02 00:22:05 +00:00
parent 3bd6d7fb78
commit dc2dff6c68
3 changed files with 12 additions and 72 deletions

View File

@ -18,7 +18,6 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include "streamexecutor/StreamExecutor.h"
@ -111,7 +110,7 @@ int main() {
se::Device *Device = getOrDie(Platform->getDevice(0));
// Load the kernel onto the device.
std::unique_ptr<cg::SaxpyKernel> Kernel =
cg::SaxpyKernel Kernel =
getOrDie(Device->createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
// Allocate memory on the device.
@ -124,7 +123,7 @@ int main() {
se::Stream Stream = getOrDie(Device->createStream());
Stream.thenCopyH2D<float>(HostX, X)
.thenCopyH2D<float>(HostY, Y)
.thenLaunch(ArraySize, 1, *Kernel, A, X, Y)
.thenLaunch(ArraySize, 1, Kernel, A, X, Y)
.thenCopyD2H<float>(X, HostX);
// Wait for the stream to complete.
se::dieIfError(Stream.blockHostUntilDone());

View File

@ -32,22 +32,18 @@ public:
/// Creates a kernel object of type KernelT for this device.
///
/// KernelT must derive from KernelBase; the enable_if in the return type
/// rejects any other instantiation at compile time.
///
/// On success the kernel is returned by value — Kernel is movable, so the
/// caller does not need to hold it behind a std::unique_ptr. On failure the
/// error from the platform device is propagated unchanged.
///
/// See \ref CompilerGeneratedKernelExample "Kernel.h" for an example of how
/// this method is used.
template <typename KernelT>
Expected<typename std::enable_if<std::is_base_of<KernelBase, KernelT>::value,
                                 KernelT>::type>
createKernel(const MultiKernelLoaderSpec &Spec) {
  // Ask the platform-specific device to load the kernel and hand back an
  // owning handle to it.
  Expected<std::unique_ptr<PlatformKernelHandle>> MaybeKernelHandle =
      PDevice->createKernel(Spec);
  if (!MaybeKernelHandle) {
    return MaybeKernelHandle.takeError();
  }
  // Wrap the handle in the typed kernel and move it out to the caller.
  return KernelT(Spec.getKernelName(), std::move(*MaybeKernelHandle));
}
/// Creates a stream object for this device.

View File

@ -11,68 +11,10 @@
/// Types to represent device kernels (code compiled to run on GPU or other
/// accelerator).
///
/// With the kernel parameter types recorded in the Kernel template parameters,
/// type-safe kernel launch functions can be written with signatures like the
/// following:
/// \code
/// template <typename... ParameterTs>
/// void Launch(
/// const Kernel<ParameterTs...> &Kernel, ParameterTs... Arguments);
/// \endcode
/// and the compiler will check that the user passes in arguments with types
/// matching the corresponding kernel parameters.
///
/// A problem is that a Kernel template specialization with the right parameter
/// types must be passed as the first argument to the Launch function, and it's
/// just as hard to get the types right in that template specialization as it is
/// to get them right for the kernel arguments.
///
/// With this problem in mind, it is not recommended for users to specialize the
/// Kernel template class themselves, but instead to let the compiler do it for
/// them. When the compiler encounters a device kernel function, it can create a
/// Kernel template specialization in the host code that has the right parameter
/// types for that kernel and which has a type name based on the name of the
/// kernel function.
///
/// \anchor CompilerGeneratedKernelExample
/// For example, if a CUDA device kernel function with the following signature
/// has been defined:
/// \code
/// void Saxpy(float A, float *X, float *Y);
/// \endcode
/// the compiler can insert the following declaration in the host code:
/// \code
/// namespace compiler_cuda_namespace {
/// namespace se = streamexecutor;
/// using SaxpyKernel =
/// se::Kernel<
/// float,
/// se::GlobalDeviceMemory<float>,
/// se::GlobalDeviceMemory<float>>;
/// } // namespace compiler_cuda_namespace
/// \endcode
/// and then the user can launch the kernel by calling the StreamExecutor launch
/// function as follows:
/// \code
/// namespace ccn = compiler_cuda_namespace;
/// using KernelPtr = std::unique_ptr<ccn::SaxpyKernel>;
/// // Assumes Device is a pointer to the Device on which to launch the
/// // kernel.
/// //
/// // See KernelSpec.h for details on how the compiler can create a
/// // MultiKernelLoaderSpec instance like SaxpyKernelLoaderSpec below.
/// Expected<KernelPtr> MaybeKernel =
/// Device->createKernel<ccn::SaxpyKernel>(ccn::SaxpyKernelLoaderSpec);
/// if (!MaybeKernel) { /* Handle error */ }
/// KernelPtr SaxpyKernel = std::move(*MaybeKernel);
/// Launch(*SaxpyKernel, A, X, Y);
/// \endcode
///
/// With the compiler's help in specializing Kernel for each device kernel
/// function (and generating a MultiKernelLoaderSpec instance for each kernel),
/// the user can safely launch the device kernel from the host and get an error
/// message at compile time if the argument types don't match the kernel
/// parameter types.
/// See the \ref index "main page" for an example of how a compiler-generated
/// specialization of the Kernel class template can be used along with the
/// streamexecutor::Stream::thenLaunch method to create a typesafe interface for
/// kernel launches.
///
//===----------------------------------------------------------------------===//
@ -112,6 +54,9 @@ public:
/// Constructs a kernel with the given name, taking ownership of the
/// platform-specific handle that backs it.
Kernel(llvm::StringRef Name, std::unique_ptr<PlatformKernelHandle> PHandle)
    : KernelBase(Name), PHandle(std::move(PHandle)) {}
/// Kernel wraps a std::unique_ptr to the platform handle, so it is movable
/// (moves transfer ownership of the handle) but not copyable.
Kernel(Kernel &&Other) = default;
Kernel &operator=(Kernel &&Other) = default;
/// Gets the underlying platform-specific handle for this kernel.
/// The returned pointer is non-owning; the Kernel retains ownership.
PlatformKernelHandle *getPlatformHandle() const { return PHandle.get(); }