[StreamExecutor] Clean up device copy comments

Summary:
Consolidate Executor::synchronousCopy* and Stream::thenCopy* methods into
Doxygen method groups and combine all their comments into one section.

Also add a "doc" target to the build files to use Doxygen to build the
documentation.

Reviewers: jlebar

Subscribers: jprice, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23845

llvm-svn: 279654
This commit is contained in:
Jason Henline 2016-08-24 18:56:26 +00:00
parent c4a621155b
commit 424fc7e611
4 changed files with 2361 additions and 159 deletions

View File

@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 3.1)
option(STREAM_EXECUTOR_UNIT_TESTS "enable unit tests" ON)
option(STREAM_EXECUTOR_ENABLE_DOXYGEN "enable StreamExecutor doxygen" ON)
# First find includes relative to the streamexecutor top-level source path.
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/include)
@ -61,3 +62,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter")
add_subdirectory(lib)
# Documentation support: defines a "doc" target that runs Doxygen.
if(STREAM_EXECUTOR_ENABLE_DOXYGEN)
  # Configuration fails here if Doxygen cannot be found.
  find_package(Doxygen REQUIRED)

  # Produce the Doxyfile in the build directory from the Doxyfile.in template,
  # substituting only @VAR@-style references.
  configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)

  # "doc" is declared without ALL, so it only runs when requested explicitly.
  add_custom_target(doc
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen"
    VERBATIM)
endif()

File diff suppressed because it is too large Load Diff

View File

@ -77,16 +77,34 @@ public:
return PExecutor->unregisterHostMemory(Memory);
}
/// Host-synchronously copies a slice of an array of elements of type T from
/// host to device memory.
/// \anchor ExecutorHostSyncCopyGroup
/// \name Host-synchronous device memory copying functions
///
/// Returns an error if ElementCount is too large for the source slice or the
/// destination.
/// These methods block the calling host thread while copying data to or from
/// device memory. On the device side, these methods do not block any ongoing
/// device calls.
///
/// The calling host thread is blocked until the copy completes. Can be used
/// with any host memory, the host memory does not have to be allocated with
/// allocateHostMemory or registered with registerHostMemory. Does not block
/// any ongoing device calls.
/// There are no restrictions on the host memory that is used as a source or
/// destination in these copy methods, so there is no need to allocate that
/// host memory using allocateHostMemory or register it with
/// registerHostMemory.
///
/// Each of these methods has a single template parameter, T, that specifies
/// the type of data being copied. The ElementCount arguments specify the
/// number of objects of type T to be copied.
///
/// For ease of use, each of the methods is overloaded to take either a
/// GlobalDeviceMemorySlice or a GlobalDeviceMemory argument in the device
/// memory argument slots, and the GlobalDeviceMemory arguments are just
/// converted to GlobalDeviceMemorySlice arguments internally by using
/// GlobalDeviceMemory::asSlice.
///
/// These methods perform bounds checking to make sure that the ElementCount
/// is not too large for the source or destination. For methods that do not
/// take an ElementCount argument, an error is returned if the source size
/// does not exactly match the destination size.
///@{
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
@ -104,11 +122,6 @@ public:
ElementCount * sizeof(T));
}
/// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but does not take an element count
/// argument because it copies the entire source array.
///
/// Returns an error if the Src and Dst sizes do not match.
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
llvm::MutableArrayRef<T> Dst) {
@ -120,11 +133,6 @@ public:
return synchronousCopyD2H(Src, Dst, Src.getElementCount());
}
/// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but copies to a pointer rather than an
/// llvm::MutableArrayRef.
///
/// Returns an error if ElementCount is too large for the source slice.
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst,
size_t ElementCount) {
@ -132,42 +140,24 @@ public:
ElementCount);
}
/// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but the source is a GlobalDeviceMemory
/// rather than a GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
// Convenience overload: Src is converted to a GlobalDeviceMemorySlice with
// asSlice() and the call is forwarded to the slice-based overload. No
// checking is done here; the returned Error is whatever that overload yields.
return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
}
/// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>) but the source is a GlobalDeviceMemory rather
/// than a GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
llvm::MutableArrayRef<T> Dst) {
// No ElementCount variant: Src is converted to a GlobalDeviceMemorySlice with
// asSlice() and forwarded, together with Dst, to the two-argument
// slice-based overload, whose Error result is returned unchanged.
return synchronousCopyD2H(Src.asSlice(), Dst);
}
/// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, T*, size_t) but
/// the source is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, T *Dst,
size_t ElementCount) {
// Raw-pointer destination variant: Src is converted to a
// GlobalDeviceMemorySlice with asSlice() and the call is forwarded to the
// (slice, T*, size_t) overload. Dst is passed through untouched.
return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
}
/// Host-synchronously copies a slice of an array of elements of type T from
/// device to host memory.
///
/// Returns an error if ElementCount is too large for the source or the
/// destination.
///
/// The calling host thread is blocked until the copy completes. Can be used
/// with any host memory, the host memory does not have to be allocated with
/// allocateHostMemory or registered with registerHostMemory. Does not block
/// any ongoing device calls.
template <typename T>
Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
GlobalDeviceMemorySlice<T> Dst,
@ -186,11 +176,6 @@ public:
ElementCount * sizeof(T));
}
/// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
/// argument because it copies the entire source array.
///
/// Returns an error if the Src and Dst sizes do not match.
template <typename T>
Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
GlobalDeviceMemorySlice<T> Dst) {
@ -203,11 +188,6 @@ public:
return synchronousCopyH2D(Src, Dst, Dst.getElementCount());
}
/// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but copies from a pointer rather than
/// an llvm::ArrayRef.
///
/// Returns an error if ElementCount is too large for the destination.
template <typename T>
Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst,
size_t ElementCount) {
@ -215,42 +195,23 @@ public:
ElementCount);
}
/// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
/// GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
// Convenience overload: Dst is converted to a GlobalDeviceMemorySlice with
// asSlice() and the call is forwarded to the slice-based overload. No
// checking is done here; the returned Error is whatever that overload yields.
return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
}
/// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
/// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory
/// rather than a GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
// No ElementCount variant: Dst is converted to a GlobalDeviceMemorySlice with
// asSlice() and forwarded, together with Src, to the two-argument
// slice-based overload, whose Error result is returned unchanged.
return synchronousCopyH2D(Src, Dst.asSlice());
}
/// Similar to synchronousCopyH2D(T*, GlobalDeviceMemorySlice<T>, size_t) but
/// the destination is a GlobalDeviceMemory rather than a
/// GlobalDeviceMemorySlice.
template <typename T>
Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
// Raw-pointer source variant: Dst is converted to a GlobalDeviceMemorySlice
// with asSlice() and the call is forwarded to the (T*, slice, size_t)
// overload. Src is passed through untouched.
return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
}
/// Host-synchronously copies a slice of an array of elements of type T from
/// one location in device memory to another.
///
/// Returns an error if ElementCount is too large for the source slice or the
/// destination.
///
/// The calling host thread is blocked until the copy completes. Can be used
/// with any host memory, the host memory does not have to be allocated with
/// allocateHostMemory or registered with registerHostMemory. Does not block
/// any ongoing device calls.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemorySlice<T> Dst,
@ -271,11 +232,6 @@ public:
ElementCount * sizeof(T));
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
/// argument because it copies the entire source array.
///
/// Returns an error if the Src and Dst sizes do not match.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemorySlice<T> Dst) {
@ -288,9 +244,6 @@ public:
return synchronousCopyD2D(Src, Dst, Src.getElementCount());
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the source is a
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
GlobalDeviceMemorySlice<T> Dst,
@ -298,51 +251,38 @@ public:
return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount);
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the source is a GlobalDeviceMemory<T>
/// rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
GlobalDeviceMemorySlice<T> Dst) {
// Only the source needs converting: Src.asSlice() yields a
// GlobalDeviceMemorySlice, and the call is forwarded to the two-argument
// (slice, slice) overload, whose Error result is returned unchanged.
return synchronousCopyD2D(Src.asSlice(), Dst);
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemory<T> Dst, size_t ElementCount) {
// Only the destination needs converting: Dst.asSlice() yields a
// GlobalDeviceMemorySlice, and the call is forwarded to the
// (slice, slice, size_t) overload. No checking is done here.
return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount);
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory<T>
/// rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemory<T> Dst) {
// Only the destination needs converting: Dst.asSlice() yields a
// GlobalDeviceMemorySlice, and the call is forwarded to the two-argument
// (slice, slice) overload, whose Error result is returned unchanged.
return synchronousCopyD2D(Src, Dst.asSlice());
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the source and destination are
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
// Both operands are converted to GlobalDeviceMemorySlice with asSlice()
// before forwarding to the (slice, slice, size_t) overload. No checking is
// done here; the returned Error is whatever that overload yields.
return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
}
/// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the source and destination are
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
GlobalDeviceMemory<T> Dst) {
// Both operands are converted to GlobalDeviceMemorySlice with asSlice()
// before forwarding to the two-argument (slice, slice) overload, whose Error
// result is returned unchanged.
return synchronousCopyD2D(Src.asSlice(), Dst.asSlice());
}
///@} End host-synchronous device memory copying functions
private:
PlatformExecutor *PExecutor;
};

View File

@ -99,15 +99,21 @@ public:
return *this;
}
/// Enqueues on this stream a command to copy a slice of an array of elements
/// of type T from device to host memory.
/// \name Device memory copying functions
///
/// Sets an error if ElementCount is too large for the source or the
/// destination.
/// These methods enqueue a device memory copy operation on the stream and
/// return without waiting for the operation to complete.
///
/// If the Src memory was not created by allocateHostMemory or registered with
/// registerHostMemory, then the copy operation may cause the host and device
/// to block until the copy operation is completed.
/// Any host memory used as a source or destination for one of these
/// operations must be allocated with Executor::allocateHostMemory or
/// registered with Executor::registerHostMemory. Otherwise, the enqueuing
/// operation may block until the copy operation is fully complete.
///
/// The arguments and bounds checking for these methods match the API of the
/// \ref ExecutorHostSyncCopyGroup
/// "host-synchronous device memory copying functions" of Executor.
///@{
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemorySlice<T> Src,
llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
@ -125,11 +131,6 @@ public:
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but does not take an element count
/// argument because it copies the entire source array.
///
/// Sets an error if the Src and Dst sizes do not match.
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemorySlice<T> Src,
llvm::MutableArrayRef<T> Dst) {
@ -143,11 +144,6 @@ public:
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but copies to a pointer rather than an
/// llvm::MutableArrayRef.
///
/// Sets an error if ElementCount is too large for the source slice.
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst,
size_t ElementCount) {
@ -155,9 +151,6 @@ public:
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but the source is a GlobalDeviceMemory
/// rather than a GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, llvm::MutableArrayRef<T> Dst,
size_t ElementCount) {
@ -165,26 +158,18 @@ public:
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>) but the source is a GlobalDeviceMemory rather
/// than a GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, llvm::MutableArrayRef<T> Dst) {
// Convenience overload: Src is converted to a GlobalDeviceMemorySlice with
// asSlice() and the work is delegated to the two-argument slice-based
// overload. The delegated call's return value is ignored and this stream is
// returned.
thenCopyD2H(Src.asSlice(), Dst);
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>, T*, size_t) but the
/// source is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyD2H(GlobalDeviceMemory<T> Src, T *Dst, size_t ElementCount) {
// Raw-pointer destination variant: Src is converted to a
// GlobalDeviceMemorySlice with asSlice() and the work is delegated to the
// (slice, T*, size_t) overload. This stream is returned regardless of what
// the delegated call returns.
thenCopyD2H(Src.asSlice(), Dst, ElementCount);
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but copies from host to device memory
/// rather than device to host memory.
template <typename T>
Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemorySlice<T> Dst,
size_t ElementCount) {
@ -203,11 +188,6 @@ public:
return *this;
}
/// Similar to thenCopyH2D(llvm::ArrayRef<T>, GlobalDeviceMemorySlice<T>,
/// size_t) but does not take an element count argument because it copies the
/// entire source array.
///
/// Sets an error if the Src and Dst sizes do not match.
template <typename T>
Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemorySlice<T> Dst) {
if (Src.size() != Dst.getElementCount())
@ -220,10 +200,6 @@ public:
return *this;
}
/// Similar to thenCopyH2D(llvm::ArrayRef<T>, GlobalDeviceMemorySlice<T>,
/// size_t) but copies from a pointer rather than an llvm::ArrayRef.
///
/// Sets an error if ElementCount is too large for the destination.
template <typename T>
Stream &thenCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst,
size_t ElementCount) {
@ -231,9 +207,6 @@ public:
return *this;
}
/// Similar to thenCopyH2D(llvm::ArrayRef<T>, GlobalDeviceMemorySlice<T>,
/// size_t) but the destination is a GlobalDeviceMemory rather than a
/// GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
@ -241,26 +214,18 @@ public:
return *this;
}
/// Similar to thenCopyH2D(llvm::ArrayRef<T>, GlobalDeviceMemorySlice<T>) but
/// the destination is a GlobalDeviceMemory rather than a
/// GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
// Convenience overload: Dst is converted to a GlobalDeviceMemorySlice with
// asSlice() and the work is delegated to the two-argument slice-based
// overload. The delegated call's return value is ignored and this stream is
// returned.
thenCopyH2D(Src, Dst.asSlice());
return *this;
}
/// Similar to thenCopyH2D(T*, GlobalDeviceMemorySlice<T>, size_t) but the
/// destination is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
template <typename T>
Stream &thenCopyH2D(T *Src, GlobalDeviceMemory<T> Dst, size_t ElementCount) {
// Raw-pointer source variant: Dst is converted to a GlobalDeviceMemorySlice
// with asSlice() and the work is delegated to the (T*, slice, size_t)
// overload. This stream is returned regardless of what the delegated call
// returns.
thenCopyH2D(Src, Dst.asSlice(), ElementCount);
return *this;
}
/// Similar to thenCopyD2H(GlobalDeviceMemorySlice<T>,
/// llvm::MutableArrayRef<T>, size_t) but copies from one location in device
/// memory to another rather than from device to host memory.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemorySlice<T> Dst, size_t ElementCount) {
@ -280,11 +245,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
/// argument because it copies the entire source array.
///
/// Sets an error if the Src and Dst sizes do not match.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemorySlice<T> Dst) {
@ -298,9 +258,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the source is a
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemorySlice<T> Dst,
size_t ElementCount) {
@ -308,9 +265,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the source is a GlobalDeviceMemory<T>
/// rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemory<T> Src,
GlobalDeviceMemorySlice<T> Dst) {
@ -318,9 +272,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
@ -328,9 +279,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory<T>
/// rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemorySlice<T> Src,
GlobalDeviceMemory<T> Dst) {
@ -338,9 +286,6 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>, size_t) but the source and destination are
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
size_t ElementCount) {
@ -348,15 +293,14 @@ public:
return *this;
}
/// Similar to thenCopyD2D(GlobalDeviceMemorySlice<T>,
/// GlobalDeviceMemorySlice<T>) but the source and destination are
/// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
template <typename T>
Stream &thenCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst) {
// Both operands are converted to GlobalDeviceMemorySlice with asSlice() and
// the work is delegated to the two-argument (slice, slice) overload. The
// delegated call's return value is ignored and this stream is returned.
thenCopyD2D(Src.asSlice(), Dst.asSlice());
return *this;
}
///@} End device memory copying functions
private:
/// Sets the error state from an Error object.
///