forked from OSchip/llvm-project
Implement async_work_group_strided_copy builtin v2
This is a simple implementation which just copies data synchronously. v2: - Use size_t. llvm-svn: 219007
This commit is contained in:
parent
b5064f79ef
commit
ed5bbfdb1b
|
@ -0,0 +1,15 @@
|
|||
/*
 * First instantiation: destination pointer in the local address space,
 * source pointer in the global address space.
 *
 * __CLC_DST_ADDR_SPACE and __CLC_SRC_ADDR_SPACE are consumed by the
 * prototype template named in __CLC_BODY to qualify its dst/src parameters.
 * NOTE(review): gentype.inc presumably includes __CLC_BODY once per generic
 * type, defining __CLC_GENTYPE each time -- confirm against gentype.inc.
 */
#define __CLC_DST_ADDR_SPACE local
|
||||
#define __CLC_SRC_ADDR_SPACE global
|
||||
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
/* Drop the instantiation macros so they can be redefined afterwards. */
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
||||
|
||||
/*
 * Second instantiation: destination pointer in the global address space,
 * source pointer in the local address space.
 *
 * Same template and type-iteration include as the local <- global
 * direction; only the two address-space macros are swapped.
 */
#define __CLC_DST_ADDR_SPACE global
|
||||
#define __CLC_SRC_ADDR_SPACE local
|
||||
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
/* Drop the instantiation macros so they do not leak into later headers. */
#undef __CLC_DST_ADDR_SPACE
|
||||
#undef __CLC_SRC_ADDR_SPACE
|
||||
#undef __CLC_BODY
|
|
@ -0,0 +1,6 @@
|
|||
/*
 * Prototype template for async_work_group_strided_copy.
 *
 * The including file must define __CLC_DST_ADDR_SPACE, __CLC_SRC_ADDR_SPACE
 * and __CLC_GENTYPE before this text is included; each inclusion declares
 * one overload for that combination.
 *
 *   dst          - destination buffer (address space: __CLC_DST_ADDR_SPACE)
 *   src          - source buffer, read-only (address space:
 *                  __CLC_SRC_ADDR_SPACE)
 *   num_gentypes - number of elements to copy
 *   stride       - element stride; the definitions in this change apply it
 *                  to the source when dst is local and to the destination
 *                  when dst is global
 *   event        - event handle; the definitions in this change return it
 *                  unchanged
 */
_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
|
||||
__CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
|
||||
const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
|
||||
size_t num_gentypes,
|
||||
size_t stride,
|
||||
event_t event);
|
|
@ -137,6 +137,7 @@
|
|||
#include <clc/synchronization/barrier.h>
|
||||
|
||||
/* 6.11.10 Async Copy and Prefetch Functions */
|
||||
#include <clc/async/async_work_group_strided_copy.h>
|
||||
#include <clc/async/prefetch.h>
|
||||
#include <clc/async/wait_group_events.h>
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
async/async_work_group_strided_copy.cl
|
||||
async/prefetch.cl
|
||||
async/wait_group_events.cl
|
||||
atomic/atomic_xchg.cl
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
/*
 * Translation unit that emits the async_work_group_strided_copy definitions.
 * NOTE(review): gentype.inc presumably includes __CLC_BODY once per generic
 * type -- confirm against gentype.inc.
 */
#include <clc/clc.h>
|
||||
|
||||
/* Enable the fp64 extension when the target advertises it. */
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#endif
|
||||
|
||||
/* Instantiate the implementation body through the type-iteration include. */
#define __CLC_BODY <async_work_group_strided_copy.inc>
|
||||
#include <clc/async/gentype.inc>
|
||||
#undef __CLC_BODY
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
/*
 * STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride)
 *
 * Copies num_gentypes elements, reading element i from src[i * src_stride]
 * and writing it to dst[i * dst_stride].
 *
 * The work is split across the calling work-items: each one starts at its
 * linearized local id and steps by the product of the three local sizes, so
 * the full range is covered only if every work-item of the group executes
 * the macro with the same arguments.
 *
 * Macro hygiene:
 *  - wrapped in do { } while (0) so it expands to a single statement, can be
 *    used more than once in the same scope, and requires a trailing ';';
 *  - every argument is parenthesized and evaluated exactly once;
 *  - temporaries carry a __clc_ prefix so they cannot shadow caller names
 *    such as `size`, `id` or `i`.
 */
#define STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride)         \
  do {                                                                       \
    const size_t __clc_count = (num_gentypes);                               \
    const size_t __clc_dst_stride = (dst_stride);                            \
    const size_t __clc_src_stride = (src_stride);                            \
    /* Total number of work-items in the work-group. */                      \
    const size_t __clc_size =                                                \
        get_local_size(0) * get_local_size(1) * get_local_size(2);           \
    /* Linearized id of this work-item within the work-group. */             \
    const size_t __clc_id =                                                  \
        (get_local_size(1) * get_local_size(2) * get_local_id(0)) +          \
        (get_local_size(2) * get_local_id(1)) + get_local_id(2);             \
    size_t __clc_i;                                                          \
                                                                             \
    for (__clc_i = __clc_id; __clc_i < __clc_count; __clc_i += __clc_size) { \
      (dst)[__clc_i * __clc_dst_stride] = (src)[__clc_i * __clc_src_stride]; \
    }                                                                        \
  } while (0)
|
||||
|
||||
|
||||
/*
 * Strided copy from global memory into local memory.
 *
 * Element i of the result is read from src[i * src_stride] and stored at
 * dst[i]; the destination is written contiguously. The copy is carried out
 * by STRIDED_COPY before this function returns.
 *
 *   dst          - destination buffer in the local address space
 *   src          - source buffer in the global address space
 *   num_gentypes - number of elements to copy
 *   src_stride   - distance, in elements, between consecutive source reads
 *   event        - event handle supplied by the caller
 *
 * Returns `event` unchanged.
 */
_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
    local __CLC_GENTYPE *dst, const global __CLC_GENTYPE *src,
    size_t num_gentypes, size_t src_stride, event_t event) {
  /* Destination stride is fixed at 1; only the source side is strided. */
  STRIDED_COPY(dst, src, num_gentypes, 1, src_stride);
  return event;
}
|
||||
|
||||
/*
 * Strided copy from local memory out to global memory.
 *
 * Element i is read from src[i] and stored at dst[i * dst_stride]; the
 * source is read contiguously. The copy is carried out by STRIDED_COPY
 * before this function returns.
 *
 *   dst          - destination buffer in the global address space
 *   src          - source buffer in the local address space
 *   num_gentypes - number of elements to copy
 *   dst_stride   - distance, in elements, between consecutive destination
 *                  writes
 *   event        - event handle supplied by the caller
 *
 * Returns `event` unchanged.
 */
_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
    global __CLC_GENTYPE *dst, const local __CLC_GENTYPE *src,
    size_t num_gentypes, size_t dst_stride, event_t event) {
  /* Source stride is fixed at 1; only the destination side is strided. */
  STRIDED_COPY(dst, src, num_gentypes, dst_stride, 1);
  return event;
}
|
Loading…
Reference in New Issue