forked from OSchip/llvm-project
Provide vstore_half helper to work around clc restrictions
Clang won't accept half-precision loads and stores without cl_khr_fp16 since r281904. llvm-svn: 282106
This commit is contained in:
parent
9666ba7526
commit
0a5aac3fc4
|
@ -142,6 +142,7 @@ shared/max.cl
|
|||
shared/min.cl
|
||||
shared/vload.cl
|
||||
shared/vstore.cl
|
||||
shared/vstore_half_helpers.ll
|
||||
workitem/get_global_id.cl
|
||||
workitem/get_global_size.cl
|
||||
image/get_image_dim.cl
|
||||
|
|
|
@ -52,32 +52,45 @@ VSTORE_TYPES()
|
|||
#endif
|
||||
|
||||
/* The vstore_half functions are legal even without cl_khr_fp16 */
|
||||
#define DECLARE_HELPER(STYPE, AS) void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
|
||||
|
||||
#define VEC_STORE1(val) mem[offset++] = val;
|
||||
#define VEC_STORE2(val) \
|
||||
VEC_STORE1(val.lo) \
|
||||
VEC_STORE1(val.hi)
|
||||
#define VEC_STORE3(val) \
|
||||
VEC_STORE1(val.s0) \
|
||||
VEC_STORE1(val.s1) \
|
||||
VEC_STORE1(val.s2)
|
||||
#define VEC_STORE4(val) \
|
||||
VEC_STORE2(val.lo) \
|
||||
VEC_STORE2(val.hi)
|
||||
#define VEC_STORE8(val) \
|
||||
VEC_STORE4(val.lo) \
|
||||
VEC_STORE4(val.hi)
|
||||
#define VEC_STORE16(val) \
|
||||
VEC_STORE8(val.lo) \
|
||||
VEC_STORE8(val.hi)
|
||||
DECLARE_HELPER(float, __private);
|
||||
DECLARE_HELPER(float, __global);
|
||||
DECLARE_HELPER(float, __local);
|
||||
|
||||
#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
DECLARE_HELPER(double, __private);
|
||||
DECLARE_HELPER(double, __global);
|
||||
DECLARE_HELPER(double, __local);
|
||||
#endif
|
||||
|
||||
|
||||
#define VEC_STORE1(STYPE, AS, val) __clc_vstore_half_##STYPE##_helper##AS (val, &mem[offset++]);
|
||||
#define VEC_STORE2(STYPE, AS, val) \
|
||||
VEC_STORE1(STYPE, AS, val.lo) \
|
||||
VEC_STORE1(STYPE, AS, val.hi)
|
||||
#define VEC_STORE3(STYPE, AS, val) \
|
||||
VEC_STORE1(STYPE, AS, val.s0) \
|
||||
VEC_STORE1(STYPE, AS, val.s1) \
|
||||
VEC_STORE1(STYPE, AS, val.s2)
|
||||
#define VEC_STORE4(STYPE, AS, val) \
|
||||
VEC_STORE2(STYPE, AS, val.lo) \
|
||||
VEC_STORE2(STYPE, AS, val.hi)
|
||||
#define VEC_STORE8(STYPE, AS, val) \
|
||||
VEC_STORE4(STYPE, AS, val.lo) \
|
||||
VEC_STORE4(STYPE, AS, val.hi)
|
||||
#define VEC_STORE16(STYPE, AS, val) \
|
||||
VEC_STORE8(STYPE, AS, val.lo) \
|
||||
VEC_STORE8(STYPE, AS, val.hi)
|
||||
|
||||
#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
|
||||
offset *= VEC_SIZE; \
|
||||
VEC_STORE##VEC_SIZE(vec) \
|
||||
VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
|
||||
}
|
||||
|
||||
#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
|
||||
#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
|
||||
|
||||
#define __CLC_BODY "vstore_half.inc"
|
||||
#include <clc/math/gentype.inc>
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
|
||||
#ifdef __CLC_VECSIZE
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
#else
|
||||
FUNC(, 1, __CLC_GENTYPE, __private);
|
||||
FUNC(, 1, __CLC_GENTYPE, __local);
|
||||
FUNC(, 1, __CLC_GENTYPE, __global);
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc float %data to half
|
||||
store half %res, half addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
|
||||
%res = fptrunc double %data to half
|
||||
store half %res, half addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue