From 0a5aac3fc4298babb8668ca297f32504ce0a7950 Mon Sep 17 00:00:00 2001
From: Jan Vesely
Date: Wed, 21 Sep 2016 20:15:55 +0000
Subject: [PATCH] Provide vstore_half helper to workaround clc restrictions

clang won't accept half precision loads and stores without cl_khr_fp16 since r281904

llvm-svn: 282106
---
Reviewer notes (text in this region is not part of the commit message):

  * vstore.cl no longer assigns to mem[offset++] directly. Each scalar
    element is passed, together with &mem[offset++], to
    __clc_vstore_half_<float|double>_helper<address space>.
  * The helpers are defined in LLVM IR (vstore_half_helpers.ll): each one
    does an fptrunc of its argument to half and stores the result through
    the pointer. There is one helper per source type (float, double) and
    per pointer address space; the __private/__global/__local variants
    take addrspace(0)/(1)/(3) pointers respectively.
  * vstore.cl only declares the double helpers when cl_khr_fp64 is defined;
    the .ll file defines all six helpers unconditionally.
  * NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
    Line breaks, indentation and the position of blank context lines are
    reconstructed to match the hunk headers, and the operand of the final
    '#include' in the vstore.cl hunk (missing in the paste) is assumed to
    be <clc/math/gentype.inc>. Confirm against the upstream commit before
    applying.

 libclc/generic/lib/SOURCES                    |  1 +
 libclc/generic/lib/shared/vstore.cl           | 53 ++++++++++++-------
 libclc/generic/lib/shared/vstore_half.inc     | 12 ++---
 .../generic/lib/shared/vstore_half_helpers.ll | 35 ++++++++++++
 4 files changed, 75 insertions(+), 26 deletions(-)
 create mode 100644 libclc/generic/lib/shared/vstore_half_helpers.ll

diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 423a50b4e95c..ecd2e7327004 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -142,6 +142,7 @@ shared/max.cl
 shared/min.cl
 shared/vload.cl
 shared/vstore.cl
+shared/vstore_half_helpers.ll
 workitem/get_global_id.cl
 workitem/get_global_size.cl
 image/get_image_dim.cl
diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl
index ebc9446262f1..28383848fa18 100644
--- a/libclc/generic/lib/shared/vstore.cl
+++ b/libclc/generic/lib/shared/vstore.cl
@@ -52,32 +52,45 @@ VSTORE_TYPES()
 #endif
 
 /* vstore_half are legal even without cl_khr_fp16 */
+#define DECLARE_HELPER(STYPE, AS) void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
 
-#define VEC_STORE1(val) mem[offset++] = val;
-#define VEC_STORE2(val) \
-	VEC_STORE1(val.lo) \
-	VEC_STORE1(val.hi)
-#define VEC_STORE3(val) \
-	VEC_STORE1(val.s0) \
-	VEC_STORE1(val.s1) \
-	VEC_STORE1(val.s2)
-#define VEC_STORE4(val) \
-	VEC_STORE2(val.lo) \
-	VEC_STORE2(val.hi)
-#define VEC_STORE8(val) \
-	VEC_STORE4(val.lo) \
-	VEC_STORE4(val.hi)
-#define VEC_STORE16(val) \
-	VEC_STORE8(val.lo) \
-	VEC_STORE8(val.hi)
+DECLARE_HELPER(float, __private);
+DECLARE_HELPER(float, __global);
+DECLARE_HELPER(float, __local);
 
-#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+DECLARE_HELPER(double, __private);
+DECLARE_HELPER(double, __global);
+DECLARE_HELPER(double, __local);
+#endif
+
+
+#define VEC_STORE1(STYPE, AS, val) __clc_vstore_half_##STYPE##_helper##AS (val, &mem[offset++]);
+#define VEC_STORE2(STYPE, AS, val) \
+	VEC_STORE1(STYPE, AS, val.lo) \
+	VEC_STORE1(STYPE, AS, val.hi)
+#define VEC_STORE3(STYPE, AS, val) \
+	VEC_STORE1(STYPE, AS, val.s0) \
+	VEC_STORE1(STYPE, AS, val.s1) \
+	VEC_STORE1(STYPE, AS, val.s2)
+#define VEC_STORE4(STYPE, AS, val) \
+	VEC_STORE2(STYPE, AS, val.lo) \
+	VEC_STORE2(STYPE, AS, val.hi)
+#define VEC_STORE8(STYPE, AS, val) \
+	VEC_STORE4(STYPE, AS, val.lo) \
+	VEC_STORE4(STYPE, AS, val.hi)
+#define VEC_STORE16(STYPE, AS, val) \
+	VEC_STORE8(STYPE, AS, val.lo) \
+	VEC_STORE8(STYPE, AS, val.hi)
+
+#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
   _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
     offset *= VEC_SIZE; \
-    VEC_STORE##VEC_SIZE(vec) \
+    VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
   }
 
-#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
+#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
 
 #define __CLC_BODY "vstore_half.inc"
 #include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/shared/vstore_half.inc b/libclc/generic/lib/shared/vstore_half.inc
index 8ed03a0808af..fee52bc9c23e 100644
--- a/libclc/generic/lib/shared/vstore_half.inc
+++ b/libclc/generic/lib/shared/vstore_half.inc
@@ -1,10 +1,10 @@
 #ifdef __CLC_VECSIZE
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
 
 #else
-  FUNC(, 1, __CLC_GENTYPE, __private);
-  FUNC(, 1, __CLC_GENTYPE, __local);
-  FUNC(, 1, __CLC_GENTYPE, __global);
+  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
 #endif
diff --git a/libclc/generic/lib/shared/vstore_half_helpers.ll b/libclc/generic/lib/shared/vstore_half_helpers.ll
new file mode 100644
index 000000000000..e958664e5601
--- /dev/null
+++ b/libclc/generic/lib/shared/vstore_half_helpers.ll
@@ -0,0 +1,35 @@
+define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc float %data to half
+  store half %res, half addrspace(0)* %ptr
+  ret void
+}
+
+define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc float %data to half
+  store half %res, half addrspace(1)* %ptr
+  ret void
+}
+
+define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc float %data to half
+  store half %res, half addrspace(3)* %ptr
+  ret void
+}
+
+define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc double %data to half
+  store half %res, half addrspace(0)* %ptr
+  ret void
+}
+
+define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc double %data to half
+  store half %res, half addrspace(1)* %ptr
+  ret void
+}
+
+define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+  %res = fptrunc double %data to half
+  store half %res, half addrspace(3)* %ptr
+  ret void
+}