forked from OSchip/llvm-project
shared: Implement aligned vector stores (vstorea_half)
Float version passes newly posted piglit tests on turks; float and double pass on carrizo.
v2: scalar vstorea_half
v3: fix typo
Reviewer: Aaron Watry
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 316291
This commit is contained in:
parent
12061c7125
commit
7ab2d0bdcd
|
@ -16,37 +16,52 @@
|
|||
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
|
||||
_CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \
|
||||
|
||||
#define _CLC_VECTOR_VSTORE_PRIM() \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(char) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(uchar) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(short) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(ushort) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(int) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(uint) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(long) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(ulong) \
|
||||
_CLC_VECTOR_VSTORE_PRIM1(float) \
|
||||
_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(char)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(uchar)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(short)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(ushort)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(int)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(uint)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(long)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(ulong)
|
||||
_CLC_VECTOR_VSTORE_PRIM1(float)
|
||||
_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
|
||||
// Use suffix to declare aligned vstorea_halfN
|
||||
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, float)
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
_CLC_VECTOR_VSTORE_PRIM1(double)
|
||||
_CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
|
||||
// Use suffix to declare aligned vstorea_halfN
|
||||
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, double)
|
||||
|
||||
// Scalar vstore_half also needs to be declared
|
||||
_CLC_VSTORE_DECL(_half, half, double, , __private)
|
||||
_CLC_VSTORE_DECL(_half, half, double, , __local)
|
||||
_CLC_VSTORE_DECL(_half, half, double, , __global)
|
||||
|
||||
// Scalar vstorea_half is not part of the specs but CTS expects it
|
||||
_CLC_VSTORE_DECL(a_half, half, double, , __private)
|
||||
_CLC_VSTORE_DECL(a_half, half, double, , __local)
|
||||
_CLC_VSTORE_DECL(a_half, half, double, , __global)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_VECTOR_VSTORE_PRIM1(half)
|
||||
#endif
|
||||
|
||||
_CLC_VECTOR_VSTORE_PRIM()
|
||||
// Scalar vstore_half also needs to be declared
|
||||
_CLC_VSTORE_DECL(_half, half, float, , __private)
|
||||
_CLC_VSTORE_DECL(_half, half, float, , __local)
|
||||
_CLC_VSTORE_DECL(_half, half, float, , __global)
|
||||
|
||||
// Scalar vstorea_half is not part of the specs but CTS expects it
|
||||
_CLC_VSTORE_DECL(a_half, half, float, , __private)
|
||||
_CLC_VSTORE_DECL(a_half, half, float, , __local)
|
||||
_CLC_VSTORE_DECL(a_half, half, float, , __global)
|
||||
|
||||
|
||||
#undef _CLC_VSTORE_DECL
|
||||
#undef _CLC_VECTOR_VSTORE_DECL
|
||||
#undef _CLC_VECTOR_VSTORE_PRIM3
|
||||
#undef _CLC_VECTOR_VSTORE_PRIM1
|
||||
#undef _CLC_VECTOR_VSTORE_PRIM
|
||||
|
|
|
@ -33,23 +33,22 @@
|
|||
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
|
||||
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
|
||||
|
||||
#define VSTORE_TYPES() \
|
||||
VSTORE_ADDR_SPACES(char) \
|
||||
VSTORE_ADDR_SPACES(uchar) \
|
||||
VSTORE_ADDR_SPACES(short) \
|
||||
VSTORE_ADDR_SPACES(ushort) \
|
||||
VSTORE_ADDR_SPACES(int) \
|
||||
VSTORE_ADDR_SPACES(uint) \
|
||||
VSTORE_ADDR_SPACES(long) \
|
||||
VSTORE_ADDR_SPACES(ulong) \
|
||||
VSTORE_ADDR_SPACES(float) \
|
||||
VSTORE_ADDR_SPACES(char)
|
||||
VSTORE_ADDR_SPACES(uchar)
|
||||
VSTORE_ADDR_SPACES(short)
|
||||
VSTORE_ADDR_SPACES(ushort)
|
||||
VSTORE_ADDR_SPACES(int)
|
||||
VSTORE_ADDR_SPACES(uint)
|
||||
VSTORE_ADDR_SPACES(long)
|
||||
VSTORE_ADDR_SPACES(ulong)
|
||||
VSTORE_ADDR_SPACES(float)
|
||||
|
||||
VSTORE_TYPES()
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
VSTORE_ADDR_SPACES(double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
VSTORE_ADDR_SPACES(half)
|
||||
|
@ -95,13 +94,17 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
|
|||
VEC_STORE8(STYPE, AS, val.lo) \
|
||||
VEC_STORE8(STYPE, AS, val.hi)
|
||||
|
||||
#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
|
||||
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
|
||||
_CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
|
||||
offset *= VEC_SIZE; \
|
||||
VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
|
||||
} \
|
||||
_CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
|
||||
offset *= OFFSET; \
|
||||
VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
|
||||
}
|
||||
|
||||
#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
|
||||
#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
|
||||
|
||||
#define __CLC_BODY "vstore_half.inc"
|
||||
#include <clc/math/gentype.inc>
|
||||
|
@ -115,6 +118,5 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
|
|||
#undef VEC_LOAD2
|
||||
#undef VEC_LOAD1
|
||||
#undef DECLARE_HELPER
|
||||
#undef VSTORE_TYPES
|
||||
#undef VSTORE_ADDR_SPACES
|
||||
#undef VSTORE_VECTORIZE
|
||||
|
|
|
@ -1,10 +1,19 @@
|
|||
|
||||
#ifdef __CLC_VECSIZE
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
|
||||
#if __CLC_VECSIZE == 3
|
||||
# define __CLC_OFFSET 4
|
||||
#else
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
# define __CLC_OFFSET __CLC_VECSIZE
|
||||
#endif
|
||||
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
|
||||
#undef __CLC_OFFSET
|
||||
#else
|
||||
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
|
||||
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue