forked from OSchip/llvm-project
[PPC64LE] Implement little-endian semantics for vec_perm
The PowerPC vperm (vector permute) instruction is defined architecturally with a big-endian bias, in that the two input vectors are assumed to be concatenated "left to right" and the elements of the combined input vector are assumed to be numbered from "left to right" (i.e., with element 0 referencing the high-order element). This definition is unnatural for little-endian code generation. To ease porting, the vec_perm interface is designed to use natural element ordering, so that elements are numbered according to little-endian design principles when code is generated for a little-endian target. The desired semantics can be achieved with the vperm instruction provided that the two input vector registers are reversed, and the permute control vector is complemented. The complementing is performed using an xor with a vector containing all one bits. Only the rightmost 5 bits of each element of the permute control vector are relevant, so it would be possible to complement the vector with respect to a <16xi8> vector containing all 31s. However, when the permute control vector is not a constant, using 255 instead has the advantage that the vec_xor can be recognized during code generation as a vnor instruction. (Power8 introduces a vnand instruction which could alternatively be generated.) The correctness of this code is tested by the new perm.c test added in a previous patch. I plan to later make the existing ppc32 Altivec compile-time tests work for ppc64 and ppc64le as well. llvm-svn: 210279
This commit is contained in:
parent
f56a29924f
commit
f7e289c0f2
|
@ -73,6 +73,9 @@ vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c);
|
|||
// Forward declaration of the vector float overload of vec_perm.
static vector float __ATTRS_o_ai
|
||||
vec_perm(vector float __a, vector float __b, vector unsigned char __c);
|
||||
|
||||
// Forward declaration of the vector unsigned char overload of vec_xor.
// The little-endian paths of the vec_perm overloads call vec_xor to
// complement the permute control vector.
static vector unsigned char __ATTRS_o_ai
|
||||
vec_xor(vector unsigned char __a, vector unsigned char __b);
|
||||
|
||||
/* vec_abs */
|
||||
|
||||
#define __builtin_altivec_abs_v16qi vec_abs
|
||||
|
@ -4281,11 +4284,27 @@ vec_vpkswus(vector unsigned int __a, vector unsigned int __b)
|
|||
|
||||
/* vec_perm */
|
||||
|
||||
// The vperm instruction is defined architecturally with a big-endian bias.
|
||||
// For little endian, we swap the input operands and invert the permute
|
||||
// control vector. Only the rightmost 5 bits matter, so we could use
|
||||
// a vector of all 31s instead of all 255s to perform the inversion.
|
||||
// However, when the PCV is not a constant, using 255 has an advantage
|
||||
// in that the vec_xor can be recognized as a vec_nor (and for P8 and
|
||||
// later, possibly a vec_nand).
|
||||
|
||||
// vec_perm (vector signed char): vector permute of __a and __b under the
// permute control vector __c, returned as vector signed char.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector signed char __ATTRS_o_ai
|
||||
vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector signed char)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector signed char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector unsigned char): vector permute of __a and __b under the
// permute control vector __c, returned as vector unsigned char.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
// NOTE(review): in this diff view the line carrying the function name
// falls between two hunks and is visible only as hunk-header context.
vector unsigned char __ATTRS_o_ai
|
||||
|
@ -4293,22 +4312,46 @@ vec_perm(vector unsigned char __a,
|
|||
vector unsigned char __b,
|
||||
vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector unsigned char)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector unsigned char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector bool char): vector permute of __a and __b under the
// permute control vector __c, returned as vector bool char.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector bool char __ATTRS_o_ai
|
||||
vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector bool char)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector bool char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector short): vector permute of __a and __b under the
// permute control vector __c, returned as vector short.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector short __ATTRS_o_ai
|
||||
vec_perm(vector short __a, vector short __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector short)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector unsigned short): vector permute of __a and __b under the
// permute control vector __c, returned as vector unsigned short.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
// NOTE(review): in this diff view the line carrying the function name
// falls between two hunks and is visible only as hunk-header context.
vector unsigned short __ATTRS_o_ai
|
||||
|
@ -4316,49 +4359,104 @@ vec_perm(vector unsigned short __a,
|
|||
vector unsigned short __b,
|
||||
vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector unsigned short)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector unsigned short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector bool short): vector permute of __a and __b under the
// permute control vector __c, returned as vector bool short.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector bool short __ATTRS_o_ai
|
||||
vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector bool short)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector bool short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector pixel): vector permute of __a and __b under the
// permute control vector __c, returned as vector pixel.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector pixel __ATTRS_o_ai
|
||||
vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector pixel)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector pixel)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector int): vector permute of __a and __b under the permute
// control vector __c, returned as vector int. The inputs already have the
// builtin's operand type, so they are passed without casts.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector int __ATTRS_o_ai
|
||||
vec_perm(vector int __a, vector int __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector int)__builtin_altivec_vperm_4si(__b, __a, __d);
|
||||
#else
|
||||
return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector unsigned int): vector permute of __a and __b under the
// permute control vector __c, returned as vector unsigned int.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector unsigned int __ATTRS_o_ai
|
||||
vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector unsigned int)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector unsigned int)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector bool int): vector permute of __a and __b under the
// permute control vector __c, returned as vector bool int.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector bool int __ATTRS_o_ai
|
||||
vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector bool int)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector bool int)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
// vec_perm (vector float): vector permute of __a and __b under the
// permute control vector __c, returned as vector float.
// When __LITTLE_ENDIAN__ is defined, __c is complemented (xor with an
// all-255 byte vector) and __b/__a are passed to
// __builtin_altivec_vperm_4si in swapped order; otherwise the builtin
// receives __a, __b, __c unchanged.
vector float __ATTRS_o_ai
|
||||
vec_perm(vector float __a, vector float __b, vector unsigned char __c)
|
||||
{
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
vector unsigned char __d = {255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255};
|
||||
__d = vec_xor(__c, __d);
|
||||
return (vector float)
|
||||
__builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
|
||||
#else
|
||||
return (vector float)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* vec_vperm */
|
||||
|
@ -4366,8 +4464,7 @@ vec_perm(vector float __a, vector float __b, vector unsigned char __c)
|
|||
// vec_vperm (vector signed char): takes two vector signed char inputs and
// an unsigned char permute control vector; returns vector signed char.
// NOTE(review): this is a rendered diff without +/- markers. The hunk
// header's line counts (8 old, 7 new) indicate the two-line
// __builtin_altivec_vperm_4si return is the removed body and
// `return vec_perm(__a, __b, __c);` is its replacement; confirm against
// the raw patch.
static vector signed char __ATTRS_o_ai
|
||||
vec_vperm(vector signed char __a, vector signed char __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector signed char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector unsigned char): takes two vector unsigned char inputs
// and an unsigned char permute control vector; returns vector unsigned char.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch. The line carrying the function name is visible only as
// hunk-header context.
static vector unsigned char __ATTRS_o_ai
|
||||
|
@ -4375,22 +4472,19 @@ vec_vperm(vector unsigned char __a,
|
|||
vector unsigned char __b,
|
||||
vector unsigned char __c)
|
||||
{
|
||||
return (vector unsigned char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector bool char): takes two vector bool char inputs and an
// unsigned char permute control vector; returns vector bool char.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector bool char __ATTRS_o_ai
|
||||
vec_vperm(vector bool char __a, vector bool char __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector bool char)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector short): takes two vector short inputs and an unsigned
// char permute control vector; returns vector short.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector short __ATTRS_o_ai
|
||||
vec_vperm(vector short __a, vector short __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector unsigned short): takes two vector unsigned short inputs
// and an unsigned char permute control vector; returns vector unsigned short.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch. The line carrying the function name is visible only as
// hunk-header context.
static vector unsigned short __ATTRS_o_ai
|
||||
|
@ -4398,49 +4492,43 @@ vec_vperm(vector unsigned short __a,
|
|||
vector unsigned short __b,
|
||||
vector unsigned char __c)
|
||||
{
|
||||
return (vector unsigned short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector bool short): takes two vector bool short inputs and an
// unsigned char permute control vector; returns vector bool short.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector bool short __ATTRS_o_ai
|
||||
vec_vperm(vector bool short __a, vector bool short __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector bool short)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector pixel): takes two vector pixel inputs and an unsigned
// char permute control vector; returns vector pixel.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector pixel __ATTRS_o_ai
|
||||
vec_vperm(vector pixel __a, vector pixel __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector pixel)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector int): takes two vector int inputs and an unsigned char
// permute control vector; returns vector int.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed line and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector int __ATTRS_o_ai
|
||||
vec_vperm(vector int __a, vector int __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector unsigned int): takes two vector unsigned int inputs and
// an unsigned char permute control vector; returns vector unsigned int.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector unsigned int __ATTRS_o_ai
|
||||
vec_vperm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector unsigned int)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector bool int): takes two vector bool int inputs and an
// unsigned char permute control vector; returns vector bool int.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector bool int __ATTRS_o_ai
|
||||
vec_vperm(vector bool int __a, vector bool int __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector bool int)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
// vec_vperm (vector float): takes two vector float inputs and an unsigned
// char permute control vector; returns vector float.
// NOTE(review): this is a rendered diff without +/- markers. The direct
// __builtin_altivec_vperm_4si return appears to be the removed body and
// `return vec_perm(__a, __b, __c);` its replacement; confirm against the
// raw patch.
static vector float __ATTRS_o_ai
|
||||
vec_vperm(vector float __a, vector float __b, vector unsigned char __c)
|
||||
{
|
||||
return (vector float)
|
||||
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
|
||||
return vec_perm(__a, __b, __c);
|
||||
}
|
||||
|
||||
/* vec_re */
|
||||
|
|
Loading…
Reference in New Issue