forked from OSchip/llvm-project
PR14964: intrinsic headers using non-reserved identifiers
Several of the intrinsic headers were using plain non-reserved identifiers. C++11 17.6.4.3.2 [global.names] p1 reserves names containing a double underscore, or beginning with an underscore followed by an uppercase letter, for any use. I think I got them all, but open to being corrected. For the most part I didn't bother updating function-like macro parameter names because I don't believe they're subject to any such collision - though some function-like macros already follow this convention (I didn't update them in part because the churn was more significant as several function-like macros use the double underscore prefixed version of the same name as a parameter in their implementation) llvm-svn: 172666
This commit is contained in:
parent
048fbfa302
commit
3302f2bd46
File diff suppressed because it is too large
Load Diff
|
@ -29,39 +29,39 @@
|
|||
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_abs_epi8(__m256i a)
|
||||
_mm256_abs_epi8(__m256i __a)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pabsb256((__v32qi)a);
|
||||
return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_abs_epi16(__m256i a)
|
||||
_mm256_abs_epi16(__m256i __a)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pabsw256((__v16hi)a);
|
||||
return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_abs_epi32(__m256i a)
|
||||
_mm256_abs_epi32(__m256i __a)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pabsd256((__v8si)a);
|
||||
return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_packs_epi16(__m256i a, __m256i b)
|
||||
_mm256_packs_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_packs_epi32(__m256i a, __m256i b)
|
||||
_mm256_packs_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_packus_epi16(__m256i a, __m256i b)
|
||||
_mm256_packus_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -71,51 +71,51 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
|
|||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_add_epi8(__m256i a, __m256i b)
|
||||
_mm256_add_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v32qi)a + (__v32qi)b);
|
||||
return (__m256i)((__v32qi)__a + (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_add_epi16(__m256i a, __m256i b)
|
||||
_mm256_add_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v16hi)a + (__v16hi)b);
|
||||
return (__m256i)((__v16hi)__a + (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_add_epi32(__m256i a, __m256i b)
|
||||
_mm256_add_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8si)a + (__v8si)b);
|
||||
return (__m256i)((__v8si)__a + (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_add_epi64(__m256i a, __m256i b)
|
||||
_mm256_add_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return a + b;
|
||||
return __a + __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_adds_epi8(__m256i a, __m256i b)
|
||||
_mm256_adds_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_adds_epi16(__m256i a, __m256i b)
|
||||
_mm256_adds_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_adds_epu8(__m256i a, __m256i b)
|
||||
_mm256_adds_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_adds_epu16(__m256i a, __m256i b)
|
||||
_mm256_adds_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
|
||||
|
@ -124,27 +124,27 @@ _mm256_adds_epu16(__m256i a, __m256i b)
|
|||
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_and_si256(__m256i a, __m256i b)
|
||||
_mm256_and_si256(__m256i __a, __m256i __b)
|
||||
{
|
||||
return a & b;
|
||||
return __a & __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_andnot_si256(__m256i a, __m256i b)
|
||||
_mm256_andnot_si256(__m256i __a, __m256i __b)
|
||||
{
|
||||
return ~a & b;
|
||||
return ~__a & __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_avg_epu8(__m256i a, __m256i b)
|
||||
_mm256_avg_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_avg_epu16(__m256i a, __m256i b)
|
||||
_mm256_avg_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -160,177 +160,177 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
|
|||
(__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); })
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpeq_epi8(__m256i a, __m256i b)
|
||||
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v32qi)a == (__v32qi)b);
|
||||
return (__m256i)((__v32qi)__a == (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpeq_epi16(__m256i a, __m256i b)
|
||||
_mm256_cmpeq_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v16hi)a == (__v16hi)b);
|
||||
return (__m256i)((__v16hi)__a == (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpeq_epi32(__m256i a, __m256i b)
|
||||
_mm256_cmpeq_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8si)a == (__v8si)b);
|
||||
return (__m256i)((__v8si)__a == (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpeq_epi64(__m256i a, __m256i b)
|
||||
_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)(a == b);
|
||||
return (__m256i)(__a == __b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpgt_epi8(__m256i a, __m256i b)
|
||||
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v32qi)a > (__v32qi)b);
|
||||
return (__m256i)((__v32qi)__a > (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpgt_epi16(__m256i a, __m256i b)
|
||||
_mm256_cmpgt_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v16hi)a > (__v16hi)b);
|
||||
return (__m256i)((__v16hi)__a > (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpgt_epi32(__m256i a, __m256i b)
|
||||
_mm256_cmpgt_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8si)a > (__v8si)b);
|
||||
return (__m256i)((__v8si)__a > (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cmpgt_epi64(__m256i a, __m256i b)
|
||||
_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)(a > b);
|
||||
return (__m256i)(__a > __b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hadd_epi16(__m256i a, __m256i b)
|
||||
_mm256_hadd_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hadd_epi32(__m256i a, __m256i b)
|
||||
_mm256_hadd_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hadds_epi16(__m256i a, __m256i b)
|
||||
_mm256_hadds_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hsub_epi16(__m256i a, __m256i b)
|
||||
_mm256_hsub_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hsub_epi32(__m256i a, __m256i b)
|
||||
_mm256_hsub_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_hsubs_epi16(__m256i a, __m256i b)
|
||||
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_maddubs_epi16(__m256i a, __m256i b)
|
||||
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_madd_epi16(__m256i a, __m256i b)
|
||||
_mm256_madd_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epi8(__m256i a, __m256i b)
|
||||
_mm256_max_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epi16(__m256i a, __m256i b)
|
||||
_mm256_max_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epi32(__m256i a, __m256i b)
|
||||
_mm256_max_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epu8(__m256i a, __m256i b)
|
||||
_mm256_max_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epu16(__m256i a, __m256i b)
|
||||
_mm256_max_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_max_epu32(__m256i a, __m256i b)
|
||||
_mm256_max_epu32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epi8(__m256i a, __m256i b)
|
||||
_mm256_min_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epi16(__m256i a, __m256i b)
|
||||
_mm256_min_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epi32(__m256i a, __m256i b)
|
||||
_mm256_min_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epu8(__m256i a, __m256i b)
|
||||
_mm256_min_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epu16(__m256i a, __m256i b)
|
||||
_mm256_min_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_min_epu32(__m256i a, __m256i b)
|
||||
_mm256_min_epu32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_movemask_epi8(__m256i a)
|
||||
_mm256_movemask_epi8(__m256i __a)
|
||||
{
|
||||
return __builtin_ia32_pmovmskb256((__v32qi)a);
|
||||
return __builtin_ia32_pmovmskb256((__v32qi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -406,63 +406,63 @@ _mm256_cvtepu32_epi64(__m128i __V)
|
|||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mul_epi32(__m256i a, __m256i b)
|
||||
_mm256_mul_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mulhrs_epi16(__m256i a, __m256i b)
|
||||
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mulhi_epu16(__m256i a, __m256i b)
|
||||
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mulhi_epi16(__m256i a, __m256i b)
|
||||
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mullo_epi16(__m256i a, __m256i b)
|
||||
_mm256_mullo_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v16hi)a * (__v16hi)b);
|
||||
return (__m256i)((__v16hi)__a * (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mullo_epi32 (__m256i a, __m256i b)
|
||||
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8si)a * (__v8si)b);
|
||||
return (__m256i)((__v8si)__a * (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_mul_epu32(__m256i a, __m256i b)
|
||||
_mm256_mul_epu32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
|
||||
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_or_si256(__m256i a, __m256i b)
|
||||
_mm256_or_si256(__m256i __a, __m256i __b)
|
||||
{
|
||||
return a | b;
|
||||
return __a | __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sad_epu8(__m256i a, __m256i b)
|
||||
_mm256_sad_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b);
|
||||
return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_shuffle_epi8(__m256i a, __m256i b)
|
||||
_mm256_shuffle_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
|
||||
|
@ -502,21 +502,21 @@ _mm256_shuffle_epi8(__m256i a, __m256i b)
|
|||
12, 13, 14, 15); })
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sign_epi8(__m256i a, __m256i b)
|
||||
_mm256_sign_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sign_epi16(__m256i a, __m256i b)
|
||||
_mm256_sign_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sign_epi32(__m256i a, __m256i b)
|
||||
_mm256_sign_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
#define _mm256_slli_si256(a, count) __extension__ ({ \
|
||||
|
@ -524,63 +524,63 @@ _mm256_sign_epi32(__m256i a, __m256i b)
|
|||
(__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_slli_epi16(__m256i a, int count)
|
||||
_mm256_slli_epi16(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count);
|
||||
return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sll_epi16(__m256i a, __m128i count)
|
||||
_mm256_sll_epi16(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count);
|
||||
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_slli_epi32(__m256i a, int count)
|
||||
_mm256_slli_epi32(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count);
|
||||
return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sll_epi32(__m256i a, __m128i count)
|
||||
_mm256_sll_epi32(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count);
|
||||
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_slli_epi64(__m256i a, int count)
|
||||
_mm256_slli_epi64(__m256i __a, int __count)
|
||||
{
|
||||
return __builtin_ia32_psllqi256(a, count);
|
||||
return __builtin_ia32_psllqi256(__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sll_epi64(__m256i a, __m128i count)
|
||||
_mm256_sll_epi64(__m256i __a, __m128i __count)
|
||||
{
|
||||
return __builtin_ia32_psllq256(a, count);
|
||||
return __builtin_ia32_psllq256(__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srai_epi16(__m256i a, int count)
|
||||
_mm256_srai_epi16(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count);
|
||||
return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sra_epi16(__m256i a, __m128i count)
|
||||
_mm256_sra_epi16(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count);
|
||||
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srai_epi32(__m256i a, int count)
|
||||
_mm256_srai_epi32(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psradi256((__v8si)a, count);
|
||||
return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sra_epi32(__m256i a, __m128i count)
|
||||
_mm256_sra_epi32(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count);
|
||||
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
|
||||
}
|
||||
|
||||
#define _mm256_srli_si256(a, count) __extension__ ({ \
|
||||
|
@ -588,141 +588,141 @@ _mm256_sra_epi32(__m256i a, __m128i count)
|
|||
(__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srli_epi16(__m256i a, int count)
|
||||
_mm256_srli_epi16(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count);
|
||||
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srl_epi16(__m256i a, __m128i count)
|
||||
_mm256_srl_epi16(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count);
|
||||
return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srli_epi32(__m256i a, int count)
|
||||
_mm256_srli_epi32(__m256i __a, int __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count);
|
||||
return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srl_epi32(__m256i a, __m128i count)
|
||||
_mm256_srl_epi32(__m256i __a, __m128i __count)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psrld256((__v8si)a, (__v4si)count);
|
||||
return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srli_epi64(__m256i a, int count)
|
||||
_mm256_srli_epi64(__m256i __a, int __count)
|
||||
{
|
||||
return __builtin_ia32_psrlqi256(a, count);
|
||||
return __builtin_ia32_psrlqi256(__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_srl_epi64(__m256i a, __m128i count)
|
||||
_mm256_srl_epi64(__m256i __a, __m128i __count)
|
||||
{
|
||||
return __builtin_ia32_psrlq256(a, count);
|
||||
return __builtin_ia32_psrlq256(__a, __count);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sub_epi8(__m256i a, __m256i b)
|
||||
_mm256_sub_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v32qi)a - (__v32qi)b);
|
||||
return (__m256i)((__v32qi)__a - (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sub_epi16(__m256i a, __m256i b)
|
||||
_mm256_sub_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v16hi)a - (__v16hi)b);
|
||||
return (__m256i)((__v16hi)__a - (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sub_epi32(__m256i a, __m256i b)
|
||||
_mm256_sub_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)((__v8si)a - (__v8si)b);
|
||||
return (__m256i)((__v8si)__a - (__v8si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_sub_epi64(__m256i a, __m256i b)
|
||||
_mm256_sub_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return a - b;
|
||||
return __a - __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_subs_epi8(__m256i a, __m256i b)
|
||||
_mm256_subs_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_subs_epi16(__m256i a, __m256i b)
|
||||
_mm256_subs_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_subs_epu8(__m256i a, __m256i b)
|
||||
_mm256_subs_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b);
|
||||
return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_subs_epu16(__m256i a, __m256i b)
|
||||
_mm256_subs_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b);
|
||||
return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpackhi_epi8(__m256i a, __m256i b)
|
||||
_mm256_unpackhi_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
|
||||
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpackhi_epi16(__m256i a, __m256i b)
|
||||
_mm256_unpackhi_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
|
||||
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpackhi_epi32(__m256i a, __m256i b)
|
||||
_mm256_unpackhi_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
|
||||
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpackhi_epi64(__m256i a, __m256i b)
|
||||
_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector(a, b, 1, 4+1, 3, 4+3);
|
||||
return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpacklo_epi8(__m256i a, __m256i b)
|
||||
_mm256_unpacklo_epi8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
|
||||
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpacklo_epi16(__m256i a, __m256i b)
|
||||
_mm256_unpacklo_epi16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
|
||||
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpacklo_epi32(__m256i a, __m256i b)
|
||||
_mm256_unpacklo_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
|
||||
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_unpacklo_epi64(__m256i a, __m256i b)
|
||||
_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_shufflevector(a, b, 0, 4+0, 2, 4+2);
|
||||
return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_xor_si256(__m256i a, __m256i b)
|
||||
_mm256_xor_si256(__m256i __a, __m256i __b)
|
||||
{
|
||||
return a ^ b;
|
||||
return __a ^ __b;
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -750,9 +750,9 @@ _mm256_broadcastsd_pd(__m128d __X)
|
|||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_broadcastsi128_si256(__m128i const *a)
|
||||
_mm_broadcastsi128_si256(__m128i const *__a)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vbroadcastsi256(a);
|
||||
return (__m256i)__builtin_ia32_vbroadcastsi256(__a);
|
||||
}
|
||||
|
||||
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
|
||||
|
@ -815,9 +815,9 @@ _mm_broadcastq_epi64(__m128i __X)
|
|||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_permutevar8x32_epi32(__m256i a, __m256i b)
|
||||
_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_permvarsi256((__v8si)a, (__v8si)b);
|
||||
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
|
||||
}
|
||||
|
||||
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
|
||||
|
@ -827,9 +827,9 @@ _mm256_permutevar8x32_epi32(__m256i a, __m256i b)
|
|||
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
|
||||
|
||||
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_permutevar8x32_ps(__m256 a, __m256 b)
|
||||
_mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
|
||||
{
|
||||
return (__m256)__builtin_ia32_permvarsf256((__v8sf)a, (__v8sf)b);
|
||||
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
|
||||
}
|
||||
|
||||
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,9 +25,10 @@
|
|||
#error this header is for x86 only
|
||||
#endif
|
||||
|
||||
static inline int __get_cpuid (unsigned int level, unsigned int *eax,
|
||||
unsigned int *ebx, unsigned int *ecx,
|
||||
unsigned int *edx) {
|
||||
__asm("cpuid" : "=a"(*eax), "=b" (*ebx), "=c"(*ecx), "=d"(*edx) : "0"(level));
|
||||
static inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
|
||||
unsigned int *__ebx, unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
__asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx)
|
||||
: "0"(__level));
|
||||
return 1;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
|||
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* Permission is hereby granted, free of charge, to any person obtaining __a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
|
@ -44,15 +44,15 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
|
|||
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
|
||||
|
||||
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cvtph_ps(__m128i a)
|
||||
_mm_cvtph_ps(__m128i __a)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a);
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
|
||||
}
|
||||
|
||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_cvtph_ps(__m128i a)
|
||||
_mm256_cvtph_ps(__m128i __a)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a);
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
|
||||
}
|
||||
|
||||
#endif /* __F16CINTRIN_H */
|
||||
|
|
|
@ -30,45 +30,45 @@
|
|||
#include <malloc.h>
|
||||
#else
|
||||
#ifndef __cplusplus
|
||||
extern int posix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
|
||||
#else
|
||||
// Some systems (e.g. those with GNU libc) declare posix_memalign with an
|
||||
// exception specifier. Via an "egregious workaround" in
|
||||
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
|
||||
// redeclaration of glibc's declaration.
|
||||
extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !(defined(_WIN32) && defined(_mm_malloc))
|
||||
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
|
||||
__malloc__))
|
||||
_mm_malloc(size_t size, size_t align)
|
||||
_mm_malloc(size_t __size, size_t __align)
|
||||
{
|
||||
if (align == 1) {
|
||||
return malloc(size);
|
||||
if (__align == 1) {
|
||||
return malloc(__size);
|
||||
}
|
||||
|
||||
if (!(align & (align - 1)) && align < sizeof(void *))
|
||||
align = sizeof(void *);
|
||||
if (!(__align & (__align - 1)) && __align < sizeof(void *))
|
||||
__align = sizeof(void *);
|
||||
|
||||
void *mallocedMemory;
|
||||
void *__mallocedMemory;
|
||||
#if defined(__MINGW32__)
|
||||
mallocedMemory = __mingw_aligned_malloc(size, align);
|
||||
__mallocedMemory = __mingw_aligned_malloc(__size, __align);
|
||||
#elif defined(_WIN32)
|
||||
mallocedMemory = _aligned_malloc(size, align);
|
||||
__mallocedMemory = _aligned_malloc(__size, __align);
|
||||
#else
|
||||
if (posix_memalign(&mallocedMemory, align, size))
|
||||
if (posix_memalign(&__mallocedMemory, __align, __size))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
return mallocedMemory;
|
||||
return __mallocedMemory;
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_free(void *p)
|
||||
_mm_free(void *__p)
|
||||
{
|
||||
free(p);
|
||||
free(__p);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -31,65 +31,65 @@
|
|||
#include <emmintrin.h>
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_lddqu_si128(__m128i const *p)
|
||||
_mm_lddqu_si128(__m128i const *__p)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_lddqu((char const *)p);
|
||||
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_addsub_ps(__m128 a, __m128 b)
|
||||
_mm_addsub_ps(__m128 __a, __m128 __b)
|
||||
{
|
||||
return __builtin_ia32_addsubps(a, b);
|
||||
return __builtin_ia32_addsubps(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_ps(__m128 a, __m128 b)
|
||||
_mm_hadd_ps(__m128 __a, __m128 __b)
|
||||
{
|
||||
return __builtin_ia32_haddps(a, b);
|
||||
return __builtin_ia32_haddps(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_ps(__m128 a, __m128 b)
|
||||
_mm_hsub_ps(__m128 __a, __m128 __b)
|
||||
{
|
||||
return __builtin_ia32_hsubps(a, b);
|
||||
return __builtin_ia32_hsubps(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_movehdup_ps(__m128 a)
|
||||
_mm_movehdup_ps(__m128 __a)
|
||||
{
|
||||
return __builtin_shufflevector(a, a, 1, 1, 3, 3);
|
||||
return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_moveldup_ps(__m128 a)
|
||||
_mm_moveldup_ps(__m128 __a)
|
||||
{
|
||||
return __builtin_shufflevector(a, a, 0, 0, 2, 2);
|
||||
return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_addsub_pd(__m128d a, __m128d b)
|
||||
_mm_addsub_pd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return __builtin_ia32_addsubpd(a, b);
|
||||
return __builtin_ia32_addsubpd(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_pd(__m128d a, __m128d b)
|
||||
_mm_hadd_pd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return __builtin_ia32_haddpd(a, b);
|
||||
return __builtin_ia32_haddpd(__a, __b);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_pd(__m128d a, __m128d b)
|
||||
_mm_hsub_pd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return __builtin_ia32_hsubpd(a, b);
|
||||
return __builtin_ia32_hsubpd(__a, __b);
|
||||
}
|
||||
|
||||
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_movedup_pd(__m128d a)
|
||||
_mm_movedup_pd(__m128d __a)
|
||||
{
|
||||
return __builtin_shufflevector(a, a, 0, 0);
|
||||
return __builtin_shufflevector(__a, __a, 0, 0);
|
||||
}
|
||||
|
||||
#define _MM_DENORMALS_ZERO_ON (0x0040)
|
||||
|
@ -101,15 +101,15 @@ _mm_movedup_pd(__m128d a)
|
|||
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_monitor(void const *p, unsigned extensions, unsigned hints)
|
||||
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
|
||||
{
|
||||
__builtin_ia32_monitor((void *)p, extensions, hints);
|
||||
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_mwait(unsigned extensions, unsigned hints)
|
||||
_mm_mwait(unsigned __extensions, unsigned __hints)
|
||||
{
|
||||
__builtin_ia32_mwait(extensions, hints);
|
||||
__builtin_ia32_mwait(__extensions, __hints);
|
||||
}
|
||||
|
||||
#endif /* __SSE3__ */
|
||||
|
|
|
@ -195,10 +195,10 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
|||
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
|
||||
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
|
||||
#define _mm_extract_ps(X, N) (__extension__ \
|
||||
({ union { int i; float f; } __t; \
|
||||
({ union { int __i; float __f; } __t; \
|
||||
__v4sf __a = (__v4sf)(X); \
|
||||
__t.f = __a[N]; \
|
||||
__t.i;}))
|
||||
__t.__f = __a[N]; \
|
||||
__t.__i;}))
|
||||
|
||||
/* Miscellaneous insert and extract macros. */
|
||||
/* Extract a single-precision float from X at index N into D. */
|
||||
|
|
|
@ -31,39 +31,39 @@
|
|||
#include <pmmintrin.h>
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_pi8(__m64 a)
|
||||
_mm_abs_pi8(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsb((__v8qi)a);
|
||||
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_epi8(__m128i a)
|
||||
_mm_abs_epi8(__m128i __a)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
|
||||
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_pi16(__m64 a)
|
||||
_mm_abs_pi16(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsw((__v4hi)a);
|
||||
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_epi16(__m128i a)
|
||||
_mm_abs_epi16(__m128i __a)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
|
||||
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_pi32(__m64 a)
|
||||
_mm_abs_pi32(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsd((__v2si)a);
|
||||
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_abs_epi32(__m128i a)
|
||||
_mm_abs_epi32(__m128i __a)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
|
||||
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
|
||||
}
|
||||
|
||||
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
|
||||
|
@ -77,147 +77,147 @@ _mm_abs_epi32(__m128i a)
|
|||
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_epi16(__m128i a, __m128i b)
|
||||
_mm_hadd_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_epi32(__m128i a, __m128i b)
|
||||
_mm_hadd_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
|
||||
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_pi16(__m64 a, __m64 b)
|
||||
_mm_hadd_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadd_pi32(__m64 a, __m64 b)
|
||||
_mm_hadd_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
|
||||
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadds_epi16(__m128i a, __m128i b)
|
||||
_mm_hadds_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hadds_pi16(__m64 a, __m64 b)
|
||||
_mm_hadds_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_epi16(__m128i a, __m128i b)
|
||||
_mm_hsub_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_epi32(__m128i a, __m128i b)
|
||||
_mm_hsub_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
|
||||
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_pi16(__m64 a, __m64 b)
|
||||
_mm_hsub_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsub_pi32(__m64 a, __m64 b)
|
||||
_mm_hsub_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
|
||||
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsubs_epi16(__m128i a, __m128i b)
|
||||
_mm_hsubs_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_hsubs_pi16(__m64 a, __m64 b)
|
||||
_mm_hsubs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_maddubs_epi16(__m128i a, __m128i b)
|
||||
_mm_maddubs_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
|
||||
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_maddubs_pi16(__m64 a, __m64 b)
|
||||
_mm_maddubs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
|
||||
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_mulhrs_epi16(__m128i a, __m128i b)
|
||||
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_mulhrs_pi16(__m64 a, __m64 b)
|
||||
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_shuffle_epi8(__m128i a, __m128i b)
|
||||
_mm_shuffle_epi8(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
|
||||
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_shuffle_pi8(__m64 a, __m64 b)
|
||||
_mm_shuffle_pi8(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
|
||||
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_epi8(__m128i a, __m128i b)
|
||||
_mm_sign_epi8(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
|
||||
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_epi16(__m128i a, __m128i b)
|
||||
_mm_sign_epi16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
|
||||
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_epi32(__m128i a, __m128i b)
|
||||
_mm_sign_epi32(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
|
||||
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_pi8(__m64 a, __m64 b)
|
||||
_mm_sign_pi8(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
|
||||
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_pi16(__m64 a, __m64 b)
|
||||
_mm_sign_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
|
||||
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sign_pi32(__m64 a, __m64 b)
|
||||
_mm_sign_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
|
||||
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
#endif /* __SSSE3__ */
|
||||
|
|
|
@ -105,15 +105,15 @@ typedef enum {
|
|||
_UVRSR_FAILED = 2
|
||||
} _Unwind_VRS_Result;
|
||||
|
||||
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *context,
|
||||
_Unwind_VRS_RegClass regclass,
|
||||
uint32_t regno,
|
||||
_Unwind_VRS_DataRepresentation representation,
|
||||
void *valuep);
|
||||
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
|
||||
_Unwind_VRS_RegClass __regclass,
|
||||
uint32_t __regno,
|
||||
_Unwind_VRS_DataRepresentation __representation,
|
||||
void *__valuep);
|
||||
|
||||
#else
|
||||
|
||||
uintptr_t _Unwind_GetIP(struct _Unwind_Context* context);
|
||||
uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue