forked from OSchip/llvm-project
[X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer
According to the GCC headers, the Intel intrinsics documentation, and MSDN codegen, _mm_store1_pd (and its _mm_store_pd1 equivalent) should take an aligned pointer; the clang headers are the only implementation I can find that assumes non-aligned stores (by storing with _mm_storeu_pd). Additionally, according to the Intel intrinsics documentation and MSDN codegen, _mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer. This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead. I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd, just as _mm_store_ps1 maps to _mm_store1_ps). As a follow-up, I'll update the LLVM fast-isel tests to match this codegen. Differential Revision: http://reviews.llvm.org/D20617 llvm-svn: 271218
This commit is contained in:
parent
720f8da33a
commit
645e1ad33a
|
@@ -588,19 +588,22 @@ _mm_store_sd(double *__dp, __m128d __a)
|
|||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store1_pd(double *__dp, __m128d __a)
|
||||
_mm_store_pd(double *__dp, __m128d __a)
|
||||
{
|
||||
struct __mm_store1_pd_struct {
|
||||
double __u[2];
|
||||
} __attribute__((__packed__, __may_alias__));
|
||||
((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
|
||||
((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
|
||||
*(__m128d*)__dp = __a;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_pd(double *__dp, __m128d __a)
|
||||
_mm_store1_pd(double *__dp, __m128d __a)
|
||||
{
|
||||
*(__m128d *)__dp = __a;
|
||||
__a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
|
||||
_mm_store_pd(__dp, __a);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_pd1(double *__dp, __m128d __a)
|
||||
{
|
||||
return _mm_store1_pd(__dp, __a);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
|
|
|
@@ -1592,23 +1592,23 @@ _mm_storeu_ps(float *__p, __m128 __a)
|
|||
((struct __storeu_ps*)__p)->__v = __a;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_ps(float *__p, __m128 __a)
|
||||
{
|
||||
*(__m128*)__p = __a;
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store1_ps(float *__p, __m128 __a)
|
||||
{
|
||||
__a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
|
||||
_mm_storeu_ps(__p, __a);
|
||||
_mm_store_ps(__p, __a);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_ps1(float *__p, __m128 __a)
|
||||
{
|
||||
return _mm_store1_ps(__p, __a);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_ps(float *__p, __m128 __a)
|
||||
{
|
||||
*(__m128 *)__p = __a;
|
||||
return _mm_store1_ps(__p, __a);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
|
|
|
@@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d B) {
|
|||
_mm_store_pd(A, B);
|
||||
}
|
||||
|
||||
void test_mm_store_pd1(double* x, __m128d y) {
|
||||
// CHECK-LABEL: test_mm_store_pd1
|
||||
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
|
||||
// CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
|
||||
_mm_store_pd1(x, y);
|
||||
}
|
||||
|
||||
void test_mm_store_sd(double* A, __m128d B) {
|
||||
// CHECK-LABEL: test_mm_store_sd
|
||||
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
|
||||
|
@@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m128i B) {
|
|||
|
||||
void test_mm_store1_pd(double* x, __m128d y) {
|
||||
// CHECK-LABEL: test_mm_store1_pd
|
||||
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
|
||||
// CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
|
||||
// CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
|
||||
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
|
||||
// CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
|
||||
_mm_store1_pd(x, y);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue