forked from OSchip/llvm-project
[ARM] Fix Neon vector type alignment to 64-bit
The maximum alignment for ARM NEON data types should be 64 bits, as specified in the Notes of Sec. A.2 of the ARM Procedure Call Standard (AAPCS) document. This patch reduces it from the current, larger natural-alignment defaults to 64 bits, except on Android (so as not to break the existing ABI). Reviewed by: Stephen Hines, Renato Golin. Differential Revision: https://reviews.llvm.org/D33205 llvm-svn: 304201
This commit is contained in:
parent
6ceea9a4d3
commit
3d92d7ab36
|
@ -5382,6 +5382,11 @@ public:
|
||||||
// ARM has atomics up to 8 bytes
|
// ARM has atomics up to 8 bytes
|
||||||
setAtomic();
|
setAtomic();
|
||||||
|
|
||||||
|
if (Triple.getEnvironment() == llvm::Triple::Android)
|
||||||
|
MaxVectorAlign = 128; // don't break existing Android ABI
|
||||||
|
else
|
||||||
|
MaxVectorAlign = 64; // AAPCS
|
||||||
|
|
||||||
// Do force alignment of members that follow zero length bitfields. If
|
// Do force alignment of members that follow zero length bitfields. If
|
||||||
// the alignment of the zero-length bitfield is greater than the member
|
// the alignment of the zero-length bitfield is greater than the member
|
||||||
// that follows it, `bar', `bar' will be aligned as the type of the
|
// that follows it, `bar', `bar' will be aligned as the type of the
|
||||||
|
|
|
@ -133,20 +133,20 @@ double test_5c(__char5 *in) {
|
||||||
|
|
||||||
double varargs_vec_9c(int fixed, ...) {
|
double varargs_vec_9c(int fixed, ...) {
|
||||||
// CHECK: varargs_vec_9c
|
// CHECK: varargs_vec_9c
|
||||||
// CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 16
|
// CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 8
|
||||||
// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
||||||
// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
|
// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
|
||||||
// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
|
// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
|
||||||
// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
|
// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
|
||||||
// CHECK: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
|
// CHECK: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
|
||||||
// CHECK: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 16
|
// CHECK: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 8
|
||||||
// APCS-GNU: varargs_vec_9c
|
// APCS-GNU: varargs_vec_9c
|
||||||
// APCS-GNU: [[VAR:%.*]] = alloca <9 x i8>, align 16
|
// APCS-GNU: [[VAR:%.*]] = alloca <9 x i8>, align 8
|
||||||
// APCS-GNU: [[AP:%.*]] = load i8*,
|
// APCS-GNU: [[AP:%.*]] = load i8*,
|
||||||
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
|
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
|
||||||
// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <9 x i8>*
|
// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <9 x i8>*
|
||||||
// APCS-GNU: [[VEC:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 4
|
// APCS-GNU: [[VEC:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 4
|
||||||
// APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 16
|
// APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 8
|
||||||
// ANDROID: varargs_vec_9c
|
// ANDROID: varargs_vec_9c
|
||||||
// ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
|
// ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
|
||||||
// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
||||||
|
@ -246,15 +246,15 @@ double test_3s(__short3 *in) {
|
||||||
|
|
||||||
double varargs_vec_5s(int fixed, ...) {
|
double varargs_vec_5s(int fixed, ...) {
|
||||||
// CHECK: varargs_vec_5s
|
// CHECK: varargs_vec_5s
|
||||||
// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
|
// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 8
|
||||||
// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
|
||||||
// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
|
// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
|
||||||
// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
|
// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
|
||||||
// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
|
// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
|
||||||
// CHECK: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
|
// CHECK: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
|
||||||
// CHECK: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 16
|
// CHECK: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 8
|
||||||
// APCS-GNU: varargs_vec_5s
|
// APCS-GNU: varargs_vec_5s
|
||||||
// APCS-GNU: [[VAR:%.*]] = alloca <5 x i16>, align 16
|
// APCS-GNU: [[VAR:%.*]] = alloca <5 x i16>, align 8
|
||||||
// APCS-GNU: [[AP:%.*]] = load i8*,
|
// APCS-GNU: [[AP:%.*]] = load i8*,
|
||||||
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
|
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
|
||||||
// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i16>*
|
// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i16>*
|
||||||
|
|
|
@ -32,3 +32,11 @@ void t2(uint64_t *src1, uint8_t *src2, uint64x2_t *dst) {
|
||||||
*dst = q;
|
*dst = q;
|
||||||
// CHECK: store <2 x i64>
|
// CHECK: store <2 x i64>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Neon types have 64-bit alignment
|
||||||
|
int32x4_t gl_b;
|
||||||
|
void t3(int32x4_t *src) {
|
||||||
|
// CHECK: @t3
|
||||||
|
gl_b = *src;
|
||||||
|
// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* @gl_b, align 8
|
||||||
|
}
|
||||||
|
|
|
@ -343,7 +343,7 @@ typedef union {
|
||||||
} union_hom_fp_partial;
|
} union_hom_fp_partial;
|
||||||
TEST(union_hom_fp_partial)
|
TEST(union_hom_fp_partial)
|
||||||
// CHECK-LABEL: define void @test_union_hom_fp_partial()
|
// CHECK-LABEL: define void @test_union_hom_fp_partial()
|
||||||
// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 16
|
// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 8
|
||||||
// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ float, float, float, float }]] @return_union_hom_fp_partial()
|
// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ float, float, float, float }]] @return_union_hom_fp_partial()
|
||||||
// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float, float, float, float }]]*
|
// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float, float, float, float }]]*
|
||||||
// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
|
// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
|
||||||
|
@ -376,7 +376,7 @@ typedef union {
|
||||||
} union_het_fpv_partial;
|
} union_het_fpv_partial;
|
||||||
TEST(union_het_fpv_partial)
|
TEST(union_het_fpv_partial)
|
||||||
// CHECK-LABEL: define void @test_union_het_fpv_partial()
|
// CHECK-LABEL: define void @test_union_het_fpv_partial()
|
||||||
// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 16
|
// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 8
|
||||||
// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ i32, i32, float, float }]] @return_union_het_fpv_partial()
|
// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ i32, i32, float, float }]] @return_union_het_fpv_partial()
|
||||||
// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ i32, i32, float, float }]]*
|
// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ i32, i32, float, float }]]*
|
||||||
// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
|
// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
|
||||||
|
@ -413,7 +413,7 @@ TEST(int4)
|
||||||
|
|
||||||
TEST(int8)
|
TEST(int8)
|
||||||
// CHECK-LABEL: define {{.*}} @return_int8()
|
// CHECK-LABEL: define {{.*}} @return_int8()
|
||||||
// CHECK: [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 32
|
// CHECK: [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 8
|
||||||
// CHECK: [[VAR:%.*]] = alloca [[REC]], align
|
// CHECK: [[VAR:%.*]] = alloca [[REC]], align
|
||||||
// CHECK: store
|
// CHECK: store
|
||||||
// CHECK: load
|
// CHECK: load
|
||||||
|
@ -457,7 +457,7 @@ TEST(int8)
|
||||||
|
|
||||||
TEST(int5)
|
TEST(int5)
|
||||||
// CHECK-LABEL: define {{.*}} @return_int5()
|
// CHECK-LABEL: define {{.*}} @return_int5()
|
||||||
// CHECK: [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 32
|
// CHECK: [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 8
|
||||||
// CHECK: [[VAR:%.*]] = alloca [[REC]], align
|
// CHECK: [[VAR:%.*]] = alloca [[REC]], align
|
||||||
// CHECK: store
|
// CHECK: store
|
||||||
// CHECK: load
|
// CHECK: load
|
||||||
|
|
|
@ -83,11 +83,11 @@ typedef struct {
|
||||||
OddlySizedStruct return_oddly_sized_struct() {}
|
OddlySizedStruct return_oddly_sized_struct() {}
|
||||||
|
|
||||||
// CHECK: define <4 x float> @test_va_arg_vec(i8* %l)
|
// CHECK: define <4 x float> @test_va_arg_vec(i8* %l)
|
||||||
// CHECK: [[ALIGN_TMP:%.*]] = add i32 {{%.*}}, 15
|
// CHECK: [[ALIGN_TMP:%.*]] = add i32 {{%.*}}, 7
|
||||||
// CHECK: [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -16
|
// CHECK: [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -8
|
||||||
// CHECK: [[ALIGNED_I8:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
|
// CHECK: [[ALIGNED_I8:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
|
||||||
// CHECK: [[ALIGNED_VEC:%.*]] = bitcast i8* [[ALIGNED_I8]] to <4 x float>
|
// CHECK: [[ALIGNED_VEC:%.*]] = bitcast i8* [[ALIGNED_I8]] to <4 x float>
|
||||||
// CHECK: load <4 x float>, <4 x float>* [[ALIGNED_VEC]], align 16
|
// CHECK: load <4 x float>, <4 x float>* [[ALIGNED_VEC]], align 8
|
||||||
float32x4_t test_va_arg_vec(__builtin_va_list l) {
|
float32x4_t test_va_arg_vec(__builtin_va_list l) {
|
||||||
return __builtin_va_arg(l, float32x4_t);
|
return __builtin_va_arg(l, float32x4_t);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue