llvm-project/clang/test/CodeGenCXX/alloca-align.cpp

29 lines
614 B
C++
Raw Normal View History

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
struct s0 {
int Start, End;
unsigned Alignment;
int TheStores __attribute__((aligned(16)));
};
// CHECK: define void @f0
// CHECK: alloca %struct.s0, align 16
extern "C" void f0() {
(void) s0();
}
// CHECK: define void @f1
// CHECK: alloca %struct.s0, align 16
extern "C" void f1() {
(void) (struct s0) { 0, 0, 0, 0 };
}
X86-64: pass/return structs of float/int as float/i32 instead of double/i64 to make the code generated for ABI cleaner. Passing in the low part of a double is the same as passing in a float. For example, we now compile: struct DeclGroup { float NumDecls; }; float foo(DeclGroup D); void bar(DeclGroup *D) { foo(*D); } into: %struct.DeclGroup = type { float } define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind { entry: %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2] %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2] store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1] %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1] %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1] call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false) %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1] %0 = load float* %coerce.dive, align 1 ; <float> [#uses=1] %call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0] ret void } instead of: %struct.DeclGroup = type { float } define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind { entry: %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2] %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2] %tmp3 = alloca double ; <double*> [#uses=2] store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1] %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1] %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1] call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false) %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1] %0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1] %1 = load float* %coerce.dive ; <float> [#uses=1] store float %1, float* %0, align 1 %2 = load double* %tmp3 ; <double> [#uses=1] %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0] ret void } which is this machine code (at -O0): __Z3barP9DeclGroup: subq $24, %rsp movq %rdi, 16(%rsp) movq 16(%rsp), %rdi leaq 8(%rsp), %rax movl (%rdi), %ecx movl %ecx, (%rax) movss 8(%rsp), %xmm0 callq __Z3foo9DeclGroup addq $24, %rsp ret vs this: __Z3barP9DeclGroup: subq $24, %rsp movq %rdi, 16(%rsp) movq 16(%rsp), %rdi leaq 8(%rsp), %rax movl (%rdi), %ecx movl %ecx, (%rax) movss 8(%rsp), %xmm0 movss %xmm0, (%rsp) movsd (%rsp), %xmm0 callq __Z3foo9DeclGroup addq $24, %rsp ret At -O3, it is the difference between this now: __Z3barP9DeclGroup: movss (%rdi), %xmm0 jmp __Z3foo9DeclGroup # TAILCALL vs this before: __Z3barP9DeclGroup: movl (%rdi), %eax movd %rax, %xmm0 jmp __Z3foo9DeclGroup # TAILCALL llvm-svn: 107048
2010-06-29 03:56:59 +08:00
// CHECK: define i32 @f2
// CHECK: alloca %struct.s1, align 2
struct s1 { short x; short y; };
extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {
if (a)
return *x;
return *y;
}