2010-02-09 10:48:28 +08:00
|
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
|
|
|
|
|
2010-02-17 03:43:39 +08:00
|
|
|
struct s0 {
|
2010-02-09 10:48:28 +08:00
|
|
|
int Start, End;
|
|
|
|
unsigned Alignment;
|
|
|
|
int TheStores __attribute__((aligned(16)));
|
|
|
|
};
|
2010-02-17 03:45:20 +08:00
|
|
|
|
|
|
|
// CHECK: define void @f0
|
|
|
|
// CHECK: alloca %struct.s0, align 16
|
2010-02-17 03:43:39 +08:00
|
|
|
extern "C" void f0() {
|
|
|
|
(void) s0();
|
|
|
|
}
|
|
|
|
|
2010-02-17 03:45:20 +08:00
|
|
|
// CHECK: define void @f1
|
|
|
|
// CHECK: alloca %struct.s0, align 16
|
2010-02-17 03:43:39 +08:00
|
|
|
extern "C" void f1() {
|
2010-02-17 03:45:20 +08:00
|
|
|
(void) (struct s0) { 0, 0, 0, 0 };
|
|
|
|
}
|
|
|
|
|
X86-64:
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for ABI cleaner. Passing in the low part
of a double is the same as passing in a float.
For example, we now compile:
struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = load float* %coerce.dive, align 1 ; <float> [#uses=1]
%call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0]
ret void
}
instead of:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca double ; <double*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1]
%1 = load float* %coerce.dive ; <float> [#uses=1]
store float %1, float* %0, align 1
%2 = load double* %tmp3 ; <double> [#uses=1]
%call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
ret void
}
which is this machine code (at -O0):
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
vs this:
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
movss %xmm0, (%rsp)
movsd (%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
At -O3, it is the difference between this now:
__Z3barP9DeclGroup:
movss (%rdi), %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
vs this before:
__Z3barP9DeclGroup:
movl (%rdi), %eax
movd %rax, %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
llvm-svn: 107048
2010-06-29 03:56:59 +08:00
|
|
|
// CHECK: define i32 @f2
|
2010-02-17 03:45:20 +08:00
|
|
|
// CHECK: alloca %struct.s1, align 2
|
|
|
|
struct s1 { short x; short y; };
|
|
|
|
extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {
|
|
|
|
if (a)
|
|
|
|
return *x;
|
|
|
|
return *y;
|
2010-02-09 10:48:28 +08:00
|
|
|
}
|