Chris Lattner
a7d81ab7f3
X86-64:
...
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for ABI cleaner. Passing in the low part
of a double is the same as passing in a float.
For example, we now compile:
struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = load float* %coerce.dive, align 1 ; <float> [#uses=1]
%call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0]
ret void
}
instead of:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca double ; <double*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1]
%1 = load float* %coerce.dive ; <float> [#uses=1]
store float %1, float* %0, align 1
%2 = load double* %tmp3 ; <double> [#uses=1]
%call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
ret void
}
which is this machine code (at -O0):
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
vs this:
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
movss %xmm0, (%rsp)
movsd (%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
At -O3, it is the difference between this now:
__Z3barP9DeclGroup:
movss (%rdi), %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
vs this before:
__Z3barP9DeclGroup:
movl (%rdi), %eax
movd %rax, %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
llvm-svn: 107048
2010-06-28 19:56:59 +00:00