Commit Graph

4 Commits

Author SHA1 Message Date
Chris Lattner a7d81ab7f3 X86-64:
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for ABI cleaner.  Passing in the low part
of a double is the same as passing in a float.

For example, we now compile:

struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
 foo(*D);
}

into:

%struct.DeclGroup = type { float }

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
  %0 = load float* %coerce.dive, align 1          ; <float> [#uses=1]
  %call = call float @_Z3foo9DeclGroup(float %0)  ; <float> [#uses=0]
  ret void
}

instead of:

%struct.DeclGroup = type { float }

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca double                           ; <double*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
  %0 = bitcast double* %tmp3 to float*            ; <float*> [#uses=1]
  %1 = load float* %coerce.dive                   ; <float> [#uses=1]
  store float %1, float* %0, align 1
  %2 = load double* %tmp3                         ; <double> [#uses=1]
  %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
  ret void
}

which is this machine code (at -O0):

__Z3barP9DeclGroup:
	subq	$24, %rsp
	movq	%rdi, 16(%rsp)
	movq	16(%rsp), %rdi
	leaq	8(%rsp), %rax
	movl	(%rdi), %ecx
	movl	%ecx, (%rax)
	movss	8(%rsp), %xmm0
	callq	__Z3foo9DeclGroup
	addq	$24, %rsp
	ret

vs this:

__Z3barP9DeclGroup:
	subq	$24, %rsp
	movq	%rdi, 16(%rsp)
	movq	16(%rsp), %rdi
	leaq	8(%rsp), %rax
	movl	(%rdi), %ecx
	movl	%ecx, (%rax)
	movss	8(%rsp), %xmm0
	movss	%xmm0, (%rsp)
	movsd	(%rsp), %xmm0
	callq	__Z3foo9DeclGroup
	addq	$24, %rsp
	ret

At -O3, it is the difference between this now:

__Z3barP9DeclGroup:
	movss	(%rdi), %xmm0
	jmp	__Z3foo9DeclGroup  # TAILCALL

vs this before:

__Z3barP9DeclGroup:
	movl	(%rdi), %eax
	movd	%rax, %xmm0
	jmp	__Z3foo9DeclGroup  # TAILCALL

llvm-svn: 107048
2010-06-28 19:56:59 +00:00
Daniel Dunbar fd09df7839 IRgen: Switch 'retval' to use CreateIRTemp.
llvm-svn: 96376
2010-02-16 19:45:20 +00:00
Daniel Dunbar 27bacafb71 IRgen: Switch EmitCompoundLiteralLValue to use CreateMemTemp.
llvm-svn: 96373
2010-02-16 19:43:39 +00:00
Daniel Dunbar a7566f163a IRgen: Add CreateMemTemp, for creating an temporary memory object for a particular type, and flood fill. - CreateMemTemp sets the alignment on the alloca correctly, which fixes a great many places in IRgen where we were doing the wrong thing.
- This fixes many many more places than the test case, but my feeling is we need to audit alignment systematically so I'm not inclined to try hard to test the individual fixes in this patch. If this bothers you, patches welcome!

PR6240.

llvm-svn: 95648
2010-02-09 02:48:28 +00:00