2012-07-20 02:53:21 +08:00
|
|
|
; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=generic | FileCheck %s
; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=atom | FileCheck -check-prefix=ATOM %s

; The input is compiled twice with fast-isel for i386-apple-darwin: the
; -mcpu=generic output is matched against the default check prefix, and the
; -mcpu=atom output is matched against the ATOM prefix.

; 32-bit integer global that is only declared here (external); @loadgv loads
; from it and stores back to it.
@src = external global i32
|
|
|
|
|
Implement rdar://6653118 - fastisel should fold loads where possible.
Since mem2reg isn't run at -O0, we get a ton of reloads from the stack.
For example, before this change, this code:
int foo(int x, int y, int z) {
return x+y+z;
}
used to compile into:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
movl 4(%rsp), %esi
addl %edx, %esi
movl (%rsp), %edx
addl %esi, %edx
movl %edx, %eax
addq $12, %rsp
ret
Now we produce:
_foo: ## @foo
subq $12, %rsp
movl %edi, 8(%rsp)
movl %esi, 4(%rsp)
movl %edx, (%rsp)
movl 8(%rsp), %edx
addl 4(%rsp), %edx ## Folded load
addl (%rsp), %edx ## Folded load
movl %edx, %eax
addq $12, %rsp
ret
Fewer instructions and less register use = faster compiles.
llvm-svn: 113102
2010-09-05 10:18:34 +08:00
|
|
|
; rdar://6653118
|
2008-09-04 14:18:33 +08:00
|
|
|
; Loads the global @src twice, adds the two loaded values, stores the sum back
; to @src and returns it.
;
; Background (rdar://6653118): mem2reg isn't run at -O0, so without load
; folding fast-isel emits a separate reload for every use. Folding a load
; into the instruction that consumes it means fewer instructions and less
; register use.
define i32 @loadgv() nounwind {
entry:
  %0 = load i32* @src, align 4
  %1 = load i32* @src, align 4
  %2 = add i32 %0, %1
  store i32 %2, i32* @src
  ret i32 %2

; This should fold one of the loads into the add.
; The address of @src is fetched once through its non-lazy pointer into %ecx;
; the first load is a plain movl and the second is folded into the addl.
; CHECK: loadgv:
; CHECK: movl L_src$non_lazy_ptr, %ecx
; CHECK: movl (%ecx), %eax
; CHECK: addl (%ecx), %eax
; CHECK: movl %eax, (%ecx)
; CHECK: ret

; The Atom run expects the same instruction sequence.
; ATOM: loadgv:
; ATOM: movl L_src$non_lazy_ptr, %ecx
; ATOM: movl (%ecx), %eax
; ATOM: addl (%ecx), %eax
; ATOM: movl %eax, (%ecx)
; ATOM: ret

}
|
2008-09-09 09:26:59 +08:00
|
|
|
|
|
|
|
; Struct with a single field: a pointer to a pointer to a variadic function
; returning i32. Used as the pointee type of @t's %this parameter.
%stuff = type { i32 (...)** }

; Externally defined constant array of four pointers to variadic functions
; returning i32; @t takes the address of its element at index 2.
@LotsStuff = external constant [4 x i32 (...)*]
|
|
|
|
|
|
|
|
; Stores a constant-expression address (element 2 of @LotsStuff) through a
; null pointer and returns. The %this parameter is not referenced in the body.
define void @t(%stuff* %this) nounwind {
entry:
  store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
  ret void

; The generic run expects the zero (the null destination address) to be
; materialized before the non-lazy pointer for @LotsStuff is loaded.
; CHECK: _t:
; CHECK: movl $0, %eax
; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx

; The Atom run expects the same two instructions in the opposite order.
; ATOM: _t:
; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx
; ATOM: movl $0, %eax

}
|