Elide argument copies during instruction selection
Summary:
Avoids tons of prologue boilerplate when arguments are passed in memory
and left in memory. This can happen in a debug build or in a release
build when an argument alloca is escaped. This will dramatically affect
the code size of x86 debug builds, because X86 fast isel doesn't handle
arguments passed in memory at all. It only handles the x86_64 case of up
to 6 basic register parameters.
This is implemented by analyzing the entry block before ISel to identify
copy elision candidates. A copy elision candidate is an argument that is
used to fully initialize an alloca before any other possibly escaping
uses of that alloca. If an argument is a copy elision candidate, we set
a flag on the InputArg. If the target generates loads from a fixed
stack object that matches the size and alignment requirements of the
alloca, the SelectionDAG builder will delete the stack object created
for the alloca and replace it with the fixed stack object. The load is
left behind to satisfy any remaining uses of the argument value. The
store is now dead and is therefore elided. The fixed stack object is
also marked as mutable, as it may now be modified by the user, and it
would be invalid to rematerialize the initial load from it.
Supersedes D28388
Fixes PR26328
Reviewers: chandlerc, MatzeB, qcolombet, inglorion, hans
Subscribers: igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D29668
llvm-svn: 296683
2017-03-02 05:42:00 +08:00
|
|
|
; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s
|
|
|
|
|
|
|
|
declare arm_aapcscc void @addrof_i32(i32*)
|
|
|
|
declare arm_aapcscc void @addrof_i64(i64*)
|
|
|
|
|
|
|
|
; @simple: stores its only named parameter, %x (the fifth i32), into a fresh
; alloca and hands that alloca's address to @addrof_i32. Returns nothing.
; The four leading i32 parameters are unnamed and never used in the body.
; NOTE(review): presumably they exist to occupy r0-r3 so that %x arrives in
; memory -- confirm against the AAPCS argument-passing rules.
define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) {
|
|
|
|
entry:
|
|
|
|
; Local slot for %x; its only uses are the store and the call below.
%x.addr = alloca i32
|
|
|
|
; Fully initializes the slot from the argument before its address escapes.
store i32 %x, i32* %x.addr
|
|
|
|
; The slot's address escapes to an external function here.
call void @addrof_i32(i32* %x.addr)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: simple:
|
|
|
|
; CHECK: push {r11, lr}
|
|
|
|
; CHECK: add r0, sp, #8
|
|
|
|
; CHECK: bl addrof_i32
|
|
|
|
; CHECK: pop {r11, pc}
|
|
|
|
|
|
|
|
|
|
|
|
; We need to load %x before calling addrof_i32 now because it could mutate %x in
|
|
|
|
; place.
|
|
|
|
|
|
|
|
; @use_arg: same shape as a store-then-escape of the fifth i32 parameter %x,
; except that the function also returns %x itself after the call.
; The four leading i32 parameters are unnamed and never used in the body.
; NOTE(review): the expectations that follow this function look for an ldr
; from the argument's stack location before the bl and a mov into r0 after
; it -- i.e. the returned value is read before the callee gets the address.
define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) {
|
|
|
|
entry:
|
|
|
|
; Local slot for %x; its only uses are the store and the call below.
%x.addr = alloca i32
|
|
|
|
; Fully initializes the slot from the argument before its address escapes.
store i32 %x, i32* %x.addr
|
|
|
|
; The slot's address escapes to an external function here.
call void @addrof_i32(i32* %x.addr)
|
|
|
|
; Returns the SSA argument value %x, not a reload from %x.addr.
ret i32 %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: use_arg:
|
|
|
|
; CHECK: push {[[csr:[^ ]*]], lr}
|
|
|
|
; CHECK: add r0, sp, #8
|
2017-06-28 15:07:03 +08:00
|
|
|
; CHECK: ldr [[csr]], [sp, #8]
|
Elide argument copies during instruction selection
Summary:
Avoids tons of prologue boilerplate when arguments are passed in memory
and left in memory. This can happen in a debug build or in a release
build when an argument alloca is escaped. This will dramatically affect
the code size of x86 debug builds, because X86 fast isel doesn't handle
arguments passed in memory at all. It only handles the x86_64 case of up
to 6 basic register parameters.
This is implemented by analyzing the entry block before ISel to identify
copy elision candidates. A copy elision candidate is an argument that is
used to fully initialize an alloca before any other possibly escaping
uses of that alloca. If an argument is a copy elision candidate, we set
a flag on the InputArg. If the target generates loads from a fixed
stack object that matches the size and alignment requirements of the
alloca, the SelectionDAG builder will delete the stack object created
for the alloca and replace it with the fixed stack object. The load is
left behind to satisfy any remaining uses of the argument value. The
store is now dead and is therefore elided. The fixed stack object is
also marked as mutable, as it may now be modified by the user, and it
would be invalid to rematerialize the initial load from it.
Supersedes D28388
Fixes PR26328
Reviewers: chandlerc, MatzeB, qcolombet, inglorion, hans
Subscribers: igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D29668
llvm-svn: 296683
2017-03-02 05:42:00 +08:00
|
|
|
; CHECK: bl addrof_i32
|
|
|
|
; CHECK: mov r0, [[csr]]
|
|
|
|
; CHECK: pop {[[csr]], pc}
|
|
|
|
|
|
|
|
|
|
|
|
; @split_i64: i64 variant of the store-then-escape pattern. The fifth
; parameter %x is an i64; it is stored into a 4-byte-aligned i64 alloca, the
; alloca's address is passed to @addrof_i64, and %x itself is returned.
; The four leading i32 parameters are unnamed and never used in the body.
; NOTE(review): the expectations that follow this function look for a
; sub sp / two ldr / two str sequence, which suggests the copy is NOT elided
; here; presumably because the i64 is handled as two 32-bit halves -- confirm.
define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) {
|
|
|
|
entry:
|
|
|
|
; 8-byte local slot with an explicit 4-byte alignment.
%x.addr = alloca i64, align 4
|
|
|
|
; Fully initializes the slot from the argument, again with align 4.
store i64 %x, i64* %x.addr, align 4
|
|
|
|
; The slot's address escapes to an external function here.
call void @addrof_i64(i64* %x.addr)
|
|
|
|
; Returns the SSA argument value %x, not a reload from %x.addr.
ret i64 %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: split_i64:
|
|
|
|
; CHECK: push {r4, r5, r11, lr}
|
|
|
|
; CHECK: sub sp, sp, #8
|
|
|
|
; CHECK: ldr r4, [sp, #28]
|
|
|
|
; CHECK: mov r0, sp
|
2017-06-28 15:07:03 +08:00
|
|
|
; CHECK: ldr r5, [sp, #24]
|
Elide argument copies during instruction selection
Summary:
Avoids tons of prologue boilerplate when arguments are passed in memory
and left in memory. This can happen in a debug build or in a release
build when an argument alloca is escaped. This will dramatically affect
the code size of x86 debug builds, because X86 fast isel doesn't handle
arguments passed in memory at all. It only handles the x86_64 case of up
to 6 basic register parameters.
This is implemented by analyzing the entry block before ISel to identify
copy elision candidates. A copy elision candidate is an argument that is
used to fully initialize an alloca before any other possibly escaping
uses of that alloca. If an argument is a copy elision candidate, we set
a flag on the InputArg. If the target generates loads from a fixed
stack object that matches the size and alignment requirements of the
alloca, the SelectionDAG builder will delete the stack object created
for the alloca and replace it with the fixed stack object. The load is
left behind to satisfy any remaining uses of the argument value. The
store is now dead and is therefore elided. The fixed stack object is
also marked as mutable, as it may now be modified by the user, and it
would be invalid to rematerialize the initial load from it.
Supersedes D28388
Fixes PR26328
Reviewers: chandlerc, MatzeB, qcolombet, inglorion, hans
Subscribers: igorb, llvm-commits
Differential Revision: https://reviews.llvm.org/D29668
llvm-svn: 296683
2017-03-02 05:42:00 +08:00
|
|
|
; CHECK: str r4, [sp, #4]
|
|
|
|
; CHECK: str r5, [sp]
|
|
|
|
; CHECK: bl addrof_i64
|
|
|
|
; CHECK: mov r0, r5
|
|
|
|
; CHECK: mov r1, r4
|
|
|
|
; CHECK: add sp, sp, #8
|
|
|
|
; CHECK: pop {r4, r5, r11, pc}
|