2018-04-12 00:03:07 +08:00
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-MACHO
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-ELF
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=THUMB-LONG
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
|
2013-05-15 00:26:38 +08:00
|
|
|
|
|
|
|
; Note that some of these tests assume that relocations are either
|
|
|
|
; movw/movt or constant pool loads. Different platforms will select
|
|
|
|
; different approaches.
|
2011-11-06 04:16:15 +08:00
|
|
|
|
|
|
|
;; Helper callee: takes a zeroext i1 and returns it zero-extended to i32.
;; Called from @foo with the constant i1 1.
define i32 @t0(i1 zeroext %a) nounwind {
|
|
|
|
%1 = zext i1 %a to i32
|
|
|
|
ret i32 %1
|
|
|
|
}
|
|
|
|
|
|
|
|
;; Helper callee: takes a signext i8 and returns it sign-extended to i32.
;; Called from @foo with its %a argument.
define i32 @t1(i8 signext %a) nounwind {
|
|
|
|
%1 = sext i8 %a to i32
|
|
|
|
ret i32 %1
|
|
|
|
}
|
|
|
|
|
|
|
|
;; Helper callee: takes a zeroext i8 and returns it zero-extended to i32.
;; Called from @foo with both %a and the constant 255.
define i32 @t2(i8 zeroext %a) nounwind {
|
|
|
|
%1 = zext i8 %a to i32
|
|
|
|
ret i32 %1
|
|
|
|
}
|
|
|
|
|
|
|
|
;; Helper callee: takes a signext i16 and returns it sign-extended to i32.
;; Called from @foo with its %b argument.
define i32 @t3(i16 signext %a) nounwind {
|
|
|
|
%1 = sext i16 %a to i32
|
|
|
|
ret i32 %1
|
|
|
|
}
|
|
|
|
|
|
|
|
;; Helper callee: takes a zeroext i16 and returns it zero-extended to i32.
;; Called from @foo with both %b and the constant 65535.
define i32 @t4(i16 zeroext %a) nounwind {
|
|
|
|
%1 = zext i16 %a to i32
|
|
|
|
ret i32 %1
|
|
|
|
}
|
|
|
|
|
|
|
|
;; @foo passes i1/i8/i16 values to the @t0-@t4 helpers, whose parameters carry
;; zeroext/signext attributes. The check lines pin the extension (or constant
;; materialization) expected ahead of each call for the ARM and Thumb runs.
define void @foo(i8 %a, i16 %b) nounwind {
|
|
|
|
; ARM: foo
|
|
|
|
; THUMB: foo
|
|
|
|
;; Materialize i1 1
|
|
|
|
; ARM: movw r2, #1
|
|
|
|
;; zero-ext
|
|
|
|
; ARM: and r2, r2, #1
|
|
|
|
; THUMB: and r2, r2, #1
|
|
|
|
%1 = call i32 @t0(i1 zeroext 1)
|
|
|
|
;; sign-ext of the i8 argument
; ARM: sxtb r2, r1
|
|
|
|
; ARM: mov r0, r2
|
|
|
|
; THUMB: sxtb r2, r1
|
|
|
|
; THUMB: mov r0, r2
|
|
|
|
%2 = call i32 @t1(i8 signext %a)
|
2013-06-08 04:10:37 +08:00
|
|
|
;; zero-ext of the i8 argument (mask with #255)
; ARM: and r2, r1, #255
|
2011-11-06 04:16:15 +08:00
|
|
|
; ARM: mov r0, r2
|
2013-06-08 04:10:37 +08:00
|
|
|
; THUMB: and r2, r1, #255
|
2011-11-06 04:16:15 +08:00
|
|
|
; THUMB: mov r0, r2
|
|
|
|
%3 = call i32 @t2(i8 zeroext %a)
|
|
|
|
;; sign-ext of the i16 argument
; ARM: sxth r2, r1
|
|
|
|
; ARM: mov r0, r2
|
|
|
|
; THUMB: sxth r2, r1
|
|
|
|
; THUMB: mov r0, r2
|
|
|
|
%4 = call i32 @t3(i16 signext %b)
|
|
|
|
;; zero-ext of the i16 argument
; ARM: uxth r2, r1
|
|
|
|
; ARM: mov r0, r2
|
|
|
|
; THUMB: uxth r2, r1
|
|
|
|
; THUMB: mov r0, r2
|
|
|
|
%5 = call i32 @t4(i16 zeroext %b)
|
|
|
|
|
|
|
|
;; A few tests to check materialization
|
|
|
|
;; Note: i1 1 was materialized with the t0 call
|
|
|
|
; ARM: movw r1, #255
|
|
|
|
%6 = call i32 @t2(i8 zeroext 255)
|
|
|
|
; ARM: movw r1, #65535
|
|
|
|
; THUMB: movw r1, #65535
|
|
|
|
%7 = call i32 @t4(i16 zeroext 65535)
|
|
|
|
ret void
|
|
|
|
}
|
2011-11-08 08:03:32 +08:00
|
|
|
|
|
|
|
;; @foo2 calls the external functions @t5-@t9, whose return values carry
;; signext/zeroext attributes (i16, i8 and i1), and discards the results.
;; No check lines are attached to this function in the visible part of the
;; file; presumably it only has to get through fast-isel without aborting
;; (the llc invocations pass -fast-isel-abort=1) -- TODO confirm.
define void @foo2() nounwind {
|
|
|
|
%1 = call signext i16 @t5()
|
|
|
|
%2 = call zeroext i16 @t6()
|
|
|
|
%3 = call signext i8 @t7()
|
|
|
|
%4 = call zeroext i8 @t8()
|
|
|
|
%5 = call zeroext i1 @t9()
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
;; External callees used by @foo2. Each takes no arguments and returns a
;; narrow integer with an explicit signext or zeroext return attribute.
declare signext i16 @t5();
|
|
|
|
declare zeroext i16 @t6();
|
|
|
|
declare signext i8 @t7();
|
|
|
|
declare zeroext i8 @t8();
|
|
|
|
declare zeroext i1 @t9();
|
2011-12-03 04:25:18 +08:00
|
|
|
|
2013-06-22 06:56:30 +08:00
|
|
|
;; @t10 calls @bar with six zeroext i8 constants (0, -8, -69, 28, 40, -70) and
;; returns 0. The checks below expect the constants to be materialized as
;; 0, 248, 187, 28, 40 and 186, masked with #255, and the last two stored to
;; [sp] and [sp, #4] before the call. The long-call runs instead expect the
;; address of bar to be loaded into a register and called through blx.
define i32 @t10() {
|
2011-12-03 04:25:18 +08:00
|
|
|
entry:
|
|
|
|
; ARM: @t10
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM-DAG: movw [[R0:l?r[0-9]*]], #0
|
|
|
|
; ARM-DAG: movw [[R1:l?r[0-9]*]], #248
|
|
|
|
; ARM-DAG: movw [[R2:l?r[0-9]*]], #187
|
|
|
|
; ARM-DAG: movw [[R3:l?r[0-9]*]], #28
|
|
|
|
; ARM-DAG: movw [[R4:l?r[0-9]*]], #40
|
|
|
|
; ARM-DAG: movw [[R5:l?r[0-9]*]], #186
|
|
|
|
; ARM-DAG: and [[R0]], [[R0]], #255
|
|
|
|
; ARM-DAG: and [[R1]], [[R1]], #255
|
|
|
|
; ARM-DAG: and [[R2]], [[R2]], #255
|
|
|
|
; ARM-DAG: and [[R3]], [[R3]], #255
|
|
|
|
; ARM-DAG: and [[R4]], [[R4]], #255
|
|
|
|
; ARM-DAG: str [[R4]], [sp]
|
|
|
|
; ARM-DAG: and [[R4]], [[R5]], #255
|
|
|
|
; ARM-DAG: str [[R4]], [sp, #4]
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: bl {{_?}}bar
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM-LONG-LABEL: @t10
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
|
|
|
|
; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill
|
|
|
|
; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar
|
|
|
|
;; NOTE(review): the ELF movt pattern on the next line names the Mach-O style
;; symbol L_bar$non_lazy_ptr, whereas the ELF movw check just above uses plain
;; bar. The whole group is optional (trailing ?), so it can match the empty
;; string and the check passes either way -- confirm whether upper16 of bar
;; was intended here.
; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
|
|
|
|
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM-LONG: blx [[R]]
|
2011-12-03 04:25:18 +08:00
|
|
|
; THUMB: @t10
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB-DAG: movs [[R0:l?r[0-9]*]], #0
|
|
|
|
; THUMB-DAG: movs [[R1:l?r[0-9]*]], #248
|
|
|
|
; THUMB-DAG: movs [[R2:l?r[0-9]*]], #187
|
|
|
|
; THUMB-DAG: movs [[R3:l?r[0-9]*]], #28
|
|
|
|
; THUMB-DAG: movw [[R4:l?r[0-9]*]], #40
|
|
|
|
; THUMB-DAG: movw [[R5:l?r[0-9]*]], #186
|
|
|
|
; THUMB-DAG: and [[R0]], [[R0]], #255
|
|
|
|
; THUMB-DAG: and [[R1]], [[R1]], #255
|
|
|
|
; THUMB-DAG: and [[R2]], [[R2]], #255
|
|
|
|
; THUMB-DAG: and [[R3]], [[R3]], #255
|
|
|
|
; THUMB-DAG: and [[R4]], [[R4]], #255
|
|
|
|
; THUMB-DAG: str.w [[R4]], [sp]
|
|
|
|
; THUMB-DAG: and [[R4]], [[R5]], #255
|
|
|
|
; THUMB-DAG: str.w [[R4]], [sp, #4]
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: bl {{_?}}bar
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB-LONG-LABEL: @t10
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
|
|
|
|
; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
|
|
|
|
; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill
|
|
|
|
; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB-LONG: blx [[R]]
|
2011-12-03 04:25:18 +08:00
|
|
|
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
|
|
|
|
ret i32 0
|
|
|
|
}
|
|
|
|
|
|
|
|
;; External callee used by @t10; all six i8 parameters are zeroext.
declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
|
2012-05-24 02:38:57 +08:00
|
|
|
|
|
|
|
;; Trivial callee: ignores %i and returns 0. @foo3 stores its address to a
;; stack slot and calls it indirectly.
define i32 @bar0(i32 %i) nounwind {
|
|
|
|
ret i32 0
|
|
|
|
}
|
|
|
|
|
|
|
|
;; @foo3 stores the address of @bar0 into a stack slot, reloads it, and calls
;; through the loaded pointer with the argument 0. The checks accept either a
;; movw/movt pair or a constant-pool load for the address of bar0, then expect
;; the zero argument to be materialized and the call made with an indirect blx.
;; NOTE(review): the Thumb checks below have no '@foo3' label line of their
;; own, unlike the ARM ones -- confirm that this is intended.
define void @foo3() uwtable {
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM: @foo3
|
|
|
|
; ARM: {{(movw r[0-9]+, :lower16:_?bar0)|(ldr r[0-9]+, .LCPI)}}
|
|
|
|
; ARM: {{(movt r[0-9]+, :upper16:_?bar0)|(ldr r[0-9]+, \[r[0-9]+\])}}
|
|
|
|
; ARM: movw {{r[0-9]+}}, #0
|
|
|
|
; ARM: blx {{r[0-9]+}}
|
|
|
|
; THUMB: {{(movw r[0-9]+, :lower16:_?bar0)|(ldr.n r[0-9]+, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r[0-9]+, :upper16:_?bar0)|(ldr r[0-9]+, \[r[0-9]+\])}}
|
|
|
|
; THUMB: movs {{r[0-9]+}}, #0
|
|
|
|
; THUMB: blx {{r[0-9]+}}
|
2012-05-24 02:38:57 +08:00
|
|
|
%fptr = alloca i32 (i32)*, align 8
|
|
|
|
store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load i32 (i32)*, i32 (i32)** %fptr, align 8
|
2012-05-24 02:38:57 +08:00
|
|
|
%call = call i32 %1(i32 0)
|
|
|
|
ret void
|
|
|
|
}
|
2012-06-13 03:25:13 +08:00
|
|
|
|
|
|
|
; Unsigned 32-bit division of %a by %b. The checks below expect it to be
; lowered to a runtime library call: a direct `bl` to ___udivsi3 or
; __aeabi_uidiv under the default prefixes, and, for the runs that pass
; -mattr=+long-calls (the *-LONG prefixes), an indirect `blx r2` once the
; callee address has been materialized in r2.
define i32 @LibCall(i32 %a, i32 %b) {
|
|
|
|
entry:
|
|
|
|
; ARM: LibCall
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: bl {{___udivsi3|__aeabi_uidiv}}
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM-LONG-LABEL: LibCall
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-MACHO: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}}
|
|
|
|
; ARM-LONG-MACHO: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
|
|
|
|
; ARM-LONG-MACHO: ldr r2, [r2]
|
|
|
|
|
|
|
|
; ARM-LONG-ELF: movw r2, :lower16:__aeabi_uidiv
|
|
|
|
; ARM-LONG-ELF: movt r2, :upper16:__aeabi_uidiv
|
|
|
|
|
2012-06-13 03:25:13 +08:00
|
|
|
; ARM-LONG: blx r2
|
|
|
|
; THUMB: LibCall
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: bl {{___udivsi3|__aeabi_uidiv}}
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB-LONG-LABEL: LibCall
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}}
|
|
|
|
; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
|
2012-06-13 03:25:13 +08:00
|
|
|
; THUMB-LONG: ldr r2, [r2]
|
|
|
|
; THUMB-LONG: blx r2
|
|
|
|
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
|
|
|
|
ret i32 %tmp1
|
|
|
|
}
|
2012-07-19 17:49:00 +08:00
|
|
|
|
2012-08-16 13:15:53 +08:00
|
|
|
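; Test fastcc: compare how a float argument is passed under fastcc and under
; the default calling convention, both with VFP and with -mattr=-vfp2.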
|
|
|
|
|
|
|
|
; fastcc callee that forwards its float argument %i to @print, which is
; declared with the default calling convention.
; With VFP available, the checks expect `vmov r0, s0`, i.e. the value is moved
; from s0 into r0 (presumably fastcc delivers %i in s0 and @print takes it in
; r0 -- confirm against the ARM calling-convention tables).
; For the runs that pass -mattr=-vfp2 (the *-NOVFP prefixes), no s0 reference
; may appear after the function name.
define fastcc void @fast_callee(float %i) ssp {
|
|
|
|
entry:
|
|
|
|
; ARM: fast_callee
|
|
|
|
; ARM: vmov r0, s0
|
|
|
|
; THUMB: fast_callee
|
|
|
|
; THUMB: vmov r0, s0
|
|
|
|
; ARM-NOVFP: fast_callee
|
|
|
|
; ARM-NOVFP-NOT: s0
|
|
|
|
; THUMB-NOVFP: fast_callee
|
|
|
|
; THUMB-NOVFP-NOT: s0
|
|
|
|
call void @print(float %i)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Calls @fast_callee through fastcc with a float constant.
; With VFP available, the checks expect the constant to be loaded straight into
; s0 with vldr.
; For the runs that pass -mattr=-vfp2 (the *-NOVFP prefixes), the checks expect
; the constant to be built in r0: movw #13107 (0x3333) sets the low half and
; movt #16611 (0x40E3) the high half, giving 0x40E33333, the single-precision
; bit pattern of the IR constant 0x401C666660000000.
define void @fast_caller() ssp {
|
|
|
|
entry:
|
|
|
|
; ARM: fast_caller
|
|
|
|
; ARM: vldr s0,
|
|
|
|
; THUMB: fast_caller
|
|
|
|
; THUMB: vldr s0,
|
|
|
|
; ARM-NOVFP: fast_caller
|
|
|
|
; ARM-NOVFP: movw r0, #13107
|
|
|
|
; ARM-NOVFP: movt r0, #16611
|
|
|
|
; THUMB-NOVFP: fast_caller
|
|
|
|
; THUMB-NOVFP: movw r0, #13107
|
|
|
|
; THUMB-NOVFP: movt r0, #16611
|
|
|
|
call fastcc void @fast_callee(float 0x401C666660000000)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Default-calling-convention counterpart of @fast_callee: forwards its float
; argument %i to @print.
; With VFP available, the checks expect `vmov s0, r0`, i.e. the value is moved
; from r0 into s0 (presumably %i arrives in r0 under the default convention
; for the tested triples -- confirm).
; For the runs that pass -mattr=-vfp2 (the *-NOVFP prefixes), no s0 reference
; may appear after the function name.
define void @no_fast_callee(float %i) ssp {
|
|
|
|
entry:
|
|
|
|
; ARM: no_fast_callee
|
|
|
|
; ARM: vmov s0, r0
|
|
|
|
; THUMB: no_fast_callee
|
|
|
|
; THUMB: vmov s0, r0
|
|
|
|
; ARM-NOVFP: no_fast_callee
|
|
|
|
; ARM-NOVFP-NOT: s0
|
|
|
|
; THUMB-NOVFP: no_fast_callee
|
|
|
|
; THUMB-NOVFP-NOT: s0
|
|
|
|
call void @print(float %i)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Default-calling-convention counterpart of @fast_caller: calls
; @no_fast_callee with the same float constant.
; With VFP available, the checks expect `vmov r0, s0`, i.e. the constant is
; moved from s0 into r0 before the call.
; For the runs that pass -mattr=-vfp2 (the *-NOVFP prefixes), the checks expect
; the constant to be built directly in r0: movw #13107 (0x3333) and movt #16611
; (0x40E3) give 0x40E33333, the single-precision bit pattern of the IR
; constant 0x401C666660000000.
define void @no_fast_caller() ssp {
|
|
|
|
entry:
|
|
|
|
; ARM: no_fast_caller
|
|
|
|
; ARM: vmov r0, s0
|
|
|
|
; THUMB: no_fast_caller
|
|
|
|
; THUMB: vmov r0, s0
|
|
|
|
; ARM-NOVFP: no_fast_caller
|
|
|
|
; ARM-NOVFP: movw r0, #13107
|
|
|
|
; ARM-NOVFP: movt r0, #16611
|
|
|
|
; THUMB-NOVFP: no_fast_caller
|
|
|
|
; THUMB-NOVFP: movw r0, #13107
|
|
|
|
; THUMB-NOVFP: movt r0, #16611
|
|
|
|
call void @no_fast_callee(float 0x401C666660000000)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2014-08-02 02:04:14 +08:00
|
|
|
; External callee with six i32 parameters; called by @call_undef_args with an
; undef fifth argument.
declare void @bar2(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6)
|
|
|
|
|
|
|
|
; Calls @bar2 with constants 1, 2, 3, 4, an undef fifth argument, and 6.
; The checks expect, in order:
;   - the four leading constants materialized into r0-r3 on consecutive lines;
;   - no store of a register to [sp] before the next match (presumably [sp] is
;     the stack slot of the undef fifth argument, which needs no value --
;     confirm);
;   - the constant 6 materialized into a register that is stored to [sp, #4]
;     on the very next line.
define void @call_undef_args() {
|
|
|
|
; ARM-LABEL: call_undef_args
|
|
|
|
; ARM: movw r0, #1
|
|
|
|
; ARM-NEXT: movw r1, #2
|
|
|
|
; ARM-NEXT: movw r2, #3
|
|
|
|
; ARM-NEXT: movw r3, #4
|
|
|
|
; ARM-NOT: str {{r[0-9]+}}, [sp]
|
|
|
|
; ARM: movw [[REG:l?r[0-9]*]], #6
|
|
|
|
; ARM-NEXT: str [[REG]], [sp, #4]
|
|
|
|
call void @bar2(i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2012-08-16 13:15:53 +08:00
|
|
|
; External callee taking a single float; called by @fast_callee and
; @no_fast_callee with the default calling convention.
declare void @print(float)
|