2013-04-06 01:05:56 +08:00
|
|
|
; We specify -mcpu explicitly to prevent instruction reordering that happens on
|
|
|
|
; some setups (e.g., Atom) from affecting the output.
|
|
|
|
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
|
2013-12-04 09:25:24 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
|
2014-04-06 18:01:23 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
|
2013-12-04 09:25:24 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
|
2013-04-06 01:05:56 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
|
2013-12-04 09:25:24 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
|
2014-04-06 18:01:23 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
|
2013-12-04 09:25:24 +08:00
|
|
|
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
|
2012-01-20 08:05:46 +08:00
|
|
|
|
|
|
|
; The SysV ABI used by most Unixes on x86 specifies that an sret pointer
|
|
|
|
; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
|
|
|
|
; arguments are caller-cleanup like normal arguments. The checks below expect MinGW to behave like MSVC and Cygwin like SysV.
|
|
|
|
|
2013-03-29 05:30:04 +08:00
|
|
|
; sret1: the only parameter is the sret pointer %x; the body stores the byte 42
; through it and returns. Per the check lines below, the win32 and mingw32
; triples are expected to end in a plain `retl`, while the cygwin and linux
; triples are expected to end in `retl $4`.
define void @sret1(i8* sret %x) nounwind {
|
2012-01-20 08:05:46 +08:00
|
|
|
entry:
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: _sret1:
|
2013-03-29 05:30:04 +08:00
|
|
|
; WIN32: movb $42, (%eax)
|
|
|
|
; WIN32-NOT: popl %eax
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; MINGW_X86-LABEL: _sret1:
|
2014-01-08 20:58:07 +08:00
|
|
|
; MINGW_X86: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: _sret1:
|
|
|
|
; CYGWIN: retl $4
|
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: sret1:
|
2014-01-08 20:58:07 +08:00
|
|
|
; LINUX: retl $4
|
2013-03-29 05:30:04 +08:00
|
|
|
|
|
|
|
|
store i8 42, i8* %x, align 4
|
2012-01-20 08:05:46 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2013-03-29 05:30:04 +08:00
|
|
|
; sret2: takes the sret pointer %x plus an i8 value %y and stores %y through
; %x. The win32 checks accept any source operand for the byte store into
; (%eax). The expected return instructions are a plain `retl` for the win32
; and mingw32 triples and `retl $4` for the cygwin and linux triples.
define void @sret2(i8* sret %x, i8 %y) nounwind {
|
2012-01-20 08:05:46 +08:00
|
|
|
entry:
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: _sret2:
|
2013-03-29 05:30:04 +08:00
|
|
|
; WIN32: movb {{.*}}, (%eax)
|
|
|
|
; WIN32-NOT: popl %eax
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; MINGW_X86-LABEL: _sret2:
|
2014-01-08 20:58:07 +08:00
|
|
|
; MINGW_X86: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: _sret2:
|
|
|
|
; CYGWIN: retl $4
|
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: sret2:
|
2014-01-08 20:58:07 +08:00
|
|
|
; LINUX: retl $4
|
2013-03-29 05:30:04 +08:00
|
|
|
|
|
|
|
|
store i8 %y, i8* %x
|
2012-01-20 08:05:46 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2013-03-29 05:30:04 +08:00
|
|
|
; sret3: takes the sret pointer %x and a second, ordinary pointer %y. The body
; stores 42 through %x and 13 through %y. The win32 checks require the store of
; 42 to go through %eax and forbid both a `movb $13, (%eax)` and a `popl %eax`
; between that store and the final plain `retl`.
define void @sret3(i8* sret %x, i8* %y) nounwind {
|
|
|
|
entry:
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: _sret3:
|
2013-03-29 05:30:04 +08:00
|
|
|
; WIN32: movb $42, (%eax)
|
|
|
|
; WIN32-NOT: movb $13, (%eax)
|
|
|
|
; WIN32-NOT: popl %eax
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; MINGW_X86-LABEL: _sret3:
|
2014-01-08 20:58:07 +08:00
|
|
|
; MINGW_X86: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: _sret3:
|
|
|
|
; CYGWIN: retl $4
|
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: sret3:
|
2014-01-08 20:58:07 +08:00
|
|
|
; LINUX: retl $4
|
2013-03-29 05:30:04 +08:00
|
|
|
|
|
|
|
|
store i8 42, i8* %x
|
|
|
|
store i8 13, i8* %y
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; PR15556
|
|
|
|
%struct.S4 = type { i32, i32, i32 }
|
|
|
|
|
|
|
|
; sret4: the sret pointer %agg.result points to a %struct.S4 (three i32
; fields). The body takes the address of field 0 with a getelementptr and
; stores the i32 value 42 there. The win32 checks expect a 32-bit store through
; %eax and a plain `retl`; the cygwin and linux checks expect `retl $4`.
define void @sret4(%struct.S4* noalias sret %agg.result) {
|
|
|
|
entry:
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: _sret4:
|
2013-03-29 05:30:04 +08:00
|
|
|
; WIN32: movl $42, (%eax)
|
|
|
|
; WIN32-NOT: popl %eax
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; MINGW_X86-LABEL: _sret4:
|
2014-01-08 20:58:07 +08:00
|
|
|
; MINGW_X86: {{retl$}}
|
2013-03-29 05:30:04 +08:00
|
|
|
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: _sret4:
|
|
|
|
; CYGWIN: retl $4
|
|
|
|
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: sret4:
|
2014-01-08 20:58:07 +08:00
|
|
|
; LINUX: retl $4
|
2013-03-29 05:30:04 +08:00
|
|
|
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%x = getelementptr inbounds %struct.S4, %struct.S4* %agg.result, i32 0, i32 0
|
2013-03-29 05:30:04 +08:00
|
|
|
store i32 42, i32* %x, align 4
|
|
|
|
ret void
|
|
|
|
}
|
2013-04-03 19:27:54 +08:00
|
|
|
|
|
|
|
%struct.S5 = type { i32 }
|
|
|
|
%class.C5 = type { i8 }
|
|
|
|
|
|
|
|
; x86_thiscallcc function whose first IR parameter is the sret pointer
; %agg.result and whose second is %this. %this is stored to a stack slot and
; reloaded (the reloaded value %this1 is not used afterwards in this body);
; then 42 is stored to field 0 of the result struct. Only the win32 triple has
; checks beyond the label: the sret pointer is loaded from the stack into
; %eax, 42 is stored through it, and the function ends in `retl $4`.
define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) {
|
|
|
|
entry:
|
|
|
|
%this.addr = alloca %class.C5*, align 4
|
|
|
|
store %class.C5* %this, %class.C5** %this.addr, align 4
|
2015-02-28 05:17:42 +08:00
|
|
|
%this1 = load %class.C5*, %class.C5** %this.addr
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%x = getelementptr inbounds %struct.S5, %struct.S5* %agg.result, i32 0, i32 0
|
2013-04-03 19:27:54 +08:00
|
|
|
store i32 42, i32* %x, align 4
|
|
|
|
ret void
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
|
|
|
|
; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
|
2013-04-03 19:27:54 +08:00
|
|
|
|
|
|
|
|
; The address of the return structure is passed as an implicit parameter.
|
|
|
|
; In the -O0 build, %eax is spilled at the beginning of the function, hence we
|
|
|
|
; should match both 4(%esp) and 8(%esp).
|
|
|
|
; WIN32: {{[48]}}(%esp), %eax
|
|
|
|
; WIN32: movl $42, (%eax)
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: retl $4
|
2013-04-03 19:27:54 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
; call_foo5: allocates a %class.C5 (%c) and a %struct.S5 (%s) on the stack and
; calls the x86_thiscallcc function "?foo@C5@@QAE?AUS5@@XZ" with %s as the
; sret argument and %c as the second argument. Only the win32 triple has
; checks beyond the label.
define void @call_foo5() {
|
|
|
|
entry:
|
|
|
|
%c = alloca %class.C5, align 1
|
|
|
|
%s = alloca %struct.S5, align 4
|
|
|
|
call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
|
2013-12-04 05:12:36 +08:00
|
|
|
; WIN32-LABEL: {{^}}_call_foo5:
|
|
|
|
; MINGW_X86-LABEL: {{^}}_call_foo5:
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: {{^}}_call_foo5:
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: {{^}}call_foo5:
|
|
|
|
|
2013-04-03 19:27:54 +08:00
|
|
|
|
|
|
|
|
; Load the address of the result and put it onto the stack
|
|
|
|
; The this pointer goes to ECX.
|
2016-03-31 07:38:01 +08:00
|
|
|
; (through %ecx in the -O0 build).
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; WIN32-DAG: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
|
|
|
|
; NOTE(review): the `[1-9]+` in the next pattern cannot match a stack offset
; that contains the digit 0 (for example 10) - confirm this is intended.
; WIN32-DAG: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
|
|
|
|
; WIN32-DAG: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
|
2013-04-06 01:05:56 +08:00
|
|
|
; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
|
2014-01-08 20:58:07 +08:00
|
|
|
; WIN32: retl
|
2013-04-03 19:27:54 +08:00
|
|
|
ret void
|
|
|
|
}
|
2013-12-04 04:51:23 +08:00
|
|
|
|
|
|
|
|
|
|
|
%struct.test6 = type { i32, i32, i32 }
|
|
|
|
; test6_f: allocates a %struct.test6 temporary and calls the x86_thiscallcc
; function test6_g with the temporary as the sret argument (first IR
; parameter) and %x as the second parameter. The win32 checks and the
; mingw32/cygwin checks below expect different placements of the two pointers;
; the linux triple only has a label check.
define void @test6_f(%struct.test6* %x) nounwind {
|
|
|
|
; WIN32-LABEL: _test6_f:
|
|
|
|
; MINGW_X86-LABEL: _test6_f:
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-LABEL: _test6_f:
|
2013-12-04 05:12:36 +08:00
|
|
|
; LINUX-LABEL: test6_f:
|
2013-12-04 04:51:23 +08:00
|
|
|
|
|
|
|
|
; The %x argument is moved to %ecx. It will be the this pointer.
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; WIN32-DAG: movl {{16|20}}(%esp), %ecx
|
2013-12-04 04:51:23 +08:00
|
|
|
|
2014-04-06 18:01:23 +08:00
|
|
|
|
2013-12-04 04:51:23 +08:00
|
|
|
; The sret pointer is (%esp)
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; WIN32-DAG: {{leal 4\(%esp\)|movl %esp}}, %eax
|
|
|
|
; WIN32-DAG: {{pushl %eax|movl %eax, \(%esp\)}}
|
2013-12-04 04:51:23 +08:00
|
|
|
|
|
|
|
|
; The sret pointer is %ecx
|
2016-03-31 07:38:01 +08:00
|
|
|
; The %x argument is moved to (%esp). It will be the this pointer.
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; MINGW_X86-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
|
|
|
|
; MINGW_X86-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
|
2013-12-04 04:51:23 +08:00
|
|
|
; MINGW_X86-NEXT: calll _test6_g
|
|
|
|
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; CYGWIN-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
|
|
|
|
; CYGWIN-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
|
2014-04-06 18:01:23 +08:00
|
|
|
; CYGWIN-NEXT: calll _test6_g
|
|
|
|
|
2013-12-04 04:51:23 +08:00
|
|
|
|
|
|
%tmp = alloca %struct.test6, align 4
|
|
|
|
call x86_thiscallcc void @test6_g(%struct.test6* sret %tmp, %struct.test6* %x)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)
|
2014-05-10 06:32:13 +08:00
|
|
|
|
|
|
|
; Flipping the parameters at the IR level generates the same code.
|
|
|
|
%struct.test7 = type { i32, i32, i32 }
|
|
|
|
; test7_f: allocates a %struct.test7 temporary and calls the x86_thiscallcc
; function test7_g with %x as the first IR parameter and the temporary as the
; sret (second) parameter. The win32, mingw32 and cygwin triples share the same
; expectations: %x is loaded into %ecx and the address of the temporary is
; placed on the stack via %eax. The linux triple only has a label check.
define void @test7_f(%struct.test7* %x) nounwind {
|
|
|
|
; WIN32-LABEL: _test7_f:
|
|
|
|
; MINGW_X86-LABEL: _test7_f:
|
|
|
|
; CYGWIN-LABEL: _test7_f:
|
|
|
|
; LINUX-LABEL: test7_f:
|
|
|
|
|
|
|
|
|
; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
|
2016-07-14 23:40:22 +08:00
|
|
|
; WIN32: movl {{16|20}}(%esp), %ecx
|
|
|
|
; MINGW_X86: movl {{16|20}}(%esp), %ecx
|
|
|
|
; CYGWIN: movl {{16|20}}(%esp), %ecx
|
2014-05-10 06:32:13 +08:00
|
|
|
|
|
|
|
|
; The sret pointer is (%esp)
|
2016-09-26 14:42:07 +08:00
|
|
|
; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
|
2016-07-14 23:40:22 +08:00
|
|
|
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
|
2016-09-26 14:42:07 +08:00
|
|
|
; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax
|
2016-07-14 23:40:22 +08:00
|
|
|
; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
|
2016-09-26 14:42:07 +08:00
|
|
|
; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax
|
2016-07-14 23:40:22 +08:00
|
|
|
; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
|
2014-05-10 06:32:13 +08:00
|
|
|
|
|
|
|
|
%tmp = alloca %struct.test7, align 4
|
|
|
|
call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
some crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%s = getelementptr %struct.test7, %struct.test7* %in, i32 0, i32 0
|
|
|
|
%d = getelementptr %struct.test7, %struct.test7* %out, i32 0, i32 0
|
2015-02-28 05:17:42 +08:00
|
|
|
%v = load i32, i32* %s
|
2014-05-10 06:32:13 +08:00
|
|
|
store i32 %v, i32* %d
|
|
|
|
call void @clobber_eax()
|
|
|
|
ret void
|
|
|
|
|
|
|
|
; Make sure we return the second parameter in %eax.
|
|
|
|
; WIN32-LABEL: _test7_g:
|
|
|
|
; WIN32: calll _clobber_eax
|
|
|
|
; WIN32: movl {{.*}}, %eax
|
|
|
|
; WIN32: retl
|
|
|
|
}
|
|
|
|
|
|
|
|
; Declaration only (no body in this file). test7_g and test8_f call it just
; before returning. Presumably it stands in for a call that overwrites %eax
; (inferred from the name); the checks after each call expect a fresh move
; into %eax.
declare void @clobber_eax()
|
|
|
|
|
|
|
|
; Test what happens if the first parameter has to be split by codegen.
|
|
|
|
; Realistically, no frontend will generate code like this, but here it is for
|
|
|
|
; completeness.
|
|
|
|
; test8_f: stores the inreg i64 argument %a through the sret pointer %out,
; then calls @clobber_eax before returning void.
define void @test8_f(i64 inreg %a, i64* sret %out) {
|
|
|
|
store i64 %a, i64* %out
|
|
|
|
call void @clobber_eax()
|
|
|
|
ret void
|
|
|
|
|
|
|
|
|
|
; WIN32-LABEL: _test8_f:
|
|
|
|
; The sret pointer is expected to be loaded from a stack slot into a register,
; which the pattern below captures under the name "out".
; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
|
|
|
|
; The i64 is expected as two 32-bit stores: %edx to offset 4 and %eax to
; offset 0 of the captured pointer. The DAG form lets them match in either order.
; WIN32-DAG: movl %edx, 4(%[[out]])
|
|
|
|
; WIN32-DAG: movl %eax, (%[[out]])
|
|
|
|
; WIN32: calll _clobber_eax
|
|
|
|
; After the call, a move into %eax must appear before the return.
; WIN32: movl {{.*}}, %eax
|
|
|
|
; WIN32: retl
|
|
|
|
}
|