forked from OSchip/llvm-project
Misc x86 README updates: remove a couple of already-fixed issues,
add a few suggestions from looking at some assembly code. llvm-svn: 73210
This commit is contained in:
parent
66189503ef
commit
32ad5e9c08
|
@ -482,35 +482,6 @@ _usesbb:
|
|||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Currently we don't have elimination of redundant stack manipulations. Consider
the code:

int %main() {
entry:
	call fastcc void %test1( )
	call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
	ret int 0
}

declare fastcc void %test1()

declare fastcc void %test2(sbyte*)
This currently compiles to:

	subl $16, %esp
	call _test1
	addl $12, %esp
	subl $16, %esp
	movl $_test1, (%esp)
	call _test2
	addl $12, %esp

The add/sub pair is really unneeded here.
//===---------------------------------------------------------------------===//
|
||||
|
||||
Consider the expansion of:
|
||||
|
||||
define i32 @test3(i32 %X) {
|
||||
|
@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0.
|
|||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We aren't matching RMW instructions aggressively
enough.  Here's a reduced testcase (more in PR1160):

define void @test(i32* %huge_ptr, i32* %target_ptr) {
	%A = load i32* %huge_ptr		; <i32> [#uses=1]
	%B = load i32* %target_ptr		; <i32> [#uses=1]
	%C = or i32 %A, %B			; <i32> [#uses=1]
	store i32 %C, i32* %target_ptr
	ret void
}
$ llvm-as < t.ll | llc -march=x86-64

_test:
	movl (%rdi), %eax
	orl (%rsi), %eax
	movl %eax, (%rsi)
	ret

That should be something like:

_test:
	movl (%rdi), %eax
	orl %eax, (%rsi)
	ret
//===---------------------------------------------------------------------===//
|
||||
|
||||
The following code:
|
||||
|
||||
bb114.preheader: ; preds = %cond_next94
|
||||
|
@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
|
|||
Core 2, and "Generic"
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Testcase:
int a(int x) { return (x & 127) > 31; }

Current output:
	movl	4(%esp), %eax
	andl	$127, %eax
	cmpl	$31, %eax
	seta	%al
	movzbl	%al, %eax
	ret

Ideal output:
	xorl	%eax, %eax
	testl	$96, 4(%esp)
	setne	%al
	ret

We could do this transformation in instcombine, but it's only clearly
beneficial on platforms with a test instruction.
//===---------------------------------------------------------------------===//
|
||||
Testcase:
int x(int a) { return (a&0xf0)>>4; }

Current output:
	movl	4(%esp), %eax
	shrl	$4, %eax
	andl	$15, %eax
	ret

Ideal output:
	movzbl	4(%esp), %eax
	shrl	$4, %eax
	ret
//===---------------------------------------------------------------------===//
|
||||
|
||||
Testcase:
int x(int a) { return (a & 0x80) ? 0x100 : 0; }

Current output:
	testl	$128, 4(%esp)
	setne	%al
	movzbl	%al, %eax
	shll	$8, %eax
	ret

Ideal output:
	movl	4(%esp), %eax
	addl	%eax, %eax
	andl	$256, %eax
	ret

We generally want to fold shifted tests of a single bit into a shift+and on x86.
//===---------------------------------------------------------------------===//
|
||||
|
|
Loading…
Reference in New Issue