Andrew Lenharth
|
ccdaecc448
|
Account for pseudo-ops correctly
llvm-svn: 30548
|
2006-09-20 20:08:52 +00:00 |
Chris Lattner
|
a81a75c390
|
The DarwinAsmPrinter need not check for isDarwin. createPPCAsmPrinterPass
should create the right asmprinter subclass.
llvm-svn: 30542
|
2006-09-20 17:12:19 +00:00 |
Chris Lattner
|
8597a2fc4e
|
Wrap some Darwin-isms with isDarwin checks.
llvm-svn: 30541
|
2006-09-20 17:07:15 +00:00 |
Nick Lewycky
|
cfff1c3f86
|
Use a total ordering to compare instructions.
Fixes infinite loop in resolve().
llvm-svn: 30540
|
2006-09-20 17:04:01 +00:00 |
Andrew Lenharth
|
44cb67af5c
|
simplify
llvm-svn: 30535
|
2006-09-20 15:37:57 +00:00 |
Andrew Lenharth
|
f007f21c8a
|
catch constants more often
llvm-svn: 30534
|
2006-09-20 15:05:49 +00:00 |
Andrew Lenharth
|
b04e899bb4
|
catch another constant
llvm-svn: 30533
|
2006-09-20 15:04:55 +00:00 |
Andrew Lenharth
|
97a4e99aff
|
clarify with test case
llvm-svn: 30531
|
2006-09-20 14:48:00 +00:00 |
Andrew Lenharth
|
e2d138a462
|
Add Note
llvm-svn: 30530
|
2006-09-20 14:40:01 +00:00 |
Jim Laskey
|
fbeab72759
|
Trim the home directory from the dejagnu test
llvm-svn: 30519
|
2006-09-20 09:20:22 +00:00 |
Chris Lattner
|
fba9e8f422
|
item done
llvm-svn: 30518
|
2006-09-20 06:41:56 +00:00 |
Chris Lattner
|
c8cd62d381
|
Compile:
int test3(int a, int b) { return (a < 0) ? a : 0; }
to:
_test3:
srawi r2, r3, 31
and r3, r2, r3
blr
instead of:
_test3:
cmpwi cr0, r3, 1
li r2, 0
blt cr0, LBB2_2 ;entry
LBB2_1: ;entry
mr r3, r2
LBB2_2: ;entry
blr
This implements: PowerPC/select_lt0.ll:seli32_a_a
llvm-svn: 30517
|
2006-09-20 06:41:35 +00:00 |
Chris Lattner
|
2b09e1d2fc
|
new testcase
llvm-svn: 30516
|
2006-09-20 06:40:37 +00:00 |
Chris Lattner
|
27d8985a71
|
add a note
llvm-svn: 30515
|
2006-09-20 06:32:10 +00:00 |
Chris Lattner
|
8746e2cd57
|
Fold the full generality of (any_extend (truncate x))
llvm-svn: 30514
|
2006-09-20 06:29:17 +00:00 |
Chris Lattner
|
8b68decb27
|
Two things:
1. Teach SimplifySetCC that '(srl (ctlz x), 5) == 0' is really x != 0.
2. Teach visitSELECT_CC to use SimplifySetCC instead of calling it and
ignoring the result. This allows us to compile:
bool %test(ulong %x) {
%tmp = setlt ulong %x, 4294967296
ret bool %tmp
}
to:
_test:
cntlzw r2, r3
cmplwi cr0, r3, 1
srwi r2, r2, 5
li r3, 0
beq cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
instead of:
_test:
addi r2, r3, -1
cntlzw r2, r2
cntlzw r3, r3
srwi r2, r2, 5
cmplwi cr0, r2, 0
srwi r2, r3, 5
li r3, 0
bne cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
This isn't wonderful, but it's an improvement.
llvm-svn: 30513
|
2006-09-20 06:19:26 +00:00 |
Chris Lattner
|
f62f090ea1
|
This is already done
llvm-svn: 30512
|
2006-09-20 04:59:33 +00:00 |
Chris Lattner
|
380c7e9a59
|
We went through all that trouble to compute whether it was safe to transform
this comparison, but never checked it. Whoops, no wonder we miscompiled
177.mesa!
llvm-svn: 30511
|
2006-09-20 04:44:59 +00:00 |
Chris Lattner
|
da9b1a9322
|
Improve PPC64 equality comparisons like PPC32 comparisons.
llvm-svn: 30510
|
2006-09-20 04:33:27 +00:00 |
Chris Lattner
|
aa3926b7ea
|
Two improvements:
1. Codegen this comparison:
if (X == 0x8000)
as:
cmplwi cr0, r3, 32768
bne cr0, LBB1_2 ;cond_next
instead of:
lis r2, 0
ori r2, r2, 32768
cmpw cr0, r3, r2
bne cr0, LBB1_2 ;cond_next
2. Codegen this comparison:
if (X == 0x12345678)
as:
xoris r2, r3, 4660
cmplwi cr0, r2, 22136
bne cr0, LBB1_2 ;cond_next
instead of:
lis r2, 4660
ori r2, r2, 22136
cmpw cr0, r3, r2
bne cr0, LBB1_2 ;cond_next
llvm-svn: 30509
|
2006-09-20 04:25:47 +00:00 |
Chris Lattner
|
ab33d350a7
|
Add a note that we should match rlwnm better
llvm-svn: 30508
|
2006-09-20 03:59:25 +00:00 |
Chris Lattner
|
601b86513d
|
Legalize is no longer limited to cleverness with just constant shift amounts.
Allow it to be clever when possible and fall back to the gross code when needed.
This allows us to compile:
long long foo1(long long X, int C) {
return X << (C|32);
}
long long foo2(long long X, int C) {
return X << (C&~32);
}
to:
_foo1:
rlwinm r2, r5, 0, 27, 31
slw r3, r4, r2
li r4, 0
blr
.globl _foo2
.align 4
_foo2:
rlwinm r2, r5, 0, 27, 25
subfic r5, r2, 32
slw r3, r3, r2
srw r5, r4, r5
or r3, r3, r5
slw r4, r4, r2
blr
instead of:
_foo1:
ori r2, r5, 32
subfic r5, r2, 32
addi r6, r2, -32
srw r5, r4, r5
slw r3, r3, r2
slw r6, r4, r6
or r3, r3, r5
slw r4, r4, r2
or r3, r3, r6
blr
.globl _foo2
.align 4
_foo2:
rlwinm r2, r5, 0, 27, 25
subfic r5, r2, 32
addi r6, r2, -32
srw r5, r4, r5
slw r3, r3, r2
slw r6, r4, r6
or r3, r3, r5
slw r4, r4, r2
or r3, r3, r6
blr
llvm-svn: 30507
|
2006-09-20 03:47:40 +00:00 |
Chris Lattner
|
875ea0cdbd
|
Expand 64-bit shifts more optimally if we know that the high bit of the
shift amount is one or zero. For example, for:
long long foo1(long long X, int C) {
return X << (C|32);
}
long long foo2(long long X, int C) {
return X << (C&~32);
}
we get:
_foo1:
movb $31, %cl
movl 4(%esp), %edx
andb 12(%esp), %cl
shll %cl, %edx
xorl %eax, %eax
ret
_foo2:
movb $223, %cl
movl 4(%esp), %eax
movl 8(%esp), %edx
andb 12(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
ret
instead of:
_foo1:
subl $4, %esp
movl %ebx, (%esp)
movb $32, %bl
movl 8(%esp), %eax
movl 12(%esp), %edx
movb %bl, %cl
orb 16(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
xorl %ecx, %ecx
testb %bl, %bl
cmovne %eax, %edx
cmovne %ecx, %eax
movl (%esp), %ebx
addl $4, %esp
ret
_foo2:
subl $4, %esp
movl %ebx, (%esp)
movb $223, %cl
movl 8(%esp), %eax
movl 12(%esp), %edx
andb 16(%esp), %cl
shldl %cl, %eax, %edx
shll %cl, %eax
xorl %ecx, %ecx
xorb %bl, %bl
testb %bl, %bl
cmovne %eax, %edx
cmovne %ecx, %eax
movl (%esp), %ebx
addl $4, %esp
ret
llvm-svn: 30506
|
2006-09-20 03:38:48 +00:00 |
Evan Cheng
|
cd3f6ff0e5
|
Back out Chris' last set of changes. This breaks 177.mesa and povray somehow.
llvm-svn: 30505
|
2006-09-20 01:39:40 +00:00 |
Evan Cheng
|
453280b94d
|
80 col.
llvm-svn: 30504
|
2006-09-20 01:10:02 +00:00 |
Evan Cheng
|
9de003670f
|
Allow PatFrag to be a leaf node.
llvm-svn: 30498
|
2006-09-19 19:08:04 +00:00 |
Evan Cheng
|
00b2848adb
|
Add result of a Xform to isel queue.
llvm-svn: 30497
|
2006-09-19 18:40:15 +00:00 |
Andrew Lenharth
|
4f339bebb0
|
If we have an add, do it in the pointer realm, not the int realm. This is critical in the Linux kernel for pointer analysis correctness.
llvm-svn: 30496
|
2006-09-19 18:24:51 +00:00 |
Andrew Lenharth
|
731f2d52a3
|
Inspired by the Linux kernel: the more we keep adds in the pointer realm, the better pointer analysis works.
llvm-svn: 30495
|
2006-09-19 18:23:39 +00:00 |
Chris Lattner
|
698000b0da
|
Fix UnitTests/2005-05-12-Int64ToFP.c with llc-beta. In particular, do not
allow it to go into an infinite loop, filling up the disk!
llvm-svn: 30494
|
2006-09-19 18:02:01 +00:00 |
Rafael Espindola
|
fa7217f970
|
fix header
add comments
untabify
llvm-svn: 30486
|
2006-09-19 16:41:40 +00:00 |
Rafael Espindola
|
f7d4a9900c
|
Implement a MachineFunctionPass to fix the mul instruction
llvm-svn: 30485
|
2006-09-19 15:49:25 +00:00 |
Chris Lattner
|
dc892c6221
|
number test right
llvm-svn: 30484
|
2006-09-19 06:19:19 +00:00 |
Chris Lattner
|
b94df039c0
|
item done
llvm-svn: 30483
|
2006-09-19 06:19:03 +00:00 |
Chris Lattner
|
12f52faf93
|
implement select.ll:test19-22
llvm-svn: 30482
|
2006-09-19 06:18:21 +00:00 |
Chris Lattner
|
aa0ad47e97
|
make this harder
llvm-svn: 30481
|
2006-09-19 06:17:55 +00:00 |
Chris Lattner
|
db4fbdd1a6
|
new testcases
llvm-svn: 30480
|
2006-09-19 06:16:46 +00:00 |
Chris Lattner
|
3c48ea54ee
|
Fold the PPCISD shifts when presented with 0 inputs. This occurs for code
like:
long long test(long long X, int Y) {
return 1ULL << Y;
}
long long test2(long long X, int Y) {
return -1LL << Y;
}
which we used to compile to:
_test:
li r2, 1
subfic r3, r5, 32
li r4, 0
addi r6, r5, -32
srw r3, r2, r3
slw r4, r4, r5
slw r6, r2, r6
or r3, r4, r3
slw r4, r2, r5
or r3, r3, r6
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
Now we produce:
_test:
li r2, 1
addi r3, r5, -32
subfic r4, r5, 32
slw r3, r2, r3
srw r4, r2, r4
or r3, r4, r3
slw r4, r2, r5
blr
_test2:
li r2, -1
subfic r3, r5, 32
addi r6, r5, -32
srw r3, r2, r3
slw r4, r2, r5
slw r2, r2, r6
or r3, r4, r3
or r3, r3, r2
blr
llvm-svn: 30479
|
2006-09-19 05:22:59 +00:00 |
Chris Lattner
|
5a42ebcff3
|
Fold extract_element(cst) to cst
llvm-svn: 30478
|
2006-09-19 05:02:39 +00:00 |
Chris Lattner
|
4c059f4962
|
Minor speedup for legalize by avoiding some malloc traffic
llvm-svn: 30477
|
2006-09-19 04:51:23 +00:00 |
Chris Lattner
|
bea5f91946
|
If multiple predicates are listed, they must all pass
llvm-svn: 30476
|
2006-09-19 00:41:36 +00:00 |
Nick Lewycky
|
5cc1e9b51d
|
Enable dejagnu tests for predicate simplifier.
llvm-svn: 30475
|
2006-09-19 00:31:54 +00:00 |
Evan Cheng
|
1fc7c363e6
|
Fix a typo.
llvm-svn: 30474
|
2006-09-18 23:28:33 +00:00 |
Chris Lattner
|
bba17b9913
|
There!
llvm-svn: 30473
|
2006-09-18 22:41:07 +00:00 |
Chris Lattner
|
82a73a1989
|
Fix Regression/TableGen/2006-09-18-LargeInt.td
llvm-svn: 30472
|
2006-09-18 22:28:27 +00:00 |
Chris Lattner
|
165f344721
|
new testcase
llvm-svn: 30471
|
2006-09-18 22:28:07 +00:00 |
Evan Cheng
|
4bfaf0bd2c
|
Allow i32 UDIV, SDIV, UREM, SREM to be expanded into libcalls.
llvm-svn: 30470
|
2006-09-18 21:49:04 +00:00 |
Nick Lewycky
|
b9c5483a93
|
Walk down the dominator tree instead of the control flow graph. That means
that we can't modify the CFG any more, at least not until it's possible
to update the dominator tree (PR217).
llvm-svn: 30469
|
2006-09-18 21:09:35 +00:00 |
Nick Lewycky
|
86bda361b0
|
Fix findCaseDest to return null when BB is both the default dest and one
of the numeric cases.
llvm-svn: 30468
|
2006-09-18 20:44:37 +00:00 |
Andrew Lenharth
|
f23e3bfcb2
|
A pass to remove the worst of the replay trap offenders, and as a bonus, align basic blocks when it is free to do so
llvm-svn: 30467
|
2006-09-18 19:44:29 +00:00 |