forked from OSchip/llvm-project
parent
6bc98653c2
commit
c2dd7aae71
|
@ -538,3 +538,31 @@ Currently the load folding imull has a higher complexity than the LEA32 pattern.
|
|||
|
||||
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
|
||||
feasible.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Teach the coalescer to commute 2-addr instructions, allowing us to eliminate
|
||||
the reg-reg copy in this example:
|
||||
|
||||
float foo(int *x, float *y, unsigned c) {
|
||||
float res = 0.0;
|
||||
unsigned i;
|
||||
for (i = 0; i < c; i++) {
|
||||
float xx = (float)x[i];
|
||||
xx = xx * y[i];
|
||||
xx += res;
|
||||
res = xx;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
LBB_foo_3: # no_exit
|
||||
cvtsi2ss %XMM0, DWORD PTR [%EDX + 4*%ESI]
|
||||
mulss %XMM0, DWORD PTR [%EAX + 4*%ESI]
|
||||
addss %XMM0, %XMM1
|
||||
inc %ESI
|
||||
cmp %ESI, %ECX
|
||||
**** movaps %XMM1, %XMM0
|
||||
jb LBB_foo_3 # no_exit
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
Loading…
Reference in New Issue