Bill Wendling
e1fd78f2bc
Generate a VTBL instruction instead of a series of loads and stores when we
...
can. As Nate pointed out, VTBL isn't super performant, but it *has* to be better
than this:
_shuf:
@ BB#0: @ %entry
push {r4, r7, lr}
add r7, sp, #4
sub sp, #12
mov r4, sp
bic r4, r4, #7
mov sp, r4
mov r2, sp
vmov d16, r0, r1
orr r0, r2, #6
orr r3, r2, #7
vst1.8 {d16[0]}, [r3]
vst1.8 {d16[5]}, [r0]
subs r4, r7, #4
orr r0, r2, #5
vst1.8 {d16[4]}, [r0]
orr r0, r2, #4
vst1.8 {d16[4]}, [r0]
orr r0, r2, #3
vst1.8 {d16[0]}, [r0]
orr r0, r2, #2
vst1.8 {d16[2]}, [r0]
orr r0, r2, #1
vst1.8 {d16[1]}, [r0]
vst1.8 {d16[3]}, [r2]
vldr.64 d16, [sp]
vmov r0, r1, d16
mov sp, r4
pop {r4, r7, pc}
The "illegal" testcase in vext.ll is no longer illegal.
<rdar://problem/9078775>
llvm-svn: 127630
2011-03-14 23:02:38 +00:00