powerpc/32: cacheable_memcpy becomes memcpy
cacheable_memcpy uses the dcbz instruction and is more efficient than memcpy when the destination is in RAM. If the destination is in an I/O area, memcpy_toio() is normally used, not memcpy. This patch renames memcpy to generic_memcpy, and renames cacheable_memcpy to memcpy. On MPC885, we get an approximately 7% increase in the transfer rate on an FTP reception. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Scott Wood <scottwood@freescale.com>
This commit is contained in:
parent
c152f149ce
commit
0b05e2d671
|
@ -129,13 +129,18 @@ _GLOBAL(memset)
|
||||||
* We only use this version if the source and dest don't overlap.
|
* We only use this version if the source and dest don't overlap.
|
||||||
* -- paulus.
|
* -- paulus.
|
||||||
*/
|
*/
|
||||||
_GLOBAL(cacheable_memcpy)
|
_GLOBAL(memmove)
|
||||||
|
cmplw 0,r3,r4
|
||||||
|
bgt backwards_memcpy
|
||||||
|
/* fall through */
|
||||||
|
|
||||||
|
_GLOBAL(memcpy)
|
||||||
add r7,r3,r5 /* test if the src & dst overlap */
|
add r7,r3,r5 /* test if the src & dst overlap */
|
||||||
add r8,r4,r5
|
add r8,r4,r5
|
||||||
cmplw 0,r4,r7
|
cmplw 0,r4,r7
|
||||||
cmplw 1,r3,r8
|
cmplw 1,r3,r8
|
||||||
crand 0,0,4 /* cr0.lt &= cr1.lt */
|
crand 0,0,4 /* cr0.lt &= cr1.lt */
|
||||||
blt memcpy /* if regions overlap */
|
blt generic_memcpy /* if regions overlap */
|
||||||
|
|
||||||
addi r4,r4,-4
|
addi r4,r4,-4
|
||||||
addi r6,r3,-4
|
addi r6,r3,-4
|
||||||
|
@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy)
|
||||||
bdnz 40b
|
bdnz 40b
|
||||||
65: blr
|
65: blr
|
||||||
|
|
||||||
_GLOBAL(memmove)
|
_GLOBAL(generic_memcpy)
|
||||||
cmplw 0,r3,r4
|
|
||||||
bgt backwards_memcpy
|
|
||||||
/* fall through */
|
|
||||||
|
|
||||||
_GLOBAL(memcpy)
|
|
||||||
srwi. r7,r5,3
|
srwi. r7,r5,3
|
||||||
addi r6,r3,-4
|
addi r6,r3,-4
|
||||||
addi r4,r4,-4
|
addi r4,r4,-4
|
||||||
|
|
Loading…
Reference in New Issue