powerpc/32: optimise memset()

There is no need to replicate the fill byte across a 32-bit word when
the length is less than 4, because in that case only byte stores are
performed. We can therefore branch immediately to the code handling
that case. Separating it from the normal case also lets us eliminate
a few operations on the destination pointer.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Christophe Leroy 2017-08-23 16:54:36 +02:00 committed by Michael Ellerman
parent c0622167e3
commit 7bf6057b96
1 changed file with 14 additions and 7 deletions

View File

@ -91,17 +91,17 @@ EXPORT_SYMBOL(memset16)
* replaced by a nop once cache is active. This is done in machine_init()
*/
_GLOBAL(memset)
cmplwi 0,r5,4
blt 7f
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
addi r6,r3,-4
cmplwi 0,r5,4
blt 7f
stwu r4,4(r6)
stw r4,0(r3)
beqlr
andi. r0,r6,3
andi. r0,r3,3
add r5,r0,r5
subf r6,r0,r6
subf r6,r0,r3
cmplwi 0,r4,0
bne 2f /* Use normal procedure if r4 is not zero */
_GLOBAL(memset_nocache_branch)
@ -132,13 +132,20 @@ _GLOBAL(memset_nocache_branch)
1: stwu r4,4(r6)
bdnz 1b
6: andi. r5,r5,3
7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3
8: stbu r4,1(r6)
bdnz 8b
blr
7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r3,-1
9: stbu r4,1(r6)
bdnz 9b
blr
EXPORT_SYMBOL(memset)
/*