Blackfin: optimize strncpy a bit
Add a small strncpy optimization which can easily cut boot time by 20%. When the kernel boots with an initramfs, it builds up the filesystem from a cpio archive by calling strncpy_from_user() via fs/namei.c's do_getname() on every file in the archive (which can be lots) with a length of PATH_MAX (1024). Since file names are typically much shorter than that, the rest of the strncpy destination has to be padded with many NUL bytes. With this optimization, most of that NUL padding is done by a call to memset(), which is already optimized for filling memory quickly; the copy loop now also uses a hardware loop, which helps a little bit as well. Boot time was measured with 'loglevel=0' so that UART speed doesn't get in the way. Signed-off-by: Robin Getz <robin.getz@analog.com> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
This commit is contained in:
parent
c70dcabc8e
commit
648eee52cc
|
@ -20,6 +20,7 @@
|
||||||
* R1 = filler byte
|
* R1 = filler byte
|
||||||
* R2 = count
|
* R2 = count
|
||||||
* Favours word aligned data.
|
* Favours word aligned data.
|
||||||
|
* Note: strncpy calls this function and relies on I0 and I1 not being used here
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ENTRY(_memset)
|
ENTRY(_memset)
|
||||||
|
|
|
@ -5,12 +5,14 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
|
#include <asm/context.S>
|
||||||
|
|
||||||
/* void *strncpy(char *dest, const char *src, size_t n);
|
/* void *strncpy(char *dest, const char *src, size_t n);
|
||||||
* R0 = address (dest)
|
* R0 = address (dest)
|
||||||
* R1 = address (src)
|
* R1 = address (src)
|
||||||
* R2 = size
|
* R2 = size
|
||||||
* Returns a pointer to the destination string dest
|
* Returns a pointer (R0) to the destination string dest
|
||||||
|
* we do this by not changing R0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef CONFIG_STRNCPY_L1
|
#ifdef CONFIG_STRNCPY_L1
|
||||||
|
@ -24,29 +26,60 @@
|
||||||
ENTRY(_strncpy)
|
ENTRY(_strncpy)
|
||||||
CC = R2 == 0;
|
CC = R2 == 0;
|
||||||
if CC JUMP 4f;
|
if CC JUMP 4f;
|
||||||
|
|
||||||
|
P2 = R2 ; /* size */
|
||||||
P0 = R0 ; /* dst*/
|
P0 = R0 ; /* dst*/
|
||||||
P1 = R1 ; /* src*/
|
P1 = R1 ; /* src*/
|
||||||
|
|
||||||
|
LSETUP (1f, 2f) LC0 = P2;
|
||||||
1:
|
1:
|
||||||
R1 = B [P1++] (Z);
|
R1 = B [P1++] (Z);
|
||||||
B [P0++] = R1;
|
B [P0++] = R1;
|
||||||
CC = R1;
|
CC = R1 == 0;
|
||||||
if ! cc jump 2f;
|
|
||||||
R2 += -1;
|
|
||||||
CC = R2 == 0;
|
|
||||||
if ! cc jump 1b (bp);
|
|
||||||
jump 4f;
|
|
||||||
2:
|
2:
|
||||||
/* if src is shorter than n, we need to null pad bytes in dest */
|
if CC jump 3f;
|
||||||
R1 = 0;
|
|
||||||
|
RTS;
|
||||||
|
|
||||||
|
/* If src is shorter than n, we need to NUL-pad the remaining bytes in dest.
|
||||||
|
* However, we also get here when the NUL was the last (n-th) byte copied;
|
||||||
|
* nothing is left to pad then, and we must not write an extra byte, so check
|
||||||
|
*/
|
||||||
3:
|
3:
|
||||||
|
R2 = LC0;
|
||||||
|
CC = R2
|
||||||
|
if ! CC jump 6f;
|
||||||
|
|
||||||
|
/* if the required null padded portion is small, do it here, rather than
|
||||||
|
* handling the overhead of memset (which is OK when things are big).
|
||||||
|
*/
|
||||||
|
R3 = 0x20;
|
||||||
|
CC = R2 < R3;
|
||||||
|
IF CC jump 4f;
|
||||||
|
|
||||||
R2 += -1;
|
R2 += -1;
|
||||||
CC = R2 == 0;
|
|
||||||
if cc jump 4f;
|
/* Set things up for memset
|
||||||
B [P0++] = R1;
|
* R0 = address
|
||||||
jump 3b;
|
* R1 = filler byte (zero in this case: R1 still holds the NUL just copied)
|
||||||
|
* R2 = count (set above)
|
||||||
|
*/
|
||||||
|
|
||||||
|
I1 = R0;
|
||||||
|
R0 = RETS;
|
||||||
|
I0 = R0;
|
||||||
|
R0 = P0;
|
||||||
|
pseudo_long_call _memset, p0;
|
||||||
|
R0 = I0;
|
||||||
|
RETS = R0;
|
||||||
|
R0 = I1;
|
||||||
|
RTS;
|
||||||
|
|
||||||
4:
|
4:
|
||||||
|
LSETUP(5f, 5f) LC0;
|
||||||
|
5:
|
||||||
|
B [P0++] = R1;
|
||||||
|
6:
|
||||||
RTS;
|
RTS;
|
||||||
|
|
||||||
ENDPROC(_strncpy)
|
ENDPROC(_strncpy)
|
||||||
|
|
Loading…
Reference in New Issue