Blackfin: optimize strncpy a bit

Add a little strncpy optimization which can easily cut boot time by 20%.

When the kernel is booting with initramfs, it builds up the filesystem
from a cpio archive by calling strncpy_from_user() via fs/namei.c's
do_getname() on every file in the archive (of which there can be many)
with a length of PATH_MAX (1024).  This causes the destination of the
strncpy to be padded with many NUL bytes.

This optimization mostly causes that NUL padding to be written by a call
to memset(), which is already optimized for filling memory quickly, but
the hardware loop helps a little bit as well.

Boot time was measured with 'loglevel=0' so that UART speed doesn't get in the way.

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
This commit is contained in:
Robin Getz 2010-05-04 14:59:21 +00:00 committed by Mike Frysinger
parent c70dcabc8e
commit 648eee52cc
2 changed files with 47 additions and 13 deletions

View File

@ -20,6 +20,7 @@
* R1 = filler byte * R1 = filler byte
* R2 = count * R2 = count
* Favours word aligned data. * Favours word aligned data.
* Note: strncpy relies on this function not using I0 and I1
*/ */
ENTRY(_memset) ENTRY(_memset)

View File

@ -5,12 +5,14 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/context.S>
/* void *strncpy(char *dest, const char *src, size_t n); /* void *strncpy(char *dest, const char *src, size_t n);
* R0 = address (dest) * R0 = address (dest)
* R1 = address (src) * R1 = address (src)
* R2 = size * R2 = size
* Returns a pointer to the destination string dest * Returns a pointer (R0) to the destination string dest
* we do this by not changing R0
*/ */
#ifdef CONFIG_STRNCPY_L1 #ifdef CONFIG_STRNCPY_L1
@ -24,29 +26,60 @@
ENTRY(_strncpy) ENTRY(_strncpy)
CC = R2 == 0; CC = R2 == 0;
if CC JUMP 4f; if CC JUMP 4f;
P2 = R2 ; /* size */
P0 = R0 ; /* dst*/ P0 = R0 ; /* dst*/
P1 = R1 ; /* src*/ P1 = R1 ; /* src*/
LSETUP (1f, 2f) LC0 = P2;
1: 1:
R1 = B [P1++] (Z); R1 = B [P1++] (Z);
B [P0++] = R1; B [P0++] = R1;
CC = R1; CC = R1 == 0;
if ! cc jump 2f;
R2 += -1;
CC = R2 == 0;
if ! cc jump 1b (bp);
jump 4f;
2: 2:
/* if src is shorter than n, we need to null pad bytes in dest */ if CC jump 3f;
R1 = 0;
RTS;
/* if src is shorter than n, we need to null pad bytes in dest
* but, we can get here when the last byte is zero, and we don't
* want to copy an extra byte at the end, so we need to check
*/
3: 3:
R2 = LC0;
CC = R2
if ! CC jump 6f;
/* if the required null padded portion is small, do it here, rather than
* handling the overhead of memset (which is OK when things are big).
*/
R3 = 0x20;
CC = R2 < R3;
IF CC jump 4f;
R2 += -1; R2 += -1;
CC = R2 == 0;
if cc jump 4f; /* Set things up for memset
B [P0++] = R1; * R0 = address
jump 3b; * R1 = filler byte (this case it's zero, set above)
* R2 = count (set above)
*/
I1 = R0;
R0 = RETS;
I0 = R0;
R0 = P0;
pseudo_long_call _memset, p0;
R0 = I0;
RETS = R0;
R0 = I1;
RTS;
4: 4:
LSETUP(5f, 5f) LC0;
5:
B [P0++] = R1;
6:
RTS; RTS;
ENDPROC(_strncpy) ENDPROC(_strncpy)