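/*
 * Listing of OpenCloudOS-Kernel/arch/alpha/lib/memmove.S
 * (shown by the code viewer as 184 lines, 2.7 KiB, labelled "ArmAsm";
 * the content is DEC Alpha assembly, not ARM).
 */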

/* SPDX-License-Identifier: GPL-2.0 */
/*
* arch/alpha/lib/memmove.S
*
* Barely optimized memmove routine for Alpha EV5.
*
* This is hand-massaged output from the original memcpy.c. We defer to
* memcpy whenever possible; the backwards copy loops are not unrolled.
*/
#include <asm/export.h>
/*
 * Assembler modes for this file: with "noat" the assembler may not use
 * the $at temporary behind our back, and with "noreorder" it must emit
 * the instructions exactly in the order written below.
 */
.set noat
.set noreorder
.text
.align 4
/*
 * memmove -- copy $18 bytes from $17 to $16; the regions may overlap.
 *
 * In:    $16 = dest, $17 = src, $18 = n (byte count)
 * Out:   $0  = dest (the original value of $16)
 * Uses:  $1, $2, $3, $6 as scratch; $4 / $5 are the running dest / src
 *        pointers; $29 is reloaded by ldgp; returns through $26.
 *
 * Strategy:
 *   - If [dest, dest+n) and [src, src+n) do not overlap, branch to memcpy.
 *   - Otherwise copy upward (from the start) when dest <= src, and
 *     downward (from the end) when dest > src, so that source bytes are
 *     read before they are overwritten.
 *   - When dest and src agree in their low three address bits, each
 *     direction runs a byte loop until the dest pointer is 8-byte
 *     aligned, then a quadword (8-byte) loop, then a byte loop for the
 *     remainder. Otherwise everything is copied by the byte loop.
 *
 * Every byte store is a read-modify-write of the enclosing aligned
 * quadword (ldq_u / mskbl / insbl / stq_u).
 *
 * The nop / unop / fnop instructions do no work. NOTE(review): they
 * look like padding that keeps the four-instruction groups aligned for
 * EV5 issue; confirm before removing or moving any of them.
 */
.globl memmove
.ent memmove
memmove:
ldgp $29, 0($27)                /* set up the global pointer from $27 */
unop
nop
.prologue 1
addq $16,$18,$4                 /* $4 = dest + n */
addq $17,$18,$5                 /* $5 = src + n */
cmpule $4,$17,$1 /* dest + n <= src */
cmpule $5,$16,$2 /* dest >= src + n */
bis $1,$2,$1                    /* $1 != 0 if the regions do not overlap */
mov $16,$0                      /* return value = dest */
xor $16,$17,$2                  /* $2 = dest ^ src, reduced to the low 3 bits below */
bne $1,memcpy !samegp           /* no overlap: let memcpy do the work ($26 is untouched) */
and $2,7,$2 /* Test for src/dest co-alignment. */
and $16,7,$1                    /* $1 = dest & 7, used by the upward path */
cmpule $16,$17,$3               /* $3 = (dest <= src), unsigned */
bne $3,$memmove_up /* dest <= src: copy upward */
/* Downward copy: $4 / $5 point one past the end of dest / src. */
and $4,7,$1                     /* $1 = (dest + n) & 7 */
bne $2,$misaligned_dn           /* not co-aligned: bytes only */
unop
beq $1,$skip_aligned_byte_loop_head_dn  /* end of dest already 8-byte aligned */
/*
 * Downward head loop: move one byte per iteration until the dest
 * pointer is 8-byte aligned or the count is exhausted.
 */
$aligned_byte_loop_head_dn:
lda $4,-1($4)                   /* step both pointers down to the next byte */
lda $5,-1($5)
unop
ble $18,$egress                 /* nothing left (signed test on the count) */
ldq_u $3,0($5)                  /* aligned quadword containing the source byte */
ldq_u $2,0($4)                  /* aligned quadword containing the dest byte */
lda $18,-1($18)                 /* n-- */
extbl $3,$5,$1                  /* $1 = source byte, selected by the low bits of $5 */
insbl $1,$4,$1                  /* move it to the byte lane selected by $4 */
mskbl $2,$4,$2                  /* clear that lane in the dest quadword */
bis $1,$2,$1                    /* merge the new byte in */
and $4,7,$6                     /* $6 = dest pointer & 7 */
stq_u $1,0($4)                  /* write the merged quadword back */
bne $6,$aligned_byte_loop_head_dn       /* repeat until dest is aligned */
$skip_aligned_byte_loop_head_dn:
lda $18,-8($18)                 /* bias the count by -8: sign says whether 8 bytes remain */
blt $18,$skip_aligned_word_loop_dn      /* fewer than 8 bytes left */
/* Downward quadword loop: 8 bytes per iteration, pointers aligned. */
$aligned_word_loop_dn:
ldq $1,-8($5)                   /* load the quadword just below the src pointer */
nop
lda $5,-8($5)
lda $18,-8($18)
stq $1,-8($4)                   /* store it just below the dest pointer */
nop
lda $4,-8($4)
bge $18,$aligned_word_loop_dn   /* at least 8 more bytes to go */
$skip_aligned_word_loop_dn:
lda $18,8($18)                  /* undo the bias: $18 = bytes still to copy */
bgt $18,$byte_loop_tail_dn      /* finish the remainder bytewise */
unop
ret $31,($26),1                 /* done; $0 already holds dest */
.align 4
$misaligned_dn:
nop
fnop
unop
beq $18,$egress                 /* zero-length copy */
/* Downward byte loop: used for the tail and for non-co-aligned copies. */
$byte_loop_tail_dn:
ldq_u $3,-1($5)                 /* quadword containing the byte at src pointer - 1 */
ldq_u $2,-1($4)                 /* quadword containing the byte at dest pointer - 1 */
lda $5,-1($5)
lda $4,-1($4)
lda $18,-1($18)                 /* n-- */
extbl $3,$5,$1                  /* pull out the source byte */
insbl $1,$4,$1                  /* position it for the dest lane */
mskbl $2,$4,$2                  /* clear the dest lane */
bis $1,$2,$1
stq_u $1,0($4)                  /* store the merged quadword */
bgt $18,$byte_loop_tail_dn      /* more bytes remain */
br $egress
/* Upward copy: $4 / $5 walk forward from the start of dest / src. */
$memmove_up:
mov $16,$4                      /* $4 = dest */
mov $17,$5                      /* $5 = src */
bne $2,$misaligned_up           /* not co-aligned: bytes only */
beq $1,$skip_aligned_byte_loop_head_up  /* $1 = dest & 7: already aligned */
/*
 * Upward head loop: move one byte per iteration until the dest pointer
 * is 8-byte aligned or the count is exhausted.
 */
$aligned_byte_loop_head_up:
unop
ble $18,$egress                 /* nothing left (signed test on the count) */
ldq_u $3,0($5)                  /* aligned quadword containing the source byte */
ldq_u $2,0($4)                  /* aligned quadword containing the dest byte */
lda $18,-1($18)                 /* n-- */
extbl $3,$5,$1                  /* pull out the source byte */
insbl $1,$4,$1                  /* position it for the dest lane */
mskbl $2,$4,$2                  /* clear the dest lane */
bis $1,$2,$1
lda $5,1($5)                    /* src pointer++ */
stq_u $1,0($4)                  /* store the merged quadword */
lda $4,1($4)                    /* dest pointer++ */
and $4,7,$6                     /* $6 = dest pointer & 7 */
bne $6,$aligned_byte_loop_head_up       /* repeat until dest is aligned */
$skip_aligned_byte_loop_head_up:
lda $18,-8($18)                 /* bias the count by -8: sign says whether 8 bytes remain */
blt $18,$skip_aligned_word_loop_up      /* fewer than 8 bytes left */
/* Upward quadword loop: 8 bytes per iteration, pointers aligned. */
$aligned_word_loop_up:
ldq $1,0($5)                    /* load the next source quadword */
nop
lda $5,8($5)
lda $18,-8($18)
stq $1,0($4)                    /* store it to dest */
nop
lda $4,8($4)
bge $18,$aligned_word_loop_up   /* at least 8 more bytes to go */
$skip_aligned_word_loop_up:
lda $18,8($18)                  /* undo the bias: $18 = bytes still to copy */
bgt $18,$byte_loop_tail_up      /* finish the remainder bytewise */
unop
ret $31,($26),1                 /* done; $0 already holds dest */
.align 4
$misaligned_up:
nop
fnop
unop
beq $18,$egress                 /* zero-length copy */
/* Upward byte loop: used for the tail and for non-co-aligned copies. */
$byte_loop_tail_up:
ldq_u $3,0($5)                  /* quadword containing the source byte */
ldq_u $2,0($4)                  /* quadword containing the dest byte */
lda $18,-1($18)                 /* n-- */
extbl $3,$5,$1                  /* pull out the source byte */
insbl $1,$4,$1                  /* position it for the dest lane */
mskbl $2,$4,$2                  /* clear the dest lane */
bis $1,$2,$1
stq_u $1,0($4)                  /* store the merged quadword */
lda $5,1($5)                    /* src pointer++ */
lda $4,1($4)                    /* dest pointer++ */
nop
bgt $18,$byte_loop_tail_up      /* more bytes remain */
/* Common exit: return to the caller with $0 = dest. */
$egress:
ret $31,($26),1
nop
nop
nop
.end memmove
/*
 * EXPORT_SYMBOL is supplied by <asm/export.h> (not visible here);
 * presumably it makes memmove available to loadable modules.
 */
EXPORT_SYMBOL(memmove)