ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work natively in Thumb. This allows ARM/Thumb interworking workarounds to be removed.

I also take the opportunity to convert some explicit assembler directives for exported functions to the standard ENTRY()/ENDPROC().

For the code itself:

  * In sha1_block_data_order, use of TEQ with sp is deprecated in ARMv7 and not supported in Thumb. For the branches back to .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the assumption that clobbering the C flag here will not cause incorrect behaviour. For the first branch back to .L_20_39_or_60_79 the C flag is important, so sp is moved temporarily into another register so that TEQ can be used for the comparison.

  * In the AES code, most forms of register-indexed addressing with shifts and rotates are not permitted for loads and stores in Thumb, so the address calculation is done using a separate instruction for the Thumb case.

The resulting code is unlikely to be optimally scheduled, but it should not have a large impact given the overall size of the code. I haven't run any benchmarks.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
parent
9931faca02
commit
638591cd7b
|
@ -34,8 +34,9 @@
|
||||||
@ A little glue here to select the correct code below for the ARM CPU
|
@ A little glue here to select the correct code below for the ARM CPU
|
||||||
@ that is being targeted.
|
@ that is being targeted.
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.code 32
|
|
||||||
|
|
||||||
.type AES_Te,%object
|
.type AES_Te,%object
|
||||||
.align 5
|
.align 5
|
||||||
|
@ -145,10 +146,8 @@ AES_Te:
|
||||||
|
|
||||||
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
|
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
|
||||||
@ const AES_KEY *key) {
|
@ const AES_KEY *key) {
|
||||||
.global AES_encrypt
|
|
||||||
.type AES_encrypt,%function
|
|
||||||
.align 5
|
.align 5
|
||||||
AES_encrypt:
|
ENTRY(AES_encrypt)
|
||||||
sub r3,pc,#8 @ AES_encrypt
|
sub r3,pc,#8 @ AES_encrypt
|
||||||
stmdb sp!,{r1,r4-r12,lr}
|
stmdb sp!,{r1,r4-r12,lr}
|
||||||
mov r12,r0 @ inp
|
mov r12,r0 @ inp
|
||||||
|
@ -239,15 +238,8 @@ AES_encrypt:
|
||||||
strb r6,[r12,#14]
|
strb r6,[r12,#14]
|
||||||
strb r3,[r12,#15]
|
strb r3,[r12,#15]
|
||||||
#endif
|
#endif
|
||||||
#if __ARM_ARCH__>=5
|
|
||||||
ldmia sp!,{r4-r12,pc}
|
ldmia sp!,{r4-r12,pc}
|
||||||
#else
|
ENDPROC(AES_encrypt)
|
||||||
ldmia sp!,{r4-r12,lr}
|
|
||||||
tst lr,#1
|
|
||||||
moveq pc,lr @ be binary compatible with V4, yet
|
|
||||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
|
||||||
#endif
|
|
||||||
.size AES_encrypt,.-AES_encrypt
|
|
||||||
|
|
||||||
.type _armv4_AES_encrypt,%function
|
.type _armv4_AES_encrypt,%function
|
||||||
.align 2
|
.align 2
|
||||||
|
@ -386,10 +378,8 @@ _armv4_AES_encrypt:
|
||||||
ldr pc,[sp],#4 @ pop and return
|
ldr pc,[sp],#4 @ pop and return
|
||||||
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
|
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
|
||||||
|
|
||||||
.global private_AES_set_encrypt_key
|
|
||||||
.type private_AES_set_encrypt_key,%function
|
|
||||||
.align 5
|
.align 5
|
||||||
private_AES_set_encrypt_key:
|
ENTRY(private_AES_set_encrypt_key)
|
||||||
_armv4_AES_set_encrypt_key:
|
_armv4_AES_set_encrypt_key:
|
||||||
sub r3,pc,#8 @ AES_set_encrypt_key
|
sub r3,pc,#8 @ AES_set_encrypt_key
|
||||||
teq r0,#0
|
teq r0,#0
|
||||||
|
@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
|
||||||
|
|
||||||
.Ldone: mov r0,#0
|
.Ldone: mov r0,#0
|
||||||
ldmia sp!,{r4-r12,lr}
|
ldmia sp!,{r4-r12,lr}
|
||||||
.Labrt: tst lr,#1
|
.Labrt: mov pc,lr
|
||||||
moveq pc,lr @ be binary compatible with V4, yet
|
ENDPROC(private_AES_set_encrypt_key)
|
||||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
|
||||||
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
|
|
||||||
|
|
||||||
.global private_AES_set_decrypt_key
|
|
||||||
.type private_AES_set_decrypt_key,%function
|
|
||||||
.align 5
|
.align 5
|
||||||
private_AES_set_decrypt_key:
|
ENTRY(private_AES_set_decrypt_key)
|
||||||
str lr,[sp,#-4]! @ push lr
|
str lr,[sp,#-4]! @ push lr
|
||||||
#if 0
|
#if 0
|
||||||
@ kernel does both of these in setkey so optimise this bit out by
|
@ kernel does both of these in setkey so optimise this bit out by
|
||||||
|
@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
|
||||||
bne .Lmix
|
bne .Lmix
|
||||||
|
|
||||||
mov r0,#0
|
mov r0,#0
|
||||||
#if __ARM_ARCH__>=5
|
|
||||||
ldmia sp!,{r4-r12,pc}
|
ldmia sp!,{r4-r12,pc}
|
||||||
#else
|
ENDPROC(private_AES_set_decrypt_key)
|
||||||
ldmia sp!,{r4-r12,lr}
|
|
||||||
tst lr,#1
|
|
||||||
moveq pc,lr @ be binary compatible with V4, yet
|
|
||||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
|
||||||
#endif
|
|
||||||
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
|
|
||||||
|
|
||||||
.type AES_Td,%object
|
.type AES_Td,%object
|
||||||
.align 5
|
.align 5
|
||||||
|
@ -862,10 +841,8 @@ AES_Td:
|
||||||
|
|
||||||
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
|
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
|
||||||
@ const AES_KEY *key) {
|
@ const AES_KEY *key) {
|
||||||
.global AES_decrypt
|
|
||||||
.type AES_decrypt,%function
|
|
||||||
.align 5
|
.align 5
|
||||||
AES_decrypt:
|
ENTRY(AES_decrypt)
|
||||||
sub r3,pc,#8 @ AES_decrypt
|
sub r3,pc,#8 @ AES_decrypt
|
||||||
stmdb sp!,{r1,r4-r12,lr}
|
stmdb sp!,{r1,r4-r12,lr}
|
||||||
mov r12,r0 @ inp
|
mov r12,r0 @ inp
|
||||||
|
@ -956,15 +933,8 @@ AES_decrypt:
|
||||||
strb r6,[r12,#14]
|
strb r6,[r12,#14]
|
||||||
strb r3,[r12,#15]
|
strb r3,[r12,#15]
|
||||||
#endif
|
#endif
|
||||||
#if __ARM_ARCH__>=5
|
|
||||||
ldmia sp!,{r4-r12,pc}
|
ldmia sp!,{r4-r12,pc}
|
||||||
#else
|
ENDPROC(AES_decrypt)
|
||||||
ldmia sp!,{r4-r12,lr}
|
|
||||||
tst lr,#1
|
|
||||||
moveq pc,lr @ be binary compatible with V4, yet
|
|
||||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
|
||||||
#endif
|
|
||||||
.size AES_decrypt,.-AES_decrypt
|
|
||||||
|
|
||||||
.type _armv4_AES_decrypt,%function
|
.type _armv4_AES_decrypt,%function
|
||||||
.align 2
|
.align 2
|
||||||
|
@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
|
||||||
and r9,lr,r1,lsr#8
|
and r9,lr,r1,lsr#8
|
||||||
|
|
||||||
ldrb r7,[r10,r7] @ Td4[s1>>0]
|
ldrb r7,[r10,r7] @ Td4[s1>>0]
|
||||||
ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
|
ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24]
|
||||||
|
THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24]
|
||||||
|
THUMB( ldrb r1,[r1] )
|
||||||
ldrb r8,[r10,r8] @ Td4[s1>>16]
|
ldrb r8,[r10,r8] @ Td4[s1>>16]
|
||||||
eor r0,r7,r0,lsl#24
|
eor r0,r7,r0,lsl#24
|
||||||
ldrb r9,[r10,r9] @ Td4[s1>>8]
|
ldrb r9,[r10,r9] @ Td4[s1>>8]
|
||||||
|
@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
|
||||||
ldrb r8,[r10,r8] @ Td4[s2>>0]
|
ldrb r8,[r10,r8] @ Td4[s2>>0]
|
||||||
and r9,lr,r2,lsr#16
|
and r9,lr,r2,lsr#16
|
||||||
|
|
||||||
ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
|
ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24]
|
||||||
|
THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24]
|
||||||
|
THUMB( ldrb r2,[r2] )
|
||||||
eor r0,r0,r7,lsl#8
|
eor r0,r0,r7,lsl#8
|
||||||
ldrb r9,[r10,r9] @ Td4[s2>>16]
|
ldrb r9,[r10,r9] @ Td4[s2>>16]
|
||||||
eor r1,r8,r1,lsl#16
|
eor r1,r8,r1,lsl#16
|
||||||
|
@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
|
||||||
and r9,lr,r3 @ i2
|
and r9,lr,r3 @ i2
|
||||||
|
|
||||||
ldrb r9,[r10,r9] @ Td4[s3>>0]
|
ldrb r9,[r10,r9] @ Td4[s3>>0]
|
||||||
ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
|
ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24]
|
||||||
|
THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24]
|
||||||
|
THUMB( ldrb r3,[r3] )
|
||||||
eor r0,r0,r7,lsl#16
|
eor r0,r0,r7,lsl#16
|
||||||
ldr r7,[r11,#0]
|
ldr r7,[r11,#0]
|
||||||
eor r1,r1,r8,lsl#8
|
eor r1,r1,r8,lsl#8
|
||||||
|
|
|
@ -51,13 +51,12 @@
|
||||||
@ Profiler-assisted and platform-specific optimization resulted in 10%
|
@ Profiler-assisted and platform-specific optimization resulted in 10%
|
||||||
@ improvement on Cortex A8 core and 12.2 cycles per byte.
|
@ improvement on Cortex A8 core and 12.2 cycles per byte.
|
||||||
|
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.global sha1_block_data_order
|
|
||||||
.type sha1_block_data_order,%function
|
|
||||||
|
|
||||||
.align 2
|
.align 2
|
||||||
sha1_block_data_order:
|
ENTRY(sha1_block_data_order)
|
||||||
stmdb sp!,{r4-r12,lr}
|
stmdb sp!,{r4-r12,lr}
|
||||||
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
|
||||||
ldmia r0,{r3,r4,r5,r6,r7}
|
ldmia r0,{r3,r4,r5,r6,r7}
|
||||||
|
@ -194,7 +193,7 @@ sha1_block_data_order:
|
||||||
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
|
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
|
||||||
str r9,[r14,#-4]!
|
str r9,[r14,#-4]!
|
||||||
add r3,r3,r10 @ E+=F_00_19(B,C,D)
|
add r3,r3,r10 @ E+=F_00_19(B,C,D)
|
||||||
teq r14,sp
|
cmp r14,sp
|
||||||
bne .L_00_15 @ [((11+4)*5+2)*3]
|
bne .L_00_15 @ [((11+4)*5+2)*3]
|
||||||
#if __ARM_ARCH__<7
|
#if __ARM_ARCH__<7
|
||||||
ldrb r10,[r1,#2]
|
ldrb r10,[r1,#2]
|
||||||
|
@ -374,7 +373,9 @@ sha1_block_data_order:
|
||||||
@ F_xx_xx
|
@ F_xx_xx
|
||||||
add r3,r3,r9 @ E+=X[i]
|
add r3,r3,r9 @ E+=X[i]
|
||||||
add r3,r3,r10 @ E+=F_20_39(B,C,D)
|
add r3,r3,r10 @ E+=F_20_39(B,C,D)
|
||||||
teq r14,sp @ preserve carry
|
ARM( teq r14,sp ) @ preserve carry
|
||||||
|
THUMB( mov r11,sp )
|
||||||
|
THUMB( teq r14,r11 ) @ preserve carry
|
||||||
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
|
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
|
||||||
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
|
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
|
||||||
|
|
||||||
|
@ -466,7 +467,7 @@ sha1_block_data_order:
|
||||||
add r3,r3,r9 @ E+=X[i]
|
add r3,r3,r9 @ E+=X[i]
|
||||||
add r3,r3,r10 @ E+=F_40_59(B,C,D)
|
add r3,r3,r10 @ E+=F_40_59(B,C,D)
|
||||||
add r3,r3,r11,ror#2
|
add r3,r3,r11,ror#2
|
||||||
teq r14,sp
|
cmp r14,sp
|
||||||
bne .L_40_59 @ [+((12+5)*5+2)*4]
|
bne .L_40_59 @ [+((12+5)*5+2)*4]
|
||||||
|
|
||||||
ldr r8,.LK_60_79
|
ldr r8,.LK_60_79
|
||||||
|
@ -485,19 +486,12 @@ sha1_block_data_order:
|
||||||
teq r1,r2
|
teq r1,r2
|
||||||
bne .Lloop @ [+18], total 1307
|
bne .Lloop @ [+18], total 1307
|
||||||
|
|
||||||
#if __ARM_ARCH__>=5
|
|
||||||
ldmia sp!,{r4-r12,pc}
|
ldmia sp!,{r4-r12,pc}
|
||||||
#else
|
|
||||||
ldmia sp!,{r4-r12,lr}
|
|
||||||
tst lr,#1
|
|
||||||
moveq pc,lr @ be binary compatible with V4, yet
|
|
||||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
|
||||||
#endif
|
|
||||||
.align 2
|
.align 2
|
||||||
.LK_00_19: .word 0x5a827999
|
.LK_00_19: .word 0x5a827999
|
||||||
.LK_20_39: .word 0x6ed9eba1
|
.LK_20_39: .word 0x6ed9eba1
|
||||||
.LK_40_59: .word 0x8f1bbcdc
|
.LK_40_59: .word 0x8f1bbcdc
|
||||||
.LK_60_79: .word 0xca62c1d6
|
.LK_60_79: .word 0xca62c1d6
|
||||||
.size sha1_block_data_order,.-sha1_block_data_order
|
ENDPROC(sha1_block_data_order)
|
||||||
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
|
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
|
||||||
.align 2
|
.align 2
|
||||||
|
|
Loading…
Reference in New Issue