arm64/crypto: issue aese/aesmc instructions in pairs
This changes the AES core transform implementations to issue aese/aesmc (and aesd/aesimc) in pairs. This enables a micro-architectural optimization in recent Cortex-A5x cores that improves performance by 50-90%. Measured performance in cycles per byte (Cortex-A57): CBC enc CBC dec CTR before 3.64 1.34 1.32 after 1.95 0.85 0.93 Note that this results in a ~5% performance decrease for older cores. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
This commit is contained in:
parent
b63dbef93f
commit
4a97abd443
|
@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
|
|||
0: mov v4.16b, v3.16b
|
||||
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
|
||||
aese v0.16b, v5.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
|
||||
subs w3, w3, #3
|
||||
aese v0.16b, v3.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
bpl 1b
|
||||
aese v0.16b, v4.16b
|
||||
|
@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
|
|||
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
|
||||
2: /* inner loop: 3 rounds, 2x interleaved */
|
||||
aese v0.16b, v4.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v4.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
|
||||
aese v0.16b, v5.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v5.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
|
||||
subs w7, w7, #3
|
||||
aese v0.16b, v3.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v0.16b, v0.16b
|
||||
aese v1.16b, v3.16b
|
||||
aesmc v1.16b, v1.16b
|
||||
ld1 {v5.2d}, [x10], #16 /* load next round key */
|
||||
bpl 2b
|
||||
|
|
|
@ -45,18 +45,14 @@
|
|||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
|
||||
aes\de \i0\().16b, \k\().16b
|
||||
.ifnb \i1
|
||||
aes\de \i1\().16b, \k\().16b
|
||||
.ifnb \i3
|
||||
aes\de \i2\().16b, \k\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
.endif
|
||||
.endif
|
||||
aes\mc \i0\().16b, \i0\().16b
|
||||
.ifnb \i1
|
||||
aes\de \i1\().16b, \k\().16b
|
||||
aes\mc \i1\().16b, \i1\().16b
|
||||
.ifnb \i3
|
||||
aes\de \i2\().16b, \k\().16b
|
||||
aes\mc \i2\().16b, \i2\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
aes\mc \i3\().16b, \i3\().16b
|
||||
.endif
|
||||
.endif
|
||||
|
|
Loading…
Reference in New Issue