arm64/crypto: issue aese/aesmc instructions in pairs

This changes the AES core transform implementations to issue aese/aesmc
(and aesd/aesimc) in pairs. This enables a micro-architectural optimization
in recent Cortex-A5x cores that improves performance by 50-90%.

Measured performance in cycles per byte (Cortex-A57):

                CBC enc         CBC dec         CTR
  before        3.64            1.34            1.32
  after         1.95            0.85            0.93

Note that this results in a ~5% performance decrease for older cores.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
This commit is contained in:
Ard Biesheuvel 2015-03-17 18:05:13 +00:00 committed by Will Deacon
parent b63dbef93f
commit 4a97abd443
2 changed files with 9 additions and 13 deletions

View File

@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
0: mov v4.16b, v3.16b
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
bpl 1b
aese v0.16b, v4.16b
@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
ld1 {v5.2d}, [x10], #16 /* load next round key */
bpl 2b

View File

@ -45,18 +45,14 @@
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
.endif
.endif
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
aes\mc \i1\().16b, \i1\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\mc \i2\().16b, \i2\().16b
aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b
.endif
.endif