crypto: arm64/aes - don't use IV buffer to return final keystream block

The arm64 bit sliced AES core code uses the IV buffer to pass the final
keystream block back to the glue code if the input is not a multiple of
the block size, so that the asm code does not have to deal with anything
except 16 byte blocks. This is done under the assumption that the outgoing
IV is meaningless anyway in this case, given that chaining is no longer
possible under these circumstances.

However, as it turns out, the CCM driver does expect the IV to retain
a value that is equal to the original IV except for the counter value,
and even interprets byte zero as a length indicator, which may result
in memory corruption if the IV is overwritten with something else.

So use a separate buffer to return the final keystream block.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2017-02-02 11:38:55 +00:00 committed by Herbert Xu
parent 12fcd92305
commit 88a3f582be
2 changed files with 28 additions and 18 deletions

View File

@ -853,13 +853,15 @@ ENDPROC(aesbs_xts_decrypt)
/* /*
* aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
* int rounds, int blocks, u8 iv[], bool final) * int rounds, int blocks, u8 iv[], u8 final[])
*/ */
ENTRY(aesbs_ctr_encrypt) ENTRY(aesbs_ctr_encrypt)
stp x29, x30, [sp, #-16]! stp x29, x30, [sp, #-16]!
mov x29, sp mov x29, sp
add x4, x4, x6 // do one extra block if final cmp x6, #0
cset x10, ne
add x4, x4, x10 // do one extra block if final
ldp x7, x8, [x5] ldp x7, x8, [x5]
ld1 {v0.16b}, [x5] ld1 {v0.16b}, [x5]
@ -874,19 +876,26 @@ CPU_LE( rev x8, x8 )
csel x4, x4, xzr, pl csel x4, x4, xzr, pl
csel x9, x9, xzr, le csel x9, x9, xzr, le
tbnz x9, #1, 0f
next_ctr v1 next_ctr v1
tbnz x9, #2, 0f
next_ctr v2 next_ctr v2
tbnz x9, #3, 0f
next_ctr v3 next_ctr v3
tbnz x9, #4, 0f
next_ctr v4 next_ctr v4
tbnz x9, #5, 0f
next_ctr v5 next_ctr v5
tbnz x9, #6, 0f
next_ctr v6 next_ctr v6
tbnz x9, #7, 0f
next_ctr v7 next_ctr v7
0: mov bskey, x2 0: mov bskey, x2
mov rounds, x3 mov rounds, x3
bl aesbs_encrypt8 bl aesbs_encrypt8
lsr x9, x9, x6 // disregard the extra block lsr x9, x9, x10 // disregard the extra block
tbnz x9, #0, 0f tbnz x9, #0, 0f
ld1 {v8.16b}, [x1], #16 ld1 {v8.16b}, [x1], #16
@ -928,36 +937,36 @@ CPU_LE( rev x8, x8 )
eor v5.16b, v5.16b, v15.16b eor v5.16b, v5.16b, v15.16b
st1 {v5.16b}, [x0], #16 st1 {v5.16b}, [x0], #16
next_ctr v0 8: next_ctr v0
cbnz x4, 99b cbnz x4, 99b
0: st1 {v0.16b}, [x5] 0: st1 {v0.16b}, [x5]
8: ldp x29, x30, [sp], #16 ldp x29, x30, [sp], #16
ret ret
/* /*
* If we are handling the tail of the input (x6 == 1), return the * If we are handling the tail of the input (x6 != NULL), return the
* final keystream block back to the caller via the IV buffer. * final keystream block back to the caller.
*/ */
1: cbz x6, 8b 1: cbz x6, 8b
st1 {v1.16b}, [x5] st1 {v1.16b}, [x6]
b 8b b 8b
2: cbz x6, 8b 2: cbz x6, 8b
st1 {v4.16b}, [x5] st1 {v4.16b}, [x6]
b 8b b 8b
3: cbz x6, 8b 3: cbz x6, 8b
st1 {v6.16b}, [x5] st1 {v6.16b}, [x6]
b 8b b 8b
4: cbz x6, 8b 4: cbz x6, 8b
st1 {v3.16b}, [x5] st1 {v3.16b}, [x6]
b 8b b 8b
5: cbz x6, 8b 5: cbz x6, 8b
st1 {v7.16b}, [x5] st1 {v7.16b}, [x6]
b 8b b 8b
6: cbz x6, 8b 6: cbz x6, 8b
st1 {v2.16b}, [x5] st1 {v2.16b}, [x6]
b 8b b 8b
7: cbz x6, 8b 7: cbz x6, 8b
st1 {v5.16b}, [x5] st1 {v5.16b}, [x6]
b 8b b 8b
ENDPROC(aesbs_ctr_encrypt) ENDPROC(aesbs_ctr_encrypt)

View File

@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]); int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], bool final); int rounds, int blocks, u8 iv[], u8 final[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]); int rounds, int blocks, u8 iv[]);
@ -201,6 +201,7 @@ static int ctr_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk; struct skcipher_walk walk;
u8 buf[AES_BLOCK_SIZE];
int err; int err;
err = skcipher_walk_virt(&walk, req, true); err = skcipher_walk_virt(&walk, req, true);
@ -208,12 +209,12 @@ static int ctr_encrypt(struct skcipher_request *req)
kernel_neon_begin(); kernel_neon_begin();
while (walk.nbytes > 0) { while (walk.nbytes > 0) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
bool final = (walk.total % AES_BLOCK_SIZE) != 0; u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
if (walk.nbytes < walk.total) { if (walk.nbytes < walk.total) {
blocks = round_down(blocks, blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE); walk.stride / AES_BLOCK_SIZE);
final = false; final = NULL;
} }
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
@ -225,7 +226,7 @@ static int ctr_encrypt(struct skcipher_request *req)
if (dst != src) if (dst != src)
memcpy(dst, src, walk.total % AES_BLOCK_SIZE); memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE); crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, 0); err = skcipher_walk_done(&walk, 0);
break; break;