Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto fixes from Herbert Xu:
 "This fixes a potential scheduling latency problem for the algorithms
  used by WireGuard"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: arch/nhpoly1305 - process in explicit 4k chunks
  crypto: arch/lib - limit simd usage to 4k chunks
This commit is contained in:
Linus Torvalds 2020-05-06 10:20:00 -07:00
commit 3c40cdb0e9
11 changed files with 69 additions and 34 deletions

View File

@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
return;
}
kernel_neon_begin();
chacha_doneon(state, dst, src, bytes, nrounds);
kernel_neon_end();
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

View File

@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);

View File

@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && do_neon) {
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, len, 1);
kernel_neon_end();
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, todo, 1);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else {
poly1305_blocks_arm(&dctx->h, src, len, 1);
src += len;
}
src += len;
nbytes %= POLY1305_BLOCK_SIZE;
}

View File

@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
!crypto_simd_usable())
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
kernel_neon_begin();
chacha_doneon(state, dst, src, bytes, nrounds);
kernel_neon_end();
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

View File

@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);

View File

@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, len, 1);
kernel_neon_end();
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
poly1305_blocks_neon(&dctx->h, src, todo, 1);
kernel_neon_end();
len -= todo;
src += todo;
} while (len);
} else {
poly1305_blocks(&dctx->h, src, len, 1);
src += len;
}
src += len;
nbytes %= POLY1305_BLOCK_SIZE;
}

View File

@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}
for (;;) {
do {
const size_t blocks = min_t(size_t, nblocks,
PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) &&
@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
kernel_fpu_end();
nblocks -= blocks;
if (!nblocks)
break;
block += blocks * BLAKE2S_BLOCK_SIZE;
}
} while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress_arch);

View File

@ -153,9 +153,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
bytes <= CHACHA_BLOCK_SIZE)
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
kernel_fpu_begin();
chacha_dosimd(state, dst, src, bytes, nrounds);
kernel_fpu_end();
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_fpu_begin();
chacha_dosimd(state, dst, src, todo, nrounds);
kernel_fpu_end();
bytes -= todo;
src += todo;
dst += todo;
} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

View File

@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);

View File

@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
return crypto_nhpoly1305_update(desc, src, srclen);
do {
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);

View File

@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
SZ_4K % POLY1305_BLOCK_SIZE);
if (!static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
do {
const size_t bytes = min_t(size_t, len, SZ_4K);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
kernel_fpu_end();
len -= bytes;
if (!len)
break;
inp += bytes;
}
} while (len);
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],