2020-07-31 13:51:14 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/*
|
|
|
|
* include/linux/prandom.h
|
|
|
|
*
|
|
|
|
* Include file for the fast pseudo-random 32-bit
|
|
|
|
* generation.
|
|
|
|
*/
|
|
|
|
#ifndef _LINUX_PRANDOM_H
|
|
|
|
#define _LINUX_PRANDOM_H
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/percpu.h>
|
random32: use real rng for non-deterministic randomness
random32.c has two random number generators in it: one that is meant to
be used deterministically, with some predefined seed, and one that does
the same exact thing as random.c, except does it poorly. The first one
has some use cases. The second one no longer does and can be replaced
with calls to random.c's proper random number generator.
The relatively recent siphash-based bad random32.c code was added in
response to concerns that the prior random32.c was too deterministic.
Out of fears that random.c was (at the time) too slow, this code was
anonymously contributed. Then out of that emerged a kind of shadow
entropy gathering system, with its own tentacles throughout various net
code, added willy nilly.
Stop👏making👏bespoke👏random👏number👏generators👏.
Fortunately, recent advances in random.c mean that we can stop playing
with this sketchiness, and just use get_random_u32(), which is now fast
enough. In micro benchmarks using RDPMC, I'm seeing the same median
cycle count between the two functions, with the mean being _slightly_
higher due to batches refilling (which we can optimize further if need be).
However, when doing *real* benchmarks of the net functions that actually
use these random numbers, the mean cycles actually *decreased* slightly
(with the median still staying the same), likely because the additional
prandom code means icache misses and complexity, whereas random.c is
generally already being used by something else nearby.
The biggest benefit of this is that there are many users of prandom who
probably should be using cryptographically secure random numbers. This
makes all of those accidental cases become secure by just flipping a
switch. Later on, we can do a tree-wide cleanup to remove the static
inline wrapper functions that this commit adds.
There are also some low-ish hanging fruits for making this even faster
in the future: a get_random_u16() function for use in the networking
stack will give a 2x performance boost there, using SIMD for ChaCha20
will let us compute 4 or 8 or 16 blocks of output in parallel, instead
of just one, giving us large buffers for cheap, and introducing a
get_random_*_bh() function that assumes irqs are already disabled will
shave off a few cycles for ordinary calls. These are things we can chip
away at down the road.
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-05-11 22:11:29 +08:00
|
|
|
#include <linux/random.h>
|
2020-07-31 13:51:14 +08:00
|
|
|
|
|
|
|
/*
 * State for the seedable generator: four 32-bit words. They are filled
 * in by prandom_seed_state() and the structure is passed by pointer to
 * prandom_u32_state() and prandom_bytes_state().
 */
struct rnd_state {
	__u32 s1;
	__u32 s2;
	__u32 s3;
	__u32 s4;
};
|
|
|
|
|
|
|
|
u32 prandom_u32_state(struct rnd_state *state);
|
|
|
|
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
|
|
|
|
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
|
|
|
|
|
|
|
|
/*
 * prandom_init_once - pass prandom_seed_full_state() and @pcpu_state to
 * DO_ONCE().
 *
 * NOTE(review): presumably this seeds the per-CPU state only the first
 * time it is reached -- confirm against the DO_ONCE() definition.
 */
#define prandom_init_once(pcpu_state)				\
	DO_ONCE(prandom_seed_full_state, (pcpu_state))
|
|
|
|
|
random: use rejection sampling for uniform bounded random integers
Until the very recent commits, many bounded random integers were
calculated using `get_random_u32() % max_plus_one`, which not only
incurs the price of a division -- indicating performance mostly was not
a real issue -- but also does not result in a uniformly distributed
output if max_plus_one is not a power of two. Recent commits moved to
using `prandom_u32_max(max_plus_one)`, which replaces the division with
a faster multiplication, but still does not solve the issue with
non-uniform output.
For some users, maybe this isn't a problem, and for others, maybe it is,
but for the majority of users, probably the question has never been
posed and analyzed, and nobody thought much about it, probably assuming
random is random is random. In other words, the unthinking expectation
of most users is likely that the resultant numbers are uniform.
So we implement here an efficient way of generating uniform bounded
random integers. Through use of compile-time evaluation, and avoiding
divisions as much as possible, this commit introduces no measurable
overhead. At least for hot-path uses tested, any potential difference
was lost in the noise. On both clang and gcc, code generation is pretty
small.
The new function, get_random_u32_below(), lives in random.h, rather than
prandom.h, and has a "get_random_xxx" function name, because it is
suitable for all uses, including cryptography.
In order to be efficient, we implement a kernel-specific variant of
Daniel Lemire's algorithm from "Fast Random Integer Generation in an
Interval", linked below. The kernel's variant takes advantage of
constant folding to avoid divisions entirely in the vast majority of
cases, works on both 32-bit and 64-bit architectures, and requests a
minimal amount of bytes from the RNG.
Link: https://arxiv.org/pdf/1805.10941.pdf
Cc: stable@vger.kernel.org # to ease future backports that use this api
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-10-09 10:42:54 +08:00
|
|
|
/* Deprecated: use get_random_u32_below() instead. */
|
2020-07-31 13:51:14 +08:00
|
|
|
static inline u32 prandom_u32_max(u32 ep_ro)
|
|
|
|
{
|
random: use rejection sampling for uniform bounded random integers
Until the very recent commits, many bounded random integers were
calculated using `get_random_u32() % max_plus_one`, which not only
incurs the price of a division -- indicating performance mostly was not
a real issue -- but also does not result in a uniformly distributed
output if max_plus_one is not a power of two. Recent commits moved to
using `prandom_u32_max(max_plus_one)`, which replaces the division with
a faster multiplication, but still does not solve the issue with
non-uniform output.
For some users, maybe this isn't a problem, and for others, maybe it is,
but for the majority of users, probably the question has never been
posed and analyzed, and nobody thought much about it, probably assuming
random is random is random. In other words, the unthinking expectation
of most users is likely that the resultant numbers are uniform.
So we implement here an efficient way of generating uniform bounded
random integers. Through use of compile-time evaluation, and avoiding
divisions as much as possible, this commit introduces no measurable
overhead. At least for hot-path uses tested, any potential difference
was lost in the noise. On both clang and gcc, code generation is pretty
small.
The new function, get_random_u32_below(), lives in random.h, rather than
prandom.h, and has a "get_random_xxx" function name, because it is
suitable for all uses, including cryptography.
In order to be efficient, we implement a kernel-specific variant of
Daniel Lemire's algorithm from "Fast Random Integer Generation in an
Interval", linked below. The kernel's variant takes advantage of
constant folding to avoid divisions entirely in the vast majority of
cases, works on both 32-bit and 64-bit architectures, and requests a
minimal amount of bytes from the RNG.
Link: https://arxiv.org/pdf/1805.10941.pdf
Cc: stable@vger.kernel.org # to ease future backports that use this api
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
2022-10-09 10:42:54 +08:00
|
|
|
return get_random_u32_below(ep_ro);
|
2020-07-31 13:51:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Handle minimum values for seeds.
 *
 * A value @x that is already at least @m is returned untouched; a
 * smaller one is bumped up by @m so the result is never below @m.
 */
static inline u32 __seed(u32 x, u32 m)
{
	if (x >= m)
		return x;

	return x + m;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* prandom_seed_state - set seed for prandom_u32_state().
|
|
|
|
* @state: pointer to state structure to receive the seed.
|
|
|
|
* @seed: arbitrary 64-bit value to use as a seed.
|
|
|
|
*/
|
|
|
|
static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
|
|
|
|
{
|
2021-05-25 20:20:12 +08:00
|
|
|
u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL;
|
2020-07-31 13:51:14 +08:00
|
|
|
|
|
|
|
state->s1 = __seed(i, 2U);
|
|
|
|
state->s2 = __seed(i, 8U);
|
|
|
|
state->s3 = __seed(i, 16U);
|
|
|
|
state->s4 = __seed(i, 128U);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Pseudo random number generator from numerical recipes.
 *
 * Advances @seed by one step of seed * 1664525 + 1013904223; the
 * arithmetic is done in u32, so the result wraps modulo 2^32.
 */
static inline u32 next_pseudo_random32(u32 seed)
{
	const u32 multiplier = 1664525;
	const u32 increment = 1013904223;

	return multiplier * seed + increment;
}
|
|
|
|
|
|
|
|
#endif
|