random: make /dev/urandom scalable for silly userspace programs
On a system with a 4 socket (NUMA) system where a large number of application threads were all trying to read from /dev/urandom, this can result in the system spending 80% of its time contending on the global urandom spinlock. The application should have used its own PRNG, but let's try to help it from running, lemming-like, straight over the locking cliff. Reported-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
e192be9d9a
commit
1e7f583af6
|
@ -436,6 +436,8 @@ static int crng_init = 0;
|
||||||
#define crng_ready() (likely(crng_init > 0))
|
#define crng_ready() (likely(crng_init > 0))
|
||||||
static int crng_init_cnt = 0;
|
static int crng_init_cnt = 0;
|
||||||
#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
|
#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
|
||||||
|
static void _extract_crng(struct crng_state *crng,
|
||||||
|
__u8 out[CHACHA20_BLOCK_SIZE]);
|
||||||
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]);
|
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]);
|
||||||
static void process_random_ready_list(void);
|
static void process_random_ready_list(void);
|
||||||
|
|
||||||
|
@ -756,6 +758,16 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
|
||||||
|
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
|
static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
/*
|
||||||
|
* Hack to deal with crazy userspace progams when they are all trying
|
||||||
|
* to access /dev/urandom in parallel. The programs are almost
|
||||||
|
* certainly doing something terribly wrong, but we'll work around
|
||||||
|
* their brain damage.
|
||||||
|
*/
|
||||||
|
static struct crng_state **crng_node_pool __read_mostly;
|
||||||
|
#endif
|
||||||
|
|
||||||
static void crng_initialize(struct crng_state *crng)
|
static void crng_initialize(struct crng_state *crng)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -815,7 +827,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
|
||||||
if (num == 0)
|
if (num == 0)
|
||||||
return;
|
return;
|
||||||
} else
|
} else
|
||||||
extract_crng(buf.block);
|
_extract_crng(&primary_crng, buf.block);
|
||||||
spin_lock_irqsave(&primary_crng.lock, flags);
|
spin_lock_irqsave(&primary_crng.lock, flags);
|
||||||
for (i = 0; i < 8; i++) {
|
for (i = 0; i < 8; i++) {
|
||||||
unsigned long rv;
|
unsigned long rv;
|
||||||
|
@ -835,19 +847,26 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
|
||||||
spin_unlock_irqrestore(&primary_crng.lock, flags);
|
spin_unlock_irqrestore(&primary_crng.lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void maybe_reseed_primary_crng(void)
|
||||||
|
{
|
||||||
|
if (crng_init > 2 &&
|
||||||
|
time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
|
||||||
|
crng_reseed(&primary_crng, &input_pool);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void crng_wait_ready(void)
|
static inline void crng_wait_ready(void)
|
||||||
{
|
{
|
||||||
wait_event_interruptible(crng_init_wait, crng_ready());
|
wait_event_interruptible(crng_init_wait, crng_ready());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
|
static void _extract_crng(struct crng_state *crng,
|
||||||
|
__u8 out[CHACHA20_BLOCK_SIZE])
|
||||||
{
|
{
|
||||||
unsigned long v, flags;
|
unsigned long v, flags;
|
||||||
struct crng_state *crng = &primary_crng;
|
|
||||||
|
|
||||||
if (crng_init > 1 &&
|
if (crng_init > 1 &&
|
||||||
time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
|
time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
|
||||||
crng_reseed(crng, &input_pool);
|
crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL);
|
||||||
spin_lock_irqsave(&crng->lock, flags);
|
spin_lock_irqsave(&crng->lock, flags);
|
||||||
if (arch_get_random_long(&v))
|
if (arch_get_random_long(&v))
|
||||||
crng->state[14] ^= v;
|
crng->state[14] ^= v;
|
||||||
|
@ -857,6 +876,19 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
|
||||||
spin_unlock_irqrestore(&crng->lock, flags);
|
spin_unlock_irqrestore(&crng->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
|
||||||
|
{
|
||||||
|
struct crng_state *crng = NULL;
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
if (crng_node_pool)
|
||||||
|
crng = crng_node_pool[numa_node_id()];
|
||||||
|
if (crng == NULL)
|
||||||
|
#endif
|
||||||
|
crng = &primary_crng;
|
||||||
|
_extract_crng(crng, out);
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
|
static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
|
||||||
{
|
{
|
||||||
ssize_t ret = 0, i;
|
ssize_t ret = 0, i;
|
||||||
|
@ -1575,9 +1607,31 @@ static void init_std_data(struct entropy_store *r)
|
||||||
*/
|
*/
|
||||||
static int rand_initialize(void)
|
static int rand_initialize(void)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
int i;
|
||||||
|
int num_nodes = num_possible_nodes();
|
||||||
|
struct crng_state *crng;
|
||||||
|
struct crng_state **pool;
|
||||||
|
#endif
|
||||||
|
|
||||||
init_std_data(&input_pool);
|
init_std_data(&input_pool);
|
||||||
init_std_data(&blocking_pool);
|
init_std_data(&blocking_pool);
|
||||||
crng_initialize(&primary_crng);
|
crng_initialize(&primary_crng);
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
pool = kmalloc(num_nodes * sizeof(void *),
|
||||||
|
GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO);
|
||||||
|
for (i=0; i < num_nodes; i++) {
|
||||||
|
crng = kmalloc_node(sizeof(struct crng_state),
|
||||||
|
GFP_KERNEL | __GFP_NOFAIL, i);
|
||||||
|
spin_lock_init(&crng->lock);
|
||||||
|
crng_initialize(crng);
|
||||||
|
pool[i] = crng;
|
||||||
|
|
||||||
|
}
|
||||||
|
mb();
|
||||||
|
crng_node_pool = pool;
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
early_initcall(rand_initialize);
|
early_initcall(rand_initialize);
|
||||||
|
|
Loading…
Reference in New Issue