OpenCloudOS-Kernel/drivers/char/random.c

1753 lines
52 KiB
C
Raw Normal View History

/*
* random.c -- A strong random number generator
*
* Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
*
* Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All
* rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, and the entire permission notice in its entirety,
* including the disclaimer of warranties.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* ALTERNATIVELY, this product may be distributed under the terms of
* the GNU General Public License, in which case the provisions of the GPL are
* required INSTEAD OF the above restrictions. (This clause is
* necessary due to a potential bad interaction between the GPL and
* the restrictions contained in a BSD-style copyright.)
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
* WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/*
* (now, with legal B.S. out of the way.....)
*
* This routine gathers environmental noise from device drivers, etc.,
* and returns good random numbers, suitable for cryptographic use.
* Besides the obvious cryptographic uses, these numbers are also good
* for seeding TCP sequence numbers, and other places where it is
* desirable to have numbers which are not only random, but hard to
* predict by an attacker.
*
* Theory of operation
* ===================
*
* Computers are very predictable devices. Hence it is extremely hard
* to produce truly random numbers on a computer --- as opposed to
* pseudo-random numbers, which can easily generated by using a
* algorithm. Unfortunately, it is very easy for attackers to guess
* the sequence of pseudo-random number generators, and for some
* applications this is not acceptable. So instead, we must try to
* gather "environmental noise" from the computer's environment, which
* must be hard for outside attackers to observe, and use that to
* generate random numbers. In a Unix environment, this is best done
* from inside the kernel.
*
* Sources of randomness from the environment include inter-keyboard
* timings, inter-interrupt timings from some interrupts, and other
* events which are both (a) non-deterministic and (b) hard for an
* outside observer to measure. Randomness from these sources are
* added to an "entropy pool", which is mixed using a CRC-like function.
* This is not cryptographically strong, but it is adequate assuming
* the randomness is not chosen maliciously, and it is fast enough that
* the overhead of doing it on every interrupt is very reasonable.
* As random bytes are mixed into the entropy pool, the routines keep
* an *estimate* of how many bits of randomness have been stored into
* the random number generator's internal state.
*
* When random bytes are desired, they are obtained by taking the SHA
* hash of the contents of the "entropy pool". The SHA hash avoids
* exposing the internal state of the entropy pool. It is believed to
* be computationally infeasible to derive any useful information
* about the input of SHA from its output. Even if it is possible to
* analyze SHA in some clever way, as long as the amount of data
* returned from the generator is less than the inherent entropy in
* the pool, the output data is totally unpredictable. For this
* reason, the routine decreases its internal estimate of how many
* bits of "true randomness" are contained in the entropy pool as it
* outputs random numbers.
*
* If this estimate goes to zero, the routine can still generate
* random numbers; however, an attacker may (at least in theory) be
* able to infer the future output of the generator from prior
* outputs. This requires successful cryptanalysis of SHA, which is
* not believed to be feasible, but there is a remote possibility.
* Nonetheless, these numbers should be useful for the vast majority
* of purposes.
*
* Exported interfaces ---- output
* ===============================
*
* There are three exported interfaces; the first is one designed to
* be used from within the kernel:
*
* void get_random_bytes(void *buf, int nbytes);
*
* This interface will return the requested number of random bytes,
* and place it in the requested buffer.
*
* The two other interfaces are two character devices /dev/random and
* /dev/urandom. /dev/random is suitable for use when very high
* quality randomness is desired (for example, for key generation or
* one-time pads), as it will only return a maximum of the number of
* bits of randomness (as estimated by the random number generator)
* contained in the entropy pool.
*
* The /dev/urandom device does not have this limit, and will return
* as many bytes as are requested. As more and more random bytes are
* requested without giving time for the entropy pool to recharge,
* this will result in random numbers that are merely cryptographically
* strong. For many applications, however, this is acceptable.
*
* Exported interfaces ---- input
* ==============================
*
* The current exported interfaces for gathering environmental noise
* from the devices are:
*
* void add_device_randomness(const void *buf, unsigned int size);
* void add_input_randomness(unsigned int type, unsigned int code,
* unsigned int value);
* void add_interrupt_randomness(int irq, int irq_flags);
* void add_disk_randomness(struct gendisk *disk);
*
* add_device_randomness() is for adding data to the random pool that
* is likely to differ between two devices (or possibly even per boot).
* This would be things like MAC addresses or serial numbers, or the
* read-out of the RTC. This does *not* add any actual entropy to the
* pool, but it initializes the pool to different values for devices
* that might otherwise be identical and have very little entropy
* available to them (particularly common in the embedded world).
*
* add_input_randomness() uses the input layer interrupt timing, as well as
* the event type information from the hardware.
*
* add_interrupt_randomness() uses the interrupt timing as random
* inputs to the entropy pool. Using the cycle counters and the irq source
* as inputs, it feeds the randomness roughly once a second.
*
* add_disk_randomness() uses what amounts to the seek time of block
* layer request events, on a per-disk_devt basis, as input to the
* entropy pool. Note that high-speed solid state drives with very low
* seek times do not make for good sources of entropy, as their seek
* times are usually fairly consistent.
*
* All of these routines try to estimate how many bits of randomness a
* particular randomness source. They do this by keeping track of the
* first and second order deltas of the event timings.
*
* Ensuring unpredictability at system startup
* ============================================
*
* When any operating system starts up, it will go through a sequence
* of actions that are fairly predictable by an adversary, especially
* if the start-up does not involve interaction with a human operator.
* This reduces the actual number of bits of unpredictability in the
* entropy pool below the value in entropy_count. In order to
* counteract this effect, it helps to carry information in the
* entropy pool across shut-downs and start-ups. To do this, put the
* following lines an appropriate script which is run during the boot
* sequence:
*
* echo "Initializing random number generator..."
* random_seed=/var/run/random-seed
* # Carry a random seed from start-up to start-up
* # Load and then save the whole entropy pool
* if [ -f $random_seed ]; then
* cat $random_seed >/dev/urandom
* else
* touch $random_seed
* fi
* chmod 600 $random_seed
* dd if=/dev/urandom of=$random_seed count=1 bs=512
*
* and the following lines in an appropriate script which is run as
* the system is shutdown:
*
* # Carry a random seed from shut-down to start-up
* # Save the whole entropy pool
* echo "Saving random seed..."
* random_seed=/var/run/random-seed
* touch $random_seed
* chmod 600 $random_seed
* dd if=/dev/urandom of=$random_seed count=1 bs=512
*
* For example, on most modern systems using the System V init
* scripts, such code fragments would be found in
* /etc/rc.d/init.d/random. On older Linux systems, the correct script
* location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
*
* Effectively, these commands cause the contents of the entropy pool
* to be saved at shut-down time and reloaded into the entropy pool at
* start-up. (The 'dd' in the addition to the bootup script is to
* make sure that /etc/random-seed is different for every start-up,
* even if the system crashes without executing rc.0.) Even with
* complete knowledge of the start-up activities, predicting the state
* of the entropy pool requires knowledge of the previous history of
* the system.
*
* Configuring the /dev/random driver under Linux
* ==============================================
*
* The /dev/random driver under Linux uses minor numbers 8 and 9 of
* the /dev/mem major number (#1). So if your system does not have
* /dev/random and /dev/urandom created already, they can be created
* by using the commands:
*
* mknod /dev/random c 1 8
* mknod /dev/urandom c 1 9
*
* Acknowledgements:
* =================
*
* Ideas for constructing this random number generator were derived
* from Pretty Good Privacy's random number generator, and from private
* discussions with Phil Karn. Colin Plumb provided a faster random
* number generator, which speed up the mixing function of the entropy
* pool, taken from PGPfone. Dale Worley has also contributed many
* useful ideas and suggestions to improve this driver.
*
* Any flaws in the design are solely my responsibility, and should
* not be attributed to the Phil, Colin, or any of authors of PGP.
*
* Further background information on this topic may be obtained from
* RFC 1750, "Randomness Recommendations for Security", by Donald
* Eastlake, Steve Crocker, and Jeff Schiller.
*/
#include <linux/utsname.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/interrupt.h>
2008-07-24 12:28:13 +08:00
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/cryptohash.h>
#include <linux/fips.h>
#include <linux/ptrace.h>
#include <linux/kmemcheck.h>
#include <linux/workqueue.h>
#include <linux/irq.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/io.h>
#define CREATE_TRACE_POINTS
#include <trace/events/random.h>
/* #define ADD_INTERRUPT_BENCH */
/*
* Configuration information
*/
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
#define INPUT_POOL_SHIFT 12
#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
#define OUTPUT_POOL_SHIFT 10
#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
#define SEC_XFER_SIZE 512
#define EXTRACT_SIZE 10
#define DEBUG_RANDOM_BOOT 0
#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
/*
* To allow fractional bits to be tracked, the entropy_count field is
* denominated in units of 1/8th bits.
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
*
* 2*(ENTROPY_SHIFT + log2(poolbits)) must <= 31, or the multiply in
* credit_entropy_bits() needs to be 64 bits wide.
*/
#define ENTROPY_SHIFT 3
#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT)
/*
* The minimum number of bits of entropy before we wake up a read on
* /dev/random. Should be enough to do a significant reseed.
*/
static int random_read_wakeup_bits = 64;
/*
* If the entropy count falls under this number of bits, then we
* should wake up processes which are selecting or polling on write
* access to /dev/random.
*/
static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
/*
* The minimum number of seconds between urandom pool reseeding. We
* do this to limit the amount of entropy that can be drained from the
* input pool even if there are heavy demands on /dev/urandom.
*/
static int random_min_urandom_seed = 60;
/*
* Originally, we used a primitive polynomial of degree .poolwords
* over GF(2). The taps for various sizes are defined below. They
* were chosen to be evenly spaced except for the last tap, which is 1
* to get the twisting happening as fast as possible.
*
* For the purposes of better mixing, we use the CRC-32 polynomial as
* well to make a (modified) twisted Generalized Feedback Shift
* Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR
* generators. ACM Transactions on Modeling and Computer Simulation
* 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted
* GFSR generators II. ACM Transactions on Modeling and Computer
* Simulation 4:254-266)
*
* Thanks to Colin Plumb for suggesting this.
*
* The mixing operation is much less sensitive than the output hash,
* where we use SHA-1. All that we want of mixing operation is that
* it be a good non-cryptographic hash; i.e. it not produce collisions
* when fed "random" data of the sort we expect to see. As long as
* the pool state differs for different inputs, we have preserved the
* input entropy and done a good job. The fact that an intelligent
* attacker can construct inputs that will produce controlled
* alterations to the pool's state is not important because we don't
* consider such inputs to contribute any randomness. The only
* property we need with respect to them is that the attacker can't
* increase his/her knowledge of the pool's state. Since all
* additions are reversible (knowing the final state and the input,
* you can reconstruct the initial state), if an attacker has any
* uncertainty about the initial state, he/she can only shuffle that
* uncertainty about, but never cause any collisions (which would
* decrease the uncertainty).
*
* Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and
* Videau in their paper, "The Linux Pseudorandom Number Generator
* Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their
* paper, they point out that we are not using a true Twisted GFSR,
* since Matsumoto & Kurita used a trinomial feedback polynomial (that
* is, with only three taps, instead of the six that we are using).
* As a result, the resulting polynomial is neither primitive nor
* irreducible, and hence does not have a maximal period over
* GF(2**32). They suggest a slight change to the generator
* polynomial which improves the resulting TGFSR polynomial to be
* irreducible, which we have made here.
*/
static struct poolinfo {
int poolbitshift, poolwords, poolbytes, poolbits, poolfracbits;
#define S(x) ilog2(x)+5, (x), (x)*4, (x)*32, (x) << (ENTROPY_SHIFT+5)
int tap1, tap2, tap3, tap4, tap5;
} poolinfo_table[] = {
/* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */
/* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
{ S(128), 104, 76, 51, 25, 1 },
/* was: x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 */
/* x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */
{ S(32), 26, 19, 14, 7, 1 },
#if 0
/* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */
{ S(2048), 1638, 1231, 819, 411, 1 },
/* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */
{ S(1024), 817, 615, 412, 204, 1 },
/* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */
{ S(1024), 819, 616, 410, 207, 2 },
/* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */
{ S(512), 411, 308, 208, 104, 1 },
/* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */
{ S(512), 409, 307, 206, 102, 2 },
/* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */
{ S(512), 409, 309, 205, 103, 2 },
/* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */
{ S(256), 205, 155, 101, 52, 1 },
/* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */
{ S(128), 103, 78, 51, 27, 2 },
/* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */
{ S(64), 52, 39, 26, 14, 1 },
#endif
};
/*
* Static global variables
*/
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
2008-04-29 16:03:08 +08:00
static struct fasync_struct *fasync;
/**********************************************************************
*
* OS independent entropy store. Here are the functions which handle
* storing entropy in an entropy pool.
*
**********************************************************************/
struct entropy_store;
struct entropy_store {
/* read-only data: */
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
const struct poolinfo *poolinfo;
__u32 *pool;
const char *name;
struct entropy_store *pull;
struct work_struct push_work;
/* read-write data: */
unsigned long last_pulled;
spinlock_t lock;
unsigned short add_ptr;
unsigned short input_rotate;
int entropy_count;
int entropy_total;
unsigned int initialized:1;
unsigned int limit:1;
unsigned int last_data_init:1;
__u8 last_data[EXTRACT_SIZE];
};
static void push_to_pool(struct work_struct *work);
static __u32 input_pool_data[INPUT_POOL_WORDS];
static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
static struct entropy_store input_pool = {
.poolinfo = &poolinfo_table[0],
.name = "input",
.limit = 1,
.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
.pool = input_pool_data
};
static struct entropy_store blocking_pool = {
.poolinfo = &poolinfo_table[1],
.name = "blocking",
.limit = 1,
.pull = &input_pool,
.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
.pool = blocking_pool_data,
.push_work = __WORK_INITIALIZER(blocking_pool.push_work,
push_to_pool),
};
static struct entropy_store nonblocking_pool = {
.poolinfo = &poolinfo_table[1],
.name = "nonblocking",
.pull = &input_pool,
.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
.pool = nonblocking_pool_data,
.push_work = __WORK_INITIALIZER(nonblocking_pool.push_work,
push_to_pool),
};
static __u32 const twist_table[8] = {
0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
/*
* This function adds bytes into the entropy "pool". It does not
* update the entropy estimate. The caller should call
* credit_entropy_bits if this is appropriate.
*
* The pool is stirred with a primitive polynomial of the appropriate
* degree, and then twisted. We twist by three bits at a time because
* it's cheap to do so and helps slightly in the expected case where
* the entropy is concentrated in the low-order bits.
*/
static void _mix_pool_bytes(struct entropy_store *r, const void *in,
int nbytes)
{
unsigned long i, tap1, tap2, tap3, tap4, tap5;
int input_rotate;
int wordmask = r->poolinfo->poolwords - 1;
const char *bytes = in;
__u32 w;
tap1 = r->poolinfo->tap1;
tap2 = r->poolinfo->tap2;
tap3 = r->poolinfo->tap3;
tap4 = r->poolinfo->tap4;
tap5 = r->poolinfo->tap5;
input_rotate = r->input_rotate;
i = r->add_ptr;
/* mix one byte at a time to simplify size handling and churn faster */
while (nbytes--) {
w = rol32(*bytes++, input_rotate);
i = (i - 1) & wordmask;
/* XOR in the various taps */
w ^= r->pool[i];
w ^= r->pool[(i + tap1) & wordmask];
w ^= r->pool[(i + tap2) & wordmask];
w ^= r->pool[(i + tap3) & wordmask];
w ^= r->pool[(i + tap4) & wordmask];
w ^= r->pool[(i + tap5) & wordmask];
/* Mix the result back in with a twist */
r->pool[i] = (w >> 3) ^ twist_table[w & 7];
/*
* Normally, we add 7 bits of rotation to the pool.
* At the beginning of the pool, add an extra 7 bits
* rotation, so that successive passes spread the
* input bits across the pool evenly.
*/
input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
}
r->input_rotate = input_rotate;
r->add_ptr = i;
}
static void __mix_pool_bytes(struct entropy_store *r, const void *in,
int nbytes)
{
trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_);
_mix_pool_bytes(r, in, nbytes);
}
static void mix_pool_bytes(struct entropy_store *r, const void *in,
int nbytes)
{
unsigned long flags;
trace_mix_pool_bytes(r->name, nbytes, _RET_IP_);
spin_lock_irqsave(&r->lock, flags);
_mix_pool_bytes(r, in, nbytes);
spin_unlock_irqrestore(&r->lock, flags);
}
struct fast_pool {
__u32 pool[4];
unsigned long last;
unsigned char count;
unsigned char notimer_count;
unsigned char rotate;
};
/*
* This is a fast mixing routine used by the interrupt randomness
* collector. It's hardcoded for an 128 bit pool and assumes that any
* locks that might be needed are taken by the caller.
*/
static void fast_mix(struct fast_pool *f)
{
__u32 a = f->pool[0], b = f->pool[1];
__u32 c = f->pool[2], d = f->pool[3];
a += b; c += d;
b = rol32(a, 6); d = rol32(c, 27);
d ^= a; b ^= c;
a += b; c += d;
b = rol32(a, 16); d = rol32(c, 14);
d ^= a; b ^= c;
a += b; c += d;
b = rol32(a, 6); d = rol32(c, 27);
d ^= a; b ^= c;
a += b; c += d;
b = rol32(a, 16); d = rol32(c, 14);
d ^= a; b ^= c;
f->pool[0] = a; f->pool[1] = b;
f->pool[2] = c; f->pool[3] = d;
f->count++;
}
/*
* Credit (or debit) the entropy store with n bits of entropy.
* Use credit_entropy_bits_safe() if the value comes from userspace
* or otherwise should be checked for extreme values.
*/
static void credit_entropy_bits(struct entropy_store *r, int nbits)
{
int entropy_count, orig;
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
const int pool_size = r->poolinfo->poolfracbits;
int nfrac = nbits << ENTROPY_SHIFT;
if (!nbits)
return;
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
if (nfrac < 0) {
/* Debit */
entropy_count += nfrac;
} else {
/*
* Credit: we have to account for the possibility of
* overwriting already present entropy. Even in the
* ideal case of pure Shannon entropy, new contributions
* approach the full value asymptotically:
*
* entropy <- entropy + (pool_size - entropy) *
* (1 - exp(-add_entropy/pool_size))
*
* For add_entropy <= pool_size/2 then
* (1 - exp(-add_entropy/pool_size)) >=
* (add_entropy/pool_size)*0.7869...
* so we can approximate the exponential with
* 3/4*add_entropy/pool_size and still be on the
* safe side by adding at most pool_size/2 at a time.
*
* The use of pool_size-2 in the while statement is to
* prevent rounding artifacts from making the loop
* arbitrarily long; this limits the loop to log2(pool_size)*2
* turns no matter how large nbits is.
*/
int pnfrac = nfrac;
const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2;
/* The +2 corresponds to the /4 in the denominator */
do {
unsigned int anfrac = min(pnfrac, pool_size/2);
unsigned int add =
((pool_size - entropy_count)*anfrac*3) >> s;
entropy_count += add;
pnfrac -= anfrac;
} while (unlikely(entropy_count < pool_size-2 && pnfrac));
}
if (entropy_count < 0) {
pr_warn("random: negative entropy/overflow: pool %s count %d\n",
r->name, entropy_count);
WARN_ON(1);
entropy_count = 0;
random: account for entropy loss due to overwrites When we write entropy into a non-empty pool, we currently don't account at all for the fact that we will probabilistically overwrite some of the entropy in that pool. This means that unless the pool is fully empty, we are currently *guaranteed* to overestimate the amount of entropy in the pool! Assuming Shannon entropy with zero correlations we end up with an exponentally decaying value of new entropy added: entropy <- entropy + (pool_size - entropy) * (1 - exp(-add_entropy/pool_size)) However, calculations involving fractional exponentials are not practical in the kernel, so apply a piecewise linearization: For add_entropy <= pool_size/2 then (1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869... ... so we can approximate the exponential with 3/4*add_entropy/pool_size and still be on the safe side by adding at most pool_size/2 at a time. In order for the loop not to take arbitrary amounts of time if a bad ioctl is received, terminate if we are within one bit of full. This way the loop is guaranteed to terminate after no more than log2(poolsize) iterations, no matter what the input value is. The vast majority of the time the loop will be executed exactly once. The piecewise linearization is very conservative, approaching 3/4 of the usable input value for small inputs, however, our entropy estimation is pretty weak at best, especially for small values; we have no handle on correlation; and the Shannon entropy measure (Rényi entropy of order 1) is not the correct one to use in the first place, but rather the correct entropy measure is the min-entropy, the Rényi entropy of infinite order. As such, this conservatism seems more than justified. This does introduce fractional bit values. I have left it to have 3 bits of fraction, so that with a pool of 2^12 bits the multiply in credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It is definitely possible to allow for more fractional accounting, but that multiply then would have to be turned into a 32*32 -> 64 multiply. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: DJ Johnston <dj.johnston@intel.com>
2013-09-11 11:16:17 +08:00
} else if (entropy_count > pool_size)
entropy_count = pool_size;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;
r->entropy_total += nbits;
The /dev/random changes for 3.13 including a number of improvements in the following areas: performance, avoiding waste of entropy, better tracking of entropy estimates, support for non-x86 platforms that have a register which can't be used for fine-grained timekeeping, but which might be good enough for the random driver. Also add some printk's so that we can see how quickly /dev/urandom can get initialized, and when programs try to use /dev/urandom before it is fully initialized (since this could be a security issue). This shouldn't be an issue on x86 desktop/laptops --- a test on my Lenovo T430s laptop shows that /dev/urandom is getting fully initialized approximately two seconds before the root file system is mounted read/write --- this may be an issue with ARM and MIPS embedded/mobile systems, though. These printk's will be a useful canary before potentially adding a future change to start blocking processes which try to read from /dev/urandom before it is initialized, which is something FreeBSD does already for security reasons, and which security folks have been agitating for Linux to also adopt. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.14 (GNU/Linux) iQIcBAABCAAGBQJShC4MAAoJENNvdpvBGATwC0QQAMujsIxTZnsHwQrbb5eJf1kD 74TwQyEfWw5qnGQrc8JOoAbe1MG7C4QlfHxRsWxvCD8G+Mft4Q5ZgZOt0/ecAGD6 Tid58EaZGSfK9+YE6jgvJFekQADCREdPSxBASJ3cECT6dXXBX9IqR9gbAK02mM+w QZdbgWBMsPJZiHSsCNeRbZ9oIiPdcNDsMJwzJhirPUeAnKCaX3z+LWc3XcMw7wYi q5cSl0ENZd6QsBKs37A1ol5BtLEsoot2t3HKdnpOBsDQKSJ712KduwN5jUfs6h9D 0fqmVHwfKsge+D8/3NgBKz+yWLQnGkuB4Ibo+09BZXwH3rYU1/gKm0iLNi0yQ5fV 73bn4pqF6cZdDNgj0Ic+MyYAW+S/NOQ6TcF/3eSAPW6z/wHZOfZ2njCh1GEHBOKI 6iZZu+Ek7QyFJ/z5Fr1bXFJR7V99r7hRD3gwMCMZ/mjhloB2cyD0a2A9kFP85ykI I4tFEnq0FpX/K60ag4hiLnqVx/TsmbdMoz+8OpQckHgQJrZMuRRf1d+T4au47Y6K uXGLpSuvkALYW2koo2OoO2d873N/89fqFL8lI8Iy0YlgAxxxm++gl1Mql/E1wPOa 5jB0lW/jex/CquE7meTgRlM/fTU/HVbe3608ZNUYBJUHS9K/PaSnCCu2ya8/TsSW xeVS/vMnNvtGerdEIyKm =wla0 -----END PGP SIGNATURE----- Merge tag 'random_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random Pull /dev/random changes from Ted Ts'o: "The /dev/random changes for 3.13 including a number of improvements in the following areas: performance, avoiding waste of entropy, better tracking of entropy estimates, support for non-x86 platforms that have a register which can't be used for fine-grained timekeeping, but which might be good enough for the random driver. Also add some printk's so that we can see how quickly /dev/urandom can get initialized, and when programs try to use /dev/urandom before it is fully initialized (since this could be a security issue). This shouldn't be an issue on x86 desktop/laptops --- a test on my Lenovo T430s laptop shows that /dev/urandom is getting fully initialized approximately two seconds before the root file system is mounted read/write --- this may be an issue with ARM and MIPS embedded/mobile systems, though. These printk's will be a useful canary before potentially adding a future change to start blocking processes which try to read from /dev/urandom before it is initialized, which is something FreeBSD does already for security reasons, and which security folks have been agitating for Linux to also adopt" * tag 'random_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random: random: add debugging code to detect early use of get_random_bytes() random: initialize the last_time field in struct timer_rand_state random: don't zap entropy count in rand_initialize() random: printk notifications for urandom pool initialization random: make add_timer_randomness() fill the nonblocking pool first random: convert DEBUG_ENT to tracepoints random: push extra entropy to the output pools random: drop trickle mode random: adjust the generator polynomials in the mixing function slightly random: speed up the fast_mix function by a factor of four random: cap the rate which the /dev/urandom pool gets reseeded random: optimize the entropy_store structure random: optimize spinlock use in add_device_randomness() random: fix the tracepoint for get_random_bytes(_arch) random: account for entropy loss due to overwrites random: allow fractional bits to be tracked random: statically compute poolbitshift, poolbytes, poolbits random: mix in architectural randomness earlier in extract_buf()
2013-11-17 02:19:15 +08:00
if (!r->initialized && r->entropy_total > 128) {
r->initialized = 1;
r->entropy_total = 0;
if (r == &nonblocking_pool) {
prandom_reseed_late();
pr_notice("random: %s pool is initialized\n", r->name);
}
}
trace_credit_entropy_bits(r->name, nbits,
entropy_count >> ENTROPY_SHIFT,
r->entropy_total, _RET_IP_);
if (r == &input_pool) {
int entropy_bits = entropy_count >> ENTROPY_SHIFT;
/* should we wake readers? */
if (entropy_bits >= random_read_wakeup_bits) {
wake_up_interruptible(&random_read_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
/* If the input pool is getting full, send some
* entropy to the two output pools, flipping back and
* forth between them, until the output pools are 75%
* full.
*/
if (entropy_bits > random_write_wakeup_bits &&
r->initialized &&
r->entropy_total >= 2*random_read_wakeup_bits) {
static struct entropy_store *last = &blocking_pool;
struct entropy_store *other = &blocking_pool;
if (last == &blocking_pool)
other = &nonblocking_pool;
if (other->entropy_count <=
3 * other->poolinfo->poolfracbits / 4)
last = other;
if (last->entropy_count <=
3 * last->poolinfo->poolfracbits / 4) {
schedule_work(&last->push_work);
r->entropy_total = 0;
}
}
2008-04-29 16:03:08 +08:00
}
}
static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
{
const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
/* Cap the value to avoid overflows */
nbits = min(nbits, nbits_max);
nbits = max(nbits, -nbits_max);
credit_entropy_bits(r, nbits);
}
/*********************************************************************
*
* Entropy input management
*
*********************************************************************/
/* There is one of these per entropy source */
struct timer_rand_state {
cycles_t last_time;
long last_delta, last_delta2;
unsigned dont_count_entropy:1;
};
#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
/*
* Add device- or boot-specific data to the input and nonblocking
* pools to help initialize them to unique values.
*
* None of this adds any entropy, it is meant to avoid the
* problem of the nonblocking pool having similar initial state
* across largely identical devices.
*/
void add_device_randomness(const void *buf, unsigned int size)
{
unsigned long time = random_get_entropy() ^ jiffies;
unsigned long flags;
trace_add_device_randomness(size, _RET_IP_);
spin_lock_irqsave(&input_pool.lock, flags);
_mix_pool_bytes(&input_pool, buf, size);
_mix_pool_bytes(&input_pool, &time, sizeof(time));
spin_unlock_irqrestore(&input_pool.lock, flags);
spin_lock_irqsave(&nonblocking_pool.lock, flags);
_mix_pool_bytes(&nonblocking_pool, buf, size);
_mix_pool_bytes(&nonblocking_pool, &time, sizeof(time));
spin_unlock_irqrestore(&nonblocking_pool.lock, flags);
}
EXPORT_SYMBOL(add_device_randomness);
static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE;
/*
* This function adds entropy to the entropy "pool" by using timing
* delays. It uses the timer_rand_state structure to make an estimate
* of how many bits of entropy this call has added to the pool.
*
* The number "num" is also added to the pool - it should somehow describe
* the type of event which just happened. This is currently 0-255 for
* keyboard scan codes, and 256 upwards for interrupts.
*
*/
static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
{
struct entropy_store *r;
struct {
long jiffies;
unsigned cycles;
unsigned num;
} sample;
long delta, delta2, delta3;
preempt_disable();
sample.jiffies = jiffies;
sample.cycles = random_get_entropy();
sample.num = num;
r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
mix_pool_bytes(r, &sample, sizeof(sample));
/*
* Calculate number of bits of randomness we probably added.
* We take into account the first, second and third-order deltas
* in order to make our estimate.
*/
if (!state->dont_count_entropy) {
delta = sample.jiffies - state->last_time;
state->last_time = sample.jiffies;
delta2 = delta - state->last_delta;
state->last_delta = delta;
delta3 = delta2 - state->last_delta2;
state->last_delta2 = delta2;
if (delta < 0)
delta = -delta;
if (delta2 < 0)
delta2 = -delta2;
if (delta3 < 0)
delta3 = -delta3;
if (delta > delta2)
delta = delta2;
if (delta > delta3)
delta = delta3;
/*
* delta is now minimum absolute delta.
* Round down by 1 bit on general principles,
* and limit entropy entimate to 12 bits.
*/
credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
}
preempt_enable();
}
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value)
{
static unsigned char last_value;
/* ignore autorepeat and the like */
if (value == last_value)
return;
last_value = value;
add_timer_randomness(&input_timer_state,
(type << 4) ^ code ^ (code >> 4) ^ value);
trace_add_input_randomness(ENTROPY_BITS(&input_pool));
}
EXPORT_SYMBOL_GPL(add_input_randomness);
static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
#ifdef ADD_INTERRUPT_BENCH
static unsigned long avg_cycles, avg_deviation;
#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */
#define FIXED_1_2 (1 << (AVG_SHIFT-1))
static void add_interrupt_bench(cycles_t start)
{
long delta = random_get_entropy() - start;
/* Use a weighted moving average */
delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT);
avg_cycles += delta;
/* And average deviation */
delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT);
avg_deviation += delta;
}
#else
#define add_interrupt_bench(x)
#endif
void add_interrupt_randomness(int irq, int irq_flags)
{
struct entropy_store *r;
struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness);
struct pt_regs *regs = get_irq_regs();
unsigned long now = jiffies;
cycles_t cycles = random_get_entropy();
__u32 c_high, j_high;
__u64 ip;
unsigned long seed;
int credit = 0;
c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
j_high = (sizeof(now) > 4) ? now >> 32 : 0;
fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
fast_pool->pool[1] ^= now ^ c_high;
ip = regs ? instruction_pointer(regs) : _RET_IP_;
fast_pool->pool[2] ^= ip;
fast_pool->pool[3] ^= ip >> 32;
fast_mix(fast_pool);
if ((irq_flags & __IRQF_TIMER) == 0)
fast_pool->notimer_count++;
add_interrupt_bench(cycles);
if (cycles) {
if ((fast_pool->count < 64) &&
!time_after(now, fast_pool->last + HZ))
return;
} else {
/* CPU does not have a cycle counting register :-( */
if (fast_pool->count < 64)
return;
}
r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
if (!spin_trylock(&r->lock))
return;
fast_pool->last = now;
__mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
/*
* If we have architectural seed generator, produce a seed and
* add it to the pool. For the sake of paranoia count it as
* 50% entropic.
*/
if (arch_get_random_seed_long(&seed)) {
__mix_pool_bytes(r, &seed, sizeof(seed));
credit += sizeof(seed) * 4;
}
spin_unlock(&r->lock);
/*
* If we have a valid cycle counter or if the majority of
* interrupts collected were non-timer interrupts, then give
* an entropy credit of 1 bit. Yes, this is being very
* conservative.
*/
if (cycles || (fast_pool->notimer_count >= 32))
credit++;
fast_pool->count = fast_pool->notimer_count = 0;
credit_entropy_bits(r, credit);
}
[PATCH] BLOCK: Make it possible to disable the block layer [try #6] Make it possible to disable the block layer. Not all embedded devices require it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require the block layer to be present. This patch does the following: (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev support. (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls an item that uses the block layer. This includes: (*) Block I/O tracing. (*) Disk partition code. (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS. (*) The SCSI layer. As far as I can tell, even SCSI chardevs use the block layer to do scheduling. Some drivers that use SCSI facilities - such as USB storage - end up disabled indirectly from this. (*) Various block-based device drivers, such as IDE and the old CDROM drivers. (*) MTD blockdev handling and FTL. (*) JFFS - which uses set_bdev_super(), something it could avoid doing by taking a leaf out of JFFS2's book. (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is, however, still used in places, and so is still available. (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and parts of linux/fs.h. (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK. (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK. (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK is not enabled. (*) fs/no-block.c is created to hold out-of-line stubs and things that are required when CONFIG_BLOCK is not set: (*) Default blockdev file operations (to give error ENODEV on opening). (*) Makes some /proc changes: (*) /proc/devices does not list any blockdevs. (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK. (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK. (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if given command other than Q_SYNC or if a special device is specified. (*) In init/do_mounts.c, no reference is made to the blockdev routines if CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2. (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return error ENOSYS by way of cond_syscall if so). (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if CONFIG_BLOCK is not set, since they can't then happen. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-10-01 02:45:40 +08:00
#ifdef CONFIG_BLOCK
void add_disk_randomness(struct gendisk *disk)
{
if (!disk || !disk->random)
return;
/* first major is 1, so we get >= 0x200 here */
add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool));
}
EXPORT_SYMBOL_GPL(add_disk_randomness);
[PATCH] BLOCK: Make it possible to disable the block layer [try #6] Make it possible to disable the block layer. Not all embedded devices require it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require the block layer to be present. This patch does the following: (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev support. (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls an item that uses the block layer. This includes: (*) Block I/O tracing. (*) Disk partition code. (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS. (*) The SCSI layer. As far as I can tell, even SCSI chardevs use the block layer to do scheduling. Some drivers that use SCSI facilities - such as USB storage - end up disabled indirectly from this. (*) Various block-based device drivers, such as IDE and the old CDROM drivers. (*) MTD blockdev handling and FTL. (*) JFFS - which uses set_bdev_super(), something it could avoid doing by taking a leaf out of JFFS2's book. (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is, however, still used in places, and so is still available. (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and parts of linux/fs.h. (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK. (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK. (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK is not enabled. (*) fs/no-block.c is created to hold out-of-line stubs and things that are required when CONFIG_BLOCK is not set: (*) Default blockdev file operations (to give error ENODEV on opening). (*) Makes some /proc changes: (*) /proc/devices does not list any blockdevs. (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK. (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK. (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if given command other than Q_SYNC or if a special device is specified. (*) In init/do_mounts.c, no reference is made to the blockdev routines if CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2. (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return error ENOSYS by way of cond_syscall if so). (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if CONFIG_BLOCK is not set, since they can't then happen. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-10-01 02:45:40 +08:00
#endif
/*********************************************************************
*
* Entropy extraction routines
*
*********************************************************************/
static ssize_t extract_entropy(struct entropy_store *r, void *buf,
size_t nbytes, int min, int rsvd);
/*
* This utility inline function is responsible for transferring entropy
* from the primary pool to the secondary extraction pool. We make
* sure we pull enough for a 'catastrophic reseed'.
*/
static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes);
static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
{
if (!r->pull ||
r->entropy_count >= (nbytes << (ENTROPY_SHIFT + 3)) ||
r->entropy_count > r->poolinfo->poolfracbits)
return;
if (r->limit == 0 && random_min_urandom_seed) {
unsigned long now = jiffies;
if (time_before(now,
r->last_pulled + random_min_urandom_seed * HZ))
return;
r->last_pulled = now;
}
_xfer_secondary_pool(r, nbytes);
}
static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
{
__u32 tmp[OUTPUT_POOL_WORDS];
/* For /dev/random's pool, always leave two wakeups' worth */
int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
int bytes = nbytes;
/* pull at least as much as a wakeup */
bytes = max_t(int, bytes, random_read_wakeup_bits / 8);
/* but never more than the buffer size */
bytes = min_t(int, bytes, sizeof(tmp));
trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
bytes = extract_entropy(r->pull, tmp, bytes,
random_read_wakeup_bits / 8, rsvd_bytes);
mix_pool_bytes(r, tmp, bytes);
credit_entropy_bits(r, bytes*8);
}
/*
* Used as a workqueue function so that when the input pool is getting
* full, we can "spill over" some entropy to the output pools. That
* way the output pools can store some of the excess entropy instead
* of letting it go to waste.
*/
static void push_to_pool(struct work_struct *work)
{
struct entropy_store *r = container_of(work, struct entropy_store,
push_work);
BUG_ON(!r);
_xfer_secondary_pool(r, random_read_wakeup_bits/8);
trace_push_to_pool(r->name, r->entropy_count >> ENTROPY_SHIFT,
r->pull->entropy_count >> ENTROPY_SHIFT);
}
/*
* This function decides how many bytes to actually take from the
* given pool, and also debits the entropy count accordingly.
*/
static size_t account(struct entropy_store *r, size_t nbytes, int min,
int reserved)
{
int entropy_count, orig;
size_t ibytes;
BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
/* Can we pull enough? */
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
ibytes = nbytes;
/* If limited, never pull more than available */
if (r->limit) {
int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
if ((have_bytes -= reserved) < 0)
have_bytes = 0;
ibytes = min_t(size_t, ibytes, have_bytes);
}
if (ibytes < min)
ibytes = 0;
if ((entropy_count -= ibytes << (ENTROPY_SHIFT + 3)) < 0)
entropy_count = 0;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;
trace_debit_entropy(r->name, 8 * ibytes);
if (ibytes &&
(r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
wake_up_interruptible(&random_write_wait);
kill_fasync(&fasync, SIGIO, POLL_OUT);
}
return ibytes;
}
/*
* This function does the actual extraction for extract_entropy and
* extract_entropy_user.
*
* Note: we assume that .poolwords is a multiple of 16 words.
*/
static void extract_buf(struct entropy_store *r, __u8 *out)
{
int i;
union {
__u32 w[5];
unsigned long l[LONGS(20)];
} hash;
__u32 workspace[SHA_WORKSPACE_WORDS];
unsigned long flags;
/*
* If we have an architectural hardware random number
* generator, use it for SHA's initial vector
*/
sha_init(hash.w);
for (i = 0; i < LONGS(20); i++) {
unsigned long v;
if (!arch_get_random_long(&v))
break;
hash.l[i] = v;
}
/* Generate a hash across the pool, 16 words (512 bits) at a time */
spin_lock_irqsave(&r->lock, flags);
for (i = 0; i < r->poolinfo->poolwords; i += 16)
sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
/*
* We mix the hash back into the pool to prevent backtracking
* attacks (where the attacker knows the state of the pool
* plus the current outputs, and attempts to find previous
* ouputs), unless the hash function can be inverted. By
* mixing at least a SHA1 worth of hash data back, we make
* brute-forcing the feedback as hard as brute-forcing the
* hash.
*/
__mix_pool_bytes(r, hash.w, sizeof(hash.w));
spin_unlock_irqrestore(&r->lock, flags);
memset(workspace, 0, sizeof(workspace));
/*
* In case the hash function has some recognizable output
* pattern, we fold it in half. Thus, we always feed back
* twice as much data as we output.
*/
hash.w[0] ^= hash.w[3];
hash.w[1] ^= hash.w[4];
hash.w[2] ^= rol32(hash.w[2], 16);
memcpy(out, &hash, EXTRACT_SIZE);
memset(&hash, 0, sizeof(hash));
}
/*
* This function extracts randomness from the "entropy pool", and
* returns it in a buffer.
*
* The min parameter specifies the minimum amount we can pull before
* failing to avoid races that defeat catastrophic reseeding while the
* reserved parameter indicates how much entropy we must leave in the
* pool after each pull to avoid starving other readers.
*/
static ssize_t extract_entropy(struct entropy_store *r, void *buf,
size_t nbytes, int min, int reserved)
{
ssize_t ret = 0, i;
__u8 tmp[EXTRACT_SIZE];
2013-05-25 06:55:31 +08:00
unsigned long flags;
/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
2013-05-25 06:55:31 +08:00
if (fips_enabled) {
spin_lock_irqsave(&r->lock, flags);
if (!r->last_data_init) {
r->last_data_init = 1;
2013-05-25 06:55:31 +08:00
spin_unlock_irqrestore(&r->lock, flags);
trace_extract_entropy(r->name, EXTRACT_SIZE,
ENTROPY_BITS(r), _RET_IP_);
2013-05-25 06:55:31 +08:00
xfer_secondary_pool(r, EXTRACT_SIZE);
extract_buf(r, tmp);
spin_lock_irqsave(&r->lock, flags);
memcpy(r->last_data, tmp, EXTRACT_SIZE);
}
spin_unlock_irqrestore(&r->lock, flags);
}
trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
xfer_secondary_pool(r, nbytes);
nbytes = account(r, nbytes, min, reserved);
while (nbytes) {
extract_buf(r, tmp);
if (fips_enabled) {
spin_lock_irqsave(&r->lock, flags);
if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
panic("Hardware RNG duplicated output!\n");
memcpy(r->last_data, tmp, EXTRACT_SIZE);
spin_unlock_irqrestore(&r->lock, flags);
}
i = min_t(int, nbytes, EXTRACT_SIZE);
memcpy(buf, tmp, i);
nbytes -= i;
buf += i;
ret += i;
}
/* Wipe data just returned from memory */
memset(tmp, 0, sizeof(tmp));
return ret;
}
/*
* This function extracts randomness from the "entropy pool", and
* returns it in a userspace buffer.
*/
static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
size_t nbytes)
{
ssize_t ret = 0, i;
__u8 tmp[EXTRACT_SIZE];
trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
xfer_secondary_pool(r, nbytes);
nbytes = account(r, nbytes, 0, 0);
while (nbytes) {
if (need_resched()) {
if (signal_pending(current)) {
if (ret == 0)
ret = -ERESTARTSYS;
break;
}
schedule();
}
extract_buf(r, tmp);
i = min_t(int, nbytes, EXTRACT_SIZE);
if (copy_to_user(buf, tmp, i)) {
ret = -EFAULT;
break;
}
nbytes -= i;
buf += i;
ret += i;
}
/* Wipe data just returned from memory */
memset(tmp, 0, sizeof(tmp));
return ret;
}
/*
* This function is the exported kernel interface. It returns some
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
* number of good random numbers, suitable for key generation, seeding
* TCP sequence numbers, etc. It does not rely on the hardware random
* number generator. For random bytes direct from the hardware RNG
* (when available), use get_random_bytes_arch().
*/
void get_random_bytes(void *buf, int nbytes)
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
{
#if DEBUG_RANDOM_BOOT > 0
if (unlikely(nonblocking_pool.initialized == 0))
printk(KERN_NOTICE "random: %pF get_random_bytes called "
"with %d bits of entropy available\n",
(void *) _RET_IP_,
nonblocking_pool.entropy_total);
#endif
trace_get_random_bytes(nbytes, _RET_IP_);
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);
/*
* This function will use the architecture-specific hardware random
* number generator if it is available. The arch-specific hw RNG will
* almost certainly be faster than what we can do in software, but it
* is impossible to verify that it is implemented securely (as
* opposed, to, say, the AES encryption of a sequence number using a
* key known by the NSA). So it's useful if we need the speed, but
* only if we're willing to trust the hardware manufacturer not to
* have put in a back door.
*/
void get_random_bytes_arch(void *buf, int nbytes)
{
char *p = buf;
trace_get_random_bytes_arch(nbytes, _RET_IP_);
while (nbytes) {
unsigned long v;
int chunk = min(nbytes, (int)sizeof(unsigned long));
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
if (!arch_get_random_long(&v))
break;
memcpy(p, &v, chunk);
p += chunk;
nbytes -= chunk;
}
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
if (nbytes)
extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
}
random: add new get_random_bytes_arch() function Create a new function, get_random_bytes_arch() which will use the architecture-specific hardware random number generator if it is present. Change get_random_bytes() to not use the HW RNG, even if it is avaiable. The reason for this is that the hw random number generator is fast (if it is present), but it requires that we trust the hardware manufacturer to have not put in a back door. (For example, an increasing counter encrypted by an AES key known to the NSA.) It's unlikely that Intel (for example) was paid off by the US Government to do this, but it's impossible for them to prove otherwise --- especially since Bull Mountain is documented to use AES as a whitener. Hence, the output of an evil, trojan-horse version of RDRAND is statistically indistinguishable from an RDRAND implemented to the specifications claimed by Intel. Short of using a tunnelling electronic microscope to reverse engineer an Ivy Bridge chip and disassembling and analyzing the CPU microcode, there's no way for us to tell for sure. Since users of get_random_bytes() in the Linux kernel need to be able to support hardware systems where the HW RNG is not present, most time-sensitive users of this interface have already created their own cryptographic RNG interface which uses get_random_bytes() as a seed. So it's much better to use the HW RNG to improve the existing random number generator, by mixing in any entropy returned by the HW RNG into /dev/random's entropy pool, but to always _use_ /dev/random's entropy pool. This way we get almost of the benefits of the HW RNG without any potential liabilities. The only benefits we forgo is the speed/performance enhancements --- and generic kernel code can't depend on depend on get_random_bytes() having the speed of a HW RNG anyway. For those places that really want access to the arch-specific HW RNG, if it is available, we provide get_random_bytes_arch(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
2012-07-05 22:35:23 +08:00
EXPORT_SYMBOL(get_random_bytes_arch);
/*
* init_std_data - initialize pool with system data
*
* @r: pool to initialize
*
* This function clears the pool's entropy count and mixes some system
* data into the pool to prepare it for use. The pool is not cleared
* as that can only decrease the entropy in the pool.
*/
static void init_std_data(struct entropy_store *r)
{
int i;
ktime_t now = ktime_get_real();
unsigned long rv;
r->last_pulled = jiffies;
mix_pool_bytes(r, &now, sizeof(now));
for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) {
if (!arch_get_random_seed_long(&rv) &&
!arch_get_random_long(&rv))
rv = random_get_entropy();
mix_pool_bytes(r, &rv, sizeof(rv));
}
mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
}
/*
* Note that setup_arch() may call add_device_randomness()
* long before we get here. This allows seeding of the pools
* with some platform dependent data very early in the boot
* process. But it limits our options here. We must use
* statically allocated structures that already have all
* initializations complete at compile time. We should also
* take care not to overwrite the precious per platform data
* we were given.
*/
static int rand_initialize(void)
{
init_std_data(&input_pool);
init_std_data(&blocking_pool);
init_std_data(&nonblocking_pool);
return 0;
}
early_initcall(rand_initialize);
[PATCH] BLOCK: Make it possible to disable the block layer [try #6] Make it possible to disable the block layer. Not all embedded devices require it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require the block layer to be present. This patch does the following: (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev support. (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls an item that uses the block layer. This includes: (*) Block I/O tracing. (*) Disk partition code. (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS. (*) The SCSI layer. As far as I can tell, even SCSI chardevs use the block layer to do scheduling. Some drivers that use SCSI facilities - such as USB storage - end up disabled indirectly from this. (*) Various block-based device drivers, such as IDE and the old CDROM drivers. (*) MTD blockdev handling and FTL. (*) JFFS - which uses set_bdev_super(), something it could avoid doing by taking a leaf out of JFFS2's book. (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is, however, still used in places, and so is still available. (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and parts of linux/fs.h. (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK. (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK. (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK is not enabled. (*) fs/no-block.c is created to hold out-of-line stubs and things that are required when CONFIG_BLOCK is not set: (*) Default blockdev file operations (to give error ENODEV on opening). (*) Makes some /proc changes: (*) /proc/devices does not list any blockdevs. (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK. (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK. (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if given command other than Q_SYNC or if a special device is specified. (*) In init/do_mounts.c, no reference is made to the blockdev routines if CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2. (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return error ENOSYS by way of cond_syscall if so). (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if CONFIG_BLOCK is not set, since they can't then happen. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-10-01 02:45:40 +08:00
#ifdef CONFIG_BLOCK
void rand_initialize_disk(struct gendisk *disk)
{
struct timer_rand_state *state;
/*
* If kzalloc returns null, we just won't use that entropy
* source.
*/
state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
if (state) {
state->last_time = INITIAL_JIFFIES;
disk->random = state;
}
}
[PATCH] BLOCK: Make it possible to disable the block layer [try #6] Make it possible to disable the block layer. Not all embedded devices require it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require the block layer to be present. This patch does the following: (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev support. (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls an item that uses the block layer. This includes: (*) Block I/O tracing. (*) Disk partition code. (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS. (*) The SCSI layer. As far as I can tell, even SCSI chardevs use the block layer to do scheduling. Some drivers that use SCSI facilities - such as USB storage - end up disabled indirectly from this. (*) Various block-based device drivers, such as IDE and the old CDROM drivers. (*) MTD blockdev handling and FTL. (*) JFFS - which uses set_bdev_super(), something it could avoid doing by taking a leaf out of JFFS2's book. (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and linux/elevator.h contingent on CONFIG_BLOCK being set. sector_div() is, however, still used in places, and so is still available. (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and parts of linux/fs.h. (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK. (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK. (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK is not enabled. (*) fs/no-block.c is created to hold out-of-line stubs and things that are required when CONFIG_BLOCK is not set: (*) Default blockdev file operations (to give error ENODEV on opening). (*) Makes some /proc changes: (*) /proc/devices does not list any blockdevs. (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK. (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK. (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if given command other than Q_SYNC or if a special device is specified. (*) In init/do_mounts.c, no reference is made to the blockdev routines if CONFIG_BLOCK is not defined. This does not prohibit NFS roots or JFFS2. (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return error ENOSYS by way of cond_syscall if so). (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if CONFIG_BLOCK is not set, since they can't then happen. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-10-01 02:45:40 +08:00
#endif
/*
* Attempt an emergency refill using arch_get_random_seed_long().
*
* As with add_interrupt_randomness() be paranoid and only
* credit the output as 50% entropic.
*/
static int arch_random_refill(void)
{
const unsigned int nlongs = 64; /* Arbitrary number */
unsigned int n = 0;
unsigned int i;
unsigned long buf[nlongs];
if (!arch_has_random_seed())
return 0;
for (i = 0; i < nlongs; i++) {
if (arch_get_random_seed_long(&buf[n]))
n++;
}
if (n) {
unsigned int rand_bytes = n * sizeof(unsigned long);
mix_pool_bytes(&input_pool, buf, rand_bytes);
credit_entropy_bits(&input_pool, rand_bytes*4);
}
return n;
}
static ssize_t
random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
ssize_t n;
if (nbytes == 0)
return 0;
nbytes = min_t(size_t, nbytes, SEC_XFER_SIZE);
while (1) {
n = extract_entropy_user(&blocking_pool, buf, nbytes);
if (n < 0)
return n;
trace_random_read(n*8, (nbytes-n)*8,
ENTROPY_BITS(&blocking_pool),
ENTROPY_BITS(&input_pool));
if (n > 0)
return n;
/* Pool is (near) empty. Maybe wait and retry. */
/* First try an emergency refill */
if (arch_random_refill())
continue;
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
wait_event_interruptible(random_read_wait,
ENTROPY_BITS(&input_pool) >=
random_read_wakeup_bits);
if (signal_pending(current))
return -ERESTARTSYS;
}
}
static ssize_t
urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
int ret;
if (unlikely(nonblocking_pool.initialized == 0))
printk_once(KERN_NOTICE "random: %s urandom read "
"with %d bits of entropy available\n",
current->comm, nonblocking_pool.entropy_total);
ret = extract_entropy_user(&nonblocking_pool, buf, nbytes);
trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool),
ENTROPY_BITS(&input_pool));
return ret;
}
static unsigned int
random_poll(struct file *file, poll_table * wait)
{
unsigned int mask;
poll_wait(file, &random_read_wait, wait);
poll_wait(file, &random_write_wait, wait);
mask = 0;
if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
mask |= POLLIN | POLLRDNORM;
if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
mask |= POLLOUT | POLLWRNORM;
return mask;
}
static int
write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
{
size_t bytes;
__u32 buf[16];
const char __user *p = buffer;
while (count > 0) {
bytes = min(count, sizeof(buf));
if (copy_from_user(&buf, p, bytes))
return -EFAULT;
count -= bytes;
p += bytes;
mix_pool_bytes(r, buf, bytes);
cond_resched();
}
return 0;
}
static ssize_t random_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
size_t ret;
ret = write_pool(&blocking_pool, buffer, count);
if (ret)
return ret;
ret = write_pool(&nonblocking_pool, buffer, count);
if (ret)
return ret;
return (ssize_t)count;
}
static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
int size, ent_count;
int __user *p = (int __user *)arg;
int retval;
switch (cmd) {
case RNDGETENTCNT:
/* inherently racy, no point locking */
ent_count = ENTROPY_BITS(&input_pool);
if (put_user(ent_count, p))
return -EFAULT;
return 0;
case RNDADDTOENTCNT:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(ent_count, p))
return -EFAULT;
credit_entropy_bits_safe(&input_pool, ent_count);
return 0;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(ent_count, p++))
return -EFAULT;
if (ent_count < 0)
return -EINVAL;
if (get_user(size, p++))
return -EFAULT;
retval = write_pool(&input_pool, (const char __user *)p,
size);
if (retval < 0)
return retval;
credit_entropy_bits_safe(&input_pool, ent_count);
return 0;
case RNDZAPENTCNT:
case RNDCLEARPOOL:
/*
* Clear the entropy pool counters. We no longer clear
* the entropy pool, as that's silly.
*/
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
input_pool.entropy_count = 0;
nonblocking_pool.entropy_count = 0;
blocking_pool.entropy_count = 0;
return 0;
default:
return -EINVAL;
}
}
2008-04-29 16:03:08 +08:00
static int random_fasync(int fd, struct file *filp, int on)
{
return fasync_helper(fd, filp, on, &fasync);
}
const struct file_operations random_fops = {
.read = random_read,
.write = random_write,
.poll = random_poll,
.unlocked_ioctl = random_ioctl,
2008-04-29 16:03:08 +08:00
.fasync = random_fasync,
llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Julia Lawall <julia@diku.dk> Cc: Christoph Hellwig <hch@infradead.org>
2010-08-16 00:52:59 +08:00
.llseek = noop_llseek,
};
const struct file_operations urandom_fops = {
.read = urandom_read,
.write = random_write,
.unlocked_ioctl = random_ioctl,
2008-04-29 16:03:08 +08:00
.fasync = random_fasync,
llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Julia Lawall <julia@diku.dk> Cc: Christoph Hellwig <hch@infradead.org>
2010-08-16 00:52:59 +08:00
.llseek = noop_llseek,
};
/***************************************************************
* Random UUID interface
*
* Used here for a Boot ID, but can be useful for other kernel
* drivers.
***************************************************************/
/*
* Generate random UUID
*/
void generate_random_uuid(unsigned char uuid_out[16])
{
get_random_bytes(uuid_out, 16);
/* Set UUID version to 4 --- truly random generation */
uuid_out[6] = (uuid_out[6] & 0x0F) | 0x40;
/* Set the UUID variant to DCE */
uuid_out[8] = (uuid_out[8] & 0x3F) | 0x80;
}
EXPORT_SYMBOL(generate_random_uuid);
/********************************************************************
*
* Sysctl interface
*
********************************************************************/
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
static int min_read_thresh = 8, min_write_thresh;
static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
static int max_write_thresh = INPUT_POOL_WORDS * 32;
static char sysctl_bootid[16];
/*
* This function is used to return both the bootid UUID, and random
* UUID. The difference is in whether table->data is NULL; if it is,
* then a new UUID is generated and returned to the user.
*
* If the user accesses this via the proc interface, the UUID will be
* returned as an ASCII string in the standard UUID format; if via the
* sysctl system call, as 16 bytes of binary data.
*/
static int proc_do_uuid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table fake_table;
unsigned char buf[64], tmp_uuid[16], *uuid;
uuid = table->data;
if (!uuid) {
uuid = tmp_uuid;
generate_random_uuid(uuid);
} else {
static DEFINE_SPINLOCK(bootid_spinlock);
spin_lock(&bootid_spinlock);
if (!uuid[8])
generate_random_uuid(uuid);
spin_unlock(&bootid_spinlock);
}
sprintf(buf, "%pU", uuid);
fake_table.data = buf;
fake_table.maxlen = sizeof(buf);
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
/*
* Return entropy available scaled to integral bits
*/
static int proc_do_entropy(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table fake_table;
int entropy_count;
entropy_count = *(int *)table->data >> ENTROPY_SHIFT;
fake_table.data = &entropy_count;
fake_table.maxlen = sizeof(entropy_count);
return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
}
static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
extern struct ctl_table random_table[];
struct ctl_table random_table[] = {
{
.procname = "poolsize",
.data = &sysctl_poolsize,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
.procname = "entropy_avail",
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_do_entropy,
.data = &input_pool.entropy_count,
},
{
.procname = "read_wakeup_threshold",
.data = &random_read_wakeup_bits,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_read_thresh,
.extra2 = &max_read_thresh,
},
{
.procname = "write_wakeup_threshold",
.data = &random_write_wakeup_bits,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_write_thresh,
.extra2 = &max_write_thresh,
},
{
.procname = "urandom_min_reseed_secs",
.data = &random_min_urandom_seed,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "boot_id",
.data = &sysctl_bootid,
.maxlen = 16,
.mode = 0444,
.proc_handler = proc_do_uuid,
},
{
.procname = "uuid",
.maxlen = 16,
.mode = 0444,
.proc_handler = proc_do_uuid,
},
#ifdef ADD_INTERRUPT_BENCH
{
.procname = "add_interrupt_avg_cycles",
.data = &avg_cycles,
.maxlen = sizeof(avg_cycles),
.mode = 0444,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "add_interrupt_avg_deviation",
.data = &avg_deviation,
.maxlen = sizeof(avg_deviation),
.mode = 0444,
.proc_handler = proc_doulongvec_minmax,
},
#endif
{ }
};
#endif /* CONFIG_SYSCTL */
static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
int random_int_secret_init(void)
{
get_random_bytes(random_int_secret, sizeof(random_int_secret));
return 0;
}
/*
* Get a random word for internal kernel use only. Similar to urandom but
* with the goal of minimal entropy pool depletion. As a result, the random
* value is not cryptographically secure but for several uses the cost of
* depleting entropy is too high
*/
static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
unsigned int get_random_int(void)
{
__u32 *hash;
unsigned int ret;
if (arch_get_random_int(&ret))
return ret;
hash = get_cpu_var(get_random_int_hash);
hash[0] += current->pid + jiffies + random_get_entropy();
md5_transform(hash, random_int_secret);
ret = hash[0];
put_cpu_var(get_random_int_hash);
return ret;
}
EXPORT_SYMBOL(get_random_int);
/*
* randomize_range() returns a start address such that
*
* [...... <range> .....]
* start end
*
* a <range> with size "len" starting at the return value is inside in the
* area defined by [start, end], but is otherwise randomized.
*/
unsigned long
randomize_range(unsigned long start, unsigned long end, unsigned long len)
{
unsigned long range = end - len - start;
if (end <= start + len)
return 0;
return PAGE_ALIGN(get_random_int() % range + start);
}