/*
 * random.c -- A strong random number generator
 *
 * Version 1.04, last modified 26-Apr-98
 * 
 * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, and the entire permission notice in its entirety,
 *    including the disclaimer of warranties.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 * 
 * ALTERNATIVELY, this product may be distributed under the terms of
 * the GNU Public License, in which case the provisions of the GPL are
 * required INSTEAD OF the above restrictions.  (This clause is
 * necessary due to a potential bad interaction between the GPL and
 * the restrictions contained in a BSD-style copyright.)
 * 
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * (now, with legal B.S. out of the way.....) 
 * 
 * This routine gathers environmental noise from device drivers, etc.,
 * and returns good random numbers, suitable for cryptographic use.
 * Besides the obvious cryptographic uses, these numbers are also good
 * for seeding TCP sequence numbers, and other places where it is
 * desirable to have numbers which are not only random, but hard to
 * predict by an attacker.
 *
 * Theory of operation
 * ===================
 * 
 * Computers are very predictable devices.  Hence it is extremely hard
 * to produce truly random numbers on a computer --- as opposed to
 * pseudo-random numbers, which can easily generated by using a
 * algorithm.  Unfortunately, it is very easy for attackers to guess
 * the sequence of pseudo-random number generators, and for some
 * applications this is not acceptable.  So instead, we must try to
 * gather "environmental noise" from the computer's environment, which
 * must be hard for outside attackers to observe, and use that to
 * generate random numbers.  In a Unix environment, this is best done
 * from inside the kernel.
 * 
 * Sources of randomness from the environment include inter-keyboard
 * timings, inter-interrupt timings from some interrupts, and other
 * events which are both (a) non-deterministic and (b) hard for an
 * outside observer to measure.  Randomness from these sources are
 * added to an "entropy pool", which is mixed using a CRC-like function.
 * This is not cryptographically strong, but it is adequate assuming
 * the randomness is not chosen maliciously, and it is fast enough that
 * the overhead of doing it on every interrupt is very reasonable.
 * As random bytes are mixed into the entropy pool, the routines keep
 * an *estimate* of how many bits of randomness have been stored into
 * the random number generator's internal state.
 * 
 * When random bytes are desired, they are obtained by taking the SHA
 * hash of the contents of the "entropy pool".  The SHA hash avoids
 * exposing the internal state of the entropy pool.  It is believed to
 * be computationally infeasible to derive any useful information
 * about the input of SHA from its output.  Even if it is possible to
 * analyze SHA in some clever way, as long as the amount of data
 * returned from the generator is less than the inherent entropy in
 * the pool, the output data is totally unpredictable.  For this
 * reason, the routine decreases its internal estimate of how many
 * bits of "true randomness" are contained in the entropy pool as it
 * outputs random numbers.
 * 
 * If this estimate goes to zero, the routine can still generate
 * random numbers; however, an attacker may (at least in theory) be
 * able to infer the future output of the generator from prior
 * outputs.  This requires successful cryptanalysis of SHA, which is
 * not believed to be feasible, but there is a remote possibility.
 * Nonetheless, these numbers should be useful for the vast majority
 * of purposes.
 * 
 * Exported interfaces ---- output
 * ===============================
 * 
 * There are three exported interfaces; the first is one designed to
 * be used from within the kernel:
 *
 * 	void get_random_bytes(void *buf, int nbytes);
 *
 * This interface will return the requested number of random bytes,
 * and place it in the requested buffer.
 * 
 * The two other interfaces are two character devices /dev/random and
 * /dev/urandom.  /dev/random is suitable for use when very high
 * quality randomness is desired (for example, for key generation or
 * one-time pads), as it will only return a maximum of the number of
 * bits of randomness (as estimated by the random number generator)
 * contained in the entropy pool.
 * 
 * The /dev/urandom device does not have this limit, and will return
 * as many bytes as are requested.  As more and more random bytes are
 * requested without giving time for the entropy pool to recharge,
 * this will result in random numbers that are merely cryptographically
 * strong.  For many applications, however, this is acceptable.
 *
 * Exported interfaces ---- input
 * ==============================
 * 
 * The current exported interfaces for gathering environmental noise
 * from the devices are:
 * 
 * 	void add_keyboard_randomness(unsigned char scancode);
 * 	void add_mouse_randomness(__u32 mouse_data);
 * 	void add_interrupt_randomness(int irq);
 * 	void add_blkdev_randomness(int irq);
 * 
 * add_keyboard_randomness() uses the inter-keypress timing, as well as the
 * scancode as random inputs into the "entropy pool".
 * 
 * add_mouse_randomness() uses the mouse interrupt timing, as well as
 * the reported position of the mouse from the hardware.
 *
 * add_interrupt_randomness() uses the inter-interrupt timing as random
 * inputs to the entropy pool.  Note that not all interrupts are good
 * sources of randomness!  For example, the timer interrupts is not a
 * good choice, because the periodicity of the interrupts is to
 * regular, and hence predictable to an attacker.  Disk interrupts are
 * a better measure, since the timing of the disk interrupts are more
 * unpredictable.
 * 
 * add_blkdev_randomness() times the finishing time of block requests.
 * 
 * All of these routines try to estimate how many bits of randomness a
 * particular randomness source.  They do this by keeping track of the
 * first and second order deltas of the event timings.
 *
 * Ensuring unpredictability at system startup
 * ============================================
 * 
 * When any operating system starts up, it will go through a sequence
 * of actions that are fairly predictable by an adversary, especially
 * if the start-up does not involve interaction with a human operator.
 * This reduces the actual number of bits of unpredictability in the
 * entropy pool below the value in entropy_count.  In order to
 * counteract this effect, it helps to carry information in the
 * entropy pool across shut-downs and start-ups.  To do this, put the
 * following lines an appropriate script which is run during the boot
 * sequence: 
 *
 *	echo "Initializing random number generator..."
 * 	random_seed=/var/run/random-seed
 *	# Carry a random seed from start-up to start-up
 *	# Load and then save 512 bytes, which is the size of the entropy pool
 * 	if [ -f $random_seed ]; then
 *		cat $random_seed >/dev/urandom
 * 	fi
 *	dd if=/dev/urandom of=$random_seed count=1
 * 	chmod 600 $random_seed
 *
 * and the following lines in an appropriate script which is run as
 * the system is shutdown:
 * 
 *	# Carry a random seed from shut-down to start-up
 *	# Save 512 bytes, which is the size of the entropy pool
 *	echo "Saving random seed..."
 * 	random_seed=/var/run/random-seed
 *	dd if=/dev/urandom of=$random_seed count=1
 * 	chmod 600 $random_seed
 * 
 * For example, on most modern systems using the System V init
 * scripts, such code fragments would be found in
 * /etc/rc.d/init.d/random.  On older Linux systems, the correct script
 * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0.
 * 
 * Effectively, these commands cause the contents of the entropy pool
 * to be saved at shut-down time and reloaded into the entropy pool at
 * start-up.  (The 'dd' in the addition to the bootup script is to
 * make sure that /etc/random-seed is different for every start-up,
 * even if the system crashes without executing rc.0.)  Even with
 * complete knowledge of the start-up activities, predicting the state
 * of the entropy pool requires knowledge of the previous history of
 * the system.
 *
 * Configuring the /dev/random driver under Linux
 * ==============================================
 *
 * The /dev/random driver under Linux uses minor numbers 8 and 9 of
 * the /dev/mem major number (#1).  So if your system does not have
 * /dev/random and /dev/urandom created already, they can be created
 * by using the commands:
 *
 * 	mknod /dev/random c 1 8
 * 	mknod /dev/urandom c 1 9
 * 
 * Acknowledgements:
 * =================
 *
 * Ideas for constructing this random number generator were derived
 * from Pretty Good Privacy's random number generator, and from private
 * discussions with Phil Karn.  Colin Plumb provided a faster random
 * number generator, which speed up the mixing function of the entropy
 * pool, taken from PGPfone.  Dale Worley has also contributed many
 * useful ideas and suggestions to improve this driver.
 * 
 * Any flaws in the design are solely my responsibility, and should
 * not be attributed to the Phil, Colin, or any of authors of PGP.
 * 
 * The code for SHA transform was taken from Peter Gutmann's
 * implementation, which has been placed in the public domain.
 * The code for MD5 transform was taken from Colin Plumb's
 * implementation, which has been placed in the public domain.  The
 * MD5 cryptographic checksum was devised by Ronald Rivest, and is
 * documented in RFC 1321, "The MD5 Message Digest Algorithm".
 * 
 * Further background information on this topic may be obtained from
 * RFC 1750, "Randomness Recommendations for Security", by Donald
 * Eastlake, Steve Crocker, and Jeff Schiller.
 */

#include <linux/utsname.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/fcntl.h>
#include <linux/malloc.h>
#include <linux/random.h>
#include <linux/poll.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <asm/io.h>

/*
 * Configuration information
 */
#undef RANDOM_BENCHMARK
#undef BENCHMARK_NOINT
#define ROTATE_PARANOIA

#define POOLWORDS 128    /* Power of 2 - note that this is 32-bit words */
#define POOLBITS (POOLWORDS*32)
/*
 * The pool is stirred with a primitive polynomial of degree POOLWORDS
 * over GF(2).  The taps for various sizes are defined below.  They are
 * chosen to be evenly spaced (minimum RMS distance from evenly spaced;
 * the numbers in the comments are a scaled squared error sum) except
 * for the last tap, which is 1 to get the twisting happening as fast
 * as possible.
 */
#if POOLWORDS == 2048	/* 115 x^2048+x^1638+x^1231+x^819+x^411+x^1+1 */
#define TAP1	1638
#define TAP2	1231
#define TAP3	819
#define TAP4	411
#define TAP5	1
#elif POOLWORDS == 1024	/* 290 x^1024+x^817+x^615+x^412+x^204+x^1+1 */
/* Alt: 115 x^1024+x^819+x^616+x^410+x^207+x^2+1 */
#define TAP1	817
#define TAP2	615
#define TAP3	412
#define TAP4	204
#define TAP5	1
#elif POOLWORDS == 512	/* 225 x^512+x^411+x^308+x^208+x^104+x+1 */
/* Alt: 95 x^512+x^409+x^307+x^206+x^102+x^2+1
 *      95 x^512+x^409+x^309+x^205+x^103+x^2+1 */
#define TAP1	411
#define TAP2	308
#define TAP3	208
#define TAP4	104
#define TAP5	1
#elif POOLWORDS == 256	/* 125 x^256+x^205+x^155+x^101+x^52+x+1 */
#define TAP1	205
#define TAP2	155
#define TAP3	101
#define TAP4	52
#define TAP5	1
#elif POOLWORDS == 128	/* 105 x^128+x^103+x^76+x^51+x^25+x+1 */
/* Alt: 70 x^128+x^103+x^78+x^51+x^27+x^2+1 */
#define TAP1	103
#define TAP2	76
#define TAP3	51
#define TAP4	25
#define TAP5	1
#elif POOLWORDS == 64	/* 15 x^64+x^52+x^39+x^26+x^14+x+1 */
#define TAP1	52
#define TAP2	39
#define TAP3	26
#define TAP4	14
#define TAP5	1
#elif POOLWORDS == 32	/* 15 x^32+x^26+x^20+x^14+x^7+x^1+1 */
#define TAP1	26
#define TAP2	20
#define TAP3	14
#define TAP4	7
#define TAP5	1
#elif POOLWORDS & (POOLWORDS-1)
#error POOLWORDS must be a power of 2
#else
#error No primitive polynomial available for chosen POOLWORDS
#endif

/*
 * For the purposes of better mixing, we use the CRC-32 polynomial as
 * well to make a twisted Generalized Feedback Shift Reigster
 *
 * (See M. Matsumoto & Y. Kurita, 1992.  Twisted GFSR generators.  ACM
 * Transactions on Modeling and Computer Simulation 2(3):179-194.
 * Also see M. Matsumoto & Y. Kurita, 1994.  Twisted GFSR generators
 * II.  ACM Transactions on Mdeling and Computer Simulation 4:254-266)
 *
 * Thanks to Colin Plumb for suggesting this.
 * We have not analyzed the resultant polynomial to prove it primitive;
 * in fact it almost certainly isn't.  Nonetheless, the irreducible factors
 * of a random large-degree polynomial over GF(2) are more than large enough
 * that periodicity is not a concern.
 *
 * The input hash is much less sensitive than the output hash.  All that
 * we want of it is that it be a good non-cryptographic hash; i.e. it
 * not produce collisions when fed "random" data of the sort we expect
 * to see.  As long as the pool state differs for different inputs, we
 * have preserved the input entropy and done a good job.  The fact that an
 * intelligent attacker can construct inputs that will produce controlled
 * alterations to the pool's state is not important because we don't
 * consider such inputs to contribute any randomness.
 * The only property we need with respect to them is
 * that the attacker can't increase his/her knowledge of the pool's state.
 * Since all additions are reversible (knowing the final state and the
 * input, you can reconstruct the initial state), if an attacker has
 * any uncertainty about the initial state, he/she can only shuffle that
 * uncertainty about, but never cause any collisions (which would
 * decrease the uncertainty).
 *
 * The chosen system lets the state of the pool be (essentially) the input
 * modulo the generator polymnomial.  Now, for random primitive polynomials,
 * this is a universal class of hash functions, meaning that the chance
 * of a collision is limited by the attacker's knowledge of the generator
 * polynomail, so if it is chosen at random, an attacker can never force
 * a collision.  Here, we use a fixed polynomial, but we *can* assume that
 * ###--> it is unknown to the processes generating the input entropy. <-###
 * Because of this important property, this is a good, collision-resistant
 * hash; hash collisions will occur no more often than chance.
 */

/*
 * The minimum number of bits to release a "wait on input".  Should
 * probably always be 8, since a /dev/random read can return a single
 * byte.
 */
#define WAIT_INPUT_BITS 8
/* 
 * The limit number of bits under which to release a "wait on
 * output".  Should probably always be the same as WAIT_INPUT_BITS, so
 * that an output wait releases when and only when a wait on input
 * would block.
 */
#define WAIT_OUTPUT_BITS WAIT_INPUT_BITS

/* There is actually only one of these, globally. */
struct random_bucket {
	unsigned add_ptr;
	unsigned entropy_count;
#ifdef ROTATE_PARANOIA	
	int input_rotate;
#endif
	__u32 pool[POOLWORDS];
};

#ifdef RANDOM_BENCHMARK
/* For benchmarking only */
struct random_benchmark {
	unsigned long long 	start_time;
	int			times;		/* # of samples */
	unsigned long		min;
	unsigned long		max;
	unsigned long		accum;		/* accumulator for average */
	const char		*descr;
	int			unit;
	unsigned long		flags;
};

#define BENCHMARK_INTERVAL 500

static void initialize_benchmark(struct random_benchmark *bench,
				 const char *descr, int unit);
static void begin_benchmark(struct random_benchmark *bench);
static void end_benchmark(struct random_benchmark *bench);

struct random_benchmark timer_benchmark;
#endif

/* There is one of these per entropy source */
struct timer_rand_state {
	__u32		last_time;
	__s32		last_delta,last_delta2;
	int		dont_count_entropy:1;
};

static struct random_bucket random_state;
static struct timer_rand_state keyboard_timer_state;
static struct timer_rand_state mouse_timer_state;
static struct timer_rand_state extract_timer_state;
static struct timer_rand_state *irq_timer_state[NR_IRQS];
static struct timer_rand_state *blkdev_timer_state[MAX_BLKDEV];
static struct wait_queue *random_read_wait;
static struct wait_queue *random_write_wait;

static ssize_t random_read(struct file * file, char * buf,
			   size_t nbytes, loff_t *ppos);
static ssize_t random_read_unlimited(struct file * file, char * buf,
				     size_t nbytes, loff_t *ppos);
static unsigned int random_poll(struct file *file, poll_table * wait);
static ssize_t random_write(struct file * file, const char * buffer,
			    size_t count, loff_t *ppos);
static int random_ioctl(struct inode * inode, struct file * file,
			unsigned int cmd, unsigned long arg);

static inline void fast_add_entropy_words(struct random_bucket *r,
					 __u32 x, __u32 y);

static void add_entropy_words(struct random_bucket *r, __u32 x, __u32 y);

#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

/*
 * Unfortunately, while the GCC optimizer for the i386 understands how
 * to optimize a static rotate left of x bits, it doesn't know how to
 * deal with a variable rotate of x bits.  So we use a bit of asm magic.
 */
#if (!defined (__i386__))
extern inline __u32 rotate_left(int i, __u32 word)
{
	return (word << i) | (word >> (32 - i));
	
}
#else
extern inline __u32 rotate_left(int i, __u32 word)
{
	__asm__("roll %%cl,%0"
		:"=r" (word)
		:"0" (word),"c" (i));
	return word;
}
#endif

/*
 * More asm magic....
 * 
 * For entropy estimation, we need to do an integral base 2
 * logarithm.  
 *
 * Note the "12bits" suffix - this is used for numbers between
 * 0 and 4095 only.  This allows a few shortcuts.
 */
#if 0	/* Slow but clear version */
static inline __u32 int_ln_12bits(__u32 word)
{
	__u32 nbits = 0;
	
	while (word >>= 1)
		nbits++;
	return nbits;
}
#else	/* Faster (more clever) version, courtesy Colin Plumb */
static inline __u32 int_ln_12bits(__u32 word)
{
	/* Smear msbit right to make an n-bit mask */
	word |= word >> 8;
	word |= word >> 4;
	word |= word >> 2;
	word |= word >> 1;
	/* Remove one bit to make this a logarithm */
	word >>= 1;
	/* Count the bits set in the word */
	word -= (word >> 1) & 0x555;
	word = (word & 0x333) + ((word >> 2) & 0x333);
	word += (word >> 4);
	word += (word >> 8);
	return word & 15;
}
#endif

	
/*
 * Initialize the random pool with standard stuff.
 *
 * NOTE: This is an OS-dependent function.
 */
static void init_std_data(struct random_bucket *r)
{
	__u32 words[2], *p;
	int i;
	struct timeval 	tv;

	do_gettimeofday(&tv);
	add_entropy_words(r, tv.tv_sec, tv.tv_usec);

	/*
	 *	This doesnt lock system.utsname. Howeve we are generating
	 *	entropy so a race with a name set here is fine.
	 */
	p = (__u32 *)&system_utsname;
	for (i = sizeof(system_utsname) / sizeof(words); i; i--) {
		memcpy(words, p, sizeof(words));
		add_entropy_words(r, words[0], words[1]);
		p += sizeof(words)/sizeof(*words);
	}
	
}

/* Clear the entropy pool and associated counters. */
static void rand_clear_pool(void)
{
	memset(&random_state, 0, sizeof(random_state));
	init_std_data(&random_state);
}

__initfunc(void rand_initialize(void))
{
	int i;

	rand_clear_pool();
	for (i = 0; i < NR_IRQS; i++)
		irq_timer_state[i] = NULL;
	for (i = 0; i < MAX_BLKDEV; i++)
		blkdev_timer_state[i] = NULL;
	memset(&keyboard_timer_state, 0, sizeof(struct timer_rand_state));
	memset(&mouse_timer_state, 0, sizeof(struct timer_rand_state));
	memset(&extract_timer_state, 0, sizeof(struct timer_rand_state));
#ifdef RANDOM_BENCHMARK
	initialize_benchmark(&timer_benchmark, "timer", 0);
#endif
	extract_timer_state.dont_count_entropy = 1;
	random_read_wait = NULL;
	random_write_wait = NULL;
}

void rand_initialize_irq(int irq)
{
	struct timer_rand_state *state;
	
	if (irq >= NR_IRQS || irq_timer_state[irq])
		return;

	/*
	 * If kmalloc returns null, we just won't use that entropy
	 * source.
	 */
	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
	if (state) {
		irq_timer_state[irq] = state;
		memset(state, 0, sizeof(struct timer_rand_state));
	}
}

void rand_initialize_blkdev(int major, int mode)
{
	struct timer_rand_state *state;
	
	if (major >= MAX_BLKDEV || blkdev_timer_state[major])
		return;

	/*
	 * If kmalloc returns null, we just won't use that entropy
	 * source.
	 */
	state = kmalloc(sizeof(struct timer_rand_state), mode);
	if (state) {
		blkdev_timer_state[major] = state;
		memset(state, 0, sizeof(struct timer_rand_state));
	}
}

/*
 * This function adds a byte into the entropy "pool".  It does not
 * update the entropy estimate.  The caller must do this if appropriate.
 *
 * This function is tuned for speed above most other considerations.
 *
 * The pool is stirred with a primitive polynomial of the appropriate degree,
 * and then twisted.  We twist by three bits at a time because it's
 * cheap to do so and helps slightly in the expected case where the
 * entropy is concentrated in the low-order bits.
 */
#define MASK(x) ((x) & (POOLWORDS-1))	/* Convenient abreviation */
static inline void fast_add_entropy_words(struct random_bucket *r,
					 __u32 x, __u32 y)
{
	static __u32 const twist_table[8] = {
		         0, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
		0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
	unsigned i, j;

	i = MASK(r->add_ptr - 2);	/* i is always even */
	r->add_ptr = i;

#ifdef ROTATE_PARANOIA
	j = r->input_rotate + 14;
	if (i)
		j -= 7;
	r->input_rotate = j & 31;

	x = rotate_left(r->input_rotate, x);
	y = rotate_left(r->input_rotate, y);
#endif

	/*
	 * XOR in the various taps.  Even though logically, we compute
	 * x and then compute y, we read in y then x order because most
	 * caches work slightly better with increasing read addresses.
	 * If a tap is even then we can use the fact that i is even to
	 * avoid a masking operation.  Every polynomial has at least one
	 * even tap, so j is always used.
	 * (Is there a nicer way to arrange this code?)
	 */
#if TAP1 & 1
	y ^= r->pool[MASK(i+TAP1)];	x ^= r->pool[MASK(i+TAP1+1)];
#else
	j = MASK(i+TAP1);	y ^= r->pool[j];	x ^= r->pool[j+1];
#endif
#if TAP2 & 1
	y ^= r->pool[MASK(i+TAP2)];	x ^= r->pool[MASK(i+TAP2+1)];
#else
	j = MASK(i+TAP2);	y ^= r->pool[j];	x ^= r->pool[j+1];
#endif
#if TAP3 & 1
	y ^= r->pool[MASK(i+TAP3)];	x ^= r->pool[MASK(i+TAP3+1)];
#else
	j = MASK(i+TAP3);	y ^= r->pool[j];	x ^= r->pool[j+1];
#endif
#if TAP4 & 1
	y ^= r->pool[MASK(i+TAP4)];	x ^= r->pool[MASK(i+TAP4+1)];
#else
	j = MASK(i+TAP4);	y ^= r->pool[j];	x ^= r->pool[j+1];
#endif
#if TAP5 == 1
	/* We need to pretend to write pool[i+1] before computing y */
	y ^= r->pool[i];
	x ^= r->pool[i+1];
	x ^= r->pool[MASK(i+TAP5+1)];
	y ^= r->pool[i+1] = x = (x >> 3) ^ twist_table[x & 7];
	r->pool[i] = (y >> 3) ^ twist_table[y & 7];
#else
# if TAP5 & 1
	y ^= r->pool[MASK(i+TAP5)];	x ^= r->pool[MASK(i+TAP5+1)];
# else
	j = MASK(i+TAP5);	y ^= r->pool[j];	x ^= r->pool[j+1];
# endif
	y ^= r->pool[i];
	x ^= r->pool[i+1];
	r->pool[i] = (y >> 3) ^ twist_table[y & 7];
	r->pool[i+1] = (x >> 3) ^ twist_table[x & 7];
#endif
}

/*
 * For places where we don't need the inlined version
 */
static void add_entropy_words(struct random_bucket *r, __u32 x, __u32 y)
{
	fast_add_entropy_words(r, x, y);
}

/*
 * This function adds entropy to the entropy "pool" by using timing
 * delays.  It uses the timer_rand_state structure to make an estimate
 * of how many bits of entropy this call has added to the pool.
 *
 * The number "num" is also added to the pool - it should somehow describe
 * the type of event which just happened.  This is currently 0-255 for
 * keyboard scan codes, and 256 upwards for interrupts.
 * On the i386, this is assumed to be at most 16 bits, and the high bits
 * are used for a high-resolution timer.
 *
 */
static void add_timer_randomness(struct random_bucket *r,
				 struct timer_rand_state *state, unsigned num)
{
	__u32		time;
	__s32		delta, delta2, delta3;

#ifdef RANDOM_BENCHMARK
	begin_benchmark(&timer_benchmark);
#endif
#if defined (__i386__)
	if (boot_cpu_data.x86_capability & X86_FEATURE_TSC) {
		__u32 high;
		__asm__(".byte 0x0f,0x31"
			:"=a" (time), "=d" (high));
		num ^= high;
	} else {
		time = jiffies;
	}
#else
	time = jiffies;
#endif

	fast_add_entropy_words(r, (__u32)num, time);
	
	/*
	 * Calculate number of bits of randomness we probably added.
	 * We take into account the first, second and third-order deltas
	 * in order to make our estimate.
	 */
	if ((r->entropy_count < POOLBITS) && !state->dont_count_entropy) {
		delta = time - state->last_time;
		state->last_time = time;

		delta2 = delta - state->last_delta;
		state->last_delta = delta;

		delta3 = delta2 - state->last_delta2;
		state->last_delta2 = delta2;

		if (delta < 0)
			delta = -delta;
		if (delta2 < 0)
			delta2 = -delta2;
		if (delta3 < 0)
			delta3 = -delta3;
		if (delta > delta2)
			delta = delta2;
		if (delta > delta3)
			delta = delta3;

		/*
		 * delta is now minimum absolute delta.
		 * Round down by 1 bit on general principles,
		 * and limit entropy entimate to 12 bits.
		 */
		delta >>= 1;
		delta &= (1 << 12) - 1;

		r->entropy_count += int_ln_12bits(delta);

		/* Prevent overflow */
		if (r->entropy_count > POOLBITS)
			r->entropy_count = POOLBITS;

		/* Wake up waiting processes, if we have enough entropy. */
		if (r->entropy_count >= WAIT_INPUT_BITS)
			wake_up_interruptible(&random_read_wait);
	}
		
#ifdef RANDOM_BENCHMARK
	end_benchmark(&timer_benchmark);
#endif
}

void add_keyboard_randomness(unsigned char scancode)
{
	add_timer_randomness(&random_state, &keyboard_timer_state, scancode);
}

void add_mouse_randomness(__u32 mouse_data)
{
	add_timer_randomness(&random_state, &mouse_timer_state, mouse_data);
}

void add_interrupt_randomness(int irq)
{
	if (irq >= NR_IRQS || irq_timer_state[irq] == 0)
		return;

	add_timer_randomness(&random_state, irq_timer_state[irq], 0x100+irq);
}

void add_blkdev_randomness(int major)
{
	if (major >= MAX_BLKDEV)
		return;

	if (blkdev_timer_state[major] == 0) {
		rand_initialize_blkdev(major, GFP_ATOMIC);
		if (blkdev_timer_state[major] == 0)
			return;
	}
		
	add_timer_randomness(&random_state, blkdev_timer_state[major],
			     0x200+major);
}

/*
 * This chunk of code defines a function
 * void HASH_TRANSFORM(__u32 digest[HASH_BUFFER_SIZE + HASH_EXTRA_SIZE],
 * 		__u32 const data[16])
 * 
 * The function hashes the input data to produce a digest in the first
 * HASH_BUFFER_SIZE words of the digest[] array, and uses HASH_EXTRA_SIZE
 * more words for internal purposes.  (This buffer is exported so the
 * caller can wipe it once rather than this code doing it each call,
 * and tacking it onto the end of the digest[] array is the quick and
 * dirty way of doing it.)
 *
 * It so happens that MD5 and SHA share most of the initial vector
 * used to initialize the digest[] array before the first call:
 * 1) 0x67452301
 * 2) 0xefcdab89
 * 3) 0x98badcfe
 * 4) 0x10325476
 * 5) 0xc3d2e1f0 (SHA only)
 *
 * For /dev/random purposes, the length of the data being hashed is
 * fixed in length (at POOLWORDS words), so appending a bit count in
 * the usual way is not cryptographically necessary.
 */
#define USE_SHA

#ifdef USE_SHA

#define HASH_BUFFER_SIZE 5
#define HASH_EXTRA_SIZE 80
#define HASH_TRANSFORM SHATransform

/* Various size/speed tradeoffs are available.  Choose 0..3. */
#define SHA_CODE_SIZE 0

/*
 * SHA transform algorithm, taken from code written by Peter Gutmann,
 * and placed in the public domain.
 */

/* The SHA f()-functions.  */

#define f1(x,y,z)   ( z ^ (x & (y^z)) )		/* Rounds  0-19: x ? y : z */
#define f2(x,y,z)   (x ^ y ^ z)			/* Rounds 20-39: XOR */
#define f3(x,y,z)   ( (x & y) + (z & (x ^ y)) )	/* Rounds 40-59: majority */
#define f4(x,y,z)   (x ^ y ^ z)			/* Rounds 60-79: XOR */

/* The SHA Mysterious Constants */

#define K1  0x5A827999L			/* Rounds  0-19: sqrt(2) * 2^30 */
#define K2  0x6ED9EBA1L			/* Rounds 20-39: sqrt(3) * 2^30 */
#define K3  0x8F1BBCDCL			/* Rounds 40-59: sqrt(5) * 2^30 */
#define K4  0xCA62C1D6L			/* Rounds 60-79: sqrt(10) * 2^30 */

#define ROTL(n,X)  ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) )

#define subRound(a, b, c, d, e, f, k, data) \
    ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) )


static void SHATransform(__u32 digest[85], __u32 const data[16])
{
    __u32 A, B, C, D, E;     /* Local vars */
    __u32 TEMP;
    int	i;
#define W (digest + HASH_BUFFER_SIZE)	/* Expanded data array */

    /*
     * Do the preliminary expansion of 16 to 80 words.  Doing it
     * out-of-line line this is faster than doing it in-line on
     * register-starved machines like the x86, and not really any
     * slower on real processors.
     */
    memcpy(W, data, 16*sizeof(__u32));
    for (i = 0; i < 64; i++) {
	    TEMP = W[i] ^ W[i+2] ^ W[i+8] ^ W[i+13];
	    W[i+16] = ROTL(1, TEMP);
    }

    /* Set up first buffer and local data buffer */
    A = digest[ 0 ];
    B = digest[ 1 ];
    C = digest[ 2 ];
    D = digest[ 3 ];
    E = digest[ 4 ];

    /* Heavy mangling, in 4 sub-rounds of 20 iterations each. */
#if SHA_CODE_SIZE == 0
    /*
     * Approximately 50% of the speed of the largest version, but
     * takes up 1/16 the space.  Saves about 6k on an i386 kernel.
     */
    for (i = 0; i < 80; i++) {
	if (i < 40) {
	    if (i < 20)
		TEMP = f1(B, C, D) + K1;
	    else
		TEMP = f2(B, C, D) + K2;
	} else {
	    if (i < 60)
		TEMP = f3(B, C, D) + K3;
	    else
		TEMP = f4(B, C, D) + K4;
	}
	TEMP += ROTL(5, A) + E + W[i];
	E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
    }
#elif SHA_CODE_SIZE == 1
    for (i = 0; i < 20; i++) {
	TEMP = f1(B, C, D) + K1 + ROTL(5, A) + E + W[i];
	E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
    }
    for (; i < 40; i++) {
	TEMP = f2(B, C, D) + K2 + ROTL(5, A) + E + W[i];
	E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
    }
    for (; i < 60; i++) {
	TEMP = f3(B, C, D) + K3 + ROTL(5, A) + E + W[i];
	E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
    }
    for (; i < 80; i++) {
	TEMP = f4(B, C, D) + K4 + ROTL(5, A) + E + W[i];
	E = D; D = C; C = ROTL(30, B); B = A; A = TEMP;
    }
#elif SHA_CODE_SIZE == 2
    for (i = 0; i < 20; i += 5) {
	subRound( A, B, C, D, E, f1, K1, W[ i   ] );
	subRound( E, A, B, C, D, f1, K1, W[ i+1 ] );
	subRound( D, E, A, B, C, f1, K1, W[ i+2 ] );
	subRound( C, D, E, A, B, f1, K1, W[ i+3 ] );
	subRound( B, C, D, E, A, f1, K1, W[ i+4 ] );
    }
    for (; i < 40; i += 5) {
	subRound( A, B, C, D, E, f2, K2, W[ i   ] );
	subRound( E, A, B, C, D, f2, K2, W[ i+1 ] );
	subRound( D, E, A, B, C, f2, K2, W[ i+2 ] );
	subRound( C, D, E, A, B, f2, K2, W[ i+3 ] );
	subRound( B, C, D, E, A, f2, K2, W[ i+4 ] );
    }
    for (; i < 60; i += 5) {
	subRound( A, B, C, D, E, f3, K3, W[ i   ] );
	subRound( E, A, B, C, D, f3, K3, W[ i+1 ] );
	subRound( D, E, A, B, C, f3, K3, W[ i+2 ] );
	subRound( C, D, E, A, B, f3, K3, W[ i+3 ] );
	subRound( B, C, D, E, A, f3, K3, W[ i+4 ] );
    }
    for (; i < 80; i += 5) {
	subRound( A, B, C, D, E, f4, K4, W[ i   ] );
	subRound( E, A, B, C, D, f4, K4, W[ i+1 ] );
	subRound( D, E, A, B, C, f4, K4, W[ i+2 ] );
	subRound( C, D, E, A, B, f4, K4, W[ i+3 ] );
	subRound( B, C, D, E, A, f4, K4, W[ i+4 ] );
    }
#elif SHA_CODE_SIZE == 3 /* Really large version */
    subRound( A, B, C, D, E, f1, K1, W[  0 ] );
    subRound( E, A, B, C, D, f1, K1, W[  1 ] );
    subRound( D, E, A, B, C, f1, K1, W[  2 ] );
    subRound( C, D, E, A, B, f1, K1, W[  3 ] );
    subRound( B, C, D, E, A, f1, K1, W[  4 ] );
    subRound( A, B, C, D, E, f1, K1, W[  5 ] );
    subRound( E, A, B, C, D, f1, K1, W[  6 ] );
    subRound( D, E, A, B, C, f1, K1, W[  7 ] );
    subRound( C, D, E, A, B, f1, K1, W[  8 ] );
    subRound( B, C, D, E, A, f1, K1, W[  9 ] );
    subRound( A, B, C, D, E, f1, K1, W[ 10 ] );
    subRound( E, A, B, C, D, f1, K1, W[ 11 ] );
    subRound( D, E, A, B, C, f1, K1, W[ 12 ] );
    subRound( C, D, E, A, B, f1, K1, W[ 13 ] );
    subRound( B, C, D, E, A, f1, K1, W[ 14 ] );
    subRound( A, B, C, D, E, f1, K1, W[ 15 ] );
    subRound( E, A, B, C, D, f1, K1, W[ 16 ] );
    subRound( D, E, A, B, C, f1, K1, W[ 17 ] );
    subRound( C, D, E, A, B, f1, K1, W[ 18 ] );
    subRound( B, C, D, E, A, f1, K1, W[ 19 ] );

    subRound( A, B, C, D, E, f2, K2, W[ 20 ] );
    subRound( E, A, B, C, D, f2, K2, W[ 21 ] );
    subRound( D, E, A, B, C, f2, K2, W[ 22 ] );
    subRound( C, D, E, A, B, f2, K2, W[ 23 ] );
    subRound( B, C, D, E, A, f2, K2, W[ 24 ] );
    subRound( A, B, C, D, E, f2, K2, W[ 25 ] );
    subRound( E, A, B, C, D, f2, K2, W[ 26 ] );
    subRound( D, E, A, B, C, f2, K2, W[ 27 ] );
    subRound( C, D, E, A, B, f2, K2, W[ 28 ] );
    subRound( B, C, D, E, A, f2, K2, W[ 29 ] );
    subRound( A, B, C, D, E, f2, K2, W[ 30 ] );
    subRound( E, A, B, C, D, f2, K2, W[ 31 ] );
    subRound( D, E, A, B, C, f2, K2, W[ 32 ] );
    subRound( C, D, E, A, B, f2, K2, W[ 33 ] );
    subRound( B, C, D, E, A, f2, K2, W[ 34 ] );
    subRound( A, B, C, D, E, f2, K2, W[ 35 ] );
    subRound( E, A, B, C, D, f2, K2, W[ 36 ] );
    subRound( D, E, A, B, C, f2, K2, W[ 37 ] );
    subRound( C, D, E, A, B, f2, K2, W[ 38 ] );
    subRound( B, C, D, E, A, f2, K2, W[ 39 ] );
    
    subRound( A, B, C, D, E, f3, K3, W[ 40 ] );
    subRound( E, A, B, C, D, f3, K3, W[ 41 ] );
    subRound( D, E, A, B, C, f3, K3, W[ 42 ] );
    subRound( C, D, E, A, B, f3, K3, W[ 43 ] );
    subRound( B, C, D, E, A, f3, K3, W[ 44 ] );
    subRound( A, B, C, D, E, f3, K3, W[ 45 ] );
    subRound( E, A, B, C, D, f3, K3, W[ 46 ] );
    subRound( D, E, A, B, C, f3, K3, W[ 47 ] );
    subRound( C, D, E, A, B, f3, K3, W[ 48 ] );
    subRound( B, C, D, E, A, f3, K3, W[ 49 ] );
    subRound( A, B, C, D, E, f3, K3, W[ 50 ] );
    subRound( E, A, B, C, D, f3, K3, W[ 51 ] );
    subRound( D, E, A, B, C, f3, K3, W[ 52 ] );
    subRound( C, D, E, A, B, f3, K3, W[ 53 ] );
    subRound( B, C, D, E, A, f3, K3, W[ 54 ] );
    subRound( A, B, C, D, E, f3, K3, W[ 55 ] );
    subRound( E, A, B, C, D, f3, K3, W[ 56 ] );
    subRound( D, E, A, B, C, f3, K3, W[ 57 ] );
    subRound( C, D, E, A, B, f3, K3, W[ 58 ] );
    subRound( B, C, D, E, A, f3, K3, W[ 59 ] );

    subRound( A, B, C, D, E, f4, K4, W[ 60 ] );
    subRound( E, A, B, C, D, f4, K4, W[ 61 ] );
    subRound( D, E, A, B, C, f4, K4, W[ 62 ] );
    subRound( C, D, E, A, B, f4, K4, W[ 63 ] );
    subRound( B, C, D, E, A, f4, K4, W[ 64 ] );
    subRound( A, B, C, D, E, f4, K4, W[ 65 ] );
    subRound( E, A, B, C, D, f4, K4, W[ 66 ] );
    subRound( D, E, A, B, C, f4, K4, W[ 67 ] );
    subRound( C, D, E, A, B, f4, K4, W[ 68 ] );
    subRound( B, C, D, E, A, f4, K4, W[ 69 ] );
    subRound( A, B, C, D, E, f4, K4, W[ 70 ] );
    subRound( E, A, B, C, D, f4, K4, W[ 71 ] );
    subRound( D, E, A, B, C, f4, K4, W[ 72 ] );
    subRound( C, D, E, A, B, f4, K4, W[ 73 ] );
    subRound( B, C, D, E, A, f4, K4, W[ 74 ] );
    subRound( A, B, C, D, E, f4, K4, W[ 75 ] );
    subRound( E, A, B, C, D, f4, K4, W[ 76 ] );
    subRound( D, E, A, B, C, f4, K4, W[ 77 ] );
    subRound( C, D, E, A, B, f4, K4, W[ 78 ] );
    subRound( B, C, D, E, A, f4, K4, W[ 79 ] );
#else
#error Illegal SHA_CODE_SIZE
#endif

    /* Build message digest */
    digest[ 0 ] += A;
    digest[ 1 ] += B;
    digest[ 2 ] += C;
    digest[ 3 ] += D;
    digest[ 4 ] += E;

	/* W is wiped by the caller */
#undef W
}

#undef ROTL
#undef f1
#undef f2
#undef f3
#undef f4
#undef K1	
#undef K2
#undef K3	
#undef K4	
#undef subRound
	
#else /* !USE_SHA - Use MD5 */

#define HASH_BUFFER_SIZE 4
#define HASH_EXTRA_SIZE 0
#define HASH_TRANSFORM MD5Transform
	
/*
 * MD5 transform algorithm, taken from code written by Colin Plumb,
 * and put into the public domain
 *
 * QUESTION: Replace this with SHA, which as generally received better
 * reviews from the cryptographic community?
 */

/* The four core functions - F1 is optimized somewhat */

/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))

/* This is the central step in the MD5 algorithm. */
#define MD5STEP(f, w, x, y, z, data, s) \
	( w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x )

/*
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
 * the data and converts bytes into longwords for this routine.
 */
static void MD5Transform(__u32 buf[HASH_BUFFER_SIZE], __u32 const in[16])
{
	__u32 a, b, c, d;

	a = buf[0];
	b = buf[1];
	c = buf[2];
	d = buf[3];

	MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478,  7);
	MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12);
	MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17);
	MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22);
	MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf,  7);
	MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12);
	MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17);
	MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22);
	MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8,  7);
	MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12);
	MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17);
	MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22);
	MD5STEP(F1, a, b, c, d, in[12]+0x6b901122,  7);
	MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12);
	MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17);
	MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22);

	MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562,  5);
	MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340,  9);
	MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14);
	MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20);
	MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d,  5);
	MD5STEP(F2, d, a, b, c, in[10]+0x02441453,  9);
	MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14);
	MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20);
	MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6,  5);
	MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6,  9);
	MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14);
	MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20);
	MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905,  5);
	MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8,  9);
	MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14);
	MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20);

	MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942,  4);
	MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11);
	MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16);
	MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23);
	MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44,  4);
	MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11);
	MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16);
	MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23);
	MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6,  4);
	MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11);
	MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16);
	MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23);
	MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039,  4);
	MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11);
	MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16);
	MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23);

	MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244,  6);
	MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10);
	MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15);
	MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21);
	MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3,  6);
	MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10);
	MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15);
	MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21);
	MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f,  6);
	MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10);
	MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15);
	MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21);
	MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82,  6);
	MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10);
	MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15);
	MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21);

	buf[0] += a;
	buf[1] += b;
	buf[2] += c;
	buf[3] += d;
}

#undef F1
#undef F2
#undef F3
#undef F4
#undef MD5STEP

#endif /* !USE_SHA */


#if POOLWORDS % 16 != 0
#error extract_entropy() assumes that POOLWORDS is a multiple of 16 words.
#endif
/*
 * This function extracts randomness from the "entropy pool", and
 * returns it in a buffer.  This function computes how many remaining
 * bits of entropy are left in the pool, but it does not restrict the
 * number of bytes that are actually obtained.
 */
static ssize_t extract_entropy(struct random_bucket *r, char * buf,
			       size_t nbytes, int to_user)
{
	ssize_t ret, i;
	__u32 tmp[HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
	__u32 x;

	add_timer_randomness(r, &extract_timer_state, nbytes);
	
	/* Redundant, but just in case... */
	if (r->entropy_count > POOLBITS) 
		r->entropy_count = POOLBITS;

	ret = nbytes;
	if (r->entropy_count / 8 >= nbytes)
		r->entropy_count -= nbytes*8;
	else
		r->entropy_count = 0;

	if (r->entropy_count < WAIT_OUTPUT_BITS)
		wake_up_interruptible(&random_write_wait);
	
	while (nbytes) {
		/* Hash the pool to get the output */
		tmp[0] = 0x67452301;
		tmp[1] = 0xefcdab89;
		tmp[2] = 0x98badcfe;
		tmp[3] = 0x10325476;
#ifdef USE_SHA
		tmp[4] = 0xc3d2e1f0;
#endif
		for (i = 0; i < POOLWORDS; i += 16)
			HASH_TRANSFORM(tmp, r->pool+i);

		/*
		 * The following code does two separate things that happen
		 * to both work two words at a time, so are convenient
		 * to do together.
		 *
		 * First, this feeds the output back into the pool so
		 * that the next call will return different results.
		 * Any perturbation of the pool's state would do, even
		 * changing one bit, but this mixes the pool nicely.
		 *
		 * Second, this folds the output in half to hide the data
		 * fed back into the pool from the user and further mask
		 * any patterns in the hash output.  (The exact folding
		 * pattern is not important; the one used here is quick.)
		 */
		for (i = 0; i <  HASH_BUFFER_SIZE/2; i++) {
			x = tmp[i + (HASH_BUFFER_SIZE+1)/2];
			add_entropy_words(r, tmp[i], x);
			tmp[i] ^= x;
		}
#if HASH_BUFFER_SIZE & 1	/* There's a middle word to deal with */
		x = tmp[HASH_BUFFER_SIZE/2];
		add_entropy_words(r, x, (__u32)buf);
		x ^= (x >> 16);		/* Fold it in half */
		((__u16 *)tmp)[HASH_BUFFER_SIZE-1] = (__u16)x;
#endif
		
		/* Copy data to destination buffer */
		i = MIN(nbytes, HASH_BUFFER_SIZE*sizeof(__u32)/2);
		if (to_user) {
			i -= copy_to_user(buf, (__u8 const *)tmp, i);
			if (!i) {
				ret = -EFAULT;
				break;
			}
		} else
			memcpy(buf, (__u8 const *)tmp, i);
		nbytes -= i;
		buf += i;
		add_timer_randomness(r, &extract_timer_state, nbytes);
		if (to_user && current->need_resched)
			schedule();
	}

	/* Wipe data just returned from memory */
	memset(tmp, 0, sizeof(tmp));
	
	return ret;
}

/*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for seeding TCP sequence
 * numbers, etc.
 */
void get_random_bytes(void *buf, int nbytes)
{
	extract_entropy(&random_state, (char *) buf, nbytes, 0);
}

static ssize_t
random_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
	struct wait_queue 	wait = { current, NULL };
	ssize_t			n, retval = 0, count = 0;
	
	if (nbytes == 0)
		return 0;

	add_wait_queue(&random_read_wait, &wait);
	while (nbytes > 0) {
		current->state = TASK_INTERRUPTIBLE;
		
		n = nbytes;
		if (n > random_state.entropy_count / 8)
			n = random_state.entropy_count / 8;
		if (n == 0) {
			if (file->f_flags & O_NONBLOCK) {
				retval = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				retval = -ERESTARTSYS;
				break;
			}
			schedule();
			continue;
		}
		n = extract_entropy(&random_state, buf, n, 1);
		if (n < 0) {
			retval = n;
			break;
		}
		count += n;
		buf += n;
		nbytes -= n;
		break;		/* This break makes the device work */
				/* like a named pipe */
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&random_read_wait, &wait);

	/*
	 * If we gave the user some bytes, update the access time.
	 */
	if (count != 0) {
		UPDATE_ATIME(file->f_dentry->d_inode);
	}
	
	return (count ? count : retval);
}

static ssize_t
random_read_unlimited(struct file * file, char * buf,
		      size_t nbytes, loff_t *ppos)
{
	return extract_entropy(&random_state, buf, nbytes, 1);
}

static unsigned int
random_poll(struct file *file, poll_table * wait)
{
	unsigned int mask;

	poll_wait(file, &random_read_wait, wait);
	poll_wait(file, &random_write_wait, wait);
	mask = 0;
	if (random_state.entropy_count >= WAIT_INPUT_BITS)
		mask |= POLLIN | POLLRDNORM;
	if (random_state.entropy_count < WAIT_OUTPUT_BITS)
		mask |= POLLOUT | POLLWRNORM;
	return mask;
}

static ssize_t
random_write(struct file * file, const char * buffer,
	     size_t count, loff_t *ppos)
{
	int		ret = 0;
	size_t		bytes;
	unsigned	i;
	__u32 		buf[16];
	const char 	*p = buffer;
	size_t		c = count;

	while (c > 0) {
		bytes = MIN(c, sizeof(buf));

		bytes -= copy_from_user(&buf, p, bytes);
		if (!bytes) {
			ret = -EFAULT;
			break;
		}
		c -= bytes;
		p += bytes;
		
		i = (unsigned)((bytes-1) / (2 * sizeof(__u32)));
		do {
			add_entropy_words(&random_state, buf[2*i], buf[2*i+1]);
		} while (i--);
	}
	if (p == buffer) {
		return (ssize_t)ret;
	} else {
		file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
		mark_inode_dirty(file->f_dentry->d_inode);
		return (ssize_t)(p - buffer);
	}
}

static int
random_ioctl(struct inode * inode, struct file * file,
	     unsigned int cmd, unsigned long arg)
{
	int *p, size, ent_count;
	int retval;
	
	switch (cmd) {
	case RNDGETENTCNT:
		retval = verify_area(VERIFY_WRITE, (void *) arg, sizeof(int));
		if (retval)
			return(retval);
		ent_count = random_state.entropy_count;
		put_user(ent_count, (int *) arg);
		return 0;
	case RNDADDTOENTCNT:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		retval = verify_area(VERIFY_READ, (void *) arg, sizeof(int));
		if (retval)
			return(retval);
		get_user(ent_count, (int *) arg);
		/*
		 * Add i to entropy_count, limiting the result to be
		 * between 0 and POOLBITS.
		 */
		if (ent_count < -random_state.entropy_count)
			random_state.entropy_count = 0;
		else if (ent_count > POOLBITS)
			random_state.entropy_count = POOLBITS;
		else {
			random_state.entropy_count += ent_count;
			if (random_state.entropy_count > POOLBITS)
				random_state.entropy_count = POOLBITS;
			if (random_state.entropy_count < 0)
				random_state.entropy_count = 0;
		}
		/*
		 * Wake up waiting processes if we have enough
		 * entropy.
		 */
		if (random_state.entropy_count >= WAIT_INPUT_BITS)
			wake_up_interruptible(&random_read_wait);
		return 0;
	case RNDGETPOOL:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		p = (int *) arg;
		retval = verify_area(VERIFY_WRITE, (void *) p, sizeof(int));
		if (retval)
			return(retval);
		ent_count = random_state.entropy_count;
		put_user(ent_count, p++);
		retval = verify_area(VERIFY_WRITE, (void *) p, sizeof(int));
		if (retval)
			return(retval);
		get_user(size, p);
		put_user(POOLWORDS, p++);
		if (size < 0)
			return -EINVAL;
		if (size > POOLWORDS)
			size = POOLWORDS;
		if (copy_to_user(p, random_state.pool, size*sizeof(__u32)))
			return -EFAULT;
		return 0;
	case RNDADDENTROPY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		p = (int *) arg;
		retval = verify_area(VERIFY_READ, (void *) p, 2*sizeof(int));
		if (retval)
			return(retval);
		get_user(ent_count, p++);
		if (ent_count < 0)
			return -EINVAL;
		get_user(size, p++);
		retval = verify_area(VERIFY_READ, (void *) p, size);
		if (retval)
			return retval;
		retval = random_write(file, (const char *) p,
				      size, &file->f_pos);
		if (retval < 0)
			return retval;
		/*
		 * Add ent_count to entropy_count, limiting the result to be
		 * between 0 and POOLBITS.
		 */
		if (ent_count > POOLBITS)
			random_state.entropy_count = POOLBITS;
		else {
			random_state.entropy_count += ent_count;
			if (random_state.entropy_count > POOLBITS)
				random_state.entropy_count = POOLBITS;
			if (random_state.entropy_count < 0)
				random_state.entropy_count = 0;
		}
		/*
		 * Wake up waiting processes if we have enough
		 * entropy.
		 */
		if (random_state.entropy_count >= WAIT_INPUT_BITS)
			wake_up_interruptible(&random_read_wait);
		return 0;
	case RNDZAPENTCNT:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		random_state.entropy_count = 0;
		return 0;
	case RNDCLEARPOOL:
		/* Clear the entropy pool and associated counters. */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		rand_clear_pool();
		return 0;
	default:
		return -EINVAL;
	}
}

struct file_operations random_fops = {
	NULL,		/* random_lseek */
	random_read,
	random_write,
	NULL,		/* random_readdir */
	random_poll,	/* random_poll */
	random_ioctl,
	NULL,		/* random_mmap */
	NULL,		/* no special open code */
	NULL		/* no special release code */
};

struct file_operations urandom_fops = {
	NULL,		/* unrandom_lseek */
	random_read_unlimited,
	random_write,
	NULL,		/* urandom_readdir */
	NULL,		/* urandom_poll */
	random_ioctl,
	NULL,		/* urandom_mmap */
	NULL,		/* no special open code */
	NULL		/* no special release code */
};

/*
 * TCP initial sequence number picking.  This uses the random number
 * generator to pick an initial secret value.  This value is hashed
 * along with the TCP endpoint information to provide a unique
 * starting point for each pair of TCP endpoints.  This defeats
 * attacks which rely on guessing the initial TCP sequence number.
 * This algorithm was suggested by Steve Bellovin.
 *
 * Using a very strong hash was taking an appreciable amount of the total
 * TCP connection establishment time, so this is a weaker hash,
 * compensated for by changing the secret periodically.
 */

/* F, G and H are basic MD4 functions: selection, majority, parity */
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))

/*
 * The generic round function.  The application is so specific that
 * we don't bother protecting all the arguments with parens, as is generally
 * good macro practice, in favor of extra legibility.
 * Rotation is separate from addition to prevent recomputation
 */
#define ROUND(f, a, b, c, d, x, s)	\
	(a += f(b, c, d) + x, a = (a << s) | (a >> (32-s)))
#define K1 0
#define K2 013240474631UL
#define K3 015666365641UL

/*
 * Basic cut-down MD4 transform.  Returns only 32 bits of result.
 */
static __u32 halfMD4Transform (__u32 const buf[4], __u32 const in[8])
{
	__u32	a = buf[0], b = buf[1], c = buf[2], d = buf[3];

	/* Round 1 */
	ROUND(F, a, b, c, d, in[0] + K1,  3);
	ROUND(F, d, a, b, c, in[1] + K1,  7);
	ROUND(F, c, d, a, b, in[2] + K1, 11);
	ROUND(F, b, c, d, a, in[3] + K1, 19);
	ROUND(F, a, b, c, d, in[4] + K1,  3);
	ROUND(F, d, a, b, c, in[5] + K1,  7);
	ROUND(F, c, d, a, b, in[6] + K1, 11);
	ROUND(F, b, c, d, a, in[7] + K1, 19);

	/* Round 2 */
	ROUND(G, a, b, c, d, in[1] + K2,  3);
	ROUND(G, d, a, b, c, in[3] + K2,  5);
	ROUND(G, c, d, a, b, in[5] + K2,  9);
	ROUND(G, b, c, d, a, in[7] + K2, 13);
	ROUND(G, a, b, c, d, in[0] + K2,  3);
	ROUND(G, d, a, b, c, in[2] + K2,  5);
	ROUND(G, c, d, a, b, in[4] + K2,  9);
	ROUND(G, b, c, d, a, in[6] + K2, 13);

	/* Round 3 */
	ROUND(H, a, b, c, d, in[3] + K3,  3);
	ROUND(H, d, a, b, c, in[7] + K3,  9);
	ROUND(H, c, d, a, b, in[2] + K3, 11);
	ROUND(H, b, c, d, a, in[6] + K3, 15);
	ROUND(H, a, b, c, d, in[1] + K3,  3);
	ROUND(H, d, a, b, c, in[5] + K3,  9);
	ROUND(H, c, d, a, b, in[0] + K3, 11);
	ROUND(H, b, c, d, a, in[4] + K3, 15);

	return buf[1] + b;	/* "most hashed" word */
	/* Alternative: return sum of all words? */
}

#if 0	/* May be needed for IPv6 */

static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
{
	__u32	a = buf[0], b = buf[1], c = buf[2], d = buf[3];

	/* Round 1 */
	ROUND(F, a, b, c, d, in[ 0] + K1,  3);
	ROUND(F, d, a, b, c, in[ 1] + K1,  7);
	ROUND(F, c, d, a, b, in[ 2] + K1, 11);
	ROUND(F, b, c, d, a, in[ 3] + K1, 19);
	ROUND(F, a, b, c, d, in[ 4] + K1,  3);
	ROUND(F, d, a, b, c, in[ 5] + K1,  7);
	ROUND(F, c, d, a, b, in[ 6] + K1, 11);
	ROUND(F, b, c, d, a, in[ 7] + K1, 19);
	ROUND(F, a, b, c, d, in[ 8] + K1,  3);
	ROUND(F, d, a, b, c, in[ 9] + K1,  7);
	ROUND(F, c, d, a, b, in[10] + K1, 11);
	ROUND(F, b, c, d, a, in[11] + K1, 19);

	/* Round 2 */
	ROUND(G, a, b, c, d, in[ 1] + K2,  3);
	ROUND(G, d, a, b, c, in[ 3] + K2,  5);
	ROUND(G, c, d, a, b, in[ 5] + K2,  9);
	ROUND(G, b, c, d, a, in[ 7] + K2, 13);
	ROUND(G, a, b, c, d, in[ 9] + K2,  3);
	ROUND(G, d, a, b, c, in[11] + K2,  5);
	ROUND(G, c, d, a, b, in[ 0] + K2,  9);
	ROUND(G, b, c, d, a, in[ 2] + K2, 13);
	ROUND(G, a, b, c, d, in[ 4] + K2,  3);
	ROUND(G, d, a, b, c, in[ 6] + K2,  5);
	ROUND(G, c, d, a, b, in[ 8] + K2,  9);
	ROUND(G, b, c, d, a, in[10] + K2, 13);

	/* Round 3 */
	ROUND(H, a, b, c, d, in[ 3] + K3,  3);
	ROUND(H, d, a, b, c, in[ 7] + K3,  9);
	ROUND(H, c, d, a, b, in[11] + K3, 11);
	ROUND(H, b, c, d, a, in[ 2] + K3, 15);
	ROUND(H, a, b, c, d, in[ 6] + K3,  3);
	ROUND(H, d, a, b, c, in[10] + K3,  9);
	ROUND(H, c, d, a, b, in[ 1] + K3, 11);
	ROUND(H, b, c, d, a, in[ 5] + K3, 15);
	ROUND(H, a, b, c, d, in[ 9] + K3,  3);
	ROUND(H, d, a, b, c, in[ 0] + K3,  9);
	ROUND(H, c, d, a, b, in[ 4] + K3, 11);
	ROUND(H, b, c, d, a, in[ 8] + K3, 15);

	return buf[1] + b;	/* "most hashed" word */
	/* Alternative: return sum of all words? */
}
#endif

#undef ROUND
#undef F
#undef G
#undef H
#undef K1
#undef K2
#undef K3

/* This should not be decreased so low that ISNs wrap too fast. */
#define REKEY_INTERVAL	300
#define HASH_BITS 24

__u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
				 __u16 sport, __u16 dport)
{
	static __u32	rekey_time = 0;
	static __u32	count = 0;
	static __u32	secret[12];
	struct timeval 	tv;
	__u32		seq;

	/*
	 * Pick a random secret every REKEY_INTERVAL seconds.
	 */
	do_gettimeofday(&tv);	/* We need the usecs below... */

	if (!rekey_time || (tv.tv_sec - rekey_time) > REKEY_INTERVAL) {
		rekey_time = tv.tv_sec;
		/* First three words are overwritten below. */
		get_random_bytes(&secret+3, sizeof(secret)-12);
		count = (tv.tv_sec/REKEY_INTERVAL) << HASH_BITS;
	}

	/*
	 *  Pick a unique starting offset for each TCP connection endpoints
	 *  (saddr, daddr, sport, dport).
	 *  Note that the words are placed into the first words to be
	 *  mixed in with the halfMD4.  This is because the starting
	 *  vector is also a random secret (at secret+8), and further
	 *  hashing fixed data into it isn't going to improve anything,
	 *  so we should get started with the variable data.
	 */
	secret[0]=saddr;
	secret[1]=daddr;
	secret[2]=(sport << 16) + dport;

	seq = (halfMD4Transform(secret+8, secret) &
	       ((1<<HASH_BITS)-1)) + count;

	/*
	 *	As close as possible to RFC 793, which
	 *	suggests using a 250 kHz clock.
	 *	Further reading shows this assumes 2 Mb/s networks.
	 *	For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
	 *	That's funny, Linux has one built in!  Use it!
	 *	(Networks are faster now - should this be increased?)
	 */
	seq += tv.tv_usec + tv.tv_sec*1000000;
#if 0
	printk("init_seq(%lx, %lx, %d, %d) = %d\n",
	       saddr, daddr, sport, dport, seq);
#endif
	return seq;
}

#ifdef CONFIG_SYN_COOKIES
/*
 * Secure SYN cookie computation. This is the algorithm worked out by
 * Dan Bernstein and Eric Schenk.
 *
 * For linux I implement the 1 minute counter by looking at the jiffies clock.
 * The count is passed in as a parameter, so this code doesn't much care.
 */

#define COOKIEBITS 24	/* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

static int	syncookie_init = 0;
static __u32	syncookie_secret[2][16-3+HASH_BUFFER_SIZE];

__u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr, __u16 sport,
		__u16 dport, __u32 sseq, __u32 count, __u32 data)
{
	__u32 	tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
	__u32	seq;

	/*
	 * Pick two random secrets the first time we need a cookie.
	 */
	if (syncookie_init == 0) {
		get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
		syncookie_init = 1;
	}

	/*
	 * Compute the secure sequence number.
	 * The output should be:
   	 *   HASH(sec1,saddr,sport,daddr,dport,sec1) + sseq + (count * 2^24)
	 *      + (HASH(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24).
	 * Where sseq is their sequence number and count increases every
	 * minute by 1.
	 * As an extra hack, we add a small "data" value that encodes the
	 * MSS into the second hash value.
	 */

	memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
	tmp[0]=saddr;
	tmp[1]=daddr;
	tmp[2]=(sport << 16) + dport;
	HASH_TRANSFORM(tmp+16, tmp);
	seq = tmp[17] + sseq + (count << COOKIEBITS);

	memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
	tmp[0]=saddr;
	tmp[1]=daddr;
	tmp[2]=(sport << 16) + dport;
	tmp[3] = count;	/* minute counter */
	HASH_TRANSFORM(tmp+16, tmp);

	/* Add in the second hash and the data */
	return seq + ((tmp[17] + data) & COOKIEMASK);
}

/*
 * This retrieves the small "data" value from the syncookie.
 * If the syncookie is bad, the data returned will be out of
 * range.  This must be checked by the caller.
 *
 * The count value used to generate the cookie must be within
 * "maxdiff" if the current (passed-in) "count".  The return value
 * is (__u32)-1 if this test fails.
 */
__u32 check_tcp_syn_cookie(__u32 cookie, __u32 saddr, __u32 daddr, __u16 sport,
		__u16 dport, __u32 sseq, __u32 count, __u32 maxdiff)
{
	__u32 	tmp[16 + HASH_BUFFER_SIZE + HASH_EXTRA_SIZE];
	__u32	diff;

	if (syncookie_init == 0)
		return (__u32)-1;	/* Well, duh! */

	/* Strip away the layers from the cookie */
	memcpy(tmp+3, syncookie_secret[0], sizeof(syncookie_secret[0]));
	tmp[0]=saddr;
	tmp[1]=daddr;
	tmp[2]=(sport << 16) + dport;
	HASH_TRANSFORM(tmp+16, tmp);
	cookie -= tmp[17] + sseq;
	/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */

	diff = (count - (cookie >> COOKIEBITS)) & ((__u32)-1 >> COOKIEBITS);
	if (diff >= maxdiff)
		return (__u32)-1;

	memcpy(tmp+3, syncookie_secret[1], sizeof(syncookie_secret[1]));
	tmp[0] = saddr;
	tmp[1] = daddr;
	tmp[2] = (sport << 16) + dport;
	tmp[3] = count - diff;	/* minute counter */
	HASH_TRANSFORM(tmp+16, tmp);

	return (cookie - tmp[17]) & COOKIEMASK;	/* Leaving the data behind */
}
#endif


#ifdef RANDOM_BENCHMARK
/*
 * This is so we can do some benchmarking of the random driver, to see
 * how much overhead add_timer_randomness really takes.  This only
 * works on a Pentium, since it depends on the timer clock...
 *
 * Note: the results of this benchmark as of this writing (5/27/96)
 *
 * On a Pentium, add_timer_randomness() takes between 150 and 1000
 * clock cycles, with an average of around 600 clock cycles.  On a 75
 * MHz Pentium, this translates to 2 to 13 microseconds, with an
 * average time of 8 microseconds.  This should be fast enough so we
 * can use add_timer_randomness() even with the fastest of interrupts...
 */
static inline unsigned long long get_clock_cnt(void)
{
	unsigned long low, high;
	__asm__(".byte 0x0f,0x31" :"=a" (low), "=d" (high));
	return (((unsigned long long) high << 32) | low); 
}

__initfunc(static void
initialize_benchmark(struct random_benchmark *bench,
	             const char *descr, int unit))
{
	bench->times = 0;
	bench->accum = 0;
	bench->max = 0;
	bench->min = 1 << 31;
	bench->descr = descr;
	bench->unit = unit;
}

static void begin_benchmark(struct random_benchmark *bench)
{
#ifdef BENCHMARK_NOINT
	save_flags(bench->flags); cli();
#endif
	bench->start_time = get_clock_cnt();
}

static void end_benchmark(struct random_benchmark *bench)
{
	unsigned long ticks;
	
	ticks = (unsigned long) (get_clock_cnt() - bench->start_time);
#ifdef BENCHMARK_NOINT
	restore_flags(bench->flags);
#endif
	if (ticks < bench->min)
		bench->min = ticks;
	if (ticks > bench->max)
		bench->max = ticks;
	bench->accum += ticks;
	bench->times++;
	if (bench->times == BENCHMARK_INTERVAL) {
		printk("Random benchmark: %s %d: %lu min, %lu avg, "
		       "%lu max\n", bench->descr, bench->unit, bench->min,
		       bench->accum / BENCHMARK_INTERVAL, bench->max);
		bench->times = 0;
		bench->accum = 0;
		bench->max = 0;
		bench->min = 1 << 31;
	}
}	
#endif /* RANDOM_BENCHMARK */