Commit d4150779 authored by Jason A. Donenfeld

random32: use real rng for non-deterministic randomness

random32.c has two random number generators in it: one that is meant to
be used deterministically, with some predefined seed, and one that does
the same exact thing as random.c, except does it poorly. The first one
has some use cases. The second one no longer does and can be replaced
with calls to random.c's proper random number generator.
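
To make the distinction concrete, here is a minimal illustrative
sketch (not part of this change) of the two APIs: the seeded,
reproducible generator that stays, and the random.c interface that
now backs everything else:

#include <linux/prandom.h>
#include <linux/random.h>

/* Deterministic stream: same seed, same sequence. Useful for
 * reproducible test inputs; this API remains in random32.c. */
static void example_deterministic(void)
{
	struct rnd_state state;
	u32 x;

	prandom_seed_state(&state, 42);
	x = prandom_u32_state(&state);	/* reproducible across runs */
	(void)x;
}

/* Non-deterministic randomness now simply comes from random.c. */
static void example_nondeterministic(void)
{
	u32 y = get_random_u32();	/* cryptographically secure */
	(void)y;
}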

The relatively recent siphash-based bad random32.c code was added in
response to concerns that the prior random32.c was too deterministic.
Out of fears that random.c was (at the time) too slow, this code was
anonymously contributed. Then out of that emerged a kind of shadow
entropy gathering system, with its own tentacles throughout various net
code, added willy-nilly.

Stop👏making👏bespoke👏random👏number👏generators👏.

Fortunately, recent advances in random.c mean that we can stop playing
with this sketchiness, and just use get_random_u32(), which is now fast
enough. In micro benchmarks using RDPMC, I'm seeing the same median
cycle count between the two functions, with the mean being _slightly_
higher due to batches refilling (which we can optimize further if need be).
However, when doing *real* benchmarks of the net functions that actually
use these random numbers, the mean cycles actually *decreased* slightly
(with the median still staying the same), likely because the additional
prandom code means icache misses and complexity, whereas random.c is
generally already being used by something else nearby.
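
To see why the mean budges while the median does not, here is a
simplified sketch of the batching idea behind get_random_u32(); the
names are made up for this example and this is not the actual
random.c code:

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/irqflags.h>

struct example_batch {
	u32 entropy[16];	/* one ChaCha20 block's worth of output */
	unsigned int position;	/* next unused word */
};
static DEFINE_PER_CPU(struct example_batch, example_batched_u32);

/* Hypothetical helper: fills the buffer with one ChaCha20 block. */
static void example_refill(u32 words[16]);

static u32 example_get_random_u32(void)
{
	struct example_batch *batch;
	unsigned long flags;
	u32 ret;

	local_irq_save(flags);
	batch = raw_cpu_ptr(&example_batched_u32);
	if (batch->position >= ARRAY_SIZE(batch->entropy)) {
		/* The occasional refill is the slow path that pulls
		 * the mean slightly above the median cycle count. */
		example_refill(batch->entropy);
		batch->position = 0;
	}
	ret = batch->entropy[batch->position++];
	local_irq_restore(flags);
	return ret;
}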

The biggest benefit of this is that there are many users of prandom who
probably should be using cryptographically secure random numbers. This
makes all of those accidental cases become secure by just flipping a
switch. Later on, we can do a tree-wide cleanup to remove the static
inline wrapper functions that this commit adds.
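
Concretely, that later cleanup is a mechanical call-site change,
along these lines (illustrative):

-	u32 id = prandom_u32();		/* via the new inline wrapper */
+	u32 id = get_random_u32();	/* direct call, wrapper gone */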

There are also some low-ish hanging fruits for making this even faster
in the future: a get_random_u16() function for use in the networking
stack will give a 2x performance boost there, using SIMD for ChaCha20
will let us compute 4 or 8 or 16 blocks of output in parallel, instead
of just one, giving us large buffers for cheap, and introducing a
get_random_*_bh() function that assumes irqs are already disabled will
shave off a few cycles for ordinary calls. These are things we can chip
away at down the road.
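
As an illustration of that last idea, a get_random_u32_bh()
(hypothetical here, not part of this commit) would be the batched
fast path from the sketch above minus the irq save/restore, under
the caller's guarantee that irqs are already off:

/* Hypothetical sketch: reuses struct example_batch and
 * example_refill() from the earlier sketch. */
static u32 example_get_random_u32_bh(void)
{
	struct example_batch *batch = raw_cpu_ptr(&example_batched_u32);

	lockdep_assert_irqs_disabled();	/* the caller's contract */
	if (batch->position >= ARRAY_SIZE(batch->entropy)) {
		example_refill(batch->entropy);
		batch->position = 0;
	}
	return batch->entropy[batch->position++];
}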

Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
parent e73aaae2
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,53 +10,16 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/siphash.h>
+#include <linux/random.h>
 
-u32 prandom_u32(void);
-void prandom_bytes(void *buf, size_t nbytes);
-void prandom_seed(u32 seed);
-void prandom_reseed_late(void);
+static inline u32 prandom_u32(void)
+{
+	return get_random_u32();
+}
 
-DECLARE_PER_CPU(unsigned long, net_rand_noise);
-
-#define PRANDOM_ADD_NOISE(a, b, c, d) \
-	prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \
-			      (unsigned long)(c), (unsigned long)(d))
-
-#if BITS_PER_LONG == 64
-/*
- * The core SipHash round function. Each line can be executed in
- * parallel given enough CPU resources.
- */
-#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3)
-
-#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2)
-#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3)
-
-#elif BITS_PER_LONG == 32
-/*
- * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
- * This is weaker, but 32-bit machines are not used for high-traffic
- * applications, so there is less output for an attacker to analyze.
- */
-#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3)
-#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2)
-#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3)
-#else
-#error Unsupported BITS_PER_LONG
-#endif
-
-static inline void prandom_u32_add_noise(unsigned long a, unsigned long b,
-					 unsigned long c, unsigned long d)
-{
-	/*
-	 * This is not used cryptographically; it's just
-	 * a convenient 4-word hash function. (3 xor, 2 add, 2 rol)
-	 */
-	a ^= raw_cpu_read(net_rand_noise);
-	PRND_SIPROUND(a, b, c, d);
-	raw_cpu_write(net_rand_noise, d);
-}
+static inline void prandom_bytes(void *buf, size_t nbytes)
+{
+	return get_random_bytes(buf, nbytes);
+}
 
 struct rnd_state {
@@ -108,7 +71,6 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 	state->s2 = __seed(i,   8U);
 	state->s3 = __seed(i,  16U);
 	state->s4 = __seed(i, 128U);
-	PRANDOM_ADD_NOISE(state, i, 0, 0);
 }
 
 /* Pseudo random number generator from numerical recipes. */
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1780,8 +1780,6 @@ void update_process_times(int user_tick)
 {
 	struct task_struct *p = current;
 
-	PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);
-
 	/* Note: this timer irq context must be accounted for as well. */
 	account_process_tick(p, user_tick);
 	run_local_timers();
[lib/random32.c: large diff collapsed]
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3527,7 +3527,6 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
-	PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
 	trace_net_dev_start_xmit(skb, dev);
 	rc = netdev_start_xmit(skb, dev, txq, more);
 	trace_net_dev_xmit(skb, rc, dev, len);
@@ -4168,7 +4167,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 		if (!skb)
 			goto out;
 
-		PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
 		HARD_TX_LOCK(dev, txq, cpu);
 
 		if (!netif_xmit_stopped(txq)) {
@@ -4234,7 +4232,6 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 
 	skb_set_queue_mapping(skb, queue_id);
 	txq = skb_get_tx_queue(dev, skb);
-	PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
 
 	local_bh_disable();
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -536,10 +536,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 			return ret;
 		}
 
-	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
-		prandom_seed((__force u32) ifa->ifa_local);
+	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
 		ifap = last_primary;
-	}
 
 	rcu_assign_pointer(ifa->ifa_next, *ifap);
 	rcu_assign_pointer(*ifap, ifa);
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3972,8 +3972,6 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 
 	addrconf_join_solict(dev, &ifp->addr);
 
-	prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
-
 	read_lock_bh(&idev->lock);
 	spin_lock(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_DEAD)