Commit 32b3ad14 authored by David S. Miller

Merge branch 'sysctl-data-races'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_table.

A sysctl variable is accessed concurrently, and there is always a chance
of a data race.  So, all readers and writers need some basic protection to
avoid load/store tearing.

The first half of this series changes some proc handlers used in ipv4_table
to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the
sysctl side.  Then, the second half adds READ_ONCE() to the other readers
of ipv4_table.

Changes:
  v2:
    * Drop some changes that make backporting difficult
      * First cleanup patch
      * Lockless helpers and .proc_handler changes
    * Drop the tracing part for .sysctl_mem
      * Steve already posted a fix
    * Drop int-to-bool change for cipso
      * Should be posted to net-next later
    * Drop proc_dobool() change
      * Can be included in another series

  v1: https://lore.kernel.org/netdev/20220706052130.16368-1-kuniyu@amazon.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 820b8963 73318c4b
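The whole series applies one idiom: the proc handler publishes the tunable
with WRITE_ONCE(), and every lockless reader loads it with READ_ONCE(), so
the compiler can neither tear the access nor silently refetch it. A minimal
sketch of the pattern, using a hypothetical tunable sysctl_foo rather than
one of the knobs actually touched below:

        /* Hypothetical tunable; the series applies this to ipv4_table entries. */
        static int sysctl_foo __read_mostly = 10;

        /* Writer side, e.g. called from a .proc_handler on a sysctl write. */
        static void foo_store(int new_val)
        {
                WRITE_ONCE(sysctl_foo, new_val);
        }

        /* Reader side, on a lockless fast path: snapshot the tunable once so
         * the value cannot change (or tear) between check and use. */
        static bool foo_allowed(int x)
        {
                int limit = READ_ONCE(sysctl_foo);

                return limit > 0 && x <= limit;
        }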
@@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN
 cipso_cache_bucket_size - INTEGER
        The CIPSO label cache consists of a fixed size hash table with each
        hash bucket containing a number of cache entries.  This variable limits
-       the number of entries in each hash bucket; the larger the value the
+       the number of entries in each hash bucket; the larger the value is, the
        more CIPSO label mappings that can be cached.  When the number of
        entries in a given hash bucket reaches this limit adding new entries
        causes the oldest entry in the bucket to be removed to make room.

@@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount);
 /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-        long val = sk->sk_prot->sysctl_mem[index];
+        long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);

 #if PAGE_SIZE > SK_MEM_QUANTUM
         val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;

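For a machine-word-sized scalar such as the long above, READ_ONCE() and
WRITE_ONCE() boil down to volatile accesses; a simplified model of the real
macros (the actual definitions in include/asm-generic/rwonce.h also handle
type checking and non-word sizes):

        /* Simplified sketch only: a volatile access forces the compiler to
         * emit exactly one load or store and forbids tearing for
         * word-sized types. */
        #define READ_ONCE(x)        (*(const volatile typeof(x) *)&(x))
        #define WRITE_ONCE(x, val)  (*(volatile typeof(x) *)&(x) = (val))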
@@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                 if (*negp) {
                         if (*lvalp > (unsigned long) INT_MAX + 1)
                                 return -EINVAL;
-                        *valp = -*lvalp;
+                        WRITE_ONCE(*valp, -*lvalp);
                 } else {
                         if (*lvalp > (unsigned long) INT_MAX)
                                 return -EINVAL;
-                        *valp = *lvalp;
+                        WRITE_ONCE(*valp, *lvalp);
                 }
         } else {
-                int val = *valp;
+                int val = READ_ONCE(*valp);
                 if (val < 0) {
                         *negp = true;
                         *lvalp = -(unsigned long)val;
@@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
         if (write) {
                 if (*lvalp > UINT_MAX)
                         return -EINVAL;
-                *valp = *lvalp;
+                WRITE_ONCE(*valp, *lvalp);
         } else {
-                unsigned int val = *valp;
+                unsigned int val = READ_ONCE(*valp);
                 *lvalp = (unsigned long)val;
         }
         return 0;
@@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
                 if ((param->min && *param->min > tmp) ||
                     (param->max && *param->max < tmp))
                         return -EINVAL;
-                *valp = tmp;
+                WRITE_ONCE(*valp, tmp);
         }

         return 0;
@@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
                 if ((param->min && *param->min > tmp) ||
                     (param->max && *param->max < tmp))
                         return -ERANGE;
-                *valp = tmp;
+                WRITE_ONCE(*valp, tmp);
         }

         return 0;
@@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
                                 err = -EINVAL;
                                 break;
                         }
-                        *i = val;
+                        WRITE_ONCE(*i, val);
                 } else {
-                        val = convdiv * (*i) / convmul;
+                        val = convdiv * READ_ONCE(*i) / convmul;
                         if (!first)
                                 proc_put_char(&buffer, &left, '\t');
                         proc_put_long(&buffer, &left, val, false);
@@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
         if (write) {
                 if (*lvalp > INT_MAX / HZ)
                         return 1;
-                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
+                if (*negp)
+                        WRITE_ONCE(*valp, -*lvalp * HZ);
+                else
+                        WRITE_ONCE(*valp, *lvalp * HZ);
         } else {
-                int val = *valp;
+                int val = READ_ONCE(*valp);
                 unsigned long lval;
                 if (val < 0) {
                         *negp = true;

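These conversion callbacks sit underneath the generic handlers
(proc_dointvec(), proc_dointvec_minmax(), proc_doulongvec_minmax(), and
friends), so every table entry that uses them picks up the WRITE_ONCE()
store automatically. A hedged sketch of a hypothetical entry wired up this
way (not one of the real ipv4_table entries):

        /* Hypothetical registration; the ipv4_table entries look like this. */
        static int sysctl_foo = 10;
        static int foo_min;
        static int foo_max = 100;

        static struct ctl_table foo_table[] = {
                {
                        .procname     = "foo",
                        .data         = &sysctl_foo,
                        .maxlen       = sizeof(int),
                        .mode         = 0644,
                        .proc_handler = proc_dointvec_minmax,
                        .extra1       = &foo_min,
                        .extra2       = &foo_max,
                },
                { }
        };

A write to the "foo" file then lands in sysctl_foo through the WRITE_ONCE()
in do_proc_dointvec_minmax_conv() above, and lockless readers of sysctl_foo
pair it with READ_ONCE() as in the rest of the series.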
@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
         struct cipso_v4_map_cache_entry *prev_entry = NULL;
         u32 hash;

-        if (!cipso_v4_cache_enabled)
+        if (!READ_ONCE(cipso_v4_cache_enabled))
                 return -ENOENT;

         hash = cipso_v4_map_cache_hash(key, key_len);
@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
 int cipso_v4_cache_add(const unsigned char *cipso_ptr,
                        const struct netlbl_lsm_secattr *secattr)
 {
+        int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
         int ret_val = -EPERM;
         u32 bkt;
         struct cipso_v4_map_cache_entry *entry = NULL;
         struct cipso_v4_map_cache_entry *old_entry = NULL;
         u32 cipso_ptr_len;

-        if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+        if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
                 return 0;

         cipso_ptr_len = cipso_ptr[1];
@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
         bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
         spin_lock_bh(&cipso_v4_cache[bkt].lock);
-        if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+        if (cipso_v4_cache[bkt].size < bkt_size) {
                 list_add(&entry->list, &cipso_v4_cache[bkt].list);
                 cipso_v4_cache[bkt].size += 1;
         } else {
@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
         /* This will send packets using the "optimized" format when
          * possible as specified in section 3.4.2.6 of the
          * CIPSO draft. */
-        if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+        if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+            ret_val <= 10)
                 tag_len = 14;
         else
                 tag_len = 4 + ret_val;
@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
                          * all the CIPSO validations here but it doesn't
                          * really specify _exactly_ what we need to validate
                          * ... so, just make it a sysctl tunable. */
-                        if (cipso_v4_rbm_strictvalid) {
+                        if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
                                 if (cipso_v4_map_lvl_valid(doi_def,
                                                            tag[3]) < 0) {
                                         err_offset = opt_iter + 3;

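Note the extra subtlety in cipso_v4_cache_add(): cipso_v4_cache_bucketsize
is read once into bkt_size, and that one snapshot feeds both the <= 0 check
and the later size comparison, so a concurrent sysctl write cannot make the
two tests disagree. A minimal sketch of the idiom, with hypothetical names:

        static int sysctl_bucket_limit __read_mostly = 10;

        struct bucket {
                int size;
        };

        static int cache_add(struct bucket *bkt)
        {
                int limit = READ_ONCE(sysctl_bucket_limit);  /* one snapshot */

                if (limit <= 0)                  /* the check ... */
                        return 0;
                if (bkt->size < limit) {         /* ... and the use agree */
                        bkt->size++;
                        return 1;
                }
                return 0;
        }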
@@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn)
                 tn = container_of(head, struct tnode, rcu)->kv;
         }

-        if (tnode_free_size >= sysctl_fib_sync_mem) {
+        if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
                 tnode_free_size = 0;
                 synchronize_rcu();
         }

@@ -253,11 +253,12 @@ bool icmp_global_allow(void)
         spin_lock(&icmp_global.lock);
         delta = min_t(u32, now - icmp_global.stamp, HZ);
         if (delta >= HZ / 50) {
-                incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+                incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
                 if (incr)
                         WRITE_ONCE(icmp_global.stamp, now);
         }
-        credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+        credit = min_t(u32, icmp_global.credit + incr,
+                       READ_ONCE(sysctl_icmp_msgs_burst));
         if (credit) {
                 /* We want to use a credit of one in average, but need to randomize
                  * it for security reasons.

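For scale: with the default icmp_msgs_per_sec of 1000 and a delta of HZ/50
(20 ms), incr works out to 1000 * (HZ/50) / HZ = 20 new tokens, and credit
is then clamped to icmp_msgs_burst (default 50). The READ_ONCE() annotations
only make the two sysctl loads tear-free; the token-bucket arithmetic is
unchanged.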
@@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
                          struct inet_peer *gc_stack[],
                          unsigned int gc_cnt)
 {
+        int peer_threshold, peer_maxttl, peer_minttl;
         struct inet_peer *p;
         __u32 delta, ttl;
         int i;

-        if (base->total >= inet_peer_threshold)
+        peer_threshold = READ_ONCE(inet_peer_threshold);
+        peer_maxttl = READ_ONCE(inet_peer_maxttl);
+        peer_minttl = READ_ONCE(inet_peer_minttl);
+
+        if (base->total >= peer_threshold)
                 ttl = 0; /* be aggressive */
         else
-                ttl = inet_peer_maxttl
-                                - (inet_peer_maxttl - inet_peer_minttl) / HZ *
-                                        base->total / inet_peer_threshold * HZ;
+                ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+                        base->total / peer_threshold * HZ;

         for (i = 0; i < gc_cnt; i++) {
                 p = gc_stack[i];

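The rewritten expression is the same linear interpolation as before, just
computed from the snapshots: ttl falls from peer_maxttl for an empty pool
toward peer_minttl as base->total approaches peer_threshold. For example,
taking peer_maxttl = 600*HZ, peer_minttl = 120*HZ and peer_threshold = 65536
(round-number stand-ins for the defaults), a pool at base->total = 32768
yields ttl = 600*HZ - 480 * 32768 / 65536 * HZ = 360*HZ, i.e. 360 seconds.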
@@ -2715,7 +2715,8 @@ static void tcp_orphan_update(struct timer_list *unused)
 static bool tcp_too_many_orphans(int shift)
 {
-        return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+        return READ_ONCE(tcp_orphan_cache) << shift >
+               READ_ONCE(sysctl_tcp_max_orphans);
 }

 bool tcp_check_oom(struct sock *sk, int shift)

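The comparison itself is unchanged: the scaled orphan estimate is checked
against the limit. With illustrative numbers only, tcp_orphan_cache = 1200
and tcp_max_orphans = 4096, tcp_too_many_orphans(2) computes
1200 << 2 = 4800 > 4096 and reports true to its caller tcp_check_oom().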