Commit 32b3ad14 authored by David S. Miller

Merge branch 'sysctl-data-races'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_table.

A sysctl variable can be accessed concurrently, so there is always a chance
of a data race.  All readers and writers therefore need some basic protection
to avoid load/store tearing.

The first half of this series changes some proc handlers used in ipv4_table
to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the
sysctl side.  Then, the second half adds READ_ONCE() to the other readers
of ipv4_table.

Changes:
  v2:
    * Drop some changes that make backporting difficult
      * First cleanup patch
      * Lockless helpers and .proc_handler changes
    * Drop the tracing part for .sysctl_mem
      * Steve already posted a fix
    * Drop int-to-bool change for cipso
      * Should be posted to net-next later
    * Drop proc_dobool() change
      * Can be included in another series

  v1: https://lore.kernel.org/netdev/20220706052130.16368-1-kuniyu@amazon.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 820b8963 73318c4b
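
For readers who want the pattern from the cover letter in isolation, the sketch below is an editor's illustration, not code from the patches themselves. READ_ONCE()/WRITE_ONCE() are the real kernel macros (normally provided by <linux/compiler.h>); the stand-in definitions, the sysctl_example_limit variable, and both helper functions are invented for the example.

/* Editor's sketch: minimal stand-ins for the kernel's READ_ONCE()/WRITE_ONCE()
 * so the example compiles on its own.  The volatile cast is the core idea:
 * the compiler must emit exactly one full-width load or store and may not
 * tear it into smaller accesses or refetch it.
 */
#define READ_ONCE(x)		(*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

/* Hypothetical sysctl-style knob, standing in for an ipv4_table entry. */
static int sysctl_example_limit = 1000;

/* Writer side: what a proc handler does once the user's input is validated
 * (compare the WRITE_ONCE(*valp, ...) changes in do_proc_dointvec_conv()). */
static void example_sysctl_write(int new_val)
{
	WRITE_ONCE(sysctl_example_limit, new_val);
}

/* Reader side: a lockless fast path samples the knob exactly once
 * (compare the READ_ONCE() added to icmp_global_allow() and friends). */
static int example_fast_path(int requested)
{
	int limit = READ_ONCE(sysctl_example_limit);

	return requested < limit ? requested : limit;
}

Without the annotations, the compiler is free to split the plain store or load into smaller accesses, or to read the variable twice and observe two different values on the same code path; that is the load/store tearing the cover letter refers to.
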
@@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN
 cipso_cache_bucket_size - INTEGER
	The CIPSO label cache consists of a fixed size hash table with each
	hash bucket containing a number of cache entries. This variable limits
-	the number of entries in each hash bucket; the larger the value the
+	the number of entries in each hash bucket; the larger the value is, the
	more CIPSO label mappings that can be cached. When the number of
	entries in a given hash bucket reaches this limit adding new entries
	causes the oldest entry in the bucket to be removed to make room.
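As a side note on the documented behaviour above, the fragment below is an editor's illustration only: the struct names and the bucket_add() helper are invented, and the real logic lives in net/ipv4/cipso_ipv4.c's cipso_v4_cache_add(), shown further down. It simply makes the documented policy concrete: each bucket holds at most bucket_size entries, and adding to a full bucket evicts the oldest one.

#include <stdlib.h>

/* Editor's sketch of the documented cipso_cache_bucket_size policy: a bucket
 * keeps at most bucket_size entries, newest first; inserting into a full
 * bucket drops the oldest (last) entry to make room. */
struct cache_entry {
	int key;			/* stand-in for a CIPSO label mapping */
	struct cache_entry *next;
};

struct cache_bucket {
	struct cache_entry *head;	/* newest entry first */
	int size;
};

static void bucket_add(struct cache_bucket *bkt, int bucket_size, int key)
{
	struct cache_entry *entry;

	if (bucket_size <= 0)		/* mirrors the bkt_size <= 0 check below */
		return;

	entry = malloc(sizeof(*entry));
	if (!entry)
		return;
	entry->key = key;
	entry->next = bkt->head;
	bkt->head = entry;

	if (bkt->size < bucket_size) {
		bkt->size++;
		return;
	}

	/* Bucket already full: walk to the oldest entry and free it. */
	while (entry->next && entry->next->next)
		entry = entry->next;
	free(entry->next);
	entry->next = NULL;
}
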
@@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount);
 /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-	long val = sk->sk_prot->sysctl_mem[index];
+	long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
 #if PAGE_SIZE > SK_MEM_QUANTUM
 	val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
@@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 		if (*negp) {
 			if (*lvalp > (unsigned long) INT_MAX + 1)
 				return -EINVAL;
-			*valp = -*lvalp;
+			WRITE_ONCE(*valp, -*lvalp);
 		} else {
 			if (*lvalp > (unsigned long) INT_MAX)
 				return -EINVAL;
-			*valp = *lvalp;
+			WRITE_ONCE(*valp, *lvalp);
 		}
 	} else {
-		int val = *valp;
+		int val = READ_ONCE(*valp);
 		if (val < 0) {
 			*negp = true;
 			*lvalp = -(unsigned long)val;
@@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
 	if (write) {
 		if (*lvalp > UINT_MAX)
 			return -EINVAL;
-		*valp = *lvalp;
+		WRITE_ONCE(*valp, *lvalp);
 	} else {
-		unsigned int val = *valp;
+		unsigned int val = READ_ONCE(*valp);
 		*lvalp = (unsigned long)val;
 	}
 	return 0;
@@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
 		if ((param->min && *param->min > tmp) ||
 		    (param->max && *param->max < tmp))
 			return -EINVAL;
-		*valp = tmp;
+		WRITE_ONCE(*valp, tmp);
 	}
 	return 0;
@@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
 		    (param->max && *param->max < tmp))
 			return -ERANGE;
-		*valp = tmp;
+		WRITE_ONCE(*valp, tmp);
 	}
 	return 0;
@@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
 				err = -EINVAL;
 				break;
 			}
-			*i = val;
+			WRITE_ONCE(*i, val);
 		} else {
-			val = convdiv * (*i) / convmul;
+			val = convdiv * READ_ONCE(*i) / convmul;
 			if (!first)
 				proc_put_char(&buffer, &left, '\t');
 			proc_put_long(&buffer, &left, val, false);
@@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
 	if (write) {
 		if (*lvalp > INT_MAX / HZ)
 			return 1;
-		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
+		if (*negp)
+			WRITE_ONCE(*valp, -*lvalp * HZ);
+		else
+			WRITE_ONCE(*valp, *lvalp * HZ);
 	} else {
-		int val = *valp;
+		int val = READ_ONCE(*valp);
 		unsigned long lval;
 		if (val < 0) {
 			*negp = true;
@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
 	struct cipso_v4_map_cache_entry *prev_entry = NULL;
 	u32 hash;
-	if (!cipso_v4_cache_enabled)
+	if (!READ_ONCE(cipso_v4_cache_enabled))
 		return -ENOENT;
 	hash = cipso_v4_map_cache_hash(key, key_len);
@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
 int cipso_v4_cache_add(const unsigned char *cipso_ptr,
		       const struct netlbl_lsm_secattr *secattr)
 {
+	int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
 	int ret_val = -EPERM;
 	u32 bkt;
 	struct cipso_v4_map_cache_entry *entry = NULL;
 	struct cipso_v4_map_cache_entry *old_entry = NULL;
 	u32 cipso_ptr_len;
-	if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+	if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
 		return 0;
 	cipso_ptr_len = cipso_ptr[1];
@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
 	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
 	spin_lock_bh(&cipso_v4_cache[bkt].lock);
-	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+	if (cipso_v4_cache[bkt].size < bkt_size) {
 		list_add(&entry->list, &cipso_v4_cache[bkt].list);
 		cipso_v4_cache[bkt].size += 1;
 	} else {
@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
 		/* This will send packets using the "optimized" format when
 		 * possible as specified in section 3.4.2.6 of the
 		 * CIPSO draft. */
-		if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+		if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+		    ret_val <= 10)
 			tag_len = 14;
 		else
 			tag_len = 4 + ret_val;
@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
			 * all the CIPSO validations here but it doesn't
			 * really specify _exactly_ what we need to validate
			 * ... so, just make it a sysctl tunable. */
-			if (cipso_v4_rbm_strictvalid) {
+			if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
				if (cipso_v4_map_lvl_valid(doi_def,
							   tag[3]) < 0) {
					err_offset = opt_iter + 3;
@@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn)
 		tn = container_of(head, struct tnode, rcu)->kv;
 	}
-	if (tnode_free_size >= sysctl_fib_sync_mem) {
+	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
 		tnode_free_size = 0;
 		synchronize_rcu();
 	}
@@ -253,11 +253,12 @@ bool icmp_global_allow(void)
 	spin_lock(&icmp_global.lock);
 	delta = min_t(u32, now - icmp_global.stamp, HZ);
 	if (delta >= HZ / 50) {
-		incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+		incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
 		if (incr)
			WRITE_ONCE(icmp_global.stamp, now);
 	}
-	credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+	credit = min_t(u32, icmp_global.credit + incr,
+		       READ_ONCE(sysctl_icmp_msgs_burst));
 	if (credit) {
 		/* We want to use a credit of one in average, but need to randomize
 		 * it for security reasons.
@@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
			 struct inet_peer *gc_stack[],
			 unsigned int gc_cnt)
 {
+	int peer_threshold, peer_maxttl, peer_minttl;
 	struct inet_peer *p;
 	__u32 delta, ttl;
 	int i;
-	if (base->total >= inet_peer_threshold)
+	peer_threshold = READ_ONCE(inet_peer_threshold);
+	peer_maxttl = READ_ONCE(inet_peer_maxttl);
+	peer_minttl = READ_ONCE(inet_peer_minttl);
+	if (base->total >= peer_threshold)
 		ttl = 0; /* be aggressive */
 	else
-		ttl = inet_peer_maxttl
-				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-					base->total / inet_peer_threshold * HZ;
+		ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+			base->total / peer_threshold * HZ;
 	for (i = 0; i < gc_cnt; i++) {
 		p = gc_stack[i];
@@ -2715,7 +2715,8 @@ static void tcp_orphan_update(struct timer_list *unused)
 static bool tcp_too_many_orphans(int shift)
 {
-	return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+	return READ_ONCE(tcp_orphan_cache) << shift >
+		READ_ONCE(sysctl_tcp_max_orphans);
 }
 bool tcp_check_oom(struct sock *sk, int shift)