Commit b16c2919 authored by Sasha Levin's avatar Sasha Levin Committed by Pablo Neira Ayuso

netfilter: nf_conntrack: use safer way to lock all buckets

When we need to lock all buckets in the connection hashtable we'd attempt to
lock 1024 spinlocks, which is way more preemption levels than supported by
the kernel. Furthermore, this behavior was hidden by checking if lockdep is
enabled, and if it was - use only 8 buckets(!).

Fix this by using a global lock and synchronize all buckets on it when we
need to lock them all. This is pretty heavyweight, but is only done when we
need to resize the hashtable, and that doesn't happen often enough (or at all).
Signed-off-by: default avatarSasha Levin <sasha.levin@oracle.com>
Acked-by: default avatarJesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent 35b81539
...@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ...@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto); const struct nf_conntrack_l4proto *proto);
#ifdef CONFIG_LOCKDEP #define CONNTRACK_LOCKS 1024
# define CONNTRACK_LOCKS 8
#else
# define CONNTRACK_LOCKS 1024
#endif
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
void nf_conntrack_lock(spinlock_t *lock);
extern spinlock_t nf_conntrack_expect_lock; extern spinlock_t nf_conntrack_expect_lock;
......
...@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); ...@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly bool nf_conntrack_locks_all;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
while (unlikely(nf_conntrack_locks_all)) {
spin_unlock(lock);
spin_lock(&nf_conntrack_locks_all_lock);
spin_unlock(&nf_conntrack_locks_all_lock);
spin_lock(lock);
}
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{ {
h1 %= CONNTRACK_LOCKS; h1 %= CONNTRACK_LOCKS;
...@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, ...@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
h1 %= CONNTRACK_LOCKS; h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS; h2 %= CONNTRACK_LOCKS;
if (h1 <= h2) { if (h1 <= h2) {
spin_lock(&nf_conntrack_locks[h1]); nf_conntrack_lock(&nf_conntrack_locks[h1]);
if (h1 != h2) if (h1 != h2)
spin_lock_nested(&nf_conntrack_locks[h2], spin_lock_nested(&nf_conntrack_locks[h2],
SINGLE_DEPTH_NESTING); SINGLE_DEPTH_NESTING);
} else { } else {
spin_lock(&nf_conntrack_locks[h2]); nf_conntrack_lock(&nf_conntrack_locks[h2]);
spin_lock_nested(&nf_conntrack_locks[h1], spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING); SINGLE_DEPTH_NESTING);
} }
...@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void) ...@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
{ {
int i; int i;
for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock(&nf_conntrack_locks_all_lock);
spin_lock_nested(&nf_conntrack_locks[i], i); nf_conntrack_locks_all = true;
for (i = 0; i < CONNTRACK_LOCKS; i++) {
spin_lock(&nf_conntrack_locks[i]);
spin_unlock(&nf_conntrack_locks[i]);
}
} }
static void nf_conntrack_all_unlock(void) static void nf_conntrack_all_unlock(void)
{ {
int i; nf_conntrack_locks_all = false;
spin_unlock(&nf_conntrack_locks_all_lock);
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_unlock(&nf_conntrack_locks[i]);
} }
unsigned int nf_conntrack_htable_size __read_mostly; unsigned int nf_conntrack_htable_size __read_mostly;
...@@ -757,7 +775,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) ...@@ -757,7 +775,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
hash = hash_bucket(_hash, net); hash = hash_bucket(_hash, net);
for (; i < net->ct.htable_size; i++) { for (; i < net->ct.htable_size; i++) {
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
spin_lock(lockp); nf_conntrack_lock(lockp);
if (read_seqcount_retry(&net->ct.generation, sequence)) { if (read_seqcount_retry(&net->ct.generation, sequence)) {
spin_unlock(lockp); spin_unlock(lockp);
goto restart; goto restart;
...@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), ...@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
for (; *bucket < net->ct.htable_size; (*bucket)++) { for (; *bucket < net->ct.htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable(); local_bh_disable();
spin_lock(lockp); nf_conntrack_lock(lockp);
if (*bucket < net->ct.htable_size) { if (*bucket < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
......
...@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, ...@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
} }
local_bh_disable(); local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) { for (i = 0; i < net->ct.htable_size; i++) {
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) { if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me); unhelp(h, me);
......
...@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart: restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
spin_lock(lockp); nf_conntrack_lock(lockp);
if (cb->args[0] >= net->ct.htable_size) { if (cb->args[0] >= net->ct.htable_size) {
spin_unlock(lockp); spin_unlock(lockp);
goto out; goto out;
......
...@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) ...@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
local_bh_disable(); local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) { for (i = 0; i < net->ct.htable_size; i++) {
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) { if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout); untimeout(h, timeout);
} }
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
} }
local_bh_enable(); local_bh_enable();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment