Commit de4c5efe authored by David S. Miller's avatar David S. Miller

Merge tag 'nf-23-08-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florisn Westphal says:

====================
These are netfilter fixes for the *net* tree.

First patch resolves a false-positive lockdep splat:
rcu_dereference is used outside of rcu read lock.  Let lockdep
validate that the transaction mutex is locked.

Second patch fixes a kdoc warning added in previous PR.

Third patch fixes a memory leak:
The catchall element isn't disabled correctly, this allows
userspace to deactivate the element again. This results in refcount
underflow which in turn prevents memory release. This was always
broken since the feature was added in 5.13.

Patch 4 fixes an incorrect change in the previous pull request:
Adding a duplicate key to a set should work if the duplicate key
has expired, restore this behaviour. All from myself.

Patch #5 resolves an old historic artifact in sctp conntrack:
a 300ms timeout for shutdown_ack. Increase this to 3s.  From Xin Long.

Patch #6 fixes a sysctl data race in ipvs, two threads can clobber the
sysctl value, from Sishuai Gong. This is a day-0 bug that predates git
history.

Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups
for the previous GC rework in nf_tables: The netlink notifier and the
netns exit path must both increment the gc worker seqcount, else worker
may encounter stale (free'd) pointers.
================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b35c9683 23185c6a
......@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
Default is set to (hb_interval * path_max_retrans + rto_max)
nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
default 0.3
default 3
nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
default 0.3
default 3
nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
default 3
......
......@@ -534,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
* @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
......
......@@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
......@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};
mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
......@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
mutex_unlock(&ipvs->sync_mutex);
return rc;
}
......@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
......
......@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
......@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
......
......@@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
nft_set_elem_change_active(ctx->net, set, ext);
}
return ret;
......@@ -9480,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
if (!trans)
return NULL;
trans->net = maybe_get_net(net);
if (!trans->net) {
kfree(trans);
return NULL;
}
refcount_inc(&set->refs);
trans->set = set;
trans->net = get_net(net);
trans->seq = gc_seq;
return trans;
......@@ -9738,6 +9744,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}
static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
{
unsigned int gc_seq;
/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
return gc_seq;
}
static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
{
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
......@@ -9823,9 +9845,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
gc_seq = nft_gc_seq_begin(nft_net);
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
......@@ -10038,7 +10058,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nft_gc_seq_end(nft_net, gc_seq);
nf_tables_commit_release(net);
return 0;
......@@ -11039,6 +11059,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
unsigned int gc_seq;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
......@@ -11046,6 +11067,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nf_tables_destroy_list))
rcu_barrier();
again:
......@@ -11068,6 +11092,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
if (restart)
goto again;
}
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
......@@ -11105,12 +11131,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
__nft_release_tables(net);
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
......
......@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
if (set->flags & NFT_SET_OBJECT)
return -EOPNOTSUPP;
if (set->ops->update == NULL)
return -EOPNOTSUPP;
......
......@@ -566,6 +566,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
goto out;
if (last) {
if (nft_set_elem_expired(&f->mt[b].e->ext))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
......@@ -600,17 +602,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_pipapo_elem *ret;
ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
if (IS_ERR(ret))
return ret;
if (nft_set_elem_expired(&ret->ext))
return ERR_PTR(-ENOENT);
return ret;
}
/**
......@@ -1549,7 +1542,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @_set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
......@@ -1697,6 +1690,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
{
#ifdef CONFIG_PROVE_LOCKING
const struct net *net = read_pnet(&set->net);
return lockdep_is_held(&nft_pernet(net)->commit_mutex);
#else
return true;
#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
......@@ -1705,7 +1709,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;
m = rcu_dereference(priv->match);
m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
......@@ -1732,11 +1736,7 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
if (IS_ERR(e))
return;
struct nft_pipapo_elem *e = elem->priv;
nft_set_elem_change_active(net, set, &e->ext);
}
......@@ -1950,10 +1950,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
data = (const u8 *)nft_set_ext_key(&e->ext);
e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment