Commit 24c7a64e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Fix crash with malformed ebtables blob which do not provide all
   entry points, from Florian Westphal.

2) Fix possible TCP connection clogging up with default 5-days
   timeout in conntrack, from Florian.

3) Fix crash in nf_tables tproxy with unsupported chains, also from Florian.

4) Do not allow to update implicit chains.

5) Make table handle allocation per-netns to fix data race.

6) Do not truncated payload length and offset, and checksum offset.
   Instead report EINVAl.

7) Enable chain stats update via static key iff no error occurs.

8) Restrict osf expression to ip, ip6 and inet families.

9) Restrict tunnel expression to netdev family.

10) Fix crash when trying to bind again an already bound chain.

11) Flowtable garbage collector might leave behind pending work to
    delete entries. This patch comes with a previous preparation patch
    as dependency.

12) Allow net.netfilter.nf_conntrack_frag6_high_thresh to be lowered,
    from Eric Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_defrag_ipv6: allow nf_conntrack_frag6_high_thresh increases
  netfilter: flowtable: fix stuck flows on cleanup due to pending work
  netfilter: flowtable: add function to invoke garbage collection immediately
  netfilter: nf_tables: disallow binding to already bound chain
  netfilter: nft_tunnel: restrict it to netdev family
  netfilter: nft_osf: restrict osf to ipv4, ipv6 and inet families
  netfilter: nf_tables: do not leave chain stats enabled on error
  netfilter: nft_payload: do not truncate csum_offset and csum_type
  netfilter: nft_payload: report ERANGE for too long offset and length
  netfilter: nf_tables: make table handle allocation per-netns friendly
  netfilter: nf_tables: disallow updates of implicit chain
  netfilter: nft_tproxy: restrict to prerouting hook
  netfilter: conntrack: work around exceeded receive window
  netfilter: ebtables: reject blobs that don't provide all entry points
====================

Link: https://lore.kernel.org/r/20220824220330.64283-1-pablo@netfilter.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents b09da012 00cd7bf9
......@@ -94,10 +94,6 @@ struct ebt_table {
struct ebt_replace_kernel *table;
unsigned int valid_hooks;
rwlock_t lock;
/* e.g. could be the table explicitly only allows certain
* matches, targets, ... 0 == let it in */
int (*check)(const struct ebt_table_info *info,
unsigned int valid_hooks);
/* the data used by the kernel */
struct ebt_table_info *private;
struct nf_hook_ops *ops;
......
......@@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);
......@@ -306,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
struct flow_offload *flow);
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
......
......@@ -1652,6 +1652,7 @@ struct nftables_pernet {
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
u8 validate_state;
};
......
......@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)&initial_chain,
};
static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
{
if (valid_hooks & ~(1 << NF_BR_BROUTING))
return -EINVAL;
return 0;
}
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
.check = check,
.me = THIS_MODULE,
};
......
......@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
{
if (valid_hooks & ~FILTER_VALID_HOOKS)
return -EINVAL;
return 0;
}
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
.check = check,
.me = THIS_MODULE,
};
......
......@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
{
if (valid_hooks & ~NAT_VALID_HOOKS)
return -EINVAL;
return 0;
}
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
.check = check,
.me = THIS_MODULE,
};
......
......@@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_iterate;
}
/* the table doesn't like it */
if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
if (repl->valid_hooks != t->valid_hooks)
goto free_unlock;
if (repl->num_counters && repl->num_counters != t->private->nentries) {
......@@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (ret != 0)
goto free_chainstack;
if (table->check && table->check(newinfo, table->valid_hooks)) {
ret = -EINVAL;
goto free_chainstack;
}
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
......
......@@ -86,7 +86,6 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[1].extra2 = &nf_frag->fqdir->high_thresh;
table[2].data = &nf_frag->fqdir->high_thresh;
table[2].extra1 = &nf_frag->fqdir->low_thresh;
table[2].extra2 = &nf_frag->fqdir->high_thresh;
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
......
......@@ -655,6 +655,37 @@ static bool tcp_in_window(struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res) {
bool seq_ok = before(seq, sender->td_maxend + 1);
if (!seq_ok) {
u32 overshot = end - sender->td_maxend + 1;
bool ack_ok;
ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
if (in_recv_win &&
ack_ok &&
overshot <= receiver->td_maxwin &&
before(sack, receiver->td_end + 1)) {
/* Work around TCPs that send more bytes than allowed by
* the receive window.
*
* If the (marked as invalid) packet is allowed to pass by
* the ruleset and the peer acks this data, then its possible
* all future packets will trigger 'ACK is over upper bound' check.
*
* Thus if only the sequence check fails then do update td_end so
* possible ACK for this data can update internal state.
*/
sender->td_end = end;
sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%u bytes more than expected", overshot);
return res;
}
}
nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%s",
before(seq, sender->td_maxend + 1) ?
......
......@@ -437,12 +437,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
}
}
void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
{
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
}
static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
nf_flow_table_gc_run(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
......@@ -600,11 +605,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
nf_flow_table_offload_flush(flow_table);
if (nf_flowtable_hw_offload(flow_table))
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
/* ... no more pending work after this stage ... */
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_gc_run(flow_table);
nf_flow_table_offload_flush_cleanup(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
......
......@@ -1074,6 +1074,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
flow_offload_queue_work(offload);
}
void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable)) {
flush_workqueue(nf_flow_offload_del_wq);
nf_flow_table_gc_run(flowtable);
}
}
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable)) {
......
......@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static u64 table_handle;
enum {
NFT_VALIDATE_SKIP = 0,
......@@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
table->handle = ++table_handle;
table->handle = ++nft_net->table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
......@@ -2196,9 +2195,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_stats __percpu *stats = NULL;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_rule_blob *blob;
......@@ -2236,7 +2235,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
......@@ -2319,6 +2317,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
if (stats)
static_branch_inc(&nft_counters_enabled);
table->use++;
return 0;
......@@ -2574,6 +2575,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
if (chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
......@@ -9707,6 +9711,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
if (nft_chain_is_bound(chain))
return -EINVAL;
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
......
......@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_FORWARD));
unsigned int hooks;
switch (ctx->family) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
case NFPROTO_INET:
hooks = (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_FORWARD);
break;
default:
return -EOPNOTSUPP;
}
return nft_chain_validate_hooks(ctx->chain, hooks);
}
static bool nft_osf_reduce(struct nft_regs_track *track,
......
......@@ -740,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
priv->csum_type =
ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
priv->csum_offset =
ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
&csum_offset);
if (err < 0)
return err;
priv->csum_offset = csum_offset;
}
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
......@@ -761,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->csum_flags = flags;
}
switch (priv->csum_type) {
switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
......@@ -775,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
default:
return -EOPNOTSUPP;
}
priv->csum_type = csum_type;
return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
......@@ -833,6 +840,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
{
enum nft_payload_bases base;
unsigned int offset, len;
int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
......@@ -859,8 +867,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
if (err < 0)
return ERR_PTR(err);
err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
if (err < 0)
return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
......
......@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
return 0;
}
static int nft_tproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
}
static struct nft_expr_type nft_tproxy_type;
static const struct nft_expr_ops nft_tproxy_ops = {
.type = &nft_tproxy_type,
......@@ -321,6 +328,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.destroy = nft_tproxy_destroy,
.dump = nft_tproxy_dump,
.reduce = NFT_REDUCE_READONLY,
.validate = nft_tproxy_validate,
};
static struct nft_expr_type nft_tproxy_type __read_mostly = {
......
......@@ -161,6 +161,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
.family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment