Commit 1ecaf17d authored by Jakub Kicinski

Merge tag 'nf-next-2023-05-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
Netfilter updates for net-next

nftables updates:

1. Allow key existence checks with maps.
   At the moment the kernel requires userspace to pass a destination
   register for the associated value, make this optional so userspace
   can query if the key exists, just like with normal sets.

2. nftables maintains a counter per set that holds the number of
   elements.  This counter gets decremented on element removal,
   but it's only incremented if the set has an upper maximum value.
   Increment unconditionally, this will allow us to update the
   maximum value later on.

3. Add DCCP option matching, from Jeremy Sowden.

4. use struct_size macro, from Christophe JAILLET.

Conntrack:

5. Squash holes in struct nf_conntrack_expect, also Christophe JAILLET.

6. Allow clash resolution for GRE Protocol to avoid a packet drop,
   from Faicker Mo.

Flowtable:

Simplify route logic and split large functions into smaller
chunks, from Pablo Neira Ayuso.

* tag 'nf-next-2023-05-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: flowtable: split IPv6 datapath in helper functions
  netfilter: flowtable: split IPv4 datapath in helper functions
  netfilter: flowtable: simplify route logic
  netfilter: conntrack: allow insertion clash of gre protocol
  netfilter: nft_set_pipapo: Use struct_size()
  netfilter: Reorder fields in 'struct nf_conntrack_expect'
  netfilter: nft_exthdr: add boolean DCCP option matching
  netfilter: nf_tables: always increment set element count
  netfilter: nf_tables: relax set/map validation checks
====================

Link: https://lore.kernel.org/r/20230518100759.84858-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 02f8fc1a e05b5362
......@@ -26,6 +26,15 @@ struct nf_conntrack_expect {
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
/* Usage count. */
refcount_t use;
/* Flags */
unsigned int flags;
/* Expectation class */
unsigned int class;
/* Function to call after setup and insertion */
void (*expectfn)(struct nf_conn *new,
struct nf_conntrack_expect *this);
......@@ -39,15 +48,6 @@ struct nf_conntrack_expect {
/* Timer function; deletes the expectation. */
struct timer_list timeout;
/* Usage count. */
refcount_t use;
/* Flags */
unsigned int flags;
/* Expectation class */
unsigned int class;
#if IS_ENABLED(CONFIG_NF_NAT)
union nf_inet_addr saved_addr;
/* This is the original per-proto part, used to map the
......
......@@ -263,8 +263,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
up_write(&flow_table->flow_block_lock);
}
int flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route);
void flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
......
......@@ -859,12 +859,14 @@ enum nft_exthdr_flags {
* @NFT_EXTHDR_OP_TCP: match against tcp options
* @NFT_EXTHDR_OP_IPV4: match against ipv4 options
* @NFT_EXTHDR_OP_SCTP: match against sctp chunks
* @NFT_EXTHDR_OP_DCCP: match against dccp options
*/
enum nft_exthdr_op {
NFT_EXTHDR_OP_IPV6,
NFT_EXTHDR_OP_TCPOPT,
NFT_EXTHDR_OP_IPV4,
NFT_EXTHDR_OP_SCTP,
NFT_EXTHDR_OP_DCCP,
__NFT_EXTHDR_OP_MAX
};
#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1)
......
......@@ -296,6 +296,7 @@ void nf_conntrack_gre_init_net(struct net *net)
/* protocol helper struct */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
.l4proto = IPPROTO_GRE,
.allow_clash = true,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
......
......@@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
if (!dst_hold_safe(route->tuple[dir].dst))
return -1;
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
......@@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow,
dst_release(flow->tuplehash[dir].tuple.dst_cache);
}
int flow_offload_route_init(struct flow_offload *flow,
void flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route)
{
int err;
err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
if (err < 0)
return err;
err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
if (err < 0)
goto err_route_reply;
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
flow->type = NF_FLOW_OFFLOAD_ROUTE;
return 0;
err_route_reply:
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);
......
This diff is collapsed.
......@@ -6541,10 +6541,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
!atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
err = -ENFILE;
goto err_set_full;
if (!(flags & NFT_SET_ELEM_CATCHALL)) {
unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
if (!atomic_add_unless(&set->nelems, 1, max)) {
err = -ENFILE;
goto err_set_full;
}
}
nft_trans_elem(trans) = elem;
......
......@@ -10,6 +10,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
......@@ -406,6 +407,82 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
/* Evaluate a DCCP option existence match.
 *
 * Walks the option area of the packet's DCCP header looking for an option
 * whose type equals priv->type.  Stores 1 in the destination register when
 * such an option is found.  Stores 0 when no option matches or when the
 * packet cannot be parsed: transport protocol is not DCCP, the fragment
 * offset is non-zero, a header or option read fails, or a multi-byte option
 * is truncated or carries a length below two.
 */
static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
unsigned int thoff, dataoff, optoff, optlen, i;
u32 *dest = &regs->data[priv->dreg];
const struct dccp_hdr *dh;
struct dccp_hdr _dh;
/* Only DCCP packets with a zero fragment offset are examined. */
if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
goto err;
thoff = nft_thoff(pkt);
/* _dh is the on-stack buffer handed to skb_header_pointer() for the read. */
dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
if (!dh)
goto err;
/* dccph_doff is scaled by sizeof(u32) to obtain a byte offset.  The option
 * area spans from the length reported by __dccp_hdr_len() up to that
 * offset; if the span is empty there is nothing to search.
 */
dataoff = dh->dccph_doff * sizeof(u32);
optoff = __dccp_hdr_len(dh);
if (dataoff <= optoff)
goto err;
optlen = dataoff - optoff;
/* i is the byte offset of the current option within the option area. */
for (i = 0; i < optlen; ) {
/* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
 * length; the remaining options are at least 2B long. In
 * all cases, the first byte contains the option type. In
 * multi-byte options, the second byte contains the option
 * length, which must be at least two: 1 for the type plus 1 for
 * the length plus 0-253 for any following option data. We
 * aren't interested in the option data, only the type and the
 * length, so we don't need to read more than two bytes at a
 * time.
 */
unsigned int buflen = optlen - i;
u8 buf[2], *bufp;
u8 type, len;
/* Clamp the read to the two bytes we care about (or the one byte left). */
if (buflen > sizeof(buf))
buflen = sizeof(buf);
bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
&buf);
if (!bufp)
goto err;
type = bufp[0];
/* Match found: report presence and stop scanning. */
if (type == priv->type) {
*dest = 1;
return;
}
/* Single-byte option: step over it. */
if (type <= DCCPO_MAX_RESERVED) {
i++;
continue;
}
/* Multi-byte option but only one byte remains: no length byte to read. */
if (buflen < 2)
goto err;
len = bufp[1];
/* A length below two cannot cover the type and length bytes themselves,
 * and a length of zero would never advance i.
 */
if (len < 2)
goto err;
i += len;
}
/* Reached on any parse failure and when the loop ends without a match. */
err:
*dest = 0;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
......@@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
/* Initialise a DCCP exthdr expression.
 *
 * Runs the common nft_exthdr_init() setup first and propagates its error,
 * then rejects the expression with -EOPNOTSUPP unless NFT_EXTHDR_F_PRESENT
 * is set in priv->flags.
 *
 * Returns 0 on success or a negative error code.
 */
static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
int err = nft_exthdr_init(ctx, expr, tb);
if (err < 0)
return err;
/* Expressions without the "present" flag are not supported for DCCP. */
if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
return -EOPNOTSUPP;
return 0;
}
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
......@@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
/* Expression operations for DCCP option matching: evaluation and
 * initialisation use the DCCP-specific handlers, while dump and reduce
 * use nft_exthdr_dump() and nft_exthdr_reduce().
 */
static const struct nft_expr_ops nft_exthdr_dccp_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
.eval = nft_exthdr_dccp_eval,
.init = nft_exthdr_dccp_init,
.dump = nft_exthdr_dump,
.reduce = nft_exthdr_reduce,
};
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
......@@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
case NFT_EXTHDR_OP_DCCP:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_dccp_ops;
break;
}
return ERR_PTR(-EOPNOTSUPP);
......
......@@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
break;
}
if (!dst_hold_safe(this_dst))
return -ENOENT;
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
if (!other_dst)
if (!other_dst) {
dst_release(this_dst);
return -ENOENT;
}
nft_default_forward_path(route, this_dst, dir);
nft_default_forward_path(route, other_dst, !dir);
......@@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow)
goto err_flow_alloc;
if (flow_offload_route_init(flow, &route) < 0)
goto err_flow_add;
flow_offload_route_init(flow, &route);
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
......@@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (ret < 0)
goto err_flow_add;
dst_release(route.tuple[!dir].dst);
return;
err_flow_add:
flow_offload_free(flow);
err_flow_alloc:
dst_release(route.tuple[dir].dst);
dst_release(route.tuple[!dir].dst);
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
......
......@@ -19,6 +19,7 @@ struct nft_lookup {
struct nft_set *set;
u8 sreg;
u8 dreg;
bool dreg_set;
bool invert;
struct nft_set_binding binding;
};
......@@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
}
if (ext) {
if (set->flags & NFT_SET_MAP)
if (priv->dreg_set)
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
......@@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (flags & ~NFT_LOOKUP_F_INV)
return -EINVAL;
if (flags & NFT_LOOKUP_F_INV) {
if (set->flags & NFT_SET_MAP)
return -EINVAL;
if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
}
}
if (tb[NFTA_LOOKUP_DREG] != NULL) {
......@@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;
priv->dreg_set = true;
} else if (set->flags & NFT_SET_MAP) {
/* Map given, but user asks for lookup only (i.e. to
* ignore value associated with key).
*
* This makes no sense for anonymous maps since they are
* scoped to the rule, but for named sets this can be useful.
*/
if (set->flags & NFT_SET_ANONYMOUS)
return -EINVAL;
}
priv->binding.flags = set->flags & NFT_SET_MAP;
......@@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
if (priv->set->flags & NFT_SET_MAP)
if (priv->dreg_set)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
......
......@@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
struct nft_pipapo_match *new;
int i;
new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
GFP_KERNEL);
new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
......@@ -2059,8 +2058,7 @@ static int nft_pipapo_init(const struct nft_set *set,
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
GFP_KERNEL);
m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL);
if (!m)
return -ENOMEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment