Commit 9c5af2d7 authored by Paolo Abeni

Merge tag 'nf-24-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Ignore ifindex for address types other than multicast/link-local in
   IPv6 fragment reassembly, from Tom Hughes.

2) Initialize extack for begin/end netlink message marker in batch,
   from Donald Hunter.

3) Initialize extack for flowtable offload support, also from Donald.

4) Drop packets with cloned unconfirmed conntracks in nfqueue;
   later it should be possible to explore lookup after reinject, but
   Florian prefers this approach at this stage. From Florian Westphal.

5) Add selftest for cloned unconfirmed conntracks in nfqueue for
   previous update.

6) Audit after filling netlink header successfully in object dump,
   from Phil Sutter.

7-8) Fix concurrent dump and reset, which could result in underflow of
     counter / quota objects.

netfilter pull request 24-08-15

* tag 'nf-24-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
  netfilter: nf_tables: Introduce nf_tables_getobj_single
  netfilter: nf_tables: Audit log dump reset after the fact
  selftests: netfilter: add test for br_netfilter+conntrack+queue combination
  netfilter: nf_queue: drop packets with cloned unconfirmed conntracks
  netfilter: flowtable: initialise extack before use
  netfilter: nfnetlink: Initialise extack before use in ACKs
  netfilter: allow ipv6 fragments to arrive on different devices
====================

Link: https://patch.msgid.link/20240814222042.150590-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 34dfdf21 bd662c42
......@@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv,
if (likely(nf_ct_is_confirmed(ct)))
return NF_ACCEPT;
if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
nf_reset_ct(skb);
return NF_ACCEPT;
}
WARN_ON_ONCE(skb_shared(skb));
WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
/* We can't call nf_confirm here, it would create a dependency
* on nf_conntrack module.
......
......@@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
IPV6_ADDR_LINKLOCAL)))
key.iif = 0;
q = inet_frag_find(nf_frag->fqdir, &key);
if (!q)
return NULL;
......
......@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
struct list_head *block_cb_list)
{
struct flow_cls_offload cls_flow = {};
struct netlink_ext_ack extack = {};
struct flow_block_cb *block_cb;
struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
int err, i = 0;
......
......@@ -8020,6 +8020,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
/*
 * Dump callback used for reset requests: runs nf_tables_dump_obj() with the
 * per-netns commit_mutex held for the duration of the call and hands its
 * return value back unchanged.
 */
static int nf_tables_dumpreset_obj(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct nftables_pernet *pernet = nft_pernet(sock_net(skb->sk));
	int rc;

	mutex_lock(&pernet->commit_mutex);
	rc = nf_tables_dump_obj(skb, cb);
	mutex_unlock(&pernet->commit_mutex);

	return rc;
}
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
......@@ -8036,10 +8049,16 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
if (nla[NFTA_OBJ_TYPE])
ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
return 0;
}
static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
ctx->reset = true;
return 0;
return nf_tables_dump_obj_start(cb);
}
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
......@@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
static struct sk_buff *
nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
......@@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
struct net *net = info->net;
struct nft_object *obj;
struct sk_buff *skb2;
bool reset = false;
u32 objtype;
int err;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
.data = (void *)nla,
};
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
if (!nla[NFTA_OBJ_NAME] ||
!nla[NFTA_OBJ_TYPE])
return -EINVAL;
return ERR_PTR(-EINVAL);
table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
return PTR_ERR(table);
return ERR_CAST(table);
}
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
if (IS_ERR(obj)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
return PTR_ERR(obj);
return ERR_CAST(obj);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
return ERR_PTR(-ENOMEM);
if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
reset = true;
err = nf_tables_fill_obj_info(skb2, net, portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
family, table, obj, reset);
if (err < 0) {
kfree_skb(skb2);
return ERR_PTR(err);
}
return skb2;
}
static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
u32 portid = NETLINK_CB(skb).portid;
struct sk_buff *skb2;
if (reset) {
const struct nftables_pernet *nft_net;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
.data = (void *)nla,
};
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
skb2 = nf_tables_getobj_single(portid, info, nla, false);
if (IS_ERR(skb2))
return PTR_ERR(skb2);
return nfnetlink_unicast(skb2, info->net, portid);
}
static int nf_tables_getobj_reset(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
u32 portid = NETLINK_CB(skb).portid;
struct net *net = info->net;
struct sk_buff *skb2;
char *buf;
nft_net = nft_pernet(net);
buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dumpreset_obj_start,
.dump = nf_tables_dumpreset_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
.data = (void *)nla,
};
audit_log_nfcfg(buf,
family,
1,
AUDIT_NFT_OP_OBJ_RESET,
GFP_ATOMIC);
kfree(buf);
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
family, table, obj, reset);
if (err < 0)
goto err_fill_obj_info;
if (!try_module_get(THIS_MODULE))
return -EINVAL;
rcu_read_unlock();
mutex_lock(&nft_net->commit_mutex);
skb2 = nf_tables_getobj_single(portid, info, nla, true);
mutex_unlock(&nft_net->commit_mutex);
rcu_read_lock();
module_put(THIS_MODULE);
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
if (IS_ERR(skb2))
return PTR_ERR(skb2);
err_fill_obj_info:
kfree_skb(skb2);
return err;
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
nla_len(nla[NFTA_OBJ_TABLE]),
(char *)nla_data(nla[NFTA_OBJ_TABLE]),
nft_net->base_seq);
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
kfree(buf);
return nfnetlink_unicast(skb2, net, portid);
}
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
......@@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
.call = nf_tables_getobj,
.call = nf_tables_getobj_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
......
......@@ -427,8 +427,10 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
nfnl_unlock(subsys_id);
if (nlh->nlmsg_flags & NLM_F_ACK)
if (nlh->nlmsg_flags & NLM_F_ACK) {
memset(&extack, 0, sizeof(extack));
nfnl_err_add(&err_list, nlh, 0, &extack);
}
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
......@@ -577,6 +579,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
} else if (nlh->nlmsg_flags & NLM_F_ACK) {
memset(&extack, 0, sizeof(extack));
nfnl_err_add(&err_list, nlh, 0, &extack);
}
} else {
......
......@@ -820,9 +820,40 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
struct nf_conn *ct = (void *)skb_nfct(entry->skb);
unsigned long status;
unsigned int use;
if (ct && ((ct->status & flags) == IPS_DYING))
if (!ct)
return false;
status = READ_ONCE(ct->status);
if ((status & flags) == IPS_DYING)
return true;
if (status & IPS_CONFIRMED)
return false;
/* in some cases skb_clone() can occur after initial conntrack
* pickup, but conntrack assumes exclusive skb->_nfct ownership for
* unconfirmed entries.
*
* This happens for br_netfilter and with ip multicast routing.
* This can't be solved with serialization here because one clone could
* have been queued for local delivery.
*/
use = refcount_read(&ct->ct_general.use);
if (likely(use == 1))
return false;
/* Can't decrement further? Exclusive ownership. */
if (!refcount_dec_not_one(&ct->ct_general.use))
return false;
skb_set_nfct(entry->skb, 0);
/* No nf_ct_put(): we already decremented .use and it cannot
* drop down to 0.
*/
return true;
#endif
return false;
......
......@@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
TEST_PROGS := br_netfilter.sh bridge_brouter.sh
TEST_PROGS += br_netfilter_queue.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
......
#!/bin/bash
# Selftest: flood broadcast pings across a bridge while br_netfilter is
# loaded and forwarded ICMP is handed to nfnetlink queue 0, then check that
# the kernel did not become tainted.

# checktool, setup_ns, cleanup_all_ns and busywait are not defined in this
# script; presumably they are provided by lib.sh -- verify there.
source lib.sh

# NOTE(review): looks like this bails out when nft is unavailable -- confirm
# the exact behaviour against checktool in lib.sh.
checktool "nft --version" "run test without nft tool"

# EXIT handler: hands teardown over to cleanup_all_ns.
cleanup() {
cleanup_all_ns
}

# The variables c1, c2, c3 and sender are expanded later in this script but
# never assigned here; presumably setup_ns sets each one to the name of a
# network namespace it creates -- TODO confirm.
setup_ns c1 c2 c3 sender

# Run cleanup on every exit path from this point on.
trap cleanup EXIT
# nf_queue_wait QUEUE_NUM
#
# Returns success when the calling process' nfnetlink_queue listing has a
# line that starts with optional spaces, then QUEUE_NUM, then a space.
nf_queue_wait()
{
	local queue_list="/proc/self/net/netfilter/nfnetlink_queue"

	grep -q "^ *$1 " "$queue_list"
}
# port_add NS DEV HOSTID
#
# Creates a veth pair whose two ends are both named DEV. One end stays in the
# current namespace, is attached to br0 and brought up; the other end is
# created inside namespace NS, gets 192.168.1.HOSTID/24 and is brought up.
port_add() {
	# Function-local copies: the positional arguments must not leak into
	# (or overwrite) variables in the script's global scope.
	local ns="$1"
	local dev="$2"
	local a="$3"

	ip link add name "$dev" type veth peer name "$dev" netns "$ns"

	ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev"
	ip -net "$ns" link set "$dev" up

	ip link set "$dev" master br0
	ip link set "$dev" up
}
# Unless invoked with "run", re-execute this script under "unshare -n" so the
# bridge and ruleset created below live in their own network namespace, and
# propagate that run's exit status.
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }

# Bridge with four veth ports, one per test namespace.
ip link add br0 type bridge
ip addr add 192.168.1.254/24 dev br0

port_add "$c1" "c1" 1
port_add "$c2" "c2" 2
port_add "$c3" "c3" 3
port_add "$sender" "sender" 253

ip link set br0 up

modprobe -q br_netfilter
sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1

# Sanity check: each receiver must answer a unicast ping before the actual
# test starts; the exit code identifies the receiver that failed.
ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1
ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2
ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3

# Count new connections and hand forwarded ICMP to queue 0.
nft -f /dev/stdin <<EOF
table ip filter {
chain forward {
type filter hook forward priority 0; policy accept;
ct state new counter
ip protocol icmp counter queue num 0 bypass
}
}
EOF
./nf_queue -t 5 > /dev/null &

# nf_queue_wait greps for the queue number given as its first argument, so
# pass 0 (the "queue num 0" from the ruleset above). Without an argument the
# pattern degenerates to "any line starting with a space".
# Assumes busywait forwards trailing arguments to the polled command --
# TODO confirm against lib.sh.
busywait 5000 nf_queue_wait 0

# Flush conntrack in the background while the broadcast flood is running.
for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done &
ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255

# The test passes when the kernel taint value is still 0 afterwards.
read -r t < /proc/sys/kernel/tainted
if [ "$t" -eq 0 ];then
	echo PASS: kernel not tainted
else
	echo ERROR: kernel is tainted
	exit 1
fi

exit 0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment