Commit 07cf7974 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter updates for net-next

First patch, from myself, is a bug fix. The issue (connect timeout) is
ancient, so I think its safe to give this more soak time given the esoteric
conditions needed to trigger this.
Also updates the existing selftest to cover this.

Add netlink extacks when an update references a non-existent
table/chain/set.  This allows userspace to provide much better
errors to the user, from Pablo Neira Ayuso.

Last patch adds more policy checks to nf_tables as a better
alternative to the existing runtime checks, from Phil Sutter.

* tag 'nf-next-23-09-28' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: Utilize NLA_POLICY_NESTED_ARRAY
  netfilter: nf_tables: missing extended netlink error in lookup functions
  selftests: netfilter: test nat source port clash resolution interaction with tcp early demux
  netfilter: nf_nat: undo erroneous tcp edemux lookup after port clash
====================

Link: https://lore.kernel.org/r/20230928144916.18339-1-fw@strlen.deSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 2b464cc2 013714bf
......@@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
}
#endif
static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
{
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
const struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return false;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
case IPPROTO_UDP:
break;
default:
return false;
}
dir = CTINFO2DIR(ctinfo);
if (dir != IP_CT_DIR_ORIGINAL)
return false;
return ct->tuplehash[!dir].tuple.dst.u.all != sport;
}
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
......@@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
ret = nf_nat_ipv4_fn(priv, skb, state);
if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
!inet_sk_transparent(sk))
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
return ret;
/* skb has a socket assigned via tcp edemux. We need to check
* if nf_nat_ipv4_fn() has mangled the packet in a way that
* edemux would not have found this socket.
*
* This includes both changes to the source address and changes
* to the source port, which are both handled by the
* nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
* might have found a socket for the old (pre-snat) address.
*/
if (saddr != ip_hdr(skb)->saddr ||
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb); /* TCP edemux obtained wrong socket */
return ret;
......@@ -937,6 +974,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
return nf_nat_inet_fn(priv, skb, state);
}
static unsigned int
nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct in6_addr saddr = ipv6_hdr(skb)->saddr;
struct sock *sk = skb->sk;
unsigned int ret;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
return ret;
/* see nf_nat_ipv4_local_in */
if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb);
return ret;
}
static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
......@@ -1051,7 +1109,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv6_fn,
.hook = nf_nat_ipv6_local_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
......
......@@ -3316,7 +3316,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
......@@ -4254,12 +4254,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_SET_HANDLE] = { .type = NLA_U64 },
[NFTA_SET_EXPR] = { .type = NLA_NESTED },
[NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
[NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
[NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
[NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
static struct nft_set *nft_set_lookup(const struct nft_table *table,
......@@ -4695,8 +4699,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
......@@ -4713,10 +4719,6 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return err;
}
static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
[NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static int nft_set_desc_concat_parse(const struct nlattr *attr,
struct nft_set_desc *desc)
{
......@@ -5498,7 +5500,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
......@@ -5506,7 +5508,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy),
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
......@@ -6025,8 +6027,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
......@@ -6919,8 +6923,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
if (!list_empty(&set->bindings) &&
(set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
......@@ -7195,8 +7201,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
}
if (nft_set_is_anonymous(set))
return -EOPNOTSUPP;
......@@ -8680,6 +8688,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
......@@ -8705,13 +8714,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
genmask, 0);
if (IS_ERR(table))
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
}
flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable))
if (IS_ERR(flowtable)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return PTR_ERR(flowtable);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
......
......@@ -11,16 +11,18 @@ ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
socatpid=0
cleanup()
{
[ $socatpid -gt 0 ] && kill $socatpid
ip netns del $ns1
ip netns del $ns2
}
iperf3 -v > /dev/null 2>&1
socat -h > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without iperf3"
echo "SKIP: Could not run test without socat"
exit $ksft_skip
fi
......@@ -60,8 +62,8 @@ ip netns exec $ns2 ip link set up dev veth2
ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
# Create a server in one namespace
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
iperfs=$!
ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
socatpid=$!
# Restrict source port to just one so we don't have to exhaust
# all others.
......@@ -83,17 +85,43 @@ sleep 1
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
ret=$?
kill $iperfs
# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
if [ $ret -eq 0 ]; then
echo "PASS: socat can connect via NAT'd address"
else
echo "FAIL: socat cannot connect via NAT'd address"
exit 1
fi
exit 0
# check sport clashres.
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
cpid1=$!
sleep 1
# if connect succeeds, client closes instantly due to EOF on stdin.
# if connect hangs, it will time out after 5s.
echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
cpid2=$!
time_then=$(date +%s)
wait $cpid2
rv=$?
time_now=$(date +%s)
# Check how much time has elapsed, expectation is for
# 'cpid2' to connect and then exit (and no connect delay).
delta=$((time_now - time_then))
if [ $delta -lt 2 -a $rv -eq 0 ]; then
echo "PASS: could connect to service via redirected ports"
else
echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
ret=1
fi
exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment