Commit c56e67f3 authored by Jakub Kicinski

Merge tag 'nf-23-10-04' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter patches for net

First patch resolves a regression with vlan header matching, this was
broken since 6.5 release.  From myself.

Second patch fixes an ancient problem with sctp connection tracking in
case INIT_ACK packets are delayed.  This comes with a selftest, both
patches from Xin Long.

Patch 4 extends the existing nftables audit selftest, from
Phil Sutter.

Patch 5, also from Phil, avoids a situation where nftables
would emit an audit record twice. This was broken since 5.13 days.

Patch 6, from myself, avoids spurious insertion failure if we encounter an
overlapping but expired range during element insertion with the
'nft_set_rbtree' backend. This problem exists since 6.2.

* tag 'nf-23-10-04' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: nft_set_rbtree: fix spurious insertion failure
  netfilter: nf_tables: Deduplicate nft_register_obj audit logs
  selftests: netfilter: Extend nft_audit.sh
  selftests: netfilter: test for sctp collision processing in nf_conntrack
  netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp
  netfilter: nft_payload: rebuild vlan header on h_proto access
====================

Link: https://lore.kernel.org/r/20231004141405.28749-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 513dbc10 08738827
...@@ -9,6 +9,7 @@ struct ip_ct_sctp { ...@@ -9,6 +9,7 @@ struct ip_ct_sctp {
enum sctp_conntrack state; enum sctp_conntrack state;
__be32 vtag[IP_CT_DIR_MAX]; __be32 vtag[IP_CT_DIR_MAX];
u8 init[IP_CT_DIR_MAX];
u8 last_dir; u8 last_dir;
u8 flags; u8 flags;
}; };
......
...@@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { ...@@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
...@@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { ...@@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
...@@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, ...@@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* (D) vtag must be same as init_vtag as found in INIT_ACK */ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir]) if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock; goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ACK) {
ct->proto.sctp.init[dir] = 0;
ct->proto.sctp.init[!dir] = 0;
} else if (sch->type == SCTP_CID_HEARTBEAT) { } else if (sch->type == SCTP_CID_HEARTBEAT) {
if (ct->proto.sctp.vtag[dir] == 0) { if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
...@@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, ...@@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
} }
/* If it is an INIT or an INIT ACK note down the vtag */ /* If it is an INIT or an INIT ACK note down the vtag */
if (sch->type == SCTP_CID_INIT || if (sch->type == SCTP_CID_INIT) {
sch->type == SCTP_CID_INIT_ACK) { struct sctp_inithdr _ih, *ih;
struct sctp_inithdr _inithdr, *ih;
ih = skb_header_pointer(skb, offset + sizeof(_sch), ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
sizeof(_inithdr), &_inithdr); if (!ih)
if (ih == NULL)
goto out_unlock; goto out_unlock;
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir); if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
ct->proto.sctp.init[!dir] = 0;
ct->proto.sctp.init[dir] = 1;
pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag; ct->proto.sctp.vtag[!dir] = ih->init_tag;
/* don't renew timeout on init retransmit so /* don't renew timeout on init retransmit so
...@@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, ...@@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
old_state == SCTP_CONNTRACK_CLOSED && old_state == SCTP_CONNTRACK_CLOSED &&
nf_ct_is_confirmed(ct)) nf_ct_is_confirmed(ct))
ignore = true; ignore = true;
} else if (sch->type == SCTP_CID_INIT_ACK) {
struct sctp_inithdr _ih, *ih;
__be32 vtag;
ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
if (!ih)
goto out_unlock;
vtag = ct->proto.sctp.vtag[!dir];
if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
goto out_unlock;
/* collision */
if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
vtag != ih->init_tag)
goto out_unlock;
pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
} }
ct->proto.sctp.state = new_state; ct->proto.sctp.state = new_state;
......
...@@ -7871,24 +7871,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, ...@@ -7871,24 +7871,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
return nft_delobj(&ctx, obj); return nft_delobj(&ctx, obj);
} }
void nft_obj_notify(struct net *net, const struct nft_table *table, static void
struct nft_object *obj, u32 portid, u32 seq, int event, __nft_obj_notify(struct net *net, const struct nft_table *table,
u16 flags, int family, int report, gfp_t gfp) struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{ {
struct nftables_pernet *nft_net = nft_pernet(net); struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb; struct sk_buff *skb;
int err; int err;
char *buf = kasprintf(gfp, "%s:%u",
table->name, nft_net->base_seq);
audit_log_nfcfg(buf,
family,
obj->handle,
event == NFT_MSG_NEWOBJ ?
AUDIT_NFT_OP_OBJ_REGISTER :
AUDIT_NFT_OP_OBJ_UNREGISTER,
gfp);
kfree(buf);
if (!report && if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
...@@ -7911,13 +7901,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, ...@@ -7911,13 +7901,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
err: err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
} }
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
char *buf = kasprintf(gfp, "%s:%u",
table->name, nft_net->base_seq);
audit_log_nfcfg(buf,
family,
obj->handle,
event == NFT_MSG_NEWOBJ ?
AUDIT_NFT_OP_OBJ_REGISTER :
AUDIT_NFT_OP_OBJ_UNREGISTER,
gfp);
kfree(buf);
__nft_obj_notify(net, table, obj, portid, seq, event,
flags, family, report, gfp);
}
EXPORT_SYMBOL_GPL(nft_obj_notify); EXPORT_SYMBOL_GPL(nft_obj_notify);
static void nf_tables_obj_notify(const struct nft_ctx *ctx, static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event) struct nft_object *obj, int event)
{ {
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
ctx->flags, ctx->family, ctx->report, GFP_KERNEL); ctx->seq, event, ctx->flags, ctx->family,
ctx->report, GFP_KERNEL);
} }
/* /*
......
...@@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt) ...@@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff; return pkt->inneroff;
} }
static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
{
unsigned int len = priv->offset + priv->len;
/* data past ether src/dst requested, copy needed */
if (len > offsetof(struct ethhdr, h_proto))
return true;
return false;
}
void nft_payload_eval(const struct nft_expr *expr, void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs, struct nft_regs *regs,
const struct nft_pktinfo *pkt) const struct nft_pktinfo *pkt)
...@@ -172,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr, ...@@ -172,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err; goto err;
if (skb_vlan_tag_present(skb) && if (skb_vlan_tag_present(skb) &&
priv->offset >= offsetof(struct ethhdr, h_proto)) { nft_payload_need_vlan_copy(priv)) {
if (!nft_payload_copy_vlan(dest, skb, if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len)) priv->offset, priv->len))
goto err; goto err;
......
...@@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, ...@@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
rb_erase(&rbe->node, &priv->root); rb_erase(&rbe->node, &priv->root);
} }
static int nft_rbtree_gc_elem(const struct nft_set *__set, static const struct nft_rbtree_elem *
struct nft_rbtree *priv, nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
struct nft_rbtree_elem *rbe, struct nft_rbtree_elem *rbe, u8 genmask)
u8 genmask)
{ {
struct nft_set *set = (struct nft_set *)__set; struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node); struct rb_node *prev = rb_prev(&rbe->node);
...@@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, ...@@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc) if (!gc)
return -ENOMEM; return ERR_PTR(-ENOMEM);
/* search for end interval coming before this element. /* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they * end intervals don't carry a timeout extension, they
...@@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, ...@@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
prev = rb_prev(prev); prev = rb_prev(prev);
} }
rbe_prev = NULL;
if (prev) { if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev); nft_rbtree_gc_remove(net, set, priv, rbe_prev);
...@@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, ...@@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/ */
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc)) if (WARN_ON_ONCE(!gc))
return -ENOMEM; return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe_prev); nft_trans_gc_elem_add(gc, rbe_prev);
} }
...@@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set, ...@@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
nft_rbtree_gc_remove(net, set, priv, rbe); nft_rbtree_gc_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc)) if (WARN_ON_ONCE(!gc))
return -ENOMEM; return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe); nft_trans_gc_elem_add(gc, rbe);
nft_trans_gc_queue_sync_done(gc); nft_trans_gc_queue_sync_done(gc);
return 0; return rbe_prev;
} }
static bool nft_rbtree_update_first(const struct nft_set *set, static bool nft_rbtree_update_first(const struct nft_set *set,
...@@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, ...@@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net); u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net); u8 genmask = nft_genmask_next(net);
int d, err; int d;
/* Descend the tree to search for an existing element greater than the /* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the * key value to insert that is greater than the new element. This is the
...@@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, ...@@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/ */
if (nft_set_elem_expired(&rbe->ext) && if (nft_set_elem_expired(&rbe->ext) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) { nft_set_elem_active(&rbe->ext, cur_genmask)) {
err = nft_rbtree_gc_elem(set, priv, rbe, genmask); const struct nft_rbtree_elem *removed_end;
if (err < 0)
return err; removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
if (removed_end == rbe_le || removed_end == rbe_ge)
return -EAGAIN;
continue; continue;
} }
...@@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, ...@@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe = elem->priv; struct nft_rbtree_elem *rbe = elem->priv;
int err; int err;
write_lock_bh(&priv->lock); do {
write_seqcount_begin(&priv->count); if (fatal_signal_pending(current))
err = __nft_rbtree_insert(net, set, rbe, ext); return -EINTR;
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock); cond_resched();
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
} while (err == -EAGAIN);
return err; return err;
} }
......
...@@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ ...@@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_concat_range.sh nft_conntrack_helper.sh \ nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
conntrack_sctp_collision.sh
HOSTPKG_CONFIG := pkg-config HOSTPKG_CONFIG := pkg-config
CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
TEST_GEN_FILES = nf-queue connect_close audit_logread TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision
include ../lib.mk include ../lib.mk
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Testing For SCTP COLLISION SCENARIO as Below:
#
# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
#
# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
# Test topology parameters.
# The namespace names come from "mktemp -u", which only prints a unique name
# and does not create a file; the namespaces themselves are created in setup().
CLIENT_NS=$(mktemp -u client-XXXXXXXX)
# Address and SCTP port the client-side helper binds to.
CLIENT_IP="198.51.200.1"
CLIENT_PORT=1234
SERVER_NS=$(mktemp -u server-XXXXXXXX)
# Address and SCTP port the server-side helper binds to.
SERVER_IP="198.51.100.1"
SERVER_PORT=1234
ROUTER_NS=$(mktemp -u router-XXXXXXXX)
# Router-side addresses, used as next hops by the client and the server.
CLIENT_GW="198.51.200.2"
SERVER_GW="198.51.100.2"
# setup the topo
# Build the three-namespace topology (server <-> router <-> client), add the
# artificial INIT_ACK delay on the server side and install the conntrack-based
# filtering on the router.
setup() {
# one network namespace per role
ip net add $CLIENT_NS
ip net add $SERVER_NS
ip net add $ROUTER_NS
# veth pairs: link0 (server) <-> link1 (router), link3 (client) <-> link2 (router)
ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
# server side: bring the link up, assign the address, route to the client via the router
ip -n $SERVER_NS link set link0 up
ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
# router side: both links up, gateway addresses assigned, IPv4 forwarding enabled
ip -n $ROUTER_NS link set link1 up
ip -n $ROUTER_NS link set link2 up
ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
# client side: bring the link up, assign the address, route to the server via the router
ip -n $CLIENT_NS link set link3 up
ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
# simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
# tc on $SERVER_NS side
# The u32 filter below selects IP protocol 132 (SCTP) packets whose byte at
# offset 32 equals 2 (the INIT_ACK chunk type) and steers them to class 1:1;
# the netem qdisc attached to that class then delays them by 1200ms.
tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
0xff match u8 2 0xff at 32 flowid 1:1
tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
# simulate the ctstate check on OVS nf_conntrack
# Forwarded packets that conntrack classifies as INVALID or UNTRACKED are
# dropped; SCTP addressed to the router itself is dropped as well.
ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
# use a smaller number for assoc's max_retrans to reproduce the issue
# (the sctp module is loaded first so that the net.sctp.* sysctl exists)
modprobe sctp
ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
}
# Tear down everything setup() and do_test() created. Installed as the EXIT
# trap, so it runs on success and failure alike.
cleanup() {
	# Stop helper processes that may still be running in the namespaces.
	# Redirections are processed left to right: stdout has to be pointed at
	# /dev/null first so that "2>&1" sends stderr there too. pkill returning
	# non-zero because nothing matched is expected and ignored.
	ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
	ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
	# Remove the three test namespaces.
	ip net del "$CLIENT_NS"
	ip net del "$SERVER_NS"
	ip net del "$ROUTER_NS"
}
# Run the collision scenario: one sctp_collision helper per endpoint.
# The function's exit status is that of the client helper.
do_test() {
	local server_args="server $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT"
	local client_args="client $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT"

	# The server helper is started in the background ...
	ip net exec $SERVER_NS ./sctp_collision $server_args &
	# ... while the client helper runs in the foreground and decides the result.
	ip net exec $CLIENT_NS ./sctp_collision $client_args
}
# NOTE: one way to work around the issue is to set a smaller hb_interval
# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
# run the test case
# Always tear the namespaces down again, whatever the outcome.
trap cleanup EXIT
# Each step runs only if the previous one succeeded; "PASS!" is printed only
# when do_test (i.e. the client helper) returned success.
setup && \
echo "Test for SCTP Collision in nf_conntrack:" && \
do_test && echo "PASS!"
# Propagate the status of the chain above as the script's exit code.
exit $?
...@@ -12,10 +12,11 @@ nft --version >/dev/null 2>&1 || { ...@@ -12,10 +12,11 @@ nft --version >/dev/null 2>&1 || {
} }
logfile=$(mktemp) logfile=$(mktemp)
rulefile=$(mktemp)
echo "logging into $logfile" echo "logging into $logfile"
./audit_logread >"$logfile" & ./audit_logread >"$logfile" &
logread_pid=$! logread_pid=$!
trap 'kill $logread_pid; rm -f $logfile' EXIT trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
exec 3<"$logfile" exec 3<"$logfile"
do_test() { # (cmd, log) do_test() { # (cmd, log)
...@@ -26,12 +27,14 @@ do_test() { # (cmd, log) ...@@ -26,12 +27,14 @@ do_test() { # (cmd, log)
res=$(diff -a -u <(echo "$2") - <&3) res=$(diff -a -u <(echo "$2") - <&3)
[ $? -eq 0 ] && { echo "OK"; return; } [ $? -eq 0 ] && { echo "OK"; return; }
echo "FAIL" echo "FAIL"
echo "$res" grep -v '^\(---\|+++\|@@\)' <<< "$res"
((RC++)) ((RC--))
} }
nft flush ruleset nft flush ruleset
# adding tables, chains and rules
for table in t1 t2; do for table in t1 t2; do
do_test "nft add table $table" \ do_test "nft add table $table" \
"table=$table family=2 entries=1 op=nft_register_table" "table=$table family=2 entries=1 op=nft_register_table"
...@@ -62,6 +65,48 @@ for table in t1 t2; do ...@@ -62,6 +65,48 @@ for table in t1 t2; do
"table=$table family=2 entries=6 op=nft_register_rule" "table=$table family=2 entries=6 op=nft_register_rule"
done done
for ((i = 0; i < 500; i++)); do
echo "add rule t2 c3 counter accept comment \"rule $i\""
done >$rulefile
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=500 op=nft_register_rule'
# adding sets and elements
settype='type inet_service; counter'
setelem='{ 22, 80, 443 }'
setblock="{ $settype; elements = $setelem; }"
do_test "nft add set t1 s $setblock" \
"table=t1 family=2 entries=4 op=nft_register_set"
do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
"table=t1 family=2 entries=5 op=nft_register_set"
do_test "nft add element t1 s3 $setelem" \
"table=t1 family=2 entries=3 op=nft_register_setelem"
# adding counters
do_test 'nft add counter t1 c1' \
'table=t1 family=2 entries=1 op=nft_register_obj'
do_test 'nft add counter t2 c1; add counter t2 c2' \
'table=t2 family=2 entries=2 op=nft_register_obj'
# adding/updating quotas
do_test 'nft add quota t1 q1 { 10 bytes }' \
'table=t1 family=2 entries=1 op=nft_register_obj'
do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
'table=t2 family=2 entries=2 op=nft_register_obj'
# changing the quota value triggers obj update path
do_test 'nft add quota t1 q1 { 20 bytes }' \
'table=t1 family=2 entries=1 op=nft_register_obj'
# resetting rules
do_test 'nft reset rules t1 c2' \ do_test 'nft reset rules t1 c2' \
'table=t1 family=2 entries=3 op=nft_reset_rule' 'table=t1 family=2 entries=3 op=nft_reset_rule'
...@@ -70,19 +115,6 @@ do_test 'nft reset rules table t1' \ ...@@ -70,19 +115,6 @@ do_test 'nft reset rules table t1' \
table=t1 family=2 entries=3 op=nft_reset_rule table=t1 family=2 entries=3 op=nft_reset_rule
table=t1 family=2 entries=3 op=nft_reset_rule' table=t1 family=2 entries=3 op=nft_reset_rule'
do_test 'nft reset rules' \
'table=t1 family=2 entries=3 op=nft_reset_rule
table=t1 family=2 entries=3 op=nft_reset_rule
table=t1 family=2 entries=3 op=nft_reset_rule
table=t2 family=2 entries=3 op=nft_reset_rule
table=t2 family=2 entries=3 op=nft_reset_rule
table=t2 family=2 entries=3 op=nft_reset_rule'
for ((i = 0; i < 500; i++)); do
echo "add rule t2 c3 counter accept comment \"rule $i\""
done | do_test 'nft -f -' \
'table=t2 family=2 entries=500 op=nft_register_rule'
do_test 'nft reset rules t2 c3' \ do_test 'nft reset rules t2 c3' \
'table=t2 family=2 entries=189 op=nft_reset_rule 'table=t2 family=2 entries=189 op=nft_reset_rule
table=t2 family=2 entries=188 op=nft_reset_rule table=t2 family=2 entries=188 op=nft_reset_rule
...@@ -105,4 +137,57 @@ table=t2 family=2 entries=180 op=nft_reset_rule ...@@ -105,4 +137,57 @@ table=t2 family=2 entries=180 op=nft_reset_rule
table=t2 family=2 entries=188 op=nft_reset_rule table=t2 family=2 entries=188 op=nft_reset_rule
table=t2 family=2 entries=135 op=nft_reset_rule' table=t2 family=2 entries=135 op=nft_reset_rule'
# resetting sets and elements
elem=(22 ,80 ,443)
relem=""
for i in {1..3}; do
relem+="${elem[((i - 1))]}"
do_test "nft reset element t1 s { $relem }" \
"table=t1 family=2 entries=$i op=nft_reset_setelem"
done
do_test 'nft reset set t1 s' \
'table=t1 family=2 entries=3 op=nft_reset_setelem'
# deleting rules
readarray -t handles < <(nft -a list chain t1 c1 | \
sed -n 's/.*counter.* handle \(.*\)$/\1/p')
do_test "nft delete rule t1 c1 handle ${handles[0]}" \
'table=t1 family=2 entries=1 op=nft_unregister_rule'
cmd='delete rule t1 c1 handle'
do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
'table=t1 family=2 entries=2 op=nft_unregister_rule'
do_test 'nft flush chain t1 c2' \
'table=t1 family=2 entries=3 op=nft_unregister_rule'
do_test 'nft flush table t2' \
'table=t2 family=2 entries=509 op=nft_unregister_rule'
# deleting chains
do_test 'nft delete chain t2 c2' \
'table=t2 family=2 entries=1 op=nft_unregister_chain'
# deleting sets and elements
do_test 'nft delete element t1 s { 22 }' \
'table=t1 family=2 entries=1 op=nft_unregister_setelem'
do_test 'nft delete element t1 s { 80, 443 }' \
'table=t1 family=2 entries=2 op=nft_unregister_setelem'
do_test 'nft flush set t1 s2' \
'table=t1 family=2 entries=3 op=nft_unregister_setelem'
do_test 'nft delete set t1 s2' \
'table=t1 family=2 entries=1 op=nft_unregister_set'
do_test 'nft delete set t1 s3' \
'table=t1 family=2 entries=1 op=nft_unregister_set'
exit $RC exit $RC
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
/*
 * Helper for the SCTP INIT-collision conntrack selftest.
 *
 * Usage: <prog> <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>
 *
 * Both roles create a one-to-many SCTP socket, bind it to the local address,
 * listen on it and then actively connect() to the peer, so that both ends
 * send an INIT. The client then sends a message and waits for the reply; the
 * server waits for a message and sends it back.
 *
 * Returns 0 on success and -1 on any failure (bad arguments or a failing
 * socket call).
 */
int main(int argc, char *argv[])
{
	struct sockaddr_in saddr = {0}, daddr = {0};
	struct timeval tv = {25, 0};
	/* recvfrom() takes a socklen_t *, so the length must not be a plain int */
	socklen_t len = sizeof(daddr);
	char buf[] = "hello";
	int sd, ret;

	if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
		printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
		       argv[0]);
		return -1;
	}

	sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (sd < 0) {
		printf("Failed to create sd\n");
		return -1;
	}

	/* local endpoint */
	saddr.sin_family = AF_INET;
	saddr.sin_addr.s_addr = inet_addr(argv[2]);
	saddr.sin_port = htons(atoi(argv[3]));

	ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret < 0) {
		printf("Failed to bind to address\n");
		goto out;
	}

	ret = listen(sd, 5);
	if (ret < 0) {
		printf("Failed to listen on port\n");
		goto out;
	}

	/* remote endpoint */
	daddr.sin_family = AF_INET;
	daddr.sin_addr.s_addr = inet_addr(argv[4]);
	daddr.sin_port = htons(atoi(argv[5]));

	/* make test shorter than 25s */
	ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
	if (ret < 0) {
		printf("Failed to setsockopt SO_RCVTIMEO\n");
		goto out;
	}

	if (!strcmp(argv[1], "server")) {
		sleep(1); /* wait a bit for client's INIT */
		ret = connect(sd, (struct sockaddr *)&daddr, len);
		if (ret < 0) {
			printf("Failed to connect to peer\n");
			goto out;
		}
		/* recvfrom() rewrites daddr/len with the sender's address */
		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
		if (ret < 0) {
			printf("Failed to recv msg %d\n", ret);
			goto out;
		}
		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
		if (ret < 0) {
			printf("Failed to send msg %d\n", ret);
			goto out;
		}
		printf("Server: sent! %d\n", ret);
	}

	if (!strcmp(argv[1], "client")) {
		usleep(300000); /* wait a bit for server's listening */
		ret = connect(sd, (struct sockaddr *)&daddr, len);
		if (ret < 0) {
			printf("Failed to connect to peer\n");
			goto out;
		}
		sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
		if (ret < 0) {
			printf("Failed to send msg %d\n", ret);
			goto out;
		}
		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
		if (ret < 0) {
			printf("Failed to recv msg %d\n", ret);
			goto out;
		}
		printf("Client: rcvd! %d\n", ret);
	}

	/* the last send/recv left a byte count in ret; report plain success */
	ret = 0;
out:
	close(sd);
	return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment