Commit 99e79b67 authored by Jakub Kicinski

Merge branch 'bridge-add-a-limit-on-learned-fdb-entries'

Johannes Nixdorf says:

====================
bridge: Add a limit on learned FDB entries

Introduce a limit on the number of learned FDB entries on a bridge,
configurable via netlink, with a build-time default (set in the kernel
config) that is applied on bridge creation.

For backwards compatibility, the kernel config default (0) disables the
limit.

Without any limit a malicious actor may OOM a kernel by spamming packets
with changing MAC addresses on their bridge port, so allow the bridge
creator to limit the number of entries.

Currently, manual entries are identified by the bridge flags
BR_FDB_LOCAL or BR_FDB_ADDED_BY_USER; an entry with neither flag set is
marked with the new flag BR_FDB_DYNAMIC_LEARNED, which is tested and
cleared atomically for the accounting. This means the limit also applies
to entries created with BR_FDB_ADDED_BY_EXT_LEARN but none of BR_FDB_LOCAL
or BR_FDB_ADDED_BY_USER, e.g. ones added by SWITCHDEV_FDB_ADD_TO_BRIDGE.

Link to the corresponding iproute2 changes:
https://lore.kernel.org/r/20230919-fdb_limit-v4-1-b4d2dc4df30f@avm.de

v4: https://lore.kernel.org/r/20230919-fdb_limit-v4-0-39f0293807b8@avm.de/
v3: https://lore.kernel.org/r/20230905-fdb_limit-v3-0-7597cd500a82@avm.de/
v2: https://lore.kernel.org/netdev/20230619071444.14625-1-jnixdorf-oss@avm.de/
v1: https://lore.kernel.org/netdev/20230515085046.4457-1-jnixdorf-oss@avm.de/
====================

Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-0-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 56a7bb12 6f840903
......@@ -510,6 +510,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
IFLA_BR_FDB_N_LEARNED,
IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
......
......@@ -329,11 +329,18 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
hlist_del_init_rcu(&f->fdb_node);
rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
br_fdb_rht_params);
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
atomic_dec(&br->fdb_n_learned);
fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
call_rcu(&f->rcu, fdb_rcu_free);
}
/* Delete a local entry if no other port had the same address. */
/* Delete a local entry if no other port had the same address.
*
* This function should only be called on entries with BR_FDB_LOCAL set,
* so even with BR_FDB_ADDED_BY_USER cleared we never need to increase
* the accounting for dynamically learned entries again.
*/
static void fdb_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
struct net_bridge_fdb_entry *f)
......@@ -388,9 +395,20 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
__u16 vid,
unsigned long flags)
{
bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) &&
!test_bit(BR_FDB_LOCAL, &flags);
u32 max_learned = READ_ONCE(br->fdb_max_learned);
struct net_bridge_fdb_entry *fdb;
int err;
if (likely(learned)) {
int n_learned = atomic_read(&br->fdb_n_learned);
if (unlikely(max_learned && n_learned >= max_learned))
return NULL;
__set_bit(BR_FDB_DYNAMIC_LEARNED, &flags);
}
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (!fdb)
return NULL;
......@@ -407,6 +425,9 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
return NULL;
}
if (likely(learned))
atomic_inc(&br->fdb_n_learned);
hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
return fdb;
......@@ -912,8 +933,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
clear_bit(BR_FDB_LOCKED, &fdb->flags);
}
if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) {
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED,
&fdb->flags))
atomic_dec(&br->fdb_n_learned);
}
if (unlikely(fdb_modified)) {
trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
......@@ -1075,7 +1100,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
fdb = fdb_create(br, source, addr, vid, 0);
fdb = fdb_create(br, source, addr, vid,
BIT(BR_FDB_ADDED_BY_USER));
if (!fdb)
return -ENOMEM;
......@@ -1088,6 +1114,10 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
WRITE_ONCE(fdb->dst, source);
modified = true;
}
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
atomic_dec(&br->fdb_n_learned);
}
if (fdb_to_nud(br, fdb) != state) {
......@@ -1119,8 +1149,6 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (fdb_handle_notify(fdb, notify))
modified = true;
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
fdb->used = jiffies;
if (modified) {
if (refresh)
......@@ -1464,6 +1492,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (!p)
set_bit(BR_FDB_LOCAL, &fdb->flags);
if ((swdev_notify || !p) &&
test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
atomic_dec(&br->fdb_n_learned);
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
......
......@@ -1229,6 +1229,8 @@ static size_t br_port_get_slave_size(const struct net_device *brdev,
}
static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_UNSPEC] = { .strict_start_type =
IFLA_BR_FDB_N_LEARNED },
[IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 },
[IFLA_BR_HELLO_TIME] = { .type = NLA_U32 },
[IFLA_BR_MAX_AGE] = { .type = NLA_U32 },
......@@ -1265,6 +1267,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
[IFLA_BR_MULTI_BOOLOPT] =
NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
[IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT },
[IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
......@@ -1539,6 +1543,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
if (data[IFLA_BR_FDB_MAX_LEARNED]) {
u32 val = nla_get_u32(data[IFLA_BR_FDB_MAX_LEARNED]);
WRITE_ONCE(br->fdb_max_learned, val);
}
return 0;
}
......@@ -1593,6 +1603,8 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */
nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */
nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_N_LEARNED */
nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_MAX_LEARNED */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */
......@@ -1668,7 +1680,10 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
br->topology_change_detected) ||
nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) ||
nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED,
atomic_read(&br->fdb_n_learned)) ||
nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned))
return -EMSGSIZE;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
......
......@@ -274,6 +274,7 @@ enum {
BR_FDB_NOTIFY,
BR_FDB_NOTIFY_INACTIVE,
BR_FDB_LOCKED,
BR_FDB_DYNAMIC_LEARNED,
};
struct net_bridge_fdb_key {
......@@ -555,6 +556,9 @@ struct net_bridge {
struct kobject *ifobj;
u32 auto_cnt;
atomic_t fdb_n_learned;
u32 fdb_max_learned;
#ifdef CONFIG_NET_SWITCHDEV
/* Counter used to make sure that hardware domains get unique
* identifiers in case a bridge spans multiple switchdev instances.
......
# SPDX-License-Identifier: GPL-2.0+ OR MIT
TEST_PROGS = bridge_igmp.sh \
TEST_PROGS = bridge_fdb_learning_limit.sh \
bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
bridge_mdb_host.sh \
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# ShellCheck incorrectly believes that most of the code here is unreachable
# because it's invoked by variable name following ALL_TESTS.
#
# shellcheck disable=SC2317
# Names of the test functions defined in this file that make up the test run.
ALL_TESTS="check_accounting check_limit"
# setup_prepare reads NETIFS[p1], [p2], [p3], [p4] and [p6].
NUM_NETIFS=6
# NOTE(review): lib.sh is presumably what consumes ALL_TESTS/NUM_NETIFS and
# provides the helpers not defined here (check_err, log_test, ...) - confirm.
source lib.sh
# Common prefix of every test MAC address; mac() appends the last octet.
TEST_MAC_BASE=de:ad:be:ef:42:
# Number of distinct source MACs that fdb_fill_learned tries to get learned.
NUM_PKTS=16
# Value written to the bridge's fdb_max_learned option by check_limit.
FDB_LIMIT=8
# One row per FDB entry type. Each row is word-split into the positional
# arguments of check_accounting_one_type and check_limit_one_type.
FDB_TYPES=(
# name is counted? overrides learned?
'learned 1 0'
'static 0 1'
'user 0 1'
'extern_learn 0 1'
'local 0 1'
)
# Print the test MAC address whose last octet is $1.
#
# $1: last octet as an integer; it is rendered as two lowercase hex digits.
#
# The prefix is passed as a printf argument rather than being spliced into
# the format string, so it can never be interpreted as format directives.
mac()
{
	printf '%s%02x' "$TEST_MAC_BASE" "$1"
}
# MAC address h1 carries whenever it is not impersonating a test MAC. Octet 42
# lies outside 0..NUM_PKTS, the octets the tests pass to mac().
H1_DEFAULT_MAC=$(mac 42)
# Create bridge br0, enslave swp1 and swp2 to it and bring all ports up.
switch_create()
{
	local port

	ip link add dev br0 type bridge

	# swp3 is used to add local MACs, so it is not enslaved yet.
	for port in "$swp1" "$swp2"; do
		ip link set dev "$port" master br0
	done

	# swp2 only answers the pings used for learning on swp1; its own MAC
	# should not be learned.
	ip link set dev "$swp2" type bridge_slave learning off

	ip link set dev br0 up

	for port in "$swp1" "$swp2" "$swp3"; do
		ip link set dev "$port" up
	done
}
# Undo switch_create: bring the switch ports down (swp3 first, swp1 last)
# and delete the bridge.
switch_destroy()
{
	local port

	for port in "$swp3" "$swp2" "$swp1"; do
		ip link set dev "$port" down
	done

	ip link del dev br0
}
# Give h1 its default MAC address and configure both host interfaces.
h_create()
{
	local h1_ip=192.0.2.1/24
	local h2_ip=192.0.2.2/24

	ip link set "$h1" addr "$H1_DEFAULT_MAC"

	# simple_if_init is not defined in this file (presumably from lib.sh).
	simple_if_init "$h1" "$h1_ip"
	simple_if_init "$h2" "$h2_ip"
}
# Release the host interface configuration set up by h_create.
h_destroy()
{
	local h1_ip=192.0.2.1/24
	local h2_ip=192.0.2.2/24

	# simple_if_fini is not defined in this file (presumably from lib.sh).
	simple_if_fini "$h1" "$h1_ip"
	simple_if_fini "$h2" "$h2_ip"
}
# Map the NETIFS entries to the interface names used by this script and build
# the test topology.
setup_prepare()
{
	# Host-side interfaces.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p3]}

	# Switch-side interfaces. NETIFS[p5] is not referenced by this script.
	swp1=${NETIFS[p2]}
	swp2=${NETIFS[p4]}
	swp3=${NETIFS[p6]}

	vrf_prepare

	h_create
	switch_create
}
# Tear the topology down in the reverse order of setup_prepare. Installed as
# the EXIT trap handler at the bottom of this script.
cleanup()
{
# pre_cleanup is not defined in this file (presumably provided by lib.sh).
pre_cleanup
switch_destroy
h_destroy
vrf_cleanup
}
# Print the fdb_n_learned value that "ip -d -j link show" reports for br0.
fdb_get_n_learned()
{
	ip -d -j link show dev br0 type bridge |
		jq '.[]["linkinfo"]["info_data"]["fdb_n_learned"]'
}
# Print how many FDB entries of br0 carry the given MAC address and have no
# "vlan" key.
#
# $1: MAC address to look for.
#
# The address is handed to jq as a named string variable (--arg) instead of
# being spliced into the jq program text, so no quoting of the value is needed.
fdb_get_n_mac()
{
	local mac=${1}

	bridge -j fdb show br br0 |
		jq --arg mac "$mac" \
			'map(select(.mac == $mac and (has("vlan") | not))) | length'
}
# Try to get NUM_PKTS distinct MAC addresses (last octets 1..NUM_PKTS) learned
# by the bridge, one fdb_add call per address.
fdb_fill_learned()
{
	local idx

	for ((idx = 1; idx <= NUM_PKTS; idx++)); do
		fdb_add learned "$(mac "$idx")"
	done
}
# Flush the FDB of br0 and re-add the entry for h1's default MAC address.
fdb_reset()
{
	bridge fdb flush dev br0

	# h1's default MAC stays in the table as a manually added entry; h1 is
	# switched to a different MAC while dynamic learning is being tested.
	bridge fdb add "$H1_DEFAULT_MAC" dev "$swp1" \
		master static use
}
# Create an FDB entry of the requested type for the given MAC address.
#
# $1: entry type, one of the names listed in FDB_TYPES.
# $2: MAC address of the entry.
#
# The status of the last command run for the selected type is reported through
# check_err after the case statement.
fdb_add()
{
	local fdb_type=$1
	local addr=$2

	case "$fdb_type" in
	static)
		bridge fdb replace "$addr" dev "$swp1" master static
		;;
	user)
		bridge fdb replace "$addr" dev "$swp1" master static use
		;;
	extern_learn)
		bridge fdb replace "$addr" dev "$swp1" master extern_learn
		;;
	local)
		# Enslaving swp3 with this address is what creates the local entry.
		ip link set dev "$swp3" addr "$addr" &&
			ip link set "$swp3" master br0
		;;
	learned)
		ip link set "$h1" addr "$addr"
		# Waiting for the reply implicitly waits until the forwarding
		# code has finished and the FDB entry has been created.
		PING_COUNT=1 ping_do "$h1" 192.0.2.2
		check_err $? "Failed to ping another bridge port"
		ip link set "$h1" addr "$H1_DEFAULT_MAC"
		;;
	esac

	check_err $? "Failed to add a FDB entry of type ${fdb_type}"
}
# Remove an FDB entry previously created by fdb_add.
#
# $1: entry type, one of the names listed in FDB_TYPES.
# $2: MAC address of the entry (unused for type "local").
fdb_del()
{
	local fdb_type=$1
	local addr=$2

	if [ "$fdb_type" = local ]; then
		# Local entries go away when swp3 leaves the bridge.
		ip link set "$swp3" nomaster
	else
		bridge fdb del "$addr" dev "$swp1" master
	fi

	check_err $? "Failed to remove a FDB entry of type ${fdb_type}"
}
# Check how one FDB entry type interacts with the learned-entry counter.
#
# $1: entry type, one of the names listed in FDB_TYPES.
# $2: count expected after inserting one entry of that type (0 or 1).
# $3: 1 if the type should also be tested replacing a learned entry.
#
# Each "[ ... -ne ... ]; check_fail $?" pair hands check_fail a zero status
# when the observed count differs from the expected one; check_fail (not
# defined in this file) is expected to record a failure in that case.
check_accounting_one_type()
{
	local type=$1 is_counted=$2 overrides_learned=$3
	# Kept local so the caller's variable of the same name is not clobbered.
	local learned

	RET=0

	# Inserting a single entry must count it only if the type is counted.
	fdb_reset
	fdb_add "$type" "$(mac 0)"
	learned=$(fdb_get_n_learned)
	[ "$learned" -ne "$is_counted" ]
	check_fail $? "Inserted FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"

	# Removing it again must bring the counter back to zero.
	fdb_del "$type" "$(mac 0)"
	learned=$(fdb_get_n_learned)
	[ "$learned" -ne 0 ]
	check_fail $? "Removed FDB type ${type}: Expected the count 0, but got ${learned}"

	# Turning a learned entry into this type must leave the expected count.
	if [ "$overrides_learned" -eq 1 ]; then
		fdb_reset
		fdb_add learned "$(mac 0)"
		fdb_add "$type" "$(mac 0)"
		learned=$(fdb_get_n_learned)
		[ "$learned" -ne "$is_counted" ]
		check_fail $? "Set a learned entry to FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"
		fdb_del "$type" "$(mac 0)"
	fi

	log_test "FDB accounting interacting with FDB type ${type}"
}
# Test the learned-entry counter: it must read 0 after a reset and NUM_PKTS
# after filling the table, then run the per-type accounting checks.
check_accounting()
{
	local type_args learned
	local -a fields

	RET=0

	fdb_reset
	learned=$(fdb_get_n_learned)
	[ "$learned" -ne 0 ]
	check_fail $? "Flushed the FDB table: Expected the count 0, but got ${learned}"

	fdb_fill_learned
	sleep 1

	learned=$(fdb_get_n_learned)
	[ "$learned" -ne "$NUM_PKTS" ]
	check_fail $? "Filled the FDB table: Expected the count ${NUM_PKTS}, but got ${learned}"

	log_test "FDB accounting"

	for type_args in "${FDB_TYPES[@]}"; do
		# Split the row into its whitespace-separated columns.
		read -r -a fields <<< "$type_args"
		check_accounting_one_type "${fields[@]}"
	done
}
# Check whether an entry of the given type can still be inserted once the
# table has been filled with learned entries.
#
# $1: entry type, one of the names listed in FDB_TYPES.
# $2: 1 if the type is counted as learned, 0 otherwise.
#
# The number of FDB entries for the probe MAC is expected to be
# 1 - is_counted: none for a counted type, one for every other type.
check_limit_one_type()
{
	local type=$1
	local is_counted=$2
	local expected
	local n_mac

	expected=$((1 - is_counted))
	RET=0

	fdb_reset
	fdb_fill_learned

	fdb_add "$type" "$(mac 0)"
	n_mac=$(fdb_get_n_mac "$(mac 0)")
	[ "$n_mac" -ne "$expected" ]
	check_fail $? "Inserted FDB type ${type} at limit: Expected the count ${expected}, but got ${n_mac}"

	log_test "FDB limits interacting with FDB type ${type}"
}
# Test the learned-entry limit: with fdb_max_learned set to FDB_LIMIT, filling
# the table must leave exactly FDB_LIMIT learned entries. Then run the
# per-type limit checks.
check_limit()
{
	# type_args is declared local here, as in check_accounting, so the loop
	# variable does not leak into the global scope.
	local type_args learned

	RET=0

	ip link set br0 type bridge fdb_max_learned "$FDB_LIMIT"

	fdb_reset
	fdb_fill_learned

	learned=$(fdb_get_n_learned)
	[ "$learned" -ne "$FDB_LIMIT" ]
	check_fail $? "Filled the limited FDB table: Expected the count ${FDB_LIMIT}, but got ${learned}"

	log_test "FDB limits"

	for type_args in "${FDB_TYPES[@]}"; do
		# This is intentional use of word splitting.
		# shellcheck disable=SC2086
		check_limit_one_type $type_args
	done
}
# Run cleanup whenever the script exits, including on an early abort.
trap cleanup EXIT
setup_prepare
# tests_run and EXIT_STATUS are not defined in this file.
# NOTE(review): presumably lib.sh runs every function named in ALL_TESTS and
# accumulates the overall result in EXIT_STATUS - confirm.
tests_run
exit $EXIT_STATUS
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment