Commit 72b93a86 authored by David S. Miller

Merge branch 'mlxsw-rif-mac-prefixes'

Ido Schimmel says:

====================
mlxsw: Support multiple RIF MAC prefixes

Currently, mlxsw enforces that all the netdevs used as router interfaces
(RIFs) have the same MAC prefix (e.g., same 38 MSBs in Spectrum-1).
Otherwise, an error is returned to user space with extack. This patchset
relaxes the limitation through the use of RIF MAC profiles.

A RIF MAC profile is a hardware entity that represents a particular MAC
prefix which multiple RIFs can reference. Therefore, the number of
possible MAC prefixes is no longer one, but the number of profiles
supported by the device.

The ability to change the MAC of a particular netdev is useful, for
example, for users who use the netdev to connect to an upstream provider
that performs MAC filtering. Currently, such users are either forced to
negotiate with the provider or change the MAC address of all other
netdevs so that they share the same prefix.

Patchset overview:

Patches #1-#3 are preparations.

Patch #4 adds actual support for RIF MAC profiles.

Patch #5 exposes RIF MAC profiles as a devlink resource, so that user
space has visibility into the maximum number of profiles and current
occupancy. Useful for debugging and testing (next 3 patches).

Patches #6-#8 add both scale and functional tests.

Patch #9 removes tests that validated the previous limitation. It is now
covered by patch #6 for devices that support a single profile.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents be348926 c24dbf3d
......@@ -6526,6 +6526,12 @@ MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16);
*/
MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8);
/* reg_ritr_if_mac_profile_id
 * MAC msb profile ID.
 * Identifies the RIF MAC profile referenced by the router interface. A
 * profile represents one MAC prefix (the MSBs of the address) and can be
 * referenced by multiple RIFs.
 * Access: RW
 */
MLXSW_ITEM32(reg, ritr, if_mac_profile_id, 0x10, 16, 4);
/* reg_ritr_if_mac
* Router interface MAC address.
* In Spectrum, all MAC addresses must have the same 38 MSBits.
......
......@@ -49,6 +49,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_MAX_VRS,
MLXSW_RES_ID_MAX_RIFS,
MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
MLXSW_RES_ID_MAX_RIF_MAC_PROFILES,
MLXSW_RES_ID_MAX_LPM_TREES,
MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4,
MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6,
......@@ -105,6 +106,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
[MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
[MLXSW_RES_ID_MAX_RIF_MAC_PROFILES] = 0x2C14,
[MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
[MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4] = 0x2E02,
[MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6] = 0x2E03,
......
......@@ -3282,6 +3282,30 @@ static int mlxsw_sp_resources_span_register(struct mlxsw_core *mlxsw_core)
&span_size_params);
}
static int
mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core)
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
struct devlink_resource_size_params size_params;
u8 max_rif_mac_profiles;
if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES))
return -EIO;
max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
MAX_RIF_MAC_PROFILES);
devlink_resource_size_params_init(&size_params, max_rif_mac_profiles,
max_rif_mac_profiles, 1,
DEVLINK_RESOURCE_UNIT_ENTRY);
return devlink_resource_register(devlink,
"rif_mac_profiles",
max_rif_mac_profiles,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&size_params);
}
static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
{
int err;
......@@ -3300,10 +3324,16 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
err = mlxsw_sp_policer_resources_register(mlxsw_core);
if (err)
goto err_resources_counter_register;
goto err_policer_resources_register;
err = mlxsw_sp_resources_rif_mac_profile_register(mlxsw_core);
if (err)
goto err_resources_rif_mac_profile_register;
return 0;
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
err_resources_span_register:
devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
......@@ -3328,10 +3358,16 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
err = mlxsw_sp_policer_resources_register(mlxsw_core);
if (err)
goto err_resources_counter_register;
goto err_policer_resources_register;
err = mlxsw_sp_resources_rif_mac_profile_register(mlxsw_core);
if (err)
goto err_resources_rif_mac_profile_register;
return 0;
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
err_resources_span_register:
devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
......
......@@ -67,6 +67,7 @@ enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_COUNTERS_RIF,
MLXSW_SP_RESOURCE_GLOBAL_POLICERS,
MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
};
struct mlxsw_sp_port;
......
......@@ -39,6 +39,9 @@ mlxsw_sp_fib_entry_op_ctx_clear(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif **rifs;
struct idr rif_mac_profiles_idr;
atomic_t rif_mac_profiles_count;
u8 max_rif_mac_profile;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to
# the maximum number of RIF MAC profiles, sets each of them with a distinct
# MAC address derived from its index, and checks that eventually the number of
# occupied RIF MAC profiles equals the maximum number of RIF MAC profiles.
RIF_MAC_PROFILE_NUM_NETIFS=2
# Create $count VLAN uppers of $h1, each with its own MAC address and IPv4
# address, using a single "ip -batch" run.
#
# Arguments:
#   $1 - number of VLAN netdevices to create
#   $2 - "should fail" flag, forwarded to check_err_fail together with the
#        exit status of the batch
#
# For index i the VLAN ID is i*10, the IPv4 address is 192.0.<i*11>.1/24 and
# every variable MAC octet is the hexadecimal value i*0x11 (11, 22, ... 99,
# aa, ... ff). For i <= 9 this yields exactly the digits "i*11", so existing
# addresses are unchanged; for 10 <= i <= 15 it keeps the octet two hex
# digits wide instead of producing a malformed three-digit octet that "ip"
# would reject for reasons unrelated to RIF MAC profiles.
# NOTE(review): i >= 16 still does not fit in one octet - extend the scheme
# if a device ever exposes that many profiles.
rif_mac_profiles_create()
{
	local count=$1; shift
	local should_fail=$1; shift
	local batch_file
	# Keep the loop state local so it cannot clobber variables of the
	# script that sources this file.
	local i vlan m octet

	batch_file="$(mktemp)"

	for ((i = 1; i <= count; i++)); do
		vlan=$(( i*10 ))
		m=$(( i*11 ))
		printf -v octet '%02x' $(( i*17 ))

		printf '%s\n' \
			"link add link $h1 name $h1.$vlan address 00:$octet:$octet:$octet:$octet:$octet type vlan id $vlan" \
			"address add 192.0.$m.1/24 dev $h1.$vlan" >> "$batch_file"
	done

	ip -b "$batch_file" &> /dev/null
	check_err_fail $should_fail $? "RIF creation"

	rm -f "$batch_file"
}
# Try to use $count RIF MAC profiles and compare the occupancy that devlink
# reports for the "rif_mac_profiles" resource with $count.
#
# Arguments:
#   $1 - number of profiles to try to use
#   $2 - "should fail" flag, forwarded to rif_mac_profiles_create and to
#        check_err_fail
#
# Side effect: leaves the reported occupancy in the global variable "occ".
rif_mac_profile_test()
{
local count=$1; shift
local should_fail=$1; shift
rif_mac_profiles_create $count $should_fail
# Extract the "occ" field of the rif_mac_profiles resource from devlink's
# JSON output.
occ=$(devlink -j resource show $DEVLINK_DEV \
| jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
# The comparison's exit status is what check_err_fail evaluates below.
[[ $occ -eq $count ]]
check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)"
}
# Prepare the two ports used by the test: resolve their names from NETIFS
# (p1 -> $h1, p2 -> $h2), disable IPv6 on them and bring them up.
rif_mac_profile_setup_prepare()
{
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
# being generated and RIFs being created.
sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
ip link set $h1 up
ip link set $h2 up
}
# Undo rif_mac_profile_setup_prepare in reverse order: bring the ports down
# and restore their disable_ipv6 sysctls.
# NOTE(review): the VLAN netdevices added by rif_mac_profiles_create are not
# deleted here; presumably the caller resets the device afterwards - confirm.
rif_mac_profile_cleanup()
{
pre_cleanup
ip link set $h2 down
ip link set $h1 down
sysctl_restore net.ipv6.conf.$h2.disable_ipv6
sysctl_restore net.ipv6.conf.$h1.disable_ipv6
}
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
mac_profile_test
"
NUM_NETIFS=4
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
source $lib_dir/devlink_lib.sh
# Set up host interface $h1: initialize it with 192.0.2.1/24 through
# simple_if_init, route 198.51.100.0/24 via 192.0.2.2 inside VRF v$h1 and add
# an ingress qdisc so that tc filters can be attached to $h1 ingress.
h1_create()
{
simple_if_init $h1 192.0.2.1/24
ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2
tc qdisc add dev $h1 ingress
}
# Tear down what h1_create configured on $h1, in reverse order: ingress qdisc,
# route to 198.51.100.0/24 in VRF v$h1, then the interface itself.
h1_destroy()
{
tc qdisc del dev $h1 ingress
ip route del 198.51.100.0/24 vrf v$h1
simple_if_fini $h1 192.0.2.1/24
}
# Set up host interface $h2: initialize it with 198.51.100.1/24 through
# simple_if_init, route 192.0.2.0/24 via 198.51.100.2 inside VRF v$h2 and add
# an ingress qdisc so that tc filters can be attached to $h2 ingress.
h2_create()
{
simple_if_init $h2 198.51.100.1/24
ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2
tc qdisc add dev $h2 ingress
}
# Tear down what h2_create configured on $h2, in reverse order: ingress qdisc,
# route to 192.0.2.0/24 in VRF v$h2, then the interface itself.
h2_destroy()
{
tc qdisc del dev $h2 ingress
ip route del 192.0.2.0/24 vrf v$h2
simple_if_fini $h2 198.51.100.1/24
}
# Configure the router ports: bring $rp1 and $rp2 up, attach a clsact qdisc to
# each (egress filters are installed on them by the tests) and assign the
# addresses the hosts use as next hops (192.0.2.2 and 198.51.100.2).
router_create()
{
ip link set dev $rp1 up
ip link set dev $rp2 up
tc qdisc add dev $rp1 clsact
tc qdisc add dev $rp2 clsact
ip address add 192.0.2.2/24 dev $rp1
ip address add 198.51.100.2/24 dev $rp2
}
# Undo router_create in reverse order: remove the addresses, delete the clsact
# qdiscs and bring $rp2 and $rp1 down.
router_destroy()
{
ip address del 198.51.100.2/24 dev $rp2
ip address del 192.0.2.2/24 dev $rp1
tc qdisc del dev $rp2 clsact
tc qdisc del dev $rp1 clsact
ip link set dev $rp2 down
ip link set dev $rp1 down
}
# Map the four test ports to their roles and build the topology:
# $h1 (p1) <-> $rp1 (p2) and $rp2 (p3) <-> $h2 (p4), then enable forwarding.
setup_prepare()
{
h1=${NETIFS[p1]}
rp1=${NETIFS[p2]}
rp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
vrf_prepare
h1_create
h2_create
router_create
forwarding_enable
}
# Undo setup_prepare in reverse order. Installed as the EXIT trap handler by
# the script.
cleanup()
{
pre_cleanup
forwarding_restore
router_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
# Send 10 UDP packets (source port 12345, destination port 54321) from $h1 to
# 198.51.100.1 and check that all of them egress $rp2 in hardware and reach
# $h2 carrying the current MAC of $rp2 as source MAC.
#
# Arguments:
#   $@ - words of the test name appended to the "h1->h2:" log line
#
# Resets RET, and removes the filters and the neighbour entry it installed
# before returning.
h1_to_h2()
{
local test_name=$@; shift
# Sample the MAC of $rp2 now; it is the source MAC expected on $h2.
local smac=$(mac_get $rp2)
RET=0
# Replace neighbour to avoid first packet being forwarded in software
ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
# Add a filter to ensure that packets are forwarded in hardware. Cannot
# match on source MAC because it is not set in eACL after routing
tc filter add dev $rp2 egress proto ip pref 1 handle 101 \
flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \
action pass
# Add a filter to ensure that packets are received with the correct
# source MAC
tc filter add dev $h2 ingress proto ip pref 1 handle 101 \
flower skip_sw src_mac $smac ip_proto udp src_port 12345 \
dst_port 54321 action pass
# Generate the traffic, addressed at layer 2 to $rp1 so that it is routed.
$MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \
-A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q
# Both filters (handle 101) are expected to have matched all 10 packets.
tc_check_packets "dev $rp2 egress" 101 10
check_err $? "packets not forwarded in hardware"
tc_check_packets "dev $h2 ingress" 101 10
check_err $? "packets not forwarded with correct source mac"
log_test "h1->h2: $test_name"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
}
# Send 10 UDP packets (source port 54321, destination port 12345) from $h2 to
# 192.0.2.1 and check that all of them egress $rp1 in hardware and reach $h1
# carrying the current MAC of $rp1 as source MAC.
#
# Arguments:
#   $@ - words of the test name appended to the "h2->h1:" log line
#
# Resets RET, and removes the filters and the neighbour entry it installed
# before returning.
h2_to_h1()
{
local test_name=$@; shift
# Sample the MAC of $rp1 now; it is the source MAC expected on $h1.
local rp1_mac=$(mac_get $rp1)
RET=0
# Pre-populate the neighbour entry for $h1 on $rp1 before sending traffic.
ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
# Hardware-only (skip_sw) filter counting the routed packets on $rp1 egress.
tc filter add dev $rp1 egress proto ip pref 1 handle 101 \
flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \
action pass
# Filter counting the packets that arrive on $h1 with $rp1's MAC as source.
tc filter add dev $h1 ingress proto ip pref 1 handle 101 \
flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \
dst_port 12345 action pass
# Generate the traffic, addressed at layer 2 to $rp2 so that it is routed.
$MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \
-A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q
# Both filters (handle 101) are expected to have matched all 10 packets.
tc_check_packets "dev $rp1 egress" 101 10
check_err $? "packets not forwarded in hardware"
tc_check_packets "dev $h1 ingress" 101 10
check_err $? "packets not forwarded with correct source mac"
log_test "h2->h1: $test_name"
tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower
ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
}
# Verify the source MAC of routed packets in both directions.
#
# Arguments:
#   $@ - words of the test name, passed on to h1_to_h2 and h2_to_h1
smac_test()
{
local test_name=$@; shift
# Test that packets forwarded to $h2 via $rp2 are forwarded with the
# current source MAC of $rp2
h1_to_h2 $test_name
# Test that packets forwarded to $h1 via $rp1 are forwarded with the
# current source MAC of $rp1. This MAC is never changed during the test,
# but given the shared nature of MAC profile, the point is to see that
# changes to the MAC of $rp2 do not affect that of $rp1
h2_to_h1 $test_name
}
# Change the MAC of $rp2 twice and run smac_test after each change, then
# restore the MAC that $rp2 had when the test started.
mac_profile_test()
{
# Remember the original MAC so that it can be restored at the end.
local rp2_mac=$(mac_get $rp2)
# Test behavior when the RIF backing $rp2 is transitioned to use
# a new MAC profile
ip link set dev $rp2 addr 00:11:22:33:44:55
smac_test "new mac profile"
# Test behavior when the MAC profile used by the RIF is edited
ip link set dev $rp2 address 00:22:22:22:22:22
smac_test "edit mac profile"
# Restore original MAC
ip link set dev $rp2 addr $rp2_mac
}
# Script entry point: always clean up on exit, build the topology and wait for
# it to be ready.
trap cleanup EXIT
setup_prepare
setup_wait
# NOTE(review): the tests only run when the reported size of the
# rif_mac_profiles resource is not 1; presumably a single-profile device
# cannot give $rp2 a MAC prefix different from that of $rp1 - confirm.
mac_profiles=$(devlink_resource_size_get rif_mac_profiles)
if [[ $mac_profiles -ne 1 ]]; then
tests_run
fi
exit $EXIT_STATUS
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
rif_mac_profile_edit_test
"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
# Prepare the two ports used by the test: resolve their names from NETIFS
# (p1 -> $h1, p2 -> $h2), disable IPv6 on them and bring them up.
setup_prepare()
{
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
# being generated and RIFs being created
sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
ip link set $h1 up
ip link set $h2 up
}
# Undo setup_prepare in reverse order (ports down, IPv6 sysctls restored) and
# reload the device. Installed as the EXIT trap handler by the script.
cleanup()
{
pre_cleanup
ip link set $h2 down
ip link set $h1 down
sysctl_restore net.ipv6.conf.$h2.disable_ipv6
sysctl_restore net.ipv6.conf.$h1.disable_ipv6
# Reload in order to clean all the RIFs and RIF MAC profiles created
devlink_reload
}
# Create $count VLAN uppers of $h1, each with its own MAC address and IPv4
# address, using a single "ip -batch" run whose output and exit status are
# discarded.
#
# Arguments:
#   $1 - number of VLAN netdevices to create
#
# For index i the VLAN ID is i*10, the IPv4 address is 192.0.<i*11>.1/24 and
# every variable MAC octet is the hexadecimal value i*0x11 (11, 22, ... 99,
# aa, ... ff). For i <= 9 this yields exactly the digits "i*11", so the first
# netdevice still gets 00:11:11:11:11:11; for 10 <= i <= 15 it keeps the octet
# two hex digits wide instead of producing a malformed three-digit octet.
# NOTE(review): i >= 16 still does not fit in one octet - extend the scheme
# if a device ever exposes that many profiles.
create_max_rif_mac_profiles()
{
	local count=$1; shift
	local batch_file
	# Keep the loop state local so it cannot clobber variables of callers.
	local i vlan m octet

	batch_file="$(mktemp)"

	for ((i = 1; i <= count; i++)); do
		vlan=$(( i*10 ))
		m=$(( i*11 ))
		printf -v octet '%02x' $(( i*17 ))

		printf '%s\n' \
			"link add link $h1 name $h1.$vlan address 00:$octet:$octet:$octet:$octet:$octet type vlan id $vlan" \
			"address add 192.0.$m.1/24 dev $h1.$vlan" >> "$batch_file"
	done

	ip -b "$batch_file" &> /dev/null
	rm -f "$batch_file"
}
# Check that the MAC of $h1.10 can be changed to 00:12:34:56:78:99, then put
# the original MAC back.
rif_mac_profile_replacement_test()
{
# Remember the current MAC so that it can be restored after the check.
local h1_10_mac=$(mac_get $h1.10)
RET=0
ip link set $h1.10 address 00:12:34:56:78:99
check_err $?
log_test "RIF MAC profile replacement"
ip link set $h1.10 address $h1_10_mac
}
# Check that changing the MAC of a RIF fails when its MAC profile is shared
# with another RIF and no free profile is left.
#
# Arguments:
#   $1 - number of RIF MAC profiles already in use (VLAN uppers of $h1 with
#        indexes 1..$1 are expected to exist)
#
# The new MAC uses the hexadecimal octet i*0x11 for index i = $1 + 1. For
# i <= 9 these are the same digits as the decimal "i*11"; for 10 <= i <= 15
# the octet stays two hex digits wide, so the command is rejected because of
# the missing profile rather than because the address is malformed.
# NOTE(review): i >= 16 still does not fit in one octet.
rif_mac_profile_shared_replacement_test()
{
	local count=$1; shift
	local i=$((count + 1))
	local vlan=$(( i*10 ))
	local m=11
	local new_octet

	RET=0

	# Create a VLAN netdevice that has the same MAC as the first one.
	ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \
		type vlan id $vlan
	ip address add 192.0.$m.1/24 dev $h1.$vlan

	# MAC replacement should fail because all the MAC profiles are in use
	# and the profile is shared between multiple RIFs
	printf -v new_octet '%02x' $(( i*17 ))
	ip link set $h1.$vlan \
		address 00:$new_octet:$new_octet:$new_octet:$new_octet:$new_octet \
		&> /dev/null
	check_fail $?

	log_test "RIF MAC profile shared replacement"

	ip link del dev $h1.$vlan
}
# Occupy as many RIF MAC profiles as the rif_mac_profiles devlink resource
# reports, then run the replacement and the shared-replacement checks.
rif_mac_profile_edit_test()
{
local count=$(devlink_resource_size_get rif_mac_profiles)
create_max_rif_mac_profiles $count
rif_mac_profile_replacement_test
rif_mac_profile_shared_replacement_test $count
}
# Script entry point: always clean up on exit, prepare the ports, wait for
# them to be ready, run the tests listed in ALL_TESTS and report the result.
trap cleanup EXIT
setup_prepare
setup_wait
tests_run
exit $EXIT_STATUS
......@@ -10,9 +10,7 @@
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
rif_set_addr_test
rif_vrf_set_addr_test
rif_inherit_bridge_addr_test
rif_non_inherit_bridge_addr_test
vlan_interface_deletion_test
bridge_deletion_test
......@@ -60,55 +58,6 @@ cleanup()
ip link set dev $swp1 down
}
rif_set_addr_test()
{
local swp1_mac=$(mac_get $swp1)
local swp2_mac=$(mac_get $swp2)
RET=0
# $swp1 and $swp2 likely got their IPv6 local addresses already, but
# here we need to test the transition to RIF.
ip addr flush dev $swp1
ip addr flush dev $swp2
sleep .1
ip addr add dev $swp1 192.0.2.1/28
check_err $?
ip link set dev $swp1 addr 00:11:22:33:44:55
check_err $?
# IP address enablement should be rejected if the MAC address prefix
# doesn't match other RIFs.
ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
check_fail $? "IP address addition passed for a device with a wrong MAC"
ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "no extack for IP address addition"
ip link set dev $swp2 addr 00:11:22:33:44:66
check_err $?
ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
check_err $?
# Change of MAC address of a RIF should be forbidden if the new MAC
# doesn't share the prefix with other MAC addresses.
ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null
check_fail $? "change of MAC address passed for a wrong MAC"
ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "no extack for MAC address change"
log_test "RIF - bad MAC change"
ip addr del dev $swp2 192.0.2.2/28
ip addr del dev $swp1 192.0.2.1/28
ip link set dev $swp2 addr $swp2_mac
ip link set dev $swp1 addr $swp1_mac
}
rif_vrf_set_addr_test()
{
# Test that it is possible to set an IP address on a VRF upper despite
......@@ -128,45 +77,6 @@ rif_vrf_set_addr_test()
ip link del dev vrf-test
}
rif_inherit_bridge_addr_test()
{
RET=0
# Create first RIF
ip addr add dev $swp1 192.0.2.1/28
check_err $?
# Create a FID RIF
ip link add name br1 up type bridge vlan_filtering 0
ip link set dev $swp2 master br1
ip addr add dev br1 192.0.2.17/28
check_err $?
# Prepare a device with a low MAC address
ip link add name d up type dummy
ip link set dev d addr 00:11:22:33:44:55
# Attach the device to br1. That prompts bridge address change, which
# should be vetoed, thus preventing the attachment.
ip link set dev d master br1 &>/dev/null
check_fail $? "Device with low MAC was permitted to attach a bridge with RIF"
ip link set dev d master br1 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "no extack for bridge attach rejection"
ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null
check_fail $? "Changing swp2's MAC address permitted"
ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "no extack for bridge port MAC address change rejection"
log_test "RIF - attach port with bad MAC to bridge"
ip link del dev d
ip link del dev br1
ip addr del dev $swp1 192.0.2.1/28
}
rif_non_inherit_bridge_addr_test()
{
local swp2_mac=$(mac_get $swp2)
......
......@@ -25,7 +25,7 @@ cleanup()
trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port"
ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
......
# SPDX-License-Identifier: GPL-2.0
source ../rif_mac_profile_scale.sh
# Print the number of RIF MAC profiles the scale test should try to use.
#
# Arguments:
#   $1 - zero to target exactly the size of the rif_mac_profiles devlink
#        resource, non-zero to target one more than that
rif_mac_profile_get_target()
{
	local should_fail=$1
	local limit

	limit=$(devlink_resource_size_get rif_mac_profiles)

	# Passing case: aim at the reported resource size itself.
	if ((! should_fail)); then
		echo $limit
		return
	fi

	# Failing case: go one profile beyond the reported size.
	echo $((limit + 1))
}
......@@ -22,7 +22,7 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port"
ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
......
# SPDX-License-Identifier: GPL-2.0
source ../rif_mac_profile_scale.sh
# Print the number of RIF MAC profiles the scale test should try to use.
#
# Arguments:
#   $1 - zero to target exactly the size of the rif_mac_profiles devlink
#        resource, non-zero to target one more than that
rif_mac_profile_get_target()
{
	local should_fail=$1
	local limit

	limit=$(devlink_resource_size_get rif_mac_profiles)

	# Passing case: aim at the reported resource size itself.
	if ((! should_fail)); then
		echo $limit
		return
	fi

	# Failing case: go one profile beyond the reported size.
	echo $((limit + 1))
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment