Commit 16dc75e5 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'mptcp-fix-endpoints-with-signal-and-subflow-flags'

Matthieu Baerts says:

====================
mptcp: fix endpoints with 'signal' and 'subflow' flags

When looking at improving the user experience around the MPTCP endpoints
setup, I noticed that setting an endpoint with both the 'signal' and the
'subflow' flags -- as it has been done in the past by users according to
bug reports we got -- was resulting on only announcing the endpoint, but
not using it to create subflows: the 'subflow' flag was then ignored.

My initial thought was to modify IPRoute2 to warn the user when the two
flags were set, but it doesn't sound normal to ignore one of them. I
then looked at modifying the kernel not to allow having the two flags
set, but when discussing about that with Mat, we thought it was maybe
not ideal to do that, as there might be use-cases, we might break some
configs. Then I saw it was working before v5.17. So instead, I fixed the
support on the kernel side (patch 5) using Paolo's suggestion. This also
includes a fix on the options side (patch 1: for v5.11+), an explicit
deny of some options combinations (patch 2: for v5.18+), and some
refactoring (patches 3 and 4) to ease the inclusion of the patch 5.

While at it, I added a new selftest (patch 7) to validate this case --
including a modification of the chk_add_nr helper to inverse the sides
were the counters are checked (patch 6) -- and allowed ADD_ADDR echo
just after the MP_JOIN 3WHS.

The selftests modification have the same Fixes tag as the previous
commit, but no 'Cc: Stable': if the backport can work, that's good --
but it still need to be verified by running the selftests -- if not, no
need to worry, many CIs will use the selftests from the last stable
version to validate previous stable releases.
====================

Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-0-c8a9b036493b@kernel.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 183d46ff 4d2868b5
......@@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->remote_key_valid &&
(((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) &&
(!mp_opt->echo || subflow->mp_join)))) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
......
......@@ -348,7 +348,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
if (mptcp_pm_is_kernel(msk))
if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk)))
return false;
sk_reset_timer(sk, &add_entry->add_timer,
......@@ -512,8 +512,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *local;
unsigned int add_addr_signal_max;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
......@@ -555,8 +555,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, msk);
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
......@@ -567,16 +565,26 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
return;
if (local) {
if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_nl_addr_send_ack(msk);
}
}
local = select_signal_address(pernet, msk);
if (!local)
goto subflow;
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
return;
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_nl_addr_send_ack(msk);
if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
signal_and_subflow = local;
}
subflow:
/* check if should create a new subflow */
while (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) {
......@@ -584,9 +592,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
bool fullmesh;
int i, nr;
local = select_local_address(pernet, msk);
if (!local)
break;
if (signal_and_subflow) {
local = signal_and_subflow;
signal_and_subflow = NULL;
} else {
local = select_local_address(pernet, msk);
if (!local)
break;
}
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
......@@ -1328,8 +1341,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
GENL_SET_ERR_MSG(info, "flags must have signal when using port");
if (addr.addr.port && !address_use_port(&addr)) {
GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port");
return -EINVAL;
}
......
......@@ -1415,18 +1415,28 @@ chk_add_nr()
local add_nr=$1
local echo_nr=$2
local port_nr=${3:-0}
local syn_nr=${4:-$port_nr}
local syn_ack_nr=${5:-$port_nr}
local ack_nr=${6:-$port_nr}
local mis_syn_nr=${7:-0}
local mis_ack_nr=${8:-0}
local ns_invert=${4:-""}
local syn_nr=$port_nr
local syn_ack_nr=$port_nr
local ack_nr=$port_nr
local mis_syn_nr=0
local mis_ack_nr=0
local ns_tx=$ns1
local ns_rx=$ns2
local extra_msg=""
local count
local timeout
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
extra_msg="invert"
fi
timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
print_check "add"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
......@@ -1438,7 +1448,7 @@ chk_add_nr()
fi
print_check "echo"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_nr" ]; then
......@@ -1449,7 +1459,7 @@ chk_add_nr()
if [ $port_nr -gt 0 ]; then
print_check "pt"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$port_nr" ]; then
......@@ -1459,7 +1469,7 @@ chk_add_nr()
fi
print_check "syn"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
......@@ -1470,7 +1480,7 @@ chk_add_nr()
fi
print_check "synack"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
......@@ -1481,7 +1491,7 @@ chk_add_nr()
fi
print_check "ack"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
......@@ -1492,7 +1502,7 @@ chk_add_nr()
fi
print_check "syn"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_syn_nr" ]; then
......@@ -1503,7 +1513,7 @@ chk_add_nr()
fi
print_check "ack"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_ack_nr" ]; then
......@@ -1513,6 +1523,8 @@ chk_add_nr()
print_ok
fi
fi
print_info "$extra_msg"
}
chk_add_tx_nr()
......@@ -1977,6 +1989,21 @@ signal_address_tests()
chk_add_nr 1 1
fi
# uncommon: subflow and signal flags on the same endpoint
# or because the user wrongly picked both, but still expects the client
# to create additional subflows
if reset "subflow and signal together"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1 0 invert # only initiated by ns2
chk_add_nr 0 0 0 # none initiated by ns1
chk_rst_nr 0 0 invert # no RST sent by the client
chk_rst_nr 0 0 # no RST sent by the server
fi
# accept and use add_addr with additional subflows
if reset "multiple subflows and signal"; then
pm_nl_set_limits $ns1 0 3
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment