Commit 38e3bfa8 authored by David S. Miller

Merge branch 'mptcp-improve-backup-subflows'

Mat Martineau says:

====================
mptcp: Improve use of backup subflows

Multipath TCP combines multiple TCP subflows into one stream, and the
MPTCP-level socket must decide which subflow to use when sending (or
resending) chunks of data. The choice of the "best" subflow to transmit
on can vary depending on the priority (normal or backup) for each
subflow and how well the subflow is performing.

In order to improve MPTCP performance when some subflows are failing,
this patch set changes how backup subflows are utilized and introduces
tracking of "stale" subflows that are still connected but not making
progress.

Patch 1 adjusts MPTCP-level retransmit timeouts to use data from all
subflows.

Patch 2 makes MPTCP-level retransmissions less aggressive to avoid
resending data that's still queued at the TCP level.

Patch 3 changes the way pending data is handled when subflows are
closed. Unacked MPTCP-level data still in the subflow tx queue is
immediately moved to another subflow for transmission instead of waiting
for MPTCP-level timeouts to trigger retransmission.

Patch 4 has some sysctl code cleanup.

Patches 5 and 6 add tracking of "stale" subflows, so only underlying TCP
subflow connections that appear to be making progress are considered
when selecting a subflow to (re)transmit data. How fast a subflow goes
stale is configurable with a per-namespace sysctl. Related MIBS are
added too.

Patch 7 makes sure the backup flag is always correctly recorded when the
MP_JOIN SYN/ACK is received for an added subflow.

Patch 8 adds more test cases for backup subflows and stale subflows.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e5f31552 7d1e6f16
...@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN ...@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN
This is a per-namespace sysctl. This is a per-namespace sysctl.
Default: 1 Default: 1
stale_loss_cnt - INTEGER
The number of MPTCP-level retransmission intervals with no traffic and
pending outstanding data on a given subflow required to declare it stale.
The packet scheduler ignores stale subflows.
A low stale_loss_cnt value allows for fast active-backup switch-over,
a high value maximizes link utilization in edge scenarios, e.g. a lossy
link with high BER or a peer pausing its data processing.
This is a per-namespace sysctl.
Default: 4
...@@ -21,43 +21,50 @@ struct mptcp_pernet { ...@@ -21,43 +21,50 @@ struct mptcp_pernet {
struct ctl_table_header *ctl_table_hdr; struct ctl_table_header *ctl_table_hdr;
#endif #endif
u8 mptcp_enabled;
unsigned int add_addr_timeout; unsigned int add_addr_timeout;
unsigned int stale_loss_cnt;
u8 mptcp_enabled;
u8 checksum_enabled; u8 checksum_enabled;
u8 allow_join_initial_addr_port; u8 allow_join_initial_addr_port;
}; };
static struct mptcp_pernet *mptcp_get_pernet(struct net *net) static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
{ {
return net_generic(net, mptcp_pernet_id); return net_generic(net, mptcp_pernet_id);
} }
int mptcp_is_enabled(struct net *net) int mptcp_is_enabled(const struct net *net)
{ {
return mptcp_get_pernet(net)->mptcp_enabled; return mptcp_get_pernet(net)->mptcp_enabled;
} }
unsigned int mptcp_get_add_addr_timeout(struct net *net) unsigned int mptcp_get_add_addr_timeout(const struct net *net)
{ {
return mptcp_get_pernet(net)->add_addr_timeout; return mptcp_get_pernet(net)->add_addr_timeout;
} }
int mptcp_is_checksum_enabled(struct net *net) int mptcp_is_checksum_enabled(const struct net *net)
{ {
return mptcp_get_pernet(net)->checksum_enabled; return mptcp_get_pernet(net)->checksum_enabled;
} }
int mptcp_allow_join_id0(struct net *net) int mptcp_allow_join_id0(const struct net *net)
{ {
return mptcp_get_pernet(net)->allow_join_initial_addr_port; return mptcp_get_pernet(net)->allow_join_initial_addr_port;
} }
unsigned int mptcp_stale_loss_cnt(const struct net *net)
{
return mptcp_get_pernet(net)->stale_loss_cnt;
}
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{ {
pernet->mptcp_enabled = 1; pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX; pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0; pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1; pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
} }
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
...@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = { ...@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE .extra2 = SYSCTL_ONE
}, },
{
.procname = "stale_loss_cnt",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
{} {}
}; };
...@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) ...@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[1].data = &pernet->add_addr_timeout; table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled; table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port; table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr) if (!hdr)
......
...@@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { ...@@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };
......
...@@ -38,6 +38,8 @@ enum linux_mptcp_mib_field { ...@@ -38,6 +38,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
__MPTCP_MIB_MAX __MPTCP_MIB_MAX
}; };
......
...@@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk, ...@@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
old_snd_una = msk->snd_una; old_snd_una = msk->snd_una;
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
/* ACK for data not even sent yet? Ignore. */ /* ACK for data not even sent yet and even above recovery bound? Ignore.*/
if (after64(new_snd_una, snd_nxt)) if (unlikely(after64(new_snd_una, snd_nxt))) {
new_snd_una = old_snd_una; if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
new_snd_una = old_snd_una;
}
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include <net/mptcp.h> #include <net/mptcp.h>
#include "protocol.h" #include "protocol.h"
#include "mib.h"
/* path manager command handlers */ /* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk, int mptcp_pm_announce_addr(struct mptcp_sock *msk,
...@@ -308,6 +310,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) ...@@ -308,6 +310,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, skc); return mptcp_pm_nl_get_local_id(msk, skc);
} }
/* Account for one more retransmission period on @ssk and track whether the
 * subflow made any progress since the previous check.
 *
 * Progress is detected by comparing the subflow's current rcv_tstamp with
 * the value snapshotted when counting started:
 *  - counting not started yet: take the snapshot and start counting;
 *  - rcv_tstamp changed: reset the counter and mark the subflow active again;
 *  - rcv_tstamp unchanged: bump the counter (saturating at U8_MAX) and let
 *    the netlink path manager decide whether the subflow is now stale.
 */
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
	u32 now_rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

	if (ctx->stale_count == 0) {
		/* first period without progress: remember where we started */
		ctx->stale_rcv_tstamp = now_rcv_tstamp;
		ctx->stale_count++;
		return;
	}

	if (ctx->stale_rcv_tstamp != now_rcv_tstamp) {
		/* rcv_tstamp moved since the snapshot: restart from scratch */
		ctx->stale_count = 0;
		mptcp_subflow_set_active(ctx);
		return;
	}

	/* still no progress: stale_count is a u8, do not let it wrap */
	if (ctx->stale_count < U8_MAX)
		ctx->stale_count++;
	mptcp_pm_nl_subflow_chk_stale(msk, ssk);
}
void mptcp_pm_data_init(struct mptcp_sock *msk) void mptcp_pm_data_init(struct mptcp_sock *msk)
{ {
msk->pm.add_addr_signaled = 0; msk->pm.add_addr_signaled = 0;
......
...@@ -46,6 +46,7 @@ struct pm_nl_pernet { ...@@ -46,6 +46,7 @@ struct pm_nl_pernet {
spinlock_t lock; spinlock_t lock;
struct list_head local_addr_list; struct list_head local_addr_list;
unsigned int addrs; unsigned int addrs;
unsigned int stale_loss_cnt;
unsigned int add_addr_signal_max; unsigned int add_addr_signal_max;
unsigned int add_addr_accept_max; unsigned int add_addr_accept_max;
unsigned int local_addr_max; unsigned int local_addr_max;
...@@ -899,6 +900,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { ...@@ -899,6 +900,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
}; };
/* Flag @ssk as stale once its stale_count has exceeded the per-netns
 * stale_loss_cnt value, provided that another active subflow in better
 * shape is available to take over.
 *
 * Nothing is done when the subflow is already flagged stale, when
 * stale_loss_cnt is 0, or while stale_count has not exceeded stale_loss_cnt.
 *
 * NOTE(review): presumably expected to run with the msk socket lock held
 * (see the comment ahead of __mptcp_push_pending() below) - confirm against
 * the callers.
 */
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
unsigned int active_max_loss_cnt;
struct net *net = sock_net(sk);
unsigned int stale_loss_cnt;
bool slow;
stale_loss_cnt = mptcp_stale_loss_cnt(net);
/* already stale, detection disabled (0), or threshold not exceeded yet */
if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
return;
/* look for another available subflow not in loss state: a candidate
 * qualifies only while its own stale_count is below
 * max(stale_loss_cnt - 1, 1)
 */
active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
mptcp_for_each_subflow(msk, iter) {
if (iter != subflow && mptcp_subflow_active(iter) &&
iter->stale_count < active_max_loss_cnt) {
/* we have some alternatives, try to mark this subflow as idle ...*/
slow = lock_sock_fast(ssk);
/* ... but only while the subflow still has queued data - presumably
 * in its rtx or write queue, going by the helper name
 */
if (!tcp_rtx_and_write_queues_empty(ssk)) {
subflow->stale = 1;
__mptcp_retransmit_pending_data(sk);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
}
unlock_sock_fast(ssk, slow);
/* always try to push the pending data regardless of re-injections:
* we can possibly use backup subflows now, and subflow selection
* is cheap under the msk socket lock
*/
__mptcp_push_pending(sk, 0);
/* the first suitable alternative is enough, stop looking */
return;
}
}
}
static int mptcp_pm_family_to_addr(int family) static int mptcp_pm_family_to_addr(int family)
{ {
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
...@@ -1922,6 +1960,7 @@ static int __net_init pm_nl_init_net(struct net *net) ...@@ -1922,6 +1960,7 @@ static int __net_init pm_nl_init_net(struct net *net)
INIT_LIST_HEAD_RCU(&pernet->local_addr_list); INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
pernet->next_id = 1; pernet->next_id = 1;
pernet->stale_loss_cnt = 4;
spin_lock_init(&pernet->lock); spin_lock_init(&pernet->lock);
/* No need to initialize other pernet fields, the struct is zeroed at /* No need to initialize other pernet fields, the struct is zeroed at
......
This diff is collapsed.
...@@ -230,12 +230,17 @@ struct mptcp_sock { ...@@ -230,12 +230,17 @@ struct mptcp_sock {
struct sock *last_snd; struct sock *last_snd;
int snd_burst; int snd_burst;
int old_wspace; int old_wspace;
u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
* recovery related fields are under data_lock
* protection
*/
u64 snd_una; u64 snd_una;
u64 wnd_end; u64 wnd_end;
unsigned long timer_ival; unsigned long timer_ival;
u32 token; u32 token;
int rmem_released; int rmem_released;
unsigned long flags; unsigned long flags;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack; bool can_ack;
bool fully_established; bool fully_established;
bool rcv_data_fin; bool rcv_data_fin;
...@@ -427,7 +432,8 @@ struct mptcp_subflow_context { ...@@ -427,7 +432,8 @@ struct mptcp_subflow_context {
send_mp_prio : 1, send_mp_prio : 1,
rx_eof : 1, rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */ can_ack : 1, /* only after processing the remote a key */
disposable : 1; /* ctx can be free at ulp release time */ disposable : 1, /* ctx can be free at ulp release time */
stale : 1; /* unable to snd/rcv data, do not use for xmit */
enum mptcp_data_avail data_avail; enum mptcp_data_avail data_avail;
u32 remote_nonce; u32 remote_nonce;
u64 thmac; u64 thmac;
...@@ -439,11 +445,13 @@ struct mptcp_subflow_context { ...@@ -439,11 +445,13 @@ struct mptcp_subflow_context {
u8 reset_seen:1; u8 reset_seen:1;
u8 reset_transient:1; u8 reset_transient:1;
u8 reset_reason:4; u8 reset_reason:4;
u8 stale_count;
long delegated_status; long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */ struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq; u32 setsockopt_seq;
u32 stale_rcv_tstamp;
struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */ struct sock *conn; /* parent mptcp_sock */
...@@ -549,12 +557,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su ...@@ -549,12 +557,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
} }
int mptcp_is_enabled(struct net *net); int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net); unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(struct net *net); int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(struct net *net); int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt); struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk); bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void); void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
...@@ -573,7 +584,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, ...@@ -573,7 +584,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr, struct sockaddr_storage *addr,
unsigned short family); unsigned short family);
static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{ {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
...@@ -585,6 +596,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) ...@@ -585,6 +596,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
} }
void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk, static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx) struct mptcp_subflow_context *ctx)
{ {
...@@ -690,6 +705,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); ...@@ -690,6 +705,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void); void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
......
...@@ -435,10 +435,12 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -435,10 +435,12 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto do_reset; goto do_reset;
} }
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac; subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce; subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow->thmac, subflow->remote_nonce); subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
if (!subflow_thmac_valid(subflow)) { if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
ret=0 ret=0
sin="" sin=""
sinfail=""
sout="" sout=""
cin="" cin=""
cinfail=""
cinsent="" cinsent=""
cout="" cout=""
ksft_skip=4 ksft_skip=4
...@@ -76,6 +78,14 @@ init() ...@@ -76,6 +78,14 @@ init()
done done
} }
# Attach a netem qdisc (20mbit rate, delay 1) to the root of each of the
# four veth devices in both $ns1 and $ns2.
init_shapers()
{
	local netem_args="root netem rate 20mbit delay 1"

	for i in $(seq 1 4); do
		tc -n $ns1 qdisc add dev ns1eth$i $netem_args
		tc -n $ns2 qdisc add dev ns2eth$i $netem_args
	done
}
cleanup_partial() cleanup_partial()
{ {
rm -f "$capout" rm -f "$capout"
...@@ -88,8 +98,8 @@ cleanup_partial() ...@@ -88,8 +98,8 @@ cleanup_partial()
cleanup() cleanup()
{ {
rm -f "$cin" "$cout" rm -f "$cin" "$cout" "$sinfail"
rm -f "$sin" "$sout" "$cinsent" rm -f "$sin" "$sout" "$cinsent" "$cinfail"
cleanup_partial cleanup_partial
} }
...@@ -211,11 +221,15 @@ link_failure() ...@@ -211,11 +221,15 @@ link_failure()
{ {
ns="$1" ns="$1"
l=$((RANDOM%4)) if [ -z "$FAILING_LINKS" ]; then
l=$((l+1)) l=$((RANDOM%4))
FAILING_LINKS=$((l+1))
fi
veth="ns1eth$l" for l in $FAILING_LINKS; do
ip -net "$ns" link set "$veth" down veth="ns1eth$l"
ip -net "$ns" link set "$veth" down
done
} }
# $1: IP address # $1: IP address
...@@ -280,10 +294,17 @@ do_transfer() ...@@ -280,10 +294,17 @@ do_transfer()
local_addr="0.0.0.0" local_addr="0.0.0.0"
fi fi
timeout ${timeout_test} \ if [ "$test_link_fail" -eq 2 ];then
ip netns exec ${listener_ns} \ timeout ${timeout_test} \
$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ ip netns exec ${listener_ns} \
${local_addr} < "$sin" > "$sout" & $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
${local_addr} < "$sinfail" > "$sout" &
else
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
${local_addr} < "$sin" > "$sout" &
fi
spid=$! spid=$!
sleep 1 sleep 1
...@@ -294,7 +315,7 @@ do_transfer() ...@@ -294,7 +315,7 @@ do_transfer()
$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$connect_addr < "$cin" > "$cout" & $connect_addr < "$cin" > "$cout" &
else else
( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \ tee "$cinsent" | \
timeout ${timeout_test} \ timeout ${timeout_test} \
ip netns exec ${connector_ns} \ ip netns exec ${connector_ns} \
...@@ -434,7 +455,11 @@ do_transfer() ...@@ -434,7 +455,11 @@ do_transfer()
return 1 return 1
fi fi
check_transfer $sin $cout "file received by client" if [ "$test_link_fail" -eq 2 ];then
check_transfer $sinfail $cout "file received by client"
else
check_transfer $sin $cout "file received by client"
fi
retc=$? retc=$?
if [ "$test_link_fail" -eq 0 ];then if [ "$test_link_fail" -eq 0 ];then
check_transfer $cin $sout "file received by server" check_transfer $cin $sout "file received by server"
...@@ -477,29 +502,33 @@ run_tests() ...@@ -477,29 +502,33 @@ run_tests()
lret=0 lret=0
oldin="" oldin=""
if [ "$test_linkfail" -eq 1 ];then # create the input file for the failure test when
size=$((RANDOM%1024)) # the first failure test run
if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
# the client file must be considerably larger
# of the maximum expected cwin value, or the
# link utilization will be not predicable
size=$((RANDOM%2))
size=$((size+1)) size=$((size+1))
size=$((size*128)) size=$((size*8192))
size=$((size + ( $RANDOM % 8192) ))
oldin=$(mktemp) cinfail=$(mktemp)
cp "$cin" "$oldin" make_file "$cinfail" "client" $size
make_file "$cin" "client" $size
fi fi
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} size=$((RANDOM%16))
lret=$? size=$((size+1))
size=$((size*2048))
if [ "$test_linkfail" -eq 1 ];then sinfail=$(mktemp)
cp "$oldin" "$cin" make_file "$sinfail" "server" $size
rm -f "$oldin"
fi fi
if [ $lret -ne 0 ]; then do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
ret=$lret ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
return lret=$?
fi
} }
chk_csum_nr() chk_csum_nr()
...@@ -593,6 +622,46 @@ chk_join_nr() ...@@ -593,6 +622,46 @@ chk_join_nr()
fi fi
} }
# Check the MPTcpExtSubflowStale / MPTcpExtSubflowRecover counters of a
# namespace.
# $1: namespace to inspect
# $2: minimum expected number of stale events
# $3: maximum expected number of stale events; the upper bound is enforced
#     only when this value is greater than 0, callers pass a negative value
#     to mean "no upper bound"
# $4: expected difference between stale and recover events
#
# No upper bound is useful for bidirectional transfers: if one peer sleeps
# for a while - as these tests do - we can have a quite high number of
# stale/recover conversions, proportional to
# sleep duration / MPTCP-level RTX interval.
#
# On mismatch the global 'ret' is set to 1 and the namespace link and MPTCP
# statistics are dumped.
chk_stale_nr()
{
	local ns=$1
	local stale_min=$2
	local stale_max=$3
	local stale_delta=$4
	local stale_nr
	local recover_nr

	printf "%-39s %-18s" " " "stale"

	# a counter missing from the nstat output counts as 0
	stale_nr=$(ip netns exec $ns nstat -as | awk '/MPTcpExtSubflowStale/ {print $2}')
	[ -z "$stale_nr" ] && stale_nr=0
	recover_nr=$(ip netns exec $ns nstat -as | awk '/MPTcpExtSubflowRecover/ {print $2}')
	[ -z "$recover_nr" ] && recover_nr=0

	if [ $stale_nr -lt $stale_min ] ||
	   [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
	   [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then
		echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
		     " expected stale in range [$stale_min..$stale_max]," \
		     " stale-recover delta $stale_delta "
		ret=1

		# dump the stats to help debugging the failure
		echo $ns stats
		ip netns exec $ns ip -s link show
		ip netns exec $ns nstat -as | grep MPTcp
	else
		echo "[ ok ]"
	fi
}
chk_add_nr() chk_add_nr()
{ {
local add_nr=$1 local add_nr=$1
...@@ -801,6 +870,27 @@ chk_prio_nr() ...@@ -801,6 +870,27 @@ chk_prio_nr()
fi fi
} }
# Check which share of the transferred data went through a given link.
# $1: namespace owning the link
# $2: link (device) name
# $3: file whose size is taken as the total amount of data sent
# $4: expected usage of the link, as a percentage of the total
#
# The check passes when the measured percentage is within +/- 5 points of
# the expected one; otherwise the global 'ret' is set to 1.
chk_link_usage()
{
	local ns=$1
	local link=$2
	local out=$3
	local expected_rate=$4
	local tolerance=5
	local tx_link
	local tx_total
	local tx_rate

	printf "%-39s %-18s" " " "link usage"

	tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
	# ask stat for the size instead of parsing the 'ls -l' output, whose
	# column layout is not guaranteed
	tx_total=$(stat --format=%s "$out")

	# a missing counter or an empty/missing file would make the division
	# below fail with an arithmetic error: report a test failure instead
	if [ -z "$tx_link" ] || [ -z "$tx_total" ] || [ "$tx_total" -eq 0 ]; then
		echo "[fail] can't compute usage: link tx bytes '$tx_link', total bytes '$tx_total'"
		ret=1
		return
	fi

	tx_rate=$((tx_link * 100 / tx_total))
	if [ $tx_rate -lt $((expected_rate - tolerance)) -o \
	     $tx_rate -gt $((expected_rate + tolerance)) ]; then
		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
		ret=1
	else
		echo "[ ok ]"
	fi
}
subflows_tests() subflows_tests()
{ {
reset reset
...@@ -924,14 +1014,80 @@ link_failure_tests() ...@@ -924,14 +1014,80 @@ link_failure_tests()
{ {
# accept and use add_addr with additional subflows and link loss # accept and use add_addr with additional subflows and link loss
reset reset
# without any b/w limit each veth could spool the packets and get
# them acked at xmit time, so that the corresponding subflow will
# have almost always no outstanding pkts, the scheduler will pick
# always the first subflow and we will have hard time testing
# active backup and link switch-over.
# Let's set some arbitrary (low) virtual link limits.
init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3 ip netns exec $ns1 ./pm_nl_ctl limits 0 3
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1 run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3 chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1 chk_add_nr 1 1
chk_stale_nr $ns2 1 5 1
# accept and use add_addr with additional subflows and link loss
# for bidirectional transfer
reset
init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 2
chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
chk_add_nr 1 1
chk_stale_nr $ns2 1 -1 1
# 2 subflows plus 1 backup subflow with a lossy link, backup
# will never be used
reset
init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
export FAILING_LINKS="1"
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "backup subflow unused, link failure" 2 2 2
chk_add_nr 1 1
chk_link_usage $ns2 ns2eth3 $cinsent 0
# 2 lossy links after half transfer, backup will get half of
# the traffic
reset
init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
export FAILING_LINKS="1 2"
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "backup flow used, multi links fail" 2 2 2
chk_add_nr 1 1
chk_stale_nr $ns2 2 4 2
chk_link_usage $ns2 ns2eth3 $cinsent 50
# use a backup subflow with the first subflow on a lossy link
# for bidirectional transfer
reset
init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 2
chk_join_nr "backup flow used, bidi, link failure" 2 2 2
chk_add_nr 1 1
chk_stale_nr $ns2 1 -1 2
chk_link_usage $ns2 ns2eth3 $cinsent 50
} }
add_addr_timeout_tests() add_addr_timeout_tests()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment