Commit 98e95872 authored by Jakub Kicinski

Merge branch 'mptcp-expose-more-info-and-small-improvements'

Matthieu Baerts says:

====================
mptcp: expose more info and small improvements

Patches 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the number of bytes that have
been transferred. The first patch prepares this work by moving where
snd_una is updated for fallback sockets, while the last one adds some
tests to cover the new code.
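
As an illustration only (not part of the series): a minimal userspace
sketch reading these new counters. The helper name and the SOL_MPTCP
fallback define are assumptions, and the mptcpi_bytes_* fields require
the updated uapi header from this series.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/mptcp.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif

/* Dump the aggregated MPTCP-level counters of a connected MPTCP socket. */
static void dump_mptcp_counters(int fd)
{
	struct mptcp_info info;
	socklen_t olen = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &info, &olen) < 0) {
		perror("getsockopt(MPTCP_INFO)");
		return;
	}

	/* older kernels return a shorter struct: the new fields sit at the
	 * end of mptcp_info, so check olen before reading them
	 */
	if (olen < sizeof(info)) {
		fprintf(stderr, "kernel without the new byte counters\n");
		return;
	}

	printf("retransmits:    %u\n", info.mptcpi_retransmits);
	printf("bytes sent:     %llu\n", (unsigned long long)info.mptcpi_bytes_sent);
	printf("bytes received: %llu\n", (unsigned long long)info.mptcpi_bytes_received);
	printf("bytes acked:    %llu\n", (unsigned long long)info.mptcpi_bytes_acked);
	printf("bytes retrans:  %llu\n", (unsigned long long)info.mptcpi_bytes_retrans);
}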

Patches 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option combines the info from the MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. Getting
everything in a single call can be needed because the path-manager can
close and re-create subflows between getsockopt() calls, fooling the
accounting. The first patch introduces a unique subflow ID to easily
detect when subflows are re-created with the same 5-tuple, while the
last one adds some tests to cover the new code.
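
For illustration only (again not part of the series): a minimal sketch of
how a monitoring tool could use the new option to snapshot the MPTCP-level
counters together with the per-subflow id, addresses and tcp_info in a
single call. The helper name and the MAX_SF value are arbitrary
assumptions, and the structures require the updated uapi header.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/mptcp.h>

#ifndef SOL_MPTCP
#define SOL_MPTCP 284
#endif

#define MAX_SF 8	/* arbitrary: max subflows we are interested in */

/* Fetch msk-level info plus per-subflow id/addrs/tcp_info in one call. */
static int dump_full_info(int fd)
{
	struct mptcp_subflow_info sf[MAX_SF];
	struct tcp_info ti[MAX_SF];
	struct mptcp_full_info mfi;
	socklen_t olen = sizeof(mfi);
	unsigned int i, n;

	memset(&mfi, 0, sizeof(mfi));
	memset(sf, 0, sizeof(sf));
	memset(ti, 0, sizeof(ti));

	/* tell the kernel the record sizes and the array length we provide;
	 * the *_kernel fields and num_subflows must be left set to 0
	 */
	mfi.size_sfinfo_user = sizeof(sf[0]);
	mfi.size_tcpinfo_user = sizeof(ti[0]);
	mfi.size_arrays_user = MAX_SF;
	mfi.subflow_info = (unsigned long)sf;
	mfi.tcp_info = (unsigned long)ti;

	if (getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen) < 0) {
		perror("getsockopt(MPTCP_FULL_INFO)");
		return -1;
	}

	/* num_subflows holds the real subflow count, but the arrays only
	 * contain min(num_subflows, MAX_SF) entries
	 */
	n = mfi.num_subflows < MAX_SF ? mfi.num_subflows : MAX_SF;
	for (i = 0; i < n; i++)
		printf("subflow %u: %llu bytes sent at the TCP level\n",
		       sf[i].id, (unsigned long long)ti[i].tcpi_bytes_sent);

	printf("MPTCP-level bytes sent: %llu\n",
	       (unsigned long long)mfi.mptcp_info.mptcpi_bytes_sent);
	return 0;
}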

Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that has been there for some time, see [1]. A fix has
recently been sent to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There are no
conflicts between the two patches, but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".

Patch 7/9 is similar to commit 47867f0a ("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree, but here it adapts code that is only in net-next (and it fixes a
merge conflict resolution which did not have any impact).

Patches 8 and 9/9 are two simple refactorings: one consolidates the
transition to TCP_CLOSE in mptcp_do_fastclose() to avoid duplicated
code, and the other reduces the scope of an argument passed to the
mptcp_pm_alloc_anno_list() function.

Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================

Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 5dfbbaa2 528cb5f2
@@ -123,6 +123,11 @@ struct mptcp_info {
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
__u32 mptcpi_retransmits;
__u64 mptcpi_bytes_retrans;
__u64 mptcpi_bytes_sent;
__u64 mptcpi_bytes_received;
__u64 mptcpi_bytes_acked;
};
/*
@@ -244,9 +249,33 @@ struct mptcp_subflow_addrs {
};
};
struct mptcp_subflow_info {
__u32 id;
struct mptcp_subflow_addrs addrs;
};
struct mptcp_full_info {
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
__u32 size_tcpinfo_user;
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
__u32 size_sfinfo_user;
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
__u32 size_arrays_user; /* max subflows that userspace is interested in;
* the buffers at subflow_info/tcp_info
* are respectively at least:
* size_arrays * size_sfinfo_user
* size_arrays * size_tcpinfo_user
* bytes wide
*/
__aligned_u64 subflow_info;
__aligned_u64 tcp_info;
struct mptcp_info mptcp_info;
};
/* MPTCP socket options */
#define MPTCP_INFO 1
#define MPTCP_TCPINFO 2
#define MPTCP_SUBFLOW_ADDRS 3
#define MPTCP_FULL_INFO 4
#endif /* _UAPI_MPTCP_H */
@@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
return cur_seq;
}
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
msk->snd_una = new_snd_una;
}
static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
@@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
msk->snd_una = new_snd_una;
__mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
mptcp_data_unlock(sk);
@@ -1119,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_data_lock(subflow->conn);
if (sk_stream_memory_free(sk))
__mptcp_check_push(subflow->conn, sk);
/* on fallback we just need to ignore the msk-level snd_una, as
* this is really plain TCP
*/
__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
return true;
@@ -341,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry)
const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -349,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
if (mptcp_pm_is_kernel(msk))
@@ -366,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
list_add(&add_entry->list, &msk->pm.anno_list);
add_entry->addr = entry->addr;
add_entry->addr = *addr;
add_entry->sock = msk;
add_entry->retrans_times = 0;
@@ -576,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
return;
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
@@ -193,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
lock_sock((struct sock *)msk);
spin_lock_bh(&msk->pm.lock);
if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
@@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
@@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
msk->bytes_received += copy_len;
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail && mptcp_try_coalesce(sk, tail, skb))
@@ -760,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
MPTCP_SKB_CB(skb)->map_seq += delta;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
msk->ack_seq = end_seq;
moved = true;
}
@@ -845,6 +848,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
return true;
@@ -1004,12 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
struct mptcp_data_frag *dtmp, *dfrag;
u64 snd_una;
/* on fallback we just need to ignore snd_una, as this is really
* plain TCP
*/
if (__mptcp_check_fallback(msk))
msk->snd_una = READ_ONCE(msk->snd_nxt);
snd_una = msk->snd_una;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -1537,8 +1535,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
* that has been handed to the subflow for transmission
* and skip update in case it was old dfrag.
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt)))
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
msk->snd_nxt = snd_nxt_new;
}
}
void mptcp_check_and_set_pending(struct sock *sk)
@@ -2596,6 +2596,7 @@ static void __mptcp_retrans(struct sock *sk)
}
if (copied) {
dfrag->already_sent = max(dfrag->already_sent, info.sent);
msk->bytes_retrans += copied;
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
WRITE_ONCE(msk->allow_infinite_fallback, false);
@@ -2654,6 +2655,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2691,10 +2693,9 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
if (mptcp_should_close(sk))
mptcp_do_fastclose(sk);
}
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
goto unlock;
@@ -2733,6 +2734,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
msk->subflow_id = 1;
mptcp_pm_data_init(msk);
@@ -2936,7 +2938,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
@@ -2978,7 +2979,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
timeout = 0;
} else if (mptcp_close_state(sk)) {
@@ -3108,6 +3108,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3157,6 +3161,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
/* passive msk is created after the first/MPC subflow */
msk->subflow_id = 2;
sock_reset_flag(nsk, SOCK_RCU_FREE);
security_inet_csk_clone(nsk, req);
@@ -262,10 +262,13 @@ struct mptcp_sock {
u64 local_key;
u64 remote_key;
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
u64 bytes_received;
u64 ack_seq;
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
u64 bytes_retrans;
int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
@@ -274,6 +277,7 @@ struct mptcp_sock {
* recovery related fields are under data_lock
* protection
*/
u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
@@ -319,7 +323,8 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
u32 setsockopt_seq;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
struct mptcp_sock *dl_next;
};
@@ -500,6 +505,8 @@ struct mptcp_subflow_context {
u8 reset_reason:4;
u8 stale_count;
u32 subflow_id;
long delegated_status;
unsigned long fail_tout;
@@ -810,7 +817,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *rem,
u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry);
const struct mptcp_addr_info *addr);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
#define MIN_INFO_OPTLEN_SIZE 16
#define MIN_INFO_OPTLEN_SIZE 16
#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -889,7 +890,9 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
struct sock *sk = (struct sock *)msk;
u32 flags = 0;
bool slow;
memset(info, 0, sizeof(*info));
@@ -898,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
if (inet_sk_state_load(sk) == TCP_LISTEN)
return;
/* The following limits only make sense for the in-kernel PM */
if (mptcp_pm_is_kernel(msk)) {
info->mptcpi_subflows_max =
@@ -915,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
info->mptcpi_flags = flags;
info->mptcpi_token = READ_ONCE(msk->token);
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
mptcp_data_lock(sk);
info->mptcpi_snd_una = msk->snd_una;
info->mptcpi_rcv_nxt = msk->ack_seq;
info->mptcpi_bytes_acked = msk->bytes_acked;
mptcp_data_unlock(sk);
slow = lock_sock_fast(sk);
info->mptcpi_csum_enabled = msk->csum_enabled;
info->mptcpi_token = msk->token;
info->mptcpi_write_seq = msk->write_seq;
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
info->mptcpi_bytes_sent = msk->bytes_sent;
info->mptcpi_bytes_received = msk->bytes_received;
info->mptcpi_bytes_retrans = msk->bytes_retrans;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -966,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
char __user *optval, int __user *optlen)
char __user *optval,
int __user *optlen)
{
int len, copylen;
@@ -1147,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
static int mptcp_get_full_info(struct mptcp_full_info *mfi,
char __user *optval,
int __user *optlen)
{
int len;
BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
MIN_FULL_INFO_OPTLEN_SIZE);
if (get_user(len, optlen))
return -EFAULT;
if (len < MIN_FULL_INFO_OPTLEN_SIZE)
return -EINVAL;
memset(mfi, 0, sizeof(*mfi));
if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
return -EFAULT;
if (mfi->size_tcpinfo_kernel ||
mfi->size_sfinfo_kernel ||
mfi->num_subflows)
return -EINVAL;
if (mfi->size_sfinfo_user > INT_MAX ||
mfi->size_tcpinfo_user > INT_MAX)
return -EINVAL;
return len - MIN_FULL_INFO_OPTLEN_SIZE;
}
static int mptcp_put_full_info(struct mptcp_full_info *mfi,
char __user *optval,
u32 copylen,
int __user *optlen)
{
copylen += MIN_FULL_INFO_OPTLEN_SIZE;
if (put_user(copylen, optlen))
return -EFAULT;
if (copy_to_user(optval, mfi, copylen))
return -EFAULT;
return 0;
}
static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
int __user *optlen)
{
unsigned int sfcount = 0, copylen = 0;
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
void __user *tcpinfoptr, *sfinfoptr;
struct mptcp_full_info mfi;
int len;
len = mptcp_get_full_info(&mfi, optval, optlen);
if (len < 0)
return len;
/* don't bother filling the mptcp info if there is not enough
* user-space-provided storage
*/
if (len > 0) {
mptcp_diag_fill_info(msk, &mfi.mptcp_info);
copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
}
mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
sizeof(struct tcp_info));
sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
sizeof(struct mptcp_subflow_info));
tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_subflow_info sfinfo;
struct tcp_info tcp_info;
if (sfcount++ >= mfi.size_arrays_user)
continue;
/* fetch addr/tcp_info only if the user space buffers
* are wide enough
*/
memset(&sfinfo, 0, sizeof(sfinfo));
sfinfo.id = subflow->subflow_id;
if (mfi.size_sfinfo_user >
offsetof(struct mptcp_subflow_info, addrs))
mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
goto fail_release;
if (mfi.size_tcpinfo_user) {
tcp_get_info(ssk, &tcp_info);
if (copy_to_user(tcpinfoptr, &tcp_info,
mfi.size_tcpinfo_user))
goto fail_release;
}
tcpinfoptr += mfi.size_tcpinfo_user;
sfinfoptr += mfi.size_sfinfo_user;
}
release_sock(sk);
mfi.num_subflows = sfcount;
if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
return -EFAULT;
return 0;
fail_release:
release_sock(sk);
return -EFAULT;
}
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@@ -1220,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
case MPTCP_FULL_INFO:
return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS:
@@ -819,6 +819,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!ctx->conn)
goto fallback;
ctx->subflow_id = 1;
owner = mptcp_sk(ctx->conn);
mptcp_pm_new_connection(owner, child, 1);
@@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
@@ -1683,12 +1683,12 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
printf "%-${nr_blank}s %s" " " "add TX"
count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}')
[ -z "$count" ] && count=0
count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
echo -n "[skip]"
# if the test configured a short timeout tolerate greater then expected
# add addrs options, due to retransmissions
if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr"
fail_test
else
@@ -1696,9 +1696,10 @@ chk_add_tx_nr()
fi
echo -n " - echo TX "
count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ "$count" != "$echo_tx_nr" ]; then
count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
echo "[skip]"
elif [ "$count" != "$echo_tx_nr" ]; then
echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
fail_test
else
@@ -1734,9 +1735,10 @@ chk_rm_nr()
fi
printf "%-${nr_blank}s %s" " " "rm "
count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}')
[ -z "$count" ] && count=0
if [ "$count" != "$rm_addr_nr" ]; then
count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
echo -n "[skip]"
elif [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
fail_test
else
@@ -1778,16 +1780,15 @@ chk_rm_tx_nr()
local rm_addr_tx_nr=$1
printf "%-${nr_blank}s %s" " " "rm TX "
count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ "$count" != "$rm_addr_tx_nr" ]; then
count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
echo "[skip]"
elif [ "$count" != "$rm_addr_tx_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr"
fail_test
else
echo -n "[ ok ]"
echo "[ ok ]"
fi
echo "$extra_msg"
}
chk_prio_nr()
@@ -51,6 +51,11 @@ struct mptcp_info {
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
__u32 mptcpi_retransmits;
__u64 mptcpi_bytes_retrans;
__u64 mptcpi_bytes_sent;
__u64 mptcpi_bytes_received;
__u64 mptcpi_bytes_acked;
};
struct mptcp_subflow_data {
@@ -81,10 +86,41 @@ struct mptcp_subflow_addrs {
#define MPTCP_SUBFLOW_ADDRS 3
#endif
#ifndef MPTCP_FULL_INFO
struct mptcp_subflow_info {
__u32 id;
struct mptcp_subflow_addrs addrs;
};
struct mptcp_full_info {
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
__u32 size_tcpinfo_user;
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
__u32 size_sfinfo_user;
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
__u32 size_arrays_user; /* max subflows that userspace is interested in;
* the buffers at subflow_info/tcp_info
* are respectively at least:
* size_arrays * size_sfinfo_user
* size_arrays * size_tcpinfo_user
* bytes wide
*/
__aligned_u64 subflow_info;
__aligned_u64 tcp_info;
struct mptcp_info mptcp_info;
};
#define MPTCP_FULL_INFO 4
#endif
struct so_state {
struct mptcp_info mi;
struct mptcp_info last_sample;
struct tcp_info tcp_info;
struct mptcp_subflow_addrs addrs;
uint64_t mptcpi_rcv_delta;
uint64_t tcpi_rcv_delta;
bool pkt_stats_avail;
};
#ifndef MIN
@@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
if (ret < 0)
die_perror("getsockopt MPTCP_INFO");
assert(olen == sizeof(i));
s->pkt_stats_avail = olen >= sizeof(i);
s->last_sample = i;
if (s->mi.mptcpi_write_seq == 0)
s->mi = i;
@@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
olen -= sizeof(struct mptcp_subflow_data);
assert(olen == ti.d.size_user);
s->tcp_info = ti.ti[0];
if (ti.ti[0].tcpi_bytes_sent == w &&
ti.ti[0].tcpi_bytes_received == r)
goto done;
@@ -383,7 +422,7 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
}
static void do_getsockopt_subflow_addrs(int fd)
static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
{
struct sockaddr_storage remote, local;
socklen_t olen, rlen, llen;
@@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd)
assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
s->addrs = addrs.addr[0];
memset(&addrs, 0, sizeof(addrs));
@@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd)
do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
}
static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
{
size_t data_size = sizeof(struct mptcp_full_info);
struct mptcp_subflow_info sfinfo[2];
struct tcp_info tcp_info[2];
struct mptcp_full_info mfi;
socklen_t olen;
int ret;
memset(&mfi, 0, data_size);
memset(tcp_info, 0, sizeof(tcp_info));
memset(sfinfo, 0, sizeof(sfinfo));
mfi.size_tcpinfo_user = sizeof(struct tcp_info);
mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
mfi.size_arrays_user = 2;
mfi.subflow_info = (unsigned long)&sfinfo[0];
mfi.tcp_info = (unsigned long)&tcp_info[0];
olen = data_size;
ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
if (ret < 0) {
if (errno == EOPNOTSUPP) {
perror("MPTCP_FULL_INFO test skipped");
return;
}
xerror("getsockopt MPTCP_FULL_INFO");
}
assert(olen <= data_size);
assert(mfi.size_tcpinfo_kernel > 0);
assert(mfi.size_tcpinfo_user ==
MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
assert(mfi.size_sfinfo_kernel > 0);
assert(mfi.size_sfinfo_user ==
MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
assert(mfi.num_subflows == 1);
/* Tolerate future extension to mptcp_info struct and running newer
* test on top of older kernel.
* Anyway any kernel supporting MPTCP_FULL_INFO must at least include
* the following in mptcp_info.
*/
assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
assert(mfi.mptcp_info.mptcpi_subflows == 0);
assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
assert(sfinfo[0].id == 1);
assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
}
static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
{
do_getsockopt_mptcp_info(s, fd, w);
do_getsockopt_tcp_info(s, fd, r, w);
do_getsockopt_subflow_addrs(fd);
do_getsockopt_subflow_addrs(s, fd);
if (r)
do_getsockopt_mptcp_full_info(s, fd);
}
static void connect_one_server(int fd, int pipefd)
@@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
s.last_sample.mptcpi_bytes_acked, ret2,
s.last_sample.mptcpi_bytes_acked - ret2);
}
close(fd);
}