Commit d9ca1de8 authored by Paolo Abeni, committed by Jakub Kicinski

mptcp: move page frag allocation in mptcp_sendmsg()

mptcp_sendmsg() is refactored so that it first copies
the data provided from user space into the send queue,
and then tries to spool the send queue via sendmsg_frag.

There is a subtle change in the MPTCP-level collapsing of
consecutive data fragments: we now allow that only on unsent
data.

The latter function does not need to deal with msghdr data
anymore and can be simplified considerably.

snd_nxt and write_seq are now tracked independently.

Overall this allows some relevant cleanup and will
allow sending pending mptcp data on msk una updates in
a later patch.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent e16163b6
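Before the diff, a compact userspace model of the send path this patch introduces may help. mptcp_sendmsg() now only copies user data into a queue of pending fragments (advancing write_seq), while a separate push loop transmits them and advances snd_nxt, tracking per-fragment progress in already_sent. The sketch below is illustrative only: the names mirror the kernel ones, but all types, helpers, and numbers are simplified stand-ins, not kernel code.

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_FRAGS 16

    struct dfrag {
            uint64_t data_seq;   /* MPTCP-level sequence of the first byte */
            int data_len;        /* bytes queued in this fragment */
            int already_sent;    /* bytes already pushed to some subflow */
    };

    static struct dfrag queue[MAX_FRAGS];
    static int nr_frags, first_pending;
    static uint64_t write_seq = 1, snd_nxt = 1;

    /* phase 1: sendmsg only copies data into the pending queue */
    static void sendmsg_model(int len)
    {
            struct dfrag *tail = nr_frags ? &queue[nr_frags - 1] : NULL;

            if (tail && tail->data_seq + tail->data_len == write_seq)
                    tail->data_len += len;   /* collapse into the tail */
            else
                    queue[nr_frags++] = (struct dfrag){ write_seq, len, 0 };
            write_seq += len;                /* enqueued, not yet sent */
    }

    /* phase 2: the push loop spools pending data and advances snd_nxt */
    static void push_pending_model(int budget)
    {
            while (first_pending < nr_frags && budget > 0) {
                    struct dfrag *df = &queue[first_pending];
                    int len = df->data_len - df->already_sent;
                    int sent = len < budget ? len : budget;

                    df->already_sent += sent;
                    snd_nxt += sent;
                    budget -= sent;
                    if (df->already_sent == df->data_len)
                            first_pending++;
            }
    }

    int main(void)
    {
            sendmsg_model(1000);
            sendmsg_model(500);      /* contiguous: collapses into one fragment */
            push_pending_model(800); /* partial send: snd_nxt lags write_seq */
            printf("write_seq=%llu snd_nxt=%llu unsent=%d\n",
                   (unsigned long long)write_seq, (unsigned long long)snd_nxt,
                   queue[first_pending].data_len -
                   queue[first_pending].already_sent);
            return 0;
    }

Running this prints write_seq=1501 snd_nxt=801 unsent=700: queued and sent sequence numbers now advance independently, which is exactly the split the patch makes.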
@@ -43,6 +43,7 @@ struct mptcp_skb_cb {
 static struct percpu_counter mptcp_sockets_allocated;
 
 static void __mptcp_destroy_sock(struct sock *sk);
+static void __mptcp_check_send_data_fin(struct sock *sk);
 
 /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
  * completed yet or has failed, return the subflow socket.
@@ -814,6 +815,7 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 				       const struct mptcp_data_frag *df)
 {
 	return df && pfrag->page == df->page &&
+		pfrag->size - pfrag->offset > 0 &&
 		df->data_seq + df->data_len == msk->write_seq;
 }
@@ -864,6 +866,8 @@ static void mptcp_clean_una(struct sock *sk)
 		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
 			break;
 
+		if (WARN_ON_ONCE(dfrag == msk->first_pending))
+			break;
 		dfrag_clear(sk, dfrag);
 		cleaned = true;
 	}
@@ -872,12 +876,13 @@ static void mptcp_clean_una(struct sock *sk)
 	if (dfrag && after64(snd_una, dfrag->data_seq)) {
 		u64 delta = snd_una - dfrag->data_seq;
 
-		if (WARN_ON_ONCE(delta > dfrag->data_len))
+		if (WARN_ON_ONCE(delta > dfrag->already_sent))
 			goto out;
 
 		dfrag->data_seq += delta;
 		dfrag->offset += delta;
 		dfrag->data_len -= delta;
+		dfrag->already_sent -= delta;
 
 		dfrag_uncharge(sk, delta);
 		cleaned = true;
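As a worked example of the trimming above (numbers invented): if a fragment starts at data_seq 1000 with data_len 700 and already_sent 500, and the peer's cumulative ack moves snd_una to 1300, then delta is 300 and all four fields shift accordingly. The WARN_ON_ONCE now fires when the peer acks data beyond what was actually sent, a case the old data_len bound could not catch once data_len and already_sent diverge.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* one pending fragment, partially sent (invented numbers) */
            struct { uint64_t data_seq; int offset, data_len, already_sent; } df = {
                    .data_seq = 1000, .offset = 64,
                    .data_len = 700, .already_sent = 500,
            };
            uint64_t snd_una = 1300;                /* cumulative MPTCP-level ack */
            uint64_t delta = snd_una - df.data_seq; /* 300 acked bytes to drop */

            assert(delta <= (uint64_t)df.already_sent); /* can't ack unsent data */
            df.data_seq += delta;
            df.offset += delta;
            df.data_len -= delta;
            df.already_sent -= delta;
            /* prints: seq=1300 offset=364 len=400 sent=200 */
            printf("seq=%llu offset=%d len=%d sent=%d\n",
                   (unsigned long long)df.data_seq, df.offset, df.data_len,
                   df.already_sent);
            return 0;
    }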
@@ -911,12 +916,23 @@ static void mptcp_clean_una_wakeup(struct sock *sk)
  */
 static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 {
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	bool first = true;
+
 	if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag),
 					pfrag, sk->sk_allocation)))
 		return true;
 
-	sk->sk_prot->enter_memory_pressure(sk);
 	sk_stream_moderate_sndbuf(sk);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		if (first)
+			tcp_enter_memory_pressure(ssk);
+		sk_stream_moderate_sndbuf(ssk);
+		first = false;
+	}
 	return false;
 }
@@ -932,6 +948,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 	dfrag->data_seq = msk->write_seq;
 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
+	dfrag->already_sent = 0;
 	dfrag->page = pfrag->page;
 
 	return dfrag;
@@ -940,121 +957,58 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 struct mptcp_sendmsg_info {
 	int mss_now;
 	int size_goal;
+	u16 limit;
+	u16 sent;
+	unsigned int flags;
 };
 
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
-			      struct msghdr *msg, struct mptcp_data_frag *dfrag,
+			      struct mptcp_data_frag *dfrag,
 			      struct mptcp_sendmsg_info *info)
 {
-	int avail_size, offset, ret, frag_truesize = 0;
-	bool dfrag_collapsed, can_collapse = false;
+	u64 data_seq = dfrag->data_seq + info->sent;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_ext *mpext = NULL;
-	bool retransmission = !!dfrag;
 	struct sk_buff *skb, *tail;
-	struct page_frag *pfrag;
-	struct page *page;
-	u64 *write_seq;
-	size_t psize;
-
-	/* use the mptcp page cache so that we can easily move the data
-	 * from one substream to another, but do per subflow memory accounting
-	 * Note: pfrag is used only !retransmission, but the compiler if
-	 * fooled into a warning if we don't init here
-	 */
-	pfrag = sk_page_frag(sk);
-	if (!retransmission) {
-		write_seq = &msk->write_seq;
-		page = pfrag->page;
-	} else {
-		write_seq = &dfrag->data_seq;
-		page = dfrag->page;
-	}
+	bool can_collapse = false;
+	int avail_size;
+	size_t ret;
 
-	/* compute copy limit */
-	info->mss_now = tcp_send_mss(ssk, &info->size_goal, msg->msg_flags);
+	pr_debug("msk=%p ssk=%p sending dfrag at seq=%lld len=%d already sent=%d",
+		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
+
+	/* compute send limit */
+	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
 	avail_size = info->size_goal;
 	skb = tcp_write_queue_tail(ssk);
 	if (skb) {
-		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
-
 		/* Limit the write to the size available in the
 		 * current skb, if any, so that we create at most a new skb.
 		 * Explicitly tells TCP internals to avoid collapsing on later
 		 * queue management operation, to avoid breaking the ext <->
 		 * SSN association set here
 		 */
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
 		can_collapse = (info->size_goal - skb->len > 0) &&
-			      mptcp_skb_can_collapse_to(*write_seq, skb, mpext);
+			mptcp_skb_can_collapse_to(data_seq, skb, mpext);
 		if (!can_collapse)
 			TCP_SKB_CB(skb)->eor = 1;
 		else
 			avail_size = info->size_goal - skb->len;
 	}
 
-	if (!retransmission) {
-		/* reuse tail pfrag, if possible, or carve a new one from the
-		 * page allocator
-		 */
-		dfrag = mptcp_rtx_tail(sk);
-		offset = pfrag->offset;
-		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
-		if (!dfrag_collapsed) {
-			dfrag = mptcp_carve_data_frag(msk, pfrag, offset);
-			offset = dfrag->offset;
-			frag_truesize = dfrag->overhead;
-		}
-		psize = min_t(size_t, pfrag->size - offset, avail_size);
-
-		/* Copy to page */
-		pr_debug("left=%zu", msg_data_left(msg));
-		psize = copy_page_from_iter(pfrag->page, offset,
-					    min_t(size_t, msg_data_left(msg),
-						  psize),
-					    &msg->msg_iter);
-		pr_debug("left=%zu", msg_data_left(msg));
-		if (!psize)
-			return -EINVAL;
+	if (WARN_ON_ONCE(info->sent > info->limit ||
+			 info->limit > dfrag->data_len))
+		return 0;
 
-		if (!sk_wmem_schedule(sk, psize + dfrag->overhead)) {
-			iov_iter_revert(&msg->msg_iter, psize);
-			return -ENOMEM;
-		}
-	} else {
-		offset = dfrag->offset;
-		psize = min_t(size_t, dfrag->data_len, avail_size);
-	}
-
-	tail = tcp_build_frag(ssk, psize, msg->msg_flags, page, offset, &psize);
+	ret = info->limit - info->sent;
+	tail = tcp_build_frag(ssk, avail_size, info->flags, dfrag->page,
+			      dfrag->offset + info->sent, &ret);
 	if (!tail) {
 		tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
 		return -ENOMEM;
 	}
 
-	ret = psize;
-	frag_truesize += ret;
-	if (!retransmission) {
-		if (unlikely(ret < psize))
-			iov_iter_revert(&msg->msg_iter, psize - ret);
-
-		/* send successful, keep track of sent data for mptcp-level
-		 * retransmission
-		 */
-		dfrag->data_len += ret;
-		if (!dfrag_collapsed) {
-			get_page(dfrag->page);
-			list_add_tail(&dfrag->list, &msk->rtx_queue);
-			sk_wmem_queued_add(sk, frag_truesize);
-		} else {
-			sk_wmem_queued_add(sk, ret);
-		}
-
-		/* charge data on mptcp rtx queue to the master socket
-		 * Note: we charge such data both to sk and ssk
-		 */
-		sk->sk_forward_alloc -= frag_truesize;
-	}
-
 	/* if the tail skb is still the cached one, collapsing really happened.
 	 */
 	if (skb == tail) {
@@ -1067,7 +1021,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		msk->cached_ext = NULL;
 
 	memset(mpext, 0, sizeof(*mpext));
-	mpext->data_seq = *write_seq;
+	mpext->data_seq = data_seq;
 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
 	mpext->data_len = ret;
 	mpext->use_map = 1;
@@ -1078,11 +1032,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->dsn64);
 
 out:
-	if (!retransmission)
-		pfrag->offset += frag_truesize;
-	WRITE_ONCE(*write_seq, *write_seq + ret);
 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
-
 	return ret;
 }
@@ -1210,19 +1160,86 @@ static void ssk_check_wmem(struct mptcp_sock *msk)
 	mptcp_nospace(msk);
 }
 
+static void mptcp_push_release(struct sock *sk, struct sock *ssk,
+			       struct mptcp_sendmsg_info *info)
+{
+	mptcp_set_timeout(sk, ssk);
+	tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
+	release_sock(ssk);
+}
+
+static void mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+	struct sock *prev_ssk = NULL, *ssk = NULL;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct mptcp_sendmsg_info info = {
+		.flags = flags,
+	};
+	struct mptcp_data_frag *dfrag;
+	int len, copied = 0;
+	u32 sndbuf;
+
+	while ((dfrag = mptcp_send_head(sk))) {
+		info.sent = dfrag->already_sent;
+		info.limit = dfrag->data_len;
+		len = dfrag->data_len - dfrag->already_sent;
+		while (len > 0) {
+			int ret = 0;
+
+			prev_ssk = ssk;
+			__mptcp_flush_join_list(msk);
+			ssk = mptcp_subflow_get_send(msk, &sndbuf);
+
+			/* do auto tuning */
+			if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
+			    sndbuf > READ_ONCE(sk->sk_sndbuf))
+				WRITE_ONCE(sk->sk_sndbuf, sndbuf);
+
+			/* try to keep the subflow socket lock across
+			 * consecutive xmit on the same socket
+			 */
+			if (ssk != prev_ssk && prev_ssk)
+				mptcp_push_release(sk, prev_ssk, &info);
+			if (!ssk)
+				goto out;
+
+			if (ssk != prev_ssk || !prev_ssk)
+				lock_sock(ssk);
+
+			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+			if (ret <= 0) {
+				mptcp_push_release(sk, ssk, &info);
+				goto out;
+			}
+
+			info.sent += ret;
+			dfrag->already_sent += ret;
+			msk->snd_nxt += ret;
+			msk->snd_burst -= ret;
+			copied += ret;
+			len -= ret;
+		}
+		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+	}
+
+	/* at this point we held the socket lock for the last subflow we used */
+	if (ssk)
+		mptcp_push_release(sk, ssk, &info);
+
+out:
+	/* start the timer, if it's not pending */
+	if (!mptcp_timer_pending(sk))
+		mptcp_reset_timer(sk);
+	if (copied)
+		__mptcp_check_send_data_fin(sk);
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct mptcp_sendmsg_info info = {
-		.mss_now = 0,
-		.size_goal = 0,
-	};
 	struct page_frag *pfrag;
 	size_t copied = 0;
-	struct sock *ssk;
 	int ret = 0;
-	u32 sndbuf;
-	bool tx_ok;
 	long timeo;
 
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
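The subflow-lock handling in mptcp_push_pending above follows a lock-caching pattern worth calling out: the lock of the last used subflow is kept across loop iterations and only dropped when the packet scheduler picks a different subflow, so back-to-back transmissions on one subflow pay for a single lock/unlock pair. Here is a compilable sketch of just that pattern; the scheduler and the sockets are stubs, not the kernel API:

    #include <pthread.h>
    #include <stdio.h>

    struct subflow { pthread_mutex_t lock; int id; };

    static struct subflow flows[2] = {
            { PTHREAD_MUTEX_INITIALIZER, 0 },
            { PTHREAD_MUTEX_INITIALIZER, 1 },
    };
    static int chunks_left = 5;

    /* stub scheduler: stays on subflow 0, then moves to subflow 1 */
    static struct subflow *pick_subflow(void)
    {
            return chunks_left ? &flows[chunks_left > 2 ? 0 : 1] : NULL;
    }

    int main(void)
    {
            struct subflow *prev = NULL, *cur = NULL;

            while (chunks_left > 0) {
                    prev = cur;
                    cur = pick_subflow();

                    /* switching subflow: release the previously held lock */
                    if (cur != prev && prev)
                            pthread_mutex_unlock(&prev->lock);
                    if (!cur)
                            break;
                    /* first transmission on this subflow: take its lock */
                    if (cur != prev)
                            pthread_mutex_lock(&cur->lock);

                    printf("xmit chunk %d on subflow %d\n",
                           chunks_left--, cur->id);
            }
            /* the lock of the last used subflow is still held here */
            if (cur)
                    pthread_mutex_unlock(&cur->lock);
            return 0;
    }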
@@ -1239,129 +1256,93 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 
 	pfrag = sk_page_frag(sk);
-restart:
 	mptcp_clean_una(sk);
 
-	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
-		ret = -EPIPE;
-		goto out;
-	}
+	while (msg_data_left(msg)) {
+		struct mptcp_data_frag *dfrag;
+		int frag_truesize = 0;
+		bool dfrag_collapsed;
+		size_t psize, offset;
 
-	__mptcp_flush_join_list(msk);
-	ssk = mptcp_subflow_get_send(msk, &sndbuf);
-	while (!sk_stream_memory_free(sk) ||
-	       !ssk ||
-	       !mptcp_page_frag_refill(ssk, pfrag)) {
-		if (ssk) {
-			/* make sure retransmit timer is
-			 * running before we wait for memory.
-			 *
-			 * The retransmit timer might be needed
-			 * to make the peer send an up-to-date
-			 * MPTCP Ack.
-			 */
-			mptcp_set_timeout(sk, ssk);
-			if (!mptcp_timer_pending(sk))
-				mptcp_reset_timer(sk);
-		}
-
-		mptcp_nospace(msk);
-		ret = sk_stream_wait_memory(sk, &timeo);
-		if (ret)
-			goto out;
-
-		mptcp_clean_una(sk);
-
-		ssk = mptcp_subflow_get_send(msk, &sndbuf);
-		if (list_empty(&msk->conn_list)) {
-			ret = -ENOTCONN;
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
+			ret = -EPIPE;
 			goto out;
 		}
-	}
 
-	/* do auto tuning */
-	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
-	    sndbuf > READ_ONCE(sk->sk_sndbuf))
-		WRITE_ONCE(sk->sk_sndbuf, sndbuf);
-
-	pr_debug("conn_list->subflow=%p", ssk);
-
-	lock_sock(ssk);
-	tx_ok = msg_data_left(msg);
-	while (tx_ok) {
-		ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &info);
-		if (ret < 0) {
-			if (ret == -EAGAIN && timeo > 0) {
-				mptcp_set_timeout(sk, ssk);
-				release_sock(ssk);
-				goto restart;
+		/* reuse tail pfrag, if possible, or carve a new one from the
+		 * page allocator
+		 */
+		dfrag = mptcp_pending_tail(sk);
+		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
+		if (!dfrag_collapsed) {
+			if (!sk_stream_memory_free(sk)) {
+				mptcp_push_pending(sk, msg->msg_flags);
+				if (!sk_stream_memory_free(sk))
+					goto wait_for_memory;
 			}
-			break;
+			if (!mptcp_page_frag_refill(sk, pfrag))
+				goto wait_for_memory;
+
+			dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset);
+			frag_truesize = dfrag->overhead;
 		}
 
-		/* burst can be negative, we will try move to the next subflow
-		 * at selection time, if possible.
+		/* we do not bound vs wspace, to allow a single packet.
+		 * memory accounting will prevent excessive memory usage
+		 * anyway
 		 */
-		msk->snd_burst -= ret;
-		copied += ret;
+		offset = dfrag->offset + dfrag->data_len;
+		psize = pfrag->size - offset;
+		psize = min_t(size_t, psize, msg_data_left(msg));
+		if (!sk_wmem_schedule(sk, psize + frag_truesize))
+			goto wait_for_memory;
 
-		tx_ok = msg_data_left(msg);
-		if (!tx_ok)
-			break;
+		if (copy_page_from_iter(dfrag->page, offset, psize,
+					&msg->msg_iter) != psize) {
+			ret = -EFAULT;
+			goto out;
+		}
 
-		if (!sk_stream_memory_free(ssk) ||
-		    !mptcp_page_frag_refill(ssk, pfrag) ||
-		    !mptcp_ext_cache_refill(msk)) {
-			tcp_push(ssk, msg->msg_flags, info.mss_now,
-				 tcp_sk(ssk)->nonagle, info.size_goal);
-			mptcp_set_timeout(sk, ssk);
-			release_sock(ssk);
-			goto restart;
+		/* data successfully copied into the write queue */
+		copied += psize;
+		dfrag->data_len += psize;
+		frag_truesize += psize;
+		pfrag->offset += frag_truesize;
+		WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
+
+		/* charge data on mptcp pending queue to the msk socket
+		 * Note: we charge such data both to sk and ssk
+		 */
+		sk_wmem_queued_add(sk, frag_truesize);
+		sk->sk_forward_alloc -= frag_truesize;
+		if (!dfrag_collapsed) {
+			get_page(dfrag->page);
+			list_add_tail(&dfrag->list, &msk->rtx_queue);
+			if (!msk->first_pending)
+				WRITE_ONCE(msk->first_pending, dfrag);
 		}
+		pr_debug("msk=%p dfrag at seq=%lld len=%d sent=%d new=%d", msk,
+			 dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
+			 !dfrag_collapsed);
 
-		/* memory is charged to mptcp level socket as well, i.e.
-		 * if msg is very large, mptcp socket may run out of buffer
-		 * space.  mptcp_clean_una() will release data that has
-		 * been acked at mptcp level in the mean time, so there is
-		 * a good chance we can continue sending data right away.
-		 *
-		 * Normally, when the tcp subflow can accept more data, then
-		 * so can the MPTCP socket.  However, we need to cope with
-		 * peers that might lag behind in their MPTCP-level
-		 * acknowledgements, i.e.  data might have been acked at
-		 * tcp level only.  So, we must also check the MPTCP socket
-		 * limits before we send more data.
-		 */
-		if (unlikely(!sk_stream_memory_free(sk))) {
-			tcp_push(ssk, msg->msg_flags, info.mss_now,
-				 tcp_sk(ssk)->nonagle, info.size_goal);
-			mptcp_clean_una(sk);
-			if (!sk_stream_memory_free(sk)) {
-				/* can't send more for now, need to wait for
-				 * MPTCP-level ACKs from peer.
-				 *
-				 * Wakeup will happen via mptcp_clean_una().
-				 */
-				mptcp_set_timeout(sk, ssk);
-				release_sock(ssk);
-				goto restart;
-			}
-		}
-	}
+		if (!mptcp_ext_cache_refill(msk))
+			goto wait_for_memory;
+		continue;
 
-	mptcp_set_timeout(sk, ssk);
-	if (copied) {
-		tcp_push(ssk, msg->msg_flags, info.mss_now,
-			 tcp_sk(ssk)->nonagle, info.size_goal);
-
-		/* start the timer, if it's not pending */
-		if (!mptcp_timer_pending(sk))
-			mptcp_reset_timer(sk);
+wait_for_memory:
+		mptcp_nospace(msk);
+		mptcp_clean_una(sk);
+		if (mptcp_timer_pending(sk))
+			mptcp_reset_timer(sk);
+		ret = sk_stream_wait_memory(sk, &timeo);
+		if (ret)
+			goto out;
 	}
 
-	release_sock(ssk);
+	if (copied)
+		mptcp_push_pending(sk, msg->msg_flags);
 
 out:
-	msk->snd_nxt = msk->write_seq;
 	ssk_check_wmem(msk);
 	release_sock(sk);
 	return copied ? : ret;
@@ -1700,7 +1681,7 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 	sock_owned_by_me((const struct sock *)msk);
 
 	if (__mptcp_check_fallback(msk))
-		return msk->first;
+		return NULL;
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -1843,12 +1824,7 @@ static void mptcp_worker(struct work_struct *work)
 	struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
 	struct mptcp_sendmsg_info info = {};
 	struct mptcp_data_frag *dfrag;
-	int orig_len, orig_offset;
-	u64 orig_write_seq;
 	size_t copied = 0;
-	struct msghdr msg = {
-		.msg_flags = MSG_DONTWAIT,
-	};
 	int state, ret;
 
 	lock_sock(sk);
@@ -1901,18 +1877,17 @@ static void mptcp_worker(struct work_struct *work)
 
 	lock_sock(ssk);
 
-	orig_len = dfrag->data_len;
-	orig_offset = dfrag->offset;
-	orig_write_seq = dfrag->data_seq;
-	while (dfrag->data_len > 0) {
-		ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &info);
+	/* limit retransmission to the bytes already sent on some subflows */
+	info.sent = 0;
+	info.limit = dfrag->already_sent;
+	while (info.sent < dfrag->already_sent) {
+		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
 		if (ret < 0)
 			break;
 
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
 		copied += ret;
-		dfrag->data_len -= ret;
-		dfrag->offset += ret;
+		info.sent += ret;
 
 		if (!mptcp_ext_cache_refill(msk))
 			break;
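A small worked model of the retransmit loop above (invented numbers): only the already-sent window of a fragment is replayed, and the cursor lives in the info struct, so the fragment's own fields stay untouched; this is what makes the orig_* save/restore deleted in the next hunk unnecessary.

    #include <stdio.h>

    int main(void)
    {
            int data_len = 1000, already_sent = 600; /* fragment state, untouched */
            int sent = 0, limit = already_sent;      /* cursor kept in 'info' */

            while (sent < limit) {
                    /* stub transmit: at most 250 bytes per pass */
                    int ret = (limit - sent < 250) ? limit - sent : 250;

                    sent += ret;
                    printf("retransmitted %d bytes, cursor %d/%d\n",
                           ret, sent, limit);
            }
            /* fragment still intact: no orig_* restore needed */
            return (data_len == 1000 && already_sent == 600) ? 0 : 1;
    }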
@@ -1921,10 +1896,6 @@ static void mptcp_worker(struct work_struct *work)
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
 
-	dfrag->data_seq = orig_write_seq;
-	dfrag->offset = orig_offset;
-	dfrag->data_len = orig_len;
-
 	mptcp_set_timeout(sk, ssk);
 	release_sock(ssk);
@@ -1996,6 +1967,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
 
 	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
 
+	WRITE_ONCE(msk->first_pending, NULL);
 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
 		dfrag_clear(sk, dfrag);
 }
...