Commit 849fd444 authored by David S. Miller

Merge branch 'mptcp-Connection-and-accounting-fixes'

Mat Martineau says:

====================
mptcp: Connection and accounting fixes

Here are some miscellaneous fixes for MPTCP:

Patch 1 modifies an MPTCP hash so it doesn't depend on one of skb->dev
and skb->sk being non-NULL.

Patch 2 removes an extra destructor call when rejecting a join due to
port mismatch.

Patches 3 and 5 more cleanly handle error conditions with MP_JOIN and
syncookies, and update a related self test.

Patch 4 makes sure packets that trigger a subflow TCP reset during MPTCP
option header processing are correctly dropped.

Patch 6 addresses a rmem accounting issue that could keep packets in
subflow receive buffers longer than necessary, delaying MPTCP-level
ACKs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5d52c906 ce599c51
...@@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, ...@@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining, unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts); struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb); bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts); struct mptcp_out_options *opts);
...@@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk, ...@@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk,
return false; return false;
} }
static inline void mptcp_incoming_options(struct sock *sk, static inline bool mptcp_incoming_options(struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
return true;
} }
static inline void mptcp_skb_ext_move(struct sk_buff *to, static inline void mptcp_skb_ext_move(struct sk_buff *to,
......
...@@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) ...@@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
{ {
trace_tcp_receive_reset(sk); trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
* so just ignore the return value of mptcp_incoming_options().
*/
if (sk_is_mptcp(sk)) if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb); mptcp_incoming_options(sk, skb);
...@@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ...@@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen; bool fragstolen;
int eaten; int eaten;
if (sk_is_mptcp(sk)) /* If a subflow has been reset, the packet should not continue
mptcp_incoming_options(sk, skb); * to be processed, drop the packet.
*/
if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
__kfree_skb(skb);
return;
}
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb); __kfree_skb(skb);
...@@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) ...@@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING: case TCP_CLOSING:
case TCP_LAST_ACK: case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
if (sk_is_mptcp(sk)) /* If a subflow has been reset, the packet should not
mptcp_incoming_options(sk, skb); * continue to be processed, drop the packet.
*/
if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
goto discard;
break; break;
} }
fallthrough; fallthrough;
......
...@@ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { ...@@ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_SENTINEL SNMP_MIB_SENTINEL
}; };
......
...@@ -37,6 +37,7 @@ enum linux_mptcp_mib_field { ...@@ -37,6 +37,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
__MPTCP_MIB_MAX __MPTCP_MIB_MAX
}; };
......
...@@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, ...@@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac; return hmac == mp_opt->ahmac;
} }
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) /* Return false if a subflow has been reset, else return true */
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_sock *msk = mptcp_sk(subflow->conn);
...@@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
__mptcp_check_push(subflow->conn, sk); __mptcp_check_push(subflow->conn, sk);
__mptcp_data_acked(subflow->conn); __mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn); mptcp_data_unlock(subflow->conn);
return; return true;
} }
mptcp_get_options(sk, skb, &mp_opt); mptcp_get_options(sk, skb, &mp_opt);
/* The subflow can be in close state only if check_fully_established()
* just sent a reset. If so, tell the caller to ignore the current packet.
*/
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return; return sk->sk_state != TCP_CLOSE;
if (mp_opt.fastclose && if (mp_opt.fastclose &&
msk->local_key == mp_opt.rcvr_key) { msk->local_key == mp_opt.rcvr_key) {
...@@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
} }
if (!mp_opt.dss) if (!mp_opt.dss)
return; return true;
/* we can't wait for recvmsg() to update the ack_seq, otherwise /* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck * monodirectional flows will stuck
...@@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
schedule_work(&msk->work)) schedule_work(&msk->work))
sock_hold(subflow->conn); sock_hold(subflow->conn);
return; return true;
} }
mpext = skb_ext_add(skb, SKB_EXT_MPTCP); mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext) if (!mpext)
return; return true;
memset(mpext, 0, sizeof(*mpext)); memset(mpext, 0, sizeof(*mpext));
...@@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (mpext->csum_reqd) if (mpext->csum_reqd)
mpext->csum = mp_opt.csum; mpext->csum = mp_opt.csum;
} }
return true;
} }
static void mptcp_set_rwin(const struct tcp_sock *tp) static void mptcp_set_rwin(const struct tcp_sock *tp)
......
...@@ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) ...@@ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
bool cleanup, rx_empty; bool cleanup, rx_empty;
cleanup = (space > 0) && (space >= (old_space << 1)); cleanup = (space > 0) && (space >= (old_space << 1));
rx_empty = !atomic_read(&sk->sk_rmem_alloc); rx_empty = !__mptcp_rmem(sk);
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
...@@ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) ...@@ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf; sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) if (__mptcp_rmem(sk) > sk_rbuf) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
return; return;
}
/* Wake-up the reader only for in-sequence data */ /* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk); mptcp_data_lock(sk);
...@@ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, ...@@ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) { if (!(flags & MSG_PEEK)) {
/* we will bulk release the skb memory later */ /* we will bulk release the skb memory later */
skb->destructor = NULL; skb->destructor = NULL;
msk->rmem_released += skb->truesize; WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue); __skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb); __kfree_skb(skb);
} }
...@@ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk) ...@@ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk)
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
sk_mem_uncharge(sk, msk->rmem_released); sk_mem_uncharge(sk, msk->rmem_released);
msk->rmem_released = 0; WRITE_ONCE(msk->rmem_released, 0);
} }
static void __mptcp_splice_receive_queue(struct sock *sk) static void __mptcp_splice_receive_queue(struct sock *sk)
...@@ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk) ...@@ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->out_of_order_queue = RB_ROOT; msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL; msk->first_pending = NULL;
msk->wmem_reserved = 0; msk->wmem_reserved = 0;
msk->rmem_released = 0; WRITE_ONCE(msk->rmem_released, 0);
msk->tx_pending_data = 0; msk->tx_pending_data = 0;
msk->first = NULL; msk->first = NULL;
......
...@@ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) ...@@ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk; return (struct mptcp_sock *)sk;
} }
/* The msk socket doesn't use the sk backlog. Memory that has been bulk-freed
 * from the receive queue but not yet uncharged is tracked in rmem_released,
 * so subtract it from sk_rmem_alloc to get the真 — NOTE(review): amount of
 * rmem actually still in use by queued data.
 */
static inline int __mptcp_rmem(const struct sock *sk)
{
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
static inline int __mptcp_space(const struct sock *sk) static inline int __mptcp_space(const struct sock *sk)
{ {
return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
} }
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
......
...@@ -214,11 +214,6 @@ static int subflow_check_req(struct request_sock *req, ...@@ -214,11 +214,6 @@ static int subflow_check_req(struct request_sock *req,
ntohs(inet_sk(sk_listener)->inet_sport), ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
sock_put((struct sock *)subflow_req->msk);
mptcp_token_destroy_request(req);
tcp_request_sock_ops.destructor(req);
subflow_req->msk = NULL;
subflow_req->mp_join = 0;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX); SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
return -EPERM; return -EPERM;
} }
...@@ -230,6 +225,8 @@ static int subflow_check_req(struct request_sock *req, ...@@ -230,6 +225,8 @@ static int subflow_check_req(struct request_sock *req,
if (unlikely(req->syncookie)) { if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk)) if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb); subflow_init_req_cookie_join_save(subflow_req, skb);
else
return -EPERM;
} }
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
...@@ -269,9 +266,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, ...@@ -269,9 +266,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL; return -EINVAL;
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_req->mp_join = 1; subflow_req->mp_join = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
} }
......
...@@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp ...@@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp
static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net) static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
{ {
u32 i = skb_get_hash(skb) ^ net_hash_mix(net); static u32 mptcp_join_hash_secret __read_mostly;
struct tcphdr *th = tcp_hdr(skb);
u32 seq, i;
net_get_random_once(&mptcp_join_hash_secret,
sizeof(mptcp_join_hash_secret));
if (th->syn)
seq = TCP_SKB_CB(skb)->seq;
else
seq = TCP_SKB_CB(skb)->seq - 1;
i = jhash_3words(seq, net_hash_mix(net),
(__force __u32)th->source << 16 | (__force __u32)th->dest,
mptcp_join_hash_secret);
return i % ARRAY_SIZE(join_entries); return i % ARRAY_SIZE(join_entries);
} }
......
...@@ -1409,7 +1409,7 @@ syncookies_tests() ...@@ -1409,7 +1409,7 @@ syncookies_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows limited by server w cookies" 2 2 1 chk_join_nr "subflows limited by server w cookies" 2 1 1
# test signal address with cookies # test signal address with cookies
reset_with_cookies reset_with_cookies
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment