Commit 849fd444 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-Connection-and-accounting-fixes'

Mat Martineau says:

====================
mptcp: Connection and accounting fixes

Here are some miscellaneous fixes for MPTCP:

Patch 1 modifies an MPTCP hash so it doesn't depend on one of skb->dev
and skb->sk being non-NULL.

Patch 2 removes an extra destructor call when rejecting a join due to
port mismatch.

Patches 3 and 5 more cleanly handle error conditions with MP_JOIN and
syncookies, and update a related self test.

Patch 4 makes sure packets that trigger a subflow TCP reset during MPTCP
option header processing are correctly dropped.

Patch 6 addresses a rmem accounting issue that could keep packets in
subflow receive buffers longer than necessary, delaying MPTCP-level
ACKs.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 5d52c906 ce599c51
......@@ -105,7 +105,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts);
......@@ -227,9 +227,10 @@ static inline bool mptcp_established_options(struct sock *sk,
return false;
}
static inline void mptcp_incoming_options(struct sock *sk,
static inline bool mptcp_incoming_options(struct sock *sk,
struct sk_buff *skb)
{
return true;
}
static inline void mptcp_skb_ext_move(struct sk_buff *to,
......
......@@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
* so just ignore the return value of mptcp_incoming_options().
*/
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
......@@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
/* If a subflow has been reset, the packet should not continue
* to be processed, drop the packet.
*/
if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
__kfree_skb(skb);
return;
}
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
......@@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
/* If a subflow has been reset, the packet should not
* continue to be processed, drop the packet.
*/
if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
goto discard;
break;
}
fallthrough;
......
......@@ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_SENTINEL
};
......
......@@ -37,6 +37,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
__MPTCP_MIB_MAX
};
......
......@@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
/* Return false if a subflow has been reset, else return true */
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
......@@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
__mptcp_check_push(subflow->conn, sk);
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
return;
return true;
}
mptcp_get_options(sk, skb, &mp_opt);
/* The subflow can be in close state only if check_fully_established()
* just sent a reset. If so, tell the caller to ignore the current packet.
*/
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
return sk->sk_state != TCP_CLOSE;
if (mp_opt.fastclose &&
msk->local_key == mp_opt.rcvr_key) {
......@@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
if (!mp_opt.dss)
return;
return true;
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
......@@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
schedule_work(&msk->work))
sock_hold(subflow->conn);
return;
return true;
}
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
return;
return true;
memset(mpext, 0, sizeof(*mpext));
......@@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
}
return true;
}
static void mptcp_set_rwin(const struct tcp_sock *tp)
......
......@@ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
bool cleanup, rx_empty;
cleanup = (space > 0) && (space >= (old_space << 1));
rx_empty = !atomic_read(&sk->sk_rmem_alloc);
rx_empty = !__mptcp_rmem(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
......@@ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
if (__mptcp_rmem(sk) > sk_rbuf) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
return;
}
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
......@@ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) {
/* we will bulk release the skb memory later */
skb->destructor = NULL;
msk->rmem_released += skb->truesize;
WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb);
}
......@@ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk)
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
sk_mem_uncharge(sk, msk->rmem_released);
msk->rmem_released = 0;
WRITE_ONCE(msk->rmem_released, 0);
}
static void __mptcp_splice_receive_queue(struct sock *sk)
......@@ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;
msk->rmem_released = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->tx_pending_data = 0;
msk->first = NULL;
......
......@@ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk;
}
/* the msk socket don't use the backlog, also account for the bulk
* free memory
*/
static inline int __mptcp_rmem(const struct sock *sk)
{
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
static inline int __mptcp_space(const struct sock *sk)
{
return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
......
......@@ -214,11 +214,6 @@ static int subflow_check_req(struct request_sock *req,
ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
sock_put((struct sock *)subflow_req->msk);
mptcp_token_destroy_request(req);
tcp_request_sock_ops.destructor(req);
subflow_req->msk = NULL;
subflow_req->mp_join = 0;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
return -EPERM;
}
......@@ -230,6 +225,8 @@ static int subflow_check_req(struct request_sock *req,
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
else
return -EPERM;
}
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
......@@ -269,9 +266,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL;
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_req->mp_join = 1;
subflow_req->mp_join = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
}
......
......@@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp
static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
{
u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
static u32 mptcp_join_hash_secret __read_mostly;
struct tcphdr *th = tcp_hdr(skb);
u32 seq, i;
net_get_random_once(&mptcp_join_hash_secret,
sizeof(mptcp_join_hash_secret));
if (th->syn)
seq = TCP_SKB_CB(skb)->seq;
else
seq = TCP_SKB_CB(skb)->seq - 1;
i = jhash_3words(seq, net_hash_mix(net),
(__force __u32)th->source << 16 | (__force __u32)th->dest,
mptcp_join_hash_secret);
return i % ARRAY_SIZE(join_entries);
}
......
......@@ -1409,7 +1409,7 @@ syncookies_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows limited by server w cookies" 2 2 1
chk_join_nr "subflows limited by server w cookies" 2 1 1
# test signal address with cookies
reset_with_cookies
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment