Commit 5ab54e57 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-Optimize-received-options-handling'

Mat Martineau says:

====================
mptcp: Optimize received options handling

These patches optimize received MPTCP option handling in terms of both
storage and fewer conditionals to evaluate in common cases, and also add
a couple of cleanup patches.

Patches 1 and 5 do some cleanup in checksum option parsing and
clarification of lock handling.

Patches 2 and 3 rearrange struct mptcp_options_received to shrink it
slightly and consolidate frequently used fields in the same cache line.

Patch 4 optimizes incoming MPTCP option parsing to skip many extra
comparisons in the common case where only a DSS option is present.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3aa7857f 9758f40e
......@@ -81,12 +81,11 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* is if both hosts in their SYNs set A=0."
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
mp_opt->csum_reqd = 1;
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
if (flags & MPTCP_CAP_DENY_JOIN_ID0)
mp_opt->deny_join_id0 = 1;
mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
mp_opt->mp_capable = 1;
mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
......@@ -101,7 +100,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* equivalent to those in a DSS option and can be used
* interchangeably."
*/
mp_opt->dss = 1;
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
mp_opt->data_len = get_unaligned_be16(ptr);
......@@ -109,7 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
mp_opt->csum_reqd = 1;
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
......@@ -118,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
case MPTCPOPT_MP_JOIN:
mp_opt->mp_join = 1;
mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
......@@ -144,7 +143,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
} else {
mp_opt->mp_join = 0;
mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
......@@ -192,8 +191,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
break;
mp_opt->dss = 1;
mp_opt->suboptions |= OPTION_MPTCP_DSS;
if (mp_opt->use_ack) {
if (mp_opt->ack64) {
mp_opt->data_ack = get_unaligned_be64(ptr);
......@@ -222,14 +220,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
mp_opt->csum_reqd = 1;
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
ptr += 2;
}
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
mp_opt->csum);
}
break;
......@@ -260,8 +259,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
}
mp_opt->add_addr = 1;
mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR;
mp_opt->addr.id = *ptr++;
mp_opt->addr.port = 0;
mp_opt->ahmac = 0;
if (mp_opt->addr.family == AF_INET) {
memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
ptr += 4;
......@@ -298,7 +299,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr++;
mp_opt->rm_addr = 1;
mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR;
mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
for (i = 0; i < mp_opt->rm_list.nr; i++)
mp_opt->rm_list.ids[i] = *ptr++;
......@@ -309,7 +310,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_PRIO)
break;
mp_opt->mp_prio = 1;
mp_opt->suboptions |= OPTION_MPTCP_PRIO;
mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
break;
......@@ -321,7 +322,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->fastclose = 1;
mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
break;
case MPTCPOPT_RST:
......@@ -330,7 +331,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
break;
mp_opt->reset = 1;
mp_opt->suboptions |= OPTION_MPTCP_RST;
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
......@@ -341,7 +343,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
ptr += 2;
mp_opt->mp_fail = 1;
mp_opt->suboptions |= OPTION_MPTCP_FAIL;
mp_opt->fail_seq = get_unaligned_be64(ptr);
pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
break;
......@@ -355,26 +357,12 @@ void mptcp_get_options(const struct sock *sk,
const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
/* initialize option status */
mp_opt->mp_capable = 0;
mp_opt->mp_join = 0;
mp_opt->add_addr = 0;
mp_opt->ahmac = 0;
mp_opt->fastclose = 0;
mp_opt->addr.port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
mp_opt->deny_join_id0 = 0;
mp_opt->mp_fail = 0;
mp_opt->suboptions = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
......@@ -928,7 +916,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
*/
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
READ_ONCE(msk->pm.server_side))
tcp_send_ack(ssk);
goto fully_established;
......@@ -945,8 +933,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return subflow->mp_capable;
}
if ((mp_opt->dss && mp_opt->use_ack) ||
(mp_opt->add_addr && !mp_opt->echo)) {
if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
......@@ -959,7 +947,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
* then fallback to TCP. Fallback scenarios requires a reset for
* MP_JOIN subflows.
*/
if (!mp_opt->mp_capable) {
if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) {
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
......@@ -1123,13 +1111,15 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return sk->sk_state != TCP_CLOSE;
if (mp_opt.fastclose &&
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
msk->local_key == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
}
if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
add_addr_hmac_valid(msk, &mp_opt)) {
if (!mp_opt.echo) {
mptcp_pm_add_addr_received(msk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
......@@ -1141,35 +1131,30 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (mp_opt.addr.port)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
mp_opt.add_addr = 0;
}
if (mp_opt.rm_addr) {
if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR)
mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
mp_opt.rm_addr = 0;
}
if (mp_opt.mp_prio) {
if (mp_opt.suboptions & OPTION_MPTCP_PRIO) {
mptcp_pm_mp_prio_received(sk, mp_opt.backup);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
mp_opt.mp_prio = 0;
}
if (mp_opt.mp_fail) {
if (mp_opt.suboptions & OPTION_MPTCP_FAIL) {
mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
mp_opt.mp_fail = 0;
}
if (mp_opt.reset) {
if (mp_opt.suboptions & OPTION_MPTCP_RST) {
subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason;
subflow->reset_transient = mp_opt.reset_transient;
}
if (!mp_opt.dss)
if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
return true;
}
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
......@@ -1197,7 +1182,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
memset(mpext, 0, sizeof(*mpext));
if (mp_opt.use_map) {
if (likely(mp_opt.use_map)) {
if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
......@@ -1217,7 +1202,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
mpext->csum_reqd = mp_opt.csum_reqd;
mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD);
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
......
......@@ -1515,15 +1515,19 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
/* try to keep the subflow socket lock across
* consecutive xmit on the same socket
/* First check. If the ssk has changed since
* the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
mptcp_push_release(sk, prev_ssk, &info);
if (!ssk)
goto out;
if (ssk != prev_ssk || !prev_ssk)
/* Need to lock the new subflow only if different
* from the previous one, otherwise we are still
* helding the relevant lock
*/
if (ssk != prev_ssk)
lock_sock(ssk);
/* keep it simple and always provide a new skb for the
......@@ -2832,7 +2836,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->csum_reqd)
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
......@@ -2841,7 +2845,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
if (mp_opt->mp_capable) {
if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
msk->can_ack = true;
msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
......
......@@ -29,6 +29,13 @@
#define OPTION_MPTCP_DSS BIT(11)
#define OPTION_MPTCP_FAIL BIT(12)
#define OPTION_MPTCP_CSUMREQD BIT(13)
#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
OPTION_MPTCP_MPC_ACK)
#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
OPTION_MPTCP_MPJ_SYNACK)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
#define MPTCPOPT_MP_JOIN 1
......@@ -132,36 +139,27 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
reset : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
mp_prio : 1,
mp_fail : 1,
echo : 1,
csum_reqd : 1,
backup : 1,
deny_join_id0 : 1;
u16 suboptions;
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
u8 join_id;
u8 use_map:1,
u16 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
reset_reason:4,
reset_transient:1,
echo:1,
backup:1,
deny_join_id0:1,
__unused:2;
u8 join_id;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u64 ahmac;
u8 reset_reason:4;
u8 reset_transient:1;
u64 fail_seq;
};
......
......@@ -141,6 +141,7 @@ static int subflow_check_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
bool opt_mp_capable, opt_mp_join;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
......@@ -154,16 +155,18 @@ static int subflow_check_req(struct request_sock *req,
mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) {
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
if (mp_opt.mp_join)
if (opt_mp_join)
return 0;
} else if (mp_opt.mp_join) {
} else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
if (mp_opt.mp_capable && listener->request_mptcp) {
if (opt_mp_capable && listener->request_mptcp) {
int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
......@@ -194,7 +197,7 @@ static int subflow_check_req(struct request_sock *req,
else
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
} else if (mp_opt.mp_join && listener->request_mptcp) {
} else if (opt_mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
subflow_req->backup = mp_opt.backup;
......@@ -243,15 +246,18 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
bool opt_mp_capable, opt_mp_join;
int err;
subflow_init_req(req, sk_listener);
mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
if (opt_mp_capable && opt_mp_join)
return -EINVAL;
if (mp_opt.mp_capable && listener->request_mptcp) {
if (opt_mp_capable && listener->request_mptcp) {
if (mp_opt.sndr_key == 0)
return -EINVAL;
......@@ -262,7 +268,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
} else if (mp_opt.mp_join && listener->request_mptcp) {
} else if (opt_mp_join && listener->request_mptcp) {
if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL;
......@@ -394,7 +400,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
/* be sure no special action on any packet other than syn-ack */
if (subflow->conn_finished)
return;
......@@ -407,7 +412,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
......@@ -415,7 +420,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
if (mp_opt.csum_reqd)
if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
if (mp_opt.deny_join_id0)
WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
......@@ -430,7 +435,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
if (!mp_opt.mp_join) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
......@@ -636,10 +641,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
/* After child creation we must look for 'mp_capable' even when options
/* After child creation we must look for MPC even when options
* are not parsed
*/
mp_opt.mp_capable = 0;
mp_opt.suboptions = 0;
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
......@@ -659,7 +664,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
}
......@@ -669,7 +674,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
......@@ -726,7 +732,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* with OoO packets we can reach here without ingress
* mpc option
*/
if (mp_opt.mp_capable)
if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment