Commit cb0f8b03 authored by David S. Miller

Merge branch 'mptcp-next'

Mat Martineau says:

====================
mptcp: Optimize output options and add MP_FAIL

This patch set contains two groups of changes that we've been testing in
the MPTCP tree.

The first optimizes the code path and data structure for populating
MPTCP option headers when transmitting.

Patch 1 reorganizes code to reduce the number of conditionals that need
to be evaluated in common cases.

Patch 2 rearranges struct mptcp_out_options to save 80 bytes (on x86_64).

The next five patches add partial support for the MP_FAIL option as
defined in RFC 8684. MP_FAIL is an option header used to cleanly handle
MPTCP checksum failures. When the MPTCP checksum detects an error in the
MPTCP DSS header or the data mapped by that header, the receiver uses a
TCP RST with MP_FAIL to close the subflow that experienced the error and
provide associated MPTCP sequence number information to the peer. RFC
8684 also describes how a single-subflow connection can discard corrupt
data and remain connected under certain conditions using MP_FAIL, but
that feature is not implemented here.

Patches 3-5 implement MP_FAIL transmit and receive, and integrate with
checksum validation.

Patches 6 & 7 add MP_FAIL selftests and the MIBs required for those
tests.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d484dc2b 6bb3ab49
...@@ -58,10 +58,6 @@ struct mptcp_addr_info { ...@@ -58,10 +58,6 @@ struct mptcp_addr_info {
struct mptcp_out_options { struct mptcp_out_options {
#if IS_ENABLED(CONFIG_MPTCP) #if IS_ENABLED(CONFIG_MPTCP)
u16 suboptions; u16 suboptions;
u64 sndr_key;
u64 rcvr_key;
u64 ahmac;
struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list; struct mptcp_rm_list rm_list;
u8 join_id; u8 join_id;
u8 backup; u8 backup;
...@@ -69,11 +65,26 @@ struct mptcp_out_options { ...@@ -69,11 +65,26 @@ struct mptcp_out_options {
reset_transient:1, reset_transient:1,
csum_reqd:1, csum_reqd:1,
allow_join_id0:1; allow_join_id0:1;
u32 nonce; union {
u64 thmac; struct {
u32 token; u64 sndr_key;
u8 hmac[20]; u64 rcvr_key;
struct mptcp_ext ext_copy; };
struct {
struct mptcp_addr_info addr;
u64 ahmac;
};
struct {
struct mptcp_ext ext_copy;
u64 fail_seq;
};
struct {
u32 nonce;
u32 token;
u64 thmac;
u8 hmac[20];
};
};
#endif #endif
}; };
......
...@@ -44,6 +44,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { ...@@ -44,6 +44,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
......
...@@ -37,6 +37,8 @@ enum linux_mptcp_mib_field { ...@@ -37,6 +37,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
......
...@@ -336,6 +336,16 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -336,6 +336,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->reset_reason = *ptr; mp_opt->reset_reason = *ptr;
break; break;
case MPTCPOPT_MP_FAIL:
if (opsize != TCPOLEN_MPTCP_FAIL)
break;
ptr += 2;
mp_opt->mp_fail = 1;
mp_opt->fail_seq = get_unaligned_be64(ptr);
pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
break;
default: default:
break; break;
} }
...@@ -364,6 +374,7 @@ void mptcp_get_options(const struct sock *sk, ...@@ -364,6 +374,7 @@ void mptcp_get_options(const struct sock *sk,
mp_opt->reset = 0; mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
mp_opt->deny_join_id0 = 0; mp_opt->deny_join_id0 = 0;
mp_opt->mp_fail = 0;
length = (th->doff * 4) - sizeof(struct tcphdr); length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1); ptr = (const unsigned char *)(th + 1);
...@@ -592,6 +603,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, ...@@ -592,6 +603,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
dss_size = map_size; dss_size = map_size;
if (skb && snd_data_fin_enable) if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy); mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
opts->suboptions = OPTION_MPTCP_DSS;
ret = true; ret = true;
} }
...@@ -615,6 +627,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, ...@@ -615,6 +627,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy.ack64 = 0; opts->ext_copy.ack64 = 0;
} }
opts->ext_copy.use_ack = 1; opts->ext_copy.use_ack = 1;
opts->suboptions = OPTION_MPTCP_DSS;
WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */ /* Add kind/length/subtype/flag overhead if mapping is not populated */
...@@ -686,8 +699,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * ...@@ -686,8 +699,13 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
if (drop_other_suboptions) { if (drop_other_suboptions) {
pr_debug("drop other suboptions"); pr_debug("drop other suboptions");
opts->suboptions = 0; opts->suboptions = 0;
opts->ext_copy.use_ack = 0;
opts->ext_copy.use_map = 0; /* note that e.g. DSS could have written into the memory
* aliased by ahmac, we must reset the field here
* to avoid appending the hmac even for ADD_ADDR echo
* options
*/
opts->ahmac = 0;
*size -= opt_size; *size -= opt_size;
} }
opts->suboptions |= OPTION_MPTCP_ADD_ADDR; opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
...@@ -739,7 +757,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, ...@@ -739,7 +757,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (!subflow->send_mp_prio) /* can't send MP_PRIO with MPC, as they share the same option space:
* 'backup'. Also it makes no sense at all
*/
if (!subflow->send_mp_prio ||
((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions))
return false; return false;
/* account for the trailing 'nop' option */ /* account for the trailing 'nop' option */
...@@ -755,7 +778,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, ...@@ -755,7 +778,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
return true; return true;
} }
static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int *size,
unsigned int remaining, unsigned int remaining,
struct mptcp_out_options *opts) struct mptcp_out_options *opts)
...@@ -763,12 +786,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu ...@@ -763,12 +786,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (remaining < TCPOLEN_MPTCP_RST) if (remaining < TCPOLEN_MPTCP_RST)
return; return false;
*size = TCPOLEN_MPTCP_RST; *size = TCPOLEN_MPTCP_RST;
opts->suboptions |= OPTION_MPTCP_RST; opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient; opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason; opts->reset_reason = subflow->reset_reason;
return true;
}
/* Populate the MP_FAIL suboption for an outgoing packet on subflow @sk.
 *
 * Returns false, leaving @size and @opts untouched, when the subflow has no
 * MP_FAIL pending (send_mp_fail is clear) or when @remaining is smaller than
 * TCPOLEN_MPTCP_FAIL. Otherwise stores TCPOLEN_MPTCP_FAIL in @size, sets
 * OPTION_MPTCP_FAIL in @opts->suboptions, copies the subflow's map_seq into
 * @opts->fail_seq and returns true.
 */
static bool mptcp_established_options_mp_fail(struct sock *sk,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
/* expected common case: nothing to report on this subflow */
if (likely(!subflow->send_mp_fail))
return false;
/* the option does not fit in what is left */
if (remaining < TCPOLEN_MPTCP_FAIL)
return false;
/* @size is overwritten here, not accumulated */
*size = TCPOLEN_MPTCP_FAIL;
opts->suboptions |= OPTION_MPTCP_FAIL;
opts->fail_seq = subflow->map_seq;
pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
return true;
} }
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
...@@ -787,15 +834,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, ...@@ -787,15 +834,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false; return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
mptcp_established_options_rst(sk, skb, size, remaining, opts); if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
}
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
}
return true; return true;
} }
snd_data_fin = mptcp_data_fin_enabled(msk); snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true; ret = true;
else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
ret = true; ret = true;
if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
return true;
}
}
/* we reserved enough space for the above options, and exceeding the /* we reserved enough space for the above options, and exceeding the
* TCP option space would be fatal * TCP option space would be fatal
...@@ -1096,6 +1156,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1096,6 +1156,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.mp_prio = 0; mp_opt.mp_prio = 0;
} }
if (mp_opt.mp_fail) {
mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
mp_opt.mp_fail = 0;
}
if (mp_opt.reset) { if (mp_opt.reset) {
subflow->reset_seen = 1; subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason; subflow->reset_reason = mp_opt.reset_reason;
...@@ -1198,8 +1264,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) ...@@ -1198,8 +1264,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts) struct mptcp_out_options *opts)
{ {
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
OPTION_MPTCP_MPC_ACK) & opts->suboptions) { const struct sock *ssk = (const struct sock *)tp;
struct mptcp_subflow_context *subflow;
subflow = mptcp_subflow_ctx(ssk);
subflow->send_mp_fail = 0;
*ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
TCPOLEN_MPTCP_FAIL,
0, 0);
put_unaligned_be64(opts->fail_seq, ptr);
ptr += 2;
}
/* RST is mutually exclusive with everything else */
if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
*ptr++ = mptcp_option(MPTCPOPT_RST,
TCPOLEN_MPTCP_RST,
opts->reset_transient,
opts->reset_reason);
return;
}
/* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
* mptcp_established_options*()
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
u8 flags = 0;
if (mpext->use_ack) {
flags = MPTCP_DSS_HAS_ACK;
if (mpext->ack64) {
len += TCPOLEN_MPTCP_DSS_ACK64;
flags |= MPTCP_DSS_ACK64;
} else {
len += TCPOLEN_MPTCP_DSS_ACK32;
}
}
if (mpext->use_map) {
len += TCPOLEN_MPTCP_DSS_MAP64;
/* Use only 64-bit mapping flags for now, add
* support for optional 32-bit mappings later.
*/
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
if (mpext->use_ack) {
if (mpext->ack64) {
put_unaligned_be64(mpext->data_ack, ptr);
ptr += 2;
} else {
put_unaligned_be32(mpext->data_ack32, ptr);
ptr += 1;
}
}
if (mpext->use_map) {
put_unaligned_be64(mpext->data_seq, ptr);
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
put_unaligned_be32(mpext->data_len << 16 |
mptcp_make_csum(mpext), ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
}
} else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256; u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
...@@ -1246,10 +1392,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1246,10 +1392,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
} }
ptr += 1; ptr += 1;
}
mp_capable_done: /* MPC is additionally mutually exclusive with MP_PRIO */
if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { goto mp_capable_done;
} else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
opts->backup, opts->join_id);
put_unaligned_be32(opts->token, ptr);
ptr += 1;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYNACK,
opts->backup, opts->join_id);
put_unaligned_be64(opts->thmac, ptr);
ptr += 2;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
ptr += 5;
} else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO; u8 echo = MPTCP_ADDR_ECHO;
...@@ -1307,6 +1474,19 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1307,6 +1474,19 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
} }
} }
if (OPTION_MPTCP_PRIO & opts->suboptions) {
const struct sock *ssk = (const struct sock *)tp;
struct mptcp_subflow_context *subflow;
subflow = mptcp_subflow_ctx(ssk);
subflow->send_mp_prio = 0;
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
}
mp_capable_done:
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
u8 i = 1; u8 i = 1;
...@@ -1327,107 +1507,6 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1327,107 +1507,6 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
} }
} }
if (OPTION_MPTCP_PRIO & opts->suboptions) {
const struct sock *ssk = (const struct sock *)tp;
struct mptcp_subflow_context *subflow;
subflow = mptcp_subflow_ctx(ssk);
subflow->send_mp_prio = 0;
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
}
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
opts->backup, opts->join_id);
put_unaligned_be32(opts->token, ptr);
ptr += 1;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
}
if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYNACK,
opts->backup, opts->join_id);
put_unaligned_be64(opts->thmac, ptr);
ptr += 2;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
}
if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
ptr += 5;
}
if (OPTION_MPTCP_RST & opts->suboptions)
*ptr++ = mptcp_option(MPTCPOPT_RST,
TCPOLEN_MPTCP_RST,
opts->reset_transient,
opts->reset_reason);
if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
u8 flags = 0;
if (mpext->use_ack) {
flags = MPTCP_DSS_HAS_ACK;
if (mpext->ack64) {
len += TCPOLEN_MPTCP_DSS_ACK64;
flags |= MPTCP_DSS_ACK64;
} else {
len += TCPOLEN_MPTCP_DSS_ACK32;
}
}
if (mpext->use_map) {
len += TCPOLEN_MPTCP_DSS_MAP64;
/* Use only 64-bit mapping flags for now, add
* support for optional 32-bit mappings later.
*/
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
if (mpext->use_ack) {
if (mpext->ack64) {
put_unaligned_be64(mpext->data_ack, ptr);
ptr += 2;
} else {
put_unaligned_be32(mpext->data_ack32, ptr);
ptr += 1;
}
}
if (mpext->use_map) {
put_unaligned_be64(mpext->data_seq, ptr);
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
put_unaligned_be32(mpext->data_len << 16 |
mptcp_make_csum(mpext), ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
}
}
if (tp) if (tp)
mptcp_set_rwin(tp); mptcp_set_rwin(tp);
} }
......
...@@ -249,6 +249,11 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) ...@@ -249,6 +249,11 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
} }
/* Path-manager hook invoked when an MP_FAIL option has been received.
 *
 * @sk: socket handed in by the caller (not used here)
 * @fail_seq: sequence number carried by the received MP_FAIL option
 *
 * Only emits a debug message with @fail_seq.
 */
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	/* terminate the line: without the trailing '\n' the message may be
	 * held back until, or merged with, the next kernel log message
	 */
	pr_debug("fail_seq=%llu\n", fail_seq);
}
/* path manager helpers */ /* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
......
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
#define OPTION_MPTCP_FASTCLOSE BIT(8) #define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10) #define OPTION_MPTCP_RST BIT(10)
#define OPTION_MPTCP_DSS BIT(11)
#define OPTION_MPTCP_FAIL BIT(12)
/* MPTCP option subtypes */ /* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0 #define MPTCPOPT_MP_CAPABLE 0
...@@ -67,6 +69,7 @@ ...@@ -67,6 +69,7 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4 #define TCPOLEN_MPTCP_RST 4
#define TCPOLEN_MPTCP_FAIL 12
#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
...@@ -137,6 +140,7 @@ struct mptcp_options_received { ...@@ -137,6 +140,7 @@ struct mptcp_options_received {
add_addr : 1, add_addr : 1,
rm_addr : 1, rm_addr : 1,
mp_prio : 1, mp_prio : 1,
mp_fail : 1,
echo : 1, echo : 1,
csum_reqd : 1, csum_reqd : 1,
backup : 1, backup : 1,
...@@ -158,6 +162,7 @@ struct mptcp_options_received { ...@@ -158,6 +162,7 @@ struct mptcp_options_received {
u64 ahmac; u64 ahmac;
u8 reset_reason:4; u8 reset_reason:4;
u8 reset_transient:1; u8 reset_transient:1;
u64 fail_seq;
}; };
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
...@@ -428,6 +433,7 @@ struct mptcp_subflow_context { ...@@ -428,6 +433,7 @@ struct mptcp_subflow_context {
mpc_map : 1, mpc_map : 1,
backup : 1, backup : 1,
send_mp_prio : 1, send_mp_prio : 1,
send_mp_fail : 1,
rx_eof : 1, rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */ can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */ disposable : 1, /* ctx can be free at ulp release time */
...@@ -608,6 +614,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, ...@@ -608,6 +614,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
} }
static inline bool mptcp_has_another_subflow(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
mptcp_for_each_subflow(msk, tmp) {
if (tmp != subflow)
return true;
}
return false;
}
void __init mptcp_proto_init(void); void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void); int __init mptcp_proto_v6_init(void);
...@@ -722,6 +741,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); ...@@ -722,6 +741,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr, struct mptcp_addr_info *addr,
u8 bkup); u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk); void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry * struct mptcp_pm_add_entry *
......
...@@ -910,6 +910,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * ...@@ -910,6 +910,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
if (unlikely(csum_fold(csum))) { if (unlikely(csum_fold(csum))) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
subflow->send_mp_fail = 1;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
} }
...@@ -1157,6 +1159,20 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1157,6 +1159,20 @@ static bool subflow_check_data_avail(struct sock *ssk)
fallback: fallback:
/* RFC 8684 section 3.7. */ /* RFC 8684 section 3.7. */
if (subflow->send_mp_fail) {
if (mptcp_has_another_subflow(ssk)) {
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
}
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, 0);
return true;
}
if (subflow->mp_join || subflow->fully_established) { if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket. /* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers * subflow_error_report() will introduce the appropriate barriers
......
...@@ -578,6 +578,43 @@ chk_csum_nr() ...@@ -578,6 +578,43 @@ chk_csum_nr()
fi fi
} }
# Compare the MP_FAIL MIB counters against the expected values.
#   $1: expected MPTcpExtMPFailTx count, read via nstat in $ns1
#   $2: expected MPTcpExtMPFailRx count, read via nstat in $ns2
# A counter missing from the nstat output counts as 0. On any mismatch
# ret is set to 1 and the MPTcp counters of both namespaces are dumped.
chk_fail_nr()
{
	local expected_tx=$1
	local expected_rx=$2
	local got
	local mismatch

	printf "%-39s %s" " " "ftx"
	got=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
	got=${got:-0}
	if [ "$got" = "$expected_tx" ]; then
		echo -n "[ ok ]"
	else
		echo "[fail] got $got MP_FAIL[s] TX expected $expected_tx"
		ret=1
		mismatch=1
	fi

	echo -n " - frx "
	got=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
	got=${got:-0}
	if [ "$got" = "$expected_rx" ]; then
		echo "[ ok ]"
	else
		echo "[fail] got $got MP_FAIL[s] RX expected $expected_rx"
		ret=1
		mismatch=1
	fi

	if [ "${mismatch}" = 1 ]; then
		echo "Server ns stats"
		ip netns exec $ns1 nstat -as | grep MPTcp
		echo "Client ns stats"
		ip netns exec $ns2 nstat -as | grep MPTcp
	fi
}
chk_join_nr() chk_join_nr()
{ {
local msg="$1" local msg="$1"
...@@ -627,6 +664,7 @@ chk_join_nr() ...@@ -627,6 +664,7 @@ chk_join_nr()
fi fi
if [ $checksum -eq 1 ]; then if [ $checksum -eq 1 ]; then
chk_csum_nr chk_csum_nr
chk_fail_nr 0 0
fi fi
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment