Commit a1a809c4 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'mptcp-add_addr-enhancements'

Mat Martineau says:

====================
mptcp: ADD_ADDR enhancements

This patch series from the MPTCP tree contains enhancements and
associated tests for the ADD_ADDR ("add address") MPTCP option. This
option allows already-connected MPTCP peers to share additional IP
addresses with each other, which can then be used to create additional
subflows within those MPTCP connections.

Patches 1 & 2 remove duplicated data in the per-connection path manager
structure.

Patches 3-6 initiate additional subflows when an address is added using
the netlink path manager interface and improve ADD_ADDR signaling
reliability, subject to configured limits. Self tests are also updated.

Patches 7-15 add new support for optional port numbers in ADD_ADDR. This
includes creating an additional in-kernel TCP listening socket for the
requested port number, validating the port number when processing
incoming subflow connections, including the port number in netlink
interfaces, and adding some new MIBs. New self test cases are added for
subflows connecting with alternate port numbers.
====================

Link: https://lore.kernel.org/r/20210201230920.66027-1-mathew.j.martineau@linux.intel.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents e6d6ca6e 8a127bf6
......@@ -29,6 +29,12 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
......
......@@ -22,6 +22,12 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
......
......@@ -128,10 +128,10 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_subflows_max = READ_ONCE(msk->pm.subflows_max);
val = READ_ONCE(msk->pm.add_addr_signal_max);
info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
val = mptcp_pm_get_add_addr_signal_max(msk);
info->mptcpi_add_addr_signal_max = val;
val = READ_ONCE(msk->pm.add_addr_accept_max);
val = mptcp_pm_get_add_addr_accept_max(msk);
info->mptcpi_add_addr_accepted_max = val;
if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
flags |= MPTCP_INFO_FLAG_FALLBACK;
......
......@@ -1025,6 +1025,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_pm_del_add_timer(msk, &addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
}
if (mp_opt.port)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
mp_opt.add_addr = 0;
}
......
......@@ -78,10 +78,13 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
unsigned int subflows_max;
int ret = 0;
subflows_max = mptcp_pm_get_subflows_max(msk);
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
pm->subflows_max, READ_ONCE(pm->accept_subflow));
subflows_max, READ_ONCE(pm->accept_subflow));
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->accept_subflow))
......@@ -89,8 +92,8 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->accept_subflow)) {
ret = pm->subflows < pm->subflows_max;
if (ret && ++pm->subflows == pm->subflows_max)
ret = pm->subflows < subflows_max;
if (ret && ++pm->subflows == subflows_max)
WRITE_ONCE(pm->accept_subflow, false);
}
spin_unlock_bh(&pm->lock);
......@@ -188,8 +191,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
if (!mptcp_pm_should_add_signal_ipv6(msk) &&
!mptcp_pm_should_add_signal_port(msk))
if (!mptcp_pm_should_add_signal(msk))
return;
mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
......
This diff is collapsed.
......@@ -52,7 +52,7 @@ static struct net_device mptcp_napi_dev;
* completed yet or has failed, return the subflow socket.
* Otherwise return NULL.
*/
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
{
if (!msk->subflow || READ_ONCE(msk->can_ack))
return NULL;
......
......@@ -203,10 +203,6 @@ struct mptcp_pm_data {
u8 add_addr_accepted;
u8 local_addr_used;
u8 subflows;
u8 add_addr_signal_max;
u8 add_addr_accept_max;
u8 local_addr_max;
u8 subflows_max;
u8 status;
u8 rm_id;
};
......@@ -542,11 +538,15 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
......@@ -650,6 +650,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
......@@ -714,6 +715,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
......
......@@ -64,11 +64,23 @@ static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_sock *msk = subflow_req->msk;
u8 hmac[SHA256_DIGEST_SIZE];
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
subflow_generate_hmac(msk->local_key, msk->remote_key,
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
subflow_req->thmac = get_unaligned_be64(hmac);
}
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_sock *msk;
int local_id;
......@@ -85,13 +97,6 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
}
subflow_req->local_id = local_id;
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
subflow_generate_hmac(msk->local_key, msk->remote_key,
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
subflow_req->thmac = get_unaligned_be64(hmac);
return msk;
}
......@@ -115,6 +120,11 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
return 0;
}
static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
{
return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
}
/* Init mptcp request socket.
*
* Returns an error code if a JOIN has failed and a TCP reset
......@@ -181,12 +191,30 @@ static int subflow_init_req(struct request_sock *req,
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
subflow_req->msk = subflow_token_join_request(req, skb);
subflow_req->msk = subflow_token_join_request(req);
/* Can't fall back to TCP in this case. */
if (!subflow_req->msk)
return -EPERM;
if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
pr_debug("syn inet_sport=%d %d",
ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
sock_put((struct sock *)subflow_req->msk);
mptcp_token_destroy_request(req);
tcp_request_sock_ops.destructor(req);
subflow_req->msk = NULL;
subflow_req->mp_join = 0;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
return -EPERM;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
}
subflow_req_create_thmac(subflow_req);
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
......@@ -329,6 +357,11 @@ void mptcp_subflow_reset(struct sock *ssk)
sock_put(sk);
}
static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk)
{
return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
}
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
......@@ -395,6 +428,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport));
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(mptcp_sk(parent), sk);
......@@ -660,6 +700,17 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
if (subflow_use_different_sport(owner, sk)) {
pr_debug("ack inet_sport=%d %d",
ntohs(inet_sk(sk)->inet_sport),
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
goto out;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
}
}
......@@ -1084,9 +1135,9 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
}
#endif
static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family)
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family)
{
memset(addr, 0, sizeof(*addr));
addr->ss_family = family;
......
......@@ -177,8 +177,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
u_int32_t flags = 0;
u_int16_t family;
u_int32_t flags;
int nest_start;
u_int8_t id;
int off = 0;
......@@ -224,7 +224,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
char *tok, *str;
/* flags */
flags = 0;
if (++arg >= argc)
error(1, 0, " missing flags value");
......@@ -272,6 +271,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(4);
memcpy(RTA_DATA(rta), &ifindex, 4);
off += NLMSG_ALIGN(rta->rta_len);
} else if (!strcmp(argv[arg], "port")) {
u_int16_t port;
if (++arg >= argc)
error(1, 0, " missing port value");
if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
error(1, 0, " flags must be signal when using port");
port = atoi(argv[arg]);
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &port, 2);
off += NLMSG_ALIGN(rta->rta_len);
} else
error(1, 0, "unknown keyword %s", argv[arg]);
}
......@@ -324,6 +337,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
static void print_addr(struct rtattr *attrs, int len)
{
uint16_t family = 0;
uint16_t port = 0;
char str[1024];
uint32_t flags;
uint8_t id;
......@@ -331,12 +345,16 @@ static void print_addr(struct rtattr *attrs, int len)
while (RTA_OK(attrs, len)) {
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
memcpy(&family, RTA_DATA(attrs), 2);
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT)
memcpy(&port, RTA_DATA(attrs), 2);
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
if (family != AF_INET)
error(1, errno, "wrong IP (v4) for family %d",
family);
inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
printf("%s", str);
if (port)
printf(" %d", port);
}
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
if (family != AF_INET6)
......@@ -344,6 +362,8 @@ static void print_addr(struct rtattr *attrs, int len)
family);
inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
printf("%s", str);
if (port)
printf(" %d", port);
}
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
memcpy(&id, RTA_DATA(attrs), 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment