Commit 65550f03 authored by David S. Miller

Merge branch 'mptcp-subflow-disconnected'

Mat Martineau says:

====================
MPTCP: Allow initial subflow to be disconnected

An MPTCP connection is aggregated from multiple TCP subflows, and can
involve multiple IP addresses on either peer. The addresses used in the
initial subflow connection are assigned address id 0 on each side of the
link. More addresses can be added and shared with the peer using address
IDs of 1 or larger. MPTCP in Linux shares non-zero address IDs across
all MPTCP connections in a net namespace, which allows userspace to
manage subflow connections across a number of sockets. However, this
makes the address with id 0 a special case, since the IP address
associated with id 0 is potentially different for each socket.

This patch set allows the initial subflow to be disconnected when
userspace specifies an address to remove using both id 0 and an IP
address, or when the peer sends an RM_ADDR for id 0.

Patches 1 and 3 implement the change for requests from the peer and
userspace, respectively.

Patch 2 consolidates some code for disconnecting subflows.

Patches 4-6 update the self tests to cover removal of subflows using
address id 0.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents b8128656 5e287fe7
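
The userspace-visible change is easiest to see through the pm_nl_ctl selftest
helper that this series extends; a minimal sketch, with the namespace name and
the addresses purely illustrative:

	# A non-zero address id is shared namespace-wide, so the id alone
	# identifies the endpoint to delete.
	ip netns exec ns2 ./pm_nl_ctl del 2

	# Id 0 is per-connection: the IP address is now required so the
	# kernel can find which connection's initial subflow to tear down.
	ip netns exec ns2 ./pm_nl_ctl del 0 10.0.1.2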
@@ -586,47 +586,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 	return -EINVAL;
 }
 
-static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+					   const struct mptcp_rm_list *rm_list,
+					   enum linux_mptcp_mib_field rm_type)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
 	u8 i;
 
-	pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr);
+	pr_debug("%s rm_list_nr %d",
+		 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);
 
 	msk_owned_by_me(msk);
 
-	if (!msk->pm.rm_list_rx.nr)
+	if (!rm_list->nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	for (i = 0; i < msk->pm.rm_list_rx.nr; i++) {
+	for (i = 0; i < rm_list->nr; i++) {
 		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+			u8 id = subflow->local_id;
 
-			if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id)
+			if (rm_type == MPTCP_MIB_RMADDR)
+				id = subflow->remote_id;
+
+			if (rm_list->ids[i] != id)
 				continue;
 
-			pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]);
+			pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u",
+				 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
+				 i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
 			spin_unlock_bh(&msk->pm.lock);
 			mptcp_subflow_shutdown(sk, ssk, how);
 			mptcp_close_ssk(sk, ssk, subflow);
 			spin_lock_bh(&msk->pm.lock);
 
-			msk->pm.add_addr_accepted--;
+			if (rm_type == MPTCP_MIB_RMADDR) {
+				msk->pm.add_addr_accepted--;
+				WRITE_ONCE(msk->pm.accept_addr, true);
+			} else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+				msk->pm.local_addr_used--;
+			}
 			msk->pm.subflows--;
-			WRITE_ONCE(msk->pm.accept_addr, true);
-
-			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
-
-			break;
+			__MPTCP_INC_STATS(sock_net(sk), rm_type);
 		}
 	}
 }
 
+static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+{
+	mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
+}
+
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list)
+{
+	mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
+}
+
 void mptcp_pm_nl_work(struct mptcp_sock *msk)
 {
 	struct mptcp_pm_data *pm = &msk->pm;
@@ -660,47 +681,6 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 }
 
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
-				     const struct mptcp_rm_list *rm_list)
-{
-	struct mptcp_subflow_context *subflow, *tmp;
-	struct sock *sk = (struct sock *)msk;
-	u8 i;
-
-	pr_debug("subflow rm_list_nr %d", rm_list->nr);
-
-	msk_owned_by_me(msk);
-
-	if (!rm_list->nr)
-		return;
-
-	if (list_empty(&msk->conn_list))
-		return;
-
-	for (i = 0; i < rm_list->nr; i++) {
-		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-
-			if (rm_list->ids[i] != subflow->local_id)
-				continue;
-
-			pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]);
-			spin_unlock_bh(&msk->pm.lock);
-			mptcp_subflow_shutdown(sk, ssk, how);
-			mptcp_close_ssk(sk, ssk, subflow);
-			spin_lock_bh(&msk->pm.lock);
-
-			msk->pm.local_addr_used--;
-			msk->pm.subflows--;
-
-			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
-
-			break;
-		}
-	}
-}
-
 static bool address_use_port(struct mptcp_pm_addr_entry *entry)
 {
 	return (entry->addr.flags &
@@ -1176,6 +1156,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
 	}
 }
 
+static int mptcp_nl_remove_id_zero_address(struct net *net,
+					   struct mptcp_addr_info *addr)
+{
+	struct mptcp_rm_list list = { .nr = 0 };
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
+
+	list.ids[list.nr++] = 0;
+
+	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+		struct mptcp_addr_info msk_local;
+
+		if (list_empty(&msk->conn_list))
+			goto next;
+
+		local_address((struct sock_common *)msk, &msk_local);
+		if (!addresses_equal(&msk_local, addr, addr->port))
+			goto next;
+
+		lock_sock(sk);
+		spin_lock_bh(&msk->pm.lock);
+		mptcp_pm_remove_addr(msk, &list);
+		mptcp_pm_nl_rm_subflow_received(msk, &list);
+		spin_unlock_bh(&msk->pm.lock);
+		release_sock(sk);
+
+next:
+		sock_put(sk);
+		cond_resched();
+	}
+
+	return 0;
+}
+
 static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
@@ -1188,6 +1203,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 	if (ret < 0)
 		return ret;
 
+	/* the zero id address is special: the first address used by the msk
+	 * always gets such an id, so different subflows can have different zero
+	 * id addresses. Additionally zero id is not accounted for in id_bitmap.
+	 * Let's use an 'mptcp_rm_list' instead of the common remove code.
+	 */
+	if (addr.addr.id == 0)
+		return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);
+
 	spin_lock_bh(&pernet->lock);
 	entry = __lookup_addr_by_id(pernet, addr.addr.id);
 	if (!entry) {
......
@@ -294,9 +294,12 @@ do_transfer()
 					let id+=1
 				done
 			fi
-		else
+		elif [ $rm_nr_ns1 -eq 8 ]; then
 			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
+		elif [ $rm_nr_ns1 -eq 9 ]; then
+			sleep 1
+			ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
 		fi
 	fi
 
@@ -333,9 +336,18 @@ do_transfer()
 					let id+=1
 				done
 			fi
-		else
+		elif [ $rm_nr_ns2 -eq 8 ]; then
 			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
+		elif [ $rm_nr_ns2 -eq 9 ]; then
+			local addr
+			if is_v6 "${connect_addr}"; then
+				addr="dead:beef:1::2"
+			else
+				addr="10.0.1.2"
+			fi
+			sleep 1
+			ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
 		fi
 	fi
 
@@ -988,6 +1000,25 @@ remove_tests()
 	chk_join_nr "flush invalid addresses" 1 1 1
 	chk_add_nr 3 3
 	chk_rm_nr 3 1 invert
+
+	# remove id 0 subflow
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+	chk_join_nr "remove id 0 subflow" 1 1 1
+	chk_rm_nr 1 1
+
+	# remove id 0 address
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+	chk_join_nr "remove id 0 address" 1 1 1
+	chk_add_nr 1 1
+	chk_rm_nr 1 1 invert
 }
 
 add_tests()
......
@@ -100,12 +100,12 @@ done
 check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
 check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
 
-for i in `seq 9 256`; do
-	ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in `seq 10 255`; do
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
+	ip netns exec $ns1 ./pm_nl_ctl del $i
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1))
 done
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.1.1
 id 2 flags  10.0.0.9
 id 3 flags signal,backup 10.0.1.3
 id 4 flags signal 10.0.1.4
 id 5 flags signal 10.0.1.5
......
@@ -26,7 +26,7 @@ static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
 	fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
-	fprintf(stderr, "\tdel <id>\n");
+	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
 	fprintf(stderr, "\tflush\n");
@@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	struct rtattr *rta, *nest;
 	struct nlmsghdr *nh;
+	u_int16_t family;
 	int nest_start;
 	u_int8_t id;
 	int off = 0;
@@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
 			    MPTCP_PM_VER);
 
-	/* the only argument is the address id */
-	if (argc != 3)
+	/* the only argument is the address id (nonzero) */
+	if (argc != 3 && argc != 4)
 		syntax(argv);
 
 	id = atoi(argv[2]);
+	/* zero id with the IP address */
+	if (!id && argc != 4)
+		syntax(argv);
 
 	nest_start = off;
 	nest = (void *)(data + off);
@@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	rta->rta_len = RTA_LENGTH(1);
 	memcpy(RTA_DATA(rta), &id, 1);
 	off += NLMSG_ALIGN(rta->rta_len);
 
+	if (!id) {
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[3]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
+
 	nest->rta_len = off - nest_start;
 	do_nl_req(fd, nh, off, 0);
......