Commit 0a2f6b32 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-genl-events'

Mat Martineau says:

====================
mptcp: Add genl events for connection info

This series from the MPTCP tree adds genl multicast events that are
important for implementing a userspace path manager. In MPTCP, a path
manager is responsible for adding or removing additional subflows on
each MPTCP connection. The in-kernel path manager (already part of the
kernel) is a better fit for many server use cases, but the additional
flexibility of userspace path managers is often useful for client
devices.

Patches 1, 2, 4, 5, and 6 do some refactoring to streamline the netlink
event implementation in the final patch.

Patch 3 improves the timeliness of subflow destruction to ensure the
'subflow closed' event will be sent soon enough.

Patch 7 allows use of the GENL_UNS_ADMIN_PERM flag on genl mcast groups
to mandate CAP_NET_ADMIN, which is important to protect token information
in the MPTCP events. This is a genetlink change.

Patch 8 adds the MPTCP netlink events.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 0a82c37e b911c97c
......@@ -14,6 +14,7 @@
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
u8 flags;
};
struct genl_ops;
......
......@@ -36,6 +36,7 @@ enum {
/* netlink interface */
#define MPTCP_PM_NAME "mptcp_pm"
#define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds"
#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events"
#define MPTCP_PM_VER 0x1
/*
......@@ -104,4 +105,77 @@ struct mptcp_info {
__u64 mptcpi_rcv_nxt;
};
/*
* MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A new MPTCP connection has been created. It is the good time to allocate
* memory and send ADD_ADDR if needed. Depending on the traffic-patterns
* it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
*
* MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A MPTCP connection is established (can start new subflows).
*
* MPTCP_EVENT_CLOSED: token
* A MPTCP connection has stopped.
*
* MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
* A new address has been announced by the peer.
*
* MPTCP_EVENT_REMOVED: token, rem_id
* An address has been lost by the peer.
*
* MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
* daddr4 | daddr6, sport, dport, backup,
* if_idx [, error]
* A new subflow has been established. 'error' should not be set.
*
* MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* A subflow has been closed. An error (copy of sk_err) could be set if an
* error has been detected for this subflow.
*
* MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* The priority of a subflow has changed. 'error' should not be set.
*/
enum mptcp_event_type {
MPTCP_EVENT_UNSPEC = 0,
MPTCP_EVENT_CREATED = 1,
MPTCP_EVENT_ESTABLISHED = 2,
MPTCP_EVENT_CLOSED = 3,
MPTCP_EVENT_ANNOUNCED = 6,
MPTCP_EVENT_REMOVED = 7,
MPTCP_EVENT_SUB_ESTABLISHED = 10,
MPTCP_EVENT_SUB_CLOSED = 11,
MPTCP_EVENT_SUB_PRIORITY = 13,
};
enum mptcp_event_attr {
MPTCP_ATTR_UNSPEC = 0,
MPTCP_ATTR_TOKEN, /* u32 */
MPTCP_ATTR_FAMILY, /* u16 */
MPTCP_ATTR_LOC_ID, /* u8 */
MPTCP_ATTR_REM_ID, /* u8 */
MPTCP_ATTR_SADDR4, /* be32 */
MPTCP_ATTR_SADDR6, /* struct in6_addr */
MPTCP_ATTR_DADDR4, /* be32 */
MPTCP_ATTR_DADDR6, /* struct in6_addr */
MPTCP_ATTR_SPORT, /* be16 */
MPTCP_ATTR_DPORT, /* be16 */
MPTCP_ATTR_BACKUP, /* u8 */
MPTCP_ATTR_ERROR, /* u8 */
MPTCP_ATTR_FLAGS, /* u16 */
MPTCP_ATTR_TIMEOUT, /* u32 */
MPTCP_ATTR_IF_IDX, /* s32 */
__MPTCP_ATTR_AFTER_LAST
};
#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
#endif /* _UAPI_MPTCP_H */
......@@ -867,7 +867,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
mptcp_pm_fully_established(msk);
mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
}
return true;
......
......@@ -68,13 +68,14 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
......@@ -119,16 +120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk)
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
pr_debug("msk=%p", msk);
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
/* mptcp_pm_fully_established() can be invoked by multiple
......@@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
if (READ_ONCE(pm->work_pending) &&
!(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
announce = true;
msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
if (announce)
mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
......@@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
mptcp_event_addr_announced(msk, addr);
spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr)) {
......@@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
pr_debug("msk=%p remote_id=%d", msk, rm_id);
mptcp_event_addr_removed(msk, rm_id);
spin_lock_bh(&pm->lock);
mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
pm->rm_id = rm_id;
......@@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
subflow->backup = bkup;
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
/* path manager helpers */
......
This diff is collapsed.
......@@ -2114,8 +2114,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
* so we need to use tcp_close() after detaching them from the mptcp
* parent socket.
*/
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
list_del(&subflow->node);
......@@ -2147,40 +2147,17 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
}
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
return 0;
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
__mptcp_close_ssk(sk, ssk, subflow);
}
static void pm_work(struct mptcp_sock *msk)
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
{
struct mptcp_pm_data *pm = &msk->pm;
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
mptcp_pm_nl_add_addr_send_ack(msk);
}
if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
mptcp_pm_nl_rm_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
mptcp_pm_nl_fully_established(msk);
}
if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
mptcp_pm_nl_subflow_established(msk);
}
spin_unlock_bh(&msk->pm.lock);
return 0;
}
static void __mptcp_close_subflow(struct mptcp_sock *msk)
......@@ -2195,7 +2172,11 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (inet_sk_state_load(ssk) != TCP_CLOSE)
continue;
__mptcp_close_ssk((struct sock *)msk, ssk, subflow);
/* 'subflow_data_ready' will re-sched once rx queue is empty */
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
mptcp_close_ssk((struct sock *)msk, ssk, subflow);
}
}
......@@ -2267,11 +2248,8 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
if (msk->pm.status)
pm_work(msk);
mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
......@@ -2291,6 +2269,9 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
......@@ -2607,6 +2588,10 @@ static void mptcp_close(struct sock *sk, long timeout)
release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
if (mptcp_sk(sk)->token)
mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
sock_put(sk);
}
......@@ -3049,7 +3034,7 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, 0);
mptcp_pm_new_connection(msk, ssk, 0);
mptcp_rcv_space_init(msk, ssk);
}
......@@ -3078,7 +3063,7 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
if (!msk->pm.server_side)
return true;
goto out;
if (!mptcp_pm_allow_new_subflow(msk))
return false;
......@@ -3105,6 +3090,8 @@ bool mptcp_finish_join(struct sock *ssk)
if (parent_sock && !ssk->sk_socket)
mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
......@@ -3281,9 +3268,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
bool slowpath;
slowpath = lock_sock_fast(newsk);
lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
......@@ -3293,7 +3279,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
list_add(&subflow->node, &msk->conn_list);
sock_hold(msk->first);
if (mptcp_is_fully_established(newsk))
mptcp_pm_fully_established(msk);
mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
......@@ -3309,7 +3295,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
unlock_sock_fast(newsk, slowpath);
release_sock(newsk);
}
if (inet_csk_listen_poll(ssock->sk))
......
......@@ -10,6 +10,7 @@
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#define MPTCP_SUPPORTED_VERSION 1
......@@ -539,8 +540,8 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
......@@ -639,8 +640,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
......@@ -666,6 +667,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
......@@ -713,11 +719,7 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
......
......@@ -675,7 +675,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
......@@ -953,6 +953,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
subflow->map_valid = 0;
}
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
if (likely(ssk->sk_state != TCP_CLOSE))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
sock_hold(sk);
if (!schedule_work(&msk->work))
sock_put(sk);
}
}
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
......@@ -991,11 +1007,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (status != MAPPING_OK)
return false;
goto no_data;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
return false;
goto no_data;
/* if msk lacks the remote key, this subflow must provide an
* MP_CAPABLE-based mapping
......@@ -1029,6 +1045,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
return true;
no_data:
subflow_sched_work_if_closed(msk, ssk);
return false;
fatal:
/* fatal protocol error, close the socket */
/* This barrier is coupled with smp_rmb() in tcp_poll() */
......@@ -1413,6 +1432,8 @@ static void subflow_state_change(struct sock *sk)
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
......
......@@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
static int genl_bind(struct net *net, int group)
{
const struct genl_family *family;
unsigned int id;
int ret = 0;
genl_lock_all();
idr_for_each_entry(&genl_fam_idr, family, id) {
const struct genl_multicast_group *grp;
int i;
if (family->n_mcgrps == 0)
continue;
i = group - family->mcgrp_offset;
if (i < 0 || i >= family->n_mcgrps)
continue;
grp = &family->mcgrps[i];
if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
break;
}
genl_unlock_all();
return ret;
}
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
};
/* we'll bump the group number right afterwards */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment