Commit 28f610d0 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-subflow-init'

Matthieu Baerts says:

====================
mptcp: refactor first subflow init

This series refactors the initialisation of the first subflow of a
listen socket. The first subflow allocation is no longer done at the
initialisation of the socket but later, when the connection request is
received or when requested by the userspace.

This is needed not just because Paolo likes to refactor things but
because this simplifies the code and makes the behaviour more consistent
with the rest. Also, this is a prerequisite for future patches adding
proper support of SELinux/LSM labels with MPTCP and accept(2).

In [1], Ondrej Mosnacek explained they discovered the (userspace-facing)
sockets returned by accept(2) when using MPTCP always end up with the
label representing the kernel (typically system_u:system_r:kernel_t:s0),
while it would make more sense to inherit the context from the parent
socket (the one that is passed to accept(2)).

Before being able to properly support that on SELinux/LSM side, patches
2-3/5 prepare the code to simplify the patch 4/5 moving the allocation.

Patch 1/5 is a small clean-up seen while working on the series and patch
5/5 is a small improvement when closing unaccepted sockets.

[1] https://lore.kernel.org/netdev/CAFqZXNs2LF-OoQBUiiSEyranJUXkPLcCfBkMkwFeM6qEwMKCTw@mail.gmail.com/
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 0475135f 8d547809
......@@ -1001,7 +1001,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk);
} else {
mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
mptcp_pm_fully_established(msk, ssk);
}
return true;
......
......@@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
......@@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
if (announce)
mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
......
......@@ -1035,8 +1035,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
if (!ssock)
return -EINVAL;
if (IS_ERR(ssock))
return PTR_ERR(ssock);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
......
......@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
* completed yet or has failed, return the subflow socket.
* Otherwise return NULL.
*/
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
{
if (!msk->subflow || READ_ONCE(msk->can_ack))
return NULL;
return msk->subflow;
}
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
{
......@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
/* If the MPC handshake is not started, returns the first subflow,
* eventually allocating it.
*/
struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
int ret;
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
return ERR_PTR(-EINVAL);
if (!msk->subflow) {
if (msk->first)
return ERR_PTR(-EINVAL);
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
mptcp_sockopt_sync(msk, msk->first);
}
return msk->subflow;
}
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
......@@ -1662,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
static int mptcp_disconnect(struct sock *sk, int flags);
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, int *copied_syn)
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
struct sock *ssk;
int ret;
/* on flags based fastopen the mptcp is supposed to create the
* first subflow right now. Otherwise we are in the defer_connect
* path, and the first subflow must be already present.
* Since the defer_connect flag is cleared after the first succsful
* fastopen attempt, no need to check for additional subflow status.
*/
if (msg->msg_flags & MSG_FASTOPEN) {
ssock = __mptcp_nmpc_socket(msk);
if (IS_ERR(ssock))
return PTR_ERR(ssock);
}
if (!msk->first)
return -EINVAL;
ssk = msk->first;
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
......@@ -1691,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
} else if (ret && ret != -EINPROGRESS) {
mptcp_disconnect(sk, 0);
}
inet_sk(sk)->defer_connect = 0;
return ret;
}
......@@ -1699,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
......@@ -1709,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
msg->msg_flags & MSG_FASTOPEN))) {
if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
......@@ -2728,10 +2757,6 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
ret = __mptcp_socket_create(mptcp_sk(sk));
if (ret)
return ret;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
......@@ -2928,10 +2953,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
if (mptcp_check_readable(msk)) {
/* the msk has read data, do the MPTCP equivalent of TCP reset */
if (mptcp_check_readable(msk) || timeout < 0) {
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
timeout = 0;
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}
......@@ -3143,7 +3171,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
listener = __mptcp_nmpc_socket(msk);
listener = msk->subflow;
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
......@@ -3363,7 +3391,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
ssock = __mptcp_nmpc_socket(msk);
ssock = msk->subflow;
pr_debug("msk=%p, subflow=%p", msk, ssock);
if (WARN_ON_ONCE(!ssock))
return -EINVAL;
......@@ -3551,8 +3579,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int err = -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
if (!ssock)
return -EINVAL;
if (IS_ERR(ssock))
return PTR_ERR(ssock);
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
......@@ -3640,8 +3668,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sock->sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
err = -EINVAL;
if (IS_ERR(ssock)) {
err = PTR_ERR(ssock);
goto unlock;
}
......@@ -3677,8 +3705,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
err = -EINVAL;
if (IS_ERR(ssock)) {
err = PTR_ERR(ssock);
goto unlock;
}
......@@ -3709,7 +3737,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
ssock = __mptcp_nmpc_socket(msk);
/* buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
ssock = msk->subflow;
if (!ssock)
return -EINVAL;
......
......@@ -627,7 +627,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
......@@ -782,7 +782,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk,
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
......
......@@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTOIFINDEX:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
if (IS_ERR(ssock)) {
release_sock(sk);
return -EINVAL;
return PTR_ERR(ssock);
}
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
......@@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
if (IS_ERR(ssock)) {
release_sock(sk);
return -EINVAL;
return PTR_ERR(ssock);
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
......@@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
if (IS_ERR(ssock)) {
release_sock(sk);
return -EINVAL;
return PTR_ERR(ssock);
}
issk = inet_sk(ssock->sk);
......@@ -762,13 +762,15 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *sock;
int ret = -EINVAL;
int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
if (IS_ERR(sock)) {
ret = PTR_ERR(sock);
goto unlock;
}
ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
......@@ -861,7 +863,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
int ret = -EINVAL;
int ret;
struct sock *ssk;
lock_sock(sk);
......@@ -872,8 +874,10 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
}
ssock = __mptcp_nmpc_socket(msk);
if (!ssock)
if (IS_ERR(ssock)) {
ret = PTR_ERR(ssock);
goto out;
}
ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
......
......@@ -850,7 +850,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child, GFP_ATOMIC);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
} else if (ctx->mp_join) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment