Commit 1f62f58d authored by David S. Miller

Merge branch 'mptcp-cleanups-ephemeral-port-sockopts'

Matthieu Baerts says:

====================
mptcp: cleanup and support more ephemeral ports sockopts

Patch 1 is a cleanup: the mptcp_is_tcpsk() helper was modifying sock->ops
in some cases, which is unexpected for a helper with that name.

Patches 2 to 4 add support for two socket options: IP_LOCAL_PORT_RANGE and
IP_BIND_ADDRESS_NO_PORT. The first of these is a preparation patch, the
second adds the support, and the last modifies an existing selftest to
validate the new features.
====================
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7961ef1f 122db5e3
...@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) ...@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end); return READ_ONCE(msk->wnd_end);
} }
static bool mptcp_is_tcpsk(struct sock *sk) static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{ {
struct socket *sock = sk->sk_socket;
if (unlikely(sk->sk_prot == &tcp_prot)) {
/* we are being invoked after mptcp_accept() has
* accepted a non-mp-capable flow: sk is a tcp_sk,
* not an mptcp one.
*
* Hand the socket over to tcp so all further socket ops
* bypass mptcp.
*/
WRITE_ONCE(sock->ops, &inet_stream_ops);
return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) { if (sk->sk_prot == &tcpv6_prot)
WRITE_ONCE(sock->ops, &inet6_stream_ops); return &inet6_stream_ops;
return true;
#endif #endif
} WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
return &inet_stream_ops;
return false;
} }
static int __mptcp_socket_create(struct mptcp_sock *msk) static int __mptcp_socket_create(struct mptcp_sock *msk)
...@@ -3258,44 +3244,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) ...@@ -3258,44 +3244,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
} }
static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
bool kern)
{
struct sock *newsk;
pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
newsk = inet_csk_accept(ssk, flags, err, kern);
if (!newsk)
return NULL;
pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
subflow = mptcp_subflow_ctx(newsk);
new_mptcp_sock = subflow->conn;
/* is_mptcp should be false if subflow->conn is missing, see
* subflow_syn_recv_sock()
*/
if (WARN_ON_ONCE(!new_mptcp_sock)) {
tcp_sk(newsk)->is_mptcp = 0;
goto out;
}
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
out:
newsk->sk_kern_sock = kern;
return newsk;
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{ {
struct mptcp_subflow_context *subflow, *tmp; struct mptcp_subflow_context *subflow, *tmp;
...@@ -3739,7 +3687,6 @@ static struct proto mptcp_prot = { ...@@ -3739,7 +3687,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect, .connect = mptcp_connect,
.disconnect = mptcp_disconnect, .disconnect = mptcp_disconnect,
.close = mptcp_close, .close = mptcp_close,
.accept = mptcp_accept,
.setsockopt = mptcp_setsockopt, .setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt, .getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown, .shutdown = mptcp_shutdown,
...@@ -3849,18 +3796,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, ...@@ -3849,18 +3796,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk) if (!ssk)
return -EINVAL; return -EINVAL;
newsk = mptcp_accept(ssk, flags, &err, kern); pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk) if (!newsk)
return err; return err;
lock_sock(newsk); pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
subflow = mptcp_subflow_ctx(newsk);
new_mptcp_sock = subflow->conn;
/* is_mptcp should be false if subflow->conn is missing, see
* subflow_syn_recv_sock()
*/
if (WARN_ON_ONCE(!new_mptcp_sock)) {
tcp_sk(newsk)->is_mptcp = 0;
goto tcpfallback;
}
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
newsk->sk_kern_sock = kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk); __inet_accept(sock, newsock, newsk);
if (!mptcp_is_tcpsk(newsock->sk)) {
struct mptcp_sock *msk = mptcp_sk(newsk);
struct mptcp_subflow_context *subflow;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
msk = mptcp_sk(newsk);
msk->in_accept_queue = 0; msk->in_accept_queue = 0;
/* set ssk->sk_socket of accept()ed flows to mptcp socket. /* set ssk->sk_socket of accept()ed flows to mptcp socket.
...@@ -3882,6 +3847,21 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, ...@@ -3882,6 +3847,21 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (unlikely(list_is_singular(&msk->conn_list))) if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE); inet_sk_state_store(newsk, TCP_CLOSE);
} }
} else {
MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
newsk->sk_kern_sock = kern;
lock_sock(newsk);
__inet_accept(sock, newsock, newsk);
/* we are being invoked after accepting a non-mp-capable
* flow: sk is a tcp_sk, not an mptcp one.
*
* Hand the socket over to tcp so all further socket ops
* bypass mptcp.
*/
WRITE_ONCE(newsock->sk->sk_socket->ops,
mptcp_fallback_tcp_ops(newsock->sk));
} }
release_sock(newsk); release_sock(newsk);
......
...@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname) ...@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* should work fine */ /* should work fine */
case IP_FREEBIND: case IP_FREEBIND:
case IP_TRANSPARENT: case IP_TRANSPARENT:
case IP_BIND_ADDRESS_NO_PORT:
case IP_LOCAL_PORT_RANGE:
/* the following are control cmsg related */ /* the following are control cmsg related */
case IP_PKTINFO: case IP_PKTINFO:
...@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname) ...@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* common stuff that need some love */ /* common stuff that need some love */
case IP_TOS: case IP_TOS:
case IP_TTL: case IP_TTL:
case IP_BIND_ADDRESS_NO_PORT:
case IP_MTU_DISCOVER: case IP_MTU_DISCOVER:
case IP_RECVERR: case IP_RECVERR:
...@@ -683,7 +684,7 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op ...@@ -683,7 +684,7 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
return 0; return 0;
} }
static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen) sockptr_t optval, unsigned int optlen)
{ {
struct sock *sk = (struct sock *)msk; struct sock *sk = (struct sock *)msk;
...@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o ...@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
inet_assign_bit(TRANSPARENT, ssk, inet_assign_bit(TRANSPARENT, ssk,
inet_test_bit(TRANSPARENT, sk)); inet_test_bit(TRANSPARENT, sk));
break; break;
case IP_BIND_ADDRESS_NO_PORT:
inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
break;
case IP_LOCAL_PORT_RANGE:
WRITE_ONCE(inet_sk(ssk)->local_port_range,
READ_ONCE(inet_sk(sk)->local_port_range));
break;
default: default:
release_sock(sk); release_sock(sk);
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
...@@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, ...@@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) { switch (optname) {
case IP_FREEBIND: case IP_FREEBIND:
case IP_TRANSPARENT: case IP_TRANSPARENT:
return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); case IP_BIND_ADDRESS_NO_PORT:
case IP_LOCAL_PORT_RANGE:
return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS: case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
} }
...@@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, ...@@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) { switch (optname) {
case IP_TOS: case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
case IP_BIND_ADDRESS_NO_PORT:
return mptcp_put_int_option(msk, optval, optlen,
inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
case IP_LOCAL_PORT_RANGE:
return mptcp_put_int_option(msk, optval, optlen,
READ_ONCE(inet_sk(sk)->local_port_range));
} }
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ...@@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
} }
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
......
...@@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { ...@@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
.so_protocol = IPPROTO_SCTP, .so_protocol = IPPROTO_SCTP,
}; };
FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
.so_domain = AF_INET,
.so_type = SOCK_STREAM,
.so_protocol = IPPROTO_MPTCP,
};
FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
.so_domain = AF_INET6, .so_domain = AF_INET6,
.so_type = SOCK_STREAM, .so_type = SOCK_STREAM,
...@@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { ...@@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
.so_protocol = IPPROTO_SCTP, .so_protocol = IPPROTO_SCTP,
}; };
FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
.so_domain = AF_INET6,
.so_type = SOCK_STREAM,
.so_protocol = IPPROTO_MPTCP,
};
TEST_F(ip_local_port_range, invalid_option_value) TEST_F(ip_local_port_range, invalid_option_value)
{ {
__u16 val16; __u16 val16;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment