Commit 11052589 authored by Kuniyuki Iwashima, committed by Jakub Kicinski

tcp/udp: Make early_demux back namespacified.

Commit e21145a9 ("ipv4: namespacify ip_early_demux sysctl knob") made
it possible to enable/disable early_demux on a per-netns basis.  Then, we
introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for
TCP/UDP in commit dddb64bc ("net: Add sysctl to toggle early demux for
tcp and udp").  However, the .proc_handler() was wrong and actually
disabled us from changing the behaviour in each netns.

We can execute early_demux if net.ipv4.ip_early_demux is on and each proto
.early_demux() handler is not NULL.  When we toggle (tcp|udp)_early_demux,
the change itself is saved in each netns variable, but the .early_demux()
handler is a global variable, so the handler is switched based on the
init_net's sysctl variable.  Thus, netns (tcp|udp)_early_demux knobs have
nothing to do with the logic.  Whether we CAN execute proto .early_demux()
is always decided by init_net's sysctl knob, and whether we DO it or not is
by each netns ip_early_demux knob.

This patch namespacifies (tcp|udp)_early_demux again.  For now, the users
of the .early_demux() handler are TCP and UDP only, and they are called
directly to avoid retpoline.  So, we can remove the .early_demux() handler
from inet6?_protos and need not dereference them in ip6?_rcv_finish_core().
If another proto needs .early_demux(), we can restore it at that time.

Fixes: dddb64bc ("net: Add sysctl to toggle early demux for tcp and udp")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent df254d45
...@@ -35,8 +35,6 @@ ...@@ -35,8 +35,6 @@
/* This is used to register protocols. */ /* This is used to register protocols. */
struct net_protocol { struct net_protocol {
int (*early_demux)(struct sk_buff *skb);
int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */ /* This returns an error if we weren't able to handle the error. */
...@@ -52,8 +50,6 @@ struct net_protocol { ...@@ -52,8 +50,6 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol { struct inet6_protocol {
void (*early_demux)(struct sk_buff *skb);
void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */ /* This returns an error if we weren't able to handle the error. */
......
...@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; ...@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); void tcp_v6_early_demux(struct sk_buff *skb);
#endif #endif
......
...@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) ...@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport); __be16 dport);
INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
......
...@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = { ...@@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = {
}; };
#endif #endif
/* thinking of making this const? Don't. static const struct net_protocol tcp_protocol = {
* early_demux can change based on sysctl.
*/
static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv, .handler = tcp_v4_rcv,
.err_handler = tcp_v4_err, .err_handler = tcp_v4_err,
.no_policy = 1, .no_policy = 1,
.icmp_strict_tag_validation = 1, .icmp_strict_tag_validation = 1,
}; };
/* thinking of making this const? Don't. static const struct net_protocol udp_protocol = {
* early_demux can change based on sysctl.
*/
static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv, .handler = udp_rcv,
.err_handler = udp_err, .err_handler = udp_err,
.no_policy = 1, .no_policy = 1,
......
...@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ...@@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
ip_hdr(hint)->tos == iph->tos; ip_hdr(hint)->tos == iph->tos;
} }
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); int tcp_v4_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk, static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev, struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint) const struct sk_buff *hint)
{ {
const struct iphdr *iph = ip_hdr(skb); const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
int err, drop_reason; int err, drop_reason;
struct rtable *rt; struct rtable *rt;
...@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, ...@@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
goto drop_error; goto drop_error;
} }
if (net->ipv4.sysctl_ip_early_demux && if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) && !skb_dst(skb) &&
!skb->sk && !skb->sk &&
!ip_is_fragment(iph)) { !ip_is_fragment(iph)) {
const struct net_protocol *ipprot; switch (iph->protocol) {
int protocol = iph->protocol; case IPPROTO_TCP:
if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
ipprot = rcu_dereference(inet_protos[protocol]); tcp_v4_early_demux(skb);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, /* must reload iph, skb->head might have changed */
udp_v4_early_demux, skb); iph = ip_hdr(skb);
if (unlikely(err)) }
goto drop_error; break;
/* must reload iph, skb->head might have changed */ case IPPROTO_UDP:
iph = ip_hdr(skb); if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
err = udp_v4_early_demux(skb);
if (unlikely(err))
goto drop_error;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
break;
} }
} }
......
...@@ -350,61 +350,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, ...@@ -350,61 +350,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
return ret; return ret;
} }
static void proc_configure_early_demux(int enabled, int protocol)
{
struct net_protocol *ipprot;
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol *ip6prot;
#endif
rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot)
ipprot->early_demux = enabled ? ipprot->early_demux_handler :
NULL;
#if IS_ENABLED(CONFIG_IPV6)
ip6prot = rcu_dereference(inet6_protos[protocol]);
if (ip6prot)
ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
NULL;
#endif
rcu_read_unlock();
}
static int proc_tcp_early_demux(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
if (write && !ret) {
int enabled = init_net.ipv4.sysctl_tcp_early_demux;
proc_configure_early_demux(enabled, IPPROTO_TCP);
}
return ret;
}
static int proc_udp_early_demux(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
if (write && !ret) {
int enabled = init_net.ipv4.sysctl_udp_early_demux;
proc_configure_early_demux(enabled, IPPROTO_UDP);
}
return ret;
}
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer, int write, void *buffer,
size_t *lenp, loff_t *ppos) size_t *lenp, loff_t *ppos)
...@@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux, .data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8), .maxlen = sizeof(u8),
.mode = 0644, .mode = 0644,
.proc_handler = proc_udp_early_demux .proc_handler = proc_dou8vec_minmax,
}, },
{ {
.procname = "tcp_early_demux", .procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux, .data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8), .maxlen = sizeof(u8),
.mode = 0644, .mode = 0644,
.proc_handler = proc_tcp_early_demux .proc_handler = proc_dou8vec_minmax,
}, },
{ {
.procname = "nexthop_compat_mode", .procname = "nexthop_compat_mode",
......
...@@ -45,20 +45,23 @@ ...@@ -45,20 +45,23 @@
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk, static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
void (*edemux)(struct sk_buff *skb); if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) && !skb->sk) {
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { switch (ipv6_hdr(skb)->nexthdr) {
const struct inet6_protocol *ipprot; case IPPROTO_TCP:
if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); tcp_v6_early_demux(skb);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) break;
INDIRECT_CALL_2(edemux, tcp_v6_early_demux, case IPPROTO_UDP:
udp_v6_early_demux, skb); if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
udp_v6_early_demux(skb);
break;
}
} }
if (!skb_valid_dst(skb)) if (!skb_valid_dst(skb))
ip6_route_input(skb); ip6_route_input(skb);
} }
......
...@@ -1822,7 +1822,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1822,7 +1822,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto discard_it; goto discard_it;
} }
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) void tcp_v6_early_demux(struct sk_buff *skb)
{ {
const struct ipv6hdr *hdr; const struct ipv6hdr *hdr;
const struct tcphdr *th; const struct tcphdr *th;
...@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = { ...@@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = {
}; };
EXPORT_SYMBOL_GPL(tcpv6_prot); EXPORT_SYMBOL_GPL(tcpv6_prot);
/* thinking of making this const? Don't. static const struct inet6_protocol tcpv6_protocol = {
* early_demux can change based on sysctl.
*/
static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.early_demux_handler = tcp_v6_early_demux,
.handler = tcp_v6_rcv, .handler = tcp_v6_rcv,
.err_handler = tcp_v6_err, .err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
......
...@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, ...@@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL; return NULL;
} }
INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) void udp_v6_early_demux(struct sk_buff *skb)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
const struct udphdr *uh; const struct udphdr *uh;
...@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, ...@@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen);
} }
/* thinking of making this const? Don't. static const struct inet6_protocol udpv6_protocol = {
* early_demux can change based on sysctl.
*/
static struct inet6_protocol udpv6_protocol = {
.early_demux = udp_v6_early_demux,
.early_demux_handler = udp_v6_early_demux,
.handler = udpv6_rcv, .handler = udpv6_rcv,
.err_handler = udpv6_err, .err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment