Commit 6897445f authored by Mike Manning, committed by David S. Miller

net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs

Add a sysctl raw_l3mdev_accept to control raw socket lookup in a manner
similar to use of tcp_l3mdev_accept for stream and of udp_l3mdev_accept
for datagram sockets. It defaults to enabled for reasons of backwards
compatibility: this allows the output device to be specified with cmsg
and IP_PKTINFO while using a socket that is not bound to the
corresponding VRF. For example, older ping implementations can be run
with the device specified, but without being executed in the VRF.
If the option is disabled, packets received in a VRF context are only
handled by a raw socket bound to the VRF, and correspondingly packets
in the default VRF are only handled by a socket not bound to any VRF.
Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6da5b0f0
...@@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN ...@@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN
derived from the listen socket to be bound to the L3 domain in derived from the listen socket to be bound to the L3 domain in
which the packets originated. Only valid when the kernel was which the packets originated. Only valid when the kernel was
compiled with CONFIG_NET_L3_MASTER_DEV. compiled with CONFIG_NET_L3_MASTER_DEV.
Default: 0 (disabled)
tcp_low_latency - BOOLEAN tcp_low_latency - BOOLEAN
This is a legacy option, it has no effect anymore. This is a legacy option, it has no effect anymore.
...@@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN ...@@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN
being received regardless of the L3 domain in which they being received regardless of the L3 domain in which they
originated. Only valid when the kernel was compiled with originated. Only valid when the kernel was compiled with
CONFIG_NET_L3_MASTER_DEV. CONFIG_NET_L3_MASTER_DEV.
Default: 0 (disabled)
udp_mem - vector of 3 INTEGERs: min, pressure, max udp_mem - vector of 3 INTEGERs: min, pressure, max
Number of pages allowed for queueing by all UDP sockets. Number of pages allowed for queueing by all UDP sockets.
...@@ -799,6 +801,16 @@ udp_wmem_min - INTEGER ...@@ -799,6 +801,16 @@ udp_wmem_min - INTEGER
total pages of UDP sockets exceed udp_mem pressure. The unit is byte. total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
Default: 4K Default: 4K
RAW variables:
raw_l3mdev_accept - BOOLEAN
Enabling this option allows a "global" bound socket to work
across L3 master domains (e.g., VRFs) with packets capable of
being received regardless of the L3 domain in which they
originated. Only valid when the kernel was compiled with
CONFIG_NET_L3_MASTER_DEV.
Default: 1 (enabled)
CIPSOv4 Variables: CIPSOv4 Variables:
cipso_cache_enable - BOOLEAN cipso_cache_enable - BOOLEAN
......
...@@ -111,9 +111,22 @@ the same port if they bind to an l3mdev. ...@@ -111,9 +111,22 @@ the same port if they bind to an l3mdev.
TCP & UDP services running in the default VRF context (ie., not bound TCP & UDP services running in the default VRF context (ie., not bound
to any VRF device) can work across all VRF domains by enabling the to any VRF device) can work across all VRF domains by enabling the
tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
sysctl -w net.ipv4.tcp_l3mdev_accept=1 sysctl -w net.ipv4.tcp_l3mdev_accept=1
sysctl -w net.ipv4.udp_l3mdev_accept=1 sysctl -w net.ipv4.udp_l3mdev_accept=1
These options are disabled by default so that a socket in a VRF is only
selected for packets in that VRF. There is a similar option for RAW
sockets, which is enabled by default for reasons of backwards compatibility.
This allows the output device to be specified with cmsg and IP_PKTINFO while
using a socket not bound to the corresponding VRF. For example, older ping
implementations can be run with the device specified, but without being executed
in the VRF. This option can be disabled so that packets received in a VRF
context are only handled by a raw socket bound to the VRF, and packets in the
default VRF are only handled by a socket not bound to any VRF:
sysctl -w net.ipv4.raw_l3mdev_accept=0
netfilter rules on the VRF device can be used to limit access to services netfilter rules on the VRF device can be used to limit access to services
running in the default VRF context as well. running in the default VRF context as well.
......
...@@ -103,6 +103,9 @@ struct netns_ipv4 { ...@@ -103,6 +103,9 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */ /* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr; int sysctl_ip_dynaddr;
int sysctl_ip_early_demux; int sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_raw_l3mdev_accept;
#endif
int sysctl_tcp_early_demux; int sysctl_tcp_early_demux;
int sysctl_udp_early_demux; int sysctl_udp_early_demux;
......
...@@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v); ...@@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v);
int raw_hash_sk(struct sock *sk); int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk);
void raw_init(void);
struct raw_sock { struct raw_sock {
/* inet_sock has to be the first member */ /* inet_sock has to be the first member */
......
...@@ -1964,6 +1964,8 @@ static int __init inet_init(void) ...@@ -1964,6 +1964,8 @@ static int __init inet_init(void)
/* Add UDP-Lite (RFC 3828) */ /* Add UDP-Lite (RFC 3828) */
udplite4_register(); udplite4_register();
raw_init();
ping_init(); ping_init();
/* /*
......
...@@ -805,7 +805,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -805,7 +805,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return copied; return copied;
} }
static int raw_init(struct sock *sk) static int raw_sk_init(struct sock *sk)
{ {
struct raw_sock *rp = raw_sk(sk); struct raw_sock *rp = raw_sk(sk);
...@@ -970,7 +970,7 @@ struct proto raw_prot = { ...@@ -970,7 +970,7 @@ struct proto raw_prot = {
.connect = ip4_datagram_connect, .connect = ip4_datagram_connect,
.disconnect = __udp_disconnect, .disconnect = __udp_disconnect,
.ioctl = raw_ioctl, .ioctl = raw_ioctl,
.init = raw_init, .init = raw_sk_init,
.setsockopt = raw_setsockopt, .setsockopt = raw_setsockopt,
.getsockopt = raw_getsockopt, .getsockopt = raw_getsockopt,
.sendmsg = raw_sendmsg, .sendmsg = raw_sendmsg,
...@@ -1133,4 +1133,28 @@ void __init raw_proc_exit(void) ...@@ -1133,4 +1133,28 @@ void __init raw_proc_exit(void)
{ {
unregister_pernet_subsys(&raw_net_ops); unregister_pernet_subsys(&raw_net_ops);
} }
/*
 * Apply the raw-socket sysctl defaults to one network namespace.
 *
 * raw_l3mdev_accept is set to 1 (enabled). The field is only present in
 * struct netns_ipv4 when CONFIG_NET_L3_MASTER_DEV is set, so without that
 * option this function does nothing.
 */
static void raw_sysctl_init_net(struct net *net)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
	net->ipv4.sysctl_raw_l3mdev_accept = 1;
#endif
}
/*
 * Per-namespace init hook wired into raw_sysctl_ops: applies the raw
 * sysctl defaults to the given network namespace. Always returns 0.
 */
static int __net_init raw_sysctl_init(struct net *net)
{
raw_sysctl_init_net(net);
return 0;
}
/*
 * Pernet operations for the raw sysctl defaults; only an .init hook is
 * provided. Registered from raw_init() via register_pernet_subsys().
 */
static struct pernet_operations __net_initdata raw_sysctl_ops = {
.init = raw_sysctl_init,
};
/*
 * Boot-time setup for the raw-socket sysctl defaults, invoked from
 * inet_init().
 *
 * Applies the defaults to init_net directly, then registers
 * raw_sysctl_ops as a pernet subsystem. A registration failure is
 * treated as fatal and panics.
 */
void __init raw_init(void)
{
	int err;

	raw_sysctl_init_net(&init_net);

	err = register_pernet_subsys(&raw_sysctl_ops);
	if (err)
		panic("RAW: failed to init sysctl parameters.\n");
}
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
...@@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = ipv4_ping_group_range, .proc_handler = ipv4_ping_group_range,
}, },
#ifdef CONFIG_NET_L3_MASTER_DEV
{
.procname = "raw_l3mdev_accept",
.data = &init_net.ipv4.sysctl_raw_l3mdev_accept,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif
{ {
.procname = "tcp_ecn", .procname = "tcp_ecn",
.data = &init_net.ipv4.sysctl_tcp_ecn, .data = &init_net.ipv4.sysctl_tcp_ecn,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment