Commit be348926 authored by David S. Miller

Merge branch 'netfilter-vrf-rework'

Florian Westphal says:

====================
vrf: rework interaction with netfilter/conntrack

V2:
- fix 'plain integer as null pointer' warning
- reword commit message in patch 2 to clarify loss of 'ct set untracked'

This patch series aims to solve the problem addressed by the to-be-reverted
change 09e856d5 ("vrf: Reset skb conntrack connection on VRF rcv") in a
different way.

Rather than have skbs pass through conntrack and nat hooks twice, suppress
conntrack invocation if the conntrack/nat hook is called from the vrf driver.

First patch deals with 'incoming connection' case:
1. suppress NAT transformations
2. skip conntrack confirmation

NAT and conntrack confirmation are done when the ip/ipv6 stack calls
the postrouting hook.

Second patch deals with local packets:
in vrf driver, mark the skbs as 'untracked', so conntrack output
hook ignores them.  This skips all nat hooks as well.

Afterwards, remove the untracked state again so the second
round will pick them up.

One alternative to the chosen implementation would be to add a 'caller
id' field to 'struct nf_hook_state' and then use that. These patches
use the more straightforward check of the VRF flag on the state->out device.

The two patches apply to both net and net-next. I am targeting -next
because I think that, since snat did not work correctly for so long,
we can take the longer route.  If you disagree, apply to net at your
discretion.

The patches apply both with 09e856d5 reverted and with it still
in place, but ingress conntrack settings (zone, notrack etc.) only
start working again with the revert in place.

I've already submitted selftests for vrf+nfqueue and conntrack+vrf.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4900a769 8c9c296a
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <net/l3mdev.h> #include <net/l3mdev.h>
#include <net/fib_rules.h> #include <net/fib_rules.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack.h>
#define DRV_NAME "vrf" #define DRV_NAME "vrf"
#define DRV_VERSION "1.1" #define DRV_VERSION "1.1"
...@@ -424,12 +425,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -424,12 +425,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
/*
 * Mark @skb as untracked, but only when it carries no conntrack
 * information yet (skb_get_nfct() yields 0).  An skb that already has
 * an nfct value is left exactly as it is.
 */
static void vrf_nf_set_untracked(struct sk_buff *skb)
{
	/* Something is already attached: do not overwrite it. */
	if (skb_get_nfct(skb) != 0)
		return;

	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
}
/*
 * Remove the untracked marker from @skb again.  The conntrack state is
 * reset only when the skb's nfct value is exactly IP_CT_UNTRACKED; any
 * other value is left untouched.
 */
static void vrf_nf_reset_ct(struct sk_buff *skb)
{
	/* Not the bare untracked marker: nothing to undo. */
	if (skb_get_nfct(skb) != IP_CT_UNTRACKED)
		return;

	nf_reset_ct(skb);
}
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
static int vrf_ip6_local_out(struct net *net, struct sock *sk, static int vrf_ip6_local_out(struct net *net, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
int err; int err;
vrf_nf_reset_ct(skb);
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
sk, skb, NULL, skb_dst(skb)->dev, dst_output); sk, skb, NULL, skb_dst(skb)->dev, dst_output);
...@@ -508,6 +523,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, ...@@ -508,6 +523,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk,
{ {
int err; int err;
vrf_nf_reset_ct(skb);
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, skb_dst(skb)->dev, dst_output); skb, NULL, skb_dst(skb)->dev, dst_output);
if (likely(err == 1)) if (likely(err == 1))
...@@ -626,8 +643,7 @@ static void vrf_finish_direct(struct sk_buff *skb) ...@@ -626,8 +643,7 @@ static void vrf_finish_direct(struct sk_buff *skb)
skb_pull(skb, ETH_HLEN); skb_pull(skb, ETH_HLEN);
} }
/* reset skb device */ vrf_nf_reset_ct(skb);
nf_reset_ct(skb);
} }
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
...@@ -641,7 +657,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, ...@@ -641,7 +657,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
struct neighbour *neigh; struct neighbour *neigh;
int ret; int ret;
nf_reset_ct(skb); vrf_nf_reset_ct(skb);
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev; skb->dev = dev;
...@@ -752,6 +768,8 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, ...@@ -752,6 +768,8 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev; skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
...@@ -858,7 +876,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s ...@@ -858,7 +876,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
struct neighbour *neigh; struct neighbour *neigh;
bool is_v6gw = false; bool is_v6gw = false;
nf_reset_ct(skb); vrf_nf_reset_ct(skb);
/* Be paranoid, rather than too clever. */ /* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
...@@ -980,6 +998,8 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, ...@@ -980,6 +998,8 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev; skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip_out_direct_finish); skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
......
...@@ -155,6 +155,16 @@ unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, ...@@ -155,6 +155,16 @@ unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
} }
EXPORT_SYMBOL_GPL(nf_confirm); EXPORT_SYMBOL_GPL(nf_confirm);
/*
 * Tell whether the current hook invocation is the postrouting hook with
 * an L3 master device as output device.
 *
 * Returns true when state->hook is NF_INET_POST_ROUTING and
 * netif_is_l3_master(state->out) holds; false otherwise.  Always false
 * when CONFIG_NET_L3_MASTER_DEV is not enabled.
 */
static bool in_vrf_postrouting(const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (state->hook != NF_INET_POST_ROUTING)
		return false;

	return netif_is_l3_master(state->out);
#else
	return false;
#endif
}
static unsigned int ipv4_confirm(void *priv, static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb, struct sk_buff *skb,
const struct nf_hook_state *state) const struct nf_hook_state *state)
...@@ -166,6 +176,9 @@ static unsigned int ipv4_confirm(void *priv, ...@@ -166,6 +176,9 @@ static unsigned int ipv4_confirm(void *priv,
if (!ct || ctinfo == IP_CT_RELATED_REPLY) if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return nf_conntrack_confirm(skb); return nf_conntrack_confirm(skb);
if (in_vrf_postrouting(state))
return NF_ACCEPT;
return nf_confirm(skb, return nf_confirm(skb,
skb_network_offset(skb) + ip_hdrlen(skb), skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo); ct, ctinfo);
...@@ -374,6 +387,9 @@ static unsigned int ipv6_confirm(void *priv, ...@@ -374,6 +387,9 @@ static unsigned int ipv6_confirm(void *priv,
if (!ct || ctinfo == IP_CT_RELATED_REPLY) if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return nf_conntrack_confirm(skb); return nf_conntrack_confirm(skb);
if (in_vrf_postrouting(state))
return NF_ACCEPT;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off); &frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
......
...@@ -699,6 +699,16 @@ unsigned int nf_nat_packet(struct nf_conn *ct, ...@@ -699,6 +699,16 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
} }
EXPORT_SYMBOL_GPL(nf_nat_packet); EXPORT_SYMBOL_GPL(nf_nat_packet);
/*
 * Report whether this hook call is NF_INET_POST_ROUTING with an L3
 * master device as the output device (state->out).
 *
 * Without CONFIG_NET_L3_MASTER_DEV the answer is always false.
 */
static bool in_vrf_postrouting(const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (state->hook != NF_INET_POST_ROUTING)
		return false;

	return netif_is_l3_master(state->out);
#else
	return false;
#endif
}
unsigned int unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb, nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state) const struct nf_hook_state *state)
...@@ -715,7 +725,7 @@ nf_nat_inet_fn(void *priv, struct sk_buff *skb, ...@@ -715,7 +725,7 @@ nf_nat_inet_fn(void *priv, struct sk_buff *skb,
* packet filter it out, or implement conntrack/NAT for that * packet filter it out, or implement conntrack/NAT for that
* protocol. 8) --RR * protocol. 8) --RR
*/ */
if (!ct) if (!ct || in_vrf_postrouting(state))
return NF_ACCEPT; return NF_ACCEPT;
nat = nfct_nat(ct); nat = nfct_nat(ct);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment