Commit 5a99e7f2 authored by David S. Miller

Merge branch 'nf-ingress'

Pablo Neira Ayuso says:

====================
Netfilter ingress support (v4)

This is the v4 round of patches to add the Netfilter ingress hook. It basically
comes in two steps:

1) Add the CONFIG_NET_INGRESS switch to wrap the ingress static key around it.
   The idea is to use the same global static key to avoid adding more code to
   the hot path.

2) Add the Netfilter ingress hook after the tc ingress hook, under the global
   ingress_needed static key. As I said, the netfilter ingress hook also has
   its own static key, that is nested under the global static key. Please, see
   patch 5/5 for performance numbers and more information.

I originally started this next round, as it was suggested, exploring the
independent static key for netfilter ingress just after tc ingress, but the
results that I gathered from that patch are not good for non-users:

Result: OK: 6425927(c6425843+d83) usec, 100000000 (60byte,0frags)
  15561955pps 7469Mb/sec (7469738400bps) errors: 100000000

this roughly means 500Kpps less performance wrt. the base numbers, so that's
the reason why I discarded that approach and I focused on this.

The idea of this patchset is to open the window to nf_tables, which comes with
features that will work out-of-the-box (once the boiler plate code to support
the 'netdev' table family is in place), to avoid repeating myself [1], the most
relevant features are:

1) Multi-dimensional key dictionary lookups.
2) Arbitrary stateful flow tables.
3) Transactions and good support for dynamic updates.

But there are also interesting aspects to consider from userspace, such as the
ability to support new layer 2 protocols without kernel updates, a well-defined
netlink interface, userspace libraries and utilities for third party
applications, among others.

I hope we can be happy with this approach.

Please, apply. Thanks.

[1] http://marc.info/?l=netfilter-devel&m=143033337020328&w=2
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a104a6b3 e687ad60
...@@ -1656,6 +1656,9 @@ struct net_device { ...@@ -1656,6 +1656,9 @@ struct net_device {
struct tcf_proto __rcu *ingress_cl_list; struct tcf_proto __rcu *ingress_cl_list;
#endif #endif
struct netdev_queue __rcu *ingress_queue; struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
struct list_head nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN];
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
......
...@@ -54,10 +54,12 @@ struct nf_hook_state { ...@@ -54,10 +54,12 @@ struct nf_hook_state {
struct net_device *in; struct net_device *in;
struct net_device *out; struct net_device *out;
struct sock *sk; struct sock *sk;
struct list_head *hook_list;
int (*okfn)(struct sock *, struct sk_buff *); int (*okfn)(struct sock *, struct sk_buff *);
}; };
static inline void nf_hook_state_init(struct nf_hook_state *p, static inline void nf_hook_state_init(struct nf_hook_state *p,
struct list_head *hook_list,
unsigned int hook, unsigned int hook,
int thresh, u_int8_t pf, int thresh, u_int8_t pf,
struct net_device *indev, struct net_device *indev,
...@@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, ...@@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
p->in = indev; p->in = indev;
p->out = outdev; p->out = outdev;
p->sk = sk; p->sk = sk;
p->hook_list = hook_list;
p->okfn = okfn; p->okfn = okfn;
} }
...@@ -79,16 +82,17 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, ...@@ -79,16 +82,17 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
const struct nf_hook_state *state); const struct nf_hook_state *state);
struct nf_hook_ops { struct nf_hook_ops {
struct list_head list; struct list_head list;
/* User fills in from here down. */ /* User fills in from here down. */
nf_hookfn *hook; nf_hookfn *hook;
struct module *owner; struct net_device *dev;
void *priv; struct module *owner;
u_int8_t pf; void *priv;
unsigned int hooknum; u_int8_t pf;
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */ /* Hooks are ordered in ascending priority. */
int priority; int priority;
}; };
struct nf_sockopt_ops { struct nf_sockopt_ops {
...@@ -131,26 +135,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; ...@@ -131,26 +135,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
u_int8_t pf, unsigned int hook)
{ {
if (__builtin_constant_p(pf) && if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook)) __builtin_constant_p(hook))
return static_key_false(&nf_hooks_needed[pf][hook]); return static_key_false(&nf_hooks_needed[pf][hook]);
return !list_empty(&nf_hooks[pf][hook]); return !list_empty(nf_hook_list);
} }
#else #else
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
u_int8_t pf, unsigned int hook)
{ {
return !list_empty(&nf_hooks[pf][hook]); return !list_empty(nf_hook_list);
} }
#endif #endif
/*
 * nf_hooks_active - report whether the global hook list for (pf, hook)
 * should be traversed.
 *
 * Thin wrapper that selects the nf_hooks[pf][hook] list and defers the
 * actual decision to nf_hook_list_active().
 */
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
	struct list_head *hook_list = &nf_hooks[pf][hook];

	return nf_hook_list_active(hook_list, pf, hook);
}
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
/** /**
* nf_hook_thresh - call a netfilter hook * nf_hook_thresh - call a netfilter hook
* *
* Returns 1 if the hook has allowed the packet to pass. The function * Returns 1 if the hook has allowed the packet to pass. The function
* okfn must be invoked by the caller in this case. Any other return * okfn must be invoked by the caller in this case. Any other return
* value indicates the packet has been consumed by the hook. * value indicates the packet has been consumed by the hook.
...@@ -166,8 +177,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, ...@@ -166,8 +177,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
if (nf_hooks_active(pf, hook)) { if (nf_hooks_active(pf, hook)) {
struct nf_hook_state state; struct nf_hook_state state;
nf_hook_state_init(&state, hook, thresh, pf, nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
indev, outdev, sk, okfn); pf, indev, outdev, sk, okfn);
return nf_hook_slow(skb, &state); return nf_hook_slow(skb, &state);
} }
return 1; return 1;
......
#ifndef _NETFILTER_INGRESS_H_
#define _NETFILTER_INGRESS_H_
#include <linux/netfilter.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NETFILTER_INGRESS
/*
 * nf_hook_ingress_active - check the ingress hook list of the device
 * that @skb arrived on.
 *
 * Returns the result of nf_hook_list_active() for skb->dev's
 * nf_hooks_ingress list, queried as NFPROTO_NETDEV / NF_NETDEV_INGRESS.
 */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
	struct list_head *ingress_hooks = &skb->dev->nf_hooks_ingress;

	return nf_hook_list_active(ingress_hooks, NFPROTO_NETDEV,
				   NF_NETDEV_INGRESS);
}
/**
 * nf_hook_ingress - run the per-device Netfilter ingress hooks on a packet
 * @skb: packet being received; skb->dev supplies both the hook list and
 *       the input device recorded in the hook state
 *
 * Initialises a hook state for NFPROTO_NETDEV / NF_NETDEV_INGRESS with a
 * threshold of INT_MIN and passes the packet to nf_hook_slow().
 *
 * Return: the value returned by nf_hook_slow().
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
	struct nf_hook_state state;

	/*
	 * The arguments following pf are (indev, outdev, sk, okfn).  On
	 * ingress the receiving device is the input interface; there is no
	 * output device, socket or okfn, so those stay NULL.
	 */
	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
			   skb->dev, NULL, NULL, NULL);
	return nf_hook_slow(skb, &state);
}
/*
 * nf_hook_ingress_init - set up @dev's ingress hook list as an empty list.
 */
static inline void nf_hook_ingress_init(struct net_device *dev)
{
	struct list_head *ingress_hooks = &dev->nf_hooks_ingress;

	INIT_LIST_HEAD(ingress_hooks);
}
#else /* CONFIG_NETFILTER_INGRESS */
/*
 * Stub variants used when CONFIG_NETFILTER_INGRESS is not set: they touch
 * neither the packet nor the device.
 */

/* Always reports 0 (no ingress hooks active). */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
	return 0;
}

/* Never runs any hook; always returns 0. */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
	return 0;
}

/* Nothing to initialise in this configuration. */
static inline void nf_hook_ingress_init(struct net_device *dev)
{
}
#endif /* CONFIG_NETFILTER_INGRESS */
#endif /* _NETFILTER_INGRESS_H_ */
...@@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) ...@@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_INGRESS
void net_inc_ingress_queue(void); void net_inc_ingress_queue(void);
void net_dec_ingress_queue(void); void net_dec_ingress_queue(void);
#endif #endif
......
...@@ -51,11 +51,17 @@ enum nf_inet_hooks { ...@@ -51,11 +51,17 @@ enum nf_inet_hooks {
NF_INET_NUMHOOKS NF_INET_NUMHOOKS
}; };
/* Hook numbers used together with NFPROTO_NETDEV. */
enum nf_dev_hooks {
	NF_NETDEV_INGRESS = 0,
	NF_NETDEV_NUMHOOKS	/* count of the entries above; must stay last */
};
enum { enum {
NFPROTO_UNSPEC = 0, NFPROTO_UNSPEC = 0,
NFPROTO_INET = 1, NFPROTO_INET = 1,
NFPROTO_IPV4 = 2, NFPROTO_IPV4 = 2,
NFPROTO_ARP = 3, NFPROTO_ARP = 3,
NFPROTO_NETDEV = 5,
NFPROTO_BRIDGE = 7, NFPROTO_BRIDGE = 7,
NFPROTO_IPV6 = 10, NFPROTO_IPV6 = 10,
NFPROTO_DECNET = 12, NFPROTO_DECNET = 12,
......
...@@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES ...@@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
Newly written code should NEVER need this option but do Newly written code should NEVER need this option but do
compat-independent messages instead! compat-independent messages instead!
config NET_INGRESS
bool
menu "Networking options" menu "Networking options"
source "net/packet/Kconfig" source "net/packet/Kconfig"
......
...@@ -135,6 +135,7 @@ ...@@ -135,6 +135,7 @@
#include <linux/if_macvlan.h> #include <linux/if_macvlan.h>
#include <linux/errqueue.h> #include <linux/errqueue.h>
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include "net-sysfs.h" #include "net-sysfs.h"
...@@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) ...@@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
} }
EXPORT_SYMBOL(call_netdevice_notifiers); EXPORT_SYMBOL(call_netdevice_notifiers);
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly; static struct static_key ingress_needed __read_mostly;
void net_inc_ingress_queue(void) void net_inc_ingress_queue(void)
...@@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, ...@@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
return skb; return skb;
} }
#else
/*
 * Stub variant of handle_ing(): performs no ingress processing, leaves
 * *pt_prev and *ret untouched, and hands @skb straight back to the caller.
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret,
					 struct net_device *orig_dev)
{
	return skb;
}
#endif #endif
/** /**
...@@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) ...@@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
} }
} }
#ifdef CONFIG_NETFILTER_INGRESS
/*
 * nf_ingress - run the Netfilter ingress hooks on @skb when any are active.
 *
 * If a packet handler is still pending in *pt_prev, the packet is first
 * delivered to it via deliver_skb() (result stored in *ret) and *pt_prev
 * is cleared, before the hooks are run.
 *
 * Returns nf_hook_ingress()'s result, or 0 when no ingress hook is active.
 */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
			     int *ret, struct net_device *orig_dev)
{
	if (!nf_hook_ingress_active(skb))
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return nf_hook_ingress(skb);
}
#else
/* Netfilter ingress is compiled out: nothing to run, always returns 0. */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
			     int *ret, struct net_device *orig_dev)
{
	return 0;
}
#endif
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{ {
struct packet_type *ptype, *pt_prev; struct packet_type *ptype, *pt_prev;
...@@ -3798,13 +3828,17 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) ...@@ -3798,13 +3828,17 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
} }
skip_taps: skip_taps:
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) { if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev); skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb) if (!skb)
goto unlock; goto unlock;
}
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto unlock;
}
#endif
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0; skb->tc_verd = 0;
ncls: ncls:
#endif #endif
...@@ -6967,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, ...@@ -6967,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->group = INIT_NETDEV_GROUP; dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops) if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops; dev->ethtool_ops = &default_ethtool_ops;
nf_hook_ingress_init(dev);
return dev; return dev;
free_all: free_all:
......
menu "Core Netfilter Configuration" menu "Core Netfilter Configuration"
depends on NET && INET && NETFILTER depends on NET && INET && NETFILTER
config NETFILTER_INGRESS
bool "Netfilter ingress support"
select NET_INGRESS
help
This allows you to classify packets from ingress using the Netfilter
infrastructure.
config NETFILTER_NETLINK config NETFILTER_NETLINK
tristate tristate
......
...@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex); ...@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg) int nf_register_hook(struct nf_hook_ops *reg)
{ {
struct list_head *nf_hook_list;
struct nf_hook_ops *elem; struct nf_hook_ops *elem;
mutex_lock(&nf_hook_mutex); mutex_lock(&nf_hook_mutex);
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { switch (reg->pf) {
case NFPROTO_NETDEV:
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) {
BUG_ON(reg->dev == NULL);
nf_hook_list = &reg->dev->nf_hooks_ingress;
net_inc_ingress_queue();
break;
}
#endif
/* Fall through. */
default:
nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
break;
}
list_for_each_entry(elem, nf_hook_list, list) {
if (reg->priority < elem->priority) if (reg->priority < elem->priority)
break; break;
} }
...@@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg) ...@@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex); mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list); list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex); mutex_unlock(&nf_hook_mutex);
switch (reg->pf) {
case NFPROTO_NETDEV:
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) {
net_dec_ingress_queue();
break;
}
break;
#endif
default:
break;
}
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif #endif
...@@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) ...@@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
/* We may already have this, but read-locks nest anyway */ /* We may already have this, but read-locks nest anyway */
rcu_read_lock(); rcu_read_lock();
elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
struct nf_hook_ops, list);
next_hook: next_hook:
verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, verdict = nf_iterate(state->hook_list, skb, state, &elem);
&elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) { if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1; ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
......
...@@ -312,6 +312,7 @@ config NET_SCH_PIE ...@@ -312,6 +312,7 @@ config NET_SCH_PIE
config NET_SCH_INGRESS config NET_SCH_INGRESS
tristate "Ingress Qdisc" tristate "Ingress Qdisc"
depends on NET_CLS_ACT depends on NET_CLS_ACT
select NET_INGRESS
---help--- ---help---
Say Y here if you want to use classifiers for incoming packets. Say Y here if you want to use classifiers for incoming packets.
If unsure, say Y. If unsure, say Y.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment