Commit 7f81ff04 authored by David S. Miller

Merge branch 'xfrm-remove-flow-cache'

Florian Westphal says:

====================
xfrm: remove flow cache

After RCU-ification of the ipsec packet path there are no major scalability
issues anymore without the flow cache.

We still incur a performance hit, which comes mostly from the extra xfrm
dst allocation/freeing.
The last patch in the series adds a simple percpu cache to avoid the
extra allocation if a packet matched the same policies as the last one.

The main concern with this is that we will see performance drops,
especially with large numbers of policies/SAs.

However, during hallway discussions at nfws 2017 it seemed the issues
with flow caching outweigh the removal downsides, and that it
might be best to just 'remove it' and see where the practical issues
(if any) will appear.

It should now be possible to also remove the genid member in the policies
as we don't hold bundles for prolonged time anymore, but I think
this change is controversial (and intrusive) enough as-is, so defer
that to a later point in time.

Changes since last rfc:

- fix build failures due to implicit interrupt.h includes
- rework last patch (pcpu cache):
 * avoid xchg()
 * check policies for walk.dead = 1 instead of more costly bundle_ok().
 * flush pcpu bundles when sa/policies get removed, to allow module
   references to go away (suggested by Ilan Tayari)
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6ddb4fdf ec30d78c
...@@ -1291,8 +1291,7 @@ tag - INTEGER ...@@ -1291,8 +1291,7 @@ tag - INTEGER
xfrm4_gc_thresh - INTEGER xfrm4_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv4 The threshold at which we will start garbage collecting for IPv4
destination cache entries. At twice this value the system will destination cache entries. At twice this value the system will
refuse new allocations. The value must be set below the flowcache refuse new allocations.
limit (4096 * number of online cpus) to take effect.
igmp_link_local_mcast_reports - BOOLEAN igmp_link_local_mcast_reports - BOOLEAN
Enable IGMP reports for link local multicast groups in the Enable IGMP reports for link local multicast groups in the
...@@ -1778,8 +1777,7 @@ ratelimit - INTEGER ...@@ -1778,8 +1777,7 @@ ratelimit - INTEGER
xfrm6_gc_thresh - INTEGER xfrm6_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv6 The threshold at which we will start garbage collecting for IPv6
destination cache entries. At twice this value the system will destination cache entries. At twice this value the system will
refuse new allocations. The value must be set below the flowcache refuse new allocations.
limit (4096 * number of online cpus) to take effect.
IPv6 Update by: IPv6 Update by:
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include <linux/if_arcnet.h> #include <linux/if_arcnet.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/irqreturn.h> #include <linux/interrupt.h>
/* /*
* RECON_THRESHOLD is the maximum number of RECON messages to receive * RECON_THRESHOLD is the maximum number of RECON messages to receive
......
...@@ -129,6 +129,7 @@ ...@@ -129,6 +129,7 @@
#include <net/dcbnl.h> #include <net/dcbnl.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/interrupt.h>
#define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_NAME "amd-xgbe"
#define XGBE_DRV_VERSION "1.0.3" #define XGBE_DRV_VERSION "1.0.3"
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/interrupt.h>
#include "dwc-xlgmac.h" #include "dwc-xlgmac.h"
#include "dwc-xlgmac-reg.h" #include "dwc-xlgmac-reg.h"
......
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <net/ieee802154_netdev.h> #include <net/ieee802154_netdev.h>
#include <net/mac802154.h> #include <net/mac802154.h>
......
...@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) ...@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
return 0; return 0;
} }
#define FLOW_DIR_IN 0
#define FLOW_DIR_OUT 1
#define FLOW_DIR_FWD 2
struct net;
struct sock;
struct flow_cache_ops;
struct flow_cache_object {
const struct flow_cache_ops *ops;
};
struct flow_cache_ops {
struct flow_cache_object *(*get)(struct flow_cache_object *);
int (*check)(struct flow_cache_object *);
void (*delete)(struct flow_cache_object *);
};
typedef struct flow_cache_object *(*flow_resolve_t)(
struct net *net, const struct flowi *key, u16 family,
u8 dir, struct flow_cache_object *oldobj, void *ctx);
struct flow_cache_object *flow_cache_lookup(struct net *net,
const struct flowi *key, u16 family,
u8 dir, flow_resolve_t resolver,
void *ctx);
int flow_cache_init(struct net *net);
void flow_cache_fini(struct net *net);
void flow_cache_hp_init(void);
void flow_cache_flush(struct net *net);
void flow_cache_flush_deferred(struct net *net);
extern atomic_t flow_cache_genid;
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
......
#ifndef _NET_FLOWCACHE_H
#define _NET_FLOWCACHE_H
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/notifier.h>
struct flow_cache_percpu {
struct hlist_head *hash_table;
unsigned int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
struct tasklet_struct flush_tasklet;
};
struct flow_cache {
u32 hash_shift;
struct flow_cache_percpu __percpu *percpu;
struct hlist_node node;
unsigned int low_watermark;
unsigned int high_watermark;
struct timer_list rnd_timer;
};
#endif /* _NET_FLOWCACHE_H */
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/xfrm.h> #include <linux/xfrm.h>
#include <net/dst_ops.h> #include <net/dst_ops.h>
#include <net/flowcache.h>
struct ctl_table_header; struct ctl_table_header;
...@@ -73,16 +72,6 @@ struct netns_xfrm { ...@@ -73,16 +72,6 @@ struct netns_xfrm {
spinlock_t xfrm_state_lock; spinlock_t xfrm_state_lock;
spinlock_t xfrm_policy_lock; spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex; struct mutex xfrm_cfg_mutex;
/* flow cache part */
struct flow_cache flow_cache_global;
atomic_t flow_cache_genid;
struct list_head flow_cache_gc_list;
atomic_t flow_cache_gc_count;
spinlock_t flow_cache_gc_lock;
struct work_struct flow_cache_gc_work;
struct work_struct flow_cache_flush_work;
struct mutex flow_flush_sem;
}; };
#endif #endif
...@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam ...@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir, void km_policy_notify(struct xfrm_policy *xp, int dir,
const struct km_event *c); const struct km_event *c);
void xfrm_policy_cache_flush(void);
void km_state_notify(struct xfrm_state *x, const struct km_event *c); void km_state_notify(struct xfrm_state *x, const struct km_event *c);
struct xfrm_tmpl; struct xfrm_tmpl;
...@@ -563,7 +564,6 @@ struct xfrm_policy { ...@@ -563,7 +564,6 @@ struct xfrm_policy {
refcount_t refcnt; refcount_t refcnt;
struct timer_list timer; struct timer_list timer;
struct flow_cache_object flo;
atomic_t genid; atomic_t genid;
u32 priority; u32 priority;
u32 index; u32 index;
...@@ -978,7 +978,6 @@ struct xfrm_dst { ...@@ -978,7 +978,6 @@ struct xfrm_dst {
struct rt6_info rt6; struct rt6_info rt6;
} u; } u;
struct dst_entry *route; struct dst_entry *route;
struct flow_cache_object flo;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms; int num_pols, num_xfrms;
u32 xfrm_genid; u32 xfrm_genid;
...@@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) ...@@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
} }
} }
void xfrm_garbage_collect(struct net *net);
void xfrm_garbage_collect_deferred(struct net *net);
#else #else
static inline void xfrm_sk_free_policy(struct sock *sk) {} static inline void xfrm_sk_free_policy(struct sock *sk) {}
...@@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, ...@@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
{ {
return 1; return 1;
} }
static inline void xfrm_garbage_collect(struct net *net)
{
}
#endif #endif
static __inline__ static __inline__
......
...@@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ ...@@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o
......
/* flow.c: Generic flow cache.
*
* Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
* Copyright (C) 2003 David S. Miller (davem@redhat.com)
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
#include <net/net_namespace.h>
struct flow_cache_entry {
union {
struct hlist_node hlist;
struct list_head gc_list;
} u;
struct net *net;
u16 family;
u8 dir;
u32 genid;
struct flowi key;
struct flow_cache_object *object;
};
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
static struct kmem_cache *flow_cachep __read_mostly;
#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
{
struct flow_cache *fc = (void *) arg;
int i;
for_each_possible_cpu(i)
per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&fc->rnd_timer);
}
static int flow_entry_valid(struct flow_cache_entry *fle,
struct netns_xfrm *xfrm)
{
if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
static void flow_entry_kill(struct flow_cache_entry *fle,
struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
}
static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
spin_lock_bh(&xfrm->flow_cache_gc_lock);
list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
flow_entry_kill(fce, xfrm);
atomic_dec(&xfrm->flow_cache_gc_count);
}
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
unsigned int deleted,
struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
atomic_add(deleted, &xfrm->flow_cache_gc_count);
fcp->hash_count -= deleted;
spin_lock_bh(&xfrm->flow_cache_gc_lock);
list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
spin_unlock_bh(&xfrm->flow_cache_gc_lock);
schedule_work(&xfrm->flow_cache_gc_work);
}
}
static void __flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
unsigned int shrink_to)
{
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
unsigned int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
unsigned int saved = 0;
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
hlist_del(&fle->u.hlist);
list_add_tail(&fle->u.gc_list, &gc_list);
}
}
}
flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
__flow_cache_shrink(fc, fcp, shrink_to);
}
static void flow_new_hash_rnd(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
get_random_bytes(&fcp->hash_rnd, sizeof(u32));
fcp->hash_rnd_recalc = 0;
__flow_cache_shrink(fc, fcp, 0);
}
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
const struct flowi *key,
unsigned int keysize)
{
const u32 *k = (const u32 *) key;
const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
return jhash2(k, length, fcp->hash_rnd)
& (flow_cache_hash_size(fc) - 1);
}
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment.
*/
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
unsigned int keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
k1 = (const flow_compare_t *) key1;
k1_lim = k1 + keysize;
k2 = (const flow_compare_t *) key2;
do {
if (*k1++ != *k2++)
return 1;
} while (k1 < k1_lim);
return 0;
}
struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
unsigned int keysize;
unsigned int hash;
local_bh_disable();
fcp = this_cpu_ptr(fc->percpu);
fle = NULL;
flo = NULL;
keysize = flow_key_size(family);
if (!keysize)
goto nocache;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!fcp->hash_table)
goto nocache;
if (fcp->hash_rnd_recalc)
flow_new_hash_rnd(fc, fcp);
hash = flow_hash_code(fc, fcp, key, keysize);
hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
if (tfle->net == net &&
tfle->family == family &&
tfle->dir == dir &&
flow_key_compare(key, &tfle->key, keysize) == 0) {
fle = tfle;
break;
}
}
if (unlikely(!fle)) {
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
if (atomic_read(&net->xfrm.flow_cache_gc_count) >
2 * num_online_cpus() * fc->high_watermark) {
flo = ERR_PTR(-ENOBUFS);
goto ret_object;
}
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
fle->net = net;
fle->family = family;
fle->dir = dir;
memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
fle->object = NULL;
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
flo = flo->ops->get(flo);
if (flo)
goto ret_object;
} else if (fle->object) {
flo = fle->object;
flo->ops->delete(flo);
fle->object = NULL;
}
nocache:
flo = NULL;
if (fle) {
flo = fle->object;
fle->object = NULL;
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
fle->genid--;
} else {
if (!IS_ERR_OR_NULL(flo))
flo->ops->delete(flo);
}
ret_object:
local_bh_enable();
return flo;
}
EXPORT_SYMBOL(flow_cache_lookup);
static void flow_cache_flush_tasklet(unsigned long data)
{
struct flow_flush_info *info = (void *)data;
struct flow_cache *fc = info->cache;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
unsigned int i;
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
hlist_del(&fle->u.hlist);
list_add_tail(&fle->u.gc_list, &gc_list);
}
}
flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
}
/*
* Return whether a cpu needs flushing. Conservatively, we assume
* the presence of any entries means the core may require flushing,
* since the flow_cache_ops.check() function may assume it's running
* on the same core as the per-cpu cache component.
*/
static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp;
unsigned int i;
fcp = per_cpu_ptr(fc->percpu, cpu);
for (i = 0; i < flow_cache_hash_size(fc); i++)
if (!hlist_empty(&fcp->hash_table[i]))
return 0;
return 1;
}
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
struct tasklet_struct *tasklet;
tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
cpumask_var_t mask;
int i, self;
/* Track which cpus need flushing to avoid disturbing all cores. */
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return;
cpumask_clear(mask);
/* Don't want cpus going down or up during this. */
get_online_cpus();
mutex_lock(&net->xfrm.flow_flush_sem);
info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
atomic_set(&info.cpuleft, cpumask_weight(mask));
if (atomic_read(&info.cpuleft) == 0)
goto done;
init_completion(&info.completion);
local_bh_disable();
self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
if (self)
flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
wait_for_completion(&info.completion);
done:
mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
flow_cache_flush_work);
struct net *net = container_of(xfrm, struct net, xfrm);
flow_cache_flush(net);
}
void flow_cache_flush_deferred(struct net *net)
{
schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
if (!fcp->hash_table) {
fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!fcp->hash_table) {
pr_err("NET: failed to allocate flow cache sz %u\n", sz);
return -ENOMEM;
}
fcp->hash_rnd_recalc = 1;
fcp->hash_count = 0;
tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
}
return 0;
}
static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
{
struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
return flow_cache_cpu_prepare(fc, cpu);
}
static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
__flow_cache_shrink(fc, fcp, 0);
return 0;
}
int flow_cache_init(struct net *net)
{
int i;
struct flow_cache *fc = &net->xfrm.flow_cache_global;
if (!flow_cachep)
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_PANIC, NULL);
spin_lock_init(&net->xfrm.flow_cache_gc_lock);
INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
mutex_init(&net->xfrm.flow_flush_sem);
atomic_set(&net->xfrm.flow_cache_gc_count, 0);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
fc->high_watermark = 4 * flow_cache_hash_size(fc);
fc->percpu = alloc_percpu(struct flow_cache_percpu);
if (!fc->percpu)
return -ENOMEM;
if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
goto err;
setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
(unsigned long) fc);
fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&fc->rnd_timer);
return 0;
err:
for_each_possible_cpu(i) {
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
kfree(fcp->hash_table);
fcp->hash_table = NULL;
}
free_percpu(fc->percpu);
fc->percpu = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(flow_cache_init);
void flow_cache_fini(struct net *net)
{
int i;
struct flow_cache *fc = &net->xfrm.flow_cache_global;
del_timer_sync(&fc->rnd_timer);
cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
for_each_possible_cpu(i) {
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
kfree(fcp->hash_table);
fcp->hash_table = NULL;
}
free_percpu(fc->percpu);
fc->percpu = NULL;
}
EXPORT_SYMBOL(flow_cache_fini);
void __init flow_cache_hp_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
"net/flow:prepare",
flow_cache_cpu_up_prep,
flow_cache_cpu_dead);
WARN_ON(ret < 0);
}
...@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { ...@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net, .get_link_net = ip_tunnel_get_link_net,
}; };
static bool is_vti_tunnel(const struct net_device *dev)
{
return dev->netdev_ops == &vti_netdev_ops;
}
static int vti_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ip_tunnel *tunnel = netdev_priv(dev);
if (!is_vti_tunnel(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_DOWN:
if (!net_eq(tunnel->net, dev_net(dev)))
xfrm_garbage_collect(tunnel->net);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block vti_notifier_block __read_mostly = {
.notifier_call = vti_device_event,
};
static int __init vti_init(void) static int __init vti_init(void)
{ {
const char *msg; const char *msg;
...@@ -618,8 +591,6 @@ static int __init vti_init(void) ...@@ -618,8 +591,6 @@ static int __init vti_init(void)
pr_info("IPv4 over IPsec tunneling driver\n"); pr_info("IPv4 over IPsec tunneling driver\n");
register_netdevice_notifier(&vti_notifier_block);
msg = "tunnel device"; msg = "tunnel device";
err = register_pernet_device(&vti_net_ops); err = register_pernet_device(&vti_net_ops);
if (err < 0) if (err < 0)
...@@ -652,7 +623,6 @@ static int __init vti_init(void) ...@@ -652,7 +623,6 @@ static int __init vti_init(void)
xfrm_proto_esp_failed: xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops); unregister_pernet_device(&vti_net_ops);
pernet_dev_failed: pernet_dev_failed:
unregister_netdevice_notifier(&vti_notifier_block);
pr_err("vti init: failed to register %s\n", msg); pr_err("vti init: failed to register %s\n", msg);
return err; return err;
} }
...@@ -664,7 +634,6 @@ static void __exit vti_fini(void) ...@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops); unregister_pernet_device(&vti_net_ops);
unregister_netdevice_notifier(&vti_notifier_block);
} }
module_init(vti_init); module_init(vti_init);
......
...@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) ...@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_tos = iph->tos; fl4->flowi4_tos = iph->tos;
} }
static inline int xfrm4_garbage_collect(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
xfrm_garbage_collect_deferred(net);
return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu) struct sk_buff *skb, u32 mtu)
{ {
...@@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, ...@@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops_template = { static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET, .family = AF_INET,
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu, .update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect, .redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic, .cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy, .destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown, .ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out, .local_out = __ip_local_out,
.gc_thresh = INT_MAX, .gc_thresh = 32768,
}; };
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
......
...@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { ...@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100, .priority = 100,
}; };
static bool is_vti6_tunnel(const struct net_device *dev)
{
return dev->netdev_ops == &vti6_netdev_ops;
}
static int vti6_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ip6_tnl *t = netdev_priv(dev);
if (!is_vti6_tunnel(dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_DOWN:
if (!net_eq(t->net, dev_net(dev)))
xfrm_garbage_collect(t->net);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block vti6_notifier_block __read_mostly = {
.notifier_call = vti6_device_event,
};
/** /**
* vti6_tunnel_init - register protocol and reserve needed resources * vti6_tunnel_init - register protocol and reserve needed resources
* *
...@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void) ...@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
const char *msg; const char *msg;
int err; int err;
register_netdevice_notifier(&vti6_notifier_block);
msg = "tunnel device"; msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops); err = register_pernet_device(&vti6_net_ops);
if (err < 0) if (err < 0)
...@@ -1216,7 +1187,6 @@ static int __init vti6_tunnel_init(void) ...@@ -1216,7 +1187,6 @@ static int __init vti6_tunnel_init(void)
xfrm_proto_esp_failed: xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops); unregister_pernet_device(&vti6_net_ops);
pernet_dev_failed: pernet_dev_failed:
unregister_netdevice_notifier(&vti6_notifier_block);
pr_err("vti6 init: failed to register %s\n", msg); pr_err("vti6 init: failed to register %s\n", msg);
return err; return err;
} }
...@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void) ...@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops); unregister_pernet_device(&vti6_net_ops);
unregister_netdevice_notifier(&vti6_notifier_block);
} }
module_init(vti6_tunnel_init); module_init(vti6_tunnel_init);
......
...@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) ...@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
} }
} }
static inline int xfrm6_garbage_collect(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
xfrm_garbage_collect_deferred(net);
return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu) struct sk_buff *skb, u32 mtu)
{ {
...@@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, ...@@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm6_dst_ops_template = { static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6, .family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu, .update_pmtu = xfrm6_update_pmtu,
.redirect = xfrm6_redirect, .redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic, .cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy, .destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown, .ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out, .local_out = __ip6_local_out,
.gc_thresh = INT_MAX, .gc_thresh = 32768,
}; };
static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
......
...@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa ...@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out: out:
xfrm_pol_put(xp); xfrm_pol_put(xp);
if (err == 0)
xfrm_garbage_collect(net);
return err; return err;
} }
...@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ ...@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out: out:
xfrm_pol_put(xp); xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err; return err;
} }
...@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad ...@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
int err, err2; int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
if (!err)
xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr); err2 = unicast_flush_resp(sk, hdr);
if (err || err2) { if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */ if (err == -ESRCH) /* empty table - old silent behavior */
......
...@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev) ...@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
static int xfrm_dev_unregister(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev)
{ {
xfrm_policy_cache_flush();
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev) ...@@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
if (dev->features & NETIF_F_HW_ESP) if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_dev_state_flush(dev_net(dev), dev, true);
xfrm_garbage_collect(dev_net(dev)); xfrm_policy_cache_flush();
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h> #include <linux/audit.h>
#include <net/dst.h> #include <net/dst.h>
#include <net/flow.h> #include <net/flow.h>
...@@ -44,6 +45,8 @@ struct xfrm_flo { ...@@ -44,6 +45,8 @@ struct xfrm_flo {
u8 flags; u8 flags;
}; };
static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
static struct work_struct *xfrm_pcpu_work __read_mostly;
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly; __read_mostly;
...@@ -246,36 +249,6 @@ static void xfrm_policy_timer(unsigned long data) ...@@ -246,36 +249,6 @@ static void xfrm_policy_timer(unsigned long data)
xfrm_pol_put(xp); xfrm_pol_put(xp);
} }
static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
{
struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
if (unlikely(pol->walk.dead))
flo = NULL;
else
xfrm_pol_hold(pol);
return flo;
}
static int xfrm_policy_flo_check(struct flow_cache_object *flo)
{
struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
return !pol->walk.dead;
}
static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
{
xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
}
static const struct flow_cache_ops xfrm_policy_fc_ops = {
.get = xfrm_policy_flo_get,
.check = xfrm_policy_flo_check,
.delete = xfrm_policy_flo_delete,
};
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
* SPD calls. * SPD calls.
*/ */
...@@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) ...@@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
(unsigned long)policy); (unsigned long)policy);
setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
(unsigned long)policy); (unsigned long)policy);
policy->flo.ops = &xfrm_policy_fc_ops;
} }
return policy; return policy;
} }
...@@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
else else
hlist_add_head(&policy->bydst, chain); hlist_add_head(&policy->bydst, chain);
__xfrm_policy_link(policy, dir); __xfrm_policy_link(policy, dir);
atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */ /* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET) if (policy->family == AF_INET)
...@@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
} }
if (!cnt) if (!cnt)
err = -ESRCH; err = -ESRCH;
else
xfrm_policy_cache_flush();
out: out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err; return err;
...@@ -1175,7 +1148,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ...@@ -1175,7 +1148,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
} }
static struct xfrm_policy * static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{ {
#ifdef CONFIG_XFRM_SUB_POLICY #ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol; struct xfrm_policy *pol;
...@@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir ...@@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
} }
static int flow_to_policy_dir(int dir)
{
if (XFRM_POLICY_IN == FLOW_DIR_IN &&
XFRM_POLICY_OUT == FLOW_DIR_OUT &&
XFRM_POLICY_FWD == FLOW_DIR_FWD)
return dir;
switch (dir) {
default:
case FLOW_DIR_IN:
return XFRM_POLICY_IN;
case FLOW_DIR_OUT:
return XFRM_POLICY_OUT;
case FLOW_DIR_FWD:
return XFRM_POLICY_FWD;
}
}
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
struct xfrm_policy *pol;
if (old_obj)
xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
if (IS_ERR_OR_NULL(pol))
return ERR_CAST(pol);
/* Resolver returns two references:
* one for cache and one for caller of flow_cache_lookup() */
xfrm_pol_hold(pol);
return &pol->flo;
}
static inline int policy_to_flow_dir(int dir)
{
if (XFRM_POLICY_IN == FLOW_DIR_IN &&
XFRM_POLICY_OUT == FLOW_DIR_OUT &&
XFRM_POLICY_FWD == FLOW_DIR_FWD)
return dir;
switch (dir) {
default:
case XFRM_POLICY_IN:
return FLOW_DIR_IN;
case XFRM_POLICY_OUT:
return FLOW_DIR_OUT;
case XFRM_POLICY_FWD:
return FLOW_DIR_FWD;
}
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl, u16 family) const struct flowi *fl, u16 family)
{ {
...@@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, ...@@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
} }
err = security_xfrm_policy_lookup(pol->security, err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid, fl->flowi_secid,
policy_to_flow_dir(dir)); dir);
if (!err) { if (!err) {
if (!xfrm_pol_hold_rcu(pol)) if (!xfrm_pol_hold_rcu(pol))
goto again; goto again;
...@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) ...@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
return tos; return tos;
} }
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
if (xdst->route == NULL) {
/* Dummy bundle - if it has xfrms we were not
* able to build bundle as template resolution failed.
* It means we need to try again resolving. */
if (xdst->num_xfrms > 0)
return NULL;
} else if (dst->flags & DST_XFRM_QUEUE) {
return NULL;
} else {
/* Real bundle */
if (stale_bundle(dst))
return NULL;
}
dst_hold(dst);
return flo;
}
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
if (!xdst->route)
return 0;
if (stale_bundle(dst))
return 0;
return 1;
}
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
dst->obsolete = DST_OBSOLETE_DEAD;
dst_release_immediate(dst);
}
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
.get = xfrm_bundle_flo_get,
.check = xfrm_bundle_flo_check,
.delete = xfrm_bundle_flo_delete,
};
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{ {
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
...@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) ...@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
struct dst_entry *dst = &xdst->u.dst; struct dst_entry *dst = &xdst->u.dst;
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
xdst->flo.ops = &xfrm_bundle_fc_ops;
} else } else
xdst = ERR_PTR(-ENOBUFS); xdst = ERR_PTR(-ENOBUFS);
...@@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, ...@@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
} }
/*
 * Replace the current CPU's cached bundle.
 *
 * @xdst: new value for this CPU's xfrm_last_dst slot (may be NULL to
 *        empty the slot).
 * @old:  bundle whose reference is dropped afterwards; NULL means there
 *        is nothing to release.  In the callers visible in this file it
 *        is the value previously read from the same per-cpu slot.
 *
 * The slot is overwritten first and only then is @old released.
 */
static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
{
	this_cpu_write(xfrm_last_dst, xdst);

	if (!old)
		return;

	dst_release(&old->u.dst);
}
/*
 * Drop the current CPU's cached bundle if it no longer passes
 * xfrm_bundle_ok().
 *
 * An empty slot, or a bundle that is still ok, is left untouched.
 * Otherwise the slot is cleared and the bundle released through
 * xfrm_last_dst_update().  Both callers in this file invoke it with
 * bottom halves disabled and the RCU read lock held.
 */
static void __xfrm_pcpu_work_fn(void)
{
	struct xfrm_dst *cached = this_cpu_read(xfrm_last_dst);

	if (!cached)
		return;

	if (xfrm_bundle_ok(cached))
		return;

	xfrm_last_dst_update(NULL, cached);
}
/*
 * Work item handler for the per-cpu xfrm_pcpu_work[] entries (set up with
 * INIT_WORK() in xfrm_init() and queued on a specific CPU with
 * schedule_work_on() by xfrm_policy_cache_flush()).
 *
 * @work: the work item being run; not used by the handler.
 *
 * Runs __xfrm_pcpu_work_fn() for the CPU executing the work, bracketed by
 * local_bh_disable()/rcu_read_lock() and their matching unlock/enable
 * calls in reverse order.
 */
static void xfrm_pcpu_work_fn(struct work_struct *work)
{
/* NOTE(review): BHs are presumably disabled to keep the packet path on
 * this CPU from touching xfrm_last_dst concurrently - confirm.
 */
local_bh_disable();
rcu_read_lock();
__xfrm_pcpu_work_fn();
rcu_read_unlock();
local_bh_enable();
}
void xfrm_policy_cache_flush(void)
{
struct xfrm_dst *old;
bool found = 0;
int cpu;
local_bh_disable();
rcu_read_lock();
for_each_possible_cpu(cpu) {
old = per_cpu(xfrm_last_dst, cpu);
if (old && !xfrm_bundle_ok(old)) {
if (smp_processor_id() == cpu) {
__xfrm_pcpu_work_fn();
continue;
}
found = true;
break;
}
}
rcu_read_unlock();
local_bh_enable();
if (!found)
return;
get_online_cpus();
for_each_possible_cpu(cpu) {
bool bundle_release;
rcu_read_lock();
old = per_cpu(xfrm_last_dst, cpu);
bundle_release = old && !xfrm_bundle_ok(old);
rcu_read_unlock();
if (!bundle_release)
continue;
if (cpu_online(cpu)) {
schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
continue;
}
rcu_read_lock();
old = per_cpu(xfrm_last_dst, cpu);
if (old && !xfrm_bundle_ok(old)) {
per_cpu(xfrm_last_dst, cpu) = NULL;
dst_release(&old->u.dst);
}
rcu_read_unlock();
}
put_online_cpus();
}
static bool xfrm_pol_dead(struct xfrm_dst *xdst)
{
unsigned int num_pols = xdst->num_pols;
unsigned int pol_dead = 0, i;
for (i = 0; i < num_pols; i++)
pol_dead |= xdst->pols[i]->walk.dead;
/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
if (pol_dead)
xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
return pol_dead;
}
static struct xfrm_dst * static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
const struct flowi *fl, u16 family, const struct flowi *fl, u16 family,
...@@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, ...@@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
{ {
struct net *net = xp_net(pols[0]); struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct xfrm_dst *xdst, *old;
struct dst_entry *dst; struct dst_entry *dst;
struct xfrm_dst *xdst;
int err; int err;
xdst = this_cpu_read(xfrm_last_dst);
if (xdst &&
xdst->u.dst.dev == dst_orig->dev &&
xdst->num_pols == num_pols &&
!xfrm_pol_dead(xdst) &&
memcmp(xdst->pols, pols,
sizeof(struct xfrm_policy *) * num_pols) == 0) {
dst_hold(&xdst->u.dst);
return xdst;
}
old = xdst;
/* Try to instantiate a bundle */ /* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err <= 0) { if (err <= 0) {
...@@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, ...@@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid); xdst->policy_genid = atomic_read(&pols[0]->genid);
atomic_set(&xdst->u.dst.__refcnt, 2);
xfrm_last_dst_update(xdst, old);
return xdst; return xdst;
} }
...@@ -2051,45 +2027,17 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, ...@@ -2051,45 +2027,17 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
goto out; goto out;
} }
static struct flow_cache_object * static struct xfrm_dst *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
struct flow_cache_object *oldflo, void *ctx)
{ {
struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, err;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead; struct xfrm_dst *xdst;
/* Check if the policies from old bundle are usable */
xdst = NULL;
if (oldflo) {
xdst = container_of(oldflo, struct xfrm_dst, flo);
num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms;
pol_dead = 0;
for (i = 0; i < num_pols; i++) {
pols[i] = xdst->pols[i];
pol_dead |= pols[i]->walk.dead;
}
if (pol_dead) {
/* Mark DST_OBSOLETE_DEAD to fail the next
* xfrm_dst_check()
*/
xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
dst_release_immediate(&xdst->u.dst);
xdst = NULL;
num_pols = 0;
num_xfrms = 0;
oldflo = NULL;
}
}
/* Resolve policies to use if we couldn't get them from /* Resolve policies to use if we couldn't get them from
* previous cache entry */ * previous cache entry */
if (xdst == NULL) {
num_pols = 1; num_pols = 1;
pols[0] = __xfrm_policy_lookup(net, fl, family, pols[0] = xfrm_policy_lookup(net, fl, family, dir);
flow_to_policy_dir(dir));
err = xfrm_expand_policies(fl, family, pols, err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms); &num_pols, &num_xfrms);
if (err < 0) if (err < 0)
...@@ -2098,39 +2046,20 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, ...@@ -2098,39 +2046,20 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
return NULL; return NULL;
if (num_xfrms <= 0) if (num_xfrms <= 0)
goto make_dummy_bundle; goto make_dummy_bundle;
}
new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
xflo->dst_orig); xflo->dst_orig);
if (IS_ERR(new_xdst)) { if (IS_ERR(xdst)) {
err = PTR_ERR(new_xdst); err = PTR_ERR(xdst);
if (err != -EAGAIN) if (err != -EAGAIN)
goto error; goto error;
if (oldflo == NULL)
goto make_dummy_bundle; goto make_dummy_bundle;
dst_hold(&xdst->u.dst); } else if (xdst == NULL) {
return oldflo;
} else if (new_xdst == NULL) {
num_xfrms = 0; num_xfrms = 0;
if (oldflo == NULL)
goto make_dummy_bundle; goto make_dummy_bundle;
xdst->num_xfrms = 0;
dst_hold(&xdst->u.dst);
return oldflo;
}
/* Kill the previous bundle */
if (xdst) {
/* The policies were stolen for newly generated bundle */
xdst->num_pols = 0;
/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
dst_release_immediate(&xdst->u.dst);
} }
/* We do need to return one reference for original caller */ return xdst;
dst_hold(&new_xdst->u.dst);
return &new_xdst->flo;
make_dummy_bundle: make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate: /* We found policies, but there's no bundles to instantiate:
...@@ -2146,16 +2075,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, ...@@ -2146,16 +2075,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
dst_hold(&xdst->u.dst); dst_hold(&xdst->u.dst);
return &xdst->flo; return xdst;
inc_error: inc_error:
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error: error:
if (xdst != NULL) {
/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
dst_release_immediate(&xdst->u.dst);
} else
xfrm_pols_put(pols, num_pols); xfrm_pols_put(pols, num_pols);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, ...@@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct sock *sk, int flags) const struct sock *sk, int flags)
{ {
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct flow_cache_object *flo;
struct xfrm_dst *xdst; struct xfrm_dst *xdst;
struct dst_entry *dst, *route; struct dst_entry *dst, *route;
u16 family = dst_orig->ops->family; u16 family = dst_orig->ops->family;
u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); u8 dir = XFRM_POLICY_OUT;
int i, err, num_pols, num_xfrms = 0, drop_pols = 0; int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
dst = NULL; dst = NULL;
...@@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, ...@@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
!net->xfrm.policy_count[XFRM_POLICY_OUT]) !net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol; goto nopol;
flo = flow_cache_lookup(net, fl, family, dir, xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
xfrm_bundle_lookup, &xflo); if (xdst == NULL)
if (flo == NULL)
goto nopol; goto nopol;
if (IS_ERR(flo)) { if (IS_ERR(xdst)) {
err = PTR_ERR(flo); err = PTR_ERR(xdst);
goto dropdst; goto dropdst;
} }
xdst = container_of(flo, struct xfrm_dst, flo);
num_pols = xdst->num_pols; num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms; num_xfrms = xdst->num_xfrms;
...@@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ...@@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int pi; int pi;
int reverse; int reverse;
struct flowi fl; struct flowi fl;
u8 fl_dir;
int xerr_idx = -1; int xerr_idx = -1;
reverse = dir & ~XFRM_POLICY_MASK; reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK;
fl_dir = policy_to_flow_dir(dir);
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
...@@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ...@@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
} }
} }
if (!pol) { if (!pol)
struct flow_cache_object *flo; pol = xfrm_policy_lookup(net, &fl, family, dir);
flo = flow_cache_lookup(net, &fl, family, fl_dir,
xfrm_policy_lookup, NULL);
if (IS_ERR_OR_NULL(flo))
pol = ERR_CAST(flo);
else
pol = container_of(flo, struct xfrm_policy, flo);
}
if (IS_ERR(pol)) { if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
...@@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) ...@@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* notice. That's what we are validating here via the * notice. That's what we are validating here via the
* stale_bundle() check. * stale_bundle() check.
* *
* When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
* be marked on it.
* When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
* be marked on it. * be marked on it.
* Both will force stable_bundle() to fail on any xdst bundle with * This will force stale_bundle() to fail on any xdst bundle with
* this dst linked in it. * this dst linked in it.
*/ */
if (dst->obsolete < 0 && !stale_bundle(dst)) if (dst->obsolete < 0 && !stale_bundle(dst))
...@@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) ...@@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst; return dst;
} }
void xfrm_garbage_collect(struct net *net)
{
flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
void xfrm_garbage_collect_deferred(struct net *net)
{
flow_cache_flush_deferred(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
static void xfrm_init_pmtu(struct dst_entry *dst) static void xfrm_init_pmtu(struct dst_entry *dst)
{ {
do { do {
...@@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net) ...@@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_sysctl_init(net); rv = xfrm_sysctl_init(net);
if (rv < 0) if (rv < 0)
goto out_sysctl; goto out_sysctl;
rv = flow_cache_init(net);
if (rv < 0)
goto out;
return 0; return 0;
out:
xfrm_sysctl_fini(net);
out_sysctl: out_sysctl:
xfrm_policy_fini(net); xfrm_policy_fini(net);
out_policy: out_policy:
...@@ -3054,7 +2946,6 @@ static int __net_init xfrm_net_init(struct net *net) ...@@ -3054,7 +2946,6 @@ static int __net_init xfrm_net_init(struct net *net)
static void __net_exit xfrm_net_exit(struct net *net) static void __net_exit xfrm_net_exit(struct net *net)
{ {
flow_cache_fini(net);
xfrm_sysctl_fini(net); xfrm_sysctl_fini(net);
xfrm_policy_fini(net); xfrm_policy_fini(net);
xfrm_state_fini(net); xfrm_state_fini(net);
...@@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { ...@@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void) void __init xfrm_init(void)
{ {
flow_cache_hp_init(); int i;
xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
GFP_KERNEL);
BUG_ON(!xfrm_pcpu_work);
for (i = 0; i < NR_CPUS; i++)
INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
register_pernet_subsys(&xfrm_net_ops); register_pernet_subsys(&xfrm_net_ops);
seqcount_init(&xfrm_policy_hash_generation); seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init(); xfrm_input_init();
......
...@@ -724,9 +724,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) ...@@ -724,9 +724,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
} }
} }
} }
if (cnt) if (cnt) {
err = 0; err = 0;
xfrm_policy_cache_flush();
}
out: out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock); spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return err; return err;
......
...@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out: out:
xfrm_pol_put(xp); xfrm_pol_put(xp);
if (delete && err == 0)
xfrm_garbage_collect(net);
return err; return err;
} }
...@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0; return 0;
return err; return err;
} }
xfrm_garbage_collect(net);
c.data.type = type; c.data.type = type;
c.event = nlh->nlmsg_type; c.event = nlh->nlmsg_type;
......
...@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void) ...@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
struct net *net; struct net *net;
rtnl_lock(); rtnl_lock();
for_each_net(net) { for_each_net(net)
atomic_inc(&net->xfrm.flow_cache_genid);
rt_genid_bump_all(net); rt_genid_bump_all(net);
}
rtnl_unlock(); rtnl_unlock();
} }
#else #else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment