Commit a80886e4 authored by David S. Miller

Merge branch 'ipv6-Move-exceptions-to-fib6_nh-and-make-it-optional-in-a-fib6_info'

David Ahern says:

====================
ipv6: Move exceptions to fib6_nh and make it optional in a fib6_info

Patches 1 and 4 move pcpu and exception caches from fib6_info to fib6_nh.
With respect to the current FIB entries this is only a movement from one
struct to another contained within the first.

Patch 2 refactors the core logic of fib6_drop_pcpu_from into a helper
that is invoked per fib6_nh.

Patch 3 refactors exception handling in a similar way - creating a bunch
of helpers that can be invoked per fib6_nh with the goal of making patch
4 easier to review as well as creating the code needed for nexthop
objects.

Patch 5 makes a fib6_nh at the end of a fib6_info an array similar to
IPv4 and its fib_info. For the current fib entry model, all fib6_info
will have a fib6_nh allocated for it.

Patch 6 refactors ip6_route_del moving the code for deleting an
exception entry into a new function.

Patch 7 adds tests for redirect route exceptions. The new test was
written against 5.1 (before any of the nexthop refactoring). It and the
pmtu.sh selftest exercise the exception code paths - from creating
exceptions to cleaning them up on device delete. All tests pass without
any rcu locking or memleak warnings.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents daeceb2d ec810535
......@@ -2886,7 +2886,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
return false;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct in6_addr *gw;
int ifindex, weight;
......@@ -2958,7 +2958,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
struct net_device *dev;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
val ^= dev->ifindex;
}
......@@ -3960,9 +3960,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct fib6_info *rt = mlxsw_sp_rt6->rt;
if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
&rt->fib6_nh.fib_nh_gw6))
&rt->fib6_nh->fib_nh_gw6))
return nh;
continue;
}
......@@ -4022,13 +4022,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct mlxsw_sp_nexthop *nh;
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
......@@ -4050,7 +4050,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_info *rt = mlxsw_sp_rt6->rt;
rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
}
......@@ -4928,7 +4928,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
return !(rt->fib6_flags & RTF_ADDRCONF) &&
rt->fib6_nh->fib_nh_gw_family;
}
static struct fib6_info *
......@@ -4987,8 +4988,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt,
enum mlxsw_sp_ipip_type *ret)
{
return rt->fib6_nh.fib_nh_dev &&
mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
return rt->fib6_nh->fib_nh_dev &&
mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
}
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
......@@ -4998,7 +4999,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct net_device *dev = rt->fib6_nh.fib_nh_dev;
struct net_device *dev = rt->fib6_nh->fib_nh_dev;
struct mlxsw_sp_rif *rif;
int err;
......@@ -5041,11 +5042,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh,
const struct fib6_info *rt)
{
struct net_device *dev = rt->fib6_nh.fib_nh_dev;
struct net_device *dev = rt->fib6_nh->fib_nh_dev;
nh->nh_grp = nh_grp;
nh->nh_weight = rt->fib6_nh.fib_nh_weight;
memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
nh->nh_weight = rt->fib6_nh->fib_nh_weight;
memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
......@@ -5068,7 +5069,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
return rt->fib6_nh.fib_nh_gw_family ||
return rt->fib6_nh->fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
......
......@@ -131,6 +131,9 @@ struct fib6_nh {
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
#endif
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
};
struct fib6_info {
......@@ -156,22 +159,18 @@ struct fib6_info {
struct rt6key fib6_src;
struct rt6key fib6_prefsrc;
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
u8 exception_bucket_flushed:1,
should_flush:1,
u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
dst_host:1,
fib6_destroying:1,
unused:2;
unused:3;
struct fib6_nh fib6_nh;
struct rcu_head rcu;
struct fib6_nh fib6_nh[0];
};
struct rt6_info {
......@@ -281,7 +280,7 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh);
void fib6_info_destroy_rcu(struct rcu_head *head);
static inline void fib6_info_hold(struct fib6_info *f6i)
......@@ -444,7 +443,7 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
{
return f6i->fib6_nh.fib_nh_dev;
return f6i->fib6_nh->fib_nh_dev;
}
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
......
......@@ -70,7 +70,7 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
/* the RTF_ADDRCONF flag filters out RA's */
return !(f6i->fib6_flags & RTF_ADDRCONF) &&
f6i->fib6_nh.fib_nh_gw_family;
f6i->fib6_nh->fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);
......@@ -275,7 +275,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh;
struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
return nha->fib_nh_dev == nhb->fib_nh_dev &&
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
......
......@@ -2421,9 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
if (no_gw && rt->fib6_nh.fib_nh_gw_family)
if (no_gw && rt->fib6_nh->fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
......@@ -6341,16 +6341,16 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
struct fib6_info *rt = ifa->rt;
struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;
rcu_read_lock();
ifa->rt->dst_nopolicy = val ? true : false;
if (rt->rt6i_pcpu) {
if (nh->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
struct rt6_info **rtp;
rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
addrconf_set_nopolicy(*rtp, val);
}
}
......
......@@ -147,20 +147,18 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
{
struct fib6_info *f6i;
size_t sz = sizeof(*f6i);
f6i = kzalloc(sizeof(*f6i), gfp_flags);
if (with_fib6_nh)
sz += sizeof(struct fib6_nh);
f6i = kzalloc(sz, gfp_flags);
if (!f6i)
return NULL;
f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
if (!f6i->rt6i_pcpu) {
kfree(f6i);
return NULL;
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
......@@ -170,36 +168,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
kfree(bucket);
if (f6i->rt6i_pcpu) {
int cpu;
for_each_possible_cpu(cpu) {
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
if (pcpu_rt) {
dst_dev_put(&pcpu_rt->dst);
dst_release(&pcpu_rt->dst);
*ppcpu_rt = NULL;
}
}
free_percpu(f6i->rt6i_pcpu);
}
fib6_nh_release(&f6i->fib6_nh);
fib6_nh_release(f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
......@@ -899,16 +872,14 @@ static struct fib6_node *fib6_add_1(struct net *net,
return ln;
}
static void fib6_drop_pcpu_from(struct fib6_info *f6i,
const struct fib6_table *table)
static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
const struct fib6_info *match,
const struct fib6_table *table)
{
int cpu;
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
*/
f6i->fib6_destroying = 1;
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
if (!fib6_nh->rt6i_pcpu)
return;
/* release the reference to this fib entry from
* all of its cached pcpu routes
......@@ -917,9 +888,15 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
if (pcpu_rt) {
/* only dropping the 'from' reference if the cached route
* is using 'match'. The cached pcpu_rt->from only changes
* from a fib6_info to NULL (ip6_dst_destroy); it can never
* change from one fib6_info reference to another
*/
if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
......@@ -928,13 +905,27 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
}
}
static void fib6_drop_pcpu_from(struct fib6_info *f6i,
const struct fib6_table *table)
{
struct fib6_nh *fib6_nh;
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
*/
f6i->fib6_destroying = 1;
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
fib6_nh = f6i->fib6_nh;
__fib6_drop_pcpu_from(fib6_nh, f6i, table);
}
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct net *net)
{
struct fib6_table *table = rt->fib6_table;
if (rt->rt6i_pcpu)
fib6_drop_pcpu_from(rt, table);
fib6_drop_pcpu_from(rt, table);
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
......@@ -2314,14 +2305,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
if (rt->fib6_nh.fib_nh_gw_family) {
if (rt->fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
dev = rt->fib6_nh.fib_nh_dev;
dev = rt->fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
......
......@@ -1293,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
rt->fib6_nh.fib_nh_dev, NULL,
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
......@@ -1323,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
rt->fib6_nh.fib_nh_dev, NULL,
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment