Commit 2ae6b594 authored by David S. Miller

Merge branch 'mlxsw-Improve-IPv6-route-insertion-rate'

Ido Schimmel says:

====================
mlxsw: Improve IPv6 route insertion rate

Unlike IPv4, an IPv6 multipath route in the kernel is composed from
multiple sibling routes, each representing a single nexthop.

Therefore, an addition of a multipath route with N nexthops translates
to N in-kernel notifications. This is inefficient for device drivers
that need to program the route to the underlying device. Each time a new
nexthop is appended, a new nexthop group needs to be constructed and the
old one deleted.

This patchset improves the situation by sending a single notification
for a multipath route addition / deletion instead of one per-nexthop.
When adding thousands of multipath routes with 16 nexthops, I measured
an improvement of about x10 in the insertion rate.

Patches #1-#3 add a flag that indicates that in-kernel notifications
need to be suppressed and extend the IPv6 FIB notification info with
information about the number of sibling routes that are being notified.

Patches #4-#5 adjust the two current listeners to these notifications to
ignore notifications about IPv6 multipath routes.

Patches #6-#7 add add / delete notifications for IPv6 multipath routes.

Patches #8-#14 do the same for mlxsw.

Patch #15 finally removes the limitations added in patches #4-#5 and
stops the kernel from sending a notification for each added / deleted
nexthop.

Patch #16 adds test cases.

v2 (David Ahern):
* Remove patch adjusting netdevsim to consume resources for each
  fib6_info. Instead, consume one resource for the entire multipath
  route
* Remove 'multipath_rt' usage in patch #10
* Remove 'multipath_rt' from 'struct fib6_entry_notifier_info' in patch
  #15. The member is only removed in this patch to prevent drivers from
  processing multipath routes twice during the series
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 714a485a 12ee8220
......@@ -377,6 +377,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
/*
 * Payload handed to FIB notifier listeners for an IPv6 route entry event.
 * The generic notifier info is embedded as the first member and a pointer
 * to it is what gets passed to the notifier chain.
 */
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
/* Route the event refers to. */
struct fib6_info *rt;
/*
 * Number of sibling routes covered by this notification, as supplied by
 * the caller of call_fib6_multipath_entry_notifiers().
 */
unsigned int nsiblings;
};
/*
......@@ -450,6 +451,11 @@ int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
struct netlink_ext_ack *extack);
/*
 * Notify in-kernel FIB listeners about an IPv6 multipath route with a
 * single event that carries the route and its sibling count, instead of
 * one event per nexthop. Returns the result of the notifier call.
 */
int call_fib6_multipath_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack);
void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
......
......@@ -378,13 +378,17 @@ struct nla_policy {
/**
 * struct nl_info - netlink source information
 * @nlh: Netlink message header of original request
 * @nl_net: Network namespace
 * @portid: Netlink PORTID of requesting application
 * @skip_notify: Skip netlink notifications to user space
 * @skip_notify_kernel: Skip selected in-kernel notifications
 *
 * Both skip flags are single-bit fields packed into one u8; the former
 * standalone 'bool skip_notify' member is replaced by the bitfield of the
 * same name, so existing 'info->skip_notify = 1' style users keep working.
 */
struct nl_info {
	struct nlmsghdr		*nlh;
	struct net		*nl_net;
	u32			portid;
	u8			skip_notify:1,
				skip_notify_kernel:1;
};
/**
......
......@@ -381,6 +381,22 @@ int call_fib6_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info);
}
int call_fib6_multipath_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
.rt = rt,
.nsiblings = nsiblings,
};
rt->fib6_table->fib_seq++;
return call_fib6_notifiers(net, event_type, &info.info);
}
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
......@@ -1123,11 +1139,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
nlflags |= NLM_F_CREATE;
if (!info->skip_notify_kernel) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD,
rt, extack);
if (err)
return err;
}
rcu_assign_pointer(rt->fib6_next, iter);
fib6_info_hold(rt);
......@@ -1152,11 +1170,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
return -ENOENT;
}
if (!info->skip_notify_kernel) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
if (err)
return err;
}
fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
......@@ -1839,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
if (!info->skip_notify_kernel)
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
fib6_info_release(rt);
}
......
......@@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1;
}
info->skip_notify_kernel = 1;
call_fib6_multipath_entry_notifiers(net,
FIB_EVENT_ENTRY_DEL,
rt,
rt->fib6_nsiblings,
NULL);
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
......@@ -4965,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
......@@ -5042,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
info->skip_notify = 1;
/* For add and replace, send one notification with all nexthops. For
* append, send one notification with all appended nexthops.
*/
info->skip_notify_kernel = 1;
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
......@@ -5078,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++;
}
event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
rt_notif, nhn - 1, extack);
if (err) {
/* Delete all the siblings that were just added */
err_nh = NULL;
goto add_errout;
}
/* success ... tell user about new route */
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
goto cleanup;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment