Commit 68e2617a authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-Remove-RTNL-from-route-insertion-path'

Ido Schimmel says:

====================
mlxsw: Remove RTNL from route insertion path

This patch set removes RTNL from the route insertion path in mlxsw in
order to reduce the control plane latency: the time it takes to push
routes from user space to the kernel and mlxsw.

Up until now mlxsw did not have a lock to protect its shared router data
structures and instead relied on RTNL. While this was simple and worked,
it resulted in large control plane latencies as RTNL was heavily
contended - by both the task receiving the netlink messages from user
space and the mlxsw workqueue that programs the routes to the device.

By removing RTNL and introducing a new router mutex, this patch set
reduces the control plane latency by ~80%. A single mutex is added as
inside mlxsw there is not a lot of concurrency. In addition, a more
fine-grained locking scheme is much more error-prone.

Patches #1-#6 are preparations. They add needed locking in NVE and
multicast routing code instead of relying on RTNL
Patch #7 introduces the new mutex
Patches #8-#12 gradually take the lock in various entry points into the
routing code
Patch #13 removes RTNL in places where it is no longer required
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 732a0dee 9811f7a2
......@@ -548,7 +548,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
......@@ -966,9 +966,6 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid,
enum mlxsw_sp_l3proto proto,
union mlxsw_sp_l3addr *addr);
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
u32 tb_id, __be32 addr);
int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
struct mlxsw_sp_nve_params *params,
struct netlink_ext_ack *extack);
......
......@@ -2,6 +2,7 @@
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <net/devlink.h>
#include "spectrum.h"
......@@ -210,7 +211,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
return err;
rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
i = 0;
start_again:
err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
......@@ -241,14 +242,14 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
devlink_dpipe_entry_ctx_close(dump_ctx);
if (i != rif_count)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
devlink_dpipe_entry_clear(&entry);
return 0;
err_entry_append:
err_entry_get:
err_ctx_prepare:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
devlink_dpipe_entry_clear(&entry);
return err;
}
......@@ -258,7 +259,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
struct mlxsw_sp *mlxsw_sp = priv;
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
......@@ -271,7 +272,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
MLXSW_SP_RIF_COUNTER_EGRESS);
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
}
......@@ -546,7 +547,7 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
int i, j;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
i = 0;
rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
start_again:
......@@ -602,12 +603,12 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
if (i != rif_count)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
err_ctx_prepare:
err_entry_append:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -662,7 +663,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
{
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
struct mlxsw_sp_neigh_entry *neigh_entry;
......@@ -684,7 +685,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
enable);
}
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
}
static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable)
......@@ -701,7 +702,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
u64 size = 0;
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
struct mlxsw_sp_neigh_entry *neigh_entry;
......@@ -721,7 +722,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
size++;
}
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return size;
}
......@@ -1093,7 +1094,7 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
int j;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
start_again:
err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
......@@ -1130,13 +1131,13 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
devlink_dpipe_entry_ctx_close(dump_ctx);
if (nh_count != nh_count_max)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
err_ctx_prepare:
err_entry_append:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -1206,9 +1207,9 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv)
struct mlxsw_sp *mlxsw_sp = priv;
u64 size;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return size;
}
......
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <net/ipv6.h>
......@@ -12,6 +13,7 @@ struct mlxsw_sp_mr {
void *catchall_route_priv;
struct delayed_work stats_update_dw;
struct list_head table_list;
struct mutex table_list_lock; /* Protects table_list */
#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
unsigned long priv[0];
/* priv has to be always the last item */
......@@ -66,6 +68,7 @@ struct mlxsw_sp_mr_table {
u32 vr_id;
struct mlxsw_sp_mr_vif vifs[MAXVIFS];
struct list_head route_list;
struct mutex route_list_lock; /* Protects route_list */
struct rhashtable route_ht;
const struct mlxsw_sp_mr_table_ops *ops;
char catchall_route_priv[];
......@@ -370,11 +373,13 @@ static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
struct mlxsw_sp_mr_route *mr_route)
{
WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock));
mlxsw_sp_mr_mfc_offload_set(mr_route, false);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
list_del(&mr_route->node);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
mlxsw_sp_mr_route_destroy(mr_table, mr_route);
}
......@@ -415,19 +420,21 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
goto err_duplicate_route;
}
/* Write the route to the hardware */
err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
if (err)
goto err_mr_route_write;
/* Put it in the table data-structures */
mutex_lock(&mr_table->route_list_lock);
list_add_tail(&mr_route->node, &mr_table->route_list);
mutex_unlock(&mr_table->route_list_lock);
err = rhashtable_insert_fast(&mr_table->route_ht,
&mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
if (err)
goto err_rhashtable_insert;
/* Write the route to the hardware */
err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
if (err)
goto err_mr_route_write;
/* Destroy the original route */
if (replace) {
rhashtable_remove_fast(&mr_table->route_ht,
......@@ -440,11 +447,12 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
mlxsw_sp_mr_mfc_offload_update(mr_route);
return 0;
err_mr_route_write:
rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
err_rhashtable_insert:
mutex_lock(&mr_table->route_list_lock);
list_del(&mr_route->node);
mutex_unlock(&mr_table->route_list_lock);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
err_mr_route_write:
err_no_orig_route:
err_duplicate_route:
mlxsw_sp_mr_route_destroy(mr_table, mr_route);
......@@ -460,8 +468,11 @@ void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
mr_table->ops->key_create(mr_table, &key, mfc);
mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
mlxsw_sp_mr_route_ht_params);
if (mr_route)
if (mr_route) {
mutex_lock(&mr_table->route_list_lock);
__mlxsw_sp_mr_route_del(mr_table, mr_route);
mutex_unlock(&mr_table->route_list_lock);
}
}
/* Should be called after the VIF struct is updated */
......@@ -910,6 +921,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
mr_table->proto = proto;
mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
INIT_LIST_HEAD(&mr_table->route_list);
mutex_init(&mr_table->route_list_lock);
err = rhashtable_init(&mr_table->route_ht,
&mlxsw_sp_mr_route_ht_params);
......@@ -927,12 +939,15 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
&catchall_route_params);
if (err)
goto err_ops_route_create;
mutex_lock(&mr->table_list_lock);
list_add_tail(&mr_table->node, &mr->table_list);
mutex_unlock(&mr->table_list_lock);
return mr_table;
err_ops_route_create:
rhashtable_destroy(&mr_table->route_ht);
err_route_rhashtable_init:
mutex_destroy(&mr_table->route_list_lock);
kfree(mr_table);
return ERR_PTR(err);
}
......@@ -943,10 +958,13 @@ void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
mutex_lock(&mr->table_list_lock);
list_del(&mr_table->node);
mutex_unlock(&mr->table_list_lock);
mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
&mr_table->catchall_route_priv);
rhashtable_destroy(&mr_table->route_ht);
mutex_destroy(&mr_table->route_list_lock);
kfree(mr_table);
}
......@@ -955,8 +973,10 @@ void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
struct mlxsw_sp_mr_route *mr_route, *tmp;
int i;
mutex_lock(&mr_table->route_list_lock);
list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
__mlxsw_sp_mr_route_del(mr_table, mr_route);
mutex_unlock(&mr_table->route_list_lock);
for (i = 0; i < MAXVIFS; i++) {
mr_table->vifs[i].dev = NULL;
......@@ -1000,12 +1020,15 @@ static void mlxsw_sp_mr_stats_update(struct work_struct *work)
struct mlxsw_sp_mr_route *mr_route;
unsigned long interval;
rtnl_lock();
list_for_each_entry(mr_table, &mr->table_list, node)
mutex_lock(&mr->table_list_lock);
list_for_each_entry(mr_table, &mr->table_list, node) {
mutex_lock(&mr_table->route_list_lock);
list_for_each_entry(mr_route, &mr_table->route_list, node)
mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
mr_route);
rtnl_unlock();
mutex_unlock(&mr_table->route_list_lock);
}
mutex_unlock(&mr->table_list_lock);
interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
......@@ -1024,6 +1047,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
mr->mr_ops = mr_ops;
mlxsw_sp->mr = mr;
INIT_LIST_HEAD(&mr->table_list);
mutex_init(&mr->table_list_lock);
err = mr_ops->init(mlxsw_sp, mr->priv);
if (err)
......@@ -1035,6 +1059,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
return 0;
err:
mutex_destroy(&mr->table_list_lock);
kfree(mr);
return err;
}
......@@ -1045,5 +1070,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
cancel_delayed_work_sync(&mr->stats_update_dw);
mr->mr_ops->fini(mlxsw_sp, mr->priv);
mutex_destroy(&mr->table_list_lock);
kfree(mr);
}
......@@ -713,27 +713,6 @@ static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
}
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
{
WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);
return mlxsw_sp->nve->tunnel_index;
}
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
u32 tb_id, __be32 addr)
{
struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
struct mlxsw_sp_nve_config *config = &nve->config;
if (nve->num_nve_tunnels &&
config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
return true;
return false;
}
static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nve_config *config)
{
......
......@@ -7,6 +7,49 @@
#include "spectrum.h"
#include "reg.h"
struct mlxsw_sp_router_nve_decap {
u32 ul_tb_id;
u32 tunnel_index;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr ul_sip;
u8 valid:1;
};
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif **rifs;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
struct rhashtable nexthop_ht;
struct list_head nexthop_list;
struct {
/* One tree for each protocol: IPv4 and IPv6 */
struct mlxsw_sp_lpm_tree *proto_trees[2];
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
} lpm;
struct {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
struct notifier_block netevent_nb;
struct notifier_block inetaddr_nb;
struct notifier_block inet6addr_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
u32 adj_discard_index;
bool adj_discard_index_valid;
struct mlxsw_sp_router_nve_decap nve_decap_config;
struct mutex lock; /* Protects shared router resources */
};
struct mlxsw_sp_rif_ipip_lb;
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment