Commit 68e2617a authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-Remove-RTNL-from-route-insertion-path'

Ido Schimmel says:

====================
mlxsw: Remove RTNL from route insertion path

This patch set removes RTNL from the route insertion path in mlxsw in
order to reduce the control plane latency: the time it takes to push
routes from user space to the kernel and mlxsw.

Up until now mlxsw did not have a lock to protect its shared router data
structures and instead relied on RTNL. While this was simple and worked,
it resulted in large control plane latencies as RTNL was heavily
contended - by both the task receiving the netlink messages from user
space and the mlxsw workqueue that programs the routes to the device.

By removing RTNL and introducing a new router mutex, this patch set
reduces the control plane latency by ~80%. A single mutex is added as
inside mlxsw there is not a lot of concurrency. In addition, a more
fine-grained locking scheme is much more error-prone.

Patches #1-#6 are preparations. They add needed locking in NVE and
multicast routing code instead of relying on RTNL
Patch #7 introduces the new mutex
Patches #8-#12 gradually take the lock in various entry points into the
routing code
Patch #13 removes RTNL in places where it is no longer required
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 732a0dee 9811f7a2
......@@ -548,7 +548,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
struct net_device *l3_dev,
......@@ -966,9 +966,6 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid,
enum mlxsw_sp_l3proto proto,
union mlxsw_sp_l3addr *addr);
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
u32 tb_id, __be32 addr);
int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
struct mlxsw_sp_nve_params *params,
struct netlink_ext_ack *extack);
......
......@@ -2,6 +2,7 @@
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <net/devlink.h>
#include "spectrum.h"
......@@ -210,7 +211,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
return err;
rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
i = 0;
start_again:
err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
......@@ -241,14 +242,14 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
devlink_dpipe_entry_ctx_close(dump_ctx);
if (i != rif_count)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
devlink_dpipe_entry_clear(&entry);
return 0;
err_entry_append:
err_entry_get:
err_ctx_prepare:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
devlink_dpipe_entry_clear(&entry);
return err;
}
......@@ -258,7 +259,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
struct mlxsw_sp *mlxsw_sp = priv;
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
......@@ -271,7 +272,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
MLXSW_SP_RIF_COUNTER_EGRESS);
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
}
......@@ -546,7 +547,7 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
int i, j;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
i = 0;
rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
start_again:
......@@ -602,12 +603,12 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
if (i != rif_count)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
err_ctx_prepare:
err_entry_append:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -662,7 +663,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
{
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
struct mlxsw_sp_neigh_entry *neigh_entry;
......@@ -684,7 +685,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
enable);
}
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
}
static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable)
......@@ -701,7 +702,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
u64 size = 0;
int i;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
struct mlxsw_sp_neigh_entry *neigh_entry;
......@@ -721,7 +722,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
size++;
}
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return size;
}
......@@ -1093,7 +1094,7 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
int j;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
start_again:
err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
......@@ -1130,13 +1131,13 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
devlink_dpipe_entry_ctx_close(dump_ctx);
if (nh_count != nh_count_max)
goto start_again;
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
err_ctx_prepare:
err_entry_append:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -1206,9 +1207,9 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv)
struct mlxsw_sp *mlxsw_sp = priv;
u64 size;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return size;
}
......
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <net/ipv6.h>
......@@ -12,6 +13,7 @@ struct mlxsw_sp_mr {
void *catchall_route_priv;
struct delayed_work stats_update_dw;
struct list_head table_list;
struct mutex table_list_lock; /* Protects table_list */
#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
unsigned long priv[0];
/* priv has to be always the last item */
......@@ -66,6 +68,7 @@ struct mlxsw_sp_mr_table {
u32 vr_id;
struct mlxsw_sp_mr_vif vifs[MAXVIFS];
struct list_head route_list;
struct mutex route_list_lock; /* Protects route_list */
struct rhashtable route_ht;
const struct mlxsw_sp_mr_table_ops *ops;
char catchall_route_priv[];
......@@ -370,11 +373,13 @@ static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
struct mlxsw_sp_mr_route *mr_route)
{
WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock));
mlxsw_sp_mr_mfc_offload_set(mr_route, false);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
list_del(&mr_route->node);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
mlxsw_sp_mr_route_destroy(mr_table, mr_route);
}
......@@ -415,19 +420,21 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
goto err_duplicate_route;
}
/* Write the route to the hardware */
err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
if (err)
goto err_mr_route_write;
/* Put it in the table data-structures */
mutex_lock(&mr_table->route_list_lock);
list_add_tail(&mr_route->node, &mr_table->route_list);
mutex_unlock(&mr_table->route_list_lock);
err = rhashtable_insert_fast(&mr_table->route_ht,
&mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
if (err)
goto err_rhashtable_insert;
/* Write the route to the hardware */
err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
if (err)
goto err_mr_route_write;
/* Destroy the original route */
if (replace) {
rhashtable_remove_fast(&mr_table->route_ht,
......@@ -440,11 +447,12 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
mlxsw_sp_mr_mfc_offload_update(mr_route);
return 0;
err_mr_route_write:
rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
mlxsw_sp_mr_route_ht_params);
err_rhashtable_insert:
mutex_lock(&mr_table->route_list_lock);
list_del(&mr_route->node);
mutex_unlock(&mr_table->route_list_lock);
mlxsw_sp_mr_route_erase(mr_table, mr_route);
err_mr_route_write:
err_no_orig_route:
err_duplicate_route:
mlxsw_sp_mr_route_destroy(mr_table, mr_route);
......@@ -460,8 +468,11 @@ void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
mr_table->ops->key_create(mr_table, &key, mfc);
mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
mlxsw_sp_mr_route_ht_params);
if (mr_route)
if (mr_route) {
mutex_lock(&mr_table->route_list_lock);
__mlxsw_sp_mr_route_del(mr_table, mr_route);
mutex_unlock(&mr_table->route_list_lock);
}
}
/* Should be called after the VIF struct is updated */
......@@ -910,6 +921,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
mr_table->proto = proto;
mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
INIT_LIST_HEAD(&mr_table->route_list);
mutex_init(&mr_table->route_list_lock);
err = rhashtable_init(&mr_table->route_ht,
&mlxsw_sp_mr_route_ht_params);
......@@ -927,12 +939,15 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
&catchall_route_params);
if (err)
goto err_ops_route_create;
mutex_lock(&mr->table_list_lock);
list_add_tail(&mr_table->node, &mr->table_list);
mutex_unlock(&mr->table_list_lock);
return mr_table;
err_ops_route_create:
rhashtable_destroy(&mr_table->route_ht);
err_route_rhashtable_init:
mutex_destroy(&mr_table->route_list_lock);
kfree(mr_table);
return ERR_PTR(err);
}
......@@ -943,10 +958,13 @@ void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
mutex_lock(&mr->table_list_lock);
list_del(&mr_table->node);
mutex_unlock(&mr->table_list_lock);
mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
&mr_table->catchall_route_priv);
rhashtable_destroy(&mr_table->route_ht);
mutex_destroy(&mr_table->route_list_lock);
kfree(mr_table);
}
......@@ -955,8 +973,10 @@ void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
struct mlxsw_sp_mr_route *mr_route, *tmp;
int i;
mutex_lock(&mr_table->route_list_lock);
list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
__mlxsw_sp_mr_route_del(mr_table, mr_route);
mutex_unlock(&mr_table->route_list_lock);
for (i = 0; i < MAXVIFS; i++) {
mr_table->vifs[i].dev = NULL;
......@@ -1000,12 +1020,15 @@ static void mlxsw_sp_mr_stats_update(struct work_struct *work)
struct mlxsw_sp_mr_route *mr_route;
unsigned long interval;
rtnl_lock();
list_for_each_entry(mr_table, &mr->table_list, node)
mutex_lock(&mr->table_list_lock);
list_for_each_entry(mr_table, &mr->table_list, node) {
mutex_lock(&mr_table->route_list_lock);
list_for_each_entry(mr_route, &mr_table->route_list, node)
mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
mr_route);
rtnl_unlock();
mutex_unlock(&mr_table->route_list_lock);
}
mutex_unlock(&mr->table_list_lock);
interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
......@@ -1024,6 +1047,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
mr->mr_ops = mr_ops;
mlxsw_sp->mr = mr;
INIT_LIST_HEAD(&mr->table_list);
mutex_init(&mr->table_list_lock);
err = mr_ops->init(mlxsw_sp, mr->priv);
if (err)
......@@ -1035,6 +1059,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
return 0;
err:
mutex_destroy(&mr->table_list_lock);
kfree(mr);
return err;
}
......@@ -1045,5 +1070,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
cancel_delayed_work_sync(&mr->stats_update_dw);
mr->mr_ops->fini(mlxsw_sp, mr->priv);
mutex_destroy(&mr->table_list_lock);
kfree(mr);
}
......@@ -713,27 +713,6 @@ static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
}
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
{
WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);
return mlxsw_sp->nve->tunnel_index;
}
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
u32 tb_id, __be32 addr)
{
struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
struct mlxsw_sp_nve_config *config = &nve->config;
if (nve->num_nve_tunnels &&
config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
return true;
return false;
}
static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nve_config *config)
{
......
......@@ -17,6 +17,7 @@
#include <linux/refcount.h>
#include <linux/jhash.h>
#include <linux/net_namespace.h>
#include <linux/mutex.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
......@@ -48,39 +49,6 @@ struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif **rifs;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
struct rhashtable nexthop_ht;
struct list_head nexthop_list;
struct {
/* One tree for each protocol: IPv4 and IPv6 */
struct mlxsw_sp_lpm_tree *proto_trees[2];
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
} lpm;
struct {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
struct notifier_block netevent_nb;
struct notifier_block inetaddr_nb;
struct notifier_block inet6addr_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
u32 adj_discard_index;
bool adj_discard_index_valid;
};
struct mlxsw_sp_rif {
struct list_head nexthop_list;
struct list_head neigh_list;
......@@ -763,13 +731,18 @@ int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
u16 *vr_id)
{
struct mlxsw_sp_vr *vr;
int err = 0;
mutex_lock(&mlxsw_sp->router->lock);
vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
if (!vr)
return -ESRCH;
if (!vr) {
err = -ESRCH;
goto out;
}
*vr_id = vr->id;
return 0;
out:
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
......@@ -1378,10 +1351,16 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
return NULL;
}
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
bool is_ipip_ul;
mutex_lock(&mlxsw_sp->router->lock);
is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
mutex_unlock(&mlxsw_sp->router->lock);
return is_ipip_ul;
}
static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
......@@ -1753,35 +1732,41 @@ int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
{
struct netdev_notifier_changeupper_info *chup;
struct netlink_ext_ack *extack;
int err = 0;
mutex_lock(&mlxsw_sp->router->lock);
switch (event) {
case NETDEV_REGISTER:
return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
break;
case NETDEV_UNREGISTER:
mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
return 0;
break;
case NETDEV_UP:
mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
return 0;
break;
case NETDEV_DOWN:
mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
return 0;
break;
case NETDEV_CHANGEUPPER:
chup = container_of(info, typeof(*chup), info);
extack = info->extack;
if (netif_is_l3_master(chup->upper_dev))
return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
ol_dev,
extack);
return 0;
break;
case NETDEV_CHANGE:
extack = info->extack;
return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
ol_dev, extack);
break;
case NETDEV_CHANGEMTU:
return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
break;
}
return 0;
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
static int
......@@ -1825,8 +1810,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
struct netdev_notifier_info *info)
{
struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
int err;
int err = 0;
mutex_lock(&mlxsw_sp->router->lock);
while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
ul_dev,
ipip_entry))) {
......@@ -1839,7 +1825,7 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
if (err) {
mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
ul_dev);
return err;
break;
}
if (demote_this) {
......@@ -1856,8 +1842,9 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
ipip_entry = prev;
}
}
mutex_unlock(&mlxsw_sp->router->lock);
return 0;
return err;
}
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
......@@ -1866,8 +1853,22 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
u32 tunnel_index)
{
enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_fib_entry *fib_entry;
int err;
int err = 0;
mutex_lock(&mlxsw_sp->router->lock);
if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
err = -EINVAL;
goto out;
}
router->nve_decap_config.ul_tb_id = ul_tb_id;
router->nve_decap_config.tunnel_index = tunnel_index;
router->nve_decap_config.ul_proto = ul_proto;
router->nve_decap_config.ul_sip = *ul_sip;
router->nve_decap_config.valid = true;
/* It is valid to create a tunnel with a local IP and only later
* assign this IP address to a local interface
......@@ -1876,7 +1877,7 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
ul_proto, ul_sip,
type);
if (!fib_entry)
return 0;
goto out;
fib_entry->decap.tunnel_index = tunnel_index;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
......@@ -1885,11 +1886,13 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
if (err)
goto err_fib_entry_update;
return 0;
goto out;
err_fib_entry_update:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -1898,16 +1901,40 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
const union mlxsw_sp_l3addr *ul_sip)
{
enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
struct mlxsw_sp_router *router = mlxsw_sp->router;
struct mlxsw_sp_fib_entry *fib_entry;
mutex_lock(&mlxsw_sp->router->lock);
if (WARN_ON_ONCE(!router->nve_decap_config.valid))
goto out;
router->nve_decap_config.valid = false;
fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
ul_proto, ul_sip,
type);
if (!fib_entry)
return;
goto out;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
out:
mutex_unlock(&mlxsw_sp->router->lock);
}
static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
u32 ul_tb_id,
enum mlxsw_sp_l3proto ul_proto,
const union mlxsw_sp_l3addr *ul_sip)
{
struct mlxsw_sp_router *router = mlxsw_sp->router;
return router->nve_decap_config.valid &&
router->nve_decap_config.ul_tb_id == ul_tb_id &&
router->nve_decap_config.ul_proto == ul_proto &&
!memcmp(&router->nve_decap_config.ul_sip, ul_sip,
sizeof(*ul_sip));
}
struct mlxsw_sp_neigh_key {
......@@ -2280,10 +2307,8 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
int i, num_rec;
int err;
/* Make sure the neighbour's netdev isn't removed in the
* process.
*/
rtnl_lock();
/* Ensure the RIF we read from the device does not change mid-dump. */
mutex_lock(&mlxsw_sp->router->lock);
do {
mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
......@@ -2297,7 +2322,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
i);
} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -2328,15 +2353,14 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
/* Take RTNL mutex here to prevent lists from changes */
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
nexthop_neighs_list_node)
/* If this neigh have nexthops, make the kernel think this neigh
* is active regardless of the traffic.
*/
neigh_event_send(neigh_entry->key.n, NULL);
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
}
static void
......@@ -2376,15 +2400,13 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
* the nexthop wouldn't get offloaded until the neighbor is resolved
* but it wouldn't get resolved ever in case traffic is flowing in HW
* using different nexthop.
*
* Take RTNL mutex here to prevent lists from changes.
*/
rtnl_lock();
mutex_lock(&router->lock);
list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
nexthop_neighs_list_node)
if (!neigh_entry->connected)
neigh_event_send(neigh_entry->key.n, NULL);
rtnl_unlock();
mutex_unlock(&router->lock);
mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
......@@ -2522,7 +2544,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
dead = n->dead;
read_unlock_bh(&n->lock);
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
mlxsw_sp_span_respin(mlxsw_sp);
entry_connected = nud_state & NUD_VALID && !dead;
......@@ -2544,7 +2566,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
out:
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
neigh_release(n);
kfree(net_work);
}
......@@ -4499,6 +4521,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
{
struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
struct mlxsw_sp_router *router = mlxsw_sp->router;
u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
......@@ -4513,12 +4536,13 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry,
ipip_entry);
}
if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
dip.addr4)) {
u32 t_index;
if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
MLXSW_SP_L3_PROTO_IPV4,
&dip)) {
u32 tunnel_index;
t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
fib_entry->decap.tunnel_index = t_index;
tunnel_index = router->nve_decap_config.tunnel_index;
fib_entry->decap.tunnel_index = tunnel_index;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
return 0;
}
......@@ -5949,8 +5973,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
int err;
/* Protect internal structures from changes */
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
mlxsw_sp_span_respin(mlxsw_sp);
switch (fib_work->event) {
......@@ -5972,7 +5995,7 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
kfree(fib_work);
}
......@@ -5983,7 +6006,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
mlxsw_sp_span_respin(mlxsw_sp);
switch (fib_work->event) {
......@@ -6010,7 +6033,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
break;
}
rtnl_unlock();
mutex_unlock(&mlxsw_sp->router->lock);
kfree(fib_work);
}
......@@ -6023,6 +6046,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
int err;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
......@@ -6051,6 +6075,7 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
dev_put(fib_work->ven_info.dev);
break;
}
mutex_unlock(&mlxsw_sp->router->lock);
rtnl_unlock();
kfree(fib_work);
}
......@@ -6276,7 +6301,13 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
return !!mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
struct mlxsw_sp_rif *rif;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
mutex_unlock(&mlxsw_sp->router->lock);
return rif;
}
u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
......@@ -6284,6 +6315,7 @@ u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
struct mlxsw_sp_rif *rif;
u16 vid = 0;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
goto out;
......@@ -6297,6 +6329,7 @@ u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
vid = mlxsw_sp_fid_8021q_vid(rif->fid);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return vid;
}
......@@ -6578,10 +6611,13 @@ void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_rif *rif;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
return;
goto out;
mlxsw_sp_rif_destroy(rif);
out:
mutex_unlock(&mlxsw_sp->router->lock);
}
static void
......@@ -6703,7 +6739,11 @@ __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
mutex_lock(&mlxsw_sp->router->lock);
__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
mutex_unlock(&mlxsw_sp->router->lock);
}
static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
......@@ -6925,7 +6965,9 @@ static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
const struct net_device *macvlan_dev)
{
mutex_lock(&mlxsw_sp->router->lock);
__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
mutex_unlock(&mlxsw_sp->router->lock);
}
static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
......@@ -7007,15 +7049,17 @@ static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
if (event == NETDEV_UP)
goto out;
return NOTIFY_DONE;
router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
mutex_lock(&router->lock);
rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
out:
mutex_unlock(&router->lock);
return notifier_from_errno(err);
}
......@@ -7030,8 +7074,9 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
mlxsw_sp = mlxsw_sp_lower_get(dev);
if (!mlxsw_sp)
goto out;
return NOTIFY_DONE;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
......@@ -7043,6 +7088,7 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return notifier_from_errno(err);
}
......@@ -7063,6 +7109,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
struct mlxsw_sp_rif *rif;
rtnl_lock();
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!mlxsw_sp_rif_should_config(rif, dev, event))
......@@ -7070,6 +7117,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
out:
mutex_unlock(&mlxsw_sp->router->lock);
rtnl_unlock();
dev_put(dev);
kfree(inet6addr_work);
......@@ -7114,8 +7162,9 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
mlxsw_sp = mlxsw_sp_lower_get(dev);
if (!mlxsw_sp)
goto out;
return NOTIFY_DONE;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
......@@ -7127,6 +7176,7 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
out:
mutex_unlock(&mlxsw_sp->router->lock);
return notifier_from_errno(err);
}
......@@ -7213,24 +7263,30 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
{
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif *rif;
int err = 0;
mlxsw_sp = mlxsw_sp_lower_get(dev);
if (!mlxsw_sp)
return 0;
mutex_lock(&mlxsw_sp->router->lock);
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (!rif)
return 0;
goto out;
switch (event) {
case NETDEV_CHANGEMTU: /* fall through */
case NETDEV_CHANGEADDR:
return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
break;
case NETDEV_PRE_CHANGEADDR:
return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
break;
}
return 0;
out:
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
......@@ -7273,9 +7329,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
if (!mlxsw_sp || netif_is_macvlan(l3_dev))
return 0;
mutex_lock(&mlxsw_sp->router->lock);
switch (event) {
case NETDEV_PRECHANGEUPPER:
return 0;
break;
case NETDEV_CHANGEUPPER:
if (info->linking) {
struct netlink_ext_ack *extack;
......@@ -7287,6 +7344,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
}
break;
}
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
......@@ -7795,28 +7853,32 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
u16 *ul_rif_index)
{
struct mlxsw_sp_rif *ul_rif;
int err = 0;
ASSERT_RTNL();
mutex_lock(&mlxsw_sp->router->lock);
ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
if (IS_ERR(ul_rif))
return PTR_ERR(ul_rif);
if (IS_ERR(ul_rif)) {
err = PTR_ERR(ul_rif);
goto out;
}
*ul_rif_index = ul_rif->rif_index;
return 0;
out:
mutex_unlock(&mlxsw_sp->router->lock);
return err;
}
void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
{
struct mlxsw_sp_rif *ul_rif;
ASSERT_RTNL();
mutex_lock(&mlxsw_sp->router->lock);
ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
if (WARN_ON(!ul_rif))
return;
goto out;
mlxsw_sp_ul_rif_put(ul_rif);
out:
mutex_unlock(&mlxsw_sp->router->lock);
}
static int
......@@ -8066,6 +8128,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
if (!router)
return -ENOMEM;
mutex_init(&router->lock);
mlxsw_sp->router = router;
router->mlxsw_sp = mlxsw_sp;
......@@ -8169,6 +8232,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
err_register_inet6addr_notifier:
unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
mutex_destroy(&mlxsw_sp->router->lock);
kfree(mlxsw_sp->router);
return err;
}
......@@ -8189,5 +8253,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
__mlxsw_sp_router_fini(mlxsw_sp);
unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
mutex_destroy(&mlxsw_sp->router->lock);
kfree(mlxsw_sp->router);
}
......@@ -7,6 +7,49 @@
#include "spectrum.h"
#include "reg.h"
struct mlxsw_sp_router_nve_decap {
u32 ul_tb_id;
u32 tunnel_index;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr ul_sip;
u8 valid:1;
};
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_sp_rif **rifs;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
struct rhashtable nexthop_ht;
struct list_head nexthop_list;
struct {
/* One tree for each protocol: IPv4 and IPv6 */
struct mlxsw_sp_lpm_tree *proto_trees[2];
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
} lpm;
struct {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
struct notifier_block netevent_nb;
struct notifier_block inetaddr_nb;
struct notifier_block inet6addr_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
u32 adj_discard_index;
bool adj_discard_index_valid;
struct mlxsw_sp_router_nve_decap nve_decap_config;
struct mutex lock; /* Protects shared router resources */
};
struct mlxsw_sp_rif_ipip_lb;
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment