Commit 2a014b20 authored by Ido Schimmel, committed by Jakub Kicinski

mlxsw: spectrum_router: Add support for nexthop objects

Register a listener to the nexthop notification chain and parse notified
nexthop objects into the existing mlxsw nexthop data structures.
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 30abc9cd
......@@ -2873,6 +2873,7 @@ struct mlxsw_sp_nexthop {
/* Kind of a nexthop group. The type selects which member of the group's
 * type-specific union is meaningful and how the group is hashed and compared
 * in the nexthop group table.
 */
enum mlxsw_sp_nexthop_group_type {
MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
/* Group created from a nexthop object; keyed by the object's ID. */
MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
};
struct mlxsw_sp_nexthop_group_info {
......@@ -2894,6 +2895,9 @@ struct mlxsw_sp_nexthop_group {
struct {
struct fib_info *fi;
} ipv4;
struct {
u32 id;
} obj;
};
struct mlxsw_sp_nexthop_group_info *nhgi;
enum mlxsw_sp_nexthop_group_type type;
......@@ -3012,6 +3016,7 @@ struct mlxsw_sp_nexthop_group_cmp_arg {
union {
struct fib_info *fi;
struct mlxsw_sp_fib6_entry *fib6_entry;
u32 id;
};
};
......@@ -3074,6 +3079,8 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
cmp_arg->fib6_entry);
case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
return cmp_arg->id != nh_grp->obj.id;
default:
WARN_ON(1);
return 1;
......@@ -3100,6 +3107,8 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
}
return jhash(&val, sizeof(val), seed);
case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
default:
WARN_ON(1);
return 0;
......@@ -3134,6 +3143,8 @@ mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
default:
WARN_ON(1);
return 0;
......@@ -3538,6 +3549,25 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
}
static void
mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
/* Do not update the flags if the nexthop group is being destroyed
* since:
* 1. The nexthop objects is being deleted, in which case the flags are
* irrelevant.
* 2. The nexthop group was replaced by a newer group, in which case
* the flags of the nexthop object were already updated based on the
* new group.
*/
if (nh_grp->can_destroy)
return;
nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
nh_grp->nhgi->adj_index_valid, false);
}
static void
mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
......@@ -3549,6 +3579,9 @@ mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
break;
case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
break;
}
}
......@@ -4088,6 +4121,413 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
}
/* Check that a single notified nexthop is of a kind the driver can program.
 *
 * Blackhole, FDB and encapsulating nexthops are rejected with -EINVAL and a
 * matching message in @extack; anything else is accepted and 0 is returned.
 * @mlxsw_sp is not used by the checks themselves.
 */
static int
mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
				     const struct nh_notifier_single_info *nh,
				     struct netlink_ext_ack *extack)
{
	if (nh->is_reject) {
		NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported");
		return -EINVAL;
	}

	if (nh->is_fdb) {
		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
		return -EINVAL;
	}

	if (nh->has_encap) {
		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
		return -EINVAL;
	}

	return 0;
}
/* Check that a notified nexthop group can be programmed.
 *
 * FDB groups are rejected outright. Every member must pass the single
 * nexthop validation and must either have a gateway or egress through an
 * IPIP device. Returns 0 on success, or the first error encountered with a
 * message set in @extack.
 */
static int
mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
				    const struct nh_notifier_grp_info *nh_grp,
				    struct netlink_ext_ack *extack)
{
	int i;

	if (nh_grp->is_fdb) {
		NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
		return -EINVAL;
	}

	for (i = 0; i < nh_grp->num_nh; i++) {
		const struct nh_notifier_single_info *nh =
			&nh_grp->nh_entries[i].nh;
		int err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh,
							       extack);

		if (err)
			return err;

		/* Members with a gateway are fine as they are. */
		if (nh->gw_family)
			continue;

		/* Device only nexthops with an IPIP device are programmed as
		 * encapsulating adjacency entries, so they are fine as well.
		 */
		if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL))
			continue;

		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
		return -EINVAL;
	}

	return 0;
}
/* Validate a nexthop notification before it is processed.
 *
 * Only NEXTHOP_EVENT_REPLACE carries configuration that needs checking; any
 * other event is accepted as is. Depending on whether the notification
 * describes a group or a single nexthop, the matching validator is invoked
 * and its result returned.
 */
static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
					 unsigned long event,
					 struct nh_notifier_info *info)
{
	if (event != NEXTHOP_EVENT_REPLACE)
		return 0;

	if (info->is_grp)
		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
							   info->nh_grp,
							   info->extack);

	return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
						    info->extack);
}
/* Tell whether the notified nexthop object should be treated as a gateway.
 *
 * Groups always qualify, since their members were validated earlier. A
 * single nexthop qualifies when it has a gateway address family or when its
 * device is an IPIP device known to the driver.
 */
static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
					    const struct nh_notifier_info *info)
{
	const struct net_device *dev;

	if (!info->is_grp) {
		dev = info->nh->dev;
		if (info->nh->gw_family)
			return true;
		return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
	}

	/* Already validated earlier. */
	return true;
}
/* Initialize one driver nexthop from a notified single nexthop.
 *
 * The nexthop is bound to the group's info, given @weight, and - when the
 * notified nexthop has an IPv4 / IPv6 gateway - its gateway address and
 * neighbour table are filled in. A counter is allocated, the nexthop is
 * linked into the router's nexthop list and type-specific initialization is
 * performed against the nexthop's device.
 *
 * Returns 0 on success. On failure of the type-specific initialization the
 * list linkage and the counter are undone and the error is returned.
 */
static int
mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
			  struct mlxsw_sp_nexthop_group *nh_grp,
			  struct mlxsw_sp_nexthop *nh,
			  struct nh_notifier_single_info *nh_obj, int weight)
{
	struct net_device *dev = nh_obj->dev;
	int err;

	nh->nhgi = nh_grp->nhgi;
	nh->nh_weight = weight;

	/* Any other gateway family leaves the address and table untouched. */
	if (nh_obj->gw_family == AF_INET) {
		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
		nh->neigh_tbl = &arp_tbl;
	} else if (nh_obj->gw_family == AF_INET6) {
		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
#if IS_ENABLED(CONFIG_IPV6)
		nh->neigh_tbl = &nd_tbl;
#endif
	}

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
	nh->ifindex = dev->ifindex;

	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
	if (!err)
		return 0;

	/* Unwind in reverse order of setup. */
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	return err;
}
/* Tear down a nexthop previously set up by mlxsw_sp_nexthop_obj_init().
 * The steps mirror the initialization in reverse order: type-specific state
 * is finalized first, then the nexthop is unlinked from the list it was
 * added to and finally its counter is freed.
 */
static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
list_del(&nh->router_list_node);
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
/* Allocate and populate the nexthop group info of a nexthop object group.
 *
 * One nexthop is created per group member, or a single nexthop with weight 1
 * when the notification describes a single nexthop. Once all nexthops are
 * initialized the group is refreshed.
 *
 * Returns 0 on success. On failure every nexthop that was initialized is
 * finalized in reverse order, the info is freed and the error is returned;
 * a refresh failure additionally sets a message in the notification's
 * extack. Note that nh_grp->nhgi is left pointing at the freed info on the
 * error path.
 */
static int
mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_nexthop_group *nh_grp,
				     struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group_info *nhgi;
	struct mlxsw_sp_nexthop *nh;
	unsigned int nhs;
	int err, i;

	if (info->is_grp)
		nhs = info->nh_grp->num_nh;
	else
		nhs = 1;

	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
	if (!nhgi)
		return -ENOMEM;

	nh_grp->nhgi = nhgi;
	nhgi->nh_grp = nh_grp;
	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
	nhgi->count = nhs;

	for (i = 0; i < nhgi->count; i++) {
		struct nh_notifier_single_info *nh_obj;
		int weight;

		nh = &nhgi->nexthops[i];

		if (!info->is_grp) {
			nh_obj = info->nh;
			weight = 1;
		} else {
			nh_obj = &info->nh_grp->nh_entries[i].nh;
			weight = info->nh_grp->nh_entries[i].weight;
		}

		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
						weight);
		if (err)
			goto err_nexthop_obj_init;
	}

	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	if (err) {
		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
		goto err_group_refresh;
	}

	return 0;

err_group_refresh:
	/* All nexthops were initialized; unwind every one of them. */
	i = nhgi->count;
err_nexthop_obj_init:
	/* 'i' is one past the last successfully initialized nexthop. */
	while (i-- > 0) {
		nh = &nhgi->nexthops[i];
		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
	}
	kfree(nhgi);
	return err;
}
/* Release the nexthop group info of a nexthop object group.
 *
 * All nexthops are finalized from last to first, the group is refreshed and
 * the info is freed. A warning is emitted (once) if the group still reports
 * a valid adjacency index after the refresh.
 */
static void
mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
	int i = nhgi->count;

	while (i-- > 0)
		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, &nhgi->nexthops[i]);

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nhgi->adj_index_valid);
	kfree(nhgi);
}
/* Create a nexthop group for the notified nexthop object.
 *
 * The group is typed as a nexthop object group, keyed by the object's ID and
 * populated from the notification. It is not inserted into the nexthop group
 * table here.
 *
 * Returns the new group, or an ERR_PTR() on allocation or initialization
 * failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
				  struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	int err;

	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
	nh_grp->obj.id = info->id;

	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
	if (err) {
		kfree(nh_grp);
		return ERR_PTR(err);
	}

	/* The group stays alive until its nexthop object is deleted or it is
	 * replaced, regardless of whether routes are using it.
	 */
	nh_grp->can_destroy = false;

	return nh_grp;
}
/* Destroy a nexthop object group, provided it was marked as destroyable.
 *
 * Groups whose 'can_destroy' is not set are left untouched. Otherwise the
 * group info is released and the group is freed; a warning is emitted (once)
 * if routes are still linked to the group at that point.
 */
static void
mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (!nh_grp->can_destroy)
		return;

	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);

	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));

	kfree(nh_grp);
}
/* Find the nexthop object group with the given nexthop object ID in the
 * router's nexthop group table. Returns whatever the table lookup yields
 * (no group is expected to be found for an unknown ID).
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
{
	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg = {
		.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
		.id = id,
	};

	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &cmp_arg,
				      mlxsw_sp_nexthop_group_ht_params);
}
/* Add a newly created nexthop object group by handing it to
 * mlxsw_sp_nexthop_group_insert() and returning that function's result.
 */
static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
}
static int
mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop_group *old_nh_grp,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
int err;
old_nh_grp->nhgi = new_nhgi;
new_nhgi->nh_grp = old_nh_grp;
nh_grp->nhgi = old_nhgi;
old_nhgi->nh_grp = nh_grp;
if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
/* Both the old adjacency index and the new one are valid.
* Routes are currently using the old one. Tell the device to
* replace the old adjacency index with the new one.
*/
err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
old_nhgi->adj_index,
old_nhgi->ecmp_size);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
goto err_out;
}
} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
/* The old adjacency index is valid, while the new one is not.
* Iterate over all the routes using the group and change them
* to trap packets to the CPU.
*/
err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
goto err_out;
}
} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
/* The old adjacency index is invalid, while the new one is.
* Iterate over all the routes using the group and change them
* to forward packets using the new valid index.
*/
err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
goto err_out;
}
}
/* Make sure the flags are set / cleared based on the new nexthop group
* information.
*/
mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
/* At this point 'nh_grp' is just a shell that is not used by anyone
* and its nexthop group info is the old info that was just replaced
* with the new one. Remove it.
*/
nh_grp->can_destroy = true;
mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
return 0;
err_out:
old_nhgi->nh_grp = old_nh_grp;
nh_grp->nhgi = new_nhgi;
new_nhgi->nh_grp = nh_grp;
old_nh_grp->nhgi = old_nhgi;
return err;
}
/* Handle a new or replaced nexthop object.
 *
 * A group is always created from the notification first. If no group with
 * the same ID exists yet, the new group is added; otherwise the new group is
 * used to replace the contents of the existing one. On failure of either
 * step the newly created group is destroyed and the error is returned.
 */
static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
				    struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
	int err;

	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
	if (IS_ERR(nh_grp))
		return PTR_ERR(nh_grp);

	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (old_nh_grp)
		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
							 old_nh_grp,
							 info->extack);
	else
		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);

	if (!err)
		return 0;

	/* The new group was neither added nor consumed; get rid of it. */
	nh_grp->can_destroy = true;
	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
	return err;
}
/* Handle deletion of a nexthop object.
 *
 * The matching group, if any, is marked as destroyable and removed from the
 * nexthop group table. It is destroyed right away only when no routes are
 * linked to it.
 */
static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
				     struct nh_notifier_info *info)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
	if (!nh_grp)
		return;

	nh_grp->can_destroy = true;
	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);

	/* If the group still has routes using it, then defer the delete
	 * operation until the last route using it is deleted.
	 */
	if (list_empty(&nh_grp->fib_list))
		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
}
/* Nexthop notification chain callback.
 *
 * The notification is validated first, without taking the router lock; a
 * validation error is reported back immediately. Otherwise the event is
 * handled under the router lock: REPLACE creates or replaces the matching
 * group, DEL removes it, and any other event is ignored. The resulting errno
 * is converted to a notifier return value.
 */
static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
				      unsigned long event, void *ptr)
{
	struct mlxsw_sp_router *router =
		container_of(nb, struct mlxsw_sp_router, nexthop_nb);
	struct nh_notifier_info *info = ptr;
	int err;

	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
	if (err)
		return notifier_from_errno(err);

	mutex_lock(&router->lock);

	ASSERT_RTNL();

	if (event == NEXTHOP_EVENT_REPLACE)
		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
	else if (event == NEXTHOP_EVENT_DEL)
		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);

	mutex_unlock(&router->lock);

	return notifier_from_errno(err);
}
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
struct fib_info *fi)
{
......@@ -8549,6 +8989,14 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_register_netevent_notifier;
mlxsw_sp->router->nexthop_nb.notifier_call =
mlxsw_sp_nexthop_obj_event;
err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
&mlxsw_sp->router->nexthop_nb,
extack);
if (err)
goto err_register_nexthop_notifier;
mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
&mlxsw_sp->router->fib_nb,
......@@ -8559,6 +9007,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
return 0;
err_register_fib_notifier:
unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
&mlxsw_sp->router->nexthop_nb);
err_register_nexthop_notifier:
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
unregister_inet6addr_notifier(&router->inet6addr_nb);
......@@ -8598,6 +9049,8 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
&mlxsw_sp->router->fib_nb);
unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
&mlxsw_sp->router->nexthop_nb);
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
......
......@@ -58,6 +58,7 @@ struct mlxsw_sp_router {
struct list_head nexthop_neighs_list;
struct list_head ipip_list;
bool aborted;
struct notifier_block nexthop_nb;
struct notifier_block fib_nb;
struct notifier_block netevent_nb;
struct notifier_block inetaddr_nb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment