Commit fbd15f48 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-Add-support-for-non-equal-cost-multi-path'

Jiri Pirko says:

====================
mlxsw: Add support for non-equal-cost multi-path

Ido says:

In the device, nexthops are stored as adjacency entries in an array
called the KVD linear (KVDL). When a multi-path route is hit the
packet's headers are hashed and then converted to an index into KVDL
based on the adjacency group's size and base index.

Up until now the driver ignored the `weight` parameter for multi-path
routes and allocated only one adjacency entry for each nexthop with a
limit of 32 nexthops in a group. This set makes the driver take the
`weight` parameter into account when allocating adjacency entries.

First patch teaches dpipe to show the size of the adjacency group, so
that users will be able to determine the actual weight of each nexthop.
The second patch refactors the KVDL allocator, making it more receptive
towards the addition of another partition later in the set.

Patches 3-5 introduce small changes towards the actual change in the
sixth patch that populates the adjacency entries according to their
relative weight.

Last two patches finally add another partition to the KVDL, which allows
us to allocate more than 32 entries per-group and thus support more
nexthops and also provide higher accuracy with regards to the requested
weights.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bc9db417 330e2cc6
......@@ -3726,10 +3726,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
return err;
}
err = mlxsw_sp_kvdl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n");
return err;
}
err = mlxsw_sp_fids_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n");
return err;
goto err_fids_init;
}
err = mlxsw_sp_traps_init(mlxsw_sp);
......@@ -3834,6 +3840,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp_traps_fini(mlxsw_sp);
err_traps_init:
mlxsw_sp_fids_fini(mlxsw_sp);
err_fids_init:
mlxsw_sp_kvdl_fini(mlxsw_sp);
return err;
}
......@@ -3854,6 +3862,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_fids_fini(mlxsw_sp);
mlxsw_sp_kvdl_fini(mlxsw_sp);
}
static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
......@@ -3876,8 +3885,8 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
.max_pkey = 0,
.used_kvd_split_data = 1,
.kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY,
.kvd_hash_single_parts = 2,
.kvd_hash_double_parts = 1,
.kvd_hash_single_parts = 59,
.kvd_hash_double_parts = 41,
.kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE,
.swid_config = {
{
......
......@@ -62,7 +62,7 @@
#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */
#define MLXSW_SP_KVD_GRANULARITY 128
struct mlxsw_sp_port;
......@@ -143,6 +143,7 @@ struct mlxsw_sp_mr;
struct mlxsw_sp_acl;
struct mlxsw_sp_counter_pool;
struct mlxsw_sp_fid_core;
struct mlxsw_sp_kvdl;
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
......@@ -158,9 +159,7 @@ struct mlxsw_sp {
struct mlxsw_afa *afa;
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct {
DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
} kvdl;
struct mlxsw_sp_kvdl *kvdl;
struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
......@@ -411,9 +410,14 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
/* spectrum_kvdl.c */
int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
u32 *p_entry_index);
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
unsigned int entry_count,
unsigned int *p_alloc_size);
struct mlxsw_sp_acl_rule_info {
unsigned int priority;
......
......@@ -44,6 +44,7 @@ enum mlxsw_sp_field_metadata_id {
MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
};
......@@ -69,6 +70,11 @@ static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = {
.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
.bitwidth = 32,
},
{
.name = "adj_size",
.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
.bitwidth = 32,
},
{
.name = "adj_hash_index",
.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
......@@ -851,6 +857,14 @@ static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv,
match.header = &mlxsw_sp_dpipe_header_metadata;
match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
err = devlink_dpipe_match_put(skb, &match);
if (err)
return err;
match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
match.header = &mlxsw_sp_dpipe_header_metadata;
match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
err = devlink_dpipe_match_put(skb, &match);
if (err)
return err;
......@@ -897,6 +911,7 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
enum mlxsw_sp_dpipe_table_adj_match {
MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX,
MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE,
MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX,
MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT,
};
......@@ -919,6 +934,11 @@ mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matche
match->header = &mlxsw_sp_dpipe_header_metadata;
match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
match->header = &mlxsw_sp_dpipe_header_metadata;
match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
match->header = &mlxsw_sp_dpipe_header_metadata;
......@@ -955,6 +975,15 @@ mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry,
match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
match_value->match = match;
match_value->value_size = sizeof(u32);
match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
if (!match_value->value)
return -ENOMEM;
match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
match_value->match = match;
match_value->value_size = sizeof(u32);
match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
......@@ -993,8 +1022,8 @@ mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry,
static void
__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry,
u32 adj_index, u32 adj_hash_index,
unsigned char *ha,
u32 adj_index, u32 adj_size,
u32 adj_hash_index, unsigned char *ha,
struct mlxsw_sp_rif *rif)
{
struct devlink_dpipe_value *value;
......@@ -1005,6 +1034,10 @@ __mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry,
p_index = value->value;
*p_index = adj_index;
value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
p_index = value->value;
*p_index = adj_size;
value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
p_index = value->value;
*p_index = adj_hash_index;
......@@ -1027,10 +1060,11 @@ static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp,
unsigned char *ha = mlxsw_sp_nexthop_ha(nh);
u32 adj_hash_index = 0;
u32 adj_index = 0;
u32 adj_size = 0;
int err;
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index);
__mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index,
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index);
__mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size,
adj_hash_index, ha, rif);
err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter);
if (!err)
......@@ -1138,13 +1172,15 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
struct mlxsw_sp_nexthop *nh;
u32 adj_hash_index = 0;
u32 adj_index = 0;
u32 adj_size = 0;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_offload(nh) ||
mlxsw_sp_nexthop_group_has_ipip(nh))
continue;
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index);
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
&adj_hash_index);
if (enable)
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
else
......
......@@ -39,55 +39,276 @@
/* KVD linear space is carved into three partitions, distinguished by the
 * number of entries handed out per allocation:
 * singles (1), chunks (32) and large chunks (512).
 */
#define MLXSW_SP_KVDL_SINGLE_BASE 0
#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
#define MLXSW_SP_KVDL_SINGLE_END \
	(MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1)

#define MLXSW_SP_KVDL_CHUNKS_BASE \
	(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152
#define MLXSW_SP_KVDL_CHUNKS_END \
	(MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1)

/* The large chunks partition takes whatever remains of the KVD linear area. */
#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \
	(MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE)
#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \
	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE)
#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
	(MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)

#define MLXSW_SP_CHUNK_MAX 32
#define MLXSW_SP_LARGE_CHUNK_MAX 512
/* Static description of one KVDL partition. */
struct mlxsw_sp_kvdl_part_info {
	unsigned int part_index;	/* Index into kvdl_parts_info[] */
	unsigned int start_index;	/* First KVDL entry of the partition */
	unsigned int end_index;		/* Last KVDL entry (inclusive) */
	unsigned int alloc_size;	/* Entries handed out per allocation */
};

/* Runtime state of one KVDL partition. */
struct mlxsw_sp_kvdl_part {
	struct list_head list;		/* Member of mlxsw_sp_kvdl::parts_list */
	const struct mlxsw_sp_kvdl_part_info *info;
	/* One bit per alloc_size-sized allocation slot; sized at init time. */
	unsigned long usage[0]; /* Entries */
};

/* Top-level KVD linear allocator state: the list of partitions. */
struct mlxsw_sp_kvdl {
	struct list_head parts_list;
};
/* Find the partition best suited for an allocation of 'alloc_size'
 * entries: the one with the smallest per-allocation size that is still
 * large enough. Returns ERR_PTR(-ENOBUFS) when no partition fits.
 */
static struct mlxsw_sp_kvdl_part *
mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
			      unsigned int alloc_size)
{
	struct mlxsw_sp_kvdl_part *best = NULL;
	struct mlxsw_sp_kvdl_part *part;

	list_for_each_entry(part, &kvdl->parts_list, list) {
		if (alloc_size > part->info->alloc_size)
			continue;
		if (!best || part->info->alloc_size <= best->info->alloc_size)
			best = part;
	}

	return best ?: ERR_PTR(-ENOBUFS);
}
/* Map an absolute KVDL index back to the partition whose range contains
 * it. Returns ERR_PTR(-EINVAL) if the index lies outside every partition.
 */
static struct mlxsw_sp_kvdl_part *
mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
{
	struct mlxsw_sp_kvdl_part *part;

	list_for_each_entry(part, &kvdl->parts_list, list) {
		const struct mlxsw_sp_kvdl_part_info *info = part->info;

		if (kvdl_index >= info->start_index &&
		    kvdl_index <= info->end_index)
			return part;
	}

	return ERR_PTR(-EINVAL);
}
/* Convert an allocation-slot number within a partition to the absolute
 * KVDL index of the slot's first entry; each slot spans 'alloc_size'
 * consecutive entries.
 */
static u32
mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info,
				unsigned int entry_index)
{
	return info->start_index + entry_index * info->alloc_size;
}
/* Inverse of mlxsw_sp_entry_index_kvdl_index(): convert an absolute KVDL
 * index to the allocation-slot number within its partition.
 */
static unsigned int
mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
				u32 kvdl_index)
{
	return (kvdl_index - info->start_index) / info->alloc_size;
}
/* Allocate one slot from the partition's usage bitmap and return the
 * absolute KVDL index of its first entry via 'p_kvdl_index'.
 * Returns -ENOBUFS when the partition is exhausted.
 */
static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
				    u32 *p_kvdl_index)
{
	const struct mlxsw_sp_kvdl_part_info *info = part->info;
	unsigned int entry_index, nr_entries;

	/* The bitmap tracks alloc_size-sized slots, not individual entries. */
	nr_entries = (info->end_index - info->start_index + 1) /
		     info->alloc_size;
	entry_index = find_first_zero_bit(part->usage, nr_entries);
	if (entry_index == nr_entries)
		return -ENOBUFS;
	/* Non-atomic bitop — assumes callers serialize KVDL access;
	 * NOTE(review): confirm against locking scheme of callers.
	 */
	__set_bit(entry_index, part->usage);

	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(part->info,
							entry_index);

	return 0;
}
/* Release the allocation slot containing 'kvdl_index' back to the
 * partition's usage bitmap.
 */
static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
				    u32 kvdl_index)
{
	unsigned int entry_index;

	entry_index = mlxsw_sp_kvdl_index_entry_index(part->info,
						      kvdl_index);
	/* Non-atomic bitop — assumes callers serialize KVDL access. */
	__clear_bit(entry_index, part->usage);
}
/* Allocate 'entry_count' consecutive KVDL entries and return the index of
 * the first one via 'p_entry_index'. The actual allocation is rounded up
 * to the chosen partition's allocation size.
 * Returns -ENOBUFS when no partition can satisfy the request or the
 * chosen partition is exhausted.
 *
 * Note: the scraped diff left the removed bitmap-walk implementation
 * interleaved here; this is the post-merge partition-based version.
 */
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
			u32 *p_entry_index)
{
	struct mlxsw_sp_kvdl_part *part;

	/* Find partition with smallest allocation size satisfying the
	 * requested size.
	 */
	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
	if (IS_ERR(part))
		return PTR_ERR(part);

	return mlxsw_sp_kvdl_part_alloc(part, p_entry_index);
}
/* Free a previously allocated KVDL range, identified by the index of its
 * first entry. An index outside every partition is silently ignored.
 */
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
{
	struct mlxsw_sp_kvdl_part *part;

	part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index);
	if (IS_ERR(part))
		return;
	mlxsw_sp_kvdl_part_free(part, entry_index);
}
/* Report, via 'p_alloc_size', how many entries would actually be
 * allocated for a request of 'entry_count' entries — i.e. the allocation
 * size of the smallest partition able to satisfy the request.
 * Returns -ENOBUFS when no partition fits.
 */
int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
				   unsigned int entry_count,
				   unsigned int *p_alloc_size)
{
	struct mlxsw_sp_kvdl_part *part;

	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
	if (IS_ERR(part))
		return PTR_ERR(part);

	*p_alloc_size = part->info->alloc_size;

	return 0;
}
/* The three KVDL partitions: singles (1 entry per allocation), chunks
 * (32 entries) and large chunks (512 entries). Order here also fixes the
 * part_index used by init/fini.
 */
static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
	{
		.part_index = 0,
		.start_index = MLXSW_SP_KVDL_SINGLE_BASE,
		.end_index = MLXSW_SP_KVDL_SINGLE_END,
		.alloc_size = 1,
	},
	{
		.part_index = 1,
		.start_index = MLXSW_SP_KVDL_CHUNKS_BASE,
		.end_index = MLXSW_SP_KVDL_CHUNKS_END,
		.alloc_size = MLXSW_SP_CHUNK_MAX,
	},
	{
		.part_index = 2,
		.start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
		.end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END,
		.alloc_size = MLXSW_SP_LARGE_CHUNK_MAX,
	},
};
/* Look up a partition by its static part_index. Returns NULL when not
 * found (e.g. the partition was never initialized).
 *
 * Note: stray residue of the removed pre-partition allocator was
 * interleaved here by the scraped diff; removed.
 */
static struct mlxsw_sp_kvdl_part *
mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
{
	struct mlxsw_sp_kvdl_part *part;

	list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list) {
		if (part->info->part_index == part_index)
			return part;
	}

	return NULL;
}
/* Create the runtime state for partition 'part_index' (allocating its
 * usage bitmap inline) and link it into the KVDL parts list.
 * Returns -ENOMEM on allocation failure.
 *
 * Note: stray residue of the removed bitmap-walk allocator was
 * interleaved here by the scraped diff; removed.
 */
static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
				   unsigned int part_index)
{
	const struct mlxsw_sp_kvdl_part_info *info;
	struct mlxsw_sp_kvdl_part *part;
	unsigned int nr_entries;
	size_t usage_size;

	info = &kvdl_parts_info[part_index];
	/* One usage bit per alloc_size-sized allocation slot. */
	nr_entries = (info->end_index - info->start_index + 1) /
		     info->alloc_size;
	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
	if (!part)
		return -ENOMEM;
	part->info = info;
	list_add(&part->list, &mlxsw_sp->kvdl->parts_list);

	return 0;
}
/* Unlink and free the runtime state of partition 'part_index'; a missing
 * partition is silently ignored.
 */
static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
				    unsigned int part_index)
{
	struct mlxsw_sp_kvdl_part *part;

	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, part_index);
	if (!part)
		return;

	list_del(&part->list);
	kfree(part);
}
/* Create all partitions described by kvdl_parts_info[], tearing down the
 * already created ones on failure.
 *
 * Note: a stray 'return -ENOBUFS;' from the removed old allocator was
 * interleaved here by the scraped diff, making the success return
 * unreachable; removed.
 */
static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
{
	int err, i;

	INIT_LIST_HEAD(&mlxsw_sp->kvdl->parts_list);

	for (i = 0; i < ARRAY_SIZE(kvdl_parts_info); i++) {
		err = mlxsw_sp_kvdl_part_init(mlxsw_sp, i);
		if (err)
			goto err_kvdl_part_init;
	}

	return 0;

err_kvdl_part_init:
	for (i--; i >= 0; i--)
		mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);

	return err;
}
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
{
int type_entries;
int i;
if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE)
type_entries = 1;
else
type_entries = MLXSW_SP_CHUNK_MAX;
for (i = 0; i < type_entries; i++)
clear_bit(entry_index + i, mlxsw_sp->kvdl.usage);
for (i = ARRAY_SIZE(kvdl_parts_info) - 1; i >= 0; i--)
mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);
}
int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_kvdl *kvdl;
int err;
kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL);
if (!kvdl)
return -ENOMEM;
mlxsw_sp->kvdl = kvdl;
err = mlxsw_sp_kvdl_parts_init(mlxsw_sp);
if (err)
goto err_kvdl_parts_init;
return 0;
err_kvdl_parts_init:
kfree(mlxsw_sp->kvdl);
return err;
}
/* Destroy all KVDL partitions and free the allocator state itself. */
void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
	kfree(mlxsw_sp->kvdl);
}
......@@ -46,6 +46,7 @@
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
......@@ -2203,6 +2204,9 @@ struct mlxsw_sp_nexthop {
struct mlxsw_sp_nexthop_key key;
unsigned char gw_addr[sizeof(struct in6_addr)];
int ifindex;
int nh_weight;
int norm_nh_weight;
int num_adj_entries;
struct mlxsw_sp_rif *rif;
u8 should_offload:1, /* set indicates this neigh is connected and
* should be put to KVD linear area of this group.
......@@ -2232,6 +2236,7 @@ struct mlxsw_sp_nexthop_group {
u32 adj_index;
u16 ecmp_size;
u16 count;
int sum_norm_weight;
struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif nexthops[0].rif
};
......@@ -2299,7 +2304,7 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
}
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
u32 *p_adj_hash_index)
u32 *p_adj_size, u32 *p_adj_hash_index)
{
struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
u32 adj_hash_index = 0;
......@@ -2309,6 +2314,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
return -EINVAL;
*p_adj_index = nh_grp->adj_index;
*p_adj_size = nh_grp->ecmp_size;
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
......@@ -2316,7 +2322,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
if (nh_iter == nh)
break;
if (nh_iter->offloaded)
adj_hash_index++;
adj_hash_index += nh_iter->num_adj_entries;
}
*p_adj_hash_index = adj_hash_index;
......@@ -2599,7 +2605,7 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
return 0;
}
int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
......@@ -2617,7 +2623,23 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
/* Program every adjacency entry occupied by this nexthop, starting at
 * 'adj_index'; stop and return the error on the first failure.
 */
int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
			    struct mlxsw_sp_nexthop *nh)
{
	int i, err;

	for (i = 0; i < nh->num_adj_entries; i++) {
		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
		if (err)
			return err;
	}

	return 0;
}
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index,
struct mlxsw_sp_nexthop *nh)
{
......@@ -2627,6 +2649,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}
/* IPinIP variant of mlxsw_sp_nexthop_update(): program every adjacency
 * entry occupied by this nexthop, stopping on the first failure.
 */
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					u32 adj_index,
					struct mlxsw_sp_nexthop *nh)
{
	int i, err;

	for (i = 0; i < nh->num_adj_entries; i++) {
		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
						     nh);
		if (err)
			return err;
	}

	return 0;
}
static int
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
......@@ -2661,7 +2701,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
nh->update = 0;
nh->offloaded = 1;
}
adj_index++;
adj_index += nh->num_adj_entries;
}
return 0;
}
......@@ -2706,17 +2746,118 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
}
}
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	/* Valid sizes for an adjacency group are:
	 * 1-64, 512, 1024, 2048 and 4096. Sizes up to 64 are already
	 * valid; anything above the largest size is clamped to it.
	 */
	static const u16 valid_sizes[] = { 512, 1024, 2048, 4096 };
	int i;

	if (*p_adj_grp_size <= 64)
		return;

	for (i = 0; i < ARRAY_SIZE(valid_sizes); i++) {
		if (*p_adj_grp_size <= valid_sizes[i]) {
			*p_adj_grp_size = valid_sizes[i];
			return;
		}
	}
	*p_adj_grp_size = 4096;
}
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	/* Snap the group size to the largest valid size that still fits in
	 * 'alloc_size'. Allocations below 512 entries leave the size set by
	 * mlxsw_sp_adj_grp_size_round_up() untouched.
	 */
	static const u16 valid_sizes[] = { 4096, 2048, 1024, 512 };
	int i;

	for (i = 0; i < ARRAY_SIZE(valid_sizes); i++) {
		if (alloc_size >= valid_sizes[i]) {
			*p_adj_grp_size = valid_sizes[i];
			return;
		}
	}
}
/* Adjust the requested adjacency group size to one the device and the
 * KVDL allocator can both satisfy. Returns an error (via the allocation
 * size query) when no KVDL partition can hold the rounded-up size.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
					     &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
/* Normalize the weights of the offloadable nexthops by their greatest
 * common divisor, so the smallest number of adjacency entries expresses
 * the requested ratios, and record the normalized sum on the group.
 * sum_norm_weight stays 0 when no nexthop should be offloaded.
 */
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int sum = 0;
	int g = 0;
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		if (!nh->should_offload)
			continue;
		g = g > 0 ? gcd(nh->nh_weight, g) : nh->nh_weight;
	}

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum;
}
/* Distribute the group's ecmp_size adjacency entries among the offloaded
 * nexthops in proportion to their normalized weights. Each nexthop gets
 * the span between consecutive weighted cut points, so the per-nexthop
 * entry counts sum exactly to ecmp_size despite rounding.
 */
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		weight += nh->norm_nh_weight;
		/* Cut point for the cumulative weight so far. */
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
u16 ecmp_size, old_ecmp_size;
struct mlxsw_sp_nexthop *nh;
bool offload_change = false;
u32 adj_index;
u16 ecmp_size = 0;
bool old_adj_index_valid;
u32 old_adj_index;
u16 old_ecmp_size;
int i;
int err;
......@@ -2733,8 +2874,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
if (nh->should_offload)
nh->update = 1;
}
if (nh->should_offload)
ecmp_size++;
}
if (!offload_change) {
/* Nothing was added or removed, so no need to reallocate. Just
......@@ -2747,12 +2886,19 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
}
return;
}
if (!ecmp_size)
mlxsw_sp_nexthop_group_normalize(nh_grp);
if (!nh_grp->sum_norm_weight)
/* No neigh of this group is connected so we just set
* the trap and let everything flow through kernel.
*/
goto set_trap;
ecmp_size = nh_grp->sum_norm_weight;
err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
if (err)
/* No valid allocation size available. */
goto set_trap;
err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
if (err) {
/* We ran out of KVD linear space, just set the
......@@ -2767,6 +2913,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size;
mlxsw_sp_nexthop_group_rebalance(nh_grp);
err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
......@@ -3044,6 +3191,11 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
nh->nh_grp = nh_grp;
nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight = fib_nh->nh_weight;
#else
nh->nh_weight = 1;
#endif
memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
if (err)
......@@ -4303,6 +4455,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev = rt->dst.dev;
nh->nh_grp = nh_grp;
nh->nh_weight = 1;
memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
......
......@@ -115,7 +115,7 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
u32 *p_adj_hash_index);
u32 *p_adj_size, u32 *p_adj_hash_index);
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
#define mlxsw_sp_nexthop_for_each(nh, router) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment