Commit 95ae2d1d authored by David S. Miller

Merge branch 'for-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
Mellanox, mlx5 E-Switch chains and prios

This series has two parts:

1) A merge commit with the mlx5-next branch that includes updates for mlx5
HW layouts needed for this and upcoming submissions.

2) From Paul: increase the number of chains and prios

Currently the Mellanox driver supports offloading tc rules that
are defined on the first 4 chains and the first 16 priorities.
The restriction stems from the firmware flow level enforcement
requiring a flow table of a certain level to point to a flow
table of a higher level. This limitation may be ignored by setting
the ignore_flow_level bit when creating flow table entries.
Use unmanaged tables and ignore flow level to create more tables than
declared by fs_core steering. Manually manage the connections between the
tables themselves.

A HW table is instantiated for every tc <chain,prio> tuple. The miss rule
of every table either jumps to the next <chain,prio> table, or continues
to slow_fdb. This logic is realized by following this sequence:

1. Create an auto-grouped flow table for the specified priority with
    reserved entries

Reserved entries are allocated at the end of the flow table.
Flow groups are evaluated in sequence and therefore it is guaranteed
that the flow group defined on the last FTEs will be the last to evaluate.

Define a "match all" flow group on the reserved entries, providing
the platform to add table miss actions.

2. Set the miss rule action to jump to the next <chain,prio> table
    or the slow_fdb.

3. Link the previous priority table to point to the new table by
    updating its miss rule.

Please pull and let me know if there's any problem.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0c58ac1e 278d51f2
......@@ -3276,12 +3276,14 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
int num_entries, int num_groups,
u32 flags)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups,
0, flags);
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return ERR_CAST(ft);
......
......@@ -42,7 +42,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o
ecpf.o rdma.o eswitch_offloads_chains.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
......
......@@ -58,6 +58,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
struct ethtool_rx_flow_spec *fs,
int num_tuples)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_ethtool_table *eth_ft;
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
......@@ -102,9 +103,11 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
flow_table_properties_nic_receive.log_max_ft_size)),
MLX5E_ETHTOOL_NUM_ENTRIES);
ft = mlx5_create_auto_grouped_flow_table(ns, prio,
table_size,
MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
ft_attr.prio = prio;
ft_attr.max_fte = table_size;
ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return (void *)ft;
......
......@@ -41,6 +41,7 @@
#include <net/ipv6_stubs.h>
#include "eswitch.h"
#include "eswitch_offloads_chains.h"
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
......@@ -1247,8 +1248,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
struct flow_cls_offload *f = type_data;
struct flow_cls_offload cls_flower;
struct flow_cls_offload tmp, *f = type_data;
struct mlx5e_priv *priv = cb_priv;
struct mlx5_eswitch *esw;
unsigned long flags;
......@@ -1261,16 +1261,30 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
if (!mlx5_eswitch_prios_supported(esw) || f->common.chain_index)
memcpy(&tmp, f, sizeof(*f));
if (!mlx5_esw_chains_prios_supported(esw) ||
tmp.common.chain_index)
return -EOPNOTSUPP;
/* Re-use tc offload path by moving the ft flow to the
* reserved ft chain.
*
* FT offload can use prio range [0, INT_MAX], so we normalize
* it to range [1, mlx5_esw_chains_get_prio_range(esw)]
* as with tc, where prio 0 isn't supported.
*
* We only support chain 0 of FT offload.
*/
memcpy(&cls_flower, f, sizeof(*f));
cls_flower.common.chain_index = FDB_FT_CHAIN;
err = mlx5e_rep_setup_tc_cls_flower(priv, &cls_flower, flags);
memcpy(&f->stats, &cls_flower.stats, sizeof(f->stats));
if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
return -EOPNOTSUPP;
if (tmp.common.chain_index != 0)
return -EOPNOTSUPP;
tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
tmp.common.prio++;
err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
return err;
default:
return -EOPNOTSUPP;
......
......@@ -1133,6 +1133,7 @@ static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *pr
static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
{ "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) },
{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
};
......
......@@ -51,6 +51,7 @@
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "eswitch_offloads_chains.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
......@@ -960,7 +961,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
mutex_lock(&priv->fs.tc.t_lock);
if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
int tc_grp_size, tc_tbl_size;
struct mlx5_flow_table_attr ft_attr = {};
int tc_grp_size, tc_tbl_size, tc_num_grps;
u32 max_flow_counter;
max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
......@@ -970,13 +972,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;
ft_attr.prio = MLX5E_TC_PRIO;
ft_attr.max_fte = tc_tbl_size;
ft_attr.level = MLX5E_TC_FT_LEVEL;
ft_attr.autogroup.max_num_groups = tc_num_grps;
priv->fs.tc.t =
mlx5_create_auto_grouped_flow_table(priv->fs.ns,
MLX5E_TC_PRIO,
tc_tbl_size,
MLX5E_TC_TABLE_NUM_GROUPS,
MLX5E_TC_FT_LEVEL, 0);
&ft_attr);
if (IS_ERR(priv->fs.tc.t)) {
mutex_unlock(&priv->fs.tc.t_lock);
NL_SET_ERR_MSG_MOD(extack,
......@@ -1080,7 +1084,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;
slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
......@@ -1097,7 +1101,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;
slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow_flag_clear(flow, SLOW);
}
......@@ -1157,19 +1161,18 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
u32 max_chain = mlx5_eswitch_get_chain_range(esw);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
u16 max_prio = mlx5_eswitch_get_prio_range(esw);
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
bool encap_valid = true;
u32 max_prio, max_chain;
int err = 0;
int out_index;
if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
return -EOPNOTSUPP;
}
......@@ -1179,11 +1182,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
* FDB_FT_CHAIN which is outside tc range.
* See mlx5e_rep_setup_ft_cb().
*/
max_chain = mlx5_esw_chains_get_chain_range(esw);
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
return -EOPNOTSUPP;
}
max_prio = mlx5_esw_chains_get_prio_range(esw);
if (attr->prio > max_prio) {
NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
return -EOPNOTSUPP;
......@@ -3466,7 +3471,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
break;
case FLOW_ACTION_GOTO: {
u32 dest_chain = act->chain_index;
u32 max_chain = mlx5_eswitch_get_chain_range(esw);
u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
if (ft_flow) {
NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
......
......@@ -277,6 +277,7 @@ enum {
static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb;
......@@ -289,8 +290,10 @@ static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
}
/* num FTE 2, num FG 2 */
fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO,
2, 2, 0, 0);
ft_attr.prio = LEGACY_VEPA_PRIO;
ft_attr.max_fte = 2;
ft_attr.autogroup.max_num_groups = 2;
fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb)) {
err = PTR_ERR(fdb);
esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
......
......@@ -157,7 +157,7 @@ enum offloads_fdb_flags {
ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
};
extern const unsigned int ESW_POOLS[4];
struct mlx5_esw_chains_priv;
struct mlx5_eswitch_fdb {
union {
......@@ -182,14 +182,7 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
struct {
struct mlx5_flow_table *fdb;
u32 num_rules;
} fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO];
/* Protects fdb_prio table */
struct mutex fdb_prio_lock;
int fdb_left[ARRAY_SIZE(ESW_POOLS)];
struct mlx5_esw_chains_priv *esw_chains_priv;
} offloads;
};
u32 flags;
......@@ -355,15 +348,6 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *attr);
bool
mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw);
u16
mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw);
u32
mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
struct mlx5_flow_destination *dest);
......@@ -388,6 +372,11 @@ enum {
MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
};
enum {
MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0),
MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1),
};
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
struct mlx5_core_dev *in_mdev;
......@@ -401,7 +390,6 @@ struct mlx5_esw_flow_attr {
u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
u8 total_vlan;
bool vlan_handled;
struct {
u32 flags;
struct mlx5_eswitch_rep *rep;
......@@ -416,6 +404,7 @@ struct mlx5_esw_flow_attr {
u32 chain;
u16 prio;
u32 dest_chain;
u32 flags;
struct mlx5e_tc_flow_parse_attr *parse_attr;
};
......
......@@ -37,6 +37,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "eswitch_offloads_chains.h"
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
......@@ -47,10 +48,6 @@
* one for multicast.
*/
#define MLX5_ESW_MISS_FLOWS (2)
#define fdb_prio_table(esw, chain, prio, level) \
(esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
#define UPLINK_REP_INDEX 0
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
......@@ -62,32 +59,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
return &esw->offloads.vport_reps[idx];
}
static struct mlx5_flow_table *
esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
static void
esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
{
return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
}
u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
{
if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
return FDB_TC_MAX_CHAIN;
return 0;
}
u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
{
if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
return FDB_TC_MAX_PRIO;
return 1;
}
static bool
esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
const struct mlx5_vport *vport)
......@@ -175,10 +146,17 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
if (attr->dest_chain) {
struct mlx5_flow_table *ft;
struct mlx5_flow_table *ft;
ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw);
i++;
} else if (attr->dest_chain) {
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
ft = mlx5_esw_chains_get_table(esw, attr->dest_chain,
1, 0);
if (IS_ERR(ft)) {
rule = ERR_CAST(ft);
goto err_create_goto_table;
......@@ -223,7 +201,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio,
!!split);
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
......@@ -242,10 +221,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split);
err_esw_get:
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain)
mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
err_create_goto_table:
return rule;
}
......@@ -262,13 +241,13 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule;
int i;
fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0);
if (IS_ERR(fast_fdb)) {
rule = ERR_CAST(fast_fdb);
goto err_get_fast;
}
fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1);
if (IS_ERR(fwd_fdb)) {
rule = ERR_CAST(fwd_fdb);
goto err_get_fwd;
......@@ -296,6 +275,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
if (IS_ERR(rule))
......@@ -305,9 +285,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
return rule;
add_err:
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1);
err_get_fwd:
esw_put_prio_table(esw, attr->chain, attr->prio, 0);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
err_get_fast:
return rule;
}
......@@ -332,12 +312,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
atomic64_dec(&esw->offloads.num_flows);
if (fwd_rule) {
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
esw_put_prio_table(esw, attr->chain, attr->prio, 0);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0);
} else {
esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
mlx5_esw_chains_put_table(esw, attr->chain, attr->prio,
!!split);
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0);
}
}
......@@ -451,7 +432,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (err)
goto unlock;
attr->vlan_handled = false;
attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
vport = esw_vlan_action_get_vport(attr, push, pop);
......@@ -459,7 +440,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
/* tracks VF --> wire rules without vlan push action */
if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
attr->vlan_handled = true;
attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
}
goto unlock;
......@@ -490,7 +471,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
}
out:
if (!err)
attr->vlan_handled = true;
attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED;
unlock:
mutex_unlock(&esw->state_lock);
return err;
......@@ -508,7 +489,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
return 0;
if (!attr->vlan_handled)
if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED))
return 0;
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
......@@ -582,8 +563,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
dest.vport.num = vport;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
&flow_act, &dest, 1);
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
out:
......@@ -824,8 +805,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
dest.vport.num = esw->manager_vport;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
&flow_act, &dest, 1);
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
......@@ -839,8 +820,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
outer_headers.dmac_47_16);
dmac_v[0] = 0x01;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
&flow_act, &dest, 1);
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
......@@ -855,174 +836,6 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
return err;
}
#define ESW_OFFLOADS_NUM_GROUPS 4
/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS),
* and a virtual memory region of 16M (ESW_SIZE), this region is duplicated
* for each flow table pool. We can allocate up to 16M of each pool,
* and we keep track of how much we used via put/get_sz_to_pool.
* Firmware doesn't report any of this for now.
* ESW_POOL is expected to be sorted from large to small
*/
#define ESW_SIZE (16 * 1024 * 1024)
const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
64 * 1024, 4 * 1024 };
static int
get_sz_from_pool(struct mlx5_eswitch *esw)
{
int sz = 0, i;
for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
if (esw->fdb_table.offloads.fdb_left[i]) {
--esw->fdb_table.offloads.fdb_left[i];
sz = ESW_POOLS[i];
break;
}
}
return sz;
}
static void
put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
{
int i;
for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
if (sz >= ESW_POOLS[i]) {
++esw->fdb_table.offloads.fdb_left[i];
break;
}
}
}
static struct mlx5_flow_table *
create_next_size_table(struct mlx5_eswitch *esw,
struct mlx5_flow_namespace *ns,
u16 table_prio,
int level,
u32 flags)
{
struct mlx5_flow_table *fdb;
int sz;
sz = get_sz_from_pool(esw);
if (!sz)
return ERR_PTR(-ENOSPC);
fdb = mlx5_create_auto_grouped_flow_table(ns,
table_prio,
sz,
ESW_OFFLOADS_NUM_GROUPS,
level,
flags);
if (IS_ERR(fdb)) {
esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
(int)PTR_ERR(fdb), table_prio, level, sz);
put_sz_to_pool(esw, sz);
}
return fdb;
}
static struct mlx5_flow_table *
esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
{
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_table *fdb = NULL;
struct mlx5_flow_namespace *ns;
int table_prio, l = 0;
u32 flags = 0;
if (chain == FDB_TC_SLOW_PATH_CHAIN)
return esw->fdb_table.offloads.slow_fdb;
mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
fdb = fdb_prio_table(esw, chain, prio, level).fdb;
if (fdb) {
/* take ref on earlier levels as well */
while (level >= 0)
fdb_prio_table(esw, chain, prio, level--).num_rules++;
mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
return fdb;
}
ns = mlx5_get_fdb_sub_ns(dev, chain);
if (!ns) {
esw_warn(dev, "Failed to get FDB sub namespace\n");
mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
return ERR_PTR(-EOPNOTSUPP);
}
if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
table_prio = prio - 1;
/* create earlier levels for correct fs_core lookup when
* connecting tables
*/
for (l = 0; l <= level; l++) {
if (fdb_prio_table(esw, chain, prio, l).fdb) {
fdb_prio_table(esw, chain, prio, l).num_rules++;
continue;
}
fdb = create_next_size_table(esw, ns, table_prio, l, flags);
if (IS_ERR(fdb)) {
l--;
goto err_create_fdb;
}
fdb_prio_table(esw, chain, prio, l).fdb = fdb;
fdb_prio_table(esw, chain, prio, l).num_rules = 1;
}
mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
return fdb;
err_create_fdb:
mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
if (l >= 0)
esw_put_prio_table(esw, chain, prio, l);
return fdb;
}
static void
esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
{
int l;
if (chain == FDB_TC_SLOW_PATH_CHAIN)
return;
mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
for (l = level; l >= 0; l--) {
if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
continue;
put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
fdb_prio_table(esw, chain, prio, l).fdb = NULL;
}
mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
}
static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
{
/* If lazy creation isn't supported, deref the fast path tables */
if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
esw_put_prio_table(esw, 0, 1, 1);
esw_put_prio_table(esw, 0, 1, 0);
}
}
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
......@@ -1055,16 +868,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
u32 *flow_group_in, max_flow_counter;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
int table_size, ix, err = 0, i;
u32 flags = 0, *flow_group_in;
int table_size, ix, err = 0;
struct mlx5_flow_group *g;
u32 flags = 0, fdb_max;
void *match_criteria;
u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
......@@ -1083,19 +896,6 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
goto ns_err;
}
max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
MLX5_CAP_GEN(dev, max_flow_counter_15_0);
fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n",
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
fdb_max);
for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
esw->fdb_table.offloads.fdb_left[i] =
ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
MLX5_ESW_MISS_FLOWS + esw->total_vports;
......@@ -1118,16 +918,10 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.slow_fdb = fdb;
/* If lazy creation isn't supported, open the fast path tables now */
if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
esw_get_prio_table(esw, 0, 1, 0);
esw_get_prio_table(esw, 0, 1, 1);
} else {
esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
err = mlx5_esw_chains_create(esw);
if (err) {
esw_warn(dev, "Failed to create fdb chains err(%d)\n", err);
goto fdb_chains_err;
}
/* create send-to-vport group */
......@@ -1218,7 +1012,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
peer_miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
esw_destroy_offloads_fast_fdb_tables(esw);
mlx5_esw_chains_destroy(esw);
fdb_chains_err:
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
/* Holds true only as long as DMFS is the default */
......@@ -1240,8 +1035,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_esw_chains_destroy(esw);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
esw_destroy_offloads_fast_fdb_tables(esw);
/* Holds true only as long as DMFS is the default */
mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
MLX5_FLOW_STEERING_MODE_DMFS);
......@@ -2111,7 +1906,6 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
err = esw_create_uplink_offloads_acl_tables(esw);
if (err)
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2020 Mellanox Technologies.
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/fs.h>
#include "eswitch_offloads_chains.h"
#include "mlx5_core.h"
#include "fs_core.h"
#include "eswitch.h"
#include "en.h"
/* Accessors for the chains state hanging off an eswitch. They expand to
 * plain member expressions, so they can be used as lvalues as well as
 * read (e.g. fdb_pool_left(esw)[i] is incremented/decremented in place).
 */
#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
/* Mutex documented in struct mlx5_esw_chains_priv as protecting both
 * hash tables.
 */
#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
/* Per-pool remaining-table counters, indexed like ESW_POOLS */
#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
/* The slow path FDB lives in the eswitch itself, not in the chains priv */
#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb)
/* Reads the ignore_flow_level FDB flow table capability of the device */
#define fdb_ignore_flow_level_supported(esw) \
(MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level))
/* Max number of auto-created flow groups requested for each FDB table */
#define ESW_OFFLOADS_NUM_GROUPS 4
/* Firmware currently supports 4 pools of 4 table sizes (ESW_POOLS), and a
 * virtual memory region of 16M (ESW_SIZE); this region is duplicated for
 * each flow table pool. We can allocate up to 16M from each pool, and we
 * keep track of how much we used via
 * mlx5_esw_chains_get_avail_sz_from_pool() and
 * mlx5_esw_chains_put_sz_to_pool().
 * Firmware doesn't report any of this for now.
 * ESW_POOLS is expected to be sorted from large to small and to match the
 * firmware pools.
 *
 * The table is only used inside this file, so it has internal linkage.
 */
#define ESW_SIZE (16 * 1024 * 1024)
static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024,
					  1 * 1024 * 1024,
					  64 * 1024,
					  4 * 1024, };
/* Chains/prios bookkeeping of one eswitch; reached through the
 * esw_chains_priv() accessor and its sibling macros.
 */
struct mlx5_esw_chains_priv {
/* Hash tables of chains and of <chain, prio, level> tables; presumably
 * set up with chain_params and prio_params respectively — confirm at the
 * init site.
 */
struct rhashtable chains_ht;
struct rhashtable prios_ht;
/* Protects above chains_ht and prios_ht */
struct mutex lock;
/* NOTE(review): looks like the table slow-path rules are forwarded to
 * (see mlx5_esw_chains_get_tc_end_ft() users) — confirm.
 */
struct mlx5_flow_table *tc_end_fdb;
/* Number of tables still available in each size pool; entry i
 * corresponds to ESW_POOLS[i].
 */
int fdb_left[ARRAY_SIZE(ESW_POOLS)];
};
/* One chain entry; hashed by chain number (see chain_params). */
struct fdb_chain {
/* rhashtable linkage (chain_params.head_offset) */
struct rhash_head node;
/* Chain number, used as the hash key (chain_params.key_offset) */
u32 chain;
/* Reference count — presumably one per user of the chain; confirm
 * against the get/put helpers.
 */
int ref;
struct mlx5_eswitch *esw;
/* List head — presumably links the fdb_prio entries of this chain via
 * fdb_prio.list; confirm.
 */
struct list_head prios_list;
};
/* Hash key identifying one table. prio_params hashes the whole struct
 * (key_len covers every member), so the three u32 members must stay
 * free of padding and be fully initialised before lookup/insert.
 */
struct fdb_prio_key {
u32 chain;
u32 prio;
u32 level;
};
/* One <chain, prio, level> table entry; hashed by key (see prio_params). */
struct fdb_prio {
/* rhashtable linkage (prio_params.head_offset) */
struct rhash_head node;
/* List linkage — presumably into fdb_chain.prios_list; confirm */
struct list_head list;
/* <chain, prio, level> tuple used as the hash key */
struct fdb_prio_key key;
/* Reference count — presumably one per user of the table; confirm */
int ref;
struct fdb_chain *fdb_chain;
/* Flow table backing this entry */
struct mlx5_flow_table *fdb;
/* NOTE(review): presumably the table the miss rule currently jumps to
 * (next <chain, prio> table or the slow FDB) — confirm.
 */
struct mlx5_flow_table *next_fdb;
/* NOTE(review): presumably the "match all" group on the reserved
 * entries and the miss rule installed in it — confirm.
 */
struct mlx5_flow_group *miss_group;
struct mlx5_flow_handle *miss_rule;
};
/* rhashtable description for struct fdb_chain objects: the key is the
 * u32 chain number and the hash linkage is the embedded 'node' member.
 */
static const struct rhashtable_params chain_params = {
	.key_len = sizeof_field(struct fdb_chain, chain),
	.key_offset = offsetof(struct fdb_chain, chain),
	.head_offset = offsetof(struct fdb_chain, node),
	.automatic_shrinking = true,
};
/* rhashtable description for struct fdb_prio objects: the key is the
 * whole struct fdb_prio_key and the hash linkage is the embedded 'node'
 * member.
 */
static const struct rhashtable_params prio_params = {
	.key_len = sizeof_field(struct fdb_prio, key),
	.key_offset = offsetof(struct fdb_prio, key),
	.head_offset = offsetof(struct fdb_prio, node),
	.automatic_shrinking = true,
};
bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw)
{
return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
}
/* Return the chain range value for this eswitch:
 *  - 1 when chains and prios are not supported,
 *  - UINT_MAX - 1 when the device can ignore flow levels,
 *  - FDB_TC_MAX_CHAIN otherwise.
 */
u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw)
{
	u32 range;

	if (!mlx5_esw_chains_prios_supported(esw))
		range = 1;
	else if (fdb_ignore_flow_level_supported(esw))
		range = UINT_MAX - 1;
	else
		range = FDB_TC_MAX_CHAIN;

	return range;
}
/* Return the chain number used for FT offload: the value right after
 * the chain range reported by mlx5_esw_chains_get_chain_range().
 */
u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw)
{
	u32 chain_range = mlx5_esw_chains_get_chain_range(esw);

	return chain_range + 1;
}
/* Return the priority range value for this eswitch:
 *  - 1 when chains and prios are not supported,
 *  - UINT_MAX when the device can ignore flow levels,
 *  - FDB_TC_MAX_PRIO otherwise.
 */
u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw)
{
	u32 range;

	if (!mlx5_esw_chains_prios_supported(esw))
		range = 1;
	else if (fdb_ignore_flow_level_supported(esw))
		range = UINT_MAX;
	else
		range = FDB_TC_MAX_PRIO;

	return range;
}
/* Return the level range value: UINT_MAX when the device can ignore flow
 * levels, FDB_TC_LEVELS_PER_PRIO otherwise.
 */
static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw)
{
	unsigned int range = FDB_TC_LEVELS_PER_PRIO;

	if (fdb_ignore_flow_level_supported(esw))
		range = UINT_MAX;

	return range;
}
/* Passing POOL_NEXT_SIZE as the desired size asks for whatever pool is
 * next in line instead of a specific minimum size.
 */
#define POOL_NEXT_SIZE 0

/* Reserve one table from the size pools and return its size.
 *
 * The pools are scanned from the last ESW_POOLS entry towards the first.
 * A pool qualifies when it still has tables left and its size is
 * strictly greater than @desired_size. For a specific @desired_size the
 * first qualifying pool met by the scan is taken; for POOL_NEXT_SIZE the
 * scan runs to completion, so the qualifying pool with the lowest index
 * is taken.
 *
 * Returns the size of the reserved table, or 0 when no pool qualifies.
 */
static int
mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw,
				       int desired_size)
{
	int idx = ARRAY_SIZE(ESW_POOLS);
	int pick = -1;

	while (--idx >= 0) {
		if (!fdb_pool_left(esw)[idx])
			continue;
		if (ESW_POOLS[idx] <= desired_size)
			continue;

		pick = idx;
		if (desired_size != POOL_NEXT_SIZE)
			break;
	}

	if (pick == -1)
		return 0;

	fdb_pool_left(esw)[pick]--;
	return ESW_POOLS[pick];
}
/* Give a table of size @sz back to its pool.
 *
 * @sz must be exactly one of the ESW_POOLS sizes; anything else leaves
 * the counters untouched and triggers a one-time warning.
 */
static void
mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
{
	int idx = ARRAY_SIZE(ESW_POOLS);

	while (--idx >= 0) {
		if (sz != ESW_POOLS[idx])
			continue;

		fdb_pool_left(esw)[idx]++;
		return;
	}

	WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz);
}
/* Initialise the per-pool counters of available tables.
 *
 * A pool whose table size exceeds the device's maximum FDB flow table
 * size (2^log_max_ft_size) gets no tables at all; every other pool gets
 * as many tables as fit in ESW_SIZE.
 */
static void
mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw)
{
	u32 fdb_max;
	int i;

	/* Shift an unsigned one: a signed "1 << 31" would be undefined */
	fdb_max = 1U << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size);

	for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--)
		fdb_pool_left(esw)[i] =
			ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
}
/* Create the auto-grouped FDB flow table backing <chain, prio, level>.
 *
 * A table size is reserved from the size pools first; the reservation is
 * returned to the pool on every failure path.
 *
 * Returns the new flow table, or an ERR_PTR():
 *  -ENOSPC      no table size left in the pools,
 *  -EOPNOTSUPP  the required flow namespace is not available,
 *  or whatever mlx5_create_auto_grouped_flow_table() reported.
 */
static struct mlx5_flow_table *
mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw,
				 u32 chain, u32 prio, u32 level)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *fdb;
	int sz;

	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
		ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
				  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);

	sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE);
	if (!sz)
		return ERR_PTR(-ENOSPC);
	ft_attr.max_fte = sz;

	/* We use tc_slow_fdb(esw) as the table's next_ft till
	 * ignore_flow_level is allowed on FT creation and not just for FTEs.
	 * Instead caller should add an explicit miss rule if needed.
	 */
	ft_attr.next_ft = tc_slow_fdb(esw);

	/* The root table(chain 0, prio 1, level 0) is required to be
	 * connected to the previous prio (FDB_BYPASS_PATH if exists).
	 * We always create it, as a managed table, in order to align with
	 * fs_core logic.
	 */
	if (!fdb_ignore_flow_level_supported(esw) ||
	    (chain == 0 && prio == 1 && level == 0)) {
		ft_attr.level = level;
		ft_attr.prio = prio - 1;
		ns = mlx5_get_fdb_sub_ns(esw->dev, chain);
	} else {
		ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
		ft_attr.prio = FDB_TC_OFFLOAD;
		/* Firmware doesn't allow us to create another level 0 table,
		 * so we create all unmanaged tables as level 1.
		 *
		 * To connect them, we use explicit miss rules with
		 * ignore_flow_level. Caller is responsible to create
		 * these rules (if needed).
		 */
		ft_attr.level = 1;
		ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	}

	/* Don't hand a missing namespace to the table creation code */
	if (!ns) {
		esw_warn(esw->dev,
			 "Failed to get FDB namespace (chain: %u, prio: %u, level: %u)\n",
			 chain, prio, level);
		fdb = ERR_PTR(-EOPNOTSUPP);
		goto err_put_sz;
	}

	ft_attr.autogroup.num_reserved_entries = 2;
	ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS;
	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(fdb)) {
		/* chain/prio/level are u32 and may exceed INT_MAX: use %u */
		esw_warn(esw->dev,
			 "Failed to create FDB table err %d (chain: %u, prio: %u, level: %u, size: %d)\n",
			 (int)PTR_ERR(fdb), chain, prio, level, sz);
		goto err_put_sz;
	}

	return fdb;

err_put_sz:
	mlx5_esw_chains_put_sz_to_pool(esw, sz);
	return fdb;
}
/* Destroy a table created by mlx5_esw_chains_create_fdb_table() and hand its
 * size back to the size pools.
 */
static void
mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
struct mlx5_flow_table *fdb)
{
/* max_fte must be read before the table is destroyed. */
mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte);
mlx5_destroy_flow_table(fdb);
}
/* Allocate the bookkeeping object for tc chain @chain and insert it into
 * the chains hash table. The new object starts with an empty prios list
 * and a zero reference count (the allocation is zeroed).
 *
 * Returns the object, ERR_PTR(-ENOMEM), or the hash insertion error.
 */
static struct fdb_chain *
mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
{
	struct fdb_chain *new_chain;
	int ret;

	new_chain = kvzalloc(sizeof(*new_chain), GFP_KERNEL);
	if (!new_chain)
		return ERR_PTR(-ENOMEM);

	new_chain->esw = esw;
	new_chain->chain = chain;
	INIT_LIST_HEAD(&new_chain->prios_list);

	ret = rhashtable_insert_fast(&esw_chains_ht(esw), &new_chain->node,
				     chain_params);
	if (ret) {
		kvfree(new_chain);
		return ERR_PTR(ret);
	}

	return new_chain;
}
/* Remove @fdb_chain from the chains hash table of its eswitch and free it. */
static void
mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain)
{
struct mlx5_eswitch *esw = fdb_chain->esw;
rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
chain_params);
kvfree(fdb_chain);
}
/* Look up the bookkeeping object of tc chain @chain, creating it on first
 * use, and take a reference on it.
 *
 * Returns the object, or the error pointer from the creation attempt.
 */
static struct fdb_chain *
mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
{
	struct fdb_chain *entry;

	entry = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain,
				       chain_params);
	if (entry) {
		entry->ref++;
		return entry;
	}

	entry = mlx5_esw_chains_create_fdb_chain(esw, chain);
	if (!IS_ERR(entry))
		entry->ref++;

	return entry;
}
static struct mlx5_flow_handle *
mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
struct mlx5_flow_table *next_fdb)
{
static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act act = {};
act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = next_fdb;
return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
}
/* Re-point the miss rules of the tables that precede @fdb_prio in its chain
 * so that they forward to @next_fdb.
 *
 * Only acts when @fdb_prio is a level 0 entry; for any other level it
 * returns 0 without touching anything. Walking backwards from @fdb_prio over
 * the chain's prios_list, every entry up to and including the first level 0
 * entry met is updated.
 *
 * The update is done in two passes: first all new miss rules are added (and
 * all of them are removed again if one fails), then the old rules are
 * deleted and each entry's miss_rule/next_fdb are switched to the new ones.
 *
 * Returns 0 on success or the error from adding a miss rule.
 */
static int
mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
struct mlx5_flow_table *next_fdb)
{
/* NOTE(review): miss_rules[] holds FDB_TC_LEVELS_PER_PRIO + 1 handles and
 * the walk below does not check n against that bound — presumably callers
 * never create more levels per prio than that; confirm.
 */
struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
struct fdb_chain *fdb_chain = fdb_prio->fdb_chain;
struct fdb_prio *pos;
int n = 0, err;
if (fdb_prio->key.level)
return 0;
/* Iterate in reverse order until reaching the level 0 rule of
* the previous priority, adding all the miss rules first, so we can
* revert them if any of them fails.
*/
pos = fdb_prio;
list_for_each_entry_continue_reverse(pos,
&fdb_chain->prios_list,
list) {
miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
next_fdb);
if (IS_ERR(miss_rules[n])) {
err = PTR_ERR(miss_rules[n]);
goto err_prev_rule;
}
n++;
if (!pos->key.level)
break;
}
/* Success, delete old miss rules, and update the pointers. */
/* The second walk visits the same entries in the same order, so
 * miss_rules[n] is the rule that was added for the n-th entry above.
 */
n = 0;
pos = fdb_prio;
list_for_each_entry_continue_reverse(pos,
&fdb_chain->prios_list,
list) {
mlx5_del_flow_rules(pos->miss_rule);
pos->miss_rule = miss_rules[n];
pos->next_fdb = next_fdb;
n++;
if (!pos->key.level)
break;
}
return 0;
err_prev_rule:
/* Slot n holds the error pointer; only the rules before it were added. */
while (--n >= 0)
mlx5_del_flow_rules(miss_rules[n]);
return err;
}
/* Drop one reference on @fdb_chain and destroy it when none is left. */
static void
mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain)
{
	fdb_chain->ref--;
	if (fdb_chain->ref)
		return;

	mlx5_esw_chains_destroy_fdb_chain(fdb_chain);
}
/* Create and connect the table entry for tc <@chain, @prio, @level>.
 *
 * Steps: take a reference on the chain object, create the flow table, create
 * a miss group on the table's last two entries, add a miss rule to the next
 * table, register the entry in the prios hash table and in the chain's
 * sorted list, and finally re-point the preceding tables at the new one.
 * Every step is undone in reverse order on failure, including the chain
 * reference.
 *
 * The returned entry has a zero reference count (the allocation is zeroed);
 * mlx5_esw_chains_get_table() takes the first reference and calls this
 * function with the chains lock held.
 *
 * Returns the new entry or an ERR_PTR().
 */
static struct fdb_prio *
mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
u32 chain, u32 prio, u32 level)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_handle *miss_rule = NULL;
struct mlx5_flow_group *miss_group;
struct fdb_prio *fdb_prio = NULL;
struct mlx5_flow_table *next_fdb;
struct fdb_chain *fdb_chain;
struct mlx5_flow_table *fdb;
struct list_head *pos;
u32 *flow_group_in;
int err;
fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain);
if (IS_ERR(fdb_chain))
return ERR_CAST(fdb_chain);
/* Both allocations are checked together; kvfree(NULL) on the error path
 * is harmless for whichever one failed.
 */
fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL);
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!fdb_prio || !flow_group_in) {
err = -ENOMEM;
goto err_alloc;
}
/* Chain's prio list is sorted by prio and level.
* And all levels of some prio point to the next prio's level 0.
* Example list (prio, level):
* (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
* In hardware, we will have the following pointers:
* (3,0) -> (5,0) -> (7,0) -> Slow path
* (3,1) -> (5,0)
* (5,1) -> (7,0)
* (6,1) -> (7,0)
*/
/* Default miss for each chain: */
next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ?
tc_slow_fdb(esw) :
tc_end_fdb(esw);
/* After this loop pos is the first entry that sorts after the new one, or
 * the list head if there is none; it is the insertion point used below.
 */
list_for_each(pos, &fdb_chain->prios_list) {
struct fdb_prio *p = list_entry(pos, struct fdb_prio, list);
/* exit on first pos that is larger */
if (prio < p->key.prio || (prio == p->key.prio &&
level < p->key.level)) {
/* Get next level 0 table */
next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb;
break;
}
}
fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level);
if (IS_ERR(fdb)) {
err = PTR_ERR(fdb);
goto err_create;
}
/* The miss group covers the last two entries of the table. */
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
fdb->max_fte - 2);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
fdb->max_fte - 1);
miss_group = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(miss_group)) {
err = PTR_ERR(miss_group);
goto err_group;
}
/* Add miss rule to next_fdb */
miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
if (IS_ERR(miss_rule)) {
err = PTR_ERR(miss_rule);
goto err_miss_rule;
}
fdb_prio->miss_group = miss_group;
fdb_prio->miss_rule = miss_rule;
fdb_prio->next_fdb = next_fdb;
fdb_prio->fdb_chain = fdb_chain;
fdb_prio->key.chain = chain;
fdb_prio->key.prio = prio;
fdb_prio->key.level = level;
fdb_prio->fdb = fdb;
err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node,
prio_params);
if (err)
goto err_insert;
/* Insert right before pos to keep the list sorted by (prio, level). */
list_add(&fdb_prio->list, pos->prev);
/* Table is ready, connect it */
err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb);
if (err)
goto err_update;
kvfree(flow_group_in);
return fdb_prio;
err_update:
list_del(&fdb_prio->list);
rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
prio_params);
err_insert:
mlx5_del_flow_rules(miss_rule);
err_miss_rule:
mlx5_destroy_flow_group(miss_group);
err_group:
mlx5_esw_chains_destroy_fdb_table(esw, fdb);
err_create:
err_alloc:
kvfree(fdb_prio);
kvfree(flow_group_in);
mlx5_esw_chains_put_fdb_chain(fdb_chain);
return ERR_PTR(err);
}
/* Tear down a table entry created by mlx5_esw_chains_create_fdb_prio().
 *
 * The preceding tables are first re-pointed at this entry's own next table
 * (a failure there only triggers a warning), then the entry is unlinked
 * from the chain list and the prios hash table, its miss rule, miss group
 * and flow table are destroyed, the chain reference is dropped and the
 * entry is freed.
 */
static void
mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw,
				 struct fdb_prio *fdb_prio)
{
	struct fdb_chain *owner = fdb_prio->fdb_chain;
	int err;

	err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb_prio->next_fdb);
	WARN_ON(err);

	list_del(&fdb_prio->list);
	rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node,
			       prio_params);

	mlx5_del_flow_rules(fdb_prio->miss_rule);
	mlx5_destroy_flow_group(fdb_prio->miss_group);
	mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb);

	mlx5_esw_chains_put_fdb_chain(owner);
	kvfree(fdb_prio);
}
/* Get the flow table of tc <@chain, @prio, @level>, creating it on first use,
 * and take a reference on it.
 *
 * Before handling the requested level, a reference is taken (recursively,
 * through this same function) on every lower level of the same chain and
 * prio; those references are dropped again if a later step fails.
 * Release with mlx5_esw_chains_put_table() using the same triple.
 *
 * Returns the table, ERR_PTR(-EOPNOTSUPP) when the triple is outside the
 * supported ranges (the ft chain is accepted in addition to the chain
 * range), or the error pointer from creating a table.
 */
struct mlx5_flow_table *
mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
u32 level)
{
struct mlx5_flow_table *prev_fts;
struct fdb_prio *fdb_prio;
struct fdb_prio_key key;
int l = 0;
if ((chain > mlx5_esw_chains_get_chain_range(esw) &&
chain != mlx5_esw_chains_get_ft_chain(esw)) ||
prio > mlx5_esw_chains_get_prio_range(esw) ||
level > mlx5_esw_chains_get_level_range(esw))
return ERR_PTR(-EOPNOTSUPP);
/* create earlier levels for correct fs_core lookup when
* connecting tables.
*/
/* The recursive calls run before the chains lock is taken below; each of
 * them takes and releases the lock on its own.
 */
for (l = 0; l < level; l++) {
prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l);
if (IS_ERR(prev_fts)) {
fdb_prio = ERR_CAST(prev_fts);
goto err_get_prevs;
}
}
key.chain = chain;
key.prio = prio;
key.level = level;
mutex_lock(&esw_chains_lock(esw));
fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
prio_params);
if (!fdb_prio) {
fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain,
prio, level);
if (IS_ERR(fdb_prio))
goto err_create_prio;
}
++fdb_prio->ref;
mutex_unlock(&esw_chains_lock(esw));
return fdb_prio->fdb;
err_create_prio:
mutex_unlock(&esw_chains_lock(esw));
err_get_prevs:
/* l is the number of lower levels successfully referenced so far. */
while (--l >= 0)
mlx5_esw_chains_put_table(esw, chain, prio, l);
return ERR_CAST(fdb_prio);
}
/* Drop a reference taken by mlx5_esw_chains_get_table() on the table of tc
 * <@chain, @prio, @level>.
 *
 * The entry is destroyed when its last reference goes away. Afterwards the
 * references on the lower levels of the same chain and prio are dropped as
 * well (recursively, from @level - 1 down to 0). If no entry exists for the
 * triple, a one-time warning is emitted and nothing else happens.
 */
void
mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
u32 level)
{
struct fdb_prio *fdb_prio;
struct fdb_prio_key key;
key.chain = chain;
key.prio = prio;
key.level = level;
mutex_lock(&esw_chains_lock(esw));
fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key,
prio_params);
if (!fdb_prio)
goto err_get_prio;
if (--fdb_prio->ref == 0)
mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio);
mutex_unlock(&esw_chains_lock(esw));
/* The lock is released first: each recursive call takes it again. */
while (level-- > 0)
mlx5_esw_chains_put_table(esw, chain, prio, level);
return;
err_get_prio:
mutex_unlock(&esw_chains_lock(esw));
WARN_ONCE(1,
"Couldn't find table: (chain: %d prio: %d level: %d)",
chain, prio, level);
}
/* Return the table stored as tc_end_fdb(esw), i.e. the table that
 * mlx5_esw_chains_open() creates for the ft chain.
 */
struct mlx5_flow_table *
mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw)
{
	struct mlx5_flow_table *end_ft = tc_end_fdb(esw);

	return end_ft;
}
/* Allocate and initialise the chains private data of @esw.
 *
 * Sets up the table size pools, decides whether chains and prios offload is
 * supported (it is not when encap is enabled and the device lacks
 * multi_fdb_encap), and initialises the chains and prios hash tables and
 * the chains lock.
 *
 * Returns 0 on success, -ENOMEM, or the hash table initialisation error;
 * on failure the private data is freed again.
 */
static int
mlx5_esw_chains_init(struct mlx5_eswitch *esw)
{
	struct mlx5_esw_chains_priv *priv;
	struct mlx5_core_dev *dev = esw->dev;
	u32 max_flow_counter, fdb_max;
	int err;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	esw_chains_priv(esw) = priv;

	/* Values below are only used for the debug print. */
	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);
	fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);

	esw_debug(dev,
		  "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n",
		  max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max);

	mlx5_esw_chains_init_sz_pool(esw);

	if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) ||
	    esw->offloads.encap == DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
		esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
		esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
			 mlx5_esw_chains_get_chain_range(esw),
			 mlx5_esw_chains_get_prio_range(esw));
	} else {
		esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
		esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n");
	}

	err = rhashtable_init(&esw_chains_ht(esw), &chain_params);
	if (err)
		goto free_priv;

	err = rhashtable_init(&esw_prios_ht(esw), &prio_params);
	if (err) {
		rhashtable_destroy(&esw_chains_ht(esw));
		goto free_priv;
	}

	mutex_init(&esw_chains_lock(esw));

	return 0;

free_priv:
	kfree(priv);
	return err;
}
/* Undo mlx5_esw_chains_init(): destroy the chains lock and both hash tables,
 * then free the chains private data.
 */
static void
mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
{
mutex_destroy(&esw_chains_lock(esw));
rhashtable_destroy(&esw_prios_ht(esw));
rhashtable_destroy(&esw_chains_ht(esw));
kfree(esw_chains_priv(esw));
}
/* Take the references on the tables that exist for the whole lifetime of
 * the chains: the ft chain table (stored as tc_end_fdb(esw)), the root table
 * <0, 1, 0>, and - only when chains and prios are not supported - <0, 1, 1>.
 *
 * Returns 0 on success; on failure the references already taken are dropped
 * and the error is returned.
 */
static int
mlx5_esw_chains_open(struct mlx5_eswitch *esw)
{
	struct mlx5_flow_table *table;
	int ret;

	/* Create tc_end_fdb(esw) which is the always created ft chain */
	table = mlx5_esw_chains_get_table(esw,
					  mlx5_esw_chains_get_ft_chain(esw),
					  1, 0);
	if (IS_ERR(table))
		return PTR_ERR(table);
	tc_end_fdb(esw) = table;

	/* Always open the root for fast path */
	table = mlx5_esw_chains_get_table(esw, 0, 1, 0);
	if (IS_ERR(table)) {
		ret = PTR_ERR(table);
		goto put_end_ft;
	}

	if (mlx5_esw_chains_prios_supported(esw))
		return 0;

	/* Open level 1 for split rules now if prios isn't supported */
	table = mlx5_esw_chains_get_table(esw, 0, 1, 1);
	if (!IS_ERR(table))
		return 0;

	ret = PTR_ERR(table);
	mlx5_esw_chains_put_table(esw, 0, 1, 0);
put_end_ft:
	mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
	return ret;
}
/* Drop the table references taken by mlx5_esw_chains_open(), in reverse
 * order: <0, 1, 1> (only when chains and prios are not supported), the root
 * table <0, 1, 0>, and the ft chain table.
 */
static void
mlx5_esw_chains_close(struct mlx5_eswitch *esw)
{
if (!mlx5_esw_chains_prios_supported(esw))
mlx5_esw_chains_put_table(esw, 0, 1, 1);
mlx5_esw_chains_put_table(esw, 0, 1, 0);
mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0);
}
/* Set up tc chains support on @esw: initialise the private data, then open
 * the always-present tables. If opening fails the private data is cleaned
 * up again.
 *
 * Returns 0 on success or a negative error code.
 */
int
mlx5_esw_chains_create(struct mlx5_eswitch *esw)
{
	int ret = mlx5_esw_chains_init(esw);

	if (ret)
		return ret;

	ret = mlx5_esw_chains_open(esw);
	if (ret)
		mlx5_esw_chains_cleanup(esw);

	return ret;
}
/* Tear down what mlx5_esw_chains_create() set up: release the always-open
 * tables, then free the chains private data.
 */
void
mlx5_esw_chains_destroy(struct mlx5_eswitch *esw)
{
mlx5_esw_chains_close(esw);
mlx5_esw_chains_cleanup(esw);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020 Mellanox Technologies. */
#ifndef __ML5_ESW_CHAINS_H__
#define __ML5_ESW_CHAINS_H__
/* True when tc chains and priorities offload is enabled on the eswitch. */
bool
mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw);
/* Highest tc priority accepted by mlx5_esw_chains_get_table(). */
u32
mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw);
/* Highest tc chain index accepted by mlx5_esw_chains_get_table(). */
u32
mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw);
/* Index of the extra ft chain: chain range + 1. */
u32
mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw);
/* Get (creating on first use) the flow table of <chain, prio, level> and take
 * a reference on it. Returns the table or an ERR_PTR(); release with
 * mlx5_esw_chains_put_table() using the same triple.
 */
struct mlx5_flow_table *
mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
u32 level);
/* Drop a reference taken by mlx5_esw_chains_get_table(); the table is
 * destroyed when its last reference goes away.
 */
void
mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
u32 level);
/* Table used as the default miss destination of the tc chains; it is the
 * table created for the ft chain.
 */
struct mlx5_flow_table *
mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
/* Set up / tear down tc chains support on the eswitch. */
int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
#endif /* __ML5_ESW_CHAINS_H__ */
......@@ -50,8 +50,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
struct mlx5_flow_act *flow_act)
{
static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *root_ns;
int prio, flags;
int err;
root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
......@@ -63,10 +63,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
/* As this is the terminating action then the termination table is the
* same prio as the slow path
*/
prio = FDB_SLOW_PATH;
flags = MLX5_FLOW_TABLE_TERMINATION;
tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1,
0, flags);
ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION;
ft_attr.prio = FDB_SLOW_PATH;
ft_attr.max_fte = 1;
ft_attr.autogroup.max_num_groups = 1;
tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
if (IS_ERR(tt->termtbl)) {
esw_warn(dev, "Failed to create termination table\n");
return -EOPNOTSUPP;
......
......@@ -432,6 +432,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(set_fte_in, in, table_type, ft->type);
MLX5_SET(set_fte_in, in, table_id, ft->id);
MLX5_SET(set_fte_in, in, flow_index, fte->index);
MLX5_SET(set_fte_in, in, ignore_flow_level,
!!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL));
if (ft->vport) {
MLX5_SET(set_fte_in, in, vport_number, ft->vport);
MLX5_SET(set_fte_in, in, other_vport, 1);
......
......@@ -579,7 +579,9 @@ static void del_sw_flow_group(struct fs_node *node)
rhashtable_destroy(&fg->ftes_hash);
ida_destroy(&fg->fte_allocator);
if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
if (ft->autogroup.active &&
fg->max_ftes == ft->autogroup.group_size &&
fg->start_index < ft->autogroup.max_fte)
ft->autogroup.num_groups--;
err = rhltable_remove(&ft->fgs_hash,
&fg->hash,
......@@ -1006,7 +1008,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
u16 vport)
{
struct mlx5_flow_root_namespace *root = find_root(&ns->node);
struct mlx5_flow_table *next_ft = NULL;
bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED;
struct mlx5_flow_table *next_ft;
struct fs_prio *fs_prio = NULL;
struct mlx5_flow_table *ft;
int log_table_sz;
......@@ -1023,14 +1026,21 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
err = -EINVAL;
goto unlock_root;
}
if (ft_attr->level >= fs_prio->num_levels) {
err = -ENOSPC;
goto unlock_root;
if (!unmanaged) {
/* The level is related to the
* priority level range.
*/
if (ft_attr->level >= fs_prio->num_levels) {
err = -ENOSPC;
goto unlock_root;
}
ft_attr->level += fs_prio->start_level;
}
/* The level is related to the
* priority level range.
*/
ft_attr->level += fs_prio->start_level;
ft = alloc_flow_table(ft_attr->level,
vport,
ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
......@@ -1043,19 +1053,27 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = find_next_chained_ft(fs_prio);
next_ft = unmanaged ? ft_attr->next_ft :
find_next_chained_ft(fs_prio);
ft->def_miss_action = ns->def_miss_action;
err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
if (err)
goto free_ft;
err = connect_flow_table(root->dev, ft, fs_prio);
if (err)
goto destroy_ft;
if (!unmanaged) {
err = connect_flow_table(root->dev, ft, fs_prio);
if (err)
goto destroy_ft;
}
ft->node.active = true;
down_write_ref_node(&fs_prio->node, false);
tree_add_node(&ft->node, &fs_prio->node);
list_add_flow_table(ft, fs_prio);
if (!unmanaged) {
tree_add_node(&ft->node, &fs_prio->node);
list_add_flow_table(ft, fs_prio);
} else {
ft->node.root = fs_prio->node.root;
}
fs_prio->num_ft++;
up_write_ref_node(&fs_prio->node, false);
mutex_unlock(&root->chain_lock);
......@@ -1103,31 +1121,27 @@ EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
struct mlx5_flow_table*
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups,
u32 level,
u32 flags)
struct mlx5_flow_table_attr *ft_attr)
{
struct mlx5_flow_table_attr ft_attr = {};
int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries;
int max_num_groups = ft_attr->autogroup.max_num_groups;
struct mlx5_flow_table *ft;
if (max_num_groups > num_flow_table_entries)
if (max_num_groups > autogroups_max_fte)
return ERR_PTR(-EINVAL);
if (num_reserved_entries > ft_attr->max_fte)
return ERR_PTR(-EINVAL);
ft_attr.max_fte = num_flow_table_entries;
ft_attr.prio = prio;
ft_attr.level = level;
ft_attr.flags = flags;
ft = mlx5_create_flow_table(ns, &ft_attr);
ft = mlx5_create_flow_table(ns, ft_attr);
if (IS_ERR(ft))
return ft;
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
ft->autogroup.max_fte = autogroups_max_fte;
/* We save place for flow groups in addition to max types */
ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
return ft;
}
......@@ -1149,7 +1163,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg;
int err;
if (ft->autogroup.active)
if (ft->autogroup.active && start_index < ft->autogroup.max_fte)
return ERR_PTR(-EPERM);
down_write_ref_node(&ft->node, false);
......@@ -1322,9 +1336,10 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft
const struct mlx5_flow_spec *spec)
{
struct list_head *prev = &ft->node.children;
struct mlx5_flow_group *fg;
u32 max_fte = ft->autogroup.max_fte;
unsigned int candidate_index = 0;
unsigned int group_size = 0;
struct mlx5_flow_group *fg;
if (!ft->autogroup.active)
return ERR_PTR(-ENOENT);
......@@ -1332,7 +1347,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
group_size = ft->autogroup.group_size;
/* ft->max_fte == ft->autogroup.max_types */
/* max_fte == ft->autogroup.max_types */
if (group_size == 0)
group_size = 1;
......@@ -1345,7 +1360,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft
prev = &fg->node.list;
}
if (candidate_index + group_size > ft->max_fte)
if (candidate_index + group_size > max_fte)
return ERR_PTR(-ENOSPC);
fg = alloc_insert_flow_group(ft,
......@@ -1529,18 +1544,30 @@ static bool counter_is_valid(u32 action)
}
static bool dest_is_valid(struct mlx5_flow_destination *dest,
u32 action,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_table *ft)
{
bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL;
u32 action = flow_act->action;
if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
return counter_is_valid(action);
if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return true;
if (ignore_level) {
if (ft->type != FS_FT_FDB)
return false;
if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
dest->ft->type != FS_FT_FDB)
return false;
}
if (!dest || ((dest->type ==
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
(dest->ft->level <= ft->level)))
(dest->ft->level <= ft->level && !ignore_level)))
return false;
return true;
}
......@@ -1770,7 +1797,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
return ERR_PTR(-EINVAL);
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act->action, ft))
if (!dest_is_valid(&dest[i], flow_act, ft))
return ERR_PTR(-EINVAL);
}
nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
......@@ -2033,7 +2060,8 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
int err = 0;
mutex_lock(&root->chain_lock);
err = disconnect_flow_table(ft);
if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED))
err = disconnect_flow_table(ft);
if (err) {
mutex_unlock(&root->chain_lock);
return err;
......
......@@ -164,6 +164,7 @@ struct mlx5_flow_table {
unsigned int required_groups;
unsigned int group_size;
unsigned int num_groups;
unsigned int max_fte;
} autogroup;
/* Protect fwd_rules */
struct mutex lock;
......
......@@ -131,11 +131,11 @@ static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev)
MLX5_PCAM_REGS_5000_TO_507F);
}
static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev)
static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev,
enum mlx5_mcam_reg_groups group)
{
return mlx5_query_mcam_reg(dev, dev->caps.mcam,
MLX5_MCAM_FEATURE_ENHANCED_FEATURES,
MLX5_MCAM_REGS_FIRST_128);
return mlx5_query_mcam_reg(dev, dev->caps.mcam[group],
MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group);
}
static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev)
......@@ -221,8 +221,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, pcam_reg))
mlx5_get_pcam_reg(dev);
if (MLX5_CAP_GEN(dev, mcam_reg))
mlx5_get_mcam_reg(dev);
if (MLX5_CAP_GEN(dev, mcam_reg)) {
mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128);
mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF);
mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F);
}
if (MLX5_CAP_GEN(dev, qcam_reg))
mlx5_get_qcam_reg(dev);
......@@ -245,6 +248,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
if (MLX5_CAP_GEN_64(dev, general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION);
if (err)
return err;
}
return 0;
}
......
......@@ -1105,6 +1105,7 @@ enum mlx5_cap_type {
MLX5_CAP_DEV_MEM,
MLX5_CAP_RESERVED_16,
MLX5_CAP_TLS,
MLX5_CAP_VDPA_EMULATION = 0x13,
MLX5_CAP_DEV_EVENT = 0x14,
/* NUM OF CAP Types */
MLX5_CAP_NUM
......@@ -1120,6 +1121,9 @@ enum mlx5_pcam_feature_groups {
/* Access-register groups that can be queried through MCAM. The values are
 * also used as the first index of the per-group caps.mcam[][] buffers, with
 * MLX5_MCAM_REGS_NUM being the number of groups.
 */
enum mlx5_mcam_reg_groups {
MLX5_MCAM_REGS_FIRST_128 = 0x0,
MLX5_MCAM_REGS_0x9080_0x90FF = 0x1,
MLX5_MCAM_REGS_0x9100_0x917F = 0x2,
MLX5_MCAM_REGS_NUM = 0x3,
};
enum mlx5_mcam_feature_groups {
......@@ -1268,7 +1272,16 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg)
#define MLX5_CAP_MCAM_REG(mdev, reg) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg)
MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_FIRST_128], \
mng_access_reg_cap_mask.access_regs.reg)
#define MLX5_CAP_MCAM_REG1(mdev, reg) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9080_0x90FF], \
mng_access_reg_cap_mask.access_regs1.reg)
#define MLX5_CAP_MCAM_REG2(mdev, reg) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \
mng_access_reg_cap_mask.access_regs2.reg)
#define MLX5_CAP_MCAM_FEATURE(mdev, fld) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld)
......@@ -1297,6 +1310,14 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_DEV_EVENT(mdev, cap)\
MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap)
#define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\
MLX5_GET(device_virtio_emulation_cap, \
(mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
#define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\
MLX5_GET64(device_virtio_emulation_cap, \
(mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
......
......@@ -145,6 +145,8 @@ enum {
MLX5_REG_MCC = 0x9062,
MLX5_REG_MCDA = 0x9063,
MLX5_REG_MCAM = 0x907f,
MLX5_REG_MIRC = 0x9162,
MLX5_REG_RESOURCE_DUMP = 0xC000,
};
enum mlx5_qpts_trust_state {
......@@ -684,7 +686,7 @@ struct mlx5_core_dev {
u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
u32 mcam[MLX5_MCAM_REGS_NUM][MLX5_ST_SZ_DW(mcam_reg)];
u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
u8 embedded_cpu;
......
......@@ -48,6 +48,7 @@ enum {
MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0),
MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
MLX5_FLOW_TABLE_TERMINATION = BIT(2),
MLX5_FLOW_TABLE_UNMANAGED = BIT(3),
};
#define LEFTOVERS_RULE_NUM 2
......@@ -145,25 +146,27 @@ mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type,
int vport);
struct mlx5_flow_table *
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups,
u32 level,
u32 flags);
/* Creation attributes passed to mlx5_create_flow_table() and
 * mlx5_create_auto_grouped_flow_table().
 */
struct mlx5_flow_table_attr {
int prio;
int max_fte;
u32 level;
/* MLX5_FLOW_TABLE_* bits */
u32 flags;
/* Next table to chain to; used when MLX5_FLOW_TABLE_UNMANAGED is set. */
struct mlx5_flow_table *next_ft;
/* Only read by mlx5_create_auto_grouped_flow_table(). */
struct {
int max_num_groups;
/* Entries at the end of the table that are kept out of auto-grouping. */
int num_reserved_entries;
} autogroup;
};
struct mlx5_flow_table *
mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr);
struct mlx5_flow_table *
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table_attr *ft_attr);
struct mlx5_flow_table *
mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
int prio,
......@@ -194,6 +197,7 @@ struct mlx5_fs_vlan {
/* Bits for the flags field of struct mlx5_flow_act. */
enum {
FLOW_ACT_NO_APPEND = BIT(0),
/* Sets ignore_flow_level on the FTE, allowing a flow-table destination
 * whose level is not higher than the source table's.
 */
FLOW_ACT_IGNORE_FLOW_LEVEL = BIT(1),
};
struct mlx5_flow_act {
......
......@@ -87,6 +87,7 @@ enum {
/* Bits tested against the general_obj_types HCA capability. */
enum {
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM),
MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11),
/* When set, mlx5_query_hca_caps() also queries MLX5_CAP_VDPA_EMULATION. */
MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13),
};
enum {
......@@ -374,8 +375,17 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_esp_spi[0x1];
u8 reserved_at_58[0x2];
u8 bth_dst_qp[0x1];
u8 reserved_at_5b[0x5];
u8 reserved_at_5b[0x25];
u8 reserved_at_60[0x18];
u8 metadata_reg_c_7[0x1];
u8 metadata_reg_c_6[0x1];
u8 metadata_reg_c_5[0x1];
u8 metadata_reg_c_4[0x1];
u8 metadata_reg_c_3[0x1];
u8 metadata_reg_c_2[0x1];
u8 metadata_reg_c_1[0x1];
u8 metadata_reg_c_0[0x1];
};
struct mlx5_ifc_flow_table_prop_layout_bits {
......@@ -400,7 +410,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reformat_l3_tunnel_to_l2[0x1];
u8 reformat_l2_to_l3_tunnel[0x1];
u8 reformat_and_modify_action[0x1];
u8 reserved_at_15[0x2];
u8 ignore_flow_level[0x1];
u8 reserved_at_16[0x1];
u8 table_miss_action_domain[0x1];
u8 termination_table[0x1];
u8 reserved_at_19[0x7];
......@@ -721,7 +732,9 @@ enum {
struct mlx5_ifc_flow_table_eswitch_cap_bits {
u8 fdb_to_vport_reg_c_id[0x8];
u8 reserved_at_8[0xf];
u8 reserved_at_8[0xd];
u8 fdb_modify_header_fwd_to_table[0x1];
u8 reserved_at_16[0x1];
u8 flow_source[0x1];
u8 reserved_at_18[0x2];
u8 multi_fdb_encap[0x1];
......@@ -822,7 +835,9 @@ struct mlx5_ifc_qos_cap_bits {
struct mlx5_ifc_debug_cap_bits {
u8 core_dump_general[0x1];
u8 core_dump_qp[0x1];
u8 reserved_at_2[0x1e];
u8 reserved_at_2[0x7];
u8 resource_dump[0x1];
u8 reserved_at_a[0x16];
u8 reserved_at_20[0x2];
u8 stall_detect[0x1];
......@@ -953,6 +968,19 @@ struct mlx5_ifc_device_event_cap_bits {
u8 user_unaffiliated_events[4][0x40];
};
/* Layout of the MLX5_CAP_VDPA_EMULATION capability page. Array lengths are
 * field widths in bits; the reserved_at_<n> names give the running bit
 * offset.
 */
struct mlx5_ifc_device_virtio_emulation_cap_bits {
u8 reserved_at_0[0x20];
u8 reserved_at_20[0x13];
u8 log_doorbell_stride[0x5];
u8 reserved_at_38[0x3];
u8 log_doorbell_bar_size[0x5];
u8 doorbell_bar_offset[0x40];
u8 reserved_at_80[0x780];
};
enum {
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0,
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2,
......@@ -1753,6 +1781,132 @@ struct mlx5_ifc_resize_field_select_bits {
u8 resize_field_select[0x20];
};
/* Bit layout of the resource dump register (presumably the one accessed as
 * MLX5_REG_RESOURCE_DUMP — confirm). Array lengths are field widths in bits.
 */
struct mlx5_ifc_resource_dump_bits {
u8 more_dump[0x1];
u8 inline_dump[0x1];
u8 reserved_at_2[0xa];
u8 seq_num[0x4];
u8 segment_type[0x10];
u8 reserved_at_20[0x10];
u8 vhca_id[0x10];
u8 index1[0x20];
u8 index2[0x20];
u8 num_of_obj1[0x10];
u8 num_of_obj2[0x10];
u8 reserved_at_a0[0x20];
u8 device_opaque[0x40];
u8 mkey[0x20];
u8 size[0x20];
u8 address[0x40];
/* 52 dwords of dump data carried inside the register itself */
u8 inline_data[52][0x20];
};
/* One record of a resource dump menu segment: per-segment-type flags telling
 * which of index1/index2/num_of_obj1/num_of_obj2 are supported or required,
 * followed by three name fields of four dwords each.
 */
struct mlx5_ifc_resource_dump_menu_record_bits {
u8 reserved_at_0[0x4];
u8 num_of_obj2_supports_active[0x1];
u8 num_of_obj2_supports_all[0x1];
u8 must_have_num_of_obj2[0x1];
u8 support_num_of_obj2[0x1];
u8 num_of_obj1_supports_active[0x1];
u8 num_of_obj1_supports_all[0x1];
u8 must_have_num_of_obj1[0x1];
u8 support_num_of_obj1[0x1];
u8 must_have_index2[0x1];
u8 support_index2[0x1];
u8 must_have_index1[0x1];
u8 support_index1[0x1];
u8 segment_type[0x10];
u8 segment_name[4][0x20];
u8 index1_name[4][0x20];
u8 index2_name[4][0x20];
};
/* Common one-dword header that starts every resource dump segment. */
struct mlx5_ifc_resource_dump_segment_header_bits {
/* segment length; presumably counted in dwords, as the name says */
u8 length_dw[0x10];
u8 segment_type[0x10];
};
/* Resource dump "command" segment: header plus the segment/vhca/index/count
 * fields of a dump request.
 */
struct mlx5_ifc_resource_dump_command_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
u8 segment_called[0x10];
u8 vhca_id[0x10];
u8 index1[0x20];
u8 index2[0x20];
u8 num_of_obj1[0x10];
u8 num_of_obj2[0x10];
};
/* Resource dump "error" segment: header, a syndrome id and eight dwords of
 * error data.
 */
struct mlx5_ifc_resource_dump_error_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
u8 reserved_at_20[0x10];
u8 syndrome_id[0x10];
u8 reserved_at_40[0x40];
u8 error[8][0x20];
};
/* Resource dump "info" segment: header plus dump, hardware and firmware
 * version fields.
 */
struct mlx5_ifc_resource_dump_info_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
u8 reserved_at_20[0x18];
u8 dump_version[0x8];
u8 hw_version[0x20];
u8 fw_version[0x20];
};
/* Resource dump "menu" segment: header, record count and a trailing
 * zero-length array of menu records (presumably num_of_records of them
 * follow — confirm).
 */
struct mlx5_ifc_resource_dump_menu_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
u8 reserved_at_20[0x10];
u8 num_of_records[0x10];
struct mlx5_ifc_resource_dump_menu_record_bits record[0];
};
/* Resource dump "resource" segment: header, the two indices, and a trailing
 * zero-length array of payload dwords.
 */
struct mlx5_ifc_resource_dump_resource_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
u8 reserved_at_20[0x20];
u8 index1[0x20];
u8 index2[0x20];
u8 payload[0][0x20];
};
/* Resource dump "terminate" segment: consists of the segment header only. */
struct mlx5_ifc_resource_dump_terminate_segment_bits {
struct mlx5_ifc_resource_dump_segment_header_bits segment_header;
};
/* Segment sequence of a menu dump response: info, command, menu and
 * terminate segments, in that order.
 */
struct mlx5_ifc_menu_resource_dump_response_bits {
struct mlx5_ifc_resource_dump_info_segment_bits info;
struct mlx5_ifc_resource_dump_command_segment_bits cmd;
struct mlx5_ifc_resource_dump_menu_segment_bits menu;
struct mlx5_ifc_resource_dump_terminate_segment_bits terminate;
};
enum {
MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1,
MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2,
......@@ -2026,7 +2180,9 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
u8 rx_pause_transition_low[0x20];
u8 reserved_at_3c0[0x40];
u8 rx_discards_high[0x20];
u8 rx_discards_low[0x20];
u8 device_stall_minor_watermark_cnt_high[0x20];
......@@ -2751,6 +2907,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_fpga_cap_bits fpga_cap;
struct mlx5_ifc_tls_cap_bits tls_cap;
struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
struct mlx5_ifc_device_virtio_emulation_cap_bits virtio_emulation_cap;
u8 reserved_at_0[0x8000];
};
......@@ -3998,7 +4155,8 @@ struct mlx5_ifc_set_fte_in_bits {
u8 reserved_at_a0[0x8];
u8 table_id[0x18];
u8 reserved_at_c0[0x18];
u8 ignore_flow_level[0x1];
u8 reserved_at_c1[0x17];
u8 modify_enable_mask[0x8];
u8 reserved_at_e0[0x20];
......@@ -5466,15 +5624,32 @@ struct mlx5_ifc_add_action_in_bits {
u8 data[0x20];
};
/*
 * Bit layout of a "copy" modify-header action (0x40 bits in total).
 *
 * The first 0x20 bits hold the action type, the source field/offset and
 * the length; the second 0x20 bits hold the destination field/offset.
 * Array lengths are widths in bits; reserved_at_<off> names give the bit
 * offset of the reserved field.
 */
struct mlx5_ifc_copy_action_in_bits {
u8 action_type[0x4];
u8 src_field[0xc];
u8 reserved_at_10[0x3];
u8 src_offset[0x5];
u8 reserved_at_18[0x3];
/* NOTE(review): presumably the number of bits to copy - confirm units. */
u8 length[0x5];
u8 reserved_at_20[0x4];
u8 dst_field[0xc];
u8 reserved_at_30[0x3];
u8 dst_offset[0x5];
u8 reserved_at_38[0x8];
};
/*
 * Overlay of the set, add and copy action layouts, so one action slot can
 * be read as any of the three.
 */
union mlx5_ifc_set_action_in_add_action_in_auto_bits {
struct mlx5_ifc_set_action_in_bits set_action_in;
struct mlx5_ifc_add_action_in_bits add_action_in;
struct mlx5_ifc_copy_action_in_bits copy_action_in;
/* Makes the union at least 0x40 bits wide. */
u8 reserved_at_0[0x40];
};
/*
 * Action type selectors.
 * NOTE(review): presumably the values written to the action_type field of
 * the set/add/copy action layouts - confirm against the users.
 */
enum {
	MLX5_ACTION_TYPE_SET	= 0x1,
	MLX5_ACTION_TYPE_ADD	= 0x2,
	MLX5_ACTION_TYPE_COPY	= 0x3,
};
enum {
......@@ -5510,6 +5685,8 @@ enum {
MLX5_ACTION_IN_FIELD_METADATA_REG_C_3 = 0x54,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_4 = 0x55,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_5 = 0x56,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_6 = 0x57,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_7 = 0x58,
MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59,
MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B,
};
......@@ -8406,6 +8583,18 @@ struct mlx5_ifc_pplm_reg_bits {
u8 fec_override_admin_50g[0x4];
u8 fec_override_admin_25g[0x4];
u8 fec_override_admin_10g_40g[0x4];
u8 fec_override_cap_400g_8x[0x10];
u8 fec_override_cap_200g_4x[0x10];
u8 fec_override_cap_100g_2x[0x10];
u8 fec_override_cap_50g_1x[0x10];
u8 fec_override_admin_400g_8x[0x10];
u8 fec_override_admin_200g_4x[0x10];
u8 fec_override_admin_100g_2x[0x10];
u8 fec_override_admin_50g_1x[0x10];
};
struct mlx5_ifc_ppcnt_reg_bits {
......@@ -8732,7 +8921,9 @@ struct mlx5_ifc_mpegc_reg_bits {
};
struct mlx5_ifc_pcam_enhanced_features_bits {
u8 reserved_at_0[0x6d];
u8 reserved_at_0[0x68];
u8 fec_50G_per_lane_in_pplm[0x1];
u8 reserved_at_69[0x4];
u8 rx_icrc_encapsulated_counter[0x1];
u8 reserved_at_6e[0x4];
u8 ptys_extended_ethernet[0x1];
......@@ -8817,6 +9008,28 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 regs_31_to_0[0x20];
};
/*
 * MCAM access-register mask layout, variant 1: four 0x20-bit words
 * (0x80 bits in total), named from register index 127 down to 0.
 * No individual register bits are broken out in this variant.
 */
struct mlx5_ifc_mcam_access_reg_bits1 {
u8 regs_127_to_96[0x20];
u8 regs_95_to_64[0x20];
u8 regs_63_to_32[0x20];
u8 regs_31_to_0[0x20];
};
/*
 * MCAM access-register mask layout, variant 2 (0x80 bits in total).
 *
 * Same four-word shape as the other access_reg layouts, but the top word
 * is split so that a single bit is exposed by name.
 */
struct mlx5_ifc_mcam_access_reg_bits2 {
u8 regs_127_to_99[0x1d];
/* Single bit between indices 99 and 97, i.e. index 98 by the naming. */
u8 mirc[0x1];
u8 regs_97_to_96[0x2];
u8 regs_95_to_64[0x20];
u8 regs_63_to_32[0x20];
u8 regs_31_to_0[0x20];
};
struct mlx5_ifc_mcam_reg_bits {
u8 reserved_at_0[0x8];
u8 feature_group[0x8];
......@@ -8827,6 +9040,8 @@ struct mlx5_ifc_mcam_reg_bits {
union {
struct mlx5_ifc_mcam_access_reg_bits access_regs;
struct mlx5_ifc_mcam_access_reg_bits1 access_regs1;
struct mlx5_ifc_mcam_access_reg_bits2 access_regs2;
u8 reserved_at_0[0x80];
} mng_access_reg_cap_mask;
......@@ -9432,6 +9647,13 @@ struct mlx5_ifc_mcda_reg_bits {
u8 data[0][0x20];
};
/*
 * Bit layout of the MIRC register (0x40 bits in total): an 8-bit
 * status_code placed after 0x18 reserved bits, followed by a reserved
 * 0x20-bit row.
 * NOTE(review): the meaning of the status_code values is not visible here.
 */
struct mlx5_ifc_mirc_reg_bits {
u8 reserved_at_0[0x18];
u8 status_code[0x8];
u8 reserved_at_20[0x20];
};
union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
......@@ -9487,6 +9709,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
struct mlx5_ifc_mcc_reg_bits mcc_reg;
struct mlx5_ifc_mcda_reg_bits mcda_reg;
struct mlx5_ifc_mirc_reg_bits mirc_reg;
u8 reserved_at_0[0x60e0];
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment