Commit 8914add2 authored by Vlad Buslov's avatar Vlad Buslov Committed by Saeed Mahameed

net/mlx5e: Handle FIB events to update tunnel endpoint device

Process FIB route update events to dynamically update the stack device
rules when tunnel routing changes. Use rtnl lock to prevent FIB event
handler from running concurrently with neigh update and neigh stats
workqueue tasks. Use encap_tbl_lock mutex to synchronize with TC rule
update path that doesn't use rtnl lock.

FIB event workflow for encap flows:

- Unoffload all flows attached to route encaps from slow or fast path
depending on encap destination endpoint neigh state.

- Update encap IP header according to new route dev.

- Update flows mod_hdr action that is responsible for overwriting reg_c0
source port bits to source port of new underlying VF of new route dev. This
step requires changing flow create/delete code to save flow parse attribute
mod_hdr_acts structure for whole flow lifetime instead of deallocating it
after flow creation. Refactor mod_hdr code to allow saving id of individual
mod_hdr actions and updating them with dedicated helper.

- Offload all flows to either slow or fast path depending on encap
destination endpoint neigh state.

FIB event workflow for decap flows:

- Unoffload all route flows from hardware. When last route flow is deleted
all indirect table rules for the route dev will also be deleted.

- Update flow attr decap_vport and destination MAC according to underlying
VF of new rote dev.

- Offload all route flows back to hardware creating new indirect table
rules according to updated flow attribute data.

Extract some neigh update code to helper functions to be used by both neigh
update and route update infrastructure.
Signed-off-by: default avatarVlad Buslov <vladbu@nvidia.com>
Signed-off-by: default avatarDmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: default avatarRoi Dayan <roid@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 021905f8
......@@ -26,6 +26,7 @@ enum {
MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10,
};
struct mlx5e_tc_flow_parse_attr {
......
......@@ -32,4 +32,7 @@ struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec);
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv);
void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap);
#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */
......@@ -59,6 +59,8 @@ struct mlx5e_neigh_update_table {
struct mlx5_tc_ct_priv;
struct mlx5e_rep_bond;
struct mlx5e_tc_tun_encap;
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
* the uplink's VFs
......@@ -90,6 +92,9 @@ struct mlx5_rep_uplink_priv {
/* support eswitch vports bonding */
struct mlx5e_rep_bond *bond;
/* tc tunneling encapsulation private data */
struct mlx5e_tc_tun_encap *encap;
};
struct mlx5e_rep_priv {
......@@ -153,6 +158,7 @@ enum {
/* set when the encap entry is successfully offloaded into HW */
MLX5_ENCAP_ENTRY_VALID = BIT(0),
MLX5_REFORMAT_DECAP = BIT(1),
MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2),
};
struct mlx5e_decap_key {
......
......@@ -1279,11 +1279,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
bool vf_tun = false, encap_valid = true;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
bool encap_valid = true;
u32 max_prio, max_chain;
int err = 0;
int out_index;
......@@ -1297,26 +1297,28 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG_MOD(extack,
"Requested chain is out of supported range");
return -EOPNOTSUPP;
err = -EOPNOTSUPP;
goto err_out;
}
max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
if (attr->prio > max_prio) {
NL_SET_ERR_MSG_MOD(extack,
"Requested priority is out of supported range");
return -EOPNOTSUPP;
err = -EOPNOTSUPP;
goto err_out;
}
if (flow_flag_test(flow, TUN_RX)) {
err = mlx5e_attach_decap_route(priv, flow);
if (err)
return err;
goto err_out;
}
if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
err = mlx5e_attach_decap(priv, flow, extack);
if (err)
return err;
goto err_out;
}
parse_attr = attr->parse_attr;
......@@ -1334,8 +1336,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
extack, &encap_dev, &encap_valid);
if (err)
return err;
goto err_out;
if (esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
vf_tun = true;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
esw_attr->dests[out_index].rep = rpriv->rep;
......@@ -1344,19 +1349,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
return err;
goto err_out;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
if (vf_tun) {
err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
if (err)
goto err_out;
} else {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
if (err)
return err;
goto err_out;
}
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(esw_attr->counter_dev, true);
if (IS_ERR(counter))
return PTR_ERR(counter);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
goto err_out;
}
attr->counter = counter;
}
......@@ -1370,12 +1383,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
if (IS_ERR(flow->rule[0]))
return PTR_ERR(flow->rule[0]);
else
if (IS_ERR(flow->rule[0])) {
err = PTR_ERR(flow->rule[0]);
goto err_out;
}
flow_flag_set(flow, OFFLOADED);
return 0;
err_out:
flow_flag_set(flow, FAILED);
return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
......@@ -1397,6 +1415,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
bool vf_tun = false;
int out_index;
esw_attr = attr->esw_attr;
......@@ -1421,20 +1440,26 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_detach_decap_route(priv, flow);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
if (esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
vf_tun = true;
if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
}
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
if (vf_tun && attr->modify_hdr)
mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
else
mlx5e_detach_mod_hdr(priv, flow);
}
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
......@@ -4044,7 +4069,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return flow;
err_free:
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return ERR_PTR(err);
......@@ -4189,6 +4213,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
return 0;
err_free:
flow_flag_set(flow, FAILED);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
......@@ -4724,8 +4749,14 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
uplink_priv->encap = mlx5e_tc_tun_init(priv);
if (IS_ERR(uplink_priv->encap))
goto err_register_fib_notifier;
return err;
err_register_fib_notifier:
rhashtable_destroy(tc_ht);
err_ht_init:
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
......@@ -4742,10 +4773,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
struct mlx5_rep_uplink_priv *uplink_priv;
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
mlx5e_tc_tun_cleanup(uplink_priv->encap);
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
mapping_destroy(uplink_priv->tunnel_mapping);
......
......@@ -1830,7 +1830,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->offloads.decap_tbl_lock);
hash_init(esw->offloads.decap_tbl);
mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
hash_init(esw->offloads.route_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
......
......@@ -214,7 +214,6 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(route_tbl, 8);
struct mutex decap_tbl_lock; /* protects decap_tbl */
DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;
......@@ -424,6 +423,7 @@ struct mlx5_esw_flow_attr {
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_core_dev *mdev;
struct mlx5_termtbl_handle *termtbl;
int src_port_rewrite_act_id;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_rx_tun_attr *rx_tun_attr;
struct mlx5_pkt_reformat *decap_pkt_reformat;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment