Commit 777bb800 authored by Vlad Buslov's avatar Vlad Buslov Committed by Saeed Mahameed

net/mlx5e: Create route entry infrastructure

Implement dedicated route entry infrastructure to be used in following
patch by route update event. Both encap (indirectly through their
corresponding encap entries) and decap (directly) flows are attached to
routing entry. Since route update also requires updating encap (route
device MAC address is a source MAC address of tunnel encapsulation), same
encap_tbl_lock mutex is used for synchronization.

The new infrastructure looks similar to existing infrastructures for shared
encap, mod_hdr and hairpin entries:

- Per-eswitch hash table is used for quick entry lookup.

- Flows are attached to per-entry linked list and hold reference to entry
  during their lifetime.

- Atomic reference counting and rcu mechanisms are used as synchronization
  primitives for concurrent access.

The infrastructure also enables connection tracking on stacked devices
topology by attaching CT chain 0 flow on tunneling dev to decap route
entry.
Signed-off-by: default avatarVlad Buslov <vladbu@nvidia.com>
Signed-off-by: default avatarDmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: default avatarRoi Dayan <roid@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 0d9f9647
......@@ -25,6 +25,7 @@ enum {
MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9,
};
struct mlx5e_tc_flow_parse_attr {
......@@ -59,6 +60,11 @@ struct encap_flow_item {
int index;
};
struct encap_route_flow_item {
struct mlx5e_route_entry *r; /* attached route instance */
int index;
};
struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
......@@ -70,6 +76,11 @@ struct mlx5e_tc_flow {
struct list_head l3_to_l2_reformat;
struct mlx5e_decap_entry *decap_reformat;
/* flows sharing same route entry */
struct list_head decap_routes;
struct mlx5e_route_entry *decap_route;
struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS];
/* Flow can be associated with multiple encap IDs.
* The number of encaps is bounded by the number of supported
* destinations.
......
......@@ -7,6 +7,23 @@
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"
struct mlx5e_route_key {
int ip_version;
union {
__be32 v4;
struct in6_addr v6;
} endpoint_ip;
};
struct mlx5e_route_entry {
struct mlx5e_route_key key;
struct list_head encap_entries;
struct list_head decap_flows;
struct hlist_node hlist;
refcount_t refcnt;
struct rcu_head rcu;
};
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
{
......@@ -29,10 +46,13 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
tun_attr->dst_ip.v4 = *(__be32 *)daddr;
tun_attr->src_ip.v4 = *(__be32 *)saddr;
if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
return 0;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (ip_version == 6) {
int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
struct in6_addr zerov6 = {};
daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
......@@ -40,8 +60,15 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
!memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
return 0;
}
#endif
/* Only set the flag if both src and dst ip addresses exist. They are
* required to establish routing.
*/
flow_flag_set(flow, TUN_RX);
return 0;
}
......@@ -325,6 +352,7 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
return;
list_del(&e->route_list);
hash_del_rcu(&e->encap_hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
......@@ -343,12 +371,20 @@ static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d
mlx5e_decap_dealloc(priv, d);
}
static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
int out_index);
void mlx5e_detach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow, int out_index)
{
struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
if (flow->attr->esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
mlx5e_detach_encap_route(priv, flow, out_index);
/* flow wasn't fully initialized */
if (!e)
return;
......@@ -360,6 +396,7 @@ void mlx5e_detach_encap(struct mlx5e_priv *priv,
mutex_unlock(&esw->offloads.encap_tbl_lock);
return;
}
list_del(&e->route_list);
hash_del_rcu(&e->encap_hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
......@@ -531,6 +568,12 @@ static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
return err;
}
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
int out_index);
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
......@@ -545,6 +588,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
const struct ip_tunnel_info *tun_info;
struct encap_key key;
struct mlx5e_encap_entry *e;
bool entry_created = false;
unsigned short family;
uintptr_t hash_key;
int err = 0;
......@@ -592,6 +636,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
refcount_set(&e->refcnt, 1);
init_completion(&e->res_ready);
entry_created = true;
INIT_LIST_HEAD(&e->route_list);
tun_info = mlx5e_dup_tun_info(tun_info);
if (!tun_info) {
......@@ -622,8 +668,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
e->compl_result = 1;
attach_flow:
err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
e->route_dev_ifindex, out_index);
err = mlx5e_attach_encap_route(priv, flow, e, entry_created, out_index);
if (err)
goto out_err;
......@@ -732,3 +777,212 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
mutex_unlock(&esw->offloads.decap_tbl_lock);
return err;
}
static int cmp_route_info(struct mlx5e_route_key *a,
struct mlx5e_route_key *b)
{
if (a->ip_version == 4 && b->ip_version == 4)
return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
sizeof(a->endpoint_ip.v4));
else if (a->ip_version == 6 && b->ip_version == 6)
return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
sizeof(a->endpoint_ip.v6));
return 1;
}
static u32 hash_route_info(struct mlx5e_route_key *key)
{
if (key->ip_version == 4)
return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}
static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_priv *priv, struct mlx5e_route_key *key,
u32 hash_key)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_route_key r_key;
struct mlx5e_route_entry *r;
hash_for_each_possible(esw->offloads.route_tbl, r, hlist, hash_key) {
r_key = r->key;
if (!cmp_route_info(&r_key, key) &&
refcount_inc_not_zero(&r->refcnt))
return r;
}
return NULL;
}
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
struct mlx5e_route_key *key)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_route_entry *r;
u32 hash_key;
hash_key = hash_route_info(key);
r = mlx5e_route_get(priv, key, hash_key);
if (r)
return r;
r = kzalloc(sizeof(*r), GFP_KERNEL);
if (!r)
return ERR_PTR(-ENOMEM);
r->key = *key;
refcount_set(&r->refcnt, 1);
INIT_LIST_HEAD(&r->decap_flows);
INIT_LIST_HEAD(&r->encap_entries);
hash_add(esw->offloads.route_tbl, &r->hlist, hash_key);
return r;
}
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_route_entry *r;
struct mlx5e_route_key key;
int err = 0;
esw_attr = attr->esw_attr;
parse_attr = attr->parse_attr;
mutex_lock(&esw->offloads.encap_tbl_lock);
if (!esw_attr->rx_tun_attr)
goto out;
err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
if (err || !esw_attr->rx_tun_attr->decap_vport)
goto out;
key.ip_version = attr->ip_version;
if (key.ip_version == 4)
key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
else
key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
r = mlx5e_route_get_create(priv, &key);
if (IS_ERR(r)) {
err = PTR_ERR(r);
goto out;
}
flow->decap_route = r;
list_add(&flow->decap_routes, &r->decap_flows);
mutex_unlock(&esw->offloads.encap_tbl_lock);
return 0;
out:
mutex_unlock(&esw->offloads.encap_tbl_lock);
return err;
}
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
int out_index)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_route_entry *r;
struct mlx5e_route_key key;
unsigned short family;
int err = 0;
esw_attr = attr->esw_attr;
parse_attr = attr->parse_attr;
tun_info = parse_attr->tun_info[out_index];
family = ip_tunnel_info_af(tun_info);
if (family == AF_INET) {
key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
key.ip_version = 4;
} else if (family == AF_INET6) {
key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
key.ip_version = 6;
}
err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
e->route_dev_ifindex, out_index);
if (err || !(esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
return err;
r = mlx5e_route_get_create(priv, &key);
if (IS_ERR(r))
return PTR_ERR(r);
flow->encap_routes[out_index].r = r;
if (new_encap_entry)
list_add(&e->route_list, &r->encap_entries);
flow->encap_routes[out_index].index = out_index;
return 0;
}
static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
struct mlx5e_route_entry *r)
{
WARN_ON(!list_empty(&r->decap_flows));
WARN_ON(!list_empty(&r->encap_entries));
kfree_rcu(r, rcu);
}
void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_route_entry *r = flow->decap_route;
if (!r)
return;
mutex_lock(&esw->offloads.encap_tbl_lock);
list_del(&flow->decap_routes);
flow->decap_route = NULL;
if (!refcount_dec_and_test(&r->refcnt)) {
mutex_unlock(&esw->offloads.encap_tbl_lock);
return;
}
hash_del_rcu(&r->hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
mlx5e_route_dealloc(priv, r);
}
static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
int out_index)
{
struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_encap_entry *e, *tmp;
if (!r)
return;
mutex_lock(&esw->offloads.encap_tbl_lock);
flow->encap_routes[out_index].r = NULL;
if (!refcount_dec_and_test(&r->refcnt)) {
mutex_unlock(&esw->offloads.encap_tbl_lock);
return;
}
list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
list_del_init(&e->route_list);
hash_del_rcu(&r->hlist);
mutex_unlock(&esw->offloads.encap_tbl_lock);
mlx5e_route_dealloc(priv, r);
}
......@@ -22,6 +22,11 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
void mlx5e_detach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
......
......@@ -181,6 +181,7 @@ struct mlx5e_encap_entry {
*/
struct hlist_node encap_hlist;
struct list_head flows;
struct list_head route_list;
struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
......
......@@ -1251,6 +1251,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
if (flow_flag_test(flow, TUN_RX)) {
err = mlx5e_attach_decap_route(priv, flow);
if (err)
return err;
}
if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
err = mlx5e_attach_decap(priv, flow, extack);
if (err)
......@@ -1335,8 +1341,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
int out_index;
esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
if (flow_flag_test(flow, NOT_READY))
......@@ -1354,11 +1362,15 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
if (flow->decap_route)
mlx5e_detach_decap_route(priv, flow);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
}
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
......@@ -1368,7 +1380,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
......@@ -3692,12 +3704,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
}
if (decap && esw_attr->rx_tun_attr) {
err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
if (err)
return err;
}
/* always set IP version for indirect table handling */
attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
......
......@@ -1830,6 +1830,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->offloads.decap_tbl_lock);
hash_init(esw->offloads.decap_tbl);
mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr);
hash_init(esw->offloads.route_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
......
......@@ -214,6 +214,7 @@ struct mlx5_esw_offload {
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(route_tbl, 8);
struct mutex decap_tbl_lock; /* protects decap_tbl */
DECLARE_HASHTABLE(decap_tbl, 8);
struct mod_hdr_tbl mod_hdr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment