Commit 2221d954 authored by Vlad Buslov, committed by Saeed Mahameed

net/mlx5e: Refactor neigh update infrastructure

The following patches in this series implement route update, which can cause
encap entries to migrate between routing devices. Consequently, their parent
nhe's also need to be transferable between devices instead of having the neigh
device as a part of their immutable key. Move the neigh device from struct
mlx5e_neigh to struct mlx5e_neigh_hash_entry, and check that the nhe and neigh
devices are the same in the workqueue neigh update handler.

Save the neigh net_device, which can change dynamically, in the dedicated
nhe->neigh_dev field. Once the FIB event handler implemented in the following
patches starts changing nhe->neigh_dev, the NETEVENT_DELAY_PROBE_TIME_UPDATE
handler can concurrently access the nhe entry while traversing the neigh list
under the rcu read lock. Processing stale values in that handler doesn't change
the handler logic, so just wrap all accesses to the dev pointer in
{WRITE|READ}_ONCE() helpers.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 777bb800
...@@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, ...@@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
bool neigh_connected), bool neigh_connected),
TP_ARGS(nhe, ha, neigh_connected), TP_ARGS(nhe, ha, neigh_connected),
TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, ha, ETH_ALEN) __array(u8, ha, ETH_ALEN)
__array(u8, v4, 4) __array(u8, v4, 4)
__array(u8, v6, 16) __array(u8, v6, 16)
...@@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, ...@@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update,
struct in6_addr *pin6; struct in6_addr *pin6;
__be32 *p32; __be32 *p32;
__assign_str(devname, mn->dev->name); __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_connected = neigh_connected; __entry->neigh_connected = neigh_connected;
memcpy(__entry->ha, ha, ETH_ALEN); memcpy(__entry->ha, ha, ETH_ALEN);
......
...@@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower, ...@@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower,
TRACE_EVENT(mlx5e_tc_update_neigh_used_value, TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
TP_ARGS(nhe, neigh_used), TP_ARGS(nhe, neigh_used),
TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name)
__array(u8, v4, 4) __array(u8, v4, 4)
__array(u8, v6, 16) __array(u8, v6, 16)
__field(bool, neigh_used) __field(bool, neigh_used)
...@@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value, ...@@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
struct in6_addr *pin6; struct in6_addr *pin6;
__be32 *p32; __be32 *p32;
__assign_str(devname, mn->dev->name); __assign_str(devname, nhe->neigh_dev->name);
__entry->neigh_used = neigh_used; __entry->neigh_used = neigh_used;
p32 = (__be32 *)__entry->v4; p32 = (__be32 *)__entry->v4;
......
...@@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) ...@@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
work); work);
struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
struct neighbour *n = update_work->n; struct neighbour *n = update_work->n;
bool neigh_connected, same_dev;
struct mlx5e_encap_entry *e; struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN]; unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv; struct mlx5e_priv *priv;
bool neigh_connected;
u8 nud_state, dead; u8 nud_state, dead;
rtnl_lock(); rtnl_lock();
...@@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) ...@@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
memcpy(ha, n->ha, ETH_ALEN); memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state; nud_state = n->nud_state;
dead = n->dead; dead = n->dead;
same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
read_unlock_bh(&n->lock); read_unlock_bh(&n->lock);
neigh_connected = (nud_state & NUD_VALID) && !dead; neigh_connected = (nud_state & NUD_VALID) && !dead;
trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
if (!same_dev)
goto out;
list_for_each_entry(e, &nhe->encap_list, encap_list) { list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e)) if (!mlx5e_encap_take(e))
continue; continue;
...@@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) ...@@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e); mlx5e_encap_put(priv, e);
} }
out:
rtnl_unlock(); rtnl_unlock();
mlx5e_release_neigh_update_work(update_work); mlx5e_release_neigh_update_work(update_work);
} }
...@@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv ...@@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv
if (WARN_ON(!update_work)) if (WARN_ON(!update_work))
return NULL; return NULL;
m_neigh.dev = n->dev;
m_neigh.family = n->ops->family; m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
...@@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, ...@@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) { neigh_list) {
if (p->dev == nhe->m_neigh.dev) { if (p->dev == READ_ONCE(nhe->neigh_dev)) {
found = true; found = true;
break; break;
} }
...@@ -369,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, ...@@ -369,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
} }
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e, struct mlx5e_neigh *m_neigh,
struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe) struct mlx5e_neigh_hash_entry **nhe)
{ {
int err; int err;
...@@ -379,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, ...@@ -379,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
return -ENOMEM; return -ENOMEM;
(*nhe)->priv = priv; (*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
spin_lock_init(&(*nhe)->encap_list_lock); spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list); INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1); refcount_set(&(*nhe)->refcnt, 1);
WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
err = mlx5e_rep_neigh_entry_insert(priv, *nhe); err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err) if (err)
......
...@@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry * ...@@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh); struct mlx5e_neigh *m_neigh);
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e, struct mlx5e_neigh *m_neigh,
struct net_device *neigh_dev,
struct mlx5e_neigh_hash_entry **nhe); struct mlx5e_neigh_hash_entry **nhe);
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
......
...@@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv { ...@@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv {
}; };
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e) struct mlx5e_encap_entry *e,
struct mlx5e_neigh *m_neigh,
struct net_device *neigh_dev)
{ {
struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
...@@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, ...@@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
return err; return err;
mutex_lock(&rpriv->neigh_update.encap_lock); mutex_lock(&rpriv->neigh_update.encap_lock);
nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
if (!nhe) { if (!nhe) {
err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
if (err) { if (err) {
mutex_unlock(&rpriv->neigh_update.encap_lock); mutex_unlock(&rpriv->neigh_update.encap_lock);
mlx5_tun_entropy_refcount_dec(tun_entropy, mlx5_tun_entropy_refcount_dec(tun_entropy,
......
...@@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, ...@@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
unsigned char ha[ETH_ALEN]); unsigned char ha[ETH_ALEN]);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e); struct mlx5e_encap_entry *e,
struct mlx5e_neigh *m_neigh,
struct net_device *neigh_dev);
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e); struct mlx5e_encap_entry *e);
......
...@@ -212,6 +212,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ...@@ -212,6 +212,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
{ {
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key; const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct mlx5e_neigh m_neigh = {};
TC_TUN_ROUTE_ATTR_INIT(attr); TC_TUN_ROUTE_ATTR_INIT(attr);
int ipv4_encap_size; int ipv4_encap_size;
char *encap_header; char *encap_header;
...@@ -247,12 +248,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ...@@ -247,12 +248,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
goto release_neigh; goto release_neigh;
} }
/* used by mlx5e_detach_encap to lookup a neigh hash table m_neigh.family = attr.n->ops->family;
* entry in the neigh hash table when a user deletes a rule memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
*/
e->m_neigh.dev = attr.n->dev;
e->m_neigh.family = attr.n->ops->family;
memcpy(&e->m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
e->out_dev = attr.out_dev; e->out_dev = attr.out_dev;
e->route_dev_ifindex = attr.route_dev->ifindex; e->route_dev_ifindex = attr.route_dev->ifindex;
...@@ -261,7 +258,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ...@@ -261,7 +258,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
* neigh changes it's validity state, we would find the relevant neigh * neigh changes it's validity state, we would find the relevant neigh
* in the hash. * in the hash.
*/ */
err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e); err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err) if (err)
goto free_encap; goto free_encap;
...@@ -375,6 +372,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ...@@ -375,6 +372,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
{ {
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key; const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct mlx5e_neigh m_neigh = {};
TC_TUN_ROUTE_ATTR_INIT(attr); TC_TUN_ROUTE_ATTR_INIT(attr);
struct ipv6hdr *ip6h; struct ipv6hdr *ip6h;
int ipv6_encap_size; int ipv6_encap_size;
...@@ -409,12 +407,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ...@@ -409,12 +407,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto release_neigh; goto release_neigh;
} }
/* used by mlx5e_detach_encap to lookup a neigh hash table m_neigh.family = attr.n->ops->family;
* entry in the neigh hash table when a user deletes a rule memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
*/
e->m_neigh.dev = attr.n->dev;
e->m_neigh.family = attr.n->ops->family;
memcpy(&e->m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
e->out_dev = attr.out_dev; e->out_dev = attr.out_dev;
e->route_dev_ifindex = attr.route_dev->ifindex; e->route_dev_ifindex = attr.route_dev->ifindex;
...@@ -423,7 +417,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ...@@ -423,7 +417,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
* neigh changes it's validity state, we would find the relevant neigh * neigh changes it's validity state, we would find the relevant neigh
* in the hash. * in the hash.
*/ */
err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e); err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
if (err) if (err)
goto free_encap; goto free_encap;
......
...@@ -310,7 +310,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) ...@@ -310,7 +310,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
/* find the relevant neigh according to the cached device and /* find the relevant neigh according to the cached device and
* dst ip pair * dst ip pair
*/ */
n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
if (!n) if (!n)
return; return;
......
...@@ -110,7 +110,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) ...@@ -110,7 +110,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
} }
struct mlx5e_neigh { struct mlx5e_neigh {
struct net_device *dev;
union { union {
__be32 v4; __be32 v4;
struct in6_addr v6; struct in6_addr v6;
...@@ -122,6 +121,7 @@ struct mlx5e_neigh_hash_entry { ...@@ -122,6 +121,7 @@ struct mlx5e_neigh_hash_entry {
struct rhash_head rhash_node; struct rhash_head rhash_node;
struct mlx5e_neigh m_neigh; struct mlx5e_neigh m_neigh;
struct mlx5e_priv *priv; struct mlx5e_priv *priv;
struct net_device *neigh_dev;
/* Save the neigh hash entry in a list on the representor in /* Save the neigh hash entry in a list on the representor in
* addition to the hash table. In order to iterate easily over the * addition to the hash table. In order to iterate easily over the
...@@ -175,7 +175,6 @@ struct mlx5e_encap_entry { ...@@ -175,7 +175,6 @@ struct mlx5e_encap_entry {
struct mlx5e_neigh_hash_entry *nhe; struct mlx5e_neigh_hash_entry *nhe;
/* neigh hash entry list of encaps sharing the same neigh */ /* neigh hash entry list of encaps sharing the same neigh */
struct list_head encap_list; struct list_head encap_list;
struct mlx5e_neigh m_neigh;
/* a node of the eswitch encap hash table which keeping all the encap /* a node of the eswitch encap hash table which keeping all the encap
* entries * entries
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment