Commit d5fa9c55 authored by David S. Miller

Merge tag 'mlx5-updates-2019-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2019-03-01

This series adds multipath offload support and contains some small updates
to mlx5 driver.

Multipath offload support from Roi Dayan:

We are going to track SW multipath route and related nexthops and reflect
that as port affinity to the HW.

1) Some patches are preparation.
2) add the multipath mode and fib events handling.
3) add support to handle offload failure for net error, i.e.
port down.
4) Small updates to match the behavior of multipath

Two small updates from Eran Ben Elisha,
5) Make a function static
6) Update PCIe supported devices list.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4e7df119 85327a9c
...@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ ...@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o
# #
# Core extra # Core extra
......
...@@ -179,7 +179,7 @@ int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) ...@@ -179,7 +179,7 @@ int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
/* state lock cannot be grabbed within this function. /* state lock cannot be grabbed within this function.
* It can cause a dead lock or a read-after-free. * It can cause a dead lock or a read-after-free.
*/ */
int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
{ {
return err_ctx->recover(err_ctx->sq); return err_ctx->recover(err_ctx->sq);
} }
......
...@@ -54,12 +54,24 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, ...@@ -54,12 +54,24 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
struct neighbour *n = NULL; struct neighbour *n = NULL;
#if IS_ENABLED(CONFIG_INET) #if IS_ENABLED(CONFIG_INET)
struct mlx5_core_dev *mdev = priv->mdev;
struct net_device *uplink_dev;
int ret; int ret;
if (mlx5_lag_is_multipath(mdev)) {
struct mlx5_eswitch *esw = mdev->priv.eswitch;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
fl4->flowi4_oif = uplink_dev->ifindex;
}
rt = ip_route_output_key(dev_net(mirred_dev), fl4); rt = ip_route_output_key(dev_net(mirred_dev), fl4);
ret = PTR_ERR_OR_ZERO(rt); ret = PTR_ERR_OR_ZERO(rt);
if (ret) if (ret)
return ret; return ret;
if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway)
return -ENETUNREACH;
#else #else
return -EOPNOTSUPP; return -EOPNOTSUPP;
#endif #endif
...@@ -295,7 +307,9 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ...@@ -295,7 +307,9 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
if (!(nud_state & NUD_VALID)) { if (!(nud_state & NUD_VALID)) {
neigh_event_send(n, NULL); neigh_event_send(n, NULL);
err = -EAGAIN; /* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto out; goto out;
} }
...@@ -408,7 +422,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ...@@ -408,7 +422,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
if (!(nud_state & NUD_VALID)) { if (!(nud_state & NUD_VALID)) {
neigh_event_send(n, NULL); neigh_event_send(n, NULL);
err = -EAGAIN; /* the encap entry will be made valid on neigh update event
* and not used before that.
*/
goto out; goto out;
} }
......
...@@ -1573,6 +1573,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) ...@@ -1573,6 +1573,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
uplink_priv = &rpriv->uplink_priv; uplink_priv = &rpriv->uplink_priv;
INIT_LIST_HEAD(&uplink_priv->unready_flows);
/* init shared tc flow table */ /* init shared tc flow table */
err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
if (err) if (err)
...@@ -1632,27 +1634,38 @@ static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) ...@@ -1632,27 +1634,38 @@ static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{ {
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
struct mlx5_eqe *eqe = data;
if (event != MLX5_EVENT_TYPE_PORT_CHANGE) if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
return NOTIFY_DONE; struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) { switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work); queue_work(priv->wq, &priv->update_carrier_work);
break; break;
default: default:
return NOTIFY_DONE; return NOTIFY_DONE;
}
return NOTIFY_OK;
} }
return NOTIFY_OK; if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
struct mlx5e_rep_priv *rpriv = priv->ppriv;
queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
return NOTIFY_OK;
}
return NOTIFY_DONE;
} }
static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{ {
struct net_device *netdev = priv->netdev; struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
u16 max_mtu; u16 max_mtu;
netdev->min_mtu = ETH_MIN_MTU; netdev->min_mtu = ETH_MIN_MTU;
...@@ -1660,6 +1673,9 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) ...@@ -1660,6 +1673,9 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
mlx5e_set_dev_port_mtu(priv); mlx5e_set_dev_port_mtu(priv);
INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
mlx5e_tc_reoffload_flows_work);
mlx5_lag_add(mdev, netdev); mlx5_lag_add(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event; priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb); mlx5_notifier_register(mdev, &priv->events_nb);
...@@ -1672,11 +1688,13 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) ...@@ -1672,11 +1688,13 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{ {
struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB #ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv); mlx5e_dcbnl_delete_app(priv);
#endif #endif
mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5_notifier_unregister(mdev, &priv->events_nb);
cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
mlx5_lag_remove(mdev); mlx5_lag_remove(mdev);
} }
......
...@@ -74,6 +74,9 @@ struct mlx5_rep_uplink_priv { ...@@ -74,6 +74,9 @@ struct mlx5_rep_uplink_priv {
struct notifier_block netdevice_nb; struct notifier_block netdevice_nb;
struct mlx5_tun_entropy tun_entropy; struct mlx5_tun_entropy tun_entropy;
struct list_head unready_flows;
struct work_struct reoffload_flows_work;
}; };
struct mlx5e_rep_priv { struct mlx5e_rep_priv {
......
...@@ -72,6 +72,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); ...@@ -72,6 +72,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
#else /* CONFIG_MLX5_ESWITCH */ #else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
......
...@@ -2476,3 +2476,10 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) ...@@ -2476,3 +2476,10 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
return false; return false;
} }
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
dev1->priv.eswitch->mode == SRIOV_OFFLOADS);
}
...@@ -371,6 +371,8 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev ...@@ -371,6 +371,8 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1); struct mlx5_core_dev *dev1);
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
#define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
......
...@@ -35,37 +35,8 @@ ...@@ -35,37 +35,8 @@
#include <linux/mlx5/vport.h> #include <linux/mlx5/vport.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include "eswitch.h" #include "eswitch.h"
#include "lag.h"
enum { #include "lag_mp.h"
MLX5_LAG_FLAG_ROCE = 1 << 0,
MLX5_LAG_FLAG_SRIOV = 1 << 1,
};
#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
};
/* Used for collection of netdev event info. */
struct lag_tracker {
enum netdev_lag_tx_type tx_type;
struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
bool is_bonded;
};
/* LAG data of a ConnectX card.
* It serves both its phys functions.
*/
struct mlx5_lag {
u8 flags;
u8 v2p_map[MLX5_MAX_PORTS];
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
struct delayed_work bond_work;
struct notifier_block nb;
};
/* General purpose, use for short periods of time. /* General purpose, use for short periods of time.
* Beware of lock dependencies (preferably, no locks should be acquired * Beware of lock dependencies (preferably, no locks should be acquired
...@@ -147,13 +118,8 @@ static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, ...@@ -147,13 +118,8 @@ static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
} }
static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
{ struct net_device *ndev)
return dev->priv.lag;
}
static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev)
{ {
int i; int i;
...@@ -174,11 +140,6 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) ...@@ -174,11 +140,6 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
} }
static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
{
return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
}
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
u8 *port1, u8 *port2) u8 *port1, u8 *port2)
{ {
...@@ -195,8 +156,8 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, ...@@ -195,8 +156,8 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
*port2 = 1; *port2 = 1;
} }
static void mlx5_modify_lag(struct mlx5_lag *ldev, void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker) struct lag_tracker *tracker)
{ {
struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
u8 v2p_port1, v2p_port2; u8 v2p_port1, v2p_port2;
...@@ -241,9 +202,9 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, ...@@ -241,9 +202,9 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
return err; return err;
} }
static int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker, struct lag_tracker *tracker,
u8 flags) u8 flags)
{ {
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
...@@ -386,7 +347,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) ...@@ -386,7 +347,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{ {
schedule_delayed_work(&ldev->bond_work, delay); queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
} }
static void mlx5_do_bond_work(struct work_struct *work) static void mlx5_do_bond_work(struct work_struct *work)
...@@ -538,6 +499,12 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) ...@@ -538,6 +499,12 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void)
if (!ldev) if (!ldev)
return NULL; return NULL;
ldev->wq = create_singlethread_workqueue("mlx5_lag");
if (!ldev->wq) {
kfree(ldev);
return NULL;
}
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
return ldev; return ldev;
...@@ -545,6 +512,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) ...@@ -545,6 +512,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void)
static void mlx5_lag_dev_free(struct mlx5_lag *ldev) static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{ {
destroy_workqueue(ldev->wq);
kfree(ldev); kfree(ldev);
} }
...@@ -592,6 +560,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) ...@@ -592,6 +560,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{ {
struct mlx5_lag *ldev = NULL; struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev; struct mlx5_core_dev *tmp_dev;
int err;
if (!MLX5_CAP_GEN(dev, vport_group_manager) || if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) || !MLX5_CAP_GEN(dev, lag_master) ||
...@@ -619,6 +588,11 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) ...@@ -619,6 +588,11 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
} }
} }
err = mlx5_lag_mp_init(ldev);
if (err)
mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
err);
} }
int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num) int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num)
...@@ -664,6 +638,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) ...@@ -664,6 +638,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
if (i == MLX5_MAX_PORTS) { if (i == MLX5_MAX_PORTS) {
if (ldev->nb.notifier_call) if (ldev->nb.notifier_call)
unregister_netdevice_notifier(&ldev->nb); unregister_netdevice_notifier(&ldev->nb);
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work); cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev); mlx5_lag_dev_free(ldev);
} }
......
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__
#include "mlx5_core.h"
#include "lag_mp.h"
/* Mode bits kept in mlx5_lag.flags; each bit names one LAG mode. */
enum {
MLX5_LAG_FLAG_ROCE = 1 << 0,
MLX5_LAG_FLAG_SRIOV = 1 << 1,
MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
};
/* Union of all mode bits. __mlx5_lag_is_active() treats the LAG as active
 * when any of these bits is set in mlx5_lag.flags.
 */
#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
MLX5_LAG_FLAG_MULTIPATH)
/* One entry of mlx5_lag.pf[]: pairs a core device with its net device. */
struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
};
/* Used for collection of netdev event info.
 * netdev_state[] holds one entry per port; the multipath code fills in the
 * tx_enabled and link_up fields of entries 0 and 1 before calling
 * mlx5_modify_lag().
 */
struct lag_tracker {
enum netdev_lag_tx_type tx_type;
struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
unsigned int is_bonded:1;
};
/* LAG data of a ConnectX card.
 * It serves both its phys functions.
 *
 * flags   - MLX5_LAG_FLAG_* mode bits.
 * v2p_map - NOTE(review): presumably a virtual-to-physical port mapping;
 *           confirm against lag.c.
 * pf      - per-port core device / net device pair.
 * wq      - workqueue on which bond_work and the FIB event work items
 *           are queued.
 * nb      - notifier block registered as a netdevice notifier.
 * lag_mp  - multipath (FIB tracking) state.
 */
struct mlx5_lag {
u8 flags;
u8 v2p_map[MLX5_MAX_PORTS];
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct notifier_block nb;
struct lag_mp lag_mp;
};
/* Fetch the LAG context stored in @dev's private data. The result may be
 * NULL; mlx5_lag_is_multipath() checks for that before using it.
 */
static inline struct mlx5_lag *
mlx5_lag_dev_get(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = dev->priv.lag;

	return ldev;
}
/* True when any LAG mode bit (MLX5_LAG_MODE_FLAGS) is set in @ldev->flags. */
static inline bool
__mlx5_lag_is_active(struct mlx5_lag *ldev)
{
	return (ldev->flags & MLX5_LAG_MODE_FLAGS) != 0;
}
/* LAG helpers declared here so that the multipath code (lag_mp.c) can
 * call them.
 */

/* Called by the multipath code with a tracker whose netdev_state[] entries
 * describe the wanted per-port tx_enabled/link_up state.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
/* @flags carries MLX5_LAG_FLAG_* mode bits; returns an int status. */
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
u8 flags);
/* Look up the port index of @ndev within @ldev; callers treat a negative
 * result as "not found".
 */
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
#endif /* __MLX5_LAG_H__ */
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/netdevice.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"
/* Multipath needs both ports' core devices to be present and to pass the
 * e-switch prerequisite check.
 */
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;

	if (dev0 && dev1)
		return mlx5_esw_multipath_prereq(dev0, dev1);

	return false;
}
/* True when the MLX5_LAG_FLAG_MULTIPATH bit is set in @ldev->flags. */
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return (ldev->flags & MLX5_LAG_FLAG_MULTIPATH) != 0;
}
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_multipath(ldev);
return res;
}
/**
* Set lag port affinity
*
* @ldev: lag device
* @port:
* 0 - set normal affinity.
* 1 - set affinity to port 1.
* 2 - set affinity to port 2.
*
**/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
{
struct lag_tracker tracker;
if (!__mlx5_lag_is_multipath(ldev))
return;
switch (port) {
case 0:
tracker.netdev_state[0].tx_enabled = true;
tracker.netdev_state[1].tx_enabled = true;
tracker.netdev_state[0].link_up = true;
tracker.netdev_state[1].link_up = true;
break;
case 1:
tracker.netdev_state[0].tx_enabled = true;
tracker.netdev_state[0].link_up = true;
tracker.netdev_state[1].tx_enabled = false;
tracker.netdev_state[1].link_up = false;
break;
case 2:
tracker.netdev_state[0].tx_enabled = false;
tracker.netdev_state[0].link_up = false;
tracker.netdev_state[1].tx_enabled = true;
tracker.netdev_state[1].link_up = true;
break;
default:
mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
port);
return;
}
if (tracker.netdev_state[0].tx_enabled)
mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
if (tracker.netdev_state[1].tx_enabled)
mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
mlx5_modify_lag(ldev, &tracker);
}
/* Callback passed to register_fib_notifier(): flushes the LAG workqueue,
 * which is where the FIB event work items are queued.
 */
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct mlx5_lag *ldev =
		container_of(nb, struct mlx5_lag, lag_mp.fib_nb);

	flush_workqueue(ldev->wq);
}
/* Deferred FIB event: allocated in the notifier callback, queued on the LAG
 * workqueue, and freed by mlx5_lag_fib_update() once handled.
 * The union member in use is selected by @event: fen_info for the
 * FIB_EVENT_ENTRY_* events, fnh_info for the FIB_EVENT_NH_* events.
 */
struct mlx5_fib_event_work {
struct work_struct work;
struct mlx5_lag *ldev;
unsigned long event;
union {
struct fib_entry_notifier_info fen_info;
struct fib_nh_notifier_info fnh_info;
};
};
/* Handle a route add/append/replace/delete event for @fi.
 *
 * - Delete: stop tracking @fi if it is the tracked multipath route.
 * - Single-nexthop route while the LAG is active: pin affinity to the port
 *   owning the nexthop device; nexthops on foreign devices are ignored.
 * - Two-nexthop route whose nexthops are the two ports of this HCA:
 *   activate multipath LAG the first time such a route is seen, set normal
 *   affinity and start tracking @fi.
 * - Any other nexthop count is ignored.
 */
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	if (fi->fib_nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct net_device *nh_dev = fi->fib_nh[0].nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			/* Nexthop device is not one of our ports: do not
			 * turn the "not found" index into an affinity value.
			 */
			if (i < 0)
				return;

			/* port index 0/1 -> affinity port 1/2 */
			mlx5_lag_set_port_affinity(ldev, i + 1);
		}
		return;
	}

	if (fi->fib_nhs != 2)
		return;

	/* Verify next hops are ports of the same hca */
	if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev &&
	      fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) &&
	    !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev &&
	      fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) {
		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, 0);
	mp->mfi = fi;
}
/* Handle a nexthop add/delete event. Events for any fib_info other than
 * the tracked multipath route are ignored.
 *
 * - NH_DEL: move affinity to the peer of the port whose nexthop went away.
 * - NH_ADD with two nexthops in the route: restore normal affinity.
 */
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int idx;

	/* Check the nh event is related to the route */
	if (!mp->mfi || fi != mp->mfi)
		return;

	switch (event) {
	case FIB_EVENT_NH_DEL:
		idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev);
		if (idx < 0)
			break;
		/* peer port: index 0 -> port 2, index 1 -> port 1 */
		mlx5_lag_set_port_affinity(ldev, (idx + 1) % 2 + 1);
		break;
	case FIB_EVENT_NH_ADD:
		if (fi->fib_nhs == 2)
			mlx5_lag_set_port_affinity(ldev, 0);
		break;
	default:
		break;
	}
}
/* Work handler for a queued FIB event. Runs the matching route/nexthop
 * handler under rtnl_lock, drops the fib_info reference taken when the
 * event was queued, and frees the work item.
 */
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *ev =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = ev->ldev;
	struct fib_info *fi;
	struct fib_nh *nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (ev->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fi = ev->fen_info.fi;
		mlx5_lag_fib_route_event(ldev, ev->event, fi);
		fib_info_put(fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		nh = ev->fnh_info.fib_nh;
		fi = nh->nh_parent;
		mlx5_lag_fib_nexthop_event(ldev, ev->event, nh, fi);
		fib_info_put(fi);
		break;
	}
	rtnl_unlock();

	kfree(ev);
}
/* Allocate (GFP_ATOMIC, zeroed) and initialise a FIB event work item for
 * @ldev/@event. Returns NULL, after a WARN, when the allocation fails.
 */
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *item;

	item = kzalloc(sizeof(*item), GFP_ATOMIC);
	if (WARN_ON(!item))
		return NULL;

	item->event = event;
	item->ldev = ldev;
	INIT_WORK(&item->work, mlx5_lag_fib_update);

	return item;
}
/* FIB notifier callback. Filters for AF_INET events while the multipath
 * prerequisites hold, copies the notifier info into a freshly allocated
 * work item, takes a reference on the fib_info and defers handling to the
 * LAG workqueue. Always returns NOTIFY_DONE.
 */
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *item;

	if (info->family != AF_INET ||
	    !mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL: {
		struct fib_entry_notifier_info *entry_info =
			container_of(info, struct fib_entry_notifier_info,
				     info);
		struct fib_info *fi = entry_info->fi;

		/* Only routes going out through one of our ports matter */
		if (fi->fib_dev != ldev->pf[0].netdev &&
		    fi->fib_dev != ldev->pf[1].netdev)
			return NOTIFY_DONE;

		item = mlx5_lag_init_fib_work(ldev, event);
		if (!item)
			return NOTIFY_DONE;

		item->fen_info = *entry_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(item->fen_info.fi);
		break;
	}
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL: {
		struct fib_nh_notifier_info *nh_info =
			container_of(info, struct fib_nh_notifier_info, info);

		item = mlx5_lag_init_fib_work(ldev, event);
		if (!item)
			return NOTIFY_DONE;

		item->fnh_info = *nh_info;
		/* Same reasoning: pin the parent fib_info while queued */
		fib_info_hold(item->fnh_info.fib_nh->nh_parent);
		break;
	}
	default:
		return NOTIFY_DONE;
	}

	queue_work(ldev->wq, &item->work);

	return NOTIFY_DONE;
}
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
int err;
if (mp->fib_nb.notifier_call)
return 0;
mp->fib_nb.notifier_call = mlx5_lag_fib_event;
err = register_fib_notifier(&mp->fib_nb,
mlx5_lag_fib_event_flush);
if (err)
mp->fib_nb.notifier_call = NULL;
return err;
}
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
if (!mp->fib_nb.notifier_call)
return;
unregister_fib_notifier(&mp->fib_nb);
mp->fib_nb.notifier_call = NULL;
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef __MLX5_LAG_MP_H__
#define __MLX5_LAG_MP_H__
#include "lag.h"
#include "mlx5_core.h"
/* Multipath state embedded in struct mlx5_lag.
 * fib_nb - FIB notifier block; its notifier_call pointer doubles as the
 *          "registered" marker in mlx5_lag_mp_init()/mlx5_lag_mp_cleanup().
 * mfi    - the multipath route currently tracked, or NULL when none is.
 */
struct lag_mp {
struct notifier_block fib_nb;
struct fib_info *mfi; /* used in tracking fib events */
};
/* The multipath implementation is only available with CONFIG_MLX5_ESWITCH;
 * otherwise init is a stub that reports success and cleanup does nothing.
 */
#ifdef CONFIG_MLX5_ESWITCH
int mlx5_lag_mp_init(struct mlx5_lag *ldev);
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_LAG_MP_H__ */
...@@ -1486,6 +1486,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = { ...@@ -1486,6 +1486,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
{ PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
{ PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ 0, } { 0, }
......
...@@ -195,6 +195,7 @@ struct mlx5_rsc_debug { ...@@ -195,6 +195,7 @@ struct mlx5_rsc_debug {
enum mlx5_dev_event { enum mlx5_dev_event {
MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
MLX5_DEV_EVENT_PORT_AFFINITY = 129,
}; };
enum mlx5_port_status { enum mlx5_port_status {
...@@ -1041,6 +1042,7 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); ...@@ -1041,6 +1042,7 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment