Commit 76cd622f authored by Saeed Mahameed

Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

This merge includes updates to bonding driver needed for the rdma stack,
to avoid conflicts with the RDMA branch.

Maor Gottlieb Says:

====================
Bonding: Add support to get xmit slave

The following series adds support for getting the LAG master xmit slave by
introducing a new .ndo - ndo_get_xmit_slave. Every LAG module can
implement it; it is first implemented in the bond driver.
This is a follow-up to the RFC discussion [1].

The main motivation for doing this is for drivers that offload part
of the LAG functionality. For example, Mellanox Connect-X hardware
implements RoCE LAG which selects the TX affinity when the resources
are created and port is remapped when it goes down.

The first part of this patchset introduces the new .ndo and adds
support for it to the bonding module.

The second part adds support to get the RoCE LAG xmit slave by building
an skb of the RoCE packet based on the AH attributes and calling the new
.ndo.

The third part changes the mlx5 driver to set the QP's affinity
port according to the slave found by the .ndo.
====================
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parents 5eb2bcf2 c6bc6041
...@@ -1331,11 +1331,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, ...@@ -1331,11 +1331,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
return bond_tx_drop(bond->dev, skb); return bond_tx_drop(bond->dev, skb);
} }
netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
struct sk_buff *skb)
{ {
struct bonding *bond = netdev_priv(bond_dev);
struct ethhdr *eth_data;
struct slave *tx_slave = NULL; struct slave *tx_slave = NULL;
struct ethhdr *eth_data;
u32 hash_index; u32 hash_index;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
...@@ -1357,7 +1357,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1357,7 +1357,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
struct bond_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count; unsigned int count;
slaves = rcu_dereference(bond->slave_arr); slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0; count = slaves ? READ_ONCE(slaves->count) : 0;
if (likely(count)) if (likely(count))
tx_slave = slaves->arr[hash_index % tx_slave = slaves->arr[hash_index %
...@@ -1366,20 +1366,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1366,20 +1366,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
break; break;
} }
} }
return bond_do_alb_xmit(skb, bond, tx_slave); return tx_slave;
} }
netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
{ {
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct ethhdr *eth_data; struct slave *tx_slave;
tx_slave = bond_xmit_tlb_slave_get(bond, skb);
return bond_do_alb_xmit(skb, bond, tx_slave);
}
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *tx_slave = NULL;
static const __be32 ip_bcast = htonl(0xffffffff); static const __be32 ip_bcast = htonl(0xffffffff);
int hash_size = 0; struct slave *tx_slave = NULL;
const u8 *hash_start = NULL;
bool do_tx_balance = true; bool do_tx_balance = true;
struct ethhdr *eth_data;
u32 hash_index = 0; u32 hash_index = 0;
const u8 *hash_start = NULL; int hash_size = 0;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
eth_data = eth_hdr(skb); eth_data = eth_hdr(skb);
...@@ -1491,14 +1500,22 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1491,14 +1500,22 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
struct bond_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count; unsigned int count;
slaves = rcu_dereference(bond->slave_arr); slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0; count = slaves ? READ_ONCE(slaves->count) : 0;
if (likely(count)) if (likely(count))
tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
count]; count];
} }
} }
return tx_slave;
}
netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *tx_slave = NULL;
tx_slave = bond_xmit_alb_slave_get(bond, skb);
return bond_do_alb_xmit(skb, bond, tx_slave); return bond_do_alb_xmit(skb, bond, tx_slave);
} }
......
This diff is collapsed.
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* Beware of lock dependencies (preferably, no locks should be acquired * Beware of lock dependencies (preferably, no locks should be acquired
* under it). * under it).
*/ */
static DEFINE_MUTEX(lag_mutex); static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
u8 remap_port2) u8 remap_port2)
...@@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) ...@@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
if (!dev0 || !dev1) if (!dev0 || !dev1)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
tracker = ldev->tracker; tracker = ldev->tracker;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
...@@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, ...@@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
break; break;
} }
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev->tracker = tracker; ldev->tracker = tracker;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
if (changed) if (changed)
mlx5_queue_bond_work(ldev, 0); mlx5_queue_bond_work(ldev, 0);
...@@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ...@@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
if (fn >= MLX5_MAX_PORTS) if (fn >= MLX5_MAX_PORTS)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev->pf[fn].dev = dev; ldev->pf[fn].dev = dev;
ldev->pf[fn].netdev = netdev; ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].link_up = 0;
...@@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ...@@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
dev->priv.lag = ldev; dev->priv.lag = ldev;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
} }
static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
...@@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, ...@@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
if (i == MLX5_MAX_PORTS) if (i == MLX5_MAX_PORTS)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL; dev->priv.lag = NULL;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
} }
/* Must be called with intf_mutex held */ /* Must be called with intf_mutex held */
...@@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) ...@@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_roce(ldev); res = ldev && __mlx5_lag_is_roce(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) ...@@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_active(ldev); res = ldev && __mlx5_lag_is_active(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) ...@@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_sriov(ldev); res = ldev && __mlx5_lag_is_sriov(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) ...@@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
struct net_device *ndev = NULL; struct net_device *ndev = NULL;
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev))) if (!(ldev && __mlx5_lag_is_roce(ldev)))
...@@ -681,12 +681,36 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) ...@@ -681,12 +681,36 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev); dev_hold(ndev);
unlock: unlock:
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return ndev; return ndev;
} }
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
/*
 * mlx5_lag_get_slave_port - look up the v2p_map entry for a LAG slave netdev.
 * @dev:   mlx5 core device used to find the LAG device via mlx5_lag_dev_get()
 * @slave: slave net_device to resolve
 *
 * Under lag_lock, selects the MLX5_LAG_P1 slot when @slave equals that slot's
 * netdev, and the MLX5_LAG_P2 slot otherwise (no separate check is made that
 * @slave really is the P2 netdev), then returns ldev->v2p_map[] for that slot.
 *
 * Returns 0 when mlx5_lag_dev_get() yields no LAG device or
 * __mlx5_lag_is_roce() is false for it.
 */
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev_get(dev);
	if (ldev && __mlx5_lag_is_roce(ldev)) {
		/* Anything that is not the P1 netdev is treated as P2. */
		u8 idx = (ldev->pf[MLX5_LAG_P1].netdev == slave) ?
			 MLX5_LAG_P1 : MLX5_LAG_P2;

		port = ldev->v2p_map[idx];
	}
	spin_unlock(&lag_lock);

	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{ {
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
...@@ -723,7 +747,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -723,7 +747,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters); memset(values, 0, sizeof(*values) * num_counters);
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
if (ldev && __mlx5_lag_is_roce(ldev)) { if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS; num_ports = MLX5_MAX_PORTS;
...@@ -733,6 +757,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -733,6 +757,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
num_ports = 1; num_ports = 1;
mdev[MLX5_LAG_P1] = dev; mdev[MLX5_LAG_P1] = dev;
} }
spin_unlock(&lag_lock);
for (i = 0; i < num_ports; ++i) { for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
...@@ -742,14 +767,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -742,14 +767,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
out); out);
if (ret) if (ret)
goto unlock; goto free;
for (j = 0; j < num_counters; ++j) for (j = 0; j < num_counters; ++j)
values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
} }
unlock: free:
mutex_unlock(&lag_mutex);
kvfree(out); kvfree(out);
return ret; return ret;
} }
......
...@@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); ...@@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave);
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values, u64 *values,
int num_counters, int num_counters,
......
...@@ -1148,6 +1148,12 @@ struct netdev_net_notifier { ...@@ -1148,6 +1148,12 @@ struct netdev_net_notifier {
* int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
* Called to release previously enslaved netdev. * Called to release previously enslaved netdev.
* *
* struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev,
* struct sk_buff *skb,
* bool all_slaves);
* Get the xmit slave of master device. If all_slaves is true, the function
* assumes all the slaves can transmit.
*
* Feature/offload setting functions. * Feature/offload setting functions.
* netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
* netdev_features_t features); * netdev_features_t features);
...@@ -1391,6 +1397,9 @@ struct net_device_ops { ...@@ -1391,6 +1397,9 @@ struct net_device_ops {
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int (*ndo_del_slave)(struct net_device *dev, int (*ndo_del_slave)(struct net_device *dev,
struct net_device *slave_dev); struct net_device *slave_dev);
struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t (*ndo_fix_features)(struct net_device *dev,
netdev_features_t features); netdev_features_t features);
int (*ndo_set_features)(struct net_device *dev, int (*ndo_set_features)(struct net_device *dev,
...@@ -2745,6 +2754,9 @@ void netdev_freemem(struct net_device *dev); ...@@ -2745,6 +2754,9 @@ void netdev_freemem(struct net_device *dev);
void synchronize_net(void); void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev); int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
......
...@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char ...@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb);
struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
struct sk_buff *skb);
void bond_alb_monitor(struct work_struct *); void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
......
...@@ -200,7 +200,8 @@ struct bonding { ...@@ -200,7 +200,8 @@ struct bonding {
struct slave __rcu *curr_active_slave; struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave; struct slave __rcu *current_arp_slave;
struct slave __rcu *primary_slave; struct slave __rcu *primary_slave;
struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary; bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *, int (*recv_probe)(const struct sk_buff *, struct bonding *,
......
...@@ -7861,6 +7861,28 @@ void netdev_bonding_info_change(struct net_device *dev, ...@@ -7861,6 +7861,28 @@ void netdev_bonding_info_change(struct net_device *dev,
} }
EXPORT_SYMBOL(netdev_bonding_info_change); EXPORT_SYMBOL(netdev_bonding_info_change);
/**
 * netdev_get_xmit_slave - Get the xmit slave of master device
 * @dev: The master device whose netdev_ops are consulted
 * @skb: The packet
 * @all_slaves: assume all the slaves are active
 *
 * Forwards to the device's ndo_get_xmit_slave callback when one is set.
 *
 * The reference counters are not incremented so the caller must be
 * careful with locks. The caller must hold RCU lock.
 * %NULL is returned if no slave is found, which includes the case where
 * the device does not implement ndo_get_xmit_slave.
 */
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
					 struct sk_buff *skb,
					 bool all_slaves)
{
	const struct net_device_ops *nd_ops = dev->netdev_ops;

	if (nd_ops->ndo_get_xmit_slave)
		return nd_ops->ndo_get_xmit_slave(dev, skb, all_slaves);

	/* Device has no xmit-slave lookup. */
	return NULL;
}
EXPORT_SYMBOL(netdev_get_xmit_slave);
static void netdev_adjacent_add_links(struct net_device *dev) static void netdev_adjacent_add_links(struct net_device *dev)
{ {
struct netdev_adjacent *iter; struct netdev_adjacent *iter;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment