Commit 76cd622f authored by Saeed Mahameed's avatar Saeed Mahameed

Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

This merge includes updates to bonding driver needed for the rdma stack,
to avoid conflicts with the RDMA branch.

Maor Gottlieb Says:

====================
Bonding: Add support to get xmit slave

The following series adds support to get the LAG master xmit slave by
introducing new .ndo - ndo_get_xmit_slave. Every LAG module can
implement it, and it is first implemented in the bond driver.
This is follow-up to the RFC discussion [1].

The main motivation for doing this is for drivers that offload part
of the LAG functionality. For example, Mellanox Connect-X hardware
implements RoCE LAG which selects the TX affinity when the resources
are created and the port is remapped when it goes down.

The first part of this patchset introduces the new .ndo and add the
support to the bonding module.

The second part adds support to get the RoCE LAG xmit slave by building
skb of the RoCE packet based on the AH attributes and call to the new
.ndo.

The third part changes the mlx5 driver to set the QP's affinity
port according to the slave found by the .ndo.
====================
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parents 5eb2bcf2 c6bc6041
...@@ -1331,11 +1331,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, ...@@ -1331,11 +1331,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
return bond_tx_drop(bond->dev, skb); return bond_tx_drop(bond->dev, skb);
} }
netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
struct sk_buff *skb)
{ {
struct bonding *bond = netdev_priv(bond_dev);
struct ethhdr *eth_data;
struct slave *tx_slave = NULL; struct slave *tx_slave = NULL;
struct ethhdr *eth_data;
u32 hash_index; u32 hash_index;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
...@@ -1357,7 +1357,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1357,7 +1357,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
struct bond_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count; unsigned int count;
slaves = rcu_dereference(bond->slave_arr); slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0; count = slaves ? READ_ONCE(slaves->count) : 0;
if (likely(count)) if (likely(count))
tx_slave = slaves->arr[hash_index % tx_slave = slaves->arr[hash_index %
...@@ -1366,20 +1366,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1366,20 +1366,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
break; break;
} }
} }
return bond_do_alb_xmit(skb, bond, tx_slave); return tx_slave;
} }
netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
{ {
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct ethhdr *eth_data; struct slave *tx_slave;
tx_slave = bond_xmit_tlb_slave_get(bond, skb);
return bond_do_alb_xmit(skb, bond, tx_slave);
}
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *tx_slave = NULL;
static const __be32 ip_bcast = htonl(0xffffffff); static const __be32 ip_bcast = htonl(0xffffffff);
int hash_size = 0; struct slave *tx_slave = NULL;
const u8 *hash_start = NULL;
bool do_tx_balance = true; bool do_tx_balance = true;
struct ethhdr *eth_data;
u32 hash_index = 0; u32 hash_index = 0;
const u8 *hash_start = NULL; int hash_size = 0;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
eth_data = eth_hdr(skb); eth_data = eth_hdr(skb);
...@@ -1491,14 +1500,22 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1491,14 +1500,22 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
struct bond_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count; unsigned int count;
slaves = rcu_dereference(bond->slave_arr); slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0; count = slaves ? READ_ONCE(slaves->count) : 0;
if (likely(count)) if (likely(count))
tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
count]; count];
} }
} }
return tx_slave;
}
netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *tx_slave = NULL;
tx_slave = bond_xmit_alb_slave_get(bond, skb);
return bond_do_alb_xmit(skb, bond, tx_slave); return bond_do_alb_xmit(skb, bond, tx_slave);
} }
......
...@@ -3923,16 +3923,15 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) ...@@ -3923,16 +3923,15 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
} }
/** /**
* bond_xmit_slave_id - transmit skb through slave with slave_id * bond_get_slave_by_id - get xmit slave with slave_id
* @bond: bonding device that is transmitting * @bond: bonding device that is transmitting
* @skb: buffer to transmit
* @slave_id: slave id up to slave_cnt-1 through which to transmit * @slave_id: slave id up to slave_cnt-1 through which to transmit
* *
* This function tries to transmit through slave with slave_id but in case * This function tries to get slave with slave_id but in case
* it fails, it tries to find the first available slave for transmission. * it fails, it tries to find the first available slave for transmission.
* The skb is consumed in all cases, thus the function is void.
*/ */
static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) static struct slave *bond_get_slave_by_id(struct bonding *bond,
int slave_id)
{ {
struct list_head *iter; struct list_head *iter;
struct slave *slave; struct slave *slave;
...@@ -3942,7 +3941,7 @@ static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, ...@@ -3942,7 +3941,7 @@ static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb,
bond_for_each_slave_rcu(bond, slave, iter) { bond_for_each_slave_rcu(bond, slave, iter) {
if (--i < 0) { if (--i < 0) {
if (bond_slave_can_tx(slave)) if (bond_slave_can_tx(slave))
return bond_dev_queue_xmit(bond, skb, slave->dev); return slave;
} }
} }
...@@ -3952,10 +3951,10 @@ static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, ...@@ -3952,10 +3951,10 @@ static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb,
if (--i < 0) if (--i < 0)
break; break;
if (bond_slave_can_tx(slave)) if (bond_slave_can_tx(slave))
return bond_dev_queue_xmit(bond, skb, slave->dev); return slave;
} }
/* no slave that can tx has been found */ /* no slave that can tx has been found */
return bond_tx_drop(bond->dev, skb); return NULL;
} }
/** /**
...@@ -3991,10 +3990,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) ...@@ -3991,10 +3990,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
return slave_id; return slave_id;
} }
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond,
struct net_device *bond_dev) struct sk_buff *skb)
{ {
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave; struct slave *slave;
int slave_cnt; int slave_cnt;
u32 slave_id; u32 slave_id;
...@@ -4016,20 +4014,39 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, ...@@ -4016,20 +4014,39 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
if (iph->protocol == IPPROTO_IGMP) { if (iph->protocol == IPPROTO_IGMP) {
slave = rcu_dereference(bond->curr_active_slave); slave = rcu_dereference(bond->curr_active_slave);
if (slave) if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev); return slave;
return bond_xmit_slave_id(bond, skb, 0); return bond_get_slave_by_id(bond, 0);
} }
} }
non_igmp: non_igmp:
slave_cnt = READ_ONCE(bond->slave_cnt); slave_cnt = READ_ONCE(bond->slave_cnt);
if (likely(slave_cnt)) { if (likely(slave_cnt)) {
slave_id = bond_rr_gen_slave_id(bond); slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
return bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); return bond_get_slave_by_id(bond, slave_id);
} }
return NULL;
}
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
slave = bond_xmit_roundrobin_slave_get(bond, skb);
if (likely(slave))
return bond_dev_queue_xmit(bond, skb, slave->dev);
return bond_tx_drop(bond_dev, skb); return bond_tx_drop(bond_dev, skb);
} }
static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
struct sk_buff *skb)
{
return rcu_dereference(bond->curr_active_slave);
}
/* In active-backup mode, we know that bond->curr_active_slave is always valid if /* In active-backup mode, we know that bond->curr_active_slave is always valid if
* the bond has a usable interface. * the bond has a usable interface.
*/ */
...@@ -4039,7 +4056,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, ...@@ -4039,7 +4056,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave; struct slave *slave;
slave = rcu_dereference(bond->curr_active_slave); slave = bond_xmit_activebackup_slave_get(bond, skb);
if (slave) if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev); return bond_dev_queue_xmit(bond, skb, slave->dev);
...@@ -4077,6 +4094,61 @@ static void bond_slave_arr_handler(struct work_struct *work) ...@@ -4077,6 +4094,61 @@ static void bond_slave_arr_handler(struct work_struct *work)
bond_slave_arr_work_rearm(bond, 1); bond_slave_arr_work_rearm(bond, 1);
} }
static void bond_skip_slave(struct bond_up_slave *slaves,
struct slave *skipslave)
{
int idx;
/* Rare situation where caller has asked to skip a specific
* slave but allocation failed (most likely!). BTW this is
* only possible when the call is initiated from
* __bond_release_one(). In this situation; overwrite the
* skipslave entry in the array with the last entry from the
* array to avoid a situation where the xmit path may choose
* this to-be-skipped slave to send a packet out.
*/
for (idx = 0; slaves && idx < slaves->count; idx++) {
if (skipslave == slaves->arr[idx]) {
slaves->arr[idx] =
slaves->arr[slaves->count - 1];
slaves->count--;
break;
}
}
}
static void bond_set_slave_arr(struct bonding *bond,
struct bond_up_slave *usable_slaves,
struct bond_up_slave *all_slaves)
{
struct bond_up_slave *usable, *all;
usable = rtnl_dereference(bond->usable_slaves);
rcu_assign_pointer(bond->usable_slaves, usable_slaves);
kfree_rcu(usable, rcu);
all = rtnl_dereference(bond->all_slaves);
rcu_assign_pointer(bond->all_slaves, all_slaves);
kfree_rcu(all, rcu);
}
static void bond_reset_slave_arr(struct bonding *bond)
{
struct bond_up_slave *usable, *all;
usable = rtnl_dereference(bond->usable_slaves);
if (usable) {
RCU_INIT_POINTER(bond->usable_slaves, NULL);
kfree_rcu(usable, rcu);
}
all = rtnl_dereference(bond->all_slaves);
if (all) {
RCU_INIT_POINTER(bond->all_slaves, NULL);
kfree_rcu(all, rcu);
}
}
/* Build the usable slaves array in control path for modes that use xmit-hash /* Build the usable slaves array in control path for modes that use xmit-hash
* to determine the slave interface - * to determine the slave interface -
* (a) BOND_MODE_8023AD * (a) BOND_MODE_8023AD
...@@ -4087,9 +4159,9 @@ static void bond_slave_arr_handler(struct work_struct *work) ...@@ -4087,9 +4159,9 @@ static void bond_slave_arr_handler(struct work_struct *work)
*/ */
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
{ {
struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL;
struct slave *slave; struct slave *slave;
struct list_head *iter; struct list_head *iter;
struct bond_up_slave *new_arr, *old_arr;
int agg_id = 0; int agg_id = 0;
int ret = 0; int ret = 0;
...@@ -4097,11 +4169,12 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) ...@@ -4097,11 +4169,12 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
WARN_ON(lockdep_is_held(&bond->mode_lock)); WARN_ON(lockdep_is_held(&bond->mode_lock));
#endif #endif
new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), usable_slaves = kzalloc(struct_size(usable_slaves, arr,
GFP_KERNEL); bond->slave_cnt), GFP_KERNEL);
if (!new_arr) { all_slaves = kzalloc(struct_size(all_slaves, arr,
bond->slave_cnt), GFP_KERNEL);
if (!usable_slaves || !all_slaves) {
ret = -ENOMEM; ret = -ENOMEM;
pr_err("Failed to build slave-array.\n");
goto out; goto out;
} }
if (BOND_MODE(bond) == BOND_MODE_8023AD) { if (BOND_MODE(bond) == BOND_MODE_8023AD) {
...@@ -4109,20 +4182,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) ...@@ -4109,20 +4182,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
if (bond_3ad_get_active_agg_info(bond, &ad_info)) { if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("bond_3ad_get_active_agg_info failed\n"); pr_debug("bond_3ad_get_active_agg_info failed\n");
kfree_rcu(new_arr, rcu);
/* No active aggragator means it's not safe to use /* No active aggragator means it's not safe to use
* the previous array. * the previous array.
*/ */
old_arr = rtnl_dereference(bond->slave_arr); bond_reset_slave_arr(bond);
if (old_arr) {
RCU_INIT_POINTER(bond->slave_arr, NULL);
kfree_rcu(old_arr, rcu);
}
goto out; goto out;
} }
agg_id = ad_info.aggregator_id; agg_id = ad_info.aggregator_id;
} }
bond_for_each_slave(bond, slave, iter) { bond_for_each_slave(bond, slave, iter) {
if (skipslave == slave)
continue;
all_slaves->arr[all_slaves->count++] = slave;
if (BOND_MODE(bond) == BOND_MODE_8023AD) { if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct aggregator *agg; struct aggregator *agg;
...@@ -4132,44 +4204,45 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) ...@@ -4132,44 +4204,45 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
} }
if (!bond_slave_can_tx(slave)) if (!bond_slave_can_tx(slave))
continue; continue;
if (skipslave == slave)
continue;
slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n",
new_arr->count); usable_slaves->count);
new_arr->arr[new_arr->count++] = slave; usable_slaves->arr[usable_slaves->count++] = slave;
} }
old_arr = rtnl_dereference(bond->slave_arr); bond_set_slave_arr(bond, usable_slaves, all_slaves);
rcu_assign_pointer(bond->slave_arr, new_arr); return ret;
if (old_arr)
kfree_rcu(old_arr, rcu);
out: out:
if (ret != 0 && skipslave) { if (ret != 0 && skipslave) {
int idx; bond_skip_slave(rtnl_dereference(bond->all_slaves),
skipslave);
/* Rare situation where caller has asked to skip a specific bond_skip_slave(rtnl_dereference(bond->usable_slaves),
* slave but allocation failed (most likely!). BTW this is skipslave);
* only possible when the call is initiated from
* __bond_release_one(). In this situation; overwrite the
* skipslave entry in the array with the last entry from the
* array to avoid a situation where the xmit path may choose
* this to-be-skipped slave to send a packet out.
*/
old_arr = rtnl_dereference(bond->slave_arr);
for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
if (skipslave == old_arr->arr[idx]) {
old_arr->arr[idx] =
old_arr->arr[old_arr->count-1];
old_arr->count--;
break;
}
}
} }
kfree_rcu(all_slaves, rcu);
kfree_rcu(usable_slaves, rcu);
return ret; return ret;
} }
static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
struct sk_buff *skb,
struct bond_up_slave *slaves)
{
struct slave *slave;
unsigned int count;
u32 hash;
hash = bond_xmit_hash(bond, skb);
count = slaves ? READ_ONCE(slaves->count) : 0;
if (unlikely(!count))
return NULL;
slave = slaves->arr[hash % count];
return slave;
}
/* Use this Xmit function for 3AD as well as XOR modes. The current /* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function * usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out. * just calculates hash and sends the packet out.
...@@ -4178,16 +4251,14 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, ...@@ -4178,16 +4251,14 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
struct net_device *dev) struct net_device *dev)
{ {
struct bonding *bond = netdev_priv(dev); struct bonding *bond = netdev_priv(dev);
struct slave *slave;
struct bond_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count; struct slave *slave;
slaves = rcu_dereference(bond->slave_arr); slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0; slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
if (likely(count)) { if (likely(slave))
slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
return bond_dev_queue_xmit(bond, skb, slave->dev); return bond_dev_queue_xmit(bond, skb, slave->dev);
}
return bond_tx_drop(dev, skb); return bond_tx_drop(dev, skb);
} }
...@@ -4269,6 +4340,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, ...@@ -4269,6 +4340,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
return txq; return txq;
} }
static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
struct sk_buff *skb,
bool all_slaves)
{
struct bonding *bond = netdev_priv(master_dev);
struct bond_up_slave *slaves;
struct slave *slave = NULL;
switch (BOND_MODE(bond)) {
case BOND_MODE_ROUNDROBIN:
slave = bond_xmit_roundrobin_slave_get(bond, skb);
break;
case BOND_MODE_ACTIVEBACKUP:
slave = bond_xmit_activebackup_slave_get(bond, skb);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
if (all_slaves)
slaves = rcu_dereference(bond->all_slaves);
else
slaves = rcu_dereference(bond->usable_slaves);
slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
break;
case BOND_MODE_BROADCAST:
break;
case BOND_MODE_ALB:
slave = bond_xmit_alb_slave_get(bond, skb);
break;
case BOND_MODE_TLB:
slave = bond_xmit_tlb_slave_get(bond, skb);
break;
default:
/* Should never happen, mode already checked */
WARN_ONCE(true, "Unknown bonding mode");
break;
}
if (slave)
return slave->dev;
return NULL;
}
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct bonding *bond = netdev_priv(dev); struct bonding *bond = netdev_priv(dev);
...@@ -4389,6 +4502,7 @@ static const struct net_device_ops bond_netdev_ops = { ...@@ -4389,6 +4502,7 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_del_slave = bond_release, .ndo_del_slave = bond_release,
.ndo_fix_features = bond_fix_features, .ndo_fix_features = bond_fix_features,
.ndo_features_check = passthru_features_check, .ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
}; };
static const struct device_type bond_type = { static const struct device_type bond_type = {
...@@ -4456,9 +4570,9 @@ void bond_setup(struct net_device *bond_dev) ...@@ -4456,9 +4570,9 @@ void bond_setup(struct net_device *bond_dev)
static void bond_uninit(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev)
{ {
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct bond_up_slave *usable, *all;
struct list_head *iter; struct list_head *iter;
struct slave *slave; struct slave *slave;
struct bond_up_slave *arr;
bond_netpoll_cleanup(bond_dev); bond_netpoll_cleanup(bond_dev);
...@@ -4467,10 +4581,16 @@ static void bond_uninit(struct net_device *bond_dev) ...@@ -4467,10 +4581,16 @@ static void bond_uninit(struct net_device *bond_dev)
__bond_release_one(bond_dev, slave->dev, true, true); __bond_release_one(bond_dev, slave->dev, true, true);
netdev_info(bond_dev, "Released all slaves\n"); netdev_info(bond_dev, "Released all slaves\n");
arr = rtnl_dereference(bond->slave_arr); usable = rtnl_dereference(bond->usable_slaves);
if (arr) { if (usable) {
RCU_INIT_POINTER(bond->slave_arr, NULL); RCU_INIT_POINTER(bond->usable_slaves, NULL);
kfree_rcu(arr, rcu); kfree_rcu(usable, rcu);
}
all = rtnl_dereference(bond->all_slaves);
if (all) {
RCU_INIT_POINTER(bond->all_slaves, NULL);
kfree_rcu(all, rcu);
} }
list_del(&bond->bond_list); list_del(&bond->bond_list);
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* Beware of lock dependencies (preferably, no locks should be acquired * Beware of lock dependencies (preferably, no locks should be acquired
* under it). * under it).
*/ */
static DEFINE_MUTEX(lag_mutex); static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
u8 remap_port2) u8 remap_port2)
...@@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) ...@@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
if (!dev0 || !dev1) if (!dev0 || !dev1)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
tracker = ldev->tracker; tracker = ldev->tracker;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
...@@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, ...@@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
break; break;
} }
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev->tracker = tracker; ldev->tracker = tracker;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
if (changed) if (changed)
mlx5_queue_bond_work(ldev, 0); mlx5_queue_bond_work(ldev, 0);
...@@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ...@@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
if (fn >= MLX5_MAX_PORTS) if (fn >= MLX5_MAX_PORTS)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev->pf[fn].dev = dev; ldev->pf[fn].dev = dev;
ldev->pf[fn].netdev = netdev; ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].link_up = 0;
...@@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ...@@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
dev->priv.lag = ldev; dev->priv.lag = ldev;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
} }
static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
...@@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, ...@@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
if (i == MLX5_MAX_PORTS) if (i == MLX5_MAX_PORTS)
return; return;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL; dev->priv.lag = NULL;
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
} }
/* Must be called with intf_mutex held */ /* Must be called with intf_mutex held */
...@@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) ...@@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_roce(ldev); res = ldev && __mlx5_lag_is_roce(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) ...@@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_active(ldev); res = ldev && __mlx5_lag_is_active(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) ...@@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
bool res; bool res;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
res = ldev && __mlx5_lag_is_sriov(ldev); res = ldev && __mlx5_lag_is_sriov(ldev);
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return res; return res;
} }
...@@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) ...@@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
struct net_device *ndev = NULL; struct net_device *ndev = NULL;
struct mlx5_lag *ldev; struct mlx5_lag *ldev;
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev))) if (!(ldev && __mlx5_lag_is_roce(ldev)))
...@@ -681,12 +681,36 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) ...@@ -681,12 +681,36 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev); dev_hold(ndev);
unlock: unlock:
mutex_unlock(&lag_mutex); spin_unlock(&lag_lock);
return ndev; return ndev;
} }
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
struct mlx5_lag *ldev;
u8 port = 0;
spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->pf[MLX5_LAG_P1].netdev == slave)
port = MLX5_LAG_P1;
else
port = MLX5_LAG_P2;
port = ldev->v2p_map[port];
unlock:
spin_unlock(&lag_lock);
return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{ {
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
...@@ -723,7 +747,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -723,7 +747,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters); memset(values, 0, sizeof(*values) * num_counters);
mutex_lock(&lag_mutex); spin_lock(&lag_lock);
ldev = mlx5_lag_dev_get(dev); ldev = mlx5_lag_dev_get(dev);
if (ldev && __mlx5_lag_is_roce(ldev)) { if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS; num_ports = MLX5_MAX_PORTS;
...@@ -733,6 +757,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -733,6 +757,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
num_ports = 1; num_ports = 1;
mdev[MLX5_LAG_P1] = dev; mdev[MLX5_LAG_P1] = dev;
} }
spin_unlock(&lag_lock);
for (i = 0; i < num_ports; ++i) { for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
...@@ -742,14 +767,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ...@@ -742,14 +767,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
out); out);
if (ret) if (ret)
goto unlock; goto free;
for (j = 0; j < num_counters; ++j) for (j = 0; j < num_counters; ++j)
values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
} }
unlock: free:
mutex_unlock(&lag_mutex);
kvfree(out); kvfree(out);
return ret; return ret;
} }
......
...@@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); ...@@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave);
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values, u64 *values,
int num_counters, int num_counters,
......
...@@ -1148,6 +1148,12 @@ struct netdev_net_notifier { ...@@ -1148,6 +1148,12 @@ struct netdev_net_notifier {
* int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
* Called to release previously enslaved netdev. * Called to release previously enslaved netdev.
* *
* struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev,
* struct sk_buff *skb,
* bool all_slaves);
* Get the xmit slave of master device. If all_slaves is true, function
* assume all the slaves can transmit.
*
* Feature/offload setting functions. * Feature/offload setting functions.
* netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
* netdev_features_t features); * netdev_features_t features);
...@@ -1391,6 +1397,9 @@ struct net_device_ops { ...@@ -1391,6 +1397,9 @@ struct net_device_ops {
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int (*ndo_del_slave)(struct net_device *dev, int (*ndo_del_slave)(struct net_device *dev,
struct net_device *slave_dev); struct net_device *slave_dev);
struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t (*ndo_fix_features)(struct net_device *dev,
netdev_features_t features); netdev_features_t features);
int (*ndo_set_features)(struct net_device *dev, int (*ndo_set_features)(struct net_device *dev,
...@@ -2745,6 +2754,9 @@ void netdev_freemem(struct net_device *dev); ...@@ -2745,6 +2754,9 @@ void netdev_freemem(struct net_device *dev);
void synchronize_net(void); void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev); int init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves);
struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
......
...@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char ...@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb);
struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
struct sk_buff *skb);
void bond_alb_monitor(struct work_struct *); void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
......
...@@ -200,7 +200,8 @@ struct bonding { ...@@ -200,7 +200,8 @@ struct bonding {
struct slave __rcu *curr_active_slave; struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave; struct slave __rcu *current_arp_slave;
struct slave __rcu *primary_slave; struct slave __rcu *primary_slave;
struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary; bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *, int (*recv_probe)(const struct sk_buff *, struct bonding *,
......
...@@ -7861,6 +7861,28 @@ void netdev_bonding_info_change(struct net_device *dev, ...@@ -7861,6 +7861,28 @@ void netdev_bonding_info_change(struct net_device *dev,
} }
EXPORT_SYMBOL(netdev_bonding_info_change); EXPORT_SYMBOL(netdev_bonding_info_change);
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
* @skb: The packet
* @all_slaves: assume all the slaves are active
*
* The reference counters are not incremented so the caller must be
* careful with locks. The caller must hold RCU lock.
* %NULL is returned if no slave is found.
*/
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
bool all_slaves)
{
const struct net_device_ops *ops = dev->netdev_ops;
if (!ops->ndo_get_xmit_slave)
return NULL;
return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
}
EXPORT_SYMBOL(netdev_get_xmit_slave);
static void netdev_adjacent_add_links(struct net_device *dev) static void netdev_adjacent_add_links(struct net_device *dev)
{ {
struct netdev_adjacent *iter; struct netdev_adjacent *iter;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment