Commit 8b09be5f authored by Yuval Mintz's avatar Yuval Mintz Committed by David S. Miller

bnx2x: Revising locking scheme for MAC configuration

On very rare occasions, repeated load/unload stress test in the presence of
our storage driver (bnx2i/bnx2fc) causes a kernel panic in bnx2x code
(NULL pointer dereference). Stack traces indicate the issue happens during MAC
configuration; thorough code review showed that indeed several races exist
in which one thread can iterate over the list of configured MACs while another
deletes entries from the same list.

This patch adds a varient on the single-writer/Multiple-reader lock mechanism -
It utilizes an already exsiting bottom-half lock, using it so that Whenever
a writer is unable to continue due to the existence of another writer/reader,
it pends its request for future deliverance.
The writer / last readers will check for the existence of such requests and
perform them instead of the original initiator.
This prevents the writer from having to sleep while waiting for the lock
to be accessible, which might cause deadlocks given the locks already
held by the writer.

Another result of this patch is that setting of Rx Mode is now made in
sleepable context - Setting of Rx Mode is made under a bottom-half lock, which
was always nontrivial for the bnx2x driver, as the HW/FW configuration requires
wait for completions.
Since sleep was impossible (due to the sleepless-context), various mechanisms
were utilized to prevent the calling thread from sleep, but the truth was that
when the caller thread (i.e, the one calling ndo_set_rx_mode()) returned, the
Rx mode was still not set in HW/FW.

bnx2x_set_rx_mode() will now overtly schedule for the Rx changes to be
configured by the sp_rtnl_task which hold the RTNL lock and is sleepable
context.
Signed-off-by: default avatarYuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: default avatarAriel Elior <ariele@broadcom.com>
Signed-off-by: default avatarEilon Greenstein <eilong@broadcom.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 4beac029
...@@ -1331,7 +1331,7 @@ enum { ...@@ -1331,7 +1331,7 @@ enum {
BNX2X_SP_RTNL_ENABLE_SRIOV, BNX2X_SP_RTNL_ENABLE_SRIOV,
BNX2X_SP_RTNL_VFPF_MCAST, BNX2X_SP_RTNL_VFPF_MCAST,
BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN, BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN,
BNX2X_SP_RTNL_VFPF_STORM_RX_MODE, BNX2X_SP_RTNL_RX_MODE,
BNX2X_SP_RTNL_HYPERVISOR_VLAN, BNX2X_SP_RTNL_HYPERVISOR_VLAN,
}; };
......
...@@ -2060,7 +2060,11 @@ void bnx2x_squeeze_objects(struct bnx2x *bp) ...@@ -2060,7 +2060,11 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
rparam.mcast_obj = &bp->mcast_obj; rparam.mcast_obj = &bp->mcast_obj;
__set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags); __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
/* Add a DEL command... */ /* Add a DEL command... - Since we're doing a driver cleanup only,
* we take a lock surrounding both the initial send and the CONTs,
* as we don't want a true completion to disrupt us in the middle.
*/
netif_addr_lock_bh(bp->dev);
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
if (rc < 0) if (rc < 0)
BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n", BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n",
...@@ -2072,11 +2076,13 @@ void bnx2x_squeeze_objects(struct bnx2x *bp) ...@@ -2072,11 +2076,13 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
if (rc < 0) { if (rc < 0) {
BNX2X_ERR("Failed to clean multi-cast object: %d\n", BNX2X_ERR("Failed to clean multi-cast object: %d\n",
rc); rc);
netif_addr_unlock_bh(bp->dev);
return; return;
} }
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT); rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
} }
netif_addr_unlock_bh(bp->dev);
} }
#ifndef BNX2X_STOP_ON_ERROR #ifndef BNX2X_STOP_ON_ERROR
...@@ -2432,9 +2438,7 @@ int bnx2x_load_cnic(struct bnx2x *bp) ...@@ -2432,9 +2438,7 @@ int bnx2x_load_cnic(struct bnx2x *bp)
} }
/* Initialize Rx filter. */ /* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev); bnx2x_set_rx_mode_inner(bp);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
/* re-read iscsi info */ /* re-read iscsi info */
bnx2x_get_iscsi_info(bp); bnx2x_get_iscsi_info(bp);
...@@ -2704,9 +2708,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) ...@@ -2704,9 +2708,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
/* Start fast path */ /* Start fast path */
/* Initialize Rx filter. */ /* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev); bnx2x_set_rx_mode_inner(bp);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
/* Start the Tx */ /* Start the Tx */
switch (load_mode) { switch (load_mode) {
......
...@@ -418,6 +418,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set); ...@@ -418,6 +418,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set);
* netif_addr_lock_bh() * netif_addr_lock_bh()
*/ */
void bnx2x_set_rx_mode(struct net_device *dev); void bnx2x_set_rx_mode(struct net_device *dev);
void bnx2x_set_rx_mode_inner(struct bnx2x *bp);
/** /**
* bnx2x_set_storm_rx_mode - configure MAC filtering rules in a FW. * bnx2x_set_storm_rx_mode - configure MAC filtering rules in a FW.
......
...@@ -9628,11 +9628,9 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) ...@@ -9628,11 +9628,9 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
} }
} }
if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE, if (test_and_clear_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state)) {
&bp->sp_rtnl_state)) { DP(BNX2X_MSG_SP, "Handling Rx Mode setting\n");
DP(BNX2X_MSG_SP, bnx2x_set_rx_mode_inner(bp);
"sending set storm rx mode vf pf channel message from rtnl sp-task\n");
bnx2x_vfpf_storm_rx_mode(bp);
} }
if (test_and_clear_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN, if (test_and_clear_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN,
...@@ -11849,34 +11847,48 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) ...@@ -11849,34 +11847,48 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
void bnx2x_set_rx_mode(struct net_device *dev) void bnx2x_set_rx_mode(struct net_device *dev)
{ {
struct bnx2x *bp = netdev_priv(dev); struct bnx2x *bp = netdev_priv(dev);
u32 rx_mode = BNX2X_RX_MODE_NORMAL;
if (bp->state != BNX2X_STATE_OPEN) { if (bp->state != BNX2X_STATE_OPEN) {
DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state); DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state);
return; return;
} else {
/* Schedule an SP task to handle rest of change */
DP(NETIF_MSG_IFUP, "Scheduling an Rx mode change\n");
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
} }
}
void bnx2x_set_rx_mode_inner(struct bnx2x *bp)
{
u32 rx_mode = BNX2X_RX_MODE_NORMAL;
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", bp->dev->flags); DP(NETIF_MSG_IFUP, "dev->flags = %x\n", bp->dev->flags);
if (dev->flags & IFF_PROMISC) netif_addr_lock_bh(bp->dev);
if (bp->dev->flags & IFF_PROMISC) {
rx_mode = BNX2X_RX_MODE_PROMISC; rx_mode = BNX2X_RX_MODE_PROMISC;
else if ((dev->flags & IFF_ALLMULTI) || } else if ((bp->dev->flags & IFF_ALLMULTI) ||
((netdev_mc_count(dev) > BNX2X_MAX_MULTICAST) && ((netdev_mc_count(bp->dev) > BNX2X_MAX_MULTICAST) &&
CHIP_IS_E1(bp))) CHIP_IS_E1(bp))) {
rx_mode = BNX2X_RX_MODE_ALLMULTI; rx_mode = BNX2X_RX_MODE_ALLMULTI;
else { } else {
if (IS_PF(bp)) { if (IS_PF(bp)) {
/* some multicasts */ /* some multicasts */
if (bnx2x_set_mc_list(bp) < 0) if (bnx2x_set_mc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_ALLMULTI; rx_mode = BNX2X_RX_MODE_ALLMULTI;
/* release bh lock, as bnx2x_set_uc_list might sleep */
netif_addr_unlock_bh(bp->dev);
if (bnx2x_set_uc_list(bp) < 0) if (bnx2x_set_uc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_PROMISC; rx_mode = BNX2X_RX_MODE_PROMISC;
netif_addr_lock_bh(bp->dev);
} else { } else {
/* configuring mcast to a vf involves sleeping (when we /* configuring mcast to a vf involves sleeping (when we
* wait for the pf's response). Since this function is * wait for the pf's response).
* called from non sleepable context we must schedule
* a work item for this purpose
*/ */
smp_mb__before_clear_bit(); smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_VFPF_MCAST, set_bit(BNX2X_SP_RTNL_VFPF_MCAST,
...@@ -11894,22 +11906,20 @@ void bnx2x_set_rx_mode(struct net_device *dev) ...@@ -11894,22 +11906,20 @@ void bnx2x_set_rx_mode(struct net_device *dev)
/* Schedule the rx_mode command */ /* Schedule the rx_mode command */
if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) { if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) {
set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state); set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state);
netif_addr_unlock_bh(bp->dev);
return; return;
} }
if (IS_PF(bp)) { if (IS_PF(bp)) {
bnx2x_set_storm_rx_mode(bp); bnx2x_set_storm_rx_mode(bp);
netif_addr_unlock_bh(bp->dev);
} else { } else {
/* configuring rx mode to storms in a vf involves sleeping (when /* VF will need to request the PF to make this change, and so
* we wait for the pf's response). Since this function is * the VF needs to release the bottom-half lock prior to the
* called from non sleepable context we must schedule * request (as it will likely require sleep on the VF side)
* a work item for this purpose
*/ */
smp_mb__before_clear_bit(); netif_addr_unlock_bh(bp->dev);
set_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE, bnx2x_vfpf_storm_rx_mode(bp);
&bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
} }
} }
......
...@@ -285,6 +285,12 @@ struct bnx2x_vlan_mac_obj { ...@@ -285,6 +285,12 @@ struct bnx2x_vlan_mac_obj {
* entries. * entries.
*/ */
struct list_head head; struct list_head head;
/* Implement a simple reader/writer lock on the head list.
* all these fields should only be accessed under the exe_queue lock
*/
u8 head_reader; /* Num. of readers accessing head list */
bool head_exe_request; /* Pending execution request. */
unsigned long saved_ramrod_flags; /* Ramrods of pending execution */
/* TODO: Add it's initialization in the init functions */ /* TODO: Add it's initialization in the init functions */
struct bnx2x_exe_queue_obj exe_queue; struct bnx2x_exe_queue_obj exe_queue;
...@@ -1302,6 +1308,14 @@ void bnx2x_init_vlan_mac_obj(struct bnx2x *bp, ...@@ -1302,6 +1308,14 @@ void bnx2x_init_vlan_mac_obj(struct bnx2x *bp,
struct bnx2x_credit_pool_obj *macs_pool, struct bnx2x_credit_pool_obj *macs_pool,
struct bnx2x_credit_pool_obj *vlans_pool); struct bnx2x_credit_pool_obj *vlans_pool);
int bnx2x_vlan_mac_h_read_lock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
void bnx2x_vlan_mac_h_read_unlock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
int bnx2x_vlan_mac_h_write_lock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
void bnx2x_vlan_mac_h_write_unlock(struct bnx2x *bp,
struct bnx2x_vlan_mac_obj *o);
int bnx2x_config_vlan_mac(struct bnx2x *bp, int bnx2x_config_vlan_mac(struct bnx2x *bp,
struct bnx2x_vlan_mac_ramrod_params *p); struct bnx2x_vlan_mac_ramrod_params *p);
......
...@@ -491,12 +491,20 @@ static inline void bnx2x_vfop_credit(struct bnx2x *bp, ...@@ -491,12 +491,20 @@ static inline void bnx2x_vfop_credit(struct bnx2x *bp,
* and a valid credit counter * and a valid credit counter
*/ */
if (!vfop->rc && args->credit) { if (!vfop->rc && args->credit) {
int cnt = 0;
struct list_head *pos; struct list_head *pos;
int read_lock;
int cnt = 0;
read_lock = bnx2x_vlan_mac_h_read_lock(bp, obj);
if (read_lock)
DP(BNX2X_MSG_SP, "Failed to take vlan mac read head; continuing anyway\n");
list_for_each(pos, &obj->head) list_for_each(pos, &obj->head)
cnt++; cnt++;
if (!read_lock)
bnx2x_vlan_mac_h_read_unlock(bp, obj);
atomic_set(args->credit, cnt); atomic_set(args->credit, cnt);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment