Commit 1a9be843 authored by Amir Shehata, committed by Greg Kroah-Hartman

staging: lustre: remove messages from lazy portal on NI shutdown

When shutting down an NI in a busy system, some messages received
on this NI might be on the lazy portal.  They would have grabbed
a ref count on the NI.  Therefore the NI will not be removed until
those messages are processed.

In order to avoid this scenario, when an NI is shut down, go through
all messages queued on the lazy portal and drop messages for the
NI being shut down.
Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6040
Reviewed-on: http://review.whamcloud.com/13836
Reviewed-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 341bc960
...@@ -482,6 +482,7 @@ int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, ...@@ -482,6 +482,7 @@ int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
__s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr, __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
__s32 credits); __s32 credits);
int lnet_dyn_del_ni(__u32 net); int lnet_dyn_del_ni(__u32 net);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
int lnet_islocalnid(lnet_nid_t nid); int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net); int lnet_islocalnet(__u32 net);
......
...@@ -1196,10 +1196,16 @@ lnet_shutdown_lndnis(void) ...@@ -1196,10 +1196,16 @@ lnet_shutdown_lndnis(void)
static void static void
lnet_shutdown_lndni(struct lnet_ni *ni) lnet_shutdown_lndni(struct lnet_ni *ni)
{ {
int i;
lnet_net_lock(LNET_LOCK_EX); lnet_net_lock(LNET_LOCK_EX);
lnet_ni_unlink_locked(ni); lnet_ni_unlink_locked(ni);
lnet_net_unlock(LNET_LOCK_EX); lnet_net_unlock(LNET_LOCK_EX);
/* clear messages for this NI on the lazy portal */
for (i = 0; i < the_lnet.ln_nportals; i++)
lnet_clear_lazy_portal(ni, i, "Shutting down NI");
/* Do peer table cleanup for this ni */ /* Do peer table cleanup for this ni */
lnet_peer_tables_cleanup(ni); lnet_peer_tables_cleanup(ni);
......
...@@ -902,17 +902,8 @@ LNetSetLazyPortal(int portal) ...@@ -902,17 +902,8 @@ LNetSetLazyPortal(int portal)
} }
EXPORT_SYMBOL(LNetSetLazyPortal); EXPORT_SYMBOL(LNetSetLazyPortal);
/**
* Turn off the lazy portal attribute. Delayed requests on the portal,
* if any, will be all dropped when this function returns.
*
* \param portal Index of the portal to disable the lazy attribute on.
*
* \retval 0 On success.
* \retval -EINVAL If \a portal is not a valid index.
*/
int int
LNetClearLazyPortal(int portal) lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
{ {
struct lnet_portal *ptl; struct lnet_portal *ptl;
LIST_HEAD(zombies); LIST_HEAD(zombies);
...@@ -931,21 +922,48 @@ LNetClearLazyPortal(int portal) ...@@ -931,21 +922,48 @@ LNetClearLazyPortal(int portal)
return 0; return 0;
} }
if (the_lnet.ln_shutdown) if (ni) {
CWARN("Active lazy portal %d on exit\n", portal); struct lnet_msg *msg, *tmp;
else
CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
/* grab all the blocked messages atomically */ /* grab all messages which are on the NI passed in */
list_splice_init(&ptl->ptl_msg_delayed, &zombies); list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
msg_list) {
if (msg->msg_rxpeer->lp_ni == ni)
list_move(&msg->msg_list, &zombies);
}
} else {
if (the_lnet.ln_shutdown)
CWARN("Active lazy portal %d on exit\n", portal);
else
CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
/* grab all the blocked messages atomically */
list_splice_init(&ptl->ptl_msg_delayed, &zombies);
lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY); lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
}
lnet_ptl_unlock(ptl); lnet_ptl_unlock(ptl);
lnet_res_unlock(LNET_LOCK_EX); lnet_res_unlock(LNET_LOCK_EX);
lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr"); lnet_drop_delayed_msg_list(&zombies, reason);
return 0; return 0;
} }
/**
 * Disable the lazy attribute on a portal.  Every request still delayed
 * on that portal has been dropped by the time this call returns.
 *
 * \param portal Index of the portal whose lazy attribute is cleared.
 *
 * \retval 0       On success.
 * \retval -EINVAL If \a portal is not a valid index.
 */
int
LNetClearLazyPortal(int portal)
{
	char *reason = "Clearing lazy portal attr";

	/* a NULL NI selects every delayed message, not just one NI's */
	return lnet_clear_lazy_portal(NULL, portal, reason);
}
EXPORT_SYMBOL(LNetClearLazyPortal); EXPORT_SYMBOL(LNetClearLazyPortal);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment