Commit d09cfb00, authored by Alexander Boyko, committed by Greg Kroah-Hartman

staging: lustre: ptlrpc: fix race between connect vs resend

Buggy code at ptlrpc_connect_interpret()
finish:
    rc = ptlrpc_import_recovery_state_machine(imp);
    ...
    Set import connection flags
When the import reaches the FULL state, ptlrpc_import_recovery_state_machine()
wakes up all waiters on the import and all delayed requests, which are
then resent. It could happen that a request was sent without the
updated flags, so AT is disabled for it. If such a request is already in
progress on the server, the server drops the new instance and may send an
early reply for it. But this early reply confuses the client, because it is
waiting for the real reply (no AT for this request). The client tries to
touch the buffer outside the reply and gets an EPROTO error.
The same bug existed for the initial connect too: the import became FULL
before the import connection flags were set.
Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5528
Xyratex-bug-id: MRP-2034
Reviewed-on: http://review.whamcloud.com/11723
Reviewed-by: Li Wei <wei.g.li@intel.com>
Reviewed-by: Alexander Boyko <alexander.boyko@seagate.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 01cd98ff
...@@ -491,7 +491,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -491,7 +491,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
struct ptlrpc_connection *connection; struct ptlrpc_connection *connection;
lnet_handle_me_t reply_me_h; lnet_handle_me_t reply_me_h;
lnet_md_t reply_md; lnet_md_t reply_md;
struct obd_device *obd = request->rq_import->imp_obd; struct obd_import *imp = request->rq_import;
struct obd_device *obd = imp->imp_obd;
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC)) if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
return 0; return 0;
...@@ -504,7 +505,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -504,7 +505,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
*/ */
LASSERT(!request->rq_receiving_reply); LASSERT(!request->rq_receiving_reply);
LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) && LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
(request->rq_import->imp_state == LUSTRE_IMP_FULL))); (imp->imp_state == LUSTRE_IMP_FULL)));
if (unlikely(obd && obd->obd_fail)) { if (unlikely(obd && obd->obd_fail)) {
CDEBUG(D_HA, "muting rpc for failed imp obd %s\n", CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
...@@ -517,15 +518,22 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -517,15 +518,22 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
return -ENODEV; return -ENODEV;
} }
connection = request->rq_import->imp_connection; connection = imp->imp_connection;
lustre_msg_set_handle(request->rq_reqmsg, lustre_msg_set_handle(request->rq_reqmsg,
&request->rq_import->imp_remote_handle); &imp->imp_remote_handle);
lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST); lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
lustre_msg_set_conn_cnt(request->rq_reqmsg, lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
request->rq_import->imp_conn_cnt); lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);
lustre_msghdr_set_flags(request->rq_reqmsg,
request->rq_import->imp_msghdr_flags); /**
* For enabled AT all request should have AT_SUPPORT in the
* FULL import state when OBD_CONNECT_AT is set
*/
LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
(imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
!(imp->imp_connect_data.ocd_connect_flags &
OBD_CONNECT_AT));
if (request->rq_resend) if (request->rq_resend)
lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
...@@ -629,7 +637,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -629,7 +637,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
ptlrpc_request_addref(request); ptlrpc_request_addref(request);
if (obd && obd->obd_svc_stats) if (obd && obd->obd_svc_stats)
lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR, lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
atomic_read(&request->rq_import->imp_inflight)); atomic_read(&imp->imp_inflight));
OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
...@@ -641,7 +649,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -641,7 +649,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
request->rq_deadline = request->rq_sent + request->rq_timeout + request->rq_deadline = request->rq_sent + request->rq_timeout +
ptlrpc_at_get_net_latency(request); ptlrpc_at_get_net_latency(request);
ptlrpc_pinger_sending_on_import(request->rq_import); ptlrpc_pinger_sending_on_import(imp);
DEBUG_REQ(D_INFO, request, "send flg=%x", DEBUG_REQ(D_INFO, request, "send flg=%x",
lustre_msg_get_flags(request->rq_reqmsg)); lustre_msg_get_flags(request->rq_reqmsg));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment