Commit f0b60bfa authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dlm-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
 "This set focuses, as usual, on fixes to the comms layer.

  New testing of the dlm with ocfs2 uncovered a number of bugs in the
  TCP connection handling during recovery, starting, and stopping"

* tag 'dlm-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
  dlm: remove dlm_send_rcom_lookup_dump
  dlm: recheck kthread_should_stop() before schedule()
  DLM: fix NULL pointer dereference in send_to_sock()
  DLM: fix to reschedule rwork
  DLM: fix to use sk_callback_lock correctly
  DLM: fix overflow dlm_cb_seq
  DLM: fix memory leak in tcp_accept_from_sock()
  DLM: fix conversion deadlock when DLM_LKF_NODLCKWT flag is set
  DLM: use CF_CLOSE flag to stop dlm_send correctly
  DLM: Reanimate CF_WRITE_PENDING flag
  DLM: fix race condition between dlm_recoverd_stop and dlm_recoverd
  DLM: close othercon at send/receive error
  DLM: retry rcom when dlm_wait_function is timed out.
  DLM: fix to use sock_mutex correctly in xxx_accept_from_sock
  DLM: fix race condition between dlm_send and dlm_recv
  DLM: fix double list_del()
  DLM: fix remove save_cb argument from add_sock()
  DLM: Fix saving of NULL callbacks
  DLM: Eliminate CF_WRITE_PENDING flag
  DLM: Eliminate CF_CONNECT_PENDING flag
parents 29309a4e 9250e523
...@@ -181,6 +181,8 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, ...@@ -181,6 +181,8 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
spin_lock(&dlm_cb_seq_spin); spin_lock(&dlm_cb_seq_spin);
new_seq = ++dlm_cb_seq; new_seq = ++dlm_cb_seq;
if (!dlm_cb_seq)
new_seq = ++dlm_cb_seq;
spin_unlock(&dlm_cb_seq_spin); spin_unlock(&dlm_cb_seq_spin);
if (lkb->lkb_flags & DLM_IFL_USER) { if (lkb->lkb_flags & DLM_IFL_USER) {
......
...@@ -1003,7 +1003,6 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, ...@@ -1003,7 +1003,6 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
if (r->res_master_nodeid == our_nodeid) { if (r->res_master_nodeid == our_nodeid) {
log_error(ls, "from_master %d our_master", from_nodeid); log_error(ls, "from_master %d our_master", from_nodeid);
dlm_dump_rsb(r); dlm_dump_rsb(r);
dlm_send_rcom_lookup_dump(r, from_nodeid);
goto out_found; goto out_found;
} }
...@@ -2465,14 +2464,12 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, ...@@ -2465,14 +2464,12 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) { if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
lkb->lkb_grmode = DLM_LOCK_NL; lkb->lkb_grmode = DLM_LOCK_NL;
lkb->lkb_sbflags |= DLM_SBF_DEMOTED; lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
} else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { } else if (err) {
if (err) *err = -EDEADLK;
*err = -EDEADLK; } else {
else { log_print("can_be_granted deadlock %x now %d",
log_print("can_be_granted deadlock %x now %d", lkb->lkb_id, now);
lkb->lkb_id, now); dlm_dump_rsb(r);
dlm_dump_rsb(r);
}
} }
goto out; goto out;
} }
...@@ -2501,13 +2498,6 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, ...@@ -2501,13 +2498,6 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
return rv; return rv;
} }
/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
for locks pending on the convert list. Once verified (watch for these
log_prints), we should be able to just call _can_be_granted() and not
bother with the demote/deadlk cases here (and there's no easy way to deal
with a deadlk here, we'd have to generate something like grant_lock with
the deadlk error.) */
/* Returns the highest requested mode of all blocked conversions; sets /* Returns the highest requested mode of all blocked conversions; sets
cw if there's a blocked conversion to DLM_LOCK_CW. */ cw if there's a blocked conversion to DLM_LOCK_CW. */
...@@ -2545,9 +2535,22 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, ...@@ -2545,9 +2535,22 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw,
} }
if (deadlk) { if (deadlk) {
log_print("WARN: pending deadlock %x node %d %s", /*
lkb->lkb_id, lkb->lkb_nodeid, r->res_name); * If DLM_LKB_NODLKWT flag is set and conversion
dlm_dump_rsb(r); * deadlock is detected, we request blocking AST and
* down (or cancel) conversion.
*/
if (lkb->lkb_exflags & DLM_LKF_NODLCKWT) {
if (lkb->lkb_highbast < lkb->lkb_rqmode) {
queue_bast(r, lkb, lkb->lkb_rqmode);
lkb->lkb_highbast = lkb->lkb_rqmode;
}
} else {
log_print("WARN: pending deadlock %x node %d %s",
lkb->lkb_id, lkb->lkb_nodeid,
r->res_name);
dlm_dump_rsb(r);
}
continue; continue;
} }
...@@ -3123,7 +3126,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) ...@@ -3123,7 +3126,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
deadlock, so we leave it on the granted queue and return EDEADLK in deadlock, so we leave it on the granted queue and return EDEADLK in
the ast for the convert. */ the ast for the convert. */
if (deadlk) { if (deadlk && !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
/* it's left on the granted queue */ /* it's left on the granted queue */
revert_lock(r, lkb); revert_lock(r, lkb);
queue_cast(r, lkb, -EDEADLK); queue_cast(r, lkb, -EDEADLK);
......
This diff is collapsed.
...@@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) ...@@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
goto out; goto out;
} }
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
sizeof(struct rcom_status), &rc, &mh); sizeof(struct rcom_status), &rc, &mh);
if (error) if (error)
...@@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) ...@@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
error = dlm_wait_function(ls, &rcom_response); error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls); disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
if (error) if (error)
goto out; goto out;
...@@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) ...@@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
ls->ls_recover_nodeid = nodeid; ls->ls_recover_nodeid = nodeid;
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
if (error) if (error)
goto out; goto out;
...@@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) ...@@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
error = dlm_wait_function(ls, &rcom_response); error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls); disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
out: out:
return error; return error;
} }
...@@ -332,25 +338,6 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) ...@@ -332,25 +338,6 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
return error; return error;
} }
int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct dlm_ls *ls = r->res_ls;
int error;
error = create_rcom(ls, to_nodeid, DLM_RCOM_LOOKUP, r->res_length,
&rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
rc->rc_id = 0xFFFFFFFF;
send_rcom(ls, mh, rc);
out:
return error;
}
static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{ {
struct dlm_rcom *rc; struct dlm_rcom *rc;
...@@ -362,6 +349,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) ...@@ -362,6 +349,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
if (error) if (error)
return; return;
/* Old code would send this special id to trigger a debug dump. */
if (rc_in->rc_id == 0xFFFFFFFF) { if (rc_in->rc_id == 0xFFFFFFFF) {
log_error(ls, "receive_rcom_lookup dump from %d", nodeid); log_error(ls, "receive_rcom_lookup dump from %d", nodeid);
dlm_dump_rsb_name(ls, rc_in->rc_buf, len); dlm_dump_rsb_name(ls, rc_in->rc_buf, len);
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
......
...@@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) ...@@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
dlm_config.ci_recover_timer * HZ); dlm_config.ci_recover_timer * HZ);
if (rv) if (rv)
break; break;
if (test_bit(LSFL_RCOM_WAIT, &ls->ls_flags)) {
log_debug(ls, "dlm_wait_function timed out");
return -ETIMEDOUT;
}
} }
if (dlm_recovery_stopped(ls)) { if (dlm_recovery_stopped(ls)) {
......
...@@ -287,11 +287,23 @@ static int dlm_recoverd(void *arg) ...@@ -287,11 +287,23 @@ static int dlm_recoverd(void *arg)
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
wake_up(&ls->ls_recover_lock_wait); wake_up(&ls->ls_recover_lock_wait);
while (!kthread_should_stop()) { while (1) {
/*
* We call kthread_should_stop() after set_current_state().
* This is because it works correctly if kthread_stop() is
* called just before set_current_state().
*/
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
break;
}
if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
!test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
if (kthread_should_stop())
break;
schedule(); schedule();
}
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment