Commit 475f230c authored by David Teigland

dlm: fix unlock balance warnings

The in_recovery rw_semaphore has always been acquired and
released by different threads by design.  To work around
the "BUG: bad unlock balance detected!" messages, adjust
things so the dlm_recoverd thread always does both down_write
and up_write.
Signed-off-by: David Teigland <teigland@redhat.com>
parent 6ad22916
...@@ -604,6 +604,7 @@ struct dlm_ls { ...@@ -604,6 +604,7 @@ struct dlm_ls {
struct idr ls_recover_idr; struct idr ls_recover_idr;
spinlock_t ls_recover_idr_lock; spinlock_t ls_recover_idr_lock;
wait_queue_head_t ls_wait_general; wait_queue_head_t ls_wait_general;
wait_queue_head_t ls_recover_lock_wait;
struct mutex ls_clear_proc_locks; struct mutex ls_clear_proc_locks;
struct list_head ls_root_list; /* root resources */ struct list_head ls_root_list; /* root resources */
...@@ -616,15 +617,40 @@ struct dlm_ls { ...@@ -616,15 +617,40 @@ struct dlm_ls {
char ls_name[1]; char ls_name[1];
}; };
#define LSFL_WORK 0 /*
#define LSFL_RUNNING 1 * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
#define LSFL_RECOVERY_STOP 2 * that they should abort what they're doing so new recovery can be started.
#define LSFL_RCOM_READY 3 *
#define LSFL_RCOM_WAIT 4 * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
#define LSFL_UEVENT_WAIT 5 * should do down_write() on the in_recovery rw_semaphore. (doing down_write
#define LSFL_TIMEWARN 6 * within dlm_ls_stop causes complaints about the lock acquired/released
#define LSFL_CB_DELAY 7 * in different contexts.)
#define LSFL_NODIR 8 *
* LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
* It sets this after it is done with down_write() on the in_recovery
* rw_semaphore and clears it after it has released the rw_semaphore.
*
* LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
* should begin recovery of the lockspace.
*
* LSFL_RUNNING - set when normal locking activity is enabled.
* dlm_ls_stop() clears this to tell dlm locking routines that they should
* quit what they are doing so recovery can run. dlm_recoverd sets
* this after recovery is finished.
*/
#define LSFL_RECOVER_STOP 0
#define LSFL_RECOVER_DOWN 1
#define LSFL_RECOVER_LOCK 2
#define LSFL_RECOVER_WORK 3
#define LSFL_RUNNING 4
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
#define LSFL_TIMEWARN 8
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
/* much of this is just saving user space pointers associated with the /* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */ lock that we pass back to the user lib with an ast */
...@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) ...@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
static inline int dlm_recovery_stopped(struct dlm_ls *ls) static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{ {
return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
} }
static inline int dlm_no_directory(struct dlm_ls *ls) static inline int dlm_no_directory(struct dlm_ls *ls)
......
...@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster, ...@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
INIT_LIST_HEAD(&ls->ls_root_list); INIT_LIST_HEAD(&ls->ls_root_list);
init_rwsem(&ls->ls_root_sem); init_rwsem(&ls->ls_root_sem);
down_write(&ls->ls_in_recovery);
spin_lock(&lslist_lock); spin_lock(&lslist_lock);
ls->ls_create_count = 1; ls->ls_create_count = 1;
list_add(&ls->ls_list, &lslist); list_add(&ls->ls_list, &lslist);
...@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster, ...@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
} }
} }
/* needs to find ls in lslist */ init_waitqueue_head(&ls->ls_recover_lock_wait);
/*
* Once started, dlm_recoverd first looks for ls in lslist, then
* initializes ls_in_recovery as locked in "down" mode. We need
* to wait for the wakeup from dlm_recoverd because in_recovery
* has to start out in down mode.
*/
error = dlm_recoverd_start(ls); error = dlm_recoverd_start(ls);
if (error) { if (error) {
log_error(ls, "can't start dlm_recoverd %d", error); log_error(ls, "can't start dlm_recoverd %d", error);
goto out_callback; goto out_callback;
} }
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
ls->ls_kobj.kset = dlm_kset; ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name); "%s", ls->ls_name);
......
...@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls) ...@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
down_write(&ls->ls_recv_active); down_write(&ls->ls_recv_active);
/* /*
* Abort any recovery that's in progress (see RECOVERY_STOP, * Abort any recovery that's in progress (see RECOVER_STOP,
* dlm_recovery_stopped()) and tell any other threads running in the * dlm_recovery_stopped()) and tell any other threads running in the
* dlm to quit any processing (see RUNNING, dlm_locking_stopped()). * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
*/ */
spin_lock(&ls->ls_recover_lock); spin_lock(&ls->ls_recover_lock);
set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
ls->ls_recover_seq++; ls->ls_recover_seq++;
spin_unlock(&ls->ls_recover_lock); spin_unlock(&ls->ls_recover_lock);
...@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls) ...@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
* when recovery is complete. * when recovery is complete.
*/ */
if (new) if (new) {
down_write(&ls->ls_in_recovery); set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
}
/* /*
* The recoverd suspend/resume makes sure that dlm_recoverd (if * The recoverd suspend/resume makes sure that dlm_recoverd (if
* running) has noticed RECOVERY_STOP above and quit processing the * running) has noticed RECOVER_STOP above and quit processing the
* previous recovery. * previous recovery.
*/ */
...@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls) ...@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
kfree(rv_old); kfree(rv_old);
} }
dlm_recoverd_kick(ls); set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
return 0; return 0;
fail: fail:
......
...@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) ...@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
spin_lock(&ls->ls_recover_lock); spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status; status = ls->ls_recover_status;
stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
seq = ls->ls_recover_seq; seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock); spin_unlock(&ls->ls_recover_lock);
......
...@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) ...@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
set_bit(LSFL_RUNNING, &ls->ls_flags); set_bit(LSFL_RUNNING, &ls->ls_flags);
/* unblocks processes waiting to enter the dlm */ /* unblocks processes waiting to enter the dlm */
up_write(&ls->ls_in_recovery); up_write(&ls->ls_in_recovery);
clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
error = 0; error = 0;
} }
spin_unlock(&ls->ls_recover_lock); spin_unlock(&ls->ls_recover_lock);
...@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls) ...@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
rv = ls->ls_recover_args; rv = ls->ls_recover_args;
ls->ls_recover_args = NULL; ls->ls_recover_args = NULL;
if (rv && ls->ls_recover_seq == rv->seq) if (rv && ls->ls_recover_seq == rv->seq)
clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
spin_unlock(&ls->ls_recover_lock); spin_unlock(&ls->ls_recover_lock);
if (rv) { if (rv) {
...@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg) ...@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
return -1; return -1;
} }
down_write(&ls->ls_in_recovery);
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
wake_up(&ls->ls_recover_lock_wait);
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(LSFL_WORK, &ls->ls_flags)) if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
!test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
schedule(); schedule();
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
down_write(&ls->ls_in_recovery);
set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
wake_up(&ls->ls_recover_lock_wait);
}
if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
do_ls_recovery(ls); do_ls_recovery(ls);
} }
if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
up_write(&ls->ls_in_recovery);
dlm_put_lockspace(ls); dlm_put_lockspace(ls);
return 0; return 0;
} }
void dlm_recoverd_kick(struct dlm_ls *ls)
{
set_bit(LSFL_WORK, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
}
int dlm_recoverd_start(struct dlm_ls *ls) int dlm_recoverd_start(struct dlm_ls *ls)
{ {
struct task_struct *p; struct task_struct *p;
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#ifndef __RECOVERD_DOT_H__ #ifndef __RECOVERD_DOT_H__
#define __RECOVERD_DOT_H__ #define __RECOVERD_DOT_H__
void dlm_recoverd_kick(struct dlm_ls *ls);
void dlm_recoverd_stop(struct dlm_ls *ls); void dlm_recoverd_stop(struct dlm_ls *ls);
int dlm_recoverd_start(struct dlm_ls *ls); int dlm_recoverd_start(struct dlm_ls *ls);
void dlm_recoverd_suspend(struct dlm_ls *ls); void dlm_recoverd_suspend(struct dlm_ls *ls);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment