Commit eb99adde authored by Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (56 commits)
  [PATCH] fs/ocfs2/dlm/: cleanups
  ocfs2: fix compiler warnings in dlm_convert_lock_handler()
  ocfs2: dlm_print_one_mle() needs to be defined
  ocfs2: remove whitespace in dlmunlock.c
  ocfs2: move dlm work to a private work queue
  ocfs2: fix incorrect error returns
  ocfs2: tune down some noisy messages during dlm recovery
  ocfs2: display message before waiting for recovery to complete
  ocfs2: mlog in dlm_convert_lock_handler() should be ML_ERROR
  ocfs2: retry operations when a lock is marked in recovery
  ocfs2: use cond_resched() in dlm_thread()
  ocfs2: use GFP_NOFS in some dlm operations
  ocfs2: wait for recovery when starting lock mastery
  ocfs2: continue recovery when a dead node is encountered
  ocfs2: remove unneccesary spin_unlock() in dlm_remaster_locks()
  ocfs2: dlm_remaster_locks() should never exit without completing
  ocfs2: special case recovery lock in dlmlock_remote()
  ocfs2: pending mastery asserts and migrations should block each other
  ocfs2: temporarily disable automatic lock migration
  ocfs2: do not unconditionally purge the lockres in dlmlock_remote()
  ...
parents f6e6e883 3fb5a989
fs/ocfs2/dlm/dlmast.c
@@ -197,12 +197,14 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			     lock->ml.node == dlm->node_num ? "master" :
 			     "remote");
 			memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
-		} else if (lksb->flags & DLM_LKSB_PUT_LVB) {
-			mlog(0, "setting lvb from lockres for %s node\n",
-			     lock->ml.node == dlm->node_num ? "master" :
-			     "remote");
-			memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
 		}
+		/* Do nothing for lvb put requests - they should be done in
+		 * place when the lock is downconverted - otherwise we risk
+		 * racing gets and puts which could result in old lvb data
+		 * being propagated. We leave the put flag set and clear it
+		 * here. In the future we might want to clear it at the time
+		 * the put is actually done.
+		 */
 		spin_unlock(&res->spinlock);
 	}
fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,17 @@
 #define DLM_THREAD_SHUFFLE_INTERVAL   5     // flush everything every 5 passes
 #define DLM_THREAD_MS                 200   // flush at least every 200 ms
-#define DLM_HASH_BUCKETS     (PAGE_SIZE / sizeof(struct hlist_head))
+#define DLM_HASH_SIZE_DEFAULT	(1 << 14)
+#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
+# define DLM_HASH_PAGES		1
+#else
+# define DLM_HASH_PAGES		(DLM_HASH_SIZE_DEFAULT / PAGE_SIZE)
+#endif
+#define DLM_BUCKETS_PER_PAGE	(PAGE_SIZE / sizeof(struct hlist_head))
+#define DLM_HASH_BUCKETS	(DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)
+
+/* Intended to make it easier for us to switch out hash functions */
+#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)

 enum dlm_ast_type {
 	DLM_AST = 0,
@@ -62,6 +72,7 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
 }

 #define DLM_RECO_STATE_ACTIVE    0x0001
+#define DLM_RECO_STATE_FINALIZE  0x0002

 struct dlm_recovery_ctxt
 {
@@ -85,7 +96,7 @@ enum dlm_ctxt_state {
 struct dlm_ctxt
 {
 	struct list_head list;
-	struct hlist_head *lockres_hash;
+	struct hlist_head **lockres_hash;
 	struct list_head dirty_list;
 	struct list_head purge_list;
 	struct list_head pending_asts;
@@ -120,6 +131,7 @@ struct dlm_ctxt
 	struct o2hb_callback_func dlm_hb_down;
 	struct task_struct *dlm_thread_task;
 	struct task_struct *dlm_reco_thread_task;
+	struct workqueue_struct *dlm_worker;
 	wait_queue_head_t dlm_thread_wq;
 	wait_queue_head_t dlm_reco_thread_wq;
 	wait_queue_head_t ast_wq;
@@ -132,6 +144,11 @@ struct dlm_ctxt
 	struct list_head dlm_eviction_callbacks;
 };

+static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i)
+{
+	return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE);
+}
+
 /* these keventd work queue items are for less-frequently
  * called functions that cannot be directly called from the
  * net message handlers for some reason, usually because
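Reviewer note on the new dlm_lockres_hash() above: the lockres hash is no longer one contiguous allocation but a vector of pages, so a flat bucket index i is split into a page index and a slot within that page. A minimal standalone sketch of the same arithmetic follows; the sizes are hypothetical stand-ins for PAGE_SIZE and sizeof(struct hlist_head) on a 4K-page, 64-bit build.

#include <stdio.h>

/* Hypothetical stand-ins for PAGE_SIZE and sizeof(struct hlist_head). */
#define SK_PAGE_SIZE		4096UL
#define SK_BUCKET_SIZE		8UL
#define SK_HASH_SIZE		(1UL << 14)
#define SK_HASH_PAGES		(SK_HASH_SIZE / SK_PAGE_SIZE)
#define SK_BUCKETS_PER_PAGE	(SK_PAGE_SIZE / SK_BUCKET_SIZE)

int main(void)
{
	/* Mirror dlm_lockres_hash(): flat bucket index -> (page, slot). */
	unsigned i = 1234;
	unsigned page = (i / SK_BUCKETS_PER_PAGE) % SK_HASH_PAGES;
	unsigned slot = i % SK_BUCKETS_PER_PAGE;

	printf("%lu pages x %lu buckets/page = %lu buckets total\n",
	       SK_HASH_PAGES, SK_BUCKETS_PER_PAGE,
	       SK_HASH_PAGES * SK_BUCKETS_PER_PAGE);
	printf("bucket %u lives at page %u, slot %u\n", i, page, slot);
	return 0;
}

With these numbers the table grows from one page (512 buckets) to four pages (2048 buckets) without requiring a higher-order contiguous allocation.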
@@ -216,20 +233,29 @@ struct dlm_lock_resource
 	/* WARNING: Please see the comment in dlm_init_lockres before
 	 * adding fields here. */
 	struct hlist_node hash_node;
+	struct qstr lockname;
 	struct kref refs;

-	/* please keep these next 3 in this order
-	 * some funcs want to iterate over all lists */
+	/*
+	 * Please keep granted, converting, and blocked in this order,
+	 * as some funcs want to iterate over all lists.
+	 *
+	 * All four lists are protected by the hash's reference.
+	 */
 	struct list_head granted;
 	struct list_head converting;
 	struct list_head blocked;
+	struct list_head purge;

+	/*
+	 * These two lists require you to hold an additional reference
+	 * while they are on the list.
+	 */
 	struct list_head dirty;
 	struct list_head recovering; // dlm_recovery_ctxt.resources list

 	/* unused lock resources have their last_used stamped and are
 	 * put on a list for the dlm thread to run. */
-	struct list_head purge;
 	unsigned long last_used;

 	unsigned migration_pending:1;
@@ -238,7 +264,6 @@ struct dlm_lock_resource
 	wait_queue_head_t wq;
 	u8  owner;              //node which owns the lock resource, or unknown
 	u16 state;
-	struct qstr lockname;
 	char lvb[DLM_LVB_LEN];
 };
@@ -300,6 +325,15 @@ enum dlm_lockres_list {
 	DLM_BLOCKED_LIST
 };

+static inline int dlm_lvb_is_empty(char *lvb)
+{
+	int i;
+	for (i=0; i<DLM_LVB_LEN; i++)
+		if (lvb[i])
+			return 0;
+	return 1;
+}
+
 static inline struct list_head *
 dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
 {
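Reviewer note: dlm_lvb_is_empty() simply reports whether every byte of the lock value block is zero. An equivalent user-space formulation compares against a static zero buffer; the length of 64 below is an assumption standing in for DLM_LVB_LEN.

#include <assert.h>
#include <string.h>

#define LVB_LEN 64	/* assumption: stand-in for DLM_LVB_LEN */

/* Equivalent to dlm_lvb_is_empty(): true iff every byte is zero. */
static int lvb_is_empty(const char *lvb)
{
	static const char zeros[LVB_LEN];
	return memcmp(lvb, zeros, LVB_LEN) == 0;
}

int main(void)
{
	char lvb[LVB_LEN] = { 0 };

	assert(lvb_is_empty(lvb));
	lvb[17] = 1;
	assert(!lvb_is_empty(lvb));
	return 0;
}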
@@ -609,7 +643,8 @@ struct dlm_finalize_reco
 {
 	u8 node_idx;
 	u8 dead_node;
-	__be16 pad1;
+	u8 flags;
+	u8 pad1;
 	__be32 pad2;
 };
@@ -676,6 +711,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
 int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
+int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);

 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
@@ -687,14 +723,20 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			    struct dlm_lock_resource *res);
 void dlm_purge_lockres(struct dlm_ctxt *dlm,
 		       struct dlm_lock_resource *lockres);
-void dlm_lockres_get(struct dlm_lock_resource *res);
+static inline void dlm_lockres_get(struct dlm_lock_resource *res)
+{
+	/* This is called on every lookup, so it might be worth
+	 * inlining. */
+	kref_get(&res->refs);
+}
 void dlm_lockres_put(struct dlm_lock_resource *res);
 void __dlm_unhash_lockres(struct dlm_lock_resource *res);
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 			  struct dlm_lock_resource *res);
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
 						const char *name,
-						unsigned int len);
+						unsigned int len,
+						unsigned int hash);
 struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 					      const char *name,
 					      unsigned int len);
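Reviewer note on the refcounting contract around the now-inlined dlm_lockres_get(): the hash table holds one reference on each resource, every successful lookup takes another, and dlm_lockres_put() frees the resource on the final drop. A toy user-space analogue of that contract, with hypothetical names standing in for the struct kref machinery:

#include <stdio.h>

/* One counter per resource; the kernel code uses struct kref. */
struct fake_res {
	int refcount;
};

static void res_get(struct fake_res *r) { r->refcount++; }

static void res_put(struct fake_res *r)
{
	/* dlm_lockres_put() frees the lockres when the count hits zero */
	if (--r->refcount == 0)
		printf("last put: freeing resource\n");
}

int main(void)
{
	struct fake_res r = { .refcount = 1 };	/* hash table's reference */

	res_get(&r);	/* a lookup takes its own reference */
	res_put(&r);	/* the caller drops it when done */
	res_put(&r);	/* unhashing drops the final reference */
	return 0;
}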
@@ -819,6 +861,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm,
 			   u8 dead_node);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
+int __dlm_lockres_unused(struct dlm_lock_resource *res);

 static inline const char * dlm_lock_mode_name(int mode)
 {
fs/ocfs2/dlm/dlmconvert.c
@@ -214,6 +214,9 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dlm->node_num)
 			mlog(0, "doing in-place convert for nonlocal lock\n");
 		lock->ml.type = type;
+		if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
+			memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
+
 		status = DLM_NORMAL;
 		*call_ast = 1;
 		goto unlock_exit;
@@ -461,6 +464,12 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	}

 	spin_lock(&res->spinlock);
+	status = __dlm_lockres_state_to_status(res);
+	if (status != DLM_NORMAL) {
+		spin_unlock(&res->spinlock);
+		dlm_error(status);
+		goto leave;
+	}
 	list_for_each(iter, &res->granted) {
 		lock = list_entry(iter, struct dlm_lock, list);
 		if (lock->ml.cookie == cnv->cookie &&
@@ -470,6 +479,21 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 		}
 		lock = NULL;
 	}
+	if (!lock) {
+		__dlm_print_one_lock_resource(res);
+		list_for_each(iter, &res->granted) {
+			lock = list_entry(iter, struct dlm_lock, list);
+			if (lock->ml.node == cnv->node_idx) {
+				mlog(ML_ERROR, "There is something here "
+				     "for node %u, lock->ml.cookie=%llu, "
+				     "cnv->cookie=%llu\n", cnv->node_idx,
+				     (unsigned long long)lock->ml.cookie,
+				     (unsigned long long)cnv->cookie);
+				break;
+			}
+		}
+		lock = NULL;
+	}
 	spin_unlock(&res->spinlock);
 	if (!lock) {
 		status = DLM_IVLOCKID;
fs/ocfs2/dlm/dlmdebug.c
@@ -37,10 +37,8 @@
 #include "dlmapi.h"
 #include "dlmcommon.h"
-#include "dlmdebug.h"
 #include "dlmdomain.h"
-#include "dlmdebug.h"

 #define MLOG_MASK_PREFIX ML_DLM
 #include "cluster/masklog.h"
@@ -120,6 +118,7 @@ void dlm_print_one_lock(struct dlm_lock *lockid)
 }
 EXPORT_SYMBOL_GPL(dlm_print_one_lock);

+#if 0
 void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
 {
 	struct dlm_lock_resource *res;
@@ -136,12 +135,13 @@ void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
 	spin_lock(&dlm->spinlock);
 	for (i=0; i<DLM_HASH_BUCKETS; i++) {
-		bucket = &(dlm->lockres_hash[i]);
+		bucket = dlm_lockres_hash(dlm, i);
 		hlist_for_each_entry(res, iter, bucket, hash_node)
 			dlm_print_one_lock_resource(res);
 	}
 	spin_unlock(&dlm->spinlock);
 }
+#endif /* 0 */

 static const char *dlm_errnames[] = {
 	[DLM_NORMAL] =			"DLM_NORMAL",
fs/ocfs2/dlm/dlmdebug.h
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* dlmdebug.h
*
* Copyright (C) 2004 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*
*/
#ifndef DLMDEBUG_H
#define DLMDEBUG_H
void dlm_dump_lock_resources(struct dlm_ctxt *dlm);
#endif
fs/ocfs2/dlm/dlmdomain.c
@@ -41,7 +41,6 @@
 #include "dlmapi.h"
 #include "dlmcommon.h"
-#include "dlmdebug.h"
 #include "dlmdomain.h"

 #include "dlmver.h"
@@ -49,6 +48,33 @@
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
 #include "cluster/masklog.h"

+static void dlm_free_pagevec(void **vec, int pages)
+{
+	while (pages--)
+		free_page((unsigned long)vec[pages]);
+	kfree(vec);
+}
+
+static void **dlm_alloc_pagevec(int pages)
+{
+	void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL);
+	int i;
+
+	if (!vec)
+		return NULL;
+
+	for (i = 0; i < pages; i++)
+		if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL)))
+			goto out_free;
+
+	mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
+	     pages, DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE);
+	return vec;
+
+out_free:
+	dlm_free_pagevec(vec, i);
+	return NULL;
+}
+
 /*
  *
  * spinlock lock ordering: if multiple locks are needed, obey this ordering:
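Reviewer note: dlm_alloc_pagevec() uses the classic allocate-with-rollback pattern; on the first page-allocation failure it frees exactly the i pages already obtained, then the pointer vector itself. A user-space analogue with malloc/calloc standing in for kmalloc/__get_free_page (all names here are illustrative):

#include <stdio.h>
#include <stdlib.h>

/* Free n previously allocated blocks, then the pointer vector itself
 * (mirrors dlm_free_pagevec()). */
static void free_vec(void **vec, int n)
{
	while (n--)
		free(vec[n]);
	free(vec);
}

/* Allocate n blocks of sz bytes; on failure, roll back the partial
 * allocation (mirrors dlm_alloc_pagevec()). */
static void **alloc_vec(int n, size_t sz)
{
	void **vec = malloc(n * sizeof(void *));
	int i;

	if (!vec)
		return NULL;
	for (i = 0; i < n; i++) {
		vec[i] = calloc(1, sz);
		if (!vec[i]) {
			free_vec(vec, i);	/* free only the i blocks we got */
			return NULL;
		}
	}
	return vec;
}

int main(void)
{
	void **vec = alloc_vec(4, 4096);

	printf("pagevec %s\n", vec ? "allocated" : "failed");
	if (vec)
		free_vec(vec, 4);
	return 0;
}

The same rollback shape is why dlm_free_pagevec() takes a page count: it is reused both for the error path (partial vector) and for the normal teardown in dlm_free_ctxt_mem() below.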
@@ -90,8 +116,7 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 	assert_spin_locked(&dlm->spinlock);

 	q = &res->lockname;
-	q->hash = full_name_hash(q->name, q->len);
-	bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]);
+	bucket = dlm_lockres_hash(dlm, q->hash);

 	/* get a reference for our hashtable */
 	dlm_lockres_get(res);
@@ -101,33 +126,31 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
 struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
 						const char *name,
-						unsigned int len)
+						unsigned int len,
+						unsigned int hash)
 {
-	unsigned int hash;
-	struct hlist_node *iter;
-	struct dlm_lock_resource *tmpres=NULL;
 	struct hlist_head *bucket;
+	struct hlist_node *list;

 	mlog_entry("%.*s\n", len, name);

 	assert_spin_locked(&dlm->spinlock);

-	hash = full_name_hash(name, len);
-	bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]);
-
-	/* check for pre-existing lock */
-	hlist_for_each(iter, bucket) {
-		tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node);
-		if (tmpres->lockname.len == len &&
-		    memcmp(tmpres->lockname.name, name, len) == 0) {
-			dlm_lockres_get(tmpres);
-			break;
-		}
+	bucket = dlm_lockres_hash(dlm, hash);

-		tmpres = NULL;
+	hlist_for_each(list, bucket) {
+		struct dlm_lock_resource *res = hlist_entry(list,
+			struct dlm_lock_resource, hash_node);
+		if (res->lockname.name[0] != name[0])
+			continue;
+		if (unlikely(res->lockname.len != len))
+			continue;
+		if (memcmp(res->lockname.name + 1, name + 1, len - 1))
+			continue;
+		dlm_lockres_get(res);
+		return res;
 	}

-	return tmpres;
+	return NULL;
 }

 struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
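Reviewer note: the rewritten lookup orders its rejects from cheapest to most expensive: a one-byte compare, then a length compare, and only then a memcmp of the remaining bytes (presumably because the first byte of an ocfs2 lock name is a cheap discriminator). A standalone sketch of that comparison order, with hypothetical names:

#include <assert.h>
#include <string.h>

/* Match a candidate lock name against a key the way the new
 * __dlm_lookup_lockres() does: cheapest test first. */
static int name_matches(const char *cand, size_t cand_len,
			const char *key, size_t key_len)
{
	if (cand[0] != key[0])		/* single-byte reject */
		return 0;
	if (cand_len != key_len)	/* integer compare */
		return 0;
	/* full compare, skipping the byte already checked */
	return memcmp(cand + 1, key + 1, key_len - 1) == 0;
}

int main(void)
{
	assert(name_matches("M000abc", 7, "M000abc", 7));
	assert(!name_matches("M000abc", 7, "N000abc", 7));
	assert(!name_matches("M000abc", 7, "M000abcd", 8));
	return 0;
}

Note also that the hash itself moved out of this function: callers now pass it in, so a resource's hash can be computed once (via dlm_lockid_hash) instead of on every lookup.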
@@ -135,9 +158,10 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 					      unsigned int len)
 {
 	struct dlm_lock_resource *res;
+	unsigned int hash = dlm_lockid_hash(name, len);

 	spin_lock(&dlm->spinlock);
-	res = __dlm_lookup_lockres(dlm, name, len);
+	res = __dlm_lookup_lockres(dlm, name, len, hash);
 	spin_unlock(&dlm->spinlock);

 	return res;
 }
@@ -194,7 +218,7 @@ static int dlm_wait_on_domain_helper(const char *domain)
 static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
 {
 	if (dlm->lockres_hash)
-		free_page((unsigned long) dlm->lockres_hash);
+		dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES);

 	if (dlm->name)
 		kfree(dlm->name);
@@ -278,11 +302,21 @@ int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
 	return ret;
 }

+static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
+{
+	if (dlm->dlm_worker) {
+		flush_workqueue(dlm->dlm_worker);
+		destroy_workqueue(dlm->dlm_worker);
+		dlm->dlm_worker = NULL;
+	}
+}
+
 static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
 {
 	dlm_unregister_domain_handlers(dlm);
 	dlm_complete_thread(dlm);
 	dlm_complete_recovery_thread(dlm);
+	dlm_destroy_dlm_worker(dlm);

 	/* We've left the domain. Now we can take ourselves out of the
 	 * list and allow the kref stuff to help us free the
@@ -304,8 +338,8 @@ static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
 restart:
 	spin_lock(&dlm->spinlock);
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
-		while (!hlist_empty(&dlm->lockres_hash[i])) {
-			res = hlist_entry(dlm->lockres_hash[i].first,
+		while (!hlist_empty(dlm_lockres_hash(dlm, i))) {
+			res = hlist_entry(dlm_lockres_hash(dlm, i)->first,
 					  struct dlm_lock_resource, hash_node);
 			/* need reference when manually grabbing lockres */
 			dlm_lockres_get(res);
@@ -1126,6 +1160,13 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 		goto bail;
 	}

+	dlm->dlm_worker = create_singlethread_workqueue("dlm_wq");
+	if (!dlm->dlm_worker) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
 	do {
 		unsigned int backoff;
 		status = dlm_try_to_join_domain(dlm);
@@ -1166,6 +1207,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 		dlm_unregister_domain_handlers(dlm);
 		dlm_complete_thread(dlm);
 		dlm_complete_recovery_thread(dlm);
+		dlm_destroy_dlm_worker(dlm);
 	}

 	return status;
@@ -1191,7 +1233,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 		goto leave;
 	}

-	dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL);
+	dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES);
 	if (!dlm->lockres_hash) {
 		mlog_errno(-ENOMEM);
 		kfree(dlm->name);
@@ -1200,8 +1242,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 		goto leave;
 	}

-	for (i=0; i<DLM_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dlm->lockres_hash[i]);
+	for (i = 0; i < DLM_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i));

 	strcpy(dlm->name, domain);
 	dlm->key = key;
@@ -1231,6 +1273,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,

 	dlm->dlm_thread_task = NULL;
 	dlm->dlm_reco_thread_task = NULL;
+	dlm->dlm_worker = NULL;
 	init_waitqueue_head(&dlm->dlm_thread_wq);
 	init_waitqueue_head(&dlm->dlm_reco_thread_wq);
 	init_waitqueue_head(&dlm->reco.event);
fs/ocfs2/dlm/dlmfs.c
@@ -116,7 +116,7 @@ static int dlmfs_file_open(struct inode *inode,
 	 * doesn't make sense for LVB writes. */
 	file->f_flags &= ~O_APPEND;

-	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+	fp = kmalloc(sizeof(*fp), GFP_NOFS);
 	if (!fp) {
 		status = -ENOMEM;
 		goto bail;
@@ -196,7 +196,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	else
 		readlen = count - *ppos;

-	lvb_buf = kmalloc(readlen, GFP_KERNEL);
+	lvb_buf = kmalloc(readlen, GFP_NOFS);
 	if (!lvb_buf)
 		return -ENOMEM;
@@ -240,7 +240,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	else
 		writelen = count - *ppos;

-	lvb_buf = kmalloc(writelen, GFP_KERNEL);
+	lvb_buf = kmalloc(writelen, GFP_NOFS);
 	if (!lvb_buf)
 		return -ENOMEM;
fs/ocfs2/dlm/dlmlock.c
@@ -201,6 +201,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 				      struct dlm_lock *lock, int flags)
 {
 	enum dlm_status status = DLM_DENIED;
+	int lockres_changed = 1;

 	mlog_entry("type=%d\n", lock->ml.type);
 	mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
@@ -226,8 +227,25 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 	lock->lock_pending = 0;
 	if (status != DLM_NORMAL) {
-		if (status != DLM_NOTQUEUED)
+		if (status == DLM_RECOVERING &&
+		    dlm_is_recovery_lock(res->lockname.name,
+					 res->lockname.len)) {
+			/* recovery lock was mastered by dead node.
+			 * we need to have calc_usage shoot down this
+			 * lockres and completely remaster it. */
+			mlog(0, "%s: recovery lock was owned by "
+			     "dead node %u, remaster it now.\n",
+			     dlm->name, res->owner);
+		} else if (status != DLM_NOTQUEUED) {
+			/*
+			 * DO NOT call calc_usage, as this would unhash
+			 * the remote lockres before we ever get to use
+			 * it.  treat as if we never made any change to
+			 * the lockres.
+			 */
+			lockres_changed = 0;
 			dlm_error(status);
+		}
 		dlm_revert_pending_lock(res, lock);
 		dlm_lock_put(lock);
 	} else if (dlm_is_recovery_lock(res->lockname.name,
@@ -243,6 +261,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	}
 	spin_unlock(&res->spinlock);

+	if (lockres_changed)
 		dlm_lockres_calc_usage(dlm, res);

 	wake_up(&res->wq);
@@ -280,6 +299,14 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
 	if (tmpret >= 0) {
 		// successfully sent and received
 		ret = status;  // this is already a dlm_status
+		if (ret == DLM_REJECTED) {
+			mlog(ML_ERROR, "%s:%.*s: BUG.  this is a stale lockres "
+			     "no longer owned by %u.  that node is coming back "
+			     "up currently.\n", dlm->name, create.namelen,
+			     create.name, res->owner);
+			dlm_print_one_lock_resource(res);
+			BUG();
+		}
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
@@ -381,13 +408,13 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 	struct dlm_lock *lock;
 	int kernel_allocated = 0;

-	lock = kcalloc(1, sizeof(*lock), GFP_KERNEL);
+	lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
 	if (!lock)
 		return NULL;

 	if (!lksb) {
 		/* zero memory only if kernel-allocated */
-		lksb = kcalloc(1, sizeof(*lksb), GFP_KERNEL);
+		lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
 		if (!lksb) {
 			kfree(lock);
 			return NULL;
@@ -428,11 +455,16 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	if (!dlm_grab(dlm))
 		return DLM_REJECTED;

-	mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
-			"Domain %s not fully joined!\n", dlm->name);
-
 	name = create->name;
 	namelen = create->namelen;
+	status = DLM_REJECTED;
+	if (!dlm_domain_fully_joined(dlm)) {
+		mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
+		     "sending a create_lock message for lock %.*s!\n",
+		     dlm->name, create->node_idx, namelen, name);
+		dlm_error(status);
+		goto leave;
+	}

 	status = DLM_IVBUFLEN;
 	if (namelen > DLM_LOCKID_NAME_MAX) {
@@ -668,19 +700,23 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
 			msleep(100);
 			/* no waiting for dlm_reco_thread */
 			if (recovery) {
-				if (status == DLM_RECOVERING) {
+				if (status != DLM_RECOVERING)
+					goto retry_lock;
+
 				mlog(0, "%s: got RECOVERING "
-				     "for $REOCVERY lock, master "
+				     "for $RECOVERY lock, master "
 				     "was %u\n", dlm->name,
 				     res->owner);
+				/* wait to see the node go down, then
+				 * drop down and allow the lockres to
+				 * get cleaned up.  need to remaster. */
 				dlm_wait_for_node_death(dlm, res->owner,
 						DLM_NODE_DEATH_WAIT_MAX);
-				}
 			} else {
 				dlm_wait_for_recovery(dlm);
+				goto retry_lock;
 			}
-			goto retry_lock;
 		}

 	if (status != DLM_NORMAL) {
 		lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
(Two further file diffs are collapsed in this view.)
fs/ocfs2/dlm/dlmthread.c
@@ -39,6 +39,7 @@
 #include <linux/inet.h>
 #include <linux/timer.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>

 #include "cluster/heartbeat.h"
@@ -53,6 +54,8 @@
 #include "cluster/masklog.h"

 static int dlm_thread(void *data);
+static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
+				  struct dlm_lock_resource *lockres);

 static void dlm_flush_asts(struct dlm_ctxt *dlm);
@@ -80,7 +83,7 @@ void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
 }

-static int __dlm_lockres_unused(struct dlm_lock_resource *res)
+int __dlm_lockres_unused(struct dlm_lock_resource *res)
 {
 	if (list_empty(&res->granted) &&
 	    list_empty(&res->converting) &&
@@ -103,6 +106,20 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 	assert_spin_locked(&res->spinlock);

 	if (__dlm_lockres_unused(res)){
+		/* For now, just keep any resource we master */
+		if (res->owner == dlm->node_num)
+		{
+			if (!list_empty(&res->purge)) {
+				mlog(0, "we master %s:%.*s, but it is on "
+				     "the purge list.  Removing\n",
+				     dlm->name, res->lockname.len,
+				     res->lockname.name);
+				list_del_init(&res->purge);
+				dlm->purge_count--;
+			}
+			return;
+		}
+
 		if (list_empty(&res->purge)) {
 			mlog(0, "putting lockres %.*s from purge list\n",
 			     res->lockname.len, res->lockname.name);
@@ -110,10 +127,23 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			res->last_used = jiffies;
 			list_add_tail(&res->purge, &dlm->purge_list);
 			dlm->purge_count++;
+
+			/* if this node is not the owner, there is
+			 * no way to keep track of who the owner could be.
+			 * unhash it to avoid serious problems. */
+			if (res->owner != dlm->node_num) {
+				mlog(0, "%s:%.*s: doing immediate "
+				     "purge of lockres owned by %u\n",
+				     dlm->name, res->lockname.len,
+				     res->lockname.name, res->owner);
+				dlm_purge_lockres_now(dlm, res);
+			}
 		}
 	} else if (!list_empty(&res->purge)) {
-		mlog(0, "removing lockres %.*s from purge list\n",
-		     res->lockname.len, res->lockname.name);
+		mlog(0, "removing lockres %.*s from purge list, "
+		     "owner=%u\n", res->lockname.len, res->lockname.name,
+		     res->owner);
 		list_del_init(&res->purge);
 		dlm->purge_count--;
@@ -165,6 +195,7 @@ void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres)
 	} else if (ret < 0) {
 		mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
 		     lockres->lockname.len, lockres->lockname.name);
+		msleep(100);
 		goto again;
 	}
@@ -178,6 +209,24 @@ void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres)
 	__dlm_unhash_lockres(lockres);
 }

+/* make an unused lockres go away immediately.
+ * as soon as the dlm spinlock is dropped, this lockres
+ * will not be found. kfree still happens on last put. */
+static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
+				  struct dlm_lock_resource *lockres)
+{
+	assert_spin_locked(&dlm->spinlock);
+	assert_spin_locked(&lockres->spinlock);
+
+	BUG_ON(!__dlm_lockres_unused(lockres));
+
+	if (!list_empty(&lockres->purge)) {
+		list_del_init(&lockres->purge);
+		dlm->purge_count--;
+	}
+	__dlm_unhash_lockres(lockres);
+}
+
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			       int purge_now)
 {
@@ -420,6 +469,8 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	/* don't shuffle secondary queues */
 	if ((res->owner == dlm->node_num) &&
 	    !(res->state & DLM_LOCK_RES_DIRTY)) {
+		/* ref for dirty_list */
+		dlm_lockres_get(res);
 		list_add_tail(&res->dirty, &dlm->dirty_list);
 		res->state |= DLM_LOCK_RES_DIRTY;
 	}
@@ -604,6 +655,8 @@ static int dlm_thread(void *data)
 			list_del_init(&res->dirty);
 			spin_unlock(&res->spinlock);
 			spin_unlock(&dlm->spinlock);
+			/* Drop dirty_list ref */
+			dlm_lockres_put(res);

 			/* lockres can be re-dirtied/re-added to the
 			 * dirty_list in this gap, but that is ok */
@@ -640,8 +693,9 @@ static int dlm_thread(void *data)
 			 * spinlock and do NOT have the dlm lock.
 			 * safe to reserve/queue asts and run the lists. */
-			mlog(0, "calling dlm_shuffle_lists with dlm=%p, "
-			     "res=%p\n", dlm, res);
+			mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
+			     "res=%.*s\n", dlm->name,
+			     res->lockname.len, res->lockname.name);

 			/* called while holding lockres lock */
 			dlm_shuffle_lists(dlm, res);
@@ -655,6 +709,8 @@ static int dlm_thread(void *data)
 			/* if the lock was in-progress, stick
 			 * it on the back of the list */
 			if (delay) {
+				/* ref for dirty_list */
+				dlm_lockres_get(res);
 				spin_lock(&res->spinlock);
 				list_add_tail(&res->dirty, &dlm->dirty_list);
 				res->state |= DLM_LOCK_RES_DIRTY;
@@ -675,7 +731,7 @@ static int dlm_thread(void *data)

 		/* yield and continue right away if there is more work to do */
 		if (!n) {
-			yield();
+			cond_resched();
 			continue;
 		}
fs/ocfs2/dlm/dlmunlock.c
@@ -318,6 +318,16 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,

 	mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);

+	if (owner == dlm->node_num) {
+		/* ended up trying to contact ourself.  this means
+		 * that the lockres had been remote but became local
+		 * via a migration.  just retry it, now as local */
+		mlog(0, "%s:%.*s: this node became the master due to a "
+		     "migration, re-evaluate now\n", dlm->name,
+		     res->lockname.len, res->lockname.name);
+		return DLM_FORWARD;
+	}
+
 	memset(&unlock, 0, sizeof(unlock));
 	unlock.node_idx = dlm->node_num;
 	unlock.flags = cpu_to_be32(flags);
fs/ocfs2/dlm/userdlm.c
@@ -672,7 +672,7 @@ struct dlm_ctxt *user_dlm_register_context(struct qstr *name)
 	u32 dlm_key;
 	char *domain;

-	domain = kmalloc(name->len + 1, GFP_KERNEL);
+	domain = kmalloc(name->len + 1, GFP_NOFS);
 	if (!domain) {
 		mlog_errno(-ENOMEM);
 		return ERR_PTR(-ENOMEM);