Commit dc732906 authored by Bob Peterson, committed by Andreas Gruenbacher

gfs2: Introduce flag for glock holder auto-demotion

This patch introduces a new HIF_MAY_DEMOTE flag and infrastructure that
will allow glocks to be demoted automatically on locking conflicts.
When a locking request comes in that isn't compatible with the locking
state of an active holder and that holder has the HIF_MAY_DEMOTE flag
set, the holder will be demoted before the incoming locking request is
granted.

Note that this mechanism demotes active holders (with the HIF_HOLDER
flag set), while before we were only demoting glocks without any active
holders.  This allows processes to keep hold of locks that may form a
cyclic locking dependency; the core glock logic will then break those
dependencies in case a conflicting locking request occurs.  We'll use
this to avoid giving up the inode glock proactively before faulting in
pages.

Processes that allow a glock holder to be taken away indicate this by
calling gfs2_holder_allow_demote(), which sets the HIF_MAY_DEMOTE flag.
Later, they call gfs2_holder_disallow_demote() to clear the flag again,
and then they check if their holder is still queued: if it is, they are
still holding the glock; if it isn't, they can re-acquire the glock (or
abort).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
parent 61444649
...@@ -58,6 +58,7 @@ struct gfs2_glock_iter { ...@@ -58,6 +58,7 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl); typedef void (*glock_examiner) (struct gfs2_glock * gl);
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void __gfs2_glock_dq(struct gfs2_holder *gh);
static struct dentry *gfs2_root; static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue; static struct workqueue_struct *glock_workqueue;
...@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl) ...@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl)
if (gl->gl_state == LM_ST_UNLOCKED) if (gl->gl_state == LM_ST_UNLOCKED)
return 0; return 0;
/*
* Note that demote_ok is used for the lru process of disposing of
* glocks. For this purpose, we don't care if the glock's holders
* have the HIF_MAY_DEMOTE flag set or not. If someone is using
* them, don't demote.
*/
if (!list_empty(&gl->gl_holders)) if (!list_empty(&gl->gl_holders))
return 0; return 0;
if (glops->go_demote_ok) if (glops->go_demote_ok)
...@@ -379,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret) ...@@ -379,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
struct gfs2_holder *gh, *tmp; struct gfs2_holder *gh, *tmp;
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags)) if (!test_bit(HIF_WAIT, &gh->gh_iflags))
continue; continue;
if (ret & LM_OUT_ERROR) if (ret & LM_OUT_ERROR)
gh->gh_error = -EIO; gh->gh_error = -EIO;
...@@ -393,6 +400,40 @@ static void do_error(struct gfs2_glock *gl, const int ret) ...@@ -393,6 +400,40 @@ static void do_error(struct gfs2_glock *gl, const int ret)
} }
} }
/**
 * demote_incompat_holders - demote incompatible demoteable holders
 * @gl: the glock we want to promote
 * @new_gh: the new holder to be promoted
 *
 * Walks the run of HIF_HOLDER entries at the front of @gl's holder list and
 * dequeues, via __gfs2_glock_dq(), every one that has HIF_MAY_DEMOTE set and
 * for which may_grant(gl, new_gh, gh) is false.
 */
static void demote_incompat_holders(struct gfs2_glock *gl,
				    struct gfs2_holder *new_gh)
{
	struct gfs2_holder *gh, *tmp;

	/*
	 * Demote incompatible holders before we make ourselves eligible.
	 * (This holder may or may not allow auto-demoting, but we don't want
	 * to demote the new holder before it's even granted.)
	 *
	 * __gfs2_glock_dq() unlinks the holder with list_del_init() and
	 * clears HIF_HOLDER, so the successor has to be fetched before the
	 * call: hence the _safe iterator.  With the plain iterator the walk
	 * steps back onto the just-removed, self-linked entry and bails out
	 * at the HIF_HOLDER check below, leaving any later incompatible
	 * demoteable holders in place.
	 */
	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		/*
		 * Since holders are at the front of the list, we stop when we
		 * find the first non-holder.  This also ends the walk if
		 * __gfs2_glock_dq() went on to dequeue the saved successor,
		 * because that entry no longer has HIF_HOLDER set either.
		 */
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return;
		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
		    !may_grant(gl, new_gh, gh)) {
			/*
			 * We should not recurse into do_promote because
			 * __gfs2_glock_dq only calls handle_callback,
			 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
			 */
			__gfs2_glock_dq(gh);
		}
	}
}
/** /**
* find_first_holder - find the first "holder" gh * find_first_holder - find the first "holder" gh
* @gl: the glock * @gl: the glock
...@@ -411,6 +452,26 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) ...@@ -411,6 +452,26 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
return NULL; return NULL;
} }
/**
 * find_first_strong_holder - find the first non-demoteable holder
 * @gl: the glock
 *
 * Scans the leading run of HIF_HOLDER entries on @gl's holder list.
 *
 * Returns: the first of those entries that does not have HIF_MAY_DEMOTE
 * set, or NULL if a non-holder or the end of the list is reached first.
 */
static inline struct gfs2_holder *
find_first_strong_holder(struct gfs2_glock *gl)
{
	struct gfs2_holder *holder;

	list_for_each_entry(holder, &gl->gl_holders, gh_list) {
		/* Past the last holder: nothing further can qualify. */
		if (!test_bit(HIF_HOLDER, &holder->gh_iflags))
			break;
		/* Demoteable ("weak") holders are skipped. */
		if (test_bit(HIF_MAY_DEMOTE, &holder->gh_iflags))
			continue;
		return holder;
	}
	return NULL;
}
/** /**
* do_promote - promote as many requests as possible on the current queue * do_promote - promote as many requests as possible on the current queue
* @gl: The glock * @gl: The glock
...@@ -425,14 +486,20 @@ __acquires(&gl->gl_lockref.lock) ...@@ -425,14 +486,20 @@ __acquires(&gl->gl_lockref.lock)
{ {
const struct gfs2_glock_operations *glops = gl->gl_ops; const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh, *tmp, *first_gh; struct gfs2_holder *gh, *tmp, *first_gh;
bool incompat_holders_demoted = false;
int ret; int ret;
restart: restart:
first_gh = find_first_holder(gl); first_gh = find_first_strong_holder(gl);
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags)) if (!test_bit(HIF_WAIT, &gh->gh_iflags))
continue; continue;
if (may_grant(gl, first_gh, gh)) { if (may_grant(gl, first_gh, gh)) {
if (!incompat_holders_demoted) {
demote_incompat_holders(gl, first_gh);
incompat_holders_demoted = true;
first_gh = gh;
}
if (gh->gh_list.prev == &gl->gl_holders && if (gh->gh_list.prev == &gl->gl_holders &&
glops->go_lock) { glops->go_lock) {
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
...@@ -458,6 +525,11 @@ __acquires(&gl->gl_lockref.lock) ...@@ -458,6 +525,11 @@ __acquires(&gl->gl_lockref.lock)
gfs2_holder_wake(gh); gfs2_holder_wake(gh);
continue; continue;
} }
/*
* If we get here, it means we may not grant this holder for
* some reason. If this holder is the head of the list, it
* means we have a blocked holder at the head, so return 1.
*/
if (gh->gh_list.prev == &gl->gl_holders) if (gh->gh_list.prev == &gl->gl_holders)
return 1; return 1;
do_error(gl, 0); do_error(gl, 0);
...@@ -1372,7 +1444,7 @@ __acquires(&gl->gl_lockref.lock) ...@@ -1372,7 +1444,7 @@ __acquires(&gl->gl_lockref.lock)
if (test_bit(GLF_LOCK, &gl->gl_flags)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) {
struct gfs2_holder *first_gh; struct gfs2_holder *first_gh;
first_gh = find_first_holder(gl); first_gh = find_first_strong_holder(gl);
try_futile = !may_grant(gl, first_gh, gh); try_futile = !may_grant(gl, first_gh, gh);
} }
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
...@@ -1381,7 +1453,8 @@ __acquires(&gl->gl_lockref.lock) ...@@ -1381,7 +1453,8 @@ __acquires(&gl->gl_lockref.lock)
list_for_each_entry(gh2, &gl->gl_holders, gh_list) { list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
!test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
goto trap_recursive; goto trap_recursive;
if (try_futile && if (try_futile &&
!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
...@@ -1477,51 +1550,83 @@ int gfs2_glock_poll(struct gfs2_holder *gh) ...@@ -1477,51 +1550,83 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
} }
/** static inline bool needs_demote(struct gfs2_glock *gl)
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock) {
* @gh: the glock holder return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
* test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
*/ }
void gfs2_glock_dq(struct gfs2_holder *gh) static void __gfs2_glock_dq(struct gfs2_holder *gh)
{ {
struct gfs2_glock *gl = gh->gh_gl; struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned delay = 0; unsigned delay = 0;
int fast_path = 0; int fast_path = 0;
spin_lock(&gl->gl_lockref.lock);
/* /*
* If we're in the process of file system withdraw, we cannot just * This while loop is similar to function demote_incompat_holders:
* dequeue any glocks until our journal is recovered, lest we * If the glock is due to be demoted (which may be from another node
* introduce file system corruption. We need two exceptions to this * or even if this holder is GL_NOCACHE), the weak holders are
* rule: We need to allow unlocking of nondisk glocks and the glock * demoted as well, allowing the glock to be demoted.
* for our own journal that needs recovery.
*/ */
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && while (gh) {
glock_blocked_by_withdraw(gl) && /*
gh->gh_gl != sdp->sd_jinode_gl) { * If we're in the process of file system withdraw, we cannot
sdp->sd_glock_dqs_held++; * just dequeue any glocks until our journal is recovered, lest
spin_unlock(&gl->gl_lockref.lock); * we introduce file system corruption. We need two exceptions
might_sleep(); * to this rule: We need to allow unlocking of nondisk glocks
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, * and the glock for our own journal that needs recovery.
TASK_UNINTERRUPTIBLE); */
spin_lock(&gl->gl_lockref.lock); if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
} glock_blocked_by_withdraw(gl) &&
if (gh->gh_flags & GL_NOCACHE) gh->gh_gl != sdp->sd_jinode_gl) {
handle_callback(gl, LM_ST_UNLOCKED, 0, false); sdp->sd_glock_dqs_held++;
spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
/*
* This holder should not be cached, so mark it for demote.
* Note: this should be done before the check for needs_demote
* below.
*/
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_glock_queue(gh, 0);
/*
* If there hasn't been a demote request we are done.
* (Let the remaining holders, if any, keep holding it.)
*/
if (!needs_demote(gl)) {
if (list_empty(&gl->gl_holders))
fast_path = 1;
break;
}
/*
* If we have another strong holder (we cannot auto-demote)
* we are done. It keeps holding it until it is done.
*/
if (find_first_strong_holder(gl))
break;
list_del_init(&gh->gh_list); /*
clear_bit(HIF_HOLDER, &gh->gh_iflags); * If we have a weak holder at the head of the list, it
if (list_empty(&gl->gl_holders) && * (and all others like it) must be auto-demoted. If there
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && * are no more weak holders, we exit the while loop.
!test_bit(GLF_DEMOTE, &gl->gl_flags)) */
fast_path = 1; gh = find_first_holder(gl);
}
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
gfs2_glock_add_to_lru(gl); gfs2_glock_add_to_lru(gl);
trace_gfs2_glock_queue(gh, 0);
if (unlikely(!fast_path)) { if (unlikely(!fast_path)) {
gl->gl_lockref.count++; gl->gl_lockref.count++;
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
...@@ -1530,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh) ...@@ -1530,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
delay = gl->gl_hold_time; delay = gl->gl_hold_time;
__gfs2_glock_queue_work(gl, delay); __gfs2_glock_queue_work(gl, delay);
} }
}
/**
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
* @gh: the glock holder
*
*/
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
__gfs2_glock_dq(gh);
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
} }
...@@ -1692,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) ...@@ -1692,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{ {
struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
unsigned long delay = 0; unsigned long delay = 0;
unsigned long holdtime; unsigned long holdtime;
unsigned long now = jiffies; unsigned long now = jiffies;
...@@ -1706,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) ...@@ -1706,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
delay = gl->gl_hold_time; delay = gl->gl_hold_time;
} }
/*
* Note 1: We cannot call demote_incompat_holders from handle_callback
* or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
* handle_callback -> demote_incompat_holders -> gfs2_glock_dq
* Plus, we only want to demote the holders if the request comes from
* a remote cluster node because local holder conflicts are resolved
* elsewhere.
*
* Note 2: if a remote node wants this glock in EX mode, lock_dlm will
* request that we set our state to UNLOCKED. Here we mock up a holder
* to make it look like someone wants the lock EX locally. Any SH
* and DF requests should be able to share the lock without demoting.
*
* Note 3: We only want to demote the demoteable holders when there
* are no more strong holders. The demoteable holders might as well
* keep the glock until the last strong holder is done with it.
*/
if (!find_first_strong_holder(gl)) {
if (state == LM_ST_UNLOCKED)
mock_gh.gh_state = LM_ST_EXCLUSIVE;
demote_incompat_holders(gl, &mock_gh);
}
handle_callback(gl, state, delay, true); handle_callback(gl, state, delay, true);
__gfs2_glock_queue_work(gl, delay); __gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
...@@ -2095,6 +2236,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) ...@@ -2095,6 +2236,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'H'; *p++ = 'H';
if (test_bit(HIF_WAIT, &iflags)) if (test_bit(HIF_WAIT, &iflags))
*p++ = 'W'; *p++ = 'W';
if (test_bit(HIF_MAY_DEMOTE, &iflags))
*p++ = 'D';
*p = 0; *p = 0;
return buf; return buf;
} }
......
...@@ -150,6 +150,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * ...@@ -150,6 +150,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
list_for_each_entry(gh, &gl->gl_holders, gh_list) { list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
break; break;
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
continue;
if (gh->gh_owner_pid == pid) if (gh->gh_owner_pid == pid)
goto out; goto out;
} }
...@@ -325,6 +327,24 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object) ...@@ -325,6 +327,24 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
spin_unlock(&gl->gl_lockref.lock); spin_unlock(&gl->gl_lockref.lock);
} }
/**
 * gfs2_holder_allow_demote - mark a holder as eligible for auto-demotion
 * @gh: the glock holder
 *
 * Sets HIF_MAY_DEMOTE in @gh's internal flags while holding the spinlock
 * of the glock that @gh refers to.
 */
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
{
	spin_lock(&gh->gh_gl->gl_lockref.lock);
	set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
	spin_unlock(&gh->gh_gl->gl_lockref.lock);
}
/**
 * gfs2_holder_disallow_demote - withdraw permission for auto-demotion
 * @gh: the glock holder
 *
 * Clears HIF_MAY_DEMOTE in @gh's internal flags while holding the spinlock
 * of the glock that @gh refers to.  This function does not report whether
 * @gh was dequeued in the meantime; per the commit description, callers
 * check afterwards whether the holder is still queued.
 */
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
{
	spin_lock(&gh->gh_gl->gl_lockref.lock);
	clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
	spin_unlock(&gh->gh_gl->gl_lockref.lock);
}
extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
......
...@@ -252,6 +252,7 @@ struct gfs2_lkstats { ...@@ -252,6 +252,7 @@ struct gfs2_lkstats {
enum { enum {
/* States */ /* States */
HIF_MAY_DEMOTE = 1,
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
HIF_WAIT = 10, HIF_WAIT = 10,
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment