Commit 4b4f8580 authored by Linus Torvalds

Merge tag 'locks-v3.20-1' of git://git.samba.org/jlayton/linux

Pull file locking related changes #1 from Jeff Layton:
 "This patchset contains a fairly major overhaul of how file locks are
  tracked within the inode.  Rather than a single list, we now create a
  per-inode "lock context" that contains individual lists for the file
  locks, and a new dedicated spinlock for them.

  There are changes in other trees that are based on top of this set so
  it may be easiest to pull this in early"

* tag 'locks-v3.20-1' of git://git.samba.org/jlayton/linux:
  locks: update comments that refer to inode->i_flock
  locks: consolidate NULL i_flctx checks in locks_remove_file
  locks: keep a count of locks on the flctx lists
  locks: clean up the lm_change prototype
  locks: add a dedicated spinlock to protect i_flctx lists
  locks: remove i_flock field from struct inode
  locks: convert lease handling to file_lock_context
  locks: convert posix locks to file_lock_context
  locks: move flock locks to file_lock_context
  ceph: move spinlocking into ceph_encode_locks_to_buffer and ceph_count_locks
  locks: add a new struct file_locking_context pointer to struct inode
  locks: have locks_release_file use flock_lock_file to release generic flock locks
  locks: add new struct list_head to struct file_lock
parents 87291235 8116bf4c
...@@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ...@@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
return err; return err;
} }
/** /*
* Must be called with lock_flocks() already held. Fills in the passed * Fills in the passed counter variables, so you can prepare pagelist metadata
* counter variables, so you can prepare pagelist metadata before calling * before calling ceph_encode_locks.
* ceph_encode_locks.
*/ */
void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
{ {
struct file_lock *lock; struct file_lock_context *ctx;
*fcntl_count = 0; *fcntl_count = 0;
*flock_count = 0; *flock_count = 0;
for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { ctx = inode->i_flctx;
if (lock->fl_flags & FL_POSIX) if (ctx) {
++(*fcntl_count); *fcntl_count = ctx->flc_posix_cnt;
else if (lock->fl_flags & FL_FLOCK) *flock_count = ctx->flc_flock_cnt;
++(*flock_count);
} }
dout("counted %d flock locks and %d fcntl locks", dout("counted %d flock locks and %d fcntl locks",
*flock_count, *fcntl_count); *flock_count, *fcntl_count);
...@@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ...@@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
int num_fcntl_locks, int num_flock_locks) int num_fcntl_locks, int num_flock_locks)
{ {
struct file_lock *lock; struct file_lock *lock;
struct file_lock_context *ctx = inode->i_flctx;
int err = 0; int err = 0;
int seen_fcntl = 0; int seen_fcntl = 0;
int seen_flock = 0; int seen_flock = 0;
...@@ -279,8 +278,11 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ...@@ -279,8 +278,11 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
dout("encoding %d flock and %d fcntl locks", num_flock_locks, dout("encoding %d flock and %d fcntl locks", num_flock_locks,
num_fcntl_locks); num_fcntl_locks);
for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { if (!ctx)
if (lock->fl_flags & FL_POSIX) { return 0;
spin_lock(&ctx->flc_lock);
list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
++seen_fcntl; ++seen_fcntl;
if (seen_fcntl > num_fcntl_locks) { if (seen_fcntl > num_fcntl_locks) {
err = -ENOSPC; err = -ENOSPC;
...@@ -291,9 +293,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ...@@ -291,9 +293,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
goto fail; goto fail;
++l; ++l;
} }
} list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
if (lock->fl_flags & FL_FLOCK) {
++seen_flock; ++seen_flock;
if (seen_flock > num_flock_locks) { if (seen_flock > num_flock_locks) {
err = -ENOSPC; err = -ENOSPC;
...@@ -304,8 +304,8 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ...@@ -304,8 +304,8 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
goto fail; goto fail;
++l; ++l;
} }
}
fail: fail:
spin_unlock(&ctx->flc_lock);
return err; return err;
} }
......
...@@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, ...@@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_filelock *flocks; struct ceph_filelock *flocks;
encode_again: encode_again:
spin_lock(&inode->i_lock);
ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
spin_unlock(&inode->i_lock);
flocks = kmalloc((num_fcntl_locks+num_flock_locks) * flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
sizeof(struct ceph_filelock), GFP_NOFS); sizeof(struct ceph_filelock), GFP_NOFS);
if (!flocks) { if (!flocks) {
err = -ENOMEM; err = -ENOMEM;
goto out_free; goto out_free;
} }
spin_lock(&inode->i_lock);
err = ceph_encode_locks_to_buffer(inode, flocks, err = ceph_encode_locks_to_buffer(inode, flocks,
num_fcntl_locks, num_fcntl_locks,
num_flock_locks); num_flock_locks);
spin_unlock(&inode->i_lock);
if (err) { if (err) {
kfree(flocks); kfree(flocks);
if (err == -ENOSPC) if (err == -ENOSPC)
......
...@@ -1113,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) ...@@ -1113,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
return rc; return rc;
} }
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; \
lockp = &(*lockp)->fl_next)
struct lock_to_push { struct lock_to_push {
struct list_head llist; struct list_head llist;
__u64 offset; __u64 offset;
...@@ -1132,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1132,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
{ {
struct inode *inode = cfile->dentry->d_inode; struct inode *inode = cfile->dentry->d_inode;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock, **before; struct file_lock *flock;
unsigned int count = 0, i = 0; struct file_lock_context *flctx = inode->i_flctx;
unsigned int i;
int rc = 0, xid, type; int rc = 0, xid, type;
struct list_head locks_to_send, *el; struct list_head locks_to_send, *el;
struct lock_to_push *lck, *tmp; struct lock_to_push *lck, *tmp;
...@@ -1141,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1141,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
xid = get_xid(); xid = get_xid();
spin_lock(&inode->i_lock); if (!flctx)
cifs_for_each_lock(inode, before) { goto out;
if ((*before)->fl_flags & FL_POSIX)
count++;
}
spin_unlock(&inode->i_lock);
INIT_LIST_HEAD(&locks_to_send); INIT_LIST_HEAD(&locks_to_send);
/* /*
* Allocating count locks is enough because no FL_POSIX locks can be * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks
* added to the list while we are holding cinode->lock_sem that * can be added to the list while we are holding cinode->lock_sem that
* protects locking operations of this inode. * protects locking operations of this inode.
*/ */
for (; i < count; i++) { for (i = 0; i < flctx->flc_posix_cnt; i++) {
lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
if (!lck) { if (!lck) {
rc = -ENOMEM; rc = -ENOMEM;
...@@ -1165,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1165,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
} }
el = locks_to_send.next; el = locks_to_send.next;
spin_lock(&inode->i_lock); spin_lock(&flctx->flc_lock);
cifs_for_each_lock(inode, before) { list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
flock = *before;
if ((flock->fl_flags & FL_POSIX) == 0)
continue;
if (el == &locks_to_send) { if (el == &locks_to_send) {
/* /*
* The list ended. We don't have enough allocated * The list ended. We don't have enough allocated
...@@ -1189,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1189,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
lck->length = length; lck->length = length;
lck->type = type; lck->type = type;
lck->offset = flock->fl_start; lck->offset = flock->fl_start;
el = el->next;
} }
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
int stored_rc; int stored_rc;
......
...@@ -194,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) ...@@ -194,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
#ifdef CONFIG_FSNOTIFY #ifdef CONFIG_FSNOTIFY
inode->i_fsnotify_mask = 0; inode->i_fsnotify_mask = 0;
#endif #endif
inode->i_flctx = NULL;
this_cpu_inc(nr_inodes); this_cpu_inc(nr_inodes);
return 0; return 0;
...@@ -237,6 +237,7 @@ void __destroy_inode(struct inode *inode) ...@@ -237,6 +237,7 @@ void __destroy_inode(struct inode *inode)
BUG_ON(inode_has_buffers(inode)); BUG_ON(inode_has_buffers(inode));
security_inode_free(inode); security_inode_free(inode);
fsnotify_inode_delete(inode); fsnotify_inode_delete(inode);
locks_free_lock_context(inode->i_flctx);
if (!inode->i_nlink) { if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count); atomic_long_dec(&inode->i_sb->s_remove_count);
......
...@@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
{ {
struct inode *inode = nlmsvc_file_inode(file); struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl; struct file_lock *fl;
struct file_lock_context *flctx = inode->i_flctx;
struct nlm_host *lockhost; struct nlm_host *lockhost;
if (!flctx || list_empty_careful(&flctx->flc_posix))
return 0;
again: again:
file->f_locks = 0; file->f_locks = 0;
spin_lock(&inode->i_lock); spin_lock(&flctx->flc_lock);
for (fl = inode->i_flock; fl; fl = fl->fl_next) { list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
if (fl->fl_lmops != &nlmsvc_lock_operations) if (fl->fl_lmops != &nlmsvc_lock_operations)
continue; continue;
...@@ -180,7 +183,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -180,7 +183,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
if (match(lockhost, host)) { if (match(lockhost, host)) {
struct file_lock lock = *fl; struct file_lock lock = *fl;
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
lock.fl_type = F_UNLCK; lock.fl_type = F_UNLCK;
lock.fl_start = 0; lock.fl_start = 0;
lock.fl_end = OFFSET_MAX; lock.fl_end = OFFSET_MAX;
...@@ -192,7 +195,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -192,7 +195,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
goto again; goto again;
} }
} }
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
return 0; return 0;
} }
...@@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file) ...@@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file)
{ {
struct inode *inode = nlmsvc_file_inode(file); struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl; struct file_lock *fl;
struct file_lock_context *flctx = inode->i_flctx;
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1; return 1;
spin_lock(&inode->i_lock); if (flctx && !list_empty_careful(&flctx->flc_posix)) {
for (fl = inode->i_flock; fl; fl = fl->fl_next) { spin_lock(&flctx->flc_lock);
list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
if (fl->fl_lmops == &nlmsvc_lock_operations) { if (fl->fl_lmops == &nlmsvc_lock_operations) {
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
return 1; return 1;
} }
} }
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
}
file->f_locks = 0; file->f_locks = 0;
return 0; return 0;
} }
......
...@@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl) ...@@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl)
int leases_enable = 1; int leases_enable = 1;
int lease_break_time = 45; int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
/* /*
* The global file_lock_list is only used for displaying /proc/locks, so we * The global file_lock_list is only used for displaying /proc/locks, so we
* keep a list on each CPU, with each list protected by its own spinlock via * keep a list on each CPU, with each list protected by its own spinlock via
* the file_lock_lglock. Note that alterations to the list also require that * the file_lock_lglock. Note that alterations to the list also require that
* the relevant i_lock is held. * the relevant flc_lock is held.
*/ */
DEFINE_STATIC_LGLOCK(file_lock_lglock); DEFINE_STATIC_LGLOCK(file_lock_lglock);
static DEFINE_PER_CPU(struct hlist_head, file_lock_list); static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
...@@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); ...@@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
* contrast to those that are acting as records of acquired locks). * contrast to those that are acting as records of acquired locks).
* *
* Note that when we acquire this lock in order to change the above fields, * Note that when we acquire this lock in order to change the above fields,
* we often hold the i_lock as well. In certain cases, when reading the fields * we often hold the flc_lock as well. In certain cases, when reading the fields
* protected by this lock, we can skip acquiring it iff we already hold the * protected by this lock, we can skip acquiring it iff we already hold the
* i_lock. * flc_lock.
* *
* In particular, adding an entry to the fl_block list requires that you hold * In particular, adding an entry to the fl_block list requires that you hold
* both the i_lock and the blocked_lock_lock (acquired in that order). Deleting * both the flc_lock and the blocked_lock_lock (acquired in that order).
* an entry from the list however only requires the file_lock_lock. * Deleting an entry from the list however only requires the blocked_lock_lock.
*/ */
static DEFINE_SPINLOCK(blocked_lock_lock); static DEFINE_SPINLOCK(blocked_lock_lock);
static struct kmem_cache *flctx_cache __read_mostly;
static struct kmem_cache *filelock_cache __read_mostly; static struct kmem_cache *filelock_cache __read_mostly;
/*
 * Return the lock context attached to @inode, allocating and installing a
 * new one if the inode does not have one yet.
 *
 * Returns inode->i_flctx, which is NULL only if no context was attached and
 * the allocation failed.  May sleep (GFP_KERNEL allocation).
 */
static struct file_lock_context *
locks_get_lock_context(struct inode *inode)
{
	struct file_lock_context *new;

	/* Fast path: a context is already attached. */
	if (likely(inode->i_flctx))
		goto out;

	new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
	if (!new)
		goto out;

	spin_lock_init(&new->flc_lock);
	INIT_LIST_HEAD(&new->flc_flock);
	INIT_LIST_HEAD(&new->flc_posix);
	INIT_LIST_HEAD(&new->flc_lease);

	/*
	 * kmem_cache_alloc() hands back uninitialised memory (unless the
	 * cache has a constructor), so the per-list counters must start at
	 * zero explicitly; they are read directly by callers and only ever
	 * incremented/decremented afterwards.
	 *
	 * NOTE(review): if the context carries further counters (e.g. one
	 * for the lease list), they need the same treatment -- confirm
	 * against the struct file_lock_context definition.
	 */
	new->flc_flock_cnt = 0;
	new->flc_posix_cnt = 0;

	/*
	 * Assign the pointer if it's not already assigned. If it is, then
	 * free the context we just allocated.
	 */
	spin_lock(&inode->i_lock);
	if (likely(!inode->i_flctx)) {
		inode->i_flctx = new;
		new = NULL;
	}
	spin_unlock(&inode->i_lock);

	/* Another task attached a context first; drop ours. */
	if (new)
		kmem_cache_free(flctx_cache, new);
out:
	return inode->i_flctx;
}
/*
 * Release a lock context back to flctx_cache.  A NULL @ctx is a no-op.
 * All three lock lists are expected to be empty at this point; a non-empty
 * list triggers a one-time warning but the context is freed regardless.
 */
void
locks_free_lock_context(struct file_lock_context *ctx)
{
	if (!ctx)
		return;

	WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
	WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
	WARN_ON_ONCE(!list_empty(&ctx->flc_lease));

	kmem_cache_free(flctx_cache, ctx);
}
static void locks_init_lock_heads(struct file_lock *fl) static void locks_init_lock_heads(struct file_lock *fl)
{ {
INIT_HLIST_NODE(&fl->fl_link); INIT_HLIST_NODE(&fl->fl_link);
INIT_LIST_HEAD(&fl->fl_list);
INIT_LIST_HEAD(&fl->fl_block); INIT_LIST_HEAD(&fl->fl_block);
init_waitqueue_head(&fl->fl_wait); init_waitqueue_head(&fl->fl_wait);
} }
...@@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private); ...@@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private);
void locks_free_lock(struct file_lock *fl) void locks_free_lock(struct file_lock *fl)
{ {
BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(waitqueue_active(&fl->fl_wait));
BUG_ON(!list_empty(&fl->fl_list));
BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!list_empty(&fl->fl_block));
BUG_ON(!hlist_unhashed(&fl->fl_link)); BUG_ON(!hlist_unhashed(&fl->fl_link));
...@@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose) ...@@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose)
struct file_lock *fl; struct file_lock *fl;
while (!list_empty(dispose)) { while (!list_empty(dispose)) {
fl = list_first_entry(dispose, struct file_lock, fl_block); fl = list_first_entry(dispose, struct file_lock, fl_list);
list_del_init(&fl->fl_block); list_del_init(&fl->fl_list);
locks_free_lock(fl); locks_free_lock(fl);
} }
} }
...@@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) ...@@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
return fl1->fl_owner == fl2->fl_owner; return fl1->fl_owner == fl2->fl_owner;
} }
/* Must be called with the i_lock held! */ /* Must be called with the flc_lock held! */
static void locks_insert_global_locks(struct file_lock *fl) static void locks_insert_global_locks(struct file_lock *fl)
{ {
lg_local_lock(&file_lock_lglock); lg_local_lock(&file_lock_lglock);
...@@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl) ...@@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl)
lg_local_unlock(&file_lock_lglock); lg_local_unlock(&file_lock_lglock);
} }
/* Must be called with the i_lock held! */ /* Must be called with the flc_lock held! */
static void locks_delete_global_locks(struct file_lock *fl) static void locks_delete_global_locks(struct file_lock *fl)
{ {
/* /*
* Avoid taking lock if already unhashed. This is safe since this check * Avoid taking lock if already unhashed. This is safe since this check
* is done while holding the i_lock, and new insertions into the list * is done while holding the flc_lock, and new insertions into the list
* also require that it be held. * also require that it be held.
*/ */
if (hlist_unhashed(&fl->fl_link)) if (hlist_unhashed(&fl->fl_link))
...@@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter) ...@@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter)
* the order they blocked. The documentation doesn't require this but * the order they blocked. The documentation doesn't require this but
* it seems like the reasonable thing to do. * it seems like the reasonable thing to do.
* *
* Must be called with both the i_lock and blocked_lock_lock held. The fl_block * Must be called with both the flc_lock and blocked_lock_lock held. The
* list itself is protected by the blocked_lock_lock, but by ensuring that the * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
* i_lock is also held on insertions we can avoid taking the blocked_lock_lock * that the flc_lock is also held on insertions we can avoid taking the
* in some cases when we see that the fl_block list is empty. * blocked_lock_lock in some cases when we see that the fl_block list is empty.
*/ */
static void __locks_insert_block(struct file_lock *blocker, static void __locks_insert_block(struct file_lock *blocker,
struct file_lock *waiter) struct file_lock *waiter)
...@@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker, ...@@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker,
locks_insert_global_blocked(waiter); locks_insert_global_blocked(waiter);
} }
/* Must be called with i_lock held. */ /* Must be called with flc_lock held. */
static void locks_insert_block(struct file_lock *blocker, static void locks_insert_block(struct file_lock *blocker,
struct file_lock *waiter) struct file_lock *waiter)
{ {
...@@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker, ...@@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker,
/* /*
* Wake up processes blocked waiting for blocker. * Wake up processes blocked waiting for blocker.
* *
* Must be called with the inode->i_lock held! * Must be called with the inode->i_flctx->flc_lock held!
*/ */
static void locks_wake_up_blocks(struct file_lock *blocker) static void locks_wake_up_blocks(struct file_lock *blocker)
{ {
/* /*
* Avoid taking global lock if list is empty. This is safe since new * Avoid taking global lock if list is empty. This is safe since new
* blocked requests are only added to the list under the i_lock, and * blocked requests are only added to the list under the flc_lock, and
* the i_lock is always held here. Note that removal from the fl_block * the flc_lock is always held here. Note that removal from the fl_block
* list does not require the i_lock, so we must recheck list_empty() * list does not require the flc_lock, so we must recheck list_empty()
* after acquiring the blocked_lock_lock. * after acquiring the blocked_lock_lock.
*/ */
if (list_empty(&blocker->fl_block)) if (list_empty(&blocker->fl_block))
...@@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker) ...@@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
spin_unlock(&blocked_lock_lock); spin_unlock(&blocked_lock_lock);
} }
/* Insert file lock fl into an inode's lock list at the position indicated static void
* by pos. At the same time add the lock to the global file lock list. locks_insert_lock_ctx(struct file_lock *fl, int *counter,
* struct list_head *before)
* Must be called with the i_lock held!
*/
static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{ {
fl->fl_nspid = get_pid(task_tgid(current)); fl->fl_nspid = get_pid(task_tgid(current));
list_add_tail(&fl->fl_list, before);
/* insert into file's list */ ++*counter;
fl->fl_next = *pos;
*pos = fl;
locks_insert_global_locks(fl); locks_insert_global_locks(fl);
} }
/** static void
* locks_delete_lock - Delete a lock and then free it. locks_unlink_lock_ctx(struct file_lock *fl, int *counter)
* @thisfl_p: pointer that points to the fl_next field of the previous
* inode->i_flock list entry
*
* Unlink a lock from all lists and free the namespace reference, but don't
* free it yet. Wake up processes that are blocked waiting for this lock and
* notify the FS that the lock has been cleared.
*
* Must be called with the i_lock held!
*/
static void locks_unlink_lock(struct file_lock **thisfl_p)
{ {
struct file_lock *fl = *thisfl_p;
locks_delete_global_locks(fl); locks_delete_global_locks(fl);
list_del_init(&fl->fl_list);
*thisfl_p = fl->fl_next; --*counter;
fl->fl_next = NULL;
if (fl->fl_nspid) { if (fl->fl_nspid) {
put_pid(fl->fl_nspid); put_pid(fl->fl_nspid);
fl->fl_nspid = NULL; fl->fl_nspid = NULL;
} }
locks_wake_up_blocks(fl); locks_wake_up_blocks(fl);
} }
/* static void
* Unlink a lock from all lists and free it. locks_delete_lock_ctx(struct file_lock *fl, int *counter,
*
* Must be called with i_lock held!
*/
static void locks_delete_lock(struct file_lock **thisfl_p,
struct list_head *dispose) struct list_head *dispose)
{ {
struct file_lock *fl = *thisfl_p; locks_unlink_lock_ctx(fl, counter);
locks_unlink_lock(thisfl_p);
if (dispose) if (dispose)
list_add(&fl->fl_block, dispose); list_add(&fl->fl_list, dispose);
else else
locks_free_lock(fl); locks_free_lock(fl);
} }
...@@ -746,22 +764,27 @@ void ...@@ -746,22 +764,27 @@ void
posix_test_lock(struct file *filp, struct file_lock *fl) posix_test_lock(struct file *filp, struct file_lock *fl)
{ {
struct file_lock *cfl; struct file_lock *cfl;
struct file_lock_context *ctx;
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
spin_lock(&inode->i_lock); ctx = inode->i_flctx;
for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { if (!ctx || list_empty_careful(&ctx->flc_posix)) {
if (!IS_POSIX(cfl)) fl->fl_type = F_UNLCK;
continue; return;
if (posix_locks_conflict(fl, cfl))
break;
} }
if (cfl) {
spin_lock(&ctx->flc_lock);
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
if (posix_locks_conflict(fl, cfl)) {
locks_copy_conflock(fl, cfl); locks_copy_conflock(fl, cfl);
if (cfl->fl_nspid) if (cfl->fl_nspid)
fl->fl_pid = pid_vnr(cfl->fl_nspid); fl->fl_pid = pid_vnr(cfl->fl_nspid);
} else goto out;
}
}
fl->fl_type = F_UNLCK; fl->fl_type = F_UNLCK;
spin_unlock(&inode->i_lock); out:
spin_unlock(&ctx->flc_lock);
return; return;
} }
EXPORT_SYMBOL(posix_test_lock); EXPORT_SYMBOL(posix_test_lock);
...@@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, ...@@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
static int flock_lock_file(struct file *filp, struct file_lock *request) static int flock_lock_file(struct file *filp, struct file_lock *request)
{ {
struct file_lock *new_fl = NULL; struct file_lock *new_fl = NULL;
struct file_lock **before; struct file_lock *fl;
struct inode * inode = file_inode(filp); struct file_lock_context *ctx;
struct inode *inode = file_inode(filp);
int error = 0; int error = 0;
int found = 0; bool found = false;
LIST_HEAD(dispose); LIST_HEAD(dispose);
ctx = locks_get_lock_context(inode);
if (!ctx)
return -ENOMEM;
if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
new_fl = locks_alloc_lock(); new_fl = locks_alloc_lock();
if (!new_fl) if (!new_fl)
return -ENOMEM; return -ENOMEM;
} }
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
if (request->fl_flags & FL_ACCESS) if (request->fl_flags & FL_ACCESS)
goto find_conflict; goto find_conflict;
for_each_lock(inode, before) { list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
struct file_lock *fl = *before;
if (IS_POSIX(fl))
break;
if (IS_LEASE(fl))
continue;
if (filp != fl->fl_file) if (filp != fl->fl_file)
continue; continue;
if (request->fl_type == fl->fl_type) if (request->fl_type == fl->fl_type)
goto out; goto out;
found = 1; found = true;
locks_delete_lock(before, &dispose); locks_delete_lock_ctx(fl, &ctx->flc_flock_cnt, &dispose);
break; break;
} }
...@@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) ...@@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
* give it the opportunity to lock the file. * give it the opportunity to lock the file.
*/ */
if (found) { if (found) {
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
cond_resched(); cond_resched();
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
} }
find_conflict: find_conflict:
for_each_lock(inode, before) { list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
struct file_lock *fl = *before;
if (IS_POSIX(fl))
break;
if (IS_LEASE(fl))
continue;
if (!flock_locks_conflict(request, fl)) if (!flock_locks_conflict(request, fl))
continue; continue;
error = -EAGAIN; error = -EAGAIN;
...@@ -911,12 +929,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) ...@@ -911,12 +929,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
if (request->fl_flags & FL_ACCESS) if (request->fl_flags & FL_ACCESS)
goto out; goto out;
locks_copy_lock(new_fl, request); locks_copy_lock(new_fl, request);
locks_insert_lock(before, new_fl); locks_insert_lock_ctx(new_fl, &ctx->flc_flock_cnt, &ctx->flc_flock);
new_fl = NULL; new_fl = NULL;
error = 0; error = 0;
out: out:
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
if (new_fl) if (new_fl)
locks_free_lock(new_fl); locks_free_lock(new_fl);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
...@@ -925,16 +943,20 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) ...@@ -925,16 +943,20 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
{ {
struct file_lock *fl; struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL; struct file_lock *new_fl = NULL;
struct file_lock *new_fl2 = NULL; struct file_lock *new_fl2 = NULL;
struct file_lock *left = NULL; struct file_lock *left = NULL;
struct file_lock *right = NULL; struct file_lock *right = NULL;
struct file_lock **before; struct file_lock_context *ctx;
int error; int error;
bool added = false; bool added = false;
LIST_HEAD(dispose); LIST_HEAD(dispose);
ctx = locks_get_lock_context(inode);
if (!ctx)
return -ENOMEM;
/* /*
* We may need two file_lock structures for this operation, * We may need two file_lock structures for this operation,
* so we get them in advance to avoid races. * so we get them in advance to avoid races.
...@@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
new_fl2 = locks_alloc_lock(); new_fl2 = locks_alloc_lock();
} }
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
/* /*
* New lock request. Walk all POSIX locks and look for conflicts. If * New lock request. Walk all POSIX locks and look for conflicts. If
* there are any, either return error or put the request on the * there are any, either return error or put the request on the
* blocker's list of waiters and the global blocked_hash. * blocker's list of waiters and the global blocked_hash.
*/ */
if (request->fl_type != F_UNLCK) { if (request->fl_type != F_UNLCK) {
for_each_lock(inode, before) { list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
fl = *before;
if (!IS_POSIX(fl)) if (!IS_POSIX(fl))
continue; continue;
if (!posix_locks_conflict(request, fl)) if (!posix_locks_conflict(request, fl))
...@@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (request->fl_flags & FL_ACCESS) if (request->fl_flags & FL_ACCESS)
goto out; goto out;
/* /* Find the first old lock with the same owner as the new lock */
* Find the first old lock with the same owner as the new lock. list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
*/ if (posix_same_owner(request, fl))
break;
before = &inode->i_flock;
/* First skip locks owned by other processes. */
while ((fl = *before) && (!IS_POSIX(fl) ||
!posix_same_owner(request, fl))) {
before = &fl->fl_next;
} }
/* Process locks with this owner. */ /* Process locks with this owner. */
while ((fl = *before) && posix_same_owner(request, fl)) { list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
/* Detect adjacent or overlapping regions (if same lock type) if (!posix_same_owner(request, fl))
*/ break;
/* Detect adjacent or overlapping regions (if same lock type) */
if (request->fl_type == fl->fl_type) { if (request->fl_type == fl->fl_type) {
/* In all comparisons of start vs end, use /* In all comparisons of start vs end, use
* "start - 1" rather than "end + 1". If end * "start - 1" rather than "end + 1". If end
* is OFFSET_MAX, end + 1 will become negative. * is OFFSET_MAX, end + 1 will become negative.
*/ */
if (fl->fl_end < request->fl_start - 1) if (fl->fl_end < request->fl_start - 1)
goto next_lock; continue;
/* If the next lock in the list has entirely bigger /* If the next lock in the list has entirely bigger
* addresses than the new one, insert the lock here. * addresses than the new one, insert the lock here.
*/ */
...@@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
else else
request->fl_end = fl->fl_end; request->fl_end = fl->fl_end;
if (added) { if (added) {
locks_delete_lock(before, &dispose); locks_delete_lock_ctx(fl, &ctx->flc_posix_cnt,
&dispose);
continue; continue;
} }
request = fl; request = fl;
added = true; added = true;
} } else {
else {
/* Processing for different lock types is a bit /* Processing for different lock types is a bit
* more complex. * more complex.
*/ */
if (fl->fl_end < request->fl_start) if (fl->fl_end < request->fl_start)
goto next_lock; continue;
if (fl->fl_start > request->fl_end) if (fl->fl_start > request->fl_end)
break; break;
if (request->fl_type == F_UNLCK) if (request->fl_type == F_UNLCK)
...@@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
* one (This may happen several times). * one (This may happen several times).
*/ */
if (added) { if (added) {
locks_delete_lock(before, &dispose); locks_delete_lock_ctx(fl,
&ctx->flc_posix_cnt, &dispose);
continue; continue;
} }
/* /*
...@@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_copy_lock(new_fl, request); locks_copy_lock(new_fl, request);
request = new_fl; request = new_fl;
new_fl = NULL; new_fl = NULL;
locks_delete_lock(before, &dispose); locks_insert_lock_ctx(request,
locks_insert_lock(before, request); &ctx->flc_posix_cnt, &fl->fl_list);
locks_delete_lock_ctx(fl,
&ctx->flc_posix_cnt, &dispose);
added = true; added = true;
} }
} }
/* Go on to next lock.
*/
next_lock:
before = &fl->fl_next;
} }
/* /*
...@@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
goto out; goto out;
} }
locks_copy_lock(new_fl, request); locks_copy_lock(new_fl, request);
locks_insert_lock(before, new_fl); locks_insert_lock_ctx(new_fl, &ctx->flc_posix_cnt,
&fl->fl_list);
new_fl = NULL; new_fl = NULL;
} }
if (right) { if (right) {
...@@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
left = new_fl2; left = new_fl2;
new_fl2 = NULL; new_fl2 = NULL;
locks_copy_lock(left, right); locks_copy_lock(left, right);
locks_insert_lock(before, left); locks_insert_lock_ctx(left, &ctx->flc_posix_cnt,
&fl->fl_list);
} }
right->fl_start = request->fl_end + 1; right->fl_start = request->fl_end + 1;
locks_wake_up_blocks(right); locks_wake_up_blocks(right);
...@@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str ...@@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_wake_up_blocks(left); locks_wake_up_blocks(left);
} }
out: out:
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
/* /*
* Free any unused locks. * Free any unused locks.
*/ */
...@@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait); ...@@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait);
*/ */
int locks_mandatory_locked(struct file *file) int locks_mandatory_locked(struct file *file)
{ {
int ret;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct file_lock_context *ctx;
struct file_lock *fl; struct file_lock *fl;
ctx = inode->i_flctx;
if (!ctx || list_empty_careful(&ctx->flc_posix))
return 0;
/* /*
* Search the lock list for this inode for any POSIX locks. * Search the lock list for this inode for any POSIX locks.
*/ */
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { ret = 0;
if (!IS_POSIX(fl)) list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
continue;
if (fl->fl_owner != current->files && if (fl->fl_owner != current->files &&
fl->fl_owner != file) fl->fl_owner != file) {
ret = -EAGAIN;
break; break;
} }
spin_unlock(&inode->i_lock); }
return fl ? -EAGAIN : 0; spin_unlock(&ctx->flc_lock);
return ret;
} }
/** /**
...@@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg) ...@@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg)
} }
/* We already had a lease on this file; just change its type */ /* We already had a lease on this file; just change its type */
int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
{ {
struct file_lock *fl = *before; struct file_lock_context *flctx;
int error = assign_type(fl, arg); int error = assign_type(fl, arg);
if (error) if (error)
...@@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) ...@@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
if (arg == F_UNLCK) { if (arg == F_UNLCK) {
struct file *filp = fl->fl_file; struct file *filp = fl->fl_file;
flctx = file_inode(filp)->i_flctx;
f_delown(filp); f_delown(filp);
filp->f_owner.signum = 0; filp->f_owner.signum = 0;
fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
...@@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) ...@@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
fl->fl_fasync = NULL; fl->fl_fasync = NULL;
} }
locks_delete_lock(before, dispose); locks_delete_lock_ctx(fl, &flctx->flc_lease_cnt, dispose);
} }
return 0; return 0;
} }
...@@ -1329,20 +1355,17 @@ static bool past_time(unsigned long then) ...@@ -1329,20 +1355,17 @@ static bool past_time(unsigned long then)
static void time_out_leases(struct inode *inode, struct list_head *dispose) static void time_out_leases(struct inode *inode, struct list_head *dispose)
{ {
struct file_lock **before; struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl; struct file_lock *fl, *tmp;
lockdep_assert_held(&inode->i_lock); lockdep_assert_held(&ctx->flc_lock);
before = &inode->i_flock; list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
trace_time_out_leases(inode, fl); trace_time_out_leases(inode, fl);
if (past_time(fl->fl_downgrade_time)) if (past_time(fl->fl_downgrade_time))
lease_modify(before, F_RDLCK, dispose); lease_modify(fl, F_RDLCK, dispose);
if (past_time(fl->fl_break_time)) if (past_time(fl->fl_break_time))
lease_modify(before, F_UNLCK, dispose); lease_modify(fl, F_UNLCK, dispose);
if (fl == *before) /* lease_modify may have freed fl */
before = &fl->fl_next;
} }
} }
...@@ -1356,11 +1379,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) ...@@ -1356,11 +1379,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
static bool static bool
any_leases_conflict(struct inode *inode, struct file_lock *breaker) any_leases_conflict(struct inode *inode, struct file_lock *breaker)
{ {
struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl; struct file_lock *fl;
lockdep_assert_held(&inode->i_lock); lockdep_assert_held(&ctx->flc_lock);
for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (leases_conflict(fl, breaker)) if (leases_conflict(fl, breaker))
return true; return true;
} }
...@@ -1384,7 +1408,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1384,7 +1408,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
{ {
int error = 0; int error = 0;
struct file_lock *new_fl; struct file_lock *new_fl;
struct file_lock *fl, **before; struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl;
unsigned long break_time; unsigned long break_time;
int want_write = (mode & O_ACCMODE) != O_RDONLY; int want_write = (mode & O_ACCMODE) != O_RDONLY;
LIST_HEAD(dispose); LIST_HEAD(dispose);
...@@ -1394,7 +1419,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1394,7 +1419,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
return PTR_ERR(new_fl); return PTR_ERR(new_fl);
new_fl->fl_flags = type; new_fl->fl_flags = type;
spin_lock(&inode->i_lock); /* typically we will check that ctx is non-NULL before calling */
if (!ctx) {
WARN_ON_ONCE(1);
return error;
}
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose); time_out_leases(inode, &dispose);
...@@ -1408,9 +1439,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1408,9 +1439,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
break_time++; /* so that 0 means no break time */ break_time++; /* so that 0 means no break time */
} }
for (before = &inode->i_flock; list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
((fl = *before) != NULL) && IS_LEASE(fl);
before = &fl->fl_next) {
if (!leases_conflict(fl, new_fl)) if (!leases_conflict(fl, new_fl))
continue; continue;
if (want_write) { if (want_write) {
...@@ -1419,17 +1448,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1419,17 +1448,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
fl->fl_flags |= FL_UNLOCK_PENDING; fl->fl_flags |= FL_UNLOCK_PENDING;
fl->fl_break_time = break_time; fl->fl_break_time = break_time;
} else { } else {
if (lease_breaking(inode->i_flock)) if (lease_breaking(fl))
continue; continue;
fl->fl_flags |= FL_DOWNGRADE_PENDING; fl->fl_flags |= FL_DOWNGRADE_PENDING;
fl->fl_downgrade_time = break_time; fl->fl_downgrade_time = break_time;
} }
if (fl->fl_lmops->lm_break(fl)) if (fl->fl_lmops->lm_break(fl))
locks_delete_lock(before, &dispose); locks_delete_lock_ctx(fl, &ctx->flc_lease_cnt,
&dispose);
} }
fl = inode->i_flock; if (list_empty(&ctx->flc_lease))
if (!fl || !IS_LEASE(fl))
goto out; goto out;
if (mode & O_NONBLOCK) { if (mode & O_NONBLOCK) {
...@@ -1439,18 +1468,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1439,18 +1468,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
} }
restart: restart:
break_time = inode->i_flock->fl_break_time; fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
break_time = fl->fl_break_time;
if (break_time != 0) if (break_time != 0)
break_time -= jiffies; break_time -= jiffies;
if (break_time == 0) if (break_time == 0)
break_time++; break_time++;
locks_insert_block(inode->i_flock, new_fl); locks_insert_block(fl, new_fl);
trace_break_lease_block(inode, new_fl); trace_break_lease_block(inode, new_fl);
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait, error = wait_event_interruptible_timeout(new_fl->fl_wait,
!new_fl->fl_next, break_time); !new_fl->fl_next, break_time);
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl); trace_break_lease_unblock(inode, new_fl);
locks_delete_block(new_fl); locks_delete_block(new_fl);
if (error >= 0) { if (error >= 0) {
...@@ -1462,12 +1492,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) ...@@ -1462,12 +1492,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
time_out_leases(inode, &dispose); time_out_leases(inode, &dispose);
if (any_leases_conflict(inode, new_fl)) if (any_leases_conflict(inode, new_fl))
goto restart; goto restart;
error = 0; error = 0;
} }
out: out:
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
locks_free_lock(new_fl); locks_free_lock(new_fl);
return error; return error;
...@@ -1487,14 +1515,18 @@ EXPORT_SYMBOL(__break_lease); ...@@ -1487,14 +1515,18 @@ EXPORT_SYMBOL(__break_lease);
void lease_get_mtime(struct inode *inode, struct timespec *time) void lease_get_mtime(struct inode *inode, struct timespec *time)
{ {
bool has_lease = false; bool has_lease = false;
struct file_lock *flock; struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl;
if (inode->i_flock) { if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
flock = inode->i_flock; if (!list_empty(&ctx->flc_lease)) {
if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) fl = list_first_entry(&ctx->flc_lease,
struct file_lock, fl_list);
if (fl->fl_type == F_WRLCK)
has_lease = true; has_lease = true;
spin_unlock(&inode->i_lock); }
spin_unlock(&ctx->flc_lock);
} }
if (has_lease) if (has_lease)
...@@ -1532,20 +1564,22 @@ int fcntl_getlease(struct file *filp) ...@@ -1532,20 +1564,22 @@ int fcntl_getlease(struct file *filp)
{ {
struct file_lock *fl; struct file_lock *fl;
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
struct file_lock_context *ctx = inode->i_flctx;
int type = F_UNLCK; int type = F_UNLCK;
LIST_HEAD(dispose); LIST_HEAD(dispose);
spin_lock(&inode->i_lock); if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
time_out_leases(file_inode(filp), &dispose); time_out_leases(file_inode(filp), &dispose);
for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
fl = fl->fl_next) { if (fl->fl_file != filp)
if (fl->fl_file == filp) { continue;
type = target_leasetype(fl); type = target_leasetype(fl);
break; break;
} }
} spin_unlock(&ctx->flc_lock);
spin_unlock(&inode->i_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
}
return type; return type;
} }
...@@ -1578,9 +1612,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg) ...@@ -1578,9 +1612,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg)
static int static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{ {
struct file_lock *fl, **before, **my_before = NULL, *lease; struct file_lock *fl, *my_fl = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry; struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG; bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error; int error;
LIST_HEAD(dispose); LIST_HEAD(dispose);
...@@ -1588,6 +1623,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1588,6 +1623,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
lease = *flp; lease = *flp;
trace_generic_add_lease(inode, lease); trace_generic_add_lease(inode, lease);
ctx = locks_get_lock_context(inode);
if (!ctx)
return -ENOMEM;
/* /*
* In the delegation case we need mutual exclusion with * In the delegation case we need mutual exclusion with
* a number of operations that take the i_mutex. We trylock * a number of operations that take the i_mutex. We trylock
...@@ -1606,7 +1645,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1606,7 +1645,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
return -EINVAL; return -EINVAL;
} }
spin_lock(&inode->i_lock); spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose); time_out_leases(inode, &dispose);
error = check_conflicting_open(dentry, arg); error = check_conflicting_open(dentry, arg);
if (error) if (error)
...@@ -1621,13 +1660,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1621,13 +1660,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* except for this filp. * except for this filp.
*/ */
error = -EAGAIN; error = -EAGAIN;
for (before = &inode->i_flock; list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
((fl = *before) != NULL) && IS_LEASE(fl);
before = &fl->fl_next) {
if (fl->fl_file == filp) { if (fl->fl_file == filp) {
my_before = before; my_fl = fl;
continue; continue;
} }
/* /*
* No exclusive leases if someone else has a lease on * No exclusive leases if someone else has a lease on
* this file: * this file:
...@@ -1642,9 +1680,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1642,9 +1680,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
goto out; goto out;
} }
if (my_before != NULL) { if (my_fl != NULL) {
lease = *my_before; error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
error = lease->fl_lmops->lm_change(my_before, arg, &dispose);
if (error) if (error)
goto out; goto out;
goto out_setup; goto out_setup;
...@@ -1654,7 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1654,7 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
if (!leases_enable) if (!leases_enable)
goto out; goto out;
locks_insert_lock(before, lease); locks_insert_lock_ctx(lease, &ctx->flc_lease_cnt, &ctx->flc_lease);
/* /*
* The check in break_lease() is lockless. It's possible for another * The check in break_lease() is lockless. It's possible for another
* open to race in after we did the earlier check for a conflicting * open to race in after we did the earlier check for a conflicting
...@@ -1666,45 +1703,49 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr ...@@ -1666,45 +1703,49 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
*/ */
smp_mb(); smp_mb();
error = check_conflicting_open(dentry, arg); error = check_conflicting_open(dentry, arg);
if (error) if (error) {
goto out_unlink; locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt);
goto out;
}
out_setup: out_setup:
if (lease->fl_lmops->lm_setup) if (lease->fl_lmops->lm_setup)
lease->fl_lmops->lm_setup(lease, priv); lease->fl_lmops->lm_setup(lease, priv);
out: out:
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
if (is_deleg) if (is_deleg)
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
if (!error && !my_before) if (!error && !my_fl)
*flp = NULL; *flp = NULL;
return error; return error;
out_unlink:
locks_unlink_lock(before);
goto out;
} }
static int generic_delete_lease(struct file *filp) static int generic_delete_lease(struct file *filp)
{ {
int error = -EAGAIN; int error = -EAGAIN;
struct file_lock *fl, **before; struct file_lock *fl, *victim = NULL;
struct dentry *dentry = filp->f_path.dentry; struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct file_lock_context *ctx = inode->i_flctx;
LIST_HEAD(dispose); LIST_HEAD(dispose);
spin_lock(&inode->i_lock); if (!ctx) {
time_out_leases(inode, &dispose); trace_generic_delete_lease(inode, NULL);
for (before = &inode->i_flock; return error;
((fl = *before) != NULL) && IS_LEASE(fl); }
before = &fl->fl_next) {
if (fl->fl_file == filp) spin_lock(&ctx->flc_lock);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (fl->fl_file == filp) {
victim = fl;
break; break;
} }
}
trace_generic_delete_lease(inode, fl); trace_generic_delete_lease(inode, fl);
if (fl && IS_LEASE(fl)) if (victim)
error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&inode->i_lock); spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
return error; return error;
} }
...@@ -2171,7 +2212,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, ...@@ -2171,7 +2212,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
*/ */
/* /*
* we need that spin_lock here - it prevents reordering between * we need that spin_lock here - it prevents reordering between
* update of inode->i_flock and check for it done in close(). * update of i_flctx->flc_posix and check for it done in close().
* rcu_read_lock() wouldn't do. * rcu_read_lock() wouldn't do.
*/ */
spin_lock(&current->files->file_lock); spin_lock(&current->files->file_lock);
...@@ -2331,13 +2372,14 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, ...@@ -2331,13 +2372,14 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
void locks_remove_posix(struct file *filp, fl_owner_t owner) void locks_remove_posix(struct file *filp, fl_owner_t owner)
{ {
struct file_lock lock; struct file_lock lock;
struct file_lock_context *ctx = file_inode(filp)->i_flctx;
/* /*
* If there are no locks held on this file, we don't need to call * If there are no locks held on this file, we don't need to call
* posix_lock_file(). Another process could be setting a lock on this * posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway. * file at the same time, but we wouldn't remove that lock anyway.
*/ */
if (!file_inode(filp)->i_flock) if (!ctx || list_empty(&ctx->flc_posix))
return; return;
lock.fl_type = F_UNLCK; lock.fl_type = F_UNLCK;
...@@ -2358,22 +2400,10 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) ...@@ -2358,22 +2400,10 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
EXPORT_SYMBOL(locks_remove_posix); EXPORT_SYMBOL(locks_remove_posix);
/* /* The i_flctx must be valid when calling into here */
* This function is called on the last close of an open file. static void
*/ locks_remove_flock(struct file *filp)
void locks_remove_file(struct file *filp)
{ {
struct inode * inode = file_inode(filp);
struct file_lock *fl;
struct file_lock **before;
LIST_HEAD(dispose);
if (!inode->i_flock)
return;
locks_remove_posix(filp, filp);
if (filp->f_op->flock) {
struct file_lock fl = { struct file_lock fl = {
.fl_owner = filp, .fl_owner = filp,
.fl_pid = current->tgid, .fl_pid = current->tgid,
...@@ -2382,45 +2412,57 @@ void locks_remove_file(struct file *filp) ...@@ -2382,45 +2412,57 @@ void locks_remove_file(struct file *filp)
.fl_type = F_UNLCK, .fl_type = F_UNLCK,
.fl_end = OFFSET_MAX, .fl_end = OFFSET_MAX,
}; };
struct file_lock_context *flctx = file_inode(filp)->i_flctx;
if (list_empty(&flctx->flc_flock))
return;
if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl); filp->f_op->flock(filp, F_SETLKW, &fl);
else
flock_lock_file(filp, &fl);
if (fl.fl_ops && fl.fl_ops->fl_release_private) if (fl.fl_ops && fl.fl_ops->fl_release_private)
fl.fl_ops->fl_release_private(&fl); fl.fl_ops->fl_release_private(&fl);
} }
spin_lock(&inode->i_lock);
before = &inode->i_flock;
while ((fl = *before) != NULL) { /* The i_flctx must be valid when calling into here */
if (fl->fl_file == filp) { static void
if (IS_LEASE(fl)) { locks_remove_lease(struct file *filp)
lease_modify(before, F_UNLCK, &dispose); {
continue; struct inode *inode = file_inode(filp);
} struct file_lock_context *ctx = inode->i_flctx;
struct file_lock *fl, *tmp;
LIST_HEAD(dispose);
/* if (list_empty(&ctx->flc_lease))
* There's a leftover lock on the list of a type that return;
* we didn't expect to see. Most likely a classic
* POSIX lock that ended up not getting released
* properly, or that raced onto the list somehow. Log
* some info about it and then just remove it from
* the list.
*/
WARN(!IS_FLOCK(fl),
"leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n",
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino,
fl->fl_type, fl->fl_flags,
fl->fl_start, fl->fl_end);
locks_delete_lock(before, &dispose); spin_lock(&ctx->flc_lock);
continue; list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
} lease_modify(fl, F_UNLCK, &dispose);
before = &fl->fl_next; spin_unlock(&ctx->flc_lock);
}
spin_unlock(&inode->i_lock);
locks_dispose_list(&dispose); locks_dispose_list(&dispose);
} }
/*
* This function is called on the last close of an open file.
*/
void locks_remove_file(struct file *filp)
{
if (!file_inode(filp)->i_flctx)
return;
/* remove any OFD locks */
locks_remove_posix(filp, filp);
/* remove flock locks */
locks_remove_flock(filp);
/* remove any leases */
locks_remove_lease(filp);
}
/** /**
* posix_unblock_lock - stop waiting for a file lock * posix_unblock_lock - stop waiting for a file lock
* @waiter: the lock which was waiting * @waiter: the lock which was waiting
...@@ -2621,6 +2663,9 @@ static int __init filelock_init(void) ...@@ -2621,6 +2663,9 @@ static int __init filelock_init(void)
{ {
int i; int i;
flctx_cache = kmem_cache_create("file_lock_ctx",
sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
filelock_cache = kmem_cache_create("file_lock_cache", filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL); sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
......
...@@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ ...@@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
{ {
struct inode *inode = state->inode; struct inode *inode = state->inode;
struct file_lock *fl; struct file_lock *fl;
struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list;
int status = 0; int status = 0;
if (inode->i_flock == NULL) if (flctx == NULL)
goto out; goto out;
/* Protect inode->i_flock using the i_lock */ list = &flctx->flc_posix;
spin_lock(&inode->i_lock); spin_lock(&flctx->flc_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { restart:
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) list_for_each_entry(fl, list, fl_list) {
continue;
if (nfs_file_open_context(fl->fl_file) != ctx) if (nfs_file_open_context(fl->fl_file) != ctx)
continue; continue;
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid); status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0) if (status < 0)
goto out; goto out;
spin_lock(&inode->i_lock); spin_lock(&flctx->flc_lock);
} }
spin_unlock(&inode->i_lock); if (list == &flctx->flc_posix) {
list = &flctx->flc_flock;
goto restart;
}
spin_unlock(&flctx->flc_lock);
out: out:
return status; return status;
} }
......
...@@ -1366,20 +1366,22 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1366,20 +1366,22 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
struct file_lock *fl; struct file_lock *fl;
int status = 0; int status = 0;
struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list;
if (inode->i_flock == NULL) if (flctx == NULL)
return 0; return 0;
list = &flctx->flc_posix;
/* Guard against delegation returns and new lock/unlock calls */ /* Guard against delegation returns and new lock/unlock calls */
down_write(&nfsi->rwsem); down_write(&nfsi->rwsem);
/* Protect inode->i_flock using the BKL */ spin_lock(&flctx->flc_lock);
spin_lock(&inode->i_lock); restart:
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { list_for_each_entry(fl, list, fl_list) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file)->state != state) if (nfs_file_open_context(fl->fl_file)->state != state)
continue; continue;
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
status = ops->recover_lock(state, fl); status = ops->recover_lock(state, fl);
switch (status) { switch (status) {
case 0: case 0:
...@@ -1397,7 +1399,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1397,7 +1399,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto out; goto out;
default: default:
printk(KERN_ERR "NFS: %s: unhandled error %d\n", pr_err("NFS: %s: unhandled error %d\n",
__func__, status); __func__, status);
case -ENOMEM: case -ENOMEM:
case -NFS4ERR_DENIED: case -NFS4ERR_DENIED:
...@@ -1406,9 +1408,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1406,9 +1408,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* kill_proc(fl->fl_pid, SIGLOST, 1); */ /* kill_proc(fl->fl_pid, SIGLOST, 1); */
status = 0; status = 0;
} }
spin_lock(&inode->i_lock); spin_lock(&flctx->flc_lock);
} }
spin_unlock(&inode->i_lock); if (list == &flctx->flc_posix) {
list = &flctx->flc_flock;
goto restart;
}
spin_unlock(&flctx->flc_lock);
out: out:
up_write(&nfsi->rwsem); up_write(&nfsi->rwsem);
return status; return status;
......
...@@ -826,11 +826,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, ...@@ -826,11 +826,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_pageio_descriptor *pgio) struct nfs_pageio_descriptor *pgio)
{ {
size_t size; size_t size;
struct file_lock_context *flctx;
if (prev) { if (prev) {
if (!nfs_match_open_context(req->wb_context, prev->wb_context)) if (!nfs_match_open_context(req->wb_context, prev->wb_context))
return false; return false;
if (req->wb_context->dentry->d_inode->i_flock != NULL && flctx = req->wb_context->dentry->d_inode->i_flctx;
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
!nfs_match_lock_context(req->wb_lock_context, !nfs_match_lock_context(req->wb_lock_context,
prev->wb_lock_context)) prev->wb_lock_context))
return false; return false;
......
...@@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) ...@@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
{ {
struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx; struct nfs_lock_context *l_ctx;
struct file_lock_context *flctx = file_inode(file)->i_flctx;
struct nfs_page *req; struct nfs_page *req;
int do_flush, status; int do_flush, status;
/* /*
...@@ -1109,7 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) ...@@ -1109,7 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
do_flush = req->wb_page != page || req->wb_context != ctx; do_flush = req->wb_page != page || req->wb_context != ctx;
/* for now, flush if more than 1 request in page_group */ /* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req; do_flush |= req->wb_this_page != req;
if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
do_flush |= l_ctx->lockowner.l_owner != current->files do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid; || l_ctx->lockowner.l_pid != current->tgid;
} }
...@@ -1170,6 +1173,13 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) ...@@ -1170,6 +1173,13 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
return PageUptodate(page) != 0; return PageUptodate(page) != 0;
} }
/*
 * Report whether @fl is a write lock (F_WRLCK) whose range starts at
 * offset 0 and ends at OFFSET_MAX, i.e. spans the whole file.
 */
static bool
is_whole_file_wrlock(struct file_lock *fl)
{
	if (fl->fl_type != F_WRLCK)
		return false;
	if (fl->fl_start != 0)
		return false;
	return fl->fl_end == OFFSET_MAX;
}
/* If we know the page is up to date, and we're not using byte range locks (or /* If we know the page is up to date, and we're not using byte range locks (or
* if we have the whole file locked for writing), it may be more efficient to * if we have the whole file locked for writing), it may be more efficient to
* extend the write to cover the entire page in order to avoid fragmentation * extend the write to cover the entire page in order to avoid fragmentation
...@@ -1180,17 +1190,36 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) ...@@ -1180,17 +1190,36 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
*/ */
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{ {
int ret;
struct file_lock_context *flctx = inode->i_flctx;
struct file_lock *fl;
if (file->f_flags & O_DSYNC) if (file->f_flags & O_DSYNC)
return 0; return 0;
if (!nfs_write_pageuptodate(page, inode)) if (!nfs_write_pageuptodate(page, inode))
return 0; return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1; return 1;
if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
inode->i_flock->fl_end == OFFSET_MAX && list_empty_careful(&flctx->flc_posix)))
inode->i_flock->fl_type != F_RDLCK))
return 1;
return 0; return 0;
/* Check to see if there are whole file write locks */
ret = 0;
spin_lock(&flctx->flc_lock);
if (!list_empty(&flctx->flc_posix)) {
fl = list_first_entry(&flctx->flc_posix, struct file_lock,
fl_list);
if (is_whole_file_wrlock(fl))
ret = 1;
} else if (!list_empty(&flctx->flc_flock)) {
fl = list_first_entry(&flctx->flc_flock, struct file_lock,
fl_list);
if (fl->fl_type == F_WRLCK)
ret = 1;
}
spin_unlock(&flctx->flc_lock);
return ret;
} }
/* /*
......
...@@ -3477,7 +3477,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) ...@@ -3477,7 +3477,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
} }
static int static int
nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
struct list_head *dispose)
{ {
if (arg & F_UNLCK) if (arg & F_UNLCK)
return lease_modify(onlist, arg, dispose); return lease_modify(onlist, arg, dispose);
...@@ -5556,10 +5557,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -5556,10 +5557,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static bool static bool
check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{ {
struct file_lock **flpp; struct file_lock *fl;
int status = false; int status = false;
struct file *filp = find_any_file(fp); struct file *filp = find_any_file(fp);
struct inode *inode; struct inode *inode;
struct file_lock_context *flctx;
if (!filp) { if (!filp) {
/* Any valid lock stateid should have some sort of access */ /* Any valid lock stateid should have some sort of access */
...@@ -5568,15 +5570,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) ...@@ -5568,15 +5570,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
} }
inode = file_inode(filp); inode = file_inode(filp);
flctx = inode->i_flctx;
spin_lock(&inode->i_lock); if (flctx && !list_empty_careful(&flctx->flc_posix)) {
for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { spin_lock(&flctx->flc_lock);
if ((*flpp)->fl_owner == (fl_owner_t)lowner) { list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
if (fl->fl_owner == (fl_owner_t)lowner) {
status = true; status = true;
break; break;
} }
} }
spin_unlock(&inode->i_lock); spin_unlock(&flctx->flc_lock);
}
fput(filp); fput(filp);
return status; return status;
} }
......
...@@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t ...@@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
return retval; return retval;
} }
if (unlikely(inode->i_flock && mandatory_lock(inode))) { if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
retval = locks_mandatory_area( retval = locks_mandatory_area(
read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
inode, file, pos, count); inode, file, pos, count);
......
...@@ -625,7 +625,7 @@ struct inode { ...@@ -625,7 +625,7 @@ struct inode {
atomic_t i_readcount; /* struct files open RO */ atomic_t i_readcount; /* struct files open RO */
#endif #endif
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock; struct file_lock_context *i_flctx;
struct address_space i_data; struct address_space i_data;
struct list_head i_devices; struct list_head i_devices;
union { union {
...@@ -885,6 +885,8 @@ static inline struct file *get_file(struct file *f) ...@@ -885,6 +885,8 @@ static inline struct file *get_file(struct file *f)
/* legacy typedef, should eventually be removed */ /* legacy typedef, should eventually be removed */
typedef void *fl_owner_t; typedef void *fl_owner_t;
struct file_lock;
struct file_lock_operations { struct file_lock_operations {
void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
void (*fl_release_private)(struct file_lock *); void (*fl_release_private)(struct file_lock *);
...@@ -898,7 +900,7 @@ struct lock_manager_operations { ...@@ -898,7 +900,7 @@ struct lock_manager_operations {
void (*lm_notify)(struct file_lock *); /* unblock callback */ void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, int); int (*lm_grant)(struct file_lock *, int);
bool (*lm_break)(struct file_lock *); bool (*lm_break)(struct file_lock *);
int (*lm_change)(struct file_lock **, int, struct list_head *); int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **); void (*lm_setup)(struct file_lock *, void **);
}; };
...@@ -923,17 +925,17 @@ int locks_in_grace(struct net *); ...@@ -923,17 +925,17 @@ int locks_in_grace(struct net *);
* FIXME: should we create a separate "struct lock_request" to help distinguish * FIXME: should we create a separate "struct lock_request" to help distinguish
* these two uses? * these two uses?
* *
* The i_flock list is ordered by: * The various i_flctx lists are ordered by:
* *
* 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX * 1) lock owner
* 2) lock owner * 2) lock range start
* 3) lock range start * 3) lock range end
* 4) lock range end
* *
* Obviously, the last two criteria only matter for POSIX locks. * Obviously, the last two criteria only matter for POSIX locks.
*/ */
struct file_lock { struct file_lock {
struct file_lock *fl_next; /* singly linked list for this inode */ struct file_lock *fl_next; /* singly linked list for this inode */
struct list_head fl_list; /* link into file_lock_context */
struct hlist_node fl_link; /* node in global lists */ struct hlist_node fl_link; /* node in global lists */
struct list_head fl_block; /* circular list of blocked processes */ struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner; fl_owner_t fl_owner;
...@@ -964,6 +966,16 @@ struct file_lock { ...@@ -964,6 +966,16 @@ struct file_lock {
} fl_u; } fl_u;
}; };
/*
 * Per-inode lock context, reached through inode->i_flctx.  Holds one list
 * per kind of file lock plus a dedicated spinlock for those lists.
 */
struct file_lock_context {
/* spinlock taken around walks of the lists below */
spinlock_t flc_lock;
/* flock locks on this inode (entries linked via file_lock.fl_list) */
struct list_head flc_flock;
/* POSIX locks on this inode */
struct list_head flc_posix;
/* leases on this inode */
struct list_head flc_lease;
/* presumably the number of entries on each matching list -- confirm in fs/locks.c */
int flc_flock_cnt;
int flc_posix_cnt;
int flc_lease_cnt;
};
/* The following constant reflects the upper bound of the file/locking space */ /* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX #ifndef OFFSET_MAX
#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
...@@ -990,6 +1002,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); ...@@ -990,6 +1002,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp); extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */ /* fs/locks.c */
void locks_free_lock_context(struct file_lock_context *ctx);
void locks_free_lock(struct file_lock *fl); void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *); extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void); extern struct file_lock * locks_alloc_lock(void);
...@@ -1010,7 +1023,7 @@ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int t ...@@ -1010,7 +1023,7 @@ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int t
extern void lease_get_mtime(struct inode *, struct timespec *time); extern void lease_get_mtime(struct inode *, struct timespec *time);
extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
extern int vfs_setlease(struct file *, long, struct file_lock **, void **); extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
extern int lease_modify(struct file_lock **, int, struct list_head *); extern int lease_modify(struct file_lock *, int, struct list_head *);
#else /* !CONFIG_FILE_LOCKING */ #else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd, static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user) struct flock __user *user)
...@@ -1047,6 +1060,11 @@ static inline int fcntl_getlease(struct file *filp) ...@@ -1047,6 +1060,11 @@ static inline int fcntl_getlease(struct file *filp)
return F_UNLCK; return F_UNLCK;
} }
/*
 * Empty stub: does nothing with @ctx.
 * NOTE(review): appears to sit in the !CONFIG_FILE_LOCKING section of the
 * header alongside the other no-op stubs -- confirm against the full file.
 */
static inline void
locks_free_lock_context(struct file_lock_context *ctx)
{
}
static inline void locks_init_lock(struct file_lock *fl) static inline void locks_init_lock(struct file_lock *fl)
{ {
return; return;
...@@ -1137,7 +1155,7 @@ static inline int vfs_setlease(struct file *filp, long arg, ...@@ -1137,7 +1155,7 @@ static inline int vfs_setlease(struct file *filp, long arg,
return -EINVAL; return -EINVAL;
} }
static inline int lease_modify(struct file_lock **before, int arg, static inline int lease_modify(struct file_lock *fl, int arg,
struct list_head *dispose) struct list_head *dispose)
{ {
return -EINVAL; return -EINVAL;
...@@ -1959,7 +1977,7 @@ static inline int locks_verify_truncate(struct inode *inode, ...@@ -1959,7 +1977,7 @@ static inline int locks_verify_truncate(struct inode *inode,
struct file *filp, struct file *filp,
loff_t size) loff_t size)
{ {
if (inode->i_flock && mandatory_lock(inode)) if (inode->i_flctx && mandatory_lock(inode))
return locks_mandatory_area( return locks_mandatory_area(
FLOCK_VERIFY_WRITE, inode, filp, FLOCK_VERIFY_WRITE, inode, filp,
size < inode->i_size ? size : inode->i_size, size < inode->i_size ? size : inode->i_size,
...@@ -1973,11 +1991,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode) ...@@ -1973,11 +1991,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
{ {
/* /*
* Since this check is lockless, we must ensure that any refcounts * Since this check is lockless, we must ensure that any refcounts
* taken are done before checking inode->i_flock. Otherwise, we could * taken are done before checking i_flctx->flc_lease. Otherwise, we
* end up racing with tasks trying to set a new lease on this file. * could end up racing with tasks trying to set a new lease on this
* file.
*/ */
smp_mb(); smp_mb();
if (inode->i_flock) if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
return __break_lease(inode, mode, FL_LEASE); return __break_lease(inode, mode, FL_LEASE);
return 0; return 0;
} }
...@@ -1986,11 +2005,12 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) ...@@ -1986,11 +2005,12 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
{ {
/* /*
* Since this check is lockless, we must ensure that any refcounts * Since this check is lockless, we must ensure that any refcounts
* taken are done before checking inode->i_flock. Otherwise, we could * taken are done before checking i_flctx->flc_lease. Otherwise, we
* end up racing with tasks trying to set a new lease on this file. * could end up racing with tasks trying to set a new lease on this
* file.
*/ */
smp_mb(); smp_mb();
if (inode->i_flock) if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
return __break_lease(inode, mode, FL_DELEG); return __break_lease(inode, mode, FL_DELEG);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment