Commit d4f4de5e authored by Al Viro's avatar Al Viro

Fix the locking in dcache_readdir() and friends

There are two problems in dcache_readdir() - one is that lockless traversal
of the list needs non-trivial cooperation of d_alloc() (at least a switch
to list_add_rcu(), and probably more than just that) and another is that
it assumes that no removal will happen without the directory locked exclusive.
Said assumption had always been there, never had been stated explicitly and
is violated by several places in the kernel (devpts and selinuxfs).

        * replacement of next_positive() with different calling conventions:
it returns struct list_head * instead of struct dentry *; the latter is
passed in and out by reference, grabbing the result and dropping the original
value.
        * scan is under ->d_lock.  If we run out of timeslice, cursor is moved
after the last position we'd reached and we reschedule; then the scan continues
from that place.  To avoid livelocks between multiple lseek() (with cursors
getting moved past each other, never reaching the real entries) we always
skip the cursors, need_resched() or not.
        * returned list_head * is either ->d_child of dentry we'd found or
->d_subdirs of parent (if we got to the end of the list).
        * dcache_readdir() and dcache_dir_lseek() switched to new helper.
dcache_readdir() always holds a reference to dentry passed to dir_emit() now.
Cursor is moved to just before the entry where dir_emit() has failed or into
the very end of the list, if we'd run out.
        * move_cursor() eliminated - it had sucky calling conventions and
after fixing that it became simply list_move() (in lseek and scan_positives)
or list_move_tail() (in readdir).

        All operations with the list are under ->d_lock now, and we do not
depend upon having all file removals done with parent locked exclusive
anymore.

Cc: stable@vger.kernel.org
Reported-by: default avatar"zhengbin (A)" <zhengbin13@huawei.com>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent 4d856f72
...@@ -89,58 +89,47 @@ int dcache_dir_close(struct inode *inode, struct file *file) ...@@ -89,58 +89,47 @@ int dcache_dir_close(struct inode *inode, struct file *file)
EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_close);
/* parent is locked at least shared */ /* parent is locked at least shared */
static struct dentry *next_positive(struct dentry *parent, /*
struct list_head *from, * Returns an element of siblings' list.
int count) * We are looking for <count>th positive after <p>; if
* found, dentry is grabbed and passed to caller via *<res>.
* If no such element exists, the anchor of list is returned
* and *<res> is set to NULL.
*/
static struct list_head *scan_positives(struct dentry *cursor,
struct list_head *p,
loff_t count,
struct dentry **res)
{ {
unsigned *seq = &parent->d_inode->i_dir_seq, n; struct dentry *dentry = cursor->d_parent, *found = NULL;
struct dentry *res;
struct list_head *p;
bool skipped;
int i;
retry: spin_lock(&dentry->d_lock);
i = count; while ((p = p->next) != &dentry->d_subdirs) {
skipped = false;
n = smp_load_acquire(seq) & ~1;
res = NULL;
rcu_read_lock();
for (p = from->next; p != &parent->d_subdirs; p = p->next) {
struct dentry *d = list_entry(p, struct dentry, d_child); struct dentry *d = list_entry(p, struct dentry, d_child);
if (!simple_positive(d)) { // we must at least skip cursors, to avoid livelocks
skipped = true; if (d->d_flags & DCACHE_DENTRY_CURSOR)
} else if (!--i) { continue;
res = d; if (simple_positive(d) && !--count) {
spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(d))
found = dget_dlock(d);
spin_unlock(&d->d_lock);
if (likely(found))
break; break;
count = 1;
} }
if (need_resched()) {
list_move(&cursor->d_child, p);
p = &cursor->d_child;
spin_unlock(&dentry->d_lock);
cond_resched();
spin_lock(&dentry->d_lock);
} }
rcu_read_unlock();
if (skipped) {
smp_rmb();
if (unlikely(*seq != n))
goto retry;
} }
return res; spin_unlock(&dentry->d_lock);
} dput(*res);
*res = found;
static void move_cursor(struct dentry *cursor, struct list_head *after) return p;
{
struct dentry *parent = cursor->d_parent;
unsigned n, *seq = &parent->d_inode->i_dir_seq;
spin_lock(&parent->d_lock);
for (;;) {
n = *seq;
if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
break;
cpu_relax();
}
__list_del(cursor->d_child.prev, cursor->d_child.next);
if (after)
list_add(&cursor->d_child, after);
else
list_add_tail(&cursor->d_child, &parent->d_subdirs);
smp_store_release(seq, n + 2);
spin_unlock(&parent->d_lock);
} }
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
...@@ -158,17 +147,28 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) ...@@ -158,17 +147,28 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
return -EINVAL; return -EINVAL;
} }
if (offset != file->f_pos) { if (offset != file->f_pos) {
file->f_pos = offset;
if (file->f_pos >= 2) {
struct dentry *cursor = file->private_data; struct dentry *cursor = file->private_data;
struct dentry *to; struct dentry *to = NULL;
loff_t n = file->f_pos - 2; struct list_head *p;
file->f_pos = offset;
inode_lock_shared(dentry->d_inode); inode_lock_shared(dentry->d_inode);
to = next_positive(dentry, &dentry->d_subdirs, n);
move_cursor(cursor, to ? &to->d_child : NULL); if (file->f_pos > 2) {
inode_unlock_shared(dentry->d_inode); p = scan_positives(cursor, &dentry->d_subdirs,
file->f_pos - 2, &to);
spin_lock(&dentry->d_lock);
list_move(&cursor->d_child, p);
spin_unlock(&dentry->d_lock);
} else {
spin_lock(&dentry->d_lock);
list_del_init(&cursor->d_child);
spin_unlock(&dentry->d_lock);
} }
dput(to);
inode_unlock_shared(dentry->d_inode);
} }
return offset; return offset;
} }
...@@ -190,25 +190,29 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) ...@@ -190,25 +190,29 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{ {
struct dentry *dentry = file->f_path.dentry; struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data; struct dentry *cursor = file->private_data;
struct list_head *p = &cursor->d_child; struct list_head *anchor = &dentry->d_subdirs;
struct dentry *next; struct dentry *next = NULL;
bool moved = false; struct list_head *p;
if (!dir_emit_dots(file, ctx)) if (!dir_emit_dots(file, ctx))
return 0; return 0;
if (ctx->pos == 2) if (ctx->pos == 2)
p = &dentry->d_subdirs; p = anchor;
while ((next = next_positive(dentry, p, 1)) != NULL) { else
p = &cursor->d_child;
while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len, if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next)))) d_inode(next)->i_ino, dt_type(d_inode(next))))
break; break;
moved = true;
p = &next->d_child;
ctx->pos++; ctx->pos++;
} }
if (moved) spin_lock(&dentry->d_lock);
move_cursor(cursor, p); list_move_tail(&cursor->d_child, p);
spin_unlock(&dentry->d_lock);
dput(next);
return 0; return 0;
} }
EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(dcache_readdir);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment