Commit 14f7dd63, authored by David Woodhouse, committed by Al Viro

[PATCH] Copy XFS readdir hack into nfsd code.

Some file systems with their own internal locking have problems with the
way that nfsd calls the ->lookup() method from within a filldir function
called from their ->readdir() method. The recursion back into the file
system code can cause deadlock.

XFS has a fairly hackish solution to this which involves doing the
readdir() into a locally-allocated buffer, then going back through it
calling the filldir function afterwards. It's not ideal, but it works.

It's particularly suboptimal because XFS does this for local file
systems too, where it's completely unnecessary.

Copy this hack into the NFS code where it can be used only for NFS
export. In response to feedback, use it unconditionally rather than only
for the affected file systems.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 2628b766
...@@ -1813,26 +1813,104 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, ...@@ -1813,26 +1813,104 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
return err; return err;
} }
static int nfsd_do_readdir(struct file *file, filldir_t func, /*
* We do this buffering because we must not call back into the file
* system's ->lookup() method from the filldir callback. That may well
* deadlock a number of file systems.
*
* This is based heavily on the implementation of same in XFS.
*/
/*
 * One directory entry as captured by nfsd_buffered_filldir().  Records are
 * packed back to back in the staging buffer; each one occupies
 * ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)) bytes, so the
 * next record is found by advancing that many bytes from the current one.
 */
struct buffered_dirent {
u64 ino;	/* ino value handed to the filldir callback */
loff_t offset;	/* offset value handed to the filldir callback */
int namlen;	/* number of bytes stored in name[] */
unsigned int d_type;	/* d_type value handed to the filldir callback */
char name[];	/* namlen bytes of name; no terminating NUL is stored */
};
/*
 * State handed to nfsd_buffered_filldir() through its opaque __buf
 * argument: where the staging buffer lives and how much of it is in use.
 */
struct readdir_data {
char *dirent;	/* start of the staging buffer (one page; see PAGE_SIZE check) */
size_t used;	/* bytes of the buffer already filled with records */
};
/*
 * filldir callback that stashes one directory entry in the staging buffer
 * described by __buf (a struct readdir_data) instead of acting on it
 * immediately.
 *
 * Each entry is stored as a struct buffered_dirent followed by the name
 * bytes, with the record length rounded up to a multiple of sizeof(u64).
 *
 * Returns 0 when the entry was stored, or -EINVAL when the record would
 * not fit in the remaining space of the PAGE_SIZE buffer (nothing is
 * written in that case).
 */
static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
		loff_t offset, u64 ino, unsigned int d_type)
{
	struct readdir_data *rd = __buf;
	struct buffered_dirent *entry;
	unsigned int need;

	/* Space this record takes, including padding up to u64 alignment. */
	need = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));

	/* Refuse the entry if it would run past the end of the page. */
	if (rd->used + need > PAGE_SIZE)
		return -EINVAL;

	/* The new record starts right after the ones already stored. */
	entry = (void *)(rd->dirent + rd->used);

	entry->ino = ino;
	entry->offset = offset;
	entry->d_type = d_type;
	entry->namlen = namlen;
	memcpy(entry->name, name, namlen);

	rd->used += need;

	return 0;
}
static int nfsd_buffered_readdir(struct file *file, filldir_t func,
struct readdir_cd *cdp, loff_t *offsetp) struct readdir_cd *cdp, loff_t *offsetp)
{ {
struct readdir_data buf;
struct buffered_dirent *de;
int host_err; int host_err;
int size;
loff_t offset;
/* buf.dirent = (void *)__get_free_page(GFP_KERNEL);
* Read the directory entries. This silly loop is necessary because if (!buf.dirent)
* readdir() is not guaranteed to fill up the entire buffer, but return -ENOMEM;
* may choose to do less.
*/ offset = *offsetp;
do {
cdp->err = nfserr_eof; /* will be cleared on successful read */ cdp->err = nfserr_eof; /* will be cleared on successful read */
host_err = vfs_readdir(file, func, cdp);
} while (host_err >=0 && cdp->err == nfs_ok);
*offsetp = vfs_llseek(file, 0, 1); while (1) {
unsigned int reclen;
buf.used = 0;
host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
if (host_err)
break;
size = buf.used;
if (!size)
break;
de = (struct buffered_dirent *)buf.dirent;
while (size > 0) {
offset = de->offset;
if (func(cdp, de->name, de->namlen, de->offset,
de->ino, de->d_type))
goto done;
if (cdp->err != nfs_ok)
goto done;
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
}
offset = vfs_llseek(file, 0, 1);
}
done:
free_page((unsigned long)(buf.dirent));
if (host_err) if (host_err)
return nfserrno(host_err); return nfserrno(host_err);
else
*offsetp = offset;
return cdp->err; return cdp->err;
} }
...@@ -1858,7 +1936,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, ...@@ -1858,7 +1936,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
goto out_close; goto out_close;
} }
err = nfsd_do_readdir(file, func, cdp, offsetp); err = nfsd_buffered_readdir(file, func, cdp, offsetp);
if (err == nfserr_eof || err == nfserr_toosmall) if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */ err = nfs_ok; /* can still be found in ->err */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment