Commit dab17c1a authored by David Howells

afs: Fix directory read/modify race

Because parsing of the directory wasn't being done under any sort of lock,
the pages holding the directory content can get invalidated whilst the
parsing is ongoing.

Further, the directory page check function gets called outside of the page
lock, so if the page gets cleared or updated, this may return reports of
bad magic numbers in the directory page.

Also, the directory may change size whilst checking and parsing are
ongoing, so more care needs to be taken here.

Fix this by:

 (1) Performing the page check from the page filling function before we set
     PageUptodate and drop the page lock.

 (2) Checking for the file having shrunk and the page having been abandoned
     before checking the page contents.

 (3) Locking the page whilst parsing it for the directory iterator.

Whilst we're at it, add a tracepoint to report check failure.
Signed-off-by: David Howells <dhowells@redhat.com>
parent 2c099014
......@@ -130,10 +130,11 @@ struct afs_lookup_cookie {
/*
* check that a directory page is valid
*/
static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
bool afs_dir_check_page(struct inode *dir, struct page *page)
{
struct afs_dir_page *dbuf;
loff_t latter;
struct afs_vnode *vnode = AFS_FS_I(dir);
loff_t latter, i_size, off;
int tmp, qty;
#if 0
......@@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
}
#endif
/* determine how many magic numbers there should be in this page */
latter = dir->i_size - page_offset(page);
/* Determine how many magic numbers there should be in this page, but
* we must take care because the directory may change size under us.
*/
off = page_offset(page);
i_size = i_size_read(dir);
if (i_size <= off)
goto checked;
latter = i_size - off;
if (latter >= PAGE_SIZE)
qty = PAGE_SIZE;
else
......@@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
dbuf = page_address(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
__func__, dir->i_ino, tmp, qty,
ntohs(dbuf->blocks[tmp].pagehdr.magic));
trace_afs_dir_check_failed(vnode, off, i_size);
goto error;
}
}
checked:
SetPageChecked(page);
return true;
......@@ -183,6 +193,7 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
/*
 * Release a directory page: undo the kmap(), drop the page lock and then
 * drop the reference on the page, in that order.
 */
static inline void afs_dir_put_page(struct page *page)
{
kunmap(page);
unlock_page(page);
put_page(page);
}
......@@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
if (!IS_ERR(page)) {
lock_page(page);
kmap(page);
if (unlikely(!PageChecked(page))) {
if (PageError(page) || !afs_dir_check_page(dir, page))
if (PageError(page))
goto fail;
}
}
......@@ -384,8 +396,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
*/
static int afs_readdir(struct file *file, struct dir_context *ctx)
{
return afs_dir_iterate(file_inode(file),
ctx, file->private_data);
return afs_dir_iterate(file_inode(file), ctx, file->private_data);
}
/*
......
......@@ -232,6 +232,11 @@ int afs_page_filler(void *data, struct page *page)
* page */
ret = afs_fetch_data(vnode, key, req);
afs_put_read(req);
if (ret >= 0 && S_ISDIR(inode->i_mode) &&
!afs_dir_check_page(inode, page))
ret = -EIO;
if (ret < 0) {
if (ret == -ENOENT) {
_debug("got NOENT from server"
......
......@@ -622,6 +622,7 @@ extern bool afs_cm_incoming_call(struct afs_call *);
/*
* dir.c
*/
extern bool afs_dir_check_page(struct inode *, struct page *);
extern const struct inode_operations afs_dir_inode_operations;
extern const struct dentry_operations afs_fs_dentry_operations;
extern const struct file_operations afs_dir_file_operations;
......
......@@ -381,6 +381,27 @@ TRACE_EVENT(afs_sent_pages,
__entry->cursor, __entry->ret)
);
/*
 * Tracepoint for a failed directory page check.  Records the vnode pointer
 * together with the off and i_size values supplied by the caller, and prints
 * them as "vn=<pointer> <off>/<i_size>" with both numbers in hexadecimal.
 */
TRACE_EVENT(afs_dir_check_failed,
TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
TP_ARGS(vnode, off, i_size),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(loff_t, off )
__field(loff_t, i_size )
),
TP_fast_assign(
__entry->vnode = vnode;
__entry->off = off;
__entry->i_size = i_size;
),
TP_printk("vn=%p %llx/%llx",
__entry->vnode, __entry->off, __entry->i_size)
);
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment