Commit 20181273 authored by David Howells

afs: Rewrite writeback handling

Rewrite the writeback handling to make the writeback records refcounted
separately from the completion management so that a ref can be taken on one
without preventing completion from happening.
Signed-off-by: David Howells <dhowells@redhat.com>
parent f357dbca
...@@ -23,7 +23,6 @@ static int afs_readpage(struct file *file, struct page *page); ...@@ -23,7 +23,6 @@ static int afs_readpage(struct file *file, struct page *page);
static void afs_invalidatepage(struct page *page, unsigned int offset, static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length); unsigned int length);
static int afs_releasepage(struct page *page, gfp_t gfp_flags); static int afs_releasepage(struct page *page, gfp_t gfp_flags);
static int afs_launder_page(struct page *page);
static int afs_readpages(struct file *filp, struct address_space *mapping, static int afs_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages); struct list_head *pages, unsigned nr_pages);
...@@ -463,13 +462,23 @@ static int afs_readpages(struct file *file, struct address_space *mapping, ...@@ -463,13 +462,23 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
} }
/* /*
* write back a dirty page * Try to remove a page from any writeback it may be entertaining.
*/ */
static int afs_launder_page(struct page *page) static bool afs_remove_wb_from_page(struct afs_vnode *vnode, struct page *page,
enum afs_writeback_trace why)
{ {
_enter("{%lu}", page->index); struct afs_writeback *wb;
bool cleared = true;
spin_lock(&vnode->writeback_lock);
wb = afs_get_writeback(vnode, (struct afs_writeback *)page_private(page), why);
spin_unlock(&vnode->writeback_lock);
if (wb) {
cleared = afs_writeback_remove_page(vnode, wb, page);
afs_put_writeback(vnode, wb, 1);
}
return 0; return cleared;
} }
/* /*
...@@ -480,7 +489,6 @@ static int afs_launder_page(struct page *page) ...@@ -480,7 +489,6 @@ static int afs_launder_page(struct page *page)
static void afs_invalidatepage(struct page *page, unsigned int offset, static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length) unsigned int length)
{ {
struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
_enter("{%lu},%u,%u", page->index, offset, length); _enter("{%lu},%u,%u", page->index, offset, length);
...@@ -496,15 +504,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, ...@@ -496,15 +504,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
} }
#endif #endif
if (PagePrivate(page)) { afs_remove_wb_from_page(vnode, page,
if (wb && !PageWriteback(page)) { afs_writeback_trace_invalidate_page);
set_page_private(page, 0);
afs_put_writeback(vnode, wb);
}
if (!page_private(page))
ClearPagePrivate(page);
}
} }
_leave(""); _leave("");
...@@ -516,8 +517,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, ...@@ -516,8 +517,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
*/ */
static int afs_releasepage(struct page *page, gfp_t gfp_flags) static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{ {
struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
bool cleared = true;
_enter("{{%x:%u}[%lu],%lx},%x", _enter("{{%x:%u}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
...@@ -532,15 +533,8 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) ...@@ -532,15 +533,8 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
} }
#endif #endif
if (PagePrivate(page)) { cleared = afs_remove_wb_from_page(vnode, page,
if (wb) { afs_writeback_trace_release_page);
set_page_private(page, 0); _leave(" = %d", cleared);
afs_put_writeback(vnode, wb); return cleared;
}
ClearPagePrivate(page);
}
/* indicate that the page can be released */
_leave(" = T");
return 1;
} }
...@@ -474,10 +474,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -474,10 +474,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
} }
/* flush any dirty data outstanding on a regular file */ /* flush any dirty data outstanding on a regular file */
if (S_ISREG(vnode->vfs_inode.i_mode)) { if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
afs_writeback_all(vnode); afs_writeback_all(vnode);
}
if (attr->ia_valid & ATTR_FILE) { if (attr->ia_valid & ATTR_FILE) {
key = attr->ia_file->private_data; key = attr->ia_file->private_data;
......
...@@ -156,7 +156,8 @@ struct afs_writeback { ...@@ -156,7 +156,8 @@ struct afs_writeback {
unsigned offset_first; /* offset into first page of start of write */ unsigned offset_first; /* offset into first page of start of write */
unsigned to_last; /* offset into last page of end of write */ unsigned to_last; /* offset into last page of end of write */
int num_conflicts; /* count of conflicting writes in list */ int num_conflicts; /* count of conflicting writes in list */
int usage; atomic_t usage;
int nr_pages; /* Number of pages contributing */
bool conflicts; /* T if has dependent conflicts */ bool conflicts; /* T if has dependent conflicts */
enum { enum {
AFS_WBACK_SYNCING, /* synchronisation being performed */ AFS_WBACK_SYNCING, /* synchronisation being performed */
...@@ -411,6 +412,8 @@ struct afs_interface { ...@@ -411,6 +412,8 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */ unsigned mtu; /* MTU of interface */
}; };
#include <trace/events/afs.h>
/*****************************************************************************/ /*****************************************************************************/
/* /*
* cache.c * cache.c
...@@ -709,7 +712,13 @@ extern int afs_volume_release_fileserver(struct afs_vnode *, ...@@ -709,7 +712,13 @@ extern int afs_volume_release_fileserver(struct afs_vnode *,
* write.c * write.c
*/ */
extern int afs_set_page_dirty(struct page *); extern int afs_set_page_dirty(struct page *);
extern void afs_put_writeback(struct afs_vnode *, struct afs_writeback *); extern struct afs_writeback *afs_get_writeback(struct afs_vnode *,
struct afs_writeback *,
enum afs_writeback_trace);
extern void afs_put_writeback(struct afs_vnode *, struct afs_writeback *,
unsigned);
extern bool afs_writeback_remove_page(struct afs_vnode *,
struct afs_writeback *, struct page *);
extern int afs_write_begin(struct file *file, struct address_space *mapping, extern int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata); struct page **pagep, void **fsdata);
...@@ -723,14 +732,12 @@ extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); ...@@ -723,14 +732,12 @@ extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *); extern int afs_writeback_all(struct afs_vnode *);
extern int afs_flush(struct file *, fl_owner_t); extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int); extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern int afs_launder_page(struct page *);
/*****************************************************************************/ /*****************************************************************************/
/* /*
* debug tracing * debug tracing
*/ */
#include <trace/events/afs.h>
extern unsigned afs_debug; extern unsigned afs_debug;
#define dbgprintk(FMT,...) \ #define dbgprintk(FMT,...) \
......
...@@ -330,7 +330,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) ...@@ -330,7 +330,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
first += nr; first += nr;
} while (first <= last); } while (first <= last);
trace_afs_sent_pages(call, first, last, ret); trace_afs_sent_pages(call, call->first, last, first, ret);
return ret; return ret;
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
* as published by the Free Software Foundation; either version * as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version. * 2 of the License, or (at your option) any later version.
*/ */
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/fs.h> #include <linux/fs.h>
...@@ -19,6 +20,8 @@ ...@@ -19,6 +20,8 @@
static int afs_write_back_from_locked_page(struct afs_vnode *vnode, static int afs_write_back_from_locked_page(struct afs_vnode *vnode,
struct afs_writeback *wb, struct afs_writeback *wb,
struct page *page); struct page *page);
static int afs_sync_data(struct afs_vnode *vnode, loff_t start, loff_t end,
enum afs_writeback_trace why);
/* /*
* mark a page as having been made dirty and thus needing writeback * mark a page as having been made dirty and thus needing writeback
...@@ -30,53 +33,152 @@ int afs_set_page_dirty(struct page *page) ...@@ -30,53 +33,152 @@ int afs_set_page_dirty(struct page *page)
} }
/* /*
* unlink a writeback record because its usage has reached zero * Allocate a writeback record.
* - must be called with the vnode->writeback_lock held */
static struct afs_writeback *afs_alloc_writeback(struct afs_vnode *vnode,
						 struct key *key,
						 pgoff_t index,
						 unsigned int from,
						 unsigned int to)
{
	/* Zeroed allocation: any field not set below starts out as 0. */
	struct afs_writeback *rec = kzalloc(sizeof(*rec), GFP_KERNEL);

	if (!rec)
		goto out;

	/* A fresh record spans exactly one page: [index, index]. */
	rec->last = index;
	rec->first = rec->last;
	rec->offset_first = from;
	rec->to_last = to;

	rec->state = AFS_WBACK_PENDING;

	/* The key pointer is stored as given; no key reference is taken here. */
	rec->key = key;

	/* The caller receives the record holding a single reference. */
	atomic_set(&rec->usage, 1);
	INIT_LIST_HEAD(&rec->link);
	init_waitqueue_head(&rec->waitq);
	trace_afs_writeback(vnode, rec, afs_writeback_trace_alloc, 1, 1);

out:
	/* Reached on both success and allocation failure (rec is NULL then). */
	_leave(" = %p", rec);
	return rec;
}
/*
* Get a reference on a writeback record.
*/
struct afs_writeback *afs_get_writeback(struct afs_vnode *vnode,
					struct afs_writeback *wb,
					enum afs_writeback_trace why)
{
	int usage;

	/* A NULL record is passed straight back without being touched. */
	if (!wb)
		return wb;

	/* Take one reference and log the resulting count with the reason. */
	usage = atomic_inc_return(&wb->usage);
	trace_afs_writeback(vnode, wb, why, usage, 1);
	return wb;
}
/*
* Discard a reference to a writeback record.
*/
void afs_put_writeback(struct afs_vnode *vnode, struct afs_writeback *wb,
		       unsigned delta)
{
	int remaining;

	/* Nothing to do for a NULL record or when no references are dropped. */
	if (!wb || !delta)
		return;

	/* Drop 'delta' references in a single atomic step. */
	remaining = atomic_sub_return(delta, &wb->usage);

	/*
	 * NOTE(review): delta is unsigned, so -delta is evaluated in unsigned
	 * arithmetic; the tracepoint's parameter type is not visible here -
	 * confirm it is a signed int so that this is logged as a negative
	 * change.
	 */
	trace_afs_writeback(vnode, wb, afs_writeback_trace_put, remaining, -delta);
	ASSERTCMP(remaining, >=, 0);

	if (remaining != 0)
		return;

	/* Last reference gone: release the key, then the record itself. */
	key_put(wb->key);
	kfree(wb);
}
/*
* Unlink a writeback record because the number of pages it covers has reached
* zero.
*
* Must be called with the vnode->writeback_lock held.
*/ */
static void afs_unlink_writeback(struct afs_vnode *vnode, static void afs_unlink_writeback(struct afs_vnode *vnode,
struct afs_writeback *wb) struct afs_writeback *wb)
{ {
struct afs_writeback *front; struct afs_writeback *front;
trace_afs_writeback(vnode, wb, afs_writeback_trace_unlink,
atomic_read(&wb->usage), 0);
list_del_init(&wb->link); list_del_init(&wb->link);
if (!list_empty(&vnode->writebacks)) {
/* if an fsync rises to the front of the queue then wake it while (!list_empty(&vnode->writebacks)) {
* up */ /* Remove and wake up any syncs that rise to the front. */
front = list_entry(vnode->writebacks.next, front = list_entry(vnode->writebacks.next,
struct afs_writeback, link); struct afs_writeback, link);
if (front->state == AFS_WBACK_SYNCING) { _debug("front %p %u", front, front->state);
_debug("wake up sync"); if (front->state != AFS_WBACK_SYNCING) {
front->state = AFS_WBACK_COMPLETE; trace_afs_writeback(vnode, front,
wake_up(&front->waitq); afs_writeback_trace_no_wake,
atomic_read(&front->usage), 0);
break;
} }
trace_afs_writeback(vnode, front, afs_writeback_trace_wake,
atomic_read(&front->usage), 0);
list_del_init(&front->link);
front->state = AFS_WBACK_COMPLETE;
wake_up(&front->waitq);
afs_put_writeback(vnode, front, 1);
} }
} }
/* /*
* free a writeback record * Remove a page from a writeback record. Returns true if we managed to clear
* the page and false if the record is still attached because the page is
* undergoing writeback.
*/ */
static void afs_free_writeback(struct afs_writeback *wb) static bool __afs_writeback_remove_page(struct afs_vnode *vnode,
struct afs_writeback *wb,
struct page *page,
unsigned *_delta)
{ {
_enter(""); bool cleared = true;
key_put(wb->key);
kfree(wb); if (wb && !PageWriteback(page)) {
set_page_private(page, 0);
ASSERTCMP(wb->nr_pages, >, 0);
wb->nr_pages--;
if (wb->first == page->index) {
wb->first++;
wb->offset_first = 0;
}
*_delta += 1;
if (wb->nr_pages == 0) {
afs_unlink_writeback(vnode, wb);
*_delta += 1;
}
}
cleared = !page_private(page);
if (cleared)
ClearPagePrivate(page);
return cleared;
} }
/* bool afs_writeback_remove_page(struct afs_vnode *vnode,
* dispose of a reference to a writeback record struct afs_writeback *wb, struct page *page)
*/
void afs_put_writeback(struct afs_vnode *vnode, struct afs_writeback *wb)
{ {
_enter("{%d}", wb->usage); unsigned delta = 0;
bool cleared = true;
spin_lock(&vnode->writeback_lock); if (PagePrivate(page)) {
if (--wb->usage == 0) spin_lock(&vnode->writeback_lock);
afs_unlink_writeback(vnode, wb); cleared = __afs_writeback_remove_page(vnode, wb, page, &delta);
else spin_unlock(&vnode->writeback_lock);
wb = NULL; afs_put_writeback(vnode, wb, delta);
spin_unlock(&vnode->writeback_lock); }
if (wb)
afs_free_writeback(wb); return cleared;
} }
/* /*
...@@ -118,83 +220,57 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, ...@@ -118,83 +220,57 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
} }
/* /*
* prepare to perform part of a write to a page * Make a note that a page will require writing back.
*
* The writeback is used or discarded unless we return -EAGAIN, in which case
* the page has been unlocked and we should be called again.
*/ */
int afs_write_begin(struct file *file, struct address_space *mapping, static int afs_add_writeback(struct afs_vnode *vnode,
loff_t pos, unsigned len, unsigned flags, struct afs_writeback *candidate,
struct page **pagep, void **fsdata) struct page *page)
{ {
struct afs_writeback *candidate, *wb; struct afs_writeback *wb;
struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); unsigned int from, to;
struct page *page; pgoff_t index;
struct key *key = file->private_data;
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
pgoff_t index = pos >> PAGE_SHIFT;
int ret;
_enter("{%x:%u},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
if (!candidate)
return -ENOMEM;
candidate->first = candidate->last = index;
candidate->offset_first = from;
candidate->to_last = to;
INIT_LIST_HEAD(&candidate->link);
candidate->usage = 1;
candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
kfree(candidate);
return -ENOMEM;
}
if (!PageUptodate(page) && len != PAGE_SIZE) {
ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
if (ret < 0) {
unlock_page(page);
put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
}
SetPageUptodate(page);
}
/* page won't leak in error case: it eventually gets cleaned off LRU */ _enter("");
*pagep = page;
try_again:
spin_lock(&vnode->writeback_lock); spin_lock(&vnode->writeback_lock);
index = page->index;
/* see if this page is already pending a writeback under a suitable key from = candidate->offset_first;
* - if so we can just join onto that one */ to = candidate->to_last;
wb = (struct afs_writeback *) page_private(page);
/* See if this page is already pending a writeback under a suitable key
* - if so we can just join onto that one.
*/
wb = (struct afs_writeback *)page_private(page);
if (wb) { if (wb) {
if (wb->key == key && wb->state == AFS_WBACK_PENDING) if (wb->key == candidate->key &&
wb->state == AFS_WBACK_PENDING)
goto subsume_in_current_wb; goto subsume_in_current_wb;
goto flush_conflicting_wb; goto flush_conflicting_wb;
} }
if (index > 0) { if (index > 0) {
/* see if we can find an already pending writeback that we can /* See if we can find an already pending writeback that we can
* append this page to */ * append this page to.
*/
list_for_each_entry(wb, &vnode->writebacks, link) { list_for_each_entry(wb, &vnode->writebacks, link) {
if (wb->last == index - 1 && wb->key == key && if (wb->last == index - 1 &&
wb->key == candidate->key &&
wb->state == AFS_WBACK_PENDING) wb->state == AFS_WBACK_PENDING)
goto append_to_previous_wb; goto append_to_previous_wb;
} }
} }
afs_get_writeback(vnode, candidate, afs_writeback_trace_new);
key_get(candidate->key);
list_add_tail(&candidate->link, &vnode->writebacks); list_add_tail(&candidate->link, &vnode->writebacks);
candidate->key = key_get(key); candidate->nr_pages++;
spin_unlock(&vnode->writeback_lock);
SetPagePrivate(page); SetPagePrivate(page);
set_page_private(page, (unsigned long) candidate); set_page_private(page, (unsigned long)candidate);
spin_unlock(&vnode->writeback_lock);
_leave(" = 0 [new]"); _leave(" = 0 [new]");
return 0; return 0;
...@@ -205,44 +281,99 @@ int afs_write_begin(struct file *file, struct address_space *mapping, ...@@ -205,44 +281,99 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
wb->offset_first = from; wb->offset_first = from;
if (index == wb->last && to > wb->to_last) if (index == wb->last && to > wb->to_last)
wb->to_last = to; wb->to_last = to;
trace_afs_writeback(vnode, wb, afs_writeback_trace_subsume,
atomic_read(&wb->usage), 0);
spin_unlock(&vnode->writeback_lock); spin_unlock(&vnode->writeback_lock);
trace_afs_writeback(vnode, candidate, afs_writeback_trace_discard, 0, 0);
kfree(candidate); kfree(candidate);
_leave(" = 0 [sub]"); _leave(" = 0 [sub]");
return 0; return 0;
append_to_previous_wb: append_to_previous_wb:
_debug("append into %lx-%lx", wb->first, wb->last); _debug("append into %lx-%lx", wb->first, wb->last);
wb->usage++;
wb->last++; wb->last++;
wb->to_last = to; wb->to_last = to;
spin_unlock(&vnode->writeback_lock); wb->nr_pages++;
afs_get_writeback(vnode, wb, afs_writeback_trace_append);
SetPagePrivate(page); SetPagePrivate(page);
set_page_private(page, (unsigned long) wb); set_page_private(page, (unsigned long)wb);
spin_unlock(&vnode->writeback_lock);
trace_afs_writeback(vnode, candidate, afs_writeback_trace_discard, 0, 0);
kfree(candidate); kfree(candidate);
_leave(" = 0 [app]"); _leave(" = 0 [app]");
return 0; return 0;
/* the page is currently bound to another context, so if it's dirty we /* The page is currently bound to another context, so if it's dirty we
* need to flush it before we can use the new context */ * need to flush it before we can use the new context.
*/
flush_conflicting_wb: flush_conflicting_wb:
_debug("flush conflict"); _debug("flush conflict");
afs_get_writeback(vnode, wb, afs_writeback_trace_conflict);
if (wb->state == AFS_WBACK_PENDING) if (wb->state == AFS_WBACK_PENDING)
wb->state = AFS_WBACK_CONFLICTING; wb->state = AFS_WBACK_CONFLICTING;
spin_unlock(&vnode->writeback_lock); spin_unlock(&vnode->writeback_lock);
if (clear_page_dirty_for_io(page)) { unlock_page(page);
ret = afs_write_back_from_locked_page(vnode, wb, page); put_page(page);
afs_sync_data(vnode,
((loff_t)wb->first << PAGE_SHIFT) + wb->offset_first,
((loff_t)wb->last << PAGE_SHIFT) + wb->to_last,
afs_writeback_trace_fsync);
return -EAGAIN;
}
/*
* prepare to perform part of a write to a page
*/
int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
struct afs_writeback *candidate;
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
struct key *key = file->private_data;
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
pgoff_t index = pos >> PAGE_SHIFT;
int ret;
_enter("{%x:%u},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
trace_afs_write_begin(vnode, index, from, to, flags);
candidate = afs_alloc_writeback(vnode, key, index, from, to);
if (!candidate)
return -ENOMEM;
retry:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
kfree(candidate);
return -ENOMEM;
}
if (!PageUptodate(page) && len != PAGE_SIZE) {
ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
if (ret < 0) { if (ret < 0) {
afs_put_writeback(vnode, candidate); unlock_page(page);
_leave(" = %d", ret); put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret; return ret;
} }
SetPageUptodate(page);
} }
/* the page holds a ref on the writeback record */ /* page won't leak in error case: it eventually gets cleaned off LRU */
afs_put_writeback(vnode, wb); *pagep = page;
set_page_private(page, 0);
ClearPagePrivate(page); ret = afs_add_writeback(vnode, candidate, page);
goto try_again; if (ret == -EAGAIN)
goto retry;
return ret;
} }
/* /*
...@@ -351,6 +482,9 @@ static int afs_write_back_from_locked_page(struct afs_vnode *vnode, ...@@ -351,6 +482,9 @@ static int afs_write_back_from_locked_page(struct afs_vnode *vnode,
_enter(",%lx", primary_page->index); _enter(",%lx", primary_page->index);
trace_afs_writeback(vnode, wb, afs_writeback_trace_write,
atomic_read(&wb->usage), 0);
count = 1; count = 1;
if (test_set_page_writeback(primary_page)) if (test_set_page_writeback(primary_page))
BUG(); BUG();
...@@ -491,12 +625,53 @@ static int afs_writepages_region(struct address_space *mapping, ...@@ -491,12 +625,53 @@ static int afs_writepages_region(struct address_space *mapping,
pgoff_t index, pgoff_t end, pgoff_t *_next) pgoff_t index, pgoff_t end, pgoff_t *_next)
{ {
struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_writeback *wb; struct afs_writeback *wb, *x;
struct page *page; struct page *page;
pgoff_t lowest;
int ret, n; int ret, n;
_enter(",,%lx,%lx,", index, end); _enter(",,%lx,%lx,", index, end);
next_wb:
spin_lock(&vnode->writeback_lock);
/* Look for a data writeback that overlaps the range specified. Note
* that the writeback list is ordered oldest first so that sync records
* float through the list as records are written back.
*/
lowest = end;
wb = NULL;
list_for_each_entry(x, &vnode->writebacks, link) {
if (x->state != AFS_WBACK_PENDING &&
x->state != AFS_WBACK_CONFLICTING)
continue;
if (x->first > end || x->last < index)
continue;
if (x->first <= lowest) {
wb = x;
lowest = x->first;
}
}
if (!wb) {
spin_unlock(&vnode->writeback_lock);
*_next = end;
_leave(" = 0 [no wb %lx]", *_next);
return 0;
}
_debug("found wb %lx-%lx", wb->first, wb->last);
afs_get_writeback(vnode, wb, afs_writeback_trace_writepages);
wb->state = AFS_WBACK_WRITING;
spin_unlock(&vnode->writeback_lock);
/* We could, at this point, trim non-dirty pages off of the front and
* back of the writeback, but this will only happen if ->writepage()
* interferes. Since ->writepage() is called with the target page
* locked, we can't lock any earlier page without risking deadlock.
*/
index = wb->first;
do { do {
n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
1, &page); 1, &page);
...@@ -505,12 +680,8 @@ static int afs_writepages_region(struct address_space *mapping, ...@@ -505,12 +680,8 @@ static int afs_writepages_region(struct address_space *mapping,
_debug("wback %lx", page->index); _debug("wback %lx", page->index);
if (page->index > end) { if (page->index > wb->last)
*_next = index; break;
put_page(page);
_leave(" = 0 [%lx]", *_next);
return 0;
}
/* at this point we hold neither mapping->tree_lock nor lock on /* at this point we hold neither mapping->tree_lock nor lock on
* the page itself: the page may be truncated or invalidated * the page itself: the page may be truncated or invalidated
...@@ -527,18 +698,12 @@ static int afs_writepages_region(struct address_space *mapping, ...@@ -527,18 +698,12 @@ static int afs_writepages_region(struct address_space *mapping,
if (PageWriteback(page)) { if (PageWriteback(page)) {
unlock_page(page); unlock_page(page);
if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(page);
wait_on_page_writeback(page);
put_page(page); put_page(page);
continue; continue;
} }
wb = (struct afs_writeback *) page_private(page); ASSERTCMP((struct afs_writeback *)page_private(page), ==, wb);
ASSERT(wb != NULL);
spin_lock(&vnode->writeback_lock);
wb->state = AFS_WBACK_WRITING;
spin_unlock(&vnode->writeback_lock);
if (!clear_page_dirty_for_io(page)) if (!clear_page_dirty_for_io(page))
BUG(); BUG();
...@@ -546,6 +711,7 @@ static int afs_writepages_region(struct address_space *mapping, ...@@ -546,6 +711,7 @@ static int afs_writepages_region(struct address_space *mapping,
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
if (ret < 0) { if (ret < 0) {
afs_put_writeback(vnode, wb, 1);
_leave(" = %d", ret); _leave(" = %d", ret);
return ret; return ret;
} }
...@@ -553,7 +719,12 @@ static int afs_writepages_region(struct address_space *mapping, ...@@ -553,7 +719,12 @@ static int afs_writepages_region(struct address_space *mapping,
wbc->nr_to_write -= ret; wbc->nr_to_write -= ret;
cond_resched(); cond_resched();
} while (index < end && wbc->nr_to_write > 0); } while (index < wb->last);
index = wb->last + 1;
afs_put_writeback(vnode, wb, 1);
if (index < end && wbc->nr_to_write > 0)
goto next_wb;
*_next = index; *_next = index;
_leave(" = 0 [%lx]", *_next); _leave(" = 0 [%lx]", *_next);
...@@ -603,13 +774,16 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) ...@@ -603,13 +774,16 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
struct pagevec pv; struct pagevec pv;
unsigned count, loop; unsigned count, loop;
pgoff_t first = call->first, last = call->last; pgoff_t first = call->first, last = call->last;
bool free_wb; unsigned delta = 0;
_enter("{%x:%u},{%lx-%lx}", _enter("{%x:%u},{%lx-%lx}",
vnode->fid.vid, vnode->fid.vnode, first, last); vnode->fid.vid, vnode->fid.vnode, first, last);
ASSERT(wb != NULL); ASSERT(wb != NULL);
trace_afs_writeback(vnode, wb, afs_writeback_trace_written,
atomic_read(&wb->usage), 0);
pagevec_init(&pv, 0); pagevec_init(&pv, 0);
do { do {
...@@ -626,27 +800,15 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) ...@@ -626,27 +800,15 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
for (loop = 0; loop < count; loop++) { for (loop = 0; loop < count; loop++) {
struct page *page = pv.pages[loop]; struct page *page = pv.pages[loop];
end_page_writeback(page); end_page_writeback(page);
if (page_private(page) == (unsigned long) wb) { __afs_writeback_remove_page(vnode, wb, page, &delta);
set_page_private(page, 0);
ClearPagePrivate(page);
wb->usage--;
}
}
free_wb = false;
if (wb->usage == 0) {
afs_unlink_writeback(vnode, wb);
free_wb = true;
} }
spin_unlock(&vnode->writeback_lock); spin_unlock(&vnode->writeback_lock);
first += count; first += count;
if (free_wb) {
afs_free_writeback(wb);
wb = NULL;
}
__pagevec_release(&pv); __pagevec_release(&pv);
} while (first <= last); } while (first <= last);
afs_put_writeback(vnode, wb, delta);
_leave(""); _leave("");
} }
...@@ -704,34 +866,29 @@ int afs_writeback_all(struct afs_vnode *vnode) ...@@ -704,34 +866,29 @@ int afs_writeback_all(struct afs_vnode *vnode)
* - the return status from this call provides a reliable indication of * - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process. * whether any write errors occurred for this process.
*/ */
static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync) static int afs_sync_data(struct afs_vnode *vnode, loff_t start, loff_t end,
enum afs_writeback_trace why)
{ {
struct inode *inode = file_inode(file);
struct afs_writeback *wb, *xwb; struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(inode);
bool do_sync = false; bool do_sync = false;
int ret; int ret;
_enter("{%x:%u},{n=%pD},%d", _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, why);
vnode->fid.vid, vnode->fid.vnode, file,
sync); if (list_empty(&vnode->writebacks))
return 0;
/* use a writeback record as a marker in the queue - when this reaches /* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either * the front of the queue, all the outstanding writes are either
* completed or rejected */ * completed or rejected */
wb = kzalloc(sizeof(*wb), GFP_KERNEL); wb = afs_alloc_writeback(vnode, NULL, 0, 0, PAGE_SIZE);
if (!wb) { if (!wb) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
wb->first = 0;
wb->last = -1; wb->last = -1;
wb->offset_first = 0;
wb->to_last = PAGE_SIZE;
wb->usage = 1;
wb->state = AFS_WBACK_SYNCING; wb->state = AFS_WBACK_SYNCING;
init_waitqueue_head(&wb->waitq);
INIT_LIST_HEAD(&wb->link);
spin_lock(&vnode->writeback_lock); spin_lock(&vnode->writeback_lock);
list_for_each_entry(xwb, &vnode->writebacks, link) { list_for_each_entry(xwb, &vnode->writebacks, link) {
...@@ -741,7 +898,7 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync) ...@@ -741,7 +898,7 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync)
do_sync = true; do_sync = true;
break; break;
default: default:
do_sync |= sync; do_sync |= (why == afs_writeback_trace_fsync);
break; break;
case AFS_WBACK_SYNCING: case AFS_WBACK_SYNCING:
break; break;
...@@ -750,16 +907,19 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync) ...@@ -750,16 +907,19 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync)
break; break;
} }
} }
if (do_sync)
if (do_sync) {
afs_get_writeback(vnode, wb, why);
list_add_tail(&wb->link, &vnode->writebacks); list_add_tail(&wb->link, &vnode->writebacks);
}
spin_unlock(&vnode->writeback_lock); spin_unlock(&vnode->writeback_lock);
ret = 0; ret = 0;
if (do_sync) { if (do_sync) {
/* push all the outstanding writebacks to the server */ /* push all the outstanding writebacks to the server */
inode_lock(inode); //inode_lock(&vnode->vfs_inode);
ret = afs_writeback_all(vnode); ret = afs_writeback_all(vnode);
inode_unlock(inode); //inode_unlock(&vnode->vfs_inode);
if (ret < 0) if (ret < 0)
goto out; goto out;
...@@ -770,14 +930,15 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync) ...@@ -770,14 +930,15 @@ static int afs_sync_file(struct file *file, loff_t start, loff_t end, bool sync)
} }
out: out:
afs_put_writeback(vnode, wb); afs_put_writeback(vnode, wb, 1);
_leave(" = %d", ret); _leave(" = %d", ret);
return ret; return ret;
} }
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{ {
return afs_sync_file(file, start, end, true); return afs_sync_data(AFS_FS_I(file_inode(file)), start, end,
afs_writeback_trace_fsync);
} }
/* /*
...@@ -791,7 +952,8 @@ int afs_flush(struct file *file, fl_owner_t id) ...@@ -791,7 +952,8 @@ int afs_flush(struct file *file, fl_owner_t id)
if ((file->f_mode & FMODE_WRITE) == 0) if ((file->f_mode & FMODE_WRITE) == 0)
return 0; return 0;
return afs_sync_file(file, 0, LLONG_MAX, false); return afs_sync_data(AFS_FS_I(file_inode(file)), 0, LLONG_MAX,
afs_writeback_trace_flush);
} }
/* /*
...@@ -814,3 +976,13 @@ int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ...@@ -814,3 +976,13 @@ int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
_leave(" = 0"); _leave(" = 0");
return 0; return 0;
} }
/*
 * Launder a page.
 *
 * Despite the name, the body shown here performs no writeback: it only
 * emits the entry debug message carrying the page index and then returns 0.
 */
int afs_launder_page(struct page *page)
{
_enter("{%lu}", page->index);
return 0;
}
...@@ -60,6 +60,26 @@ enum afs_vl_operation { ...@@ -60,6 +60,26 @@ enum afs_vl_operation {
afs_VL_Probe = 514, /* AFS Probe Volume Location Service operation ID */ afs_VL_Probe = 514, /* AFS Probe Volume Location Service operation ID */
}; };
/*
 * Reason codes recorded by the afs_writeback tracepoint.
 *
 * A value of this type is passed as the tracepoint's "op" argument and is
 * turned into a short label through the afs_writeback_traces table, so every
 * enumerator here needs a matching entry in that table.
 */
enum afs_writeback_trace {
afs_writeback_trace_alloc,
afs_writeback_trace_append,
afs_writeback_trace_conflict,
afs_writeback_trace_discard,
afs_writeback_trace_flush,
afs_writeback_trace_fsync,
afs_writeback_trace_invalidate_page,
afs_writeback_trace_new,
afs_writeback_trace_no_wake,
afs_writeback_trace_put,
afs_writeback_trace_release_page,
afs_writeback_trace_subsume,
afs_writeback_trace_unlink,
afs_writeback_trace_wake,
afs_writeback_trace_write,
afs_writeback_trace_writepages,
afs_writeback_trace_written,
};
#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/* /*
...@@ -100,6 +120,25 @@ enum afs_vl_operation { ...@@ -100,6 +120,25 @@ enum afs_vl_operation {
EM(afs_VL_GetEntryByName, "VL.GetEntryByName") \ EM(afs_VL_GetEntryByName, "VL.GetEntryByName") \
E_(afs_VL_Probe, "VL.Probe") E_(afs_VL_Probe, "VL.Probe")
/*
 * Table pairing each afs_writeback_trace enumerator with the label printed
 * for it by the afs_writeback tracepoint (via __print_symbolic()).
 *
 * Every entry uses EM() except the last, which uses E_(); the table is
 * expanded once to export the enum values and again, after EM()/E_() are
 * redefined, to produce the enum-to-string mapping.
 *
 * NOTE(review): most labels are padded to six characters, but "New ", "Put "
 * and "Wake " are shorter - confirm whether the padding was lost.
 */
#define afs_writeback_traces \
EM(afs_writeback_trace_alloc, "Alloc ") \
EM(afs_writeback_trace_append, "Append") \
EM(afs_writeback_trace_conflict, "Conflc") \
EM(afs_writeback_trace_discard, "Discrd") \
EM(afs_writeback_trace_flush, "Flush ") \
EM(afs_writeback_trace_fsync, "Fsync ") \
EM(afs_writeback_trace_invalidate_page, "InvlPg") \
EM(afs_writeback_trace_new, "New ") \
EM(afs_writeback_trace_no_wake, "NoWake") \
EM(afs_writeback_trace_put, "Put ") \
EM(afs_writeback_trace_release_page, "RelsPg") \
EM(afs_writeback_trace_subsume, "Subsum") \
EM(afs_writeback_trace_unlink, "Unlink") \
EM(afs_writeback_trace_wake, "Wake ") \
EM(afs_writeback_trace_write, "Write ") \
EM(afs_writeback_trace_writepages, "Wpages") \
E_(afs_writeback_trace_written, "Writtn")
/* /*
* Export enum symbols via userspace. * Export enum symbols via userspace.
...@@ -112,6 +151,7 @@ enum afs_vl_operation { ...@@ -112,6 +151,7 @@ enum afs_vl_operation {
afs_call_traces; afs_call_traces;
afs_fs_operations; afs_fs_operations;
afs_vl_operations; afs_vl_operations;
afs_writeback_traces;
/* /*
* Now redefine the EM() and E_() macros to map the enums to the strings that * Now redefine the EM() and E_() macros to map the enums to the strings that
...@@ -344,14 +384,16 @@ TRACE_EVENT(afs_send_pages, ...@@ -344,14 +384,16 @@ TRACE_EVENT(afs_send_pages,
); );
TRACE_EVENT(afs_sent_pages, TRACE_EVENT(afs_sent_pages,
TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last, int ret), TP_PROTO(struct afs_call *call, pgoff_t first, pgoff_t last,
pgoff_t cursor, int ret),
TP_ARGS(call, first, last, ret), TP_ARGS(call, first, last, cursor, ret),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct afs_call *, call ) __field(struct afs_call *, call )
__field(pgoff_t, first ) __field(pgoff_t, first )
__field(pgoff_t, last ) __field(pgoff_t, last )
__field(pgoff_t, cursor )
__field(int, ret ) __field(int, ret )
), ),
...@@ -359,13 +401,69 @@ TRACE_EVENT(afs_sent_pages, ...@@ -359,13 +401,69 @@ TRACE_EVENT(afs_sent_pages,
__entry->call = call; __entry->call = call;
__entry->first = first; __entry->first = first;
__entry->last = last; __entry->last = last;
__entry->cursor = cursor;
__entry->ret = ret; __entry->ret = ret;
), ),
TP_printk(" c=%p %lx-%lx r=%d", TP_printk(" c=%p %lx-%lx c=%lx r=%d",
__entry->call, __entry->call,
__entry->first, __entry->last, __entry->first, __entry->last,
__entry->ret) __entry->cursor, __entry->ret)
);
/*
 * Tracepoint afs_write_begin.
 *
 * Records the vnode pointer, a page index, a from/to pair and a flags word,
 * and prints them as "vn=<ptr> pg=<index> <from>-<to> fl=<flags>".  The page
 * index is printed in decimal here.
 *
 * NOTE(review): from/to are presumably byte offsets within the page being
 * written - confirm against the caller.
 */
TRACE_EVENT(afs_write_begin,
TP_PROTO(struct afs_vnode *vnode, pgoff_t index,
unsigned from, unsigned to, unsigned flags),
TP_ARGS(vnode, index, from, to, flags),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(pgoff_t, index )
__field(unsigned, from )
__field(unsigned, to )
__field(unsigned, flags )
),
TP_fast_assign(
__entry->vnode = vnode;
__entry->index = index;
__entry->from = from;
__entry->to = to;
__entry->flags = flags;
),
TP_printk("vn=%p pg=%lu %u-%u fl=%u",
__entry->vnode, __entry->index,
__entry->from, __entry->to, __entry->flags)
);
/*
 * Tracepoint afs_writeback.
 *
 * Records the vnode and writeback-record pointers, the reason code (stored
 * as an int and printed as its afs_writeback_traces label), and two integers
 * printed as "u=" (usage) and "d=" (delta).
 *
 * NOTE(review): usage is presumably the record's reference count and delta
 * the change applied to it - confirm against the callers.
 */
TRACE_EVENT(afs_writeback,
TP_PROTO(struct afs_vnode *vnode, struct afs_writeback *wb,
enum afs_writeback_trace op, int usage, int delta),
TP_ARGS(vnode, wb, op, usage, delta),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode )
__field(struct afs_writeback *, wb )
__field(int, op )
__field(int, usage )
__field(int, delta )
),
TP_fast_assign(
__entry->vnode = vnode;
__entry->wb = wb;
__entry->op = op;
__entry->usage = usage;
__entry->delta = delta;
),
TP_printk("vn=%p wb=%p %s u=%d d=%d",
__entry->vnode, __entry->wb,
__print_symbolic(__entry->op, afs_writeback_traces),
__entry->usage, __entry->delta)
); );
#endif /* _TRACE_AFS_H */ #endif /* _TRACE_AFS_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment