Commit 9672a337 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] writeback efficiency and QoS improvements

The radix-tree walk for writeback has a couple of problems:

a) It always scans a file from its first dirty page, so if someone
   is repeatedly dirtying the front part of a file, pages near the end
   may be starved of writeout.  (Well, not completely: the `kupdate'
   function will write an entire file once the file's dirty timestamp
   has expired).  

b) When the disk queues are huge (10000 requests), there can be a
   very large number of locked pages.  Scanning past these in writeback
   consumes quite some CPU time.

So in each address_space we record the index at which the last batch of
writeout terminated and start the next batch of writeback from that
point.
parent bd134f27
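
To make the mechanism concrete, here is a minimal user-space sketch of the cyclic scan that the hunks below add to mpage_writepages(). Everything in it (the toy_mapping structure, the write_batch() helper, NR_PAGES and the budget parameter) is made up for illustration; it only mirrors the control flow of the patch: background writeback resumes at writeback_index, wraps back to the start of the file once if it finds nothing to do there, and records where it stopped for the next pass.

#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 16                      /* toy file size, in pages */

struct toy_mapping {
        bool dirty[NR_PAGES];            /* stand-in for the dirty-tagged radix tree */
        unsigned long writeback_index;   /* where the next background pass resumes */
};

/*
 * Write out up to 'budget' dirty pages at or after *index, advancing
 * *index past everything examined.  Returns the number "written".
 */
static int write_batch(struct toy_mapping *m, unsigned long *index, int budget)
{
        int written = 0;

        while (*index < NR_PAGES && written < budget) {
                if (m->dirty[*index]) {
                        m->dirty[*index] = false;       /* pretend writeout */
                        written++;
                }
                (*index)++;
        }
        return written;
}

/* One writeback pass, mirroring the control flow the patch adds */
static void writeback_pass(struct toy_mapping *m, int budget, bool whole_file)
{
        unsigned long index;
        int scanned = 0;

        if (!whole_file) {
                index = m->writeback_index;     /* start from prev offset */
        } else {
                index = 0;                      /* whole-file sweep */
                scanned = 1;
        }
retry:
        while (budget > 0) {
                int n = write_batch(m, &index, budget);

                if (n == 0)
                        break;                  /* nothing dirty at or after index */
                scanned = 1;
                budget -= n;
        }
        if (!scanned && budget > 0) {
                /* started near EOF and found nothing: wrap to the start once */
                scanned = 1;
                index = 0;
                goto retry;
        }
        m->writeback_index = index;             /* next pass picks up here */
}

int main(void)
{
        struct toy_mapping m = { .writeback_index = 0 };

        m.dirty[3] = m.dirty[4] = m.dirty[14] = true;

        writeback_pass(&m, 2, false);   /* writes pages 3 and 4, stops at 5 */
        writeback_pass(&m, 2, false);   /* resumes at 5, writes page 14 */
        printf("next pass resumes at page %lu\n", m.writeback_index);
        return 0;
}

The actual patch follows; the kernel uses wbc->nr_to_write and the done flag where the sketch uses a simple budget.
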
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -610,6 +610,7 @@ mpage_writepages(struct address_space *mapping,
 	struct pagevec pvec;
 	int nr_pages;
 	pgoff_t index;
+	int scanned = 0;
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -621,11 +622,18 @@ mpage_writepages(struct address_space *mapping,
 	writepage = mapping->a_ops->writepage;
 
 	pagevec_init(&pvec, 0);
-	index = 0;
+	if (wbc->sync_mode == WB_SYNC_NONE) {
+		index = mapping->writeback_index; /* Start from prev offset */
+	} else {
+		index = 0;			  /* whole-file sweep */
+		scanned = 1;
+	}
+retry:
 	while (!done && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 			PAGECACHE_TAG_DIRTY, PAGEVEC_SIZE))) {
 		unsigned i;
 
+		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
@@ -672,6 +680,16 @@ mpage_writepages(struct address_space *mapping,
 		}
 		pagevec_release(&pvec);
 	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	mapping->writeback_index = index;
 	if (bio)
 		mpage_bio_submit(WRITE, bio);
 	return ret;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -324,6 +324,7 @@ struct address_space {
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and spinlock protecting it */
 	unsigned long		nrpages;	/* number of total pages */
+	pgoff_t			writeback_index;/* writeback starts here */
 	struct address_space_operations *a_ops;	/* methods */
 	struct list_head	i_mmap;		/* list of private mappings */
 	struct list_head	i_mmap_shared;	/* list of shared mappings */