Commit 6f734a1a (parent d4cf1012) authored by Steven Pratt, committed by Linus Torvalds

[PATCH] Simplified readahead

With Ram Pai <linuxram@us.ibm.com>

- request size is now passed into page_cache_readahead().  This allows
  removal of the size-averaging code from the old readahead logic.

- readahead ramp-up is now faster (especially for larger request sizes)

- There is no longer a "slow read path".  Readahead is turned off at the
  first random access and turned back on at the first sequential access.

- The code now handles thrashing, slowly reducing the readahead window
  until thrashing stops or the minimum size is reached (see the sketch
  after this list).

- Returned to the old behavior where the first access is assumed
  sequential only if it is at offset 0.

- Designed to handle larger (1MB and above) window sizes efficiently.
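
A minimal user-space sketch of the window behavior described above
(fast ramp-up, off on random access, slow shrink under thrashing)
follows.  It is illustrative only: the constants and the helper names
(on_sequential, on_random, on_thrash) are made up for this sketch and
are not the patch code.

#include <stdio.h>

#define RA_MIN	4	/* pages; stands in for the minimum window */
#define RA_MAX	32	/* pages; stands in for the maximum window */

static unsigned long window;	/* current readahead window, in pages */

/* Sequential access: ramp up quickly, scaled by the request size. */
static void on_sequential(unsigned long req_size)
{
	if (window == 0)
		window = RA_MIN;		/* readahead turned back on */
	else
		window = 2 * window + req_size;	/* fast ramp-up */
	if (window > RA_MAX)
		window = RA_MAX;
}

/* Random access: turn readahead off entirely. */
static void on_random(void)
{
	window = 0;
}

/* Thrashing: shrink slowly until it stops or the minimum is reached. */
static void on_thrash(void)
{
	window = window > 2 * RA_MIN ? window - RA_MIN : RA_MIN;
}

int main(void)
{
	on_sequential(8);
	on_sequential(8);
	printf("after two sequential reads: %lu pages\n", window);
	on_thrash();
	printf("after a thrash event: %lu pages\n", window);
	on_random();
	printf("after a random access: %lu pages\n", window);
	return 0;
}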


Benchmark results:

machine 1: 8-way Pentium IV, 1GB memory; tests run against a 36GB SCSI disk
(Similar results were seen on a 1-way 866MHz box with an IDE disk.)

TioBench:

tiobench.pl --dir /mnt/tmp --block 4096 --size 4000 --numruns 2 --threads 1(4,16,64)

4k request size sequential read results in MB/sec

  Threads         2.6.9    w/patches    %diff         diff
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -563,16 +563,17 @@ struct fown_struct {
 struct file_ra_state {
 	unsigned long start;		/* Current window */
 	unsigned long size;
-	unsigned long next_size;	/* Next window size */
+	unsigned long flags;		/* ra flags RA_FLAG_xxx*/
+	unsigned long cache_hit;	/* cache hit count*/
 	unsigned long prev_page;	/* Cache last read() position */
 	unsigned long ahead_start;	/* Ahead window */
 	unsigned long ahead_size;
-	unsigned long currnt_wnd_hit;	/* locality in the current window */
-	unsigned long average;		/* size of next current window */
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
 };
+#define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
+#define RA_FLAG_INCACHE 0x02	/* file is already in cache */
 
 struct file {
 	struct list_head	f_list;
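
A hedged, self-contained user-space sketch of how the two new flag bits
and the cache_hit counter could interact is below.  The helper names
(ra_mark_miss, ra_note_cache_hit) and the struct name are illustrative
and do not appear in the patch; only the RA_FLAG_* values and
VM_MAX_CACHE_HIT come from it.

#include <stdio.h>

#define RA_FLAG_MISS	 0x01	/* a cache miss occurred against this file */
#define RA_FLAG_INCACHE	 0x02	/* file is already in cache */
#define VM_MAX_CACHE_HIT 256	/* max pages in a row in cache before
				 * turning readahead off */

struct ra_state_sketch {
	unsigned long flags;
	unsigned long cache_hit;
};

/* A page we expected in cache was missing: flag the miss, reset hits. */
static void ra_mark_miss(struct ra_state_sketch *ra)
{
	ra->flags |= RA_FLAG_MISS;
	ra->cache_hit = 0;
}

/* Page was already cached: after enough consecutive hits, mark the
 * whole file as in-cache so readahead can be skipped. */
static void ra_note_cache_hit(struct ra_state_sketch *ra)
{
	if (++ra->cache_hit >= VM_MAX_CACHE_HIT)
		ra->flags |= RA_FLAG_INCACHE;
}

int main(void)
{
	struct ra_state_sketch ra = { 0, 0 };
	int i;

	for (i = 0; i < VM_MAX_CACHE_HIT; i++)
		ra_note_cache_hit(&ra);
	printf("INCACHE: %s\n", (ra.flags & RA_FLAG_INCACHE) ? "yes" : "no");
	ra_mark_miss(&ra);
	printf("MISS: %s, hits reset to %lu\n",
	       (ra.flags & RA_FLAG_MISS) ? "yes" : "no", ra.cache_hit);
	return 0;
}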
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -732,15 +732,18 @@ int write_one_page(struct page *page, int wait);
 /* readahead.c */
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
+#define VM_MAX_CACHE_HIT	256	/* max pages in a row in cache before
+					 * turning readahead off */
 
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read);
-void page_cache_readahead(struct address_space *mapping,
+unsigned long page_cache_readahead(struct address_space *mapping,
 			  struct file_ra_state *ra,
 			  struct file *filp,
-			  unsigned long offset);
+			  unsigned long offset,
+			  unsigned long size);
 void handle_ra_miss(struct address_space *mapping,
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
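
The prototype change above is the core of the new interface: the size
argument tells readahead how many pages the caller still wants, and the
unsigned long return value reports how many pages the readahead logic
took responsibility for.  The kbyte limits translate into page counts;
a minimal sketch of that conversion, assuming 4K pages (the kernel
derives this from PAGE_CACHE_SHIFT rather than hardcoding it):

#include <stdio.h>

#define PAGE_SIZE	 4096	/* assumed here; arch-dependent in the kernel */
#define VM_MAX_READAHEAD 128	/* kbytes */
#define VM_MIN_READAHEAD 16	/* kbytes (includes current page) */

int main(void)
{
	unsigned long max_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
	unsigned long min_pages = VM_MIN_READAHEAD * 1024 / PAGE_SIZE;

	/* prints 32 and 4 with 4K pages */
	printf("max window: %lu pages, min window: %lu pages\n",
	       max_pages, min_pages);
	return 0;
}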
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -688,7 +688,11 @@ void do_generic_mapping_read(struct address_space *mapping,
 			     read_actor_t actor)
 {
 	struct inode *inode = mapping->host;
-	unsigned long index, end_index, offset;
+	unsigned long index;
+	unsigned long end_index;
+	unsigned long offset;
+	unsigned long req_size;
+	unsigned long next_index;
 	loff_t isize;
 	struct page *cached_page;
 	int error;
@@ -696,6 +700,8 @@ void do_generic_mapping_read(struct address_space *mapping,
 
 	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
+	next_index = index;
+	req_size = (desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	isize = i_size_read(inode);
 	if (!isize)
@@ -705,7 +711,7 @@ void do_generic_mapping_read(struct address_space *mapping,
 	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 	for (;;) {
 		struct page *page;
-		unsigned long nr, ret;
+		unsigned long ret_size, nr, ret;
 
 		/* nr is the maximum number of bytes to copy from this page */
 		nr = PAGE_CACHE_SIZE;
@@ -720,7 +726,12 @@ void do_generic_mapping_read(struct address_space *mapping,
 		nr = nr - offset;
 
 		cond_resched();
-		page_cache_readahead(mapping, &ra, filp, index);
+		if (index == next_index && req_size) {
+			ret_size = page_cache_readahead(mapping, &ra,
+					filp, index, req_size);
+			next_index += ret_size;
+			req_size -= ret_size;
+		}
 
 find_page:
 		page = find_get_page(mapping, index);
@@ -1166,7 +1177,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 	 * For sequential accesses, we use the generic readahead logic.
	 */
 	if (VM_SequentialReadHint(area))
-		page_cache_readahead(mapping, ra, file, pgoff);
+		page_cache_readahead(mapping, ra, file, pgoff, 1);
 
 	/*
 	 * Do we have something in the page cache already?
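
The filemap.c change above is where the batching becomes visible: the
read loop now calls into readahead only when index catches up with
next_index, i.e. once per readahead decision rather than once per page.
A hedged user-space simulation of that bookkeeping follows;
mock_readahead is a stand-in for page_cache_readahead and simply claims
up to 8 pages per call.

#include <stdio.h>

/* Stand-in for page_cache_readahead(): pretend the readahead logic
 * handles up to 8 pages of the request and report that count back. */
static unsigned long mock_readahead(unsigned long index, unsigned long size)
{
	unsigned long handled = size < 8 ? size : 8;

	printf("readahead at page %lu for %lu pages -> handled %lu\n",
	       index, size, handled);
	return handled;
}

int main(void)
{
	unsigned long index = 0;
	unsigned long next_index = index;
	unsigned long req_size = 20;	/* a 20-page read request */
	unsigned long last = 20;

	for (; index < last; index++) {
		if (index == next_index && req_size) {
			unsigned long ret_size;

			ret_size = mock_readahead(index, req_size);
			next_index += ret_size;
			req_size -= ret_size;
		}
		/* ... the real loop copies page 'index' to userspace here ... */
	}
	return 0;
}

With these numbers, readahead is invoked only at pages 0, 8, and 16:
three calls for a 20-page request instead of twenty.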