Commit e4f8c4aa authored by Ram Pai's avatar Ram Pai Committed by Linus Torvalds

[PATCH] readahead fixes

Here is a consolidated readahead patch that takes care of the performance
regression seen with multiple threaded writes to the same file descriptor. 


	The patch does the following:

	1. Instead of calculating the average count of sequential
		access in the read patterns, it calculates the
		average amount of hits in the current window.
	2. This average is used to guide the size of the next current
		window.
	3. Since the field serial_cnt in the ra structure does not
	 	make sense with the introduction of the new logic,
		I have renamed that field as currnt_wnd_hit.

This patch will help the read patterns that are not necessarily sequential
but have sufficient locality.  However it may regress random workload.

	Results:

	1. Berkley Shands has reported great performance with this
		patch.
	2. iozone showed negligible effect on various read patterns.
	3. DSS workload saw negligible change.
	4. Sysbench saw a small improvement.
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent da7d2464
...@@ -555,8 +555,8 @@ struct file_ra_state { ...@@ -555,8 +555,8 @@ struct file_ra_state {
unsigned long prev_page; /* Cache last read() position */ unsigned long prev_page; /* Cache last read() position */
unsigned long ahead_start; /* Ahead window */ unsigned long ahead_start; /* Ahead window */
unsigned long ahead_size; unsigned long ahead_size;
unsigned long serial_cnt; /* measure of sequentiality */ unsigned long currnt_wnd_hit; /* locality in the current window */
unsigned long average; /* another measure of sequentiality */ unsigned long average; /* size of next current window */
unsigned long ra_pages; /* Maximum readahead window */ unsigned long ra_pages; /* Maximum readahead window */
unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */
unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */
......
...@@ -384,25 +384,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -384,25 +384,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
first_access=1; first_access=1;
ra->next_size = max / 2; ra->next_size = max / 2;
ra->prev_page = offset; ra->prev_page = offset;
ra->serial_cnt++; ra->currnt_wnd_hit++;
goto do_io; goto do_io;
} }
if (offset == ra->prev_page + 1) {
if (ra->serial_cnt <= (max * 2))
ra->serial_cnt++;
} else {
/*
* to avoid rounding errors, ensure that 'average'
* tends towards the value of ra->serial_cnt.
*/
average = ra->average;
if (average < ra->serial_cnt) {
average++;
}
ra->average = (average + ra->serial_cnt) / 2;
ra->serial_cnt = 1;
}
ra->prev_page = offset; ra->prev_page = offset;
if (offset >= ra->start && offset <= (ra->start + ra->size)) { if (offset >= ra->start && offset <= (ra->start + ra->size)) {
...@@ -411,12 +396,22 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -411,12 +396,22 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
* page beyond the end. Expand the next readahead size. * page beyond the end. Expand the next readahead size.
*/ */
ra->next_size += 2; ra->next_size += 2;
if (ra->currnt_wnd_hit <= (max * 2))
ra->currnt_wnd_hit++;
} else { } else {
/* /*
* A miss - lseek, pagefault, pread, etc. Shrink the readahead * A miss - lseek, pagefault, pread, etc. Shrink the readahead
* window. * window.
*/ */
ra->next_size -= 2; ra->next_size -= 2;
average = ra->average;
if (average < ra->currnt_wnd_hit) {
average++;
}
ra->average = (average + ra->currnt_wnd_hit) / 2;
ra->currnt_wnd_hit = 1;
} }
if ((long)ra->next_size > (long)max) if ((long)ra->next_size > (long)max)
...@@ -468,7 +463,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -468,7 +463,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
* pages shall be accessed in the next * pages shall be accessed in the next
* current window. * current window.
*/ */
ra->next_size = min(ra->average , (unsigned long)max); average = ra->average;
if (ra->currnt_wnd_hit > average)
average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
ra->next_size = min(average , (unsigned long)max);
} }
ra->start = offset; ra->start = offset;
ra->size = ra->next_size; ra->size = ra->next_size;
...@@ -501,8 +500,8 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -501,8 +500,8 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
* random. Hence don't bother to readahead. * random. Hence don't bother to readahead.
*/ */
average = ra->average; average = ra->average;
if (ra->serial_cnt > average) if (ra->currnt_wnd_hit > average)
average = (ra->serial_cnt + ra->average + 1) / 2; average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
if (average > max) { if (average > max) {
ra->ahead_start = ra->start + ra->size; ra->ahead_start = ra->start + ra->size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment