[PATCH] Simplified readahead

With Ram Pai <linuxram@us.ibm.com> - request size is now passed into page_cache_readahead. This allows the removal of the size averaging code in the current readahead logic. - readahead rampup is now faster (especially for larger request sizes) - No longer "slow read path". Readahead is turn off at first random access, turned back on at first sequential access. - Code now handles thrashing, slowly reducing readahead window until thrashing stops, or min size reached. - Returned to old behavior where first access is assumed sequential only if at offset 0. - designed to handle larger (1M or above) window sizes efficiently Benchmark results: machine 1: 8 way pentiumIV 1GB memory, tests run to 36GB SCSI disk (Similar results were seen on a 1 way 866Mhz box with IDE disk.) TioBench: tiobench.pl --dir /mnt/tmp --block 4096 --size 4000 --numruns 2 --threads 1(4,16,64) 4k request size sequential read results in MB/sec Threads 2.6.9 w/patches %diff diff

[PATCH] Simplified readahead
With Ram Pai <linuxram@us.ibm.com> - request size is now passed into page_cache_readahead. This allows the removal of the size averaging code in the current readahead logic. - readahead rampup is now faster (especially for larger request sizes) - No longer "slow read path". Readahead is turn off at first random access, turned back on at first sequential access. - Code now handles thrashing, slowly reducing readahead window until thrashing stops, or min size reached. - Returned to old behavior where first access is assumed sequential only if at offset 0. - designed to handle larger (1M or above) window sizes efficiently Benchmark results: machine 1: 8 way pentiumIV 1GB memory, tests run to 36GB SCSI disk (Similar results were seen on a 1 way 866Mhz box with IDE disk.) TioBench: tiobench.pl --dir /mnt/tmp --block 4096 --size 4000 --numruns 2 --threads 1(4,16,64) 4k request size sequential read results in MB/sec Threads 2.6.9 w/patches %diff diff
6f734a1a · Steven Pratt · Linus Torvalds · d4cf1012 · 6f734a1a · 6f734a1a
Commit 6f734a1a authored Jan 03, 2005 by Steven Pratt Committed by Linus Torvalds Jan 03, 2005
Hide whitespace changes
Inline Side-by-side

Showing with 204 additions and 248 deletions

include/linux/fs.h include/linux/fs.h +4 -3

include/linux/mm.h include/linux/mm.h +5 -2

mm/filemap.c mm/filemap.c +15 -4

mm/readahead.c mm/readahead.c +180 -239

No files found.
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -563,16 +563,17 @@ struct fown_struct {
 struct file_ra_state {
 	unsigned long start;		/* Current window */
 	unsigned long size;
-	unsigned long next_size;	/* Next window size */
+	unsigned long flags;		/* ra flags RA_FLAG_xxx*/
+	unsigned long cache_hit;	/* cache hit count*/
 	unsigned long prev_page;	/* Cache last read() position */
 	unsigned long ahead_start;	/* Ahead window */
 	unsigned long ahead_size;
-	unsigned long currnt_wnd_hit;	/* locality in the current window */
-	unsigned long average;		/* size of next current window */
 	unsigned long ra_pages;		/* Maximum readahead window */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
 };
+#define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
+#define RA_FLAG_INCACHE 0x02	/* file is already in cache */
 struct file {
 	struct list_head	f_list;

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -732,15 +732,18 @@ int write_one_page(struct page *page, int wait);
 /* readahead.c */
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
+#define VM_MAX_CACHE_HIT    	256	/* max pages in a row in cache before
+					 * turning readahead off */
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			unsigned long offset, unsigned long nr_to_read);
-void page_cache_readahead(struct address_space *mapping, 
+unsigned long  page_cache_readahead(struct address_space *mapping,
 			  struct file_ra_state *ra,
 			  struct file *filp,
-			  unsigned long offset);
+			  unsigned long offset,
+			  unsigned long size);
 void handle_ra_miss(struct address_space *mapping, 
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -688,7 +688,11 @@ void do_generic_mapping_read(struct address_space *mapping,
 			     read_actor_t actor)
 {
 	struct inode *inode = mapping->host;
-	unsigned long index, end_index, offset;
+	unsigned long index;
+	unsigned long end_index;
+	unsigned long offset;
+	unsigned long req_size;
+	unsigned long next_index;
 	loff_t isize;
 	struct page *cached_page;
 	int error;
@@ -696,6 +700,8 @@ void do_generic_mapping_read(struct address_space *mapping,
 	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
+	next_index = index;
+	req_size = (desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	isize = i_size_read(inode);
@@ -705,7 +711,7 @@ void do_generic_mapping_read(struct address_space *mapping,
 	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 	for (;;) {
 		struct page *page;
-		unsigned long nr, ret;
+		unsigned long ret_size, nr, ret;
 		/* nr is the maximum number of bytes to copy from this page */
 		nr = PAGE_CACHE_SIZE;
@@ -720,7 +726,12 @@ void do_generic_mapping_read(struct address_space *mapping,
 		nr = nr - offset;
 		cond_resched();
-		page_cache_readahead(mapping, &ra, filp, index);
+		if (index == next_index && req_size) {
+			ret_size = page_cache_readahead(mapping, &ra,
+					filp, index, req_size);
+			next_index += ret_size;
+			req_size -= ret_size;
+		}
 find_page:
 		page = find_get_page(mapping, index);
@@ -1166,7 +1177,7 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
 	if (VM_SequentialReadHint(area))
-		page_cache_readahead(mapping, ra, file, pgoff);
+		page_cache_readahead(mapping, ra, file, pgoff, 1);
 	/*
 	 * Do we have something in the page cache already?

--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -35,7 +35,6 @@ void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 {
 	ra->ra_pages = mapping->backing_dev_info->ra_pages;
-	ra->average = ra->ra_pages / 2;
 }
 /*
@@ -51,6 +50,56 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra)
 	return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 }
+static inline void ra_off(struct file_ra_state *ra)
+{
+	ra->start = 0;
+	ra->flags = 0;
+	ra->size = -1;
+	ra->ahead_start = 0;
+	ra->ahead_size = 0;
+	return;
+}
+/*
+ * Set the initial window size, round to next power of 2 and square
+ * for small size, x 4 for medium, and x 2 for large
+ * for 128k (32 page) max ra
+ * 1-8 page = 32k initial, > 8 page = 128k initial
+ */
+static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
+{
+	unsigned long newsize = roundup_pow_of_two(size);
+	if (newsize <= max / 64)
+		newsize = newsize * newsize;
+	else if (newsize <= max / 4)
+		newsize = max / 4;
+	else
+		newsize = max;
+	return newsize;
+}
+/*
+ * Set the new window size, this is called only when I/O is to be submitted,
+ * not for each call to readahead.  If a cache miss occured, reduce next I/O
+ * size, else increase depending on how close to max we are.
+ */
+static unsigned long get_next_ra_size(unsigned long cur, unsigned long max,
+				unsigned long min, unsigned long * flags)
+{
+	unsigned long newsize;
+	if (*flags & RA_FLAG_MISS) {
+		newsize = max((cur - 2), min);
+		*flags &= ~RA_FLAG_MISS;
+	} else if (cur < max / 16) {
+		newsize = 4 * cur;
+	} else {
+		newsize = 2 * cur;
+	}
+	return min(newsize, max);
+}
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
 /**
@@ -65,7 +114,7 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra)
 * Hides the details of the LRU cache etc from the filesystems.
 */
 int read_cache_pages(struct address_space *mapping, struct list_head *pages,
-		 int (*filler)(void *, struct page *), void *data)
+			int (*filler)(void *, struct page *), void *data)
 {
 	struct page *page;
 	struct pagevec lru_pvec;
@@ -151,19 +200,16 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 * ahead_size:  Together, these form the "ahead window".
 * ra_pages:	The externally controlled max readahead for this fd.
 *
- * When readahead is in the "maximally shrunk" state (next_size == -1UL),
+ * When readahead is in the off state (size == -1UL), readahead is disabled.
- * readahead is disabled.  In this state, prev_page and size are used, inside
+ * In this state, prev_page is used to detect the resumption of sequential I/O.
- * handle_ra_miss(), to detect the resumption of sequential I/O.  Once there
- * has been a decent run of sequential I/O (defined by get_min_readahead),
- * readahead is reenabled.
 *
 * The readahead code manages two windows - the "current" and the "ahead"
 * windows.  The intent is that while the application is walking the pages
 * in the current window, I/O is underway on the ahead window.  When the
 * current window is fully traversed, it is replaced by the ahead window
 * and the ahead window is invalidated.  When this copying happens, the
- * new current window's pages are probably still locked.  When I/O has
+ * new current window's pages are probably still locked.  So
- * completed, we submit a new batch of I/O, creating a new ahead window.
+ * we submit a new batch of I/O immediately, creating a new ahead window.
 *
 * So:
 *
@@ -175,34 +221,25 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 *           ahead window.
 *
 * A `readahead hit' occurs when a read request is made against a page which is
- * inside the current window.  Hits are good, and the window size (next_size)
+ * the next sequential page. Ahead windowe calculations are done only when it
- * is grown aggressively when hits occur.  Two pages are added to the next
+ * is time to submit a new IO.  The code ramps up the size agressively at first,
- * window size on each hit, which will end up doubling the next window size by
+ * but slow down as it approaches max_readhead.
- * the time I/O is submitted for it.
 *
- * If readahead hits are more sparse (say, the application is only reading
+ * Any seek/ramdom IO will result in readahead being turned off.  It will resume
- * every second page) then the window will build more slowly.
+ * at the first sequential access.
- *
- * On a readahead miss (the application seeked away) the readahead window is
- * shrunk by 25%.  We don't want to drop it too aggressively, because it is a
- * good assumption that an application which has built a good readahead window
- * will continue to perform linear reads.  Either at the new file position, or
- * at the old one after another seek.
- *
- * After enough misses, readahead is fully disabled. (next_size = -1UL).
 *
 * There is a special-case: if the first page which the application tries to
 * read happens to be the first page of the file, it is assumed that a linear
- * read is about to happen and the window is immediately set to half of the
+ * read is about to happen and the window is immediately set to the initial size
- * device maximum.
+ * based on I/O request size and the max_readahead.
 * 
 * A page request at (start + size) is not a miss at all - it's just a part of
 * sequential file reading.
 *
- * This function is to be called for every page which is read, rather than when
+ * This function is to be called for every read request, rather than when
- * it is time to perform readahead.  This is so the readahead algorithm can
+ * it is time to perform readahead.  It is called only oce for the entire I/O
- * centrally work out the access patterns.  This could be costly with many tiny
+ * regardless of size unless readahead is unable to start enough I/O to satisfy
- * read()s, so we specifically optimise for that case with prev_page.
+ * the request (I/O request > max_readahead).
 */
 /*
@@ -211,7 +248,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 *
- * Returns the number of pages which actually had IO started against them.
+ * Returns the number of pages requested, or the maximum amount of I/O allowed.
 */
 static inline int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
@@ -299,266 +336,170 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	return ret;
 }
-/*
- * This version skips the IO if the queue is read-congested, and will tell the
- * block layer to abandon the readahead if request allocation would block.
- *
- * force_page_cache_readahead() will ignore queue congestion and will block on
- * request queues.
- */
-int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			unsigned long offset, unsigned long nr_to_read)
-{
-	if (!bdi_read_congested(mapping->backing_dev_info))
-		return __do_page_cache_readahead(mapping, filp,
-						offset, nr_to_read);
-	return 0;
-}
 /*
 * Check how effective readahead is being.  If the amount of started IO is
 * less than expected then the file is partly or fully in pagecache and
- * readahead isn't helping.  Shrink the window.
+ * readahead isn't helping.
 *
- * But don't shrink it too much - the application may read the same page
- * occasionally.
 */
-static inline void
+int check_ra_success(struct file_ra_state *ra, unsigned long nr_to_read,
-check_ra_success(struct file_ra_state *ra, pgoff_t attempt,
+				 unsigned long actual)
-			pgoff_t actual, pgoff_t orig_next_size)
 {
 	if (actual == 0) {
-		if (orig_next_size > 1) {
+		ra->cache_hit += nr_to_read;
-			ra->next_size = orig_next_size - 1;
+		if (ra->cache_hit >= VM_MAX_CACHE_HIT) {
-			if (ra->ahead_size)
+			ra_off(ra);
-				ra->ahead_size = ra->next_size;
+			ra->flags |= RA_FLAG_INCACHE;
-		} else {
+			return 0;
-			ra->next_size = -1UL;
-			ra->size = 0;
 		}
+	} else {
+		ra->cache_hit=0;
 	}
+	return 1;
+}
+/*
+ * Issue the I/O. If pages already in cache, increment the hit count until
+ * we exceed max, then turn RA off until we start missing again.
+ */
+int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
+			unsigned long offset, unsigned long nr_to_read)
+{
+	return  __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
 }
 /*
 * page_cache_readahead is the main function.  If performs the adaptive
 * readahead window size management and submits the readahead I/O.
 */
-void
+unsigned long
 page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
-			struct file *filp, unsigned long offset)
+		     struct file *filp, unsigned long offset,
+		     unsigned long req_size)
 {
-	unsigned max;
+	unsigned long max, min;
-	unsigned orig_next_size;
+	unsigned long newsize = req_size;
-	unsigned actual;
+	unsigned long actual=0;
-	int first_access=0;
-	unsigned long average;
 	/*
 	 * Here we detect the case where the application is performing
 	 * sub-page sized reads.  We avoid doing extra work and bogusly
 	 * perturbing the readahead window expansion logic.
-	 * If next_size is zero, this is the very first read for this
+	 * If size is zero, there is no read ahead window so we need one
-	 * file handle, or the window is maximally shrunk.
 	 */
-	if (offset == ra->prev_page) {
+	if (offset == ra->prev_page && req_size == 1 && ra->size != 0)
-		if (ra->next_size != 0)
+		goto out;
-			goto out;
-	}
-	if (ra->next_size == -1UL)
-		goto out;	/* Maximally shrunk */
 	max = get_max_readahead(ra);
-	if (max == 0)
+	min = get_min_readahead(ra);
-		goto out;	/* No readahead */
+//	maxsane = max_sane_readahead(max);
+	newsize = min(req_size, max);
-	orig_next_size = ra->next_size;
-	if (ra->next_size == 0) {
+	if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) {
-		/*
+		newsize = 1;
-		 * Special case - first read.
+		goto out;	/* No readahead or file already in cache */
-		 * We'll assume it's a whole-file read, and
-		 * grow the window fast.
-		 */
-		first_access=1;
-		ra->next_size = max / 2;
-		ra->prev_page = offset;
-		ra->currnt_wnd_hit++;
-		goto do_io;
 	}
+	/*
+	 * Special case - first read.  We'll assume it's a whole-file read if
+	 * at start of file, and grow the window fast.  Or detect first
+	 * sequential access
+	 */
+	if ((ra->size == 0 && offset == 0)	/* first io and start of file */
+	    || (ra->size == -1 && ra->prev_page == offset - 1)) {
+		/* First sequential */
+		ra->prev_page  = offset + newsize-1;
+		ra->size = get_init_ra_size(newsize, max);
+		ra->start = offset;
+		actual = do_page_cache_readahead(mapping, filp, offset,
+						ra->size);
+		if (!check_ra_success(ra, ra->size, actual))
+			goto out;
-	ra->prev_page = offset;
-	if (offset >= ra->start && offset <= (ra->start + ra->size)) {
-		/*
-		 * A readahead hit.  Either inside the window, or one
-		 * page beyond the end.  Expand the next readahead size.
-		 */
-		ra->next_size += 2;
-		if (ra->currnt_wnd_hit <= (max * 2))
-			ra->currnt_wnd_hit++;
-	} else {
 		/*
-		 * A miss - lseek, pagefault, pread, etc.  Shrink the readahead
+		 * If the request size is larger than our max readahead, we
-		 * window.
+		 * at least want to be sure that we get 2 IOs in flight and
+		 * we know that we will definitly need the new I/O.
+		 * once we do this, subsequent calls should be able to overlap
+		 * IOs,* thus preventing stalls. so issue the ahead window
+		 * immediately.
 		 */
-		ra->next_size -= 2;
+		if (req_size >= max) {
+			ra->ahead_size = get_next_ra_size(ra->size, max, min,
-		average = ra->average;
+							  &ra->flags);
-		if (average < ra->currnt_wnd_hit) {
+			ra->ahead_start = ra->start + ra->size;
-			average++;
+			actual = do_page_cache_readahead(mapping, filp,
+					 ra->ahead_start, ra->ahead_size);
+			check_ra_success(ra, ra->ahead_size, actual);
 		}
-		ra->average = (average + ra->currnt_wnd_hit) / 2;
+		goto out;
-		ra->currnt_wnd_hit = 1;
 	}
-	if ((long)ra->next_size > (long)max)
+	/*
-		ra->next_size = max;
+	 * Now handle the random case:
-	if ((long)ra->next_size <= 0L) {
+	 * partial page reads and first access were handled above,
-		ra->next_size = -1UL;
+	 * so this must be the next page otherwise it is random
-		ra->size = 0;
+	 */
-		goto out;		/* Readahead is off */
+	if ((offset != (ra->prev_page+1) || (ra->size == 0))) {
+		ra_off(ra);
+		ra->prev_page  = offset + newsize-1;
+		actual = do_page_cache_readahead(mapping, filp, offset,
+						 newsize);
+		check_ra_success(ra, newsize, actual);
+		goto out;
 	}
 	/*
-	 * Is this request outside the current window?
+	 * If we get here we are doing sequential IO and this was not the first
+	 * occurence (ie we have an existing window)
 	 */
-	if (offset < ra->start || offset >= (ra->start + ra->size)) {
-		/*
+	if (ra->ahead_start == 0) {	 /* no ahead window yet */
-		 * A miss against the current window.  Have we merely
+		ra->ahead_size = get_next_ra_size(max(newsize,ra->size),
-		 * advanced into the ahead window?
+						  max, min, &ra->flags);
-		 */
+		ra->ahead_start = ra->start + ra->size;
-		if (offset == ra->ahead_start) {
+		newsize = min (newsize, ra->ahead_start - offset);
-			/*
+		actual = do_page_cache_readahead(mapping, filp,
-			 * Yes, we have.  The ahead window now becomes
+					 ra->ahead_start, ra->ahead_size);
-			 * the current window.
+		if (!check_ra_success(ra, ra->ahead_size, actual))
-			 */
-			ra->start = ra->ahead_start;
-			ra->size = ra->ahead_size;
-			ra->prev_page = ra->start;
-			ra->ahead_start = 0;
-			ra->ahead_size = 0;
-			/*
-			 * Control now returns, probably to sleep until I/O
-			 * completes against the first ahead page.
-			 * When the second page in the old ahead window is
-			 * requested, control will return here and more I/O
-			 * will be submitted to build the new ahead window.
-			 */
 			goto out;
-		}
+	}
-do_io:
+	/*
-		/*
+	 * Already have an ahead window, check if we crossed into it if so,
-		 * This is the "unusual" path.  We come here during
+	 * shift windows and issue a new ahead window.
-		 * startup or after an lseek.  We invalidate the
+	 * Only return the #pages that are in the current window, so that we
-		 * ahead window and get some I/O underway for the new
+	 * get called back on the first page of the ahead window which will
-		 * current window.
+	 * allow us to submit more IO.
-		 */
+	 */
-		if (!first_access) {
+	if ((offset + newsize -1) >= ra->ahead_start) {
-			 /* Heuristic: there is a high probability
+		ra->start = ra->ahead_start;
-			  * that around  ra->average number of
+		ra->size = ra->ahead_size;
-			  * pages shall be accessed in the next
+		ra->ahead_start = ra->ahead_start + ra->ahead_size;
-			  * current window.
+		ra->ahead_size = get_next_ra_size(ra->ahead_size,
-			  */
+						  max, min, &ra->flags);
-			average = ra->average;
+		newsize = min (newsize, ra->ahead_start - offset);
-			if (ra->currnt_wnd_hit > average)
+		actual = do_page_cache_readahead(mapping, filp,
-				average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
+				 ra->ahead_start, ra->ahead_size);
+		check_ra_success(ra, ra->ahead_size, actual);
-			ra->next_size = min(average , (unsigned long)max);
-		}
-		ra->start = offset;
-		ra->size = ra->next_size;
-		ra->ahead_start = 0;		/* Invalidate these */
-		ra->ahead_size = 0;
-		actual = do_page_cache_readahead(mapping, filp, offset,
-						 ra->size);
-		if(!first_access) {
-			/*
-			 * do not adjust the readahead window size the first
-			 * time, the ahead window might get closed if all
-			 * the pages are already in the cache.
-			 */
-			check_ra_success(ra, ra->size, actual, orig_next_size);
-		}
-	} else {
-		/*
-		 * This read request is within the current window.  It may be
-		 * time to submit I/O for the ahead window while the
-		 * application is about to step into the ahead window.
-		 */
-		if (ra->ahead_start == 0) {
-			/*
-			 * If the average io-size is more than maximum
-			 * readahead size of the file the io pattern is
-			 * sequential. Hence  bring in the readahead window
-			 * immediately.
-			 * If the average io-size is less than maximum
-			 * readahead size of the file the io pattern is
-			 * random. Hence don't bother to readahead.
-			 */
-			average = ra->average;
-			if (ra->currnt_wnd_hit > average)
-				average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
-			if (average > max) {
-				ra->ahead_start = ra->start + ra->size;
-				ra->ahead_size = ra->next_size;
-				actual = do_page_cache_readahead(mapping, filp,
-					ra->ahead_start, ra->ahead_size);
-				check_ra_success(ra, ra->ahead_size,
-						actual, orig_next_size);
-			}
-		}
 	}
 out:
-	return;
+	ra->prev_page = offset + newsize - 1;
+	return(newsize);
 }
 /*
 * handle_ra_miss() is called when it is known that a page which should have
 * been present in the pagecache (we just did some readahead there) was in fact
 * not found.  This will happen if it was evicted by the VM (readahead
- * thrashing) or if the readahead window is maximally shrunk.
+ * thrashing)
 *
- * If the window has been maximally shrunk (next_size == -1UL) then look to see
+ * Turn on the cache miss flag in the RA struct, this will cause the RA code
- * if we are getting misses against sequential file offsets.  If so, and this
+ * to reduce the RA size on the next read.
- * persists then resume readahead.
- *
- * Otherwise we're thrashing, so shrink the readahead window by three pages.
- * This is because it is grown by two pages on a readahead hit.  Theory being
- * that the readahead window size will stabilise around the maximum level at
- * which there is no thrashing.
 */
 void handle_ra_miss(struct address_space *mapping,
 		struct file_ra_state *ra, pgoff_t offset)
 {
-	if (ra->next_size == -1UL) {
+	ra->flags |= RA_FLAG_MISS;
-		const unsigned long max = get_max_readahead(ra);
+	ra->flags &= ~RA_FLAG_INCACHE;
-		if (offset != ra->prev_page + 1) {
-			ra->size = ra->size?ra->size-1:0; /* Not sequential */
-		} else {
-			ra->size++;			/* A sequential read */
-			if (ra->size >= max) {		/* Resume readahead */
-				ra->start = offset - max;
-				ra->next_size = max;
-				ra->size = max;
-				ra->ahead_start = 0;
-				ra->ahead_size = 0;
-				ra->average = max / 2;
-			}
-		}
-		ra->prev_page = offset;
-	} else {
-		const unsigned long min = get_min_readahead(ra);
-		ra->next_size -= 3;
-		if (ra->next_size < min)
-			ra->next_size = min;
-	}
 }
 /*