Commit 2332dc78 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] vmscan.c scan rate fixes

We've been futzing with the scan rates of the inactive and active lists far
too much, and it's still not right (Anton reports interrupt-off times of over
a second).

- We have this logic in there from 2.4.early (at least) which tries to keep
  the inactive list 1/3rd the size of the active list.  Or something.

  I really cannot see any logic behind this, so toss it out and change the
  arithmetic in there so that all pages on both lists have equal scan rates.

- Chunk the work up so we never hold interrupts off for more than 32 pages'
  worth of scanning.  (A minimal sketch of this chunking follows the list.)

- Make the per-zone scan-count accumulators unsigned long rather than
  atomic_t.

  Mainly because atomic_t's could conceivably overflow, but also because
  access to these counters is racy-by-design anyway.
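
As a rough illustration of the chunking mentioned above (a minimal userspace
sketch, not the kernel code: scan_batch() is a hypothetical stand-in for one
scanning pass done under the LRU lock, and the real patch interleaves the
active and inactive lists in a single loop, as the vmscan.c hunk below shows):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32	/* chunk size: at most 32 pages per pass */

/* Hypothetical stand-in for one scanning pass done with the LRU lock
 * held (and interrupts off in the real kernel). */
static void scan_batch(const char *list, unsigned long nr)
{
	printf("scan %lu pages from the %s list\n", nr, list);
}

/* Drain an accumulated scan count in chunks of at most SWAP_CLUSTER_MAX,
 * so no single pass covers more than 32 pages' worth of work. */
static void scan_in_chunks(const char *list, unsigned long nr)
{
	while (nr) {
		unsigned long chunk =
			nr < SWAP_CLUSTER_MAX ? nr : SWAP_CLUSTER_MAX;

		scan_batch(list, chunk);
		nr -= chunk;
	}
}

int main(void)
{
	scan_in_chunks("active", 100);	/* four passes: 32 + 32 + 32 + 4 */
	return 0;
}
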
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent acba6041
@@ -118,8 +118,8 @@ struct zone {
 	spinlock_t lru_lock;
 	struct list_head active_list;
 	struct list_head inactive_list;
-	atomic_t nr_scan_active;
-	atomic_t nr_scan_inactive;
+	unsigned long nr_scan_active;
+	unsigned long nr_scan_inactive;
 	unsigned long nr_active;
 	unsigned long nr_inactive;
 	int all_unreclaimable; /* All pages pinned */
@@ -1482,8 +1482,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		zone_names[j], realsize, batch);
 	INIT_LIST_HEAD(&zone->active_list);
 	INIT_LIST_HEAD(&zone->inactive_list);
-	atomic_set(&zone->nr_scan_active, 0);
-	atomic_set(&zone->nr_scan_inactive, 0);
+	zone->nr_scan_active = 0;
+	zone->nr_scan_inactive = 0;
 	zone->nr_active = 0;
 	zone->nr_inactive = 0;
 	if (!size)
@@ -789,55 +789,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 }
 
 /*
- * Scan `nr_pages' from this zone. Returns the number of reclaimed pages.
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void
 shrink_zone(struct zone *zone, struct scan_control *sc)
 {
-	unsigned long scan_active, scan_inactive;
-	int count;
-
-	scan_inactive = (zone->nr_active + zone->nr_inactive) >> sc->priority;
+	unsigned long nr_active;
+	unsigned long nr_inactive;
 
 	/*
-	 * Try to keep the active list 2/3 of the size of the cache. And
-	 * make sure that refill_inactive is given a decent number of pages.
-	 *
-	 * The "scan_active + 1" here is important. With pagecache-intensive
-	 * workloads the inactive list is huge, and `ratio' evaluates to zero
-	 * all the time. Which pins the active list memory. So we add one to
-	 * `scan_active' just to make sure that the kernel will slowly sift
-	 * through the active list.
+	 * Add one to `nr_to_scan' just to make sure that the kernel will
+	 * slowly sift through the active list.
 	 */
-	if (zone->nr_active >= 4*(zone->nr_inactive*2 + 1)) {
-		/* Don't scan more than 4 times the inactive list scan size */
-		scan_active = 4*scan_inactive;
-	} else {
-		unsigned long long tmp;
-
-		/* Cast to long long so the multiply doesn't overflow */
-		tmp = (unsigned long long)scan_inactive * zone->nr_active;
-		do_div(tmp, zone->nr_inactive*2 + 1);
-		scan_active = (unsigned long)tmp;
-	}
+	zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+	nr_active = zone->nr_scan_active;
+	if (nr_active >= SWAP_CLUSTER_MAX)
+		zone->nr_scan_active = 0;
+	else
+		nr_active = 0;
+
+	zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
+	nr_inactive = zone->nr_scan_inactive;
+	if (nr_inactive >= SWAP_CLUSTER_MAX)
+		zone->nr_scan_inactive = 0;
+	else
+		nr_inactive = 0;
 
-	atomic_add(scan_active + 1, &zone->nr_scan_active);
-	count = atomic_read(&zone->nr_scan_active);
-	if (count >= SWAP_CLUSTER_MAX) {
-		atomic_set(&zone->nr_scan_active, 0);
-		sc->nr_to_scan = count;
+	while (nr_active || nr_inactive) {
+		if (nr_active) {
+			sc->nr_to_scan = min(nr_active,
+					(unsigned long)SWAP_CLUSTER_MAX);
+			nr_active -= sc->nr_to_scan;
 			refill_inactive_zone(zone, sc);
 		}
 
-	atomic_add(scan_inactive, &zone->nr_scan_inactive);
-	count = atomic_read(&zone->nr_scan_inactive);
-	if (count >= SWAP_CLUSTER_MAX) {
-		atomic_set(&zone->nr_scan_inactive, 0);
-		sc->nr_to_scan = count;
+		if (nr_inactive) {
+			sc->nr_to_scan = min(nr_inactive,
+					(unsigned long)SWAP_CLUSTER_MAX);
+			nr_inactive -= sc->nr_to_scan;
 			shrink_cache(zone, sc);
 		}
 	}
 }
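
For a feel of the new arithmetic, here is an illustrative walkthrough (the
list sizes are made up, and a scan priority of 12, the usual starting value,
is assumed): a zone with 100,000 active and 20,000 inactive pages adds
(100000 >> 12) + 1 = 25 to nr_scan_active and (20000 >> 12) + 1 = 5 to
nr_scan_inactive on each call to shrink_zone().  Nothing is scanned until an
accumulator reaches SWAP_CLUSTER_MAX (32): the active list gets there after
two calls (50 pages, drained as a 32-page chunk and an 18-page chunk) and the
inactive list after seven (35 pages), so both lists end up scanned at the
same per-page rate, in proportion to their size, and no single chunk covers
more than 32 pages.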