Commit 61b63972 authored by Huang Ying, committed by Linus Torvalds

mm, swap: use page-cluster as max window of VMA based swap readahead

When the VMA based swap readahead was introduced, a new knob

  /sys/kernel/mm/swap/vma_ra_max_order

was added as the max window of VMA swap readahead.  This is to make it
possible to use different max window for VMA based readahead and
original physical readahead.  But Minchan Kim pointed out that this will
cause a regression because setting page-cluster sysctl to zero cannot
disable swap readahead with the change.

To fix the regression, the page-cluster sysctl is used as the max window
of both the VMA based swap readahead and original physical swap
readahead.  If more fine grained control is needed in the future, more
knobs can be added as the subordinate knobs of the page-cluster sysctl.

The vma_ra_max_order knob is deleted.  Because the knob was introduced
in v4.14-rc1, and this patch is targeting being merged before v4.14
releasing, there should be no existing users of this newly added ABI.

Link: http://lkml.kernel.org/r/20171011070847.16003-1-ying.huang@intel.com
Fixes: ec560175 ("mm, swap: VMA based swap readahead")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reported-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a7b10095
...@@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead.
still used for tmpfs etc. other users. If set to
false, the global swap readahead algorithm will be
used for all swappable pages.
What: /sys/kernel/mm/swap/vma_ra_max_order
Date: August 2017
Contact: Linux memory management mailing list <linux-mm@kvack.org>
Description: The max readahead size in order for VMA based swap readahead
VMA based swap readahead algorithm will readahead at
most 1 << max_order pages for each readahead. The
real readahead size for each readahead will be scaled
according to the estimation algorithm.
...@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
bool swap_vma_readahead = true;
#define SWAP_RA_MAX_ORDER_DEFAULT 3
static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK
...@@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
pte_t *tpte;
#endif
max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
SWAP_RA_ORDER_CEILING);
if (max_win == 1) {
swap_ra->win = 1;
return NULL;
}
faddr = vmf->address;
entry = pte_to_swp_entry(vmf->orig_pte);
if ((unlikely(non_swap_entry(entry))))
...@@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
if (page)
return page;
max_win = 1 << READ_ONCE(swap_ra_max_order);
if (max_win == 1) {
swap_ra->win = 1;
return NULL;
}
fpfn = PFN_DOWN(faddr);
swap_ra_info = GET_SWAP_RA_VAL(vma);
pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
...@@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
vma_ra_enabled_store);
/*
 * sysfs "show" handler for /sys/kernel/mm/swap/vma_ra_max_order:
 * prints the current max readahead order as a decimal integer.
 * (Removed by this commit; page-cluster is used instead.)
 */
static ssize_t vma_ra_max_order_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", swap_ra_max_order);
}
/*
 * sysfs "store" handler for /sys/kernel/mm/swap/vma_ra_max_order:
 * parses the user-supplied value and updates swap_ra_max_order.
 * (Removed by this commit; page-cluster is used instead.)
 */
static ssize_t vma_ra_max_order_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int err, v;
err = kstrtoint(buf, 10, &v);
/* Reject parse failures and orders outside (0, SWAP_RA_ORDER_CEILING]. */
if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
return -EINVAL;
swap_ra_max_order = v;
return count;
}
/* Read/write (0644) attribute binding the vma_ra_max_order handlers. */
static struct kobj_attribute vma_ra_max_order_attr =
__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
vma_ra_max_order_store);
static struct attribute *swap_attrs[] = {
&vma_ra_enabled_attr.attr,
&vma_ra_max_order_attr.attr,
NULL,
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment