Commit 2a28713a authored by Zi Yan, committed by Andrew Morton

memory tiering: introduce folio_use_access_time() check

If memory tiering mode is on and a folio is not in the top tier memory,
folio's cpupid field is repurposed to store page access time.  Instead of
an open coded check, use a function to encapsulate the check.

Link: https://lkml.kernel.org/r/20240724130115.793641-3-ziy@nvidia.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 3eb2091c
...@@ -1745,6 +1745,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) ...@@ -1745,6 +1745,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
__set_bit(pid_bit, &vma->numab_state->pids_active[1]); __set_bit(pid_bit, &vma->numab_state->pids_active[1]);
} }
} }
bool folio_use_access_time(struct folio *folio);
#else /* !CONFIG_NUMA_BALANCING */ #else /* !CONFIG_NUMA_BALANCING */
static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
{ {
...@@ -1798,6 +1800,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) ...@@ -1798,6 +1800,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
{ {
} }
/*
 * Stub for builds without CONFIG_NUMA_BALANCING: always reports that the
 * folio does not use its cpupid field to store page access time.
 */
static inline bool folio_use_access_time(struct folio *folio)
{
	return false;
}
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
......
...@@ -1840,8 +1840,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio, ...@@ -1840,8 +1840,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
* The pages in slow memory node should be migrated according * The pages in slow memory node should be migrated according
* to hot/cold instead of private/shared. * to hot/cold instead of private/shared.
*/ */
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && if (folio_use_access_time(folio)) {
!node_is_toptier(src_nid)) {
struct pglist_data *pgdat; struct pglist_data *pgdat;
unsigned long rate_limit; unsigned long rate_limit;
unsigned int latency, th, def_th; unsigned int latency, th, def_th;
......
...@@ -1707,8 +1707,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) ...@@ -1707,8 +1707,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
* For memory tiering mode, cpupid of slow memory page is used * For memory tiering mode, cpupid of slow memory page is used
* to record page access time. So use default value. * to record page access time. So use default value.
*/ */
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) || if (!folio_use_access_time(folio))
node_is_toptier(nid))
last_cpupid = folio_last_cpupid(folio); last_cpupid = folio_last_cpupid(folio);
target_nid = numa_migrate_prep(folio, vmf, haddr, nid, &flags); target_nid = numa_migrate_prep(folio, vmf, haddr, nid, &flags);
if (target_nid == NUMA_NO_NODE) if (target_nid == NUMA_NO_NODE)
...@@ -2058,8 +2057,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -2058,8 +2057,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
toptier) toptier)
goto unlock; goto unlock;
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && if (folio_use_access_time(folio))
!toptier)
folio_xchg_access_time(folio, folio_xchg_access_time(folio,
jiffies_to_msecs(jiffies)); jiffies_to_msecs(jiffies));
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/memory-tiers.h> #include <linux/memory-tiers.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/sched/sysctl.h>
#include "internal.h" #include "internal.h"
...@@ -50,6 +51,24 @@ static const struct bus_type memory_tier_subsys = { ...@@ -50,6 +51,24 @@ static const struct bus_type memory_tier_subsys = {
.dev_name = "memory_tier", .dev_name = "memory_tier",
}; };
#ifdef CONFIG_NUMA_BALANCING
/**
 * folio_use_access_time - test whether a folio's cpupid holds an access time
 * @folio: folio to check
 *
 * Memory tiering repurposes the folio's _last_cpupid field: when memory
 * tiering mode is enabled, a folio on a slow memory node (one that is not
 * toptier) stores its page access time there instead of a cpupid.
 *
 * Return: true if the folio's _last_cpupid field is used to record page
 * access time, false otherwise.
 */
bool folio_use_access_time(struct folio *folio)
{
	/* The field is only repurposed while memory tiering mode is on. */
	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
		return false;

	/* Within that mode, only folios outside the top tier are affected. */
	return !node_is_toptier(folio_nid(folio));
}
#endif
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
static int top_tier_adistance; static int top_tier_adistance;
/* /*
......
...@@ -5337,8 +5337,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) ...@@ -5337,8 +5337,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
* For memory tiering mode, cpupid of slow memory page is used * For memory tiering mode, cpupid of slow memory page is used
* to record page access time. So use default value. * to record page access time. So use default value.
*/ */
if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && if (folio_use_access_time(folio))
!node_is_toptier(nid))
last_cpupid = (-1 & LAST_CPUPID_MASK); last_cpupid = (-1 & LAST_CPUPID_MASK);
else else
last_cpupid = folio_last_cpupid(folio); last_cpupid = folio_last_cpupid(folio);
......
...@@ -161,8 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb, ...@@ -161,8 +161,7 @@ static long change_pte_range(struct mmu_gather *tlb,
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
toptier) toptier)
continue; continue;
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && if (folio_use_access_time(folio))
!toptier)
folio_xchg_access_time(folio, folio_xchg_access_time(folio,
jiffies_to_msecs(jiffies)); jiffies_to_msecs(jiffies));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment