Commit a62aa88b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'mm-hotfixes-stable-2023-12-15-07-11' of...

Merge tag 'mm-hotfixes-stable-2023-12-15-07-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "17 hotfixes. 8 are cc:stable and the other 9 pertain to post-6.6
  issues"

* tag 'mm-hotfixes-stable-2023-12-15-07-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/mglru: reclaim offlined memcgs harder
  mm/mglru: respect min_ttl_ms with memcgs
  mm/mglru: try to stop at high watermarks
  mm/mglru: fix underprotected page cache
  mm/shmem: fix race in shmem_undo_range w/THP
  Revert "selftests: error out if kernel header files are not yet built"
  crash_core: fix the check for whether crashkernel is from high memory
  x86, kexec: fix the wrong ifdeffery CONFIG_KEXEC
  sh, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC
  mips, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC
  m68k, kexec: fix the incorrect ifdeffery and build dependency of CONFIG_KEXEC
  loongarch, kexec: change dependency of object files
  mm/damon/core: make damon_start() waits until kdamond_fn() starts
  selftests/mm: cow: print ksft header before printing anything else
  mm: fix VMA heap bounds checking
  riscv: fix VMALLOC_START definition
  kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP
parents 26e7a301 4376807b
......@@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
......
......@@ -2,7 +2,7 @@
#ifndef _ASM_M68K_KEXEC_H
#define _ASM_M68K_KEXEC_H
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
/* Maximum physical address we can use pages from */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
......@@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_KEXEC */
#endif /* CONFIG_KEXEC_CORE */
#endif /* _ASM_M68K_KEXEC_H */
......@@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o
obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o
obj-$(CONFIG_UBOOT) += uboot.o
......
......@@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = {
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
......@@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = {
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
......
......@@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
prepare_frametrace(newregs);
}
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
struct kimage;
extern unsigned long kexec_args[4];
extern int (*_machine_kexec_prepare)(struct kimage *);
......
......@@ -35,7 +35,7 @@ struct plat_smp_ops {
void (*cpu_die)(unsigned int cpu);
void (*cleanup_dead_cpu)(unsigned cpu);
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
void (*kexec_nonboot_cpu)(void);
#endif
};
......
......@@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu)
extern void __noreturn play_dead(void);
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
static inline void kexec_nonboot_cpu(void)
{
extern const struct plat_smp_ops *mp_ops; /* private */
......
......@@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o
obj-$(CONFIG_RELOCATABLE) += relocate.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o
......
......@@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = {
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
......@@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = {
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
......
......@@ -392,7 +392,7 @@ static void cps_smp_finish(void)
local_irq_enable();
}
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
enum cpu_death {
CPU_DEATH_HALT,
......@@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death)
}
}
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
static void cps_kexec_nonboot_cpu(void)
{
......@@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void)
cps_shutdown_this_cpu(CPU_DEATH_POWER);
}
#endif /* CONFIG_KEXEC */
#endif /* CONFIG_KEXEC_CORE */
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */
#ifdef CONFIG_HOTPLUG_CPU
......@@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = {
.cpu_die = cps_cpu_die,
.cleanup_dead_cpu = cps_cleanup_dead_cpu,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = cps_kexec_nonboot_cpu,
#endif
};
......
......@@ -53,7 +53,7 @@ static void loongson_halt(void)
}
}
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
/* 0X80000000~0X80200000 is safe */
#define MAX_ARGS 64
......@@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void)
_machine_halt = loongson_halt;
pm_power_off = loongson_poweroff;
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL);
if (WARN_ON(!kexec_argv))
return -ENOMEM;
......
......@@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = {
.cpu_disable = loongson3_cpu_disable,
.cpu_die = loongson3_cpu_die,
#endif
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
#endif
};
......@@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT
If unsure what to do here, say N.
config ARCH_SUPPORTS_KEXEC
def_bool MMU
def_bool y
config ARCH_SELECTS_KEXEC
def_bool y
......@@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC
select HOTPLUG_CPU if SMP
config ARCH_SUPPORTS_KEXEC_FILE
def_bool 64BIT && MMU
def_bool 64BIT
config ARCH_SELECTS_KEXEC_FILE
def_bool y
......
......@@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
#define TASK_SIZE 0xffffffffUL
#define VMALLOC_START 0
#define VMALLOC_START _AC(0, UL)
#define VMALLOC_END TASK_SIZE
#endif /* !CONFIG_MMU */
......
......@@ -5,17 +5,19 @@
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(VA_BITS);
VMCOREINFO_NUMBER(phys_ram_base);
vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET);
vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
#ifdef CONFIG_MMU
VMCOREINFO_NUMBER(VA_BITS);
vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
#ifdef CONFIG_64BIT
vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
#endif
#endif
vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
......
......@@ -28,7 +28,7 @@
/* The native architecture */
#define KEXEC_ARCH KEXEC_ARCH_SH
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
/* arch/sh/kernel/machine_kexec.c */
void reserve_crashkernel(void);
......@@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}
#else
static inline void reserve_crashkernel(void) { }
#endif /* CONFIG_KEXEC */
#endif /* CONFIG_KEXEC_CORE */
#endif /* __ASM_SH_KEXEC_H */
......@@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
......
......@@ -63,7 +63,7 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.restart = native_machine_restart,
.halt = native_machine_halt,
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
.crash_shutdown = native_machine_crash_shutdown,
#endif
};
......@@ -88,7 +88,7 @@ void machine_halt(void)
machine_ops.halt();
}
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
void machine_crash_shutdown(struct pt_regs *regs)
{
machine_ops.crash_shutdown(regs);
......
......@@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
request_resource(res, &code_resource);
request_resource(res, &data_resource);
request_resource(res, &bss_resource);
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
request_resource(res, &crashk_res);
#endif
......
......@@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void)
{
unsigned long addr = 0;
#ifdef CONFIG_KEXEC
#ifdef CONFIG_KEXEC_CORE
char val[MAX_ADDR_LEN] = { };
int ret;
......
......@@ -559,6 +559,8 @@ struct damon_ctx {
* update
*/
unsigned long next_ops_update_sis;
/* for waiting until the execution of the kdamond_fn is started */
struct completion kdamond_started;
/* public: */
struct task_struct *kdamond;
......
......@@ -886,8 +886,8 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
*/
static inline bool vma_is_initial_heap(const struct vm_area_struct *vma)
{
return vma->vm_start <= vma->vm_mm->brk &&
vma->vm_end >= vma->vm_mm->start_brk;
return vma->vm_start < vma->vm_mm->brk &&
vma->vm_end > vma->vm_mm->start_brk;
}
/*
......@@ -901,8 +901,8 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma)
* its "stack". It's not even well-defined for programs written
* languages like Go.
*/
return vma->vm_start <= vma->vm_mm->start_stack &&
vma->vm_end >= vma->vm_mm->start_stack;
return vma->vm_start <= vma->vm_mm->start_stack &&
vma->vm_end >= vma->vm_mm->start_stack;
}
static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
......
......@@ -232,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
if (folio_test_unevictable(folio) || !lrugen->enabled)
return false;
/*
* There are three common cases for this page:
* 1. If it's hot, e.g., freshly faulted in or previously hot and
* migrated, add it to the youngest generation.
* 2. If it's cold but can't be evicted immediately, i.e., an anon page
* not in swapcache or a dirty page pending writeback, add it to the
* second oldest generation.
* 3. Everything else (clean, cold) is added to the oldest generation.
* There are four common cases for this page:
* 1. If it's hot, i.e., freshly faulted in, add it to the youngest
* generation, and it's protected over the rest below.
* 2. If it can't be evicted immediately, i.e., a dirty page pending
* writeback, add it to the second youngest generation.
* 3. If it should be evicted first, e.g., cold and clean from
* folio_rotate_reclaimable(), add it to the oldest generation.
* 4. Everything else falls between 2 & 3 above and is added to the
* second oldest generation if it's considered inactive, or the
* oldest generation otherwise. See lru_gen_is_active().
*/
if (folio_test_active(folio))
seq = lrugen->max_seq;
else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
(folio_test_reclaim(folio) &&
(folio_test_dirty(folio) || folio_test_writeback(folio))))
seq = lrugen->min_seq[type] + 1;
else
seq = lrugen->max_seq - 1;
else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq)
seq = lrugen->min_seq[type];
else
seq = lrugen->min_seq[type] + 1;
gen = lru_gen_from_seq(seq);
flags = (gen + 1UL) << LRU_GEN_PGOFF;
......
......@@ -505,33 +505,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
* the old generation, is incremented when all its bins become empty.
*
* There are four operations:
* 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its
* 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its
* current generation (old or young) and updates its "seg" to "head";
* 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its
* 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its
* current generation (old or young) and updates its "seg" to "tail";
* 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old
* 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old
* generation, updates its "gen" to "old" and resets its "seg" to "default";
* 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the
* 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the
* young generation, updates its "gen" to "young" and resets its "seg" to
* "default".
*
* The events that trigger the above operations are:
* 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
* 2. The first attempt to reclaim an memcg below low, which triggers
* 2. The first attempt to reclaim a memcg below low, which triggers
* MEMCG_LRU_TAIL;
* 3. The first attempt to reclaim an memcg below reclaimable size threshold,
* which triggers MEMCG_LRU_TAIL;
* 4. The second attempt to reclaim an memcg below reclaimable size threshold,
* which triggers MEMCG_LRU_YOUNG;
* 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG;
* 3. The first attempt to reclaim a memcg offlined or below reclaimable size
* threshold, which triggers MEMCG_LRU_TAIL;
* 4. The second attempt to reclaim a memcg offlined or below reclaimable size
* threshold, which triggers MEMCG_LRU_YOUNG;
* 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG;
* 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG;
* 7. Offlining an memcg, which triggers MEMCG_LRU_OLD.
* 7. Offlining a memcg, which triggers MEMCG_LRU_OLD.
*
* Note that memcg LRU only applies to global reclaim, and the round-robin
* incrementing of their max_seq counters ensures the eventual fairness to all
* eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
* Notes:
* 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing
* of their max_seq counters ensures the eventual fairness to all eligible
* memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
* 2. There are only two valid generations: old (seq) and young (seq+1).
* MEMCG_NR_GENS is set to three so that when reading the generation counter
* locklessly, a stale value (seq-1) does not wraparound to young.
*/
#define MEMCG_NR_GENS 2
#define MEMCG_NR_GENS 3
#define MEMCG_NR_BINS 8
struct lru_gen_memcg {
......
......@@ -94,7 +94,6 @@ config KEXEC_JUMP
config CRASH_DUMP
bool "kernel crash dumps"
depends on ARCH_SUPPORTS_CRASH_DUMP
depends on ARCH_SUPPORTS_KEXEC
select CRASH_CORE
select KEXEC_CORE
help
......
......@@ -199,7 +199,7 @@ static __initdata char *suffix_tbl[] = {
* It returns 0 on success and -EINVAL on failure.
*/
static int __init parse_crashkernel_suffix(char *cmdline,
unsigned long long *crash_size,
unsigned long long *crash_size,
const char *suffix)
{
char *cur = cmdline;
......@@ -268,9 +268,9 @@ static int __init __parse_crashkernel(char *cmdline,
unsigned long long *crash_base,
const char *suffix)
{
char *first_colon, *first_space;
char *ck_cmdline;
char *name = "crashkernel=";
char *first_colon, *first_space;
char *ck_cmdline;
char *name = "crashkernel=";
BUG_ON(!crash_size || !crash_base);
*crash_size = 0;
......@@ -440,7 +440,7 @@ void __init reserve_crashkernel_generic(char *cmdline,
return;
}
if ((crash_base > CRASH_ADDR_LOW_MAX) &&
if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
crash_low_size && reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
return;
......
......@@ -445,6 +445,8 @@ struct damon_ctx *damon_new_ctx(void)
if (!ctx)
return NULL;
init_completion(&ctx->kdamond_started);
ctx->attrs.sample_interval = 5 * 1000;
ctx->attrs.aggr_interval = 100 * 1000;
ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
......@@ -668,11 +670,14 @@ static int __damon_start(struct damon_ctx *ctx)
mutex_lock(&ctx->kdamond_lock);
if (!ctx->kdamond) {
err = 0;
reinit_completion(&ctx->kdamond_started);
ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
nr_running_ctxs);
if (IS_ERR(ctx->kdamond)) {
err = PTR_ERR(ctx->kdamond);
ctx->kdamond = NULL;
} else {
wait_for_completion(&ctx->kdamond_started);
}
}
mutex_unlock(&ctx->kdamond_lock);
......@@ -1433,6 +1438,7 @@ static int kdamond_fn(void *data)
pr_debug("kdamond (%d) starts\n", current->pid);
complete(&ctx->kdamond_started);
kdamond_init_intervals_sis(ctx);
if (ctx->ops.init)
......
......@@ -1080,7 +1080,24 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
truncate_inode_folio(mapping, folio);
if (!folio_test_large(folio)) {
truncate_inode_folio(mapping, folio);
} else if (truncate_inode_partial_folio(folio, lstart, lend)) {
/*
* If we split a page, reset the loop so
* that we pick up the new sub pages.
* Otherwise the THP was entirely
* dropped or the target range was
* zeroed, so just continue the loop as
* is.
*/
if (!folio_test_large(folio)) {
folio_unlock(folio);
index = start;
break;
}
}
}
folio_unlock(folio);
}
......
......@@ -4089,6 +4089,9 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
else
VM_WARN_ON_ONCE(true);
WRITE_ONCE(lruvec->lrugen.seg, seg);
WRITE_ONCE(lruvec->lrugen.gen, new);
hlist_nulls_del_rcu(&lruvec->lrugen.list);
if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
......@@ -4099,9 +4102,6 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
pgdat->memcg_lru.nr_memcgs[old]--;
pgdat->memcg_lru.nr_memcgs[new]++;
lruvec->lrugen.gen = new;
WRITE_ONCE(lruvec->lrugen.seg, seg);
if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
......@@ -4124,11 +4124,11 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg)
gen = get_memcg_gen(pgdat->memcg_lru.seq);
lruvec->lrugen.gen = gen;
hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
pgdat->memcg_lru.nr_memcgs[gen]++;
lruvec->lrugen.gen = gen;
spin_unlock_irq(&pgdat->memcg_lru.lock);
}
}
......@@ -4232,7 +4232,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* protected */
if (tier > tier_idx) {
if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) {
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
gen = folio_inc_gen(lruvec, folio, false);
......@@ -4598,7 +4598,12 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
}
/* try to scrape all its memory if this memcg was deleted */
*nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
if (!mem_cgroup_online(memcg)) {
*nr_to_scan = total;
return false;
}
*nr_to_scan = total >> sc->priority;
/*
* The aging tries to be lazy to reduce the overhead, while the eviction
......@@ -4635,7 +4640,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
DEFINE_MAX_SEQ(lruvec);
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return 0;
return -1;
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
return nr_to_scan;
......@@ -4648,20 +4653,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
}
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
{
int i;
enum zone_watermarks mark;
/* don't abort memcg reclaim to ensure fairness */
if (!root_reclaim(sc))
return -1;
return false;
if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order)))
return true;
/* check the order to exclude compaction-induced reclaim */
if (!current_is_kswapd() || sc->order)
return false;
mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ?
WMARK_PROMO : WMARK_HIGH;
for (i = 0; i <= sc->reclaim_idx; i++) {
struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH;
if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0))
return false;
}
return max(sc->nr_to_reclaim, compact_gap(sc->order));
/* kswapd should abort if all eligible zones are safe */
return true;
}
static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
long nr_to_scan;
unsigned long scanned = 0;
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
int swappiness = get_swappiness(lruvec, sc);
/* clean file folios are more likely to exist */
......@@ -4683,13 +4709,13 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
if (scanned >= nr_to_scan)
break;
if (sc->nr_reclaimed >= nr_to_reclaim)
if (should_abort_scan(lruvec, sc))
break;
cond_resched();
}
/* whether try_to_inc_max_seq() was successful */
/* whether this lruvec should be rotated */
return nr_to_scan < 0;
}
......@@ -4698,14 +4724,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
bool success;
unsigned long scanned = sc->nr_scanned;
unsigned long reclaimed = sc->nr_reclaimed;
int seg = lru_gen_memcg_seg(lruvec);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
/* see the comment on MEMCG_NR_GENS */
if (!lruvec_is_sizable(lruvec, sc))
return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
mem_cgroup_calculate_protection(NULL, memcg);
if (mem_cgroup_below_min(NULL, memcg))
......@@ -4713,7 +4734,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
if (mem_cgroup_below_low(NULL, memcg)) {
/* see the comment on MEMCG_NR_GENS */
if (seg != MEMCG_LRU_TAIL)
if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL)
return MEMCG_LRU_TAIL;
memcg_memory_event(memcg, MEMCG_LOW);
......@@ -4729,7 +4750,15 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
flush_reclaim_state(sc);
return success ? MEMCG_LRU_YOUNG : 0;
if (success && mem_cgroup_online(memcg))
return MEMCG_LRU_YOUNG;
if (!success && lruvec_is_sizable(lruvec, sc))
return 0;
/* one retry if offlined or too small */
return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ?
MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
}
#ifdef CONFIG_MEMCG
......@@ -4743,14 +4772,13 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
struct lruvec *lruvec;
struct lru_gen_folio *lrugen;
struct mem_cgroup *memcg;
const struct hlist_nulls_node *pos;
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
struct hlist_nulls_node *pos;
gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
restart:
op = 0;
memcg = NULL;
gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
rcu_read_lock();
......@@ -4761,6 +4789,10 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
}
mem_cgroup_put(memcg);
memcg = NULL;
if (gen != READ_ONCE(lrugen->gen))
continue;
lruvec = container_of(lrugen, struct lruvec, lrugen);
memcg = lruvec_memcg(lruvec);
......@@ -4777,7 +4809,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
rcu_read_lock();
if (sc->nr_reclaimed >= nr_to_reclaim)
if (should_abort_scan(lruvec, sc))
break;
}
......@@ -4788,7 +4820,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
mem_cgroup_put(memcg);
if (sc->nr_reclaimed >= nr_to_reclaim)
if (!is_a_nulls(pos))
return;
/* restart if raced with lru_gen_rotate_memcg() */
......@@ -4845,16 +4877,14 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control
if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
return;
/*
* Determine the initial priority based on ((total / MEMCG_NR_GENS) >>
* priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the
* estimated reclaimed_to_scanned_ratio = inactive / total.
* Determine the initial priority based on
* (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
* where reclaimed_to_scanned_ratio = inactive / total.
*/
reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
if (get_swappiness(lruvec, sc))
reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
reclaimable /= MEMCG_NR_GENS;
/* round down reclaimable and round up sc->nr_to_reclaim */
priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
......
......@@ -313,10 +313,10 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
* 1. For pages accessed through page tables, hotter pages pushed out
* hot pages which refaulted immediately.
* 2. For pages accessed multiple times through file descriptors,
* numbers of accesses might have been out of the range.
* they would have been protected by sort_folio().
*/
if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) {
folio_set_workingset(folio);
if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) {
set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset));
mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
}
unlock:
......
......@@ -155,12 +155,10 @@ ifneq ($(KBUILD_OUTPUT),)
abs_objtree := $(realpath $(abs_objtree))
BUILD := $(abs_objtree)/kselftest
KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
KHDR_DIR := ${abs_objtree}/usr/include
else
BUILD := $(CURDIR)
abs_srctree := $(shell cd $(top_srcdir) && pwd)
KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
KHDR_DIR := ${abs_srctree}/usr/include
DEFAULT_INSTALL_HDR_PATH := 1
endif
......@@ -174,7 +172,7 @@ export KHDR_INCLUDES
# all isn't the first target in the file.
.DEFAULT_GOAL := all
all: kernel_header_files
all:
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
......@@ -185,23 +183,6 @@ all: kernel_header_files
ret=$$((ret * $$?)); \
done; exit $$ret;
kernel_header_files:
@ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
if [ $$? -ne 0 ]; then \
RED='\033[1;31m'; \
NOCOLOR='\033[0m'; \
echo; \
echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
echo "Please run this and try again:"; \
echo; \
echo " cd $(top_srcdir)"; \
echo " make headers"; \
echo; \
exit 1; \
fi
.PHONY: kernel_header_files
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
......
......@@ -44,26 +44,10 @@ endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
top_srcdir = $(selfdir)/../../..
ifeq ("$(origin O)", "command line")
KBUILD_OUTPUT := $(O)
ifeq ($(KHDR_INCLUDES),)
KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
endif
ifneq ($(KBUILD_OUTPUT),)
# Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
# expand a shell special character '~'. We use a somewhat tedious way here.
abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
$(if $(abs_objtree),, \
$(error failed to create output directory "$(KBUILD_OUTPUT)"))
# $(realpath ...) resolves symlinks
abs_objtree := $(realpath $(abs_objtree))
KHDR_DIR := ${abs_objtree}/usr/include
else
abs_srctree := $(shell cd $(top_srcdir) && pwd)
KHDR_DIR := ${abs_srctree}/usr/include
endif
KHDR_INCLUDES := -isystem $(KHDR_DIR)
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
......@@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \
$(TEST_GEN_FILES)
kernel_header_files:
@ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
if [ $$? -ne 0 ]; then \
RED='\033[1;31m'; \
NOCOLOR='\033[0m'; \
echo; \
echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
echo "Please run this and try again:"; \
echo; \
echo " cd $(top_srcdir)"; \
echo " make headers"; \
echo; \
exit 1; \
fi
.PHONY: kernel_header_files
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
......
......@@ -1680,6 +1680,8 @@ int main(int argc, char **argv)
{
int err;
ksft_print_header();
pagesize = getpagesize();
thpsize = read_pmd_pagesize();
if (thpsize)
......@@ -1689,7 +1691,6 @@ int main(int argc, char **argv)
ARRAY_SIZE(hugetlbsizes));
detect_huge_zeropage();
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment