Commit 9917ff5f authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "5 patches.

  Subsystems affected by this patch series: binfmt, procfs, and mm
  (vmscan, memcg, and kfence)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  kfence: make test case compatible with run time set sample interval
  mm: memcg: synchronize objcg lists with a dedicated spinlock
  mm: vmscan: remove deadlock due to throttling failing to make progress
  fs/proc: task_mmu.c: don't read mapcount for migration entry
  fs/binfmt_elf: fix PT_LOAD p_align values for loaders
parents 83e39664 8913c610
fs/binfmt_elf.c
@@ -1117,7 +1117,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                  * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
                  */
                 alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
-                if (alignment > ELF_MIN_ALIGN) {
+                if (interpreter || alignment > ELF_MIN_ALIGN) {
                         load_bias = ELF_ET_DYN_BASE;
                         if (current->flags & PF_RANDOMIZE)
                                 load_bias += arch_mmap_rnd();
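For context, the alignment tested above is the largest p_align among the binary's PT_LOAD program headers, as computed by maximum_alignment(). Below is a small, illustrative userspace sketch of that same calculation; it assumes a 64-bit ELF, deliberately skips the power-of-two sanity check the kernel applies, and is not part of the patch:

/*
 * Illustrative only: report the largest PT_LOAD p_align of a 64-bit ELF,
 * roughly what the kernel's maximum_alignment() computes before deciding
 * whether to base the mapping at ELF_ET_DYN_BASE.
 */
#include <elf.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        Elf64_Ehdr eh;
        Elf64_Phdr ph;
        uint64_t align = 0;
        int fd, i;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <elf-file>\n", argv[0]);
                return 1;
        }
        fd = open(argv[1], O_RDONLY);
        if (fd < 0 || pread(fd, &eh, sizeof(eh), 0) != sizeof(eh)) {
                perror("read ehdr");
                return 1;
        }
        for (i = 0; i < eh.e_phnum; i++) {
                off_t off = eh.e_phoff + (off_t)i * eh.e_phentsize;

                if (pread(fd, &ph, sizeof(ph), off) != sizeof(ph))
                        break;
                /* Only PT_LOAD alignments matter for the mapping decision. */
                if (ph.p_type == PT_LOAD && ph.p_align > align)
                        align = ph.p_align;
        }
        printf("max PT_LOAD p_align: %#llx\n", (unsigned long long)align);
        close(fd);
        return 0;
}

Run against a binary linked with -z max-page-size=0x200000 this would typically report 0x200000, which exceeds ELF_MIN_ALIGN and so takes the ELF_ET_DYN_BASE path above.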
fs/proc/task_mmu.c
@@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 }
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-                bool compound, bool young, bool dirty, bool locked)
+                bool compound, bool young, bool dirty, bool locked,
+                bool migration)
 {
         int i, nr = compound ? compound_nr(page) : 1;
         unsigned long size = nr * PAGE_SIZE;
@@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
          * page_count(page) == 1 guarantees the page is mapped exactly once.
          * If any subpage of the compound page mapped with PTE it would elevate
          * page_count().
+         *
+         * The page_mapcount() is called to get a snapshot of the mapcount.
+         * Without holding the page lock this snapshot can be slightly wrong as
+         * we cannot always read the mapcount atomically. It is not safe to
+         * call page_mapcount() even with PTL held if the page is not mapped,
+         * especially for migration entries. Treat regular migration entries
+         * as mapcount == 1.
          */
-        if (page_count(page) == 1) {
+        if ((page_count(page) == 1) || migration) {
                 smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
                         locked, true);
                 return;
@@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
         struct vm_area_struct *vma = walk->vma;
         bool locked = !!(vma->vm_flags & VM_LOCKED);
         struct page *page = NULL;
+        bool migration = false;
 
         if (pte_present(*pte)) {
                 page = vm_normal_page(vma, addr, *pte);
@@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                         } else {
                                 mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
                         }
-                } else if (is_pfn_swap_entry(swpent))
+                } else if (is_pfn_swap_entry(swpent)) {
+                        if (is_migration_entry(swpent))
+                                migration = true;
                         page = pfn_swap_entry_to_page(swpent);
+                }
         } else {
                 smaps_pte_hole_lookup(addr, walk);
                 return;
@@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
         if (!page)
                 return;
 
-        smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
+        smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+                      locked, migration);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -557,6 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
         struct vm_area_struct *vma = walk->vma;
         bool locked = !!(vma->vm_flags & VM_LOCKED);
         struct page *page = NULL;
+        bool migration = false;
 
         if (pmd_present(*pmd)) {
                 /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -564,8 +578,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
         } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                 swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
-                if (is_migration_entry(entry))
+                if (is_migration_entry(entry)) {
+                        migration = true;
                         page = pfn_swap_entry_to_page(entry);
+                }
         }
         if (IS_ERR_OR_NULL(page))
                 return;
@@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                 /* pass */;
         else
                 mss->file_thp += HPAGE_PMD_SIZE;
-        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
+
+        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
+                      locked, migration);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 {
         u64 frame = 0, flags = 0;
         struct page *page = NULL;
+        bool migration = false;
 
         if (pte_present(pte)) {
                 if (pm->show_pfn)
@@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
                         frame = swp_type(entry) |
                                 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
                 flags |= PM_SWAP;
+                migration = is_migration_entry(entry);
                 if (is_pfn_swap_entry(entry))
                         page = pfn_swap_entry_to_page(entry);
         }
 
         if (page && !PageAnon(page))
                 flags |= PM_FILE;
-        if (page && page_mapcount(page) == 1)
+        if (page && !migration && page_mapcount(page) == 1)
                 flags |= PM_MMAP_EXCLUSIVE;
         if (vma->vm_flags & VM_SOFTDIRTY)
                 flags |= PM_SOFT_DIRTY;
@@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
         spinlock_t *ptl;
         pte_t *pte, *orig_pte;
         int err = 0;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        bool migration = false;
 
         ptl = pmd_trans_huge_lock(pmdp, vma);
         if (ptl) {
                 u64 flags = 0, frame = 0;
@@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                         if (pmd_swp_uffd_wp(pmd))
                                 flags |= PM_UFFD_WP;
                         VM_BUG_ON(!is_pmd_migration_entry(pmd));
+                        migration = is_migration_entry(entry);
                         page = pfn_swap_entry_to_page(entry);
                 }
 #endif
 
-                if (page && page_mapcount(page) == 1)
+                if (page && !migration && page_mapcount(page) == 1)
                         flags |= PM_MMAP_EXCLUSIVE;
 
                 for (; addr != end; addr += PAGE_SIZE) {
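The pagemap hunks above gate PM_MMAP_EXCLUSIVE on !migration instead of calling page_mapcount() on a migration entry. That flag is what userspace reads back in bit 56 of each 64-bit /proc/pid/pagemap entry (see Documentation/admin-guide/mm/pagemap.rst). A minimal, illustrative reader, not part of the patch:

/*
 * Illustrative reader for /proc/self/pagemap: prints whether the page
 * backing one heap address is present, swapped, file-backed and
 * exclusively mapped. Bit positions follow pagemap.rst; the flag bits used
 * here do not require the (privileged) PFN field.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define PM_PRESENT        (1ULL << 63)
#define PM_SWAP           (1ULL << 62)
#define PM_FILE           (1ULL << 61)
#define PM_MMAP_EXCLUSIVE (1ULL << 56)

int main(void)
{
        long psize = sysconf(_SC_PAGESIZE);
        void *buf = malloc(psize);
        uint64_t ent;
        int fd;

        if (!buf)
                return 1;
        ((char *)buf)[0] = 1;           /* fault the page in */

        fd = open("/proc/self/pagemap", O_RDONLY);
        if (fd < 0) {
                perror("open pagemap");
                return 1;
        }
        /* One 64-bit entry per virtual page, indexed by vaddr / page size. */
        off_t off = (off_t)((uintptr_t)buf / psize) * sizeof(ent);
        if (pread(fd, &ent, sizeof(ent), off) != sizeof(ent)) {
                perror("pread");
                return 1;
        }
        printf("present=%d swap=%d file=%d exclusive=%d\n",
               !!(ent & PM_PRESENT), !!(ent & PM_SWAP),
               !!(ent & PM_FILE), !!(ent & PM_MMAP_EXCLUSIVE));
        close(fd);
        return 0;
}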
include/linux/kfence.h
@@ -17,6 +17,8 @@
 #include <linux/atomic.h>
 #include <linux/static_key.h>
 
+extern unsigned long kfence_sample_interval;
+
 /*
  * We allocate an even number of pages, as it simplifies calculations to map
  * address to metadata indices; effectively, the very first page serves as an
include/linux/memcontrol.h
@@ -219,7 +219,7 @@ struct obj_cgroup {
         struct mem_cgroup *memcg;
         atomic_t nr_charged_bytes;
         union {
-                struct list_head list;
+                struct list_head list;  /* protected by objcg_lock */
                 struct rcu_head rcu;
         };
 };
@@ -315,7 +315,8 @@ struct mem_cgroup {
 #ifdef CONFIG_MEMCG_KMEM
         int kmemcg_id;
         struct obj_cgroup __rcu *objcg;
-        struct list_head objcg_list; /* list of inherited objcgs */
+        /* list of inherited objcgs, protected by objcg_lock */
+        struct list_head objcg_list;
 #endif
 
         MEMCG_PADDING(_pad2_);
mm/kfence/core.c
@@ -47,7 +47,8 @@
 static bool kfence_enabled __read_mostly;
 
-static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
 
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
mm/kfence/kfence_test.c
@@ -268,13 +268,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
          * 100x the sample interval should be more than enough to ensure we get
          * a KFENCE allocation eventually.
          */
-        timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+        timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
         /*
          * Especially for non-preemption kernels, ensure the allocation-gate
          * timer can catch up: after @resched_after, every failed allocation
          * attempt yields, to ensure the allocation-gate timer is scheduled.
          */
-        resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+        resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
         do {
                 if (test_cache)
                         alloc = kmem_cache_alloc(test_cache, gfp);
@@ -608,7 +608,7 @@ static void test_gfpzero(struct kunit *test)
         int i;
 
         /* Skip if we think it'd take too long. */
-        KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100);
+        KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
 
         setup_test_cache(test, size, 0, NULL);
         buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
@@ -739,7 +739,7 @@ static void test_memcache_alloc_bulk(struct kunit *test)
          * 100x the sample interval should be more than enough to ensure we get
          * a KFENCE allocation eventually.
          */
-        timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+        timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
         do {
                 void *objects[100];
                 int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
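The test now reads the live kfence_sample_interval (exported above) rather than the compile-time CONFIG_KFENCE_SAMPLE_INTERVAL, since the interval can be overridden at boot (kfence.sample_interval=) or changed through the module parameter at run time. A tiny sketch that reads the current value; the path assumes the conventional module-parameter location under sysfs and reading it may require root:

/*
 * Sketch: print KFENCE's current sample interval (milliseconds) from
 * /sys/module/kfence/parameters/sample_interval. Illustrative only.
 */
#include <stdio.h>

int main(void)
{
        unsigned long interval_ms;
        FILE *f = fopen("/sys/module/kfence/parameters/sample_interval", "r");

        if (!f) {
                perror("open sample_interval");
                return 1;
        }
        if (fscanf(f, "%lu", &interval_ms) != 1) {
                fclose(f);
                return 1;
        }
        printf("kfence sample interval: %lu ms\n", interval_ms);
        fclose(f);
        return 0;
}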
mm/memcontrol.c
@@ -254,7 +254,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-extern spinlock_t css_set_lock;
+static DEFINE_SPINLOCK(objcg_lock);
 
 bool mem_cgroup_kmem_disabled(void)
 {
@@ -298,9 +298,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
         if (nr_pages)
                 obj_cgroup_uncharge_pages(objcg, nr_pages);
 
-        spin_lock_irqsave(&css_set_lock, flags);
+        spin_lock_irqsave(&objcg_lock, flags);
         list_del(&objcg->list);
-        spin_unlock_irqrestore(&css_set_lock, flags);
+        spin_unlock_irqrestore(&objcg_lock, flags);
 
         percpu_ref_exit(ref);
         kfree_rcu(objcg, rcu);
@@ -332,7 +332,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
         objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
 
-        spin_lock_irq(&css_set_lock);
+        spin_lock_irq(&objcg_lock);
 
         /* 1) Ready to reparent active objcg. */
         list_add(&objcg->list, &memcg->objcg_list);
@@ -342,7 +342,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
         /* 3) Move already reparented objcgs to the parent's list */
         list_splice(&memcg->objcg_list, &parent->objcg_list);
 
-        spin_unlock_irq(&css_set_lock);
+        spin_unlock_irq(&objcg_lock);
 
         percpu_ref_kill(&objcg->refcnt);
 }
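The memcg hunks above stop piggybacking on css_set_lock, a cgroup-core lock unrelated to objcg bookkeeping, and give the objcg lists their own dedicated objcg_lock. As a rough userspace analogy of that pattern, with purely hypothetical names and no relation to the kernel code:

/*
 * Userspace analogy: one dedicated lock that protects exactly one list,
 * instead of reusing a broad, unrelated subsystem lock. Names (obj_lock,
 * obj_list) are illustrative only.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int id;
        struct obj *next;
};

/* Dedicated lock: protects obj_list and nothing else. */
static pthread_spinlock_t obj_lock;
static struct obj *obj_list;

static void obj_add(int id)
{
        struct obj *o = malloc(sizeof(*o));

        if (!o)
                return;
        o->id = id;
        pthread_spin_lock(&obj_lock);
        o->next = obj_list;             /* push front, like list_add() */
        obj_list = o;
        pthread_spin_unlock(&obj_lock);
}

int main(void)
{
        pthread_spin_init(&obj_lock, PTHREAD_PROCESS_PRIVATE);
        obj_add(1);
        obj_add(2);

        pthread_spin_lock(&obj_lock);
        for (struct obj *o = obj_list; o; o = o->next)
                printf("obj %d\n", o->id);
        pthread_spin_unlock(&obj_lock);
        return 0;
}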
mm/vmscan.c
@@ -1066,8 +1066,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
          * forward progress (e.g. journalling workqueues or kthreads).
          */
         if (!current_is_kswapd() &&
-            current->flags & (PF_IO_WORKER|PF_KTHREAD))
+            current->flags & (PF_IO_WORKER|PF_KTHREAD)) {
+                cond_resched();
                 return;
+        }
 
         /*
          * These figures are pulled out of thin air.