Commit ed3bad2e authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "10 patches.

  Subsystems affected by this patch series: MAINTAINERS and mm (shmem,
  pagealloc, tracing, memcg, memory-failure, vmscan, kfence, and
  hugetlb)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  hugetlb: don't pass page cache pages to restore_reserve_on_error
  kfence: fix is_kfence_address() for addresses below KFENCE_POOL_SIZE
  mm: vmscan: fix missing psi annotation for node_reclaim()
  mm/hwpoison: retry with shake_page() for unhandlable pages
  mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim
  MAINTAINERS: update ClangBuiltLinux IRC chat
  mmflags.h: add missing __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON names
  mm/page_alloc: don't corrupt pcppage_migratetype
  Revert "mm: swap: check if swap backing device is congested or not"
  Revert "mm/shmem: fix shmem_swapin() race with swapoff"
parents 8ba9fbe1 c7b1850d
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4508,7 +4508,7 @@ L:	clang-built-linux@googlegroups.com
 S:	Supported
 W:	https://clangbuiltlinux.github.io/
 B:	https://github.com/ClangBuiltLinux/linux/issues
-C:	irc://chat.freenode.net/clangbuiltlinux
+C:	irc://irc.libera.chat/clangbuiltlinux
 F:	Documentation/kbuild/llvm.rst
 F:	include/linux/compiler-clang.h
 F:	scripts/clang-tools/
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate;
 static __always_inline bool is_kfence_address(const void *addr)
 {
 	/*
-	 * The non-NULL check is required in case the __kfence_pool pointer was
-	 * never initialized; keep it in the slow-path after the range-check.
+	 * The __kfence_pool != NULL check is required to deal with the case
+	 * where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in
+	 * the slow-path after the range-check!
 	 */
-	return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
+	return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool);
 }
 
 /**
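Why the last operand matters: before the pool is set up, __kfence_pool is NULL, so the pointer subtraction leaves addr itself and every address below KFENCE_POOL_SIZE passes the range check; the old "&& addr" operand only filtered out addr == NULL. A minimal userspace model of the predicate, using integer addresses and an invented 2 MiB pool size in place of the real pointers:

#include <stdio.h>

#define KFENCE_POOL_SIZE (2 * 1024 * 1024UL)	/* invented pool size */

static unsigned long pool_base;	/* 0 until "initialized", like __kfence_pool */

/* old check: range test plus "addr != 0" */
static int is_pool_address_old(unsigned long addr)
{
	return (addr - pool_base) < KFENCE_POOL_SIZE && addr;
}

/* fixed check: range test plus "pool_base != 0" */
static int is_pool_address_new(unsigned long addr)
{
	return (addr - pool_base) < KFENCE_POOL_SIZE && pool_base;
}

int main(void)
{
	/* pool not initialized: the old check wrongly claims 4096 is a
	 * pool address, the fixed one does not */
	printf("uninit: old=%d new=%d\n",
	       is_pool_address_old(4096), is_pool_address_new(4096));

	pool_base = 0x7f0000000000UL;	/* made-up pool base */

	/* once the pool exists both variants agree: the unsigned
	 * subtraction wraps for addr < pool_base, so one compare
	 * covers both bounds */
	printf("inside: old=%d new=%d\n",
	       is_pool_address_old(pool_base + 64),
	       is_pool_address_new(pool_base + 64));
	printf("below:  old=%d new=%d\n",
	       is_pool_address_old(pool_base - 64),
	       is_pool_address_new(pool_base - 64));
	return 0;
}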
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
-						  struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+					 struct mem_cgroup *memcg,
+					 unsigned long *min,
+					 unsigned long *low)
 {
+	*min = *low = 0;
+
 	if (mem_cgroup_disabled())
-		return 0;
+		return;
 
 	/*
 	 * There is no reclaim protection applied to a targeted reclaim.
@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
 	 *
 	 */
 	if (root == memcg)
-		return 0;
-
-	if (in_low_reclaim)
-		return READ_ONCE(memcg->memory.emin);
+		return;
 
-	return max(READ_ONCE(memcg->memory.emin),
-		   READ_ONCE(memcg->memory.elow));
+	*min = READ_ONCE(memcg->memory.emin);
+	*low = READ_ONCE(memcg->memory.elow);
 }
 
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 {
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
-						  struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+					 struct mem_cgroup *memcg,
+					 unsigned long *min,
+					 unsigned long *low)
 {
-	return 0;
+	*min = *low = 0;
 }
 
 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -48,7 +48,9 @@
 	{(unsigned long)__GFP_WRITE,		"__GFP_WRITE"},		\
 	{(unsigned long)__GFP_RECLAIM,		"__GFP_RECLAIM"},	\
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
-	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"}\
+	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
+	{(unsigned long)__GFP_ZEROTAGS,		"__GFP_ZEROTAGS"},	\
+	{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
 
 #define show_gfp_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
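For context, show_gfp_flags() feeds this table to __print_flags(), which prints any bits it cannot name as a raw hex remainder, so a missing entry makes trace output silently cryptic rather than failing. A userspace sketch of the same table-driven decoding, with invented bit values standing in for the real __GFP_* constants:

#include <stdio.h>

/* stand-in bit values; the real ones live in include/linux/gfp.h */
#define DEMO_GFP_ZEROTAGS		(1UL << 0)
#define DEMO_GFP_SKIP_KASAN_POISON	(1UL << 1)

static const struct {
	unsigned long mask;
	const char *name;
} gfp_flag_names[] = {
	{ DEMO_GFP_ZEROTAGS,		"__GFP_ZEROTAGS" },
	{ DEMO_GFP_SKIP_KASAN_POISON,	"__GFP_SKIP_KASAN_POISON" },
};

/* print every named bit, then whatever is left over as raw hex, mimicking
 * __print_flags(); a flag missing from the table shows up only as hex,
 * which is the symptom the patch fixes */
static void show_gfp_flags(unsigned long flags)
{
	const char *sep = "";
	size_t i;

	for (i = 0; i < sizeof(gfp_flag_names) / sizeof(gfp_flag_names[0]); i++) {
		if (flags & gfp_flag_names[i].mask) {
			printf("%s%s", sep, gfp_flag_names[i].name);
			sep = "|";
			flags &= ~gfp_flag_names[i].mask;
		}
	}
	if (flags)
		printf("%s0x%lx", sep, flags);
	printf("\n");
}

int main(void)
{
	/* the unnamed (1UL << 5) bit prints as a hex remainder */
	show_gfp_flags(DEMO_GFP_ZEROTAGS | DEMO_GFP_SKIP_KASAN_POISON | (1UL << 5));
	return 0;
}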
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 		if (!rc) {
 			/*
 			 * This indicates there is an entry in the reserve map
-			 * added by alloc_huge_page. We know it was added
+			 * not added by alloc_huge_page. We know it was added
 			 * before the alloc_huge_page call, otherwise
 			 * HPageRestoreReserve would be set on the page.
 			 * Remove the entry so that a subsequent allocation
@@ -4660,6 +4660,8 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(&range);
 out_release_all:
-	restore_reserve_on_error(h, vma, haddr, new_page);
+	/* No restore in case of successful pagetable update (Break COW) */
+	if (new_page != old_page)
+		restore_reserve_on_error(h, vma, haddr, new_page);
 	put_page(new_page);
 out_release_old:
@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	pte_t new_pte;
 	spinlock_t *ptl;
 	unsigned long haddr = address & huge_page_mask(h);
-	bool new_page = false;
+	bool new_page, new_pagecache_page = false;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		goto out;
 
 retry:
+	new_page = false;
 	page = find_lock_page(mapping, idx);
 	if (!page) {
 		/* Check for page in userfault range */
@@ -4842,6 +4845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 					goto retry;
 				goto out;
 			}
+			new_pagecache_page = true;
 		} else {
 			lock_page(page);
 			if (unlikely(anon_vma_prepare(vma))) {
@@ -4926,6 +4930,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
-	restore_reserve_on_error(h, vma, haddr, page);
+	/* restore reserve for newly allocated pages not in page cache */
+	if (new_page && !new_pagecache_page)
+		restore_reserve_on_error(h, vma, haddr, page);
 	put_page(page);
 	goto out;
@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
+	bool new_pagecache_page = false;
 
 	if (is_continue) {
 		ret = -EFAULT;
@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		ret = huge_add_to_page_cache(page, mapping, idx);
 		if (ret)
 			goto out_release_nounlock;
+		new_pagecache_page = true;
 	}
 
 	ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5291,6 +5299,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (vm_shared || is_continue)
 		unlock_page(page);
 out_release_nounlock:
-	restore_reserve_on_error(h, dst_vma, dst_addr, page);
+	if (!new_pagecache_page)
+		restore_reserve_on_error(h, dst_vma, dst_addr, page);
 	put_page(page);
 	goto out;
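All three call sites converge on one rule: only a freshly allocated page that never made it into the page cache should give its reservation back on the error path, since a page-cache insertion takes over the reservation accounting and restoring it again would double-count. A toy predicate capturing that decision, with plain ints standing in for the new_page/new_pagecache_page booleans threaded through above:

#include <stdio.h>

/* hypothetical standalone version of the decision the patch threads
 * through hugetlb_cow(), hugetlb_no_page() and hugetlb_mcopy_atomic_pte() */
static int should_restore_reserve(int newly_allocated, int in_page_cache)
{
	return newly_allocated && !in_page_cache;
}

int main(void)
{
	int fresh, cached;

	/* full truth table: restore only for fresh pages outside the cache */
	for (fresh = 0; fresh <= 1; fresh++)
		for (cached = 0; cached <= 1; cached++)
			printf("new=%d in_cache=%d -> restore=%d\n",
			       fresh, cached,
			       should_restore_reserve(fresh, cached));
	return 0;
}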
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page)
 	 * unexpected races caused by taking a page refcount.
 	 */
 	if (!HWPoisonHandlable(head))
-		return 0;
+		return -EBUSY;
 
 	if (PageTransHuge(head)) {
 		/*
@@ -1199,9 +1199,15 @@ static int get_any_page(struct page *p, unsigned long flags)
 			}
 			goto out;
 		} else if (ret == -EBUSY) {
-			/* We raced with freeing huge page to buddy, retry. */
-			if (pass++ < 3)
+			/*
+			 * We raced with (possibly temporary) unhandlable
+			 * page, retry.
+			 */
+			if (pass++ < 3) {
+				shake_page(p, 1);
 				goto try_again;
+			}
+			ret = -EIO;
 			goto out;
 		}
 	}
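The retry logic amounts to a bounded-retry pattern: a fixed budget of passes, a nudge between attempts, and a hard error once the budget runs out. A generic sketch, with hypothetical try_op()/nudge() helpers in place of the real page handling and shake_page():

#include <stdio.h>
#include <errno.h>

/* hypothetical operation that can transiently fail with -EBUSY */
static int try_op(int attempt)
{
	return attempt < 2 ? -EBUSY : 0;	/* succeeds on the third pass */
}

/* hypothetical equivalent of shake_page(): disturb whatever transient
 * state (e.g. a per-cpu list) made the last attempt fail */
static void nudge(void)
{
}

/* the shape of the fixed get_any_page() loop: retry a bounded number of
 * times, nudging between attempts, and turn a persistent -EBUSY into -EIO */
static int bounded_retry(void)
{
	int pass, ret;

	for (pass = 0; pass < 3; pass++) {
		ret = try_op(pass);
		if (ret != -EBUSY)
			return ret;
		nudge();
	}
	return -EIO;
}

int main(void)
{
	printf("result: %d\n", bounded_retry());	/* 0: third try worked */
	return 0;
}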
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3453,21 +3453,12 @@ void free_unref_page_list(struct list_head *list)
 		 * comment in free_unref_page.
 		 */
 		migratetype = get_pcppage_migratetype(page);
-		if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
-			if (unlikely(is_migrate_isolate(migratetype))) {
-				list_del(&page->lru);
-				free_one_page(page_zone(page), page, pfn, 0,
-					      migratetype, FPI_NONE);
-				continue;
-			}
-
-			/*
-			 * Non-isolated types over MIGRATE_PCPTYPES get added
-			 * to the MIGRATE_MOVABLE pcp list.
-			 */
-			set_pcppage_migratetype(page, MIGRATE_MOVABLE);
-		}
+		if (unlikely(is_migrate_isolate(migratetype))) {
+			list_del(&page->lru);
+			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			continue;
+		}
 
 		set_page_private(page, pfn);
 	}
@@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list)
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_private(page);
 		set_page_private(page, 0);
+
+		/*
+		 * Non-isolated types over MIGRATE_PCPTYPES get added
+		 * to the MIGRATE_MOVABLE pcp list.
+		 */
 		migratetype = get_pcppage_migratetype(page);
+		if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+			migratetype = MIGRATE_MOVABLE;
+
 		trace_mm_page_free_batched(page);
 		free_unref_page_commit(page, pfn, migratetype, 0);
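The corruption came from the deleted set_pcppage_migratetype() call: it overwrote the migratetype cached in the page, which later accounting still needed. The fix clamps only a local copy when picking the pcp list. A minimal model of that distinction, with a trimmed-down migratetype enum copied in shape (not necessarily values) from the kernel:

#include <stdio.h>

/* trimmed-down migratetype space: the first MIGRATE_PCPTYPES types have
 * their own per-cpu lists, everything at or above (e.g. HIGHATOMIC) does not */
enum {
	MIGRATE_UNMOVABLE,
	MIGRATE_MOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_PCPTYPES,
	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
};

struct demo_page {
	int pcppage_migratetype;	/* state stored in the page itself */
};

/* fixed approach: pick the pcp list through a local clamped copy, leaving
 * the stored migratetype untouched for later accounting */
static int pcp_list_for(const struct demo_page *page)
{
	int migratetype = page->pcppage_migratetype;

	if (migratetype >= MIGRATE_PCPTYPES)
		migratetype = MIGRATE_MOVABLE;
	return migratetype;
}

int main(void)
{
	struct demo_page page = { .pcppage_migratetype = MIGRATE_HIGHATOMIC };

	/* list=1 (MOVABLE) while the stored type stays 3 (HIGHATOMIC) */
	printf("list=%d stored=%d\n", pcp_list_for(&page),
	       page.pcppage_migratetype);
	return 0;
}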
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1696,8 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
-	struct swap_info_struct *si;
-	struct page *page = NULL;
+	struct page *page;
 	swp_entry_t swap;
 	int error;
 
@@ -1705,12 +1704,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap = radix_to_swp_entry(*pagep);
 	*pagep = NULL;
 
-	/* Prevent swapoff from happening to us. */
-	si = get_swap_device(swap);
-	if (!si) {
-		error = EINVAL;
-		goto failed;
-	}
 	/* Look it up and read it in.. */
 	page = lookup_swap_cache(swap, NULL, 0);
 	if (!page) {
@@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap_free(swap);
 	*pagep = page;
-	if (si)
-		put_swap_device(si);
 
 	return 0;
 failed:
 	if (!shmem_confirm_swap(mapping, index, swap))
@@ -1784,9 +1775,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 		put_page(page);
 	}
-	if (si)
-		put_swap_device(si);
-
 	return error;
 }
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	if (!mask)
 		goto skip;
 
-	/* Test swap type to make sure the dereference is safe */
-	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
-		struct inode *inode = si->swap_file->f_mapping->host;
-
-		if (inode_read_congested(inode))
-			goto skip;
-	}
-
 	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -100,9 +100,12 @@ struct scan_control {
 	unsigned int may_swap:1;
 
 	/*
-	 * Cgroups are not reclaimed below their configured memory.low,
-	 * unless we threaten to OOM. If any cgroups are skipped due to
-	 * memory.low and nothing was reclaimed, go back for memory.low.
+	 * Cgroup memory below memory.low is protected as long as we
+	 * don't threaten to OOM. If any cgroup is reclaimed at
+	 * reduced force or passed over entirely due to its memory.low
+	 * setting (memcg_low_skipped), and nothing is reclaimed as a
+	 * result, then go back for one more cycle that reclaims the
+	 * protected memory (memcg_low_reclaim) to avert OOM.
 	 */
 	unsigned int memcg_low_reclaim:1;
 	unsigned int memcg_low_skipped:1;
@@ -2537,15 +2540,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
 		unsigned long lruvec_size;
+		unsigned long low, min;
 		unsigned long scan;
-		unsigned long protection;
 
 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-		protection = mem_cgroup_protection(sc->target_mem_cgroup,
-						   memcg,
-						   sc->memcg_low_reclaim);
-
-		if (protection) {
+		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+				      &min, &low);
+
+		if (min || low) {
 			/*
 			 * Scale a cgroup's reclaim pressure by proportioning
 			 * its current usage to its memory.low or memory.min
@@ -2576,6 +2578,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			 * hard protection.
 			 */
 			unsigned long cgroup_size = mem_cgroup_size(memcg);
+			unsigned long protection;
+
+			/* memory.low scaling, make sure we retry before OOM */
+			if (!sc->memcg_low_reclaim && low > min) {
+				protection = low;
+				sc->memcg_low_skipped = 1;
+			} else {
+				protection = min;
+			}
 
 			/* Avoid TOCTOU with earlier protection check */
 			cgroup_size = max(cgroup_size, protection);
@@ -4413,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.may_swap = 1,
 		.reclaim_idx = gfp_zone(gfp_mask),
 	};
+	unsigned long pflags;
 
 	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
 					   sc.gfp_mask);
 
 	cond_resched();
+	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(sc.gfp_mask);
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
@@ -4442,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
+	psi_memstall_leave(&pflags);
 
 	trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
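The proportional-reclaim fix is easiest to see as a two-pass protocol: the first pass scales pressure against memory.low and records in memcg_low_skipped that protection was honoured; only if the whole cycle reclaims nothing does a second pass run with memcg_low_reclaim set, falling back to memory.min. A standalone model of the selection the patch adds to get_scan_count(), with made-up min/low values:

#include <stdio.h>

/* the selection logic from the hunk above, lifted out as a plain function:
 * the first pass honours memory.low and records that it did so; the retry
 * pass only honours memory.min */
static unsigned long pick_protection(unsigned long min, unsigned long low,
				     int low_reclaim_pass, int *low_skipped)
{
	if (!low_reclaim_pass && low > min) {
		*low_skipped = 1;	/* ensures a retry before declaring OOM */
		return low;
	}
	return min;
}

int main(void)
{
	unsigned long min = 64, low = 512;	/* made-up protection values */
	int low_skipped = 0;

	/* pass 1: reclaim is throttled against memory.low */
	printf("pass 1 protects %lu pages (low_skipped=%d)\n",
	       pick_protection(min, low, 0, &low_skipped), low_skipped);

	/* pass 2, taken only because low_skipped was set and nothing was
	 * reclaimed: dip below memory.low but still respect memory.min */
	if (low_skipped)
		printf("pass 2 protects %lu pages\n",
		       pick_protection(min, low, 1, &low_skipped));
	return 0;
}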