Commit 9d9a2f29 authored by Linus Torvalds

Merge tag 'mm-hotfixes-stable-2024-07-10-13-19' of...

Merge tag 'mm-hotfixes-stable-2024-07-10-13-19' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "21 hotfixes, 15 of which are cc:stable.

  No identifiable theme here - all are singleton patches, 19 are for MM"

* tag 'mm-hotfixes-stable-2024-07-10-13-19' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (21 commits)
  mm/hugetlb: fix kernel NULL pointer dereference when migrating hugetlb folio
  mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio()
  filemap: replace pte_offset_map() with pte_offset_map_nolock()
  arch/xtensa: always_inline get_current() and current_thread_info()
  sched.h: always_inline alloc_tag_{save|restore} to fix modpost warnings
  MAINTAINERS: mailmap: update Lorenzo Stoakes's email address
  mm: fix crashes from deferred split racing folio migration
  lib/build_OID_registry: avoid non-destructive substitution for Perl < 5.13.2 compat
  mm: gup: stop abusing try_grab_folio
  nilfs2: fix kernel bug on rename operation of broken directory
  mm/hugetlb_vmemmap: fix race with speculative PFN walkers
  cachestat: do not flush stats in recency check
  mm/shmem: disable PMD-sized page cache if needed
  mm/filemap: skip to create PMD-sized page cache if needed
  mm/readahead: limit page cache size in page_cache_ra_order()
  mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray
  mm/damon/core: merge regions aggressively when max_nr_regions is unmet
  Fix userfaultfd_api to return EINVAL as expected
  mm: vmalloc: check if a hash-index is in cpu_possible_mask
  mm: prevent derefencing NULL ptr in pfn_section_valid()
  ...
parents ef2b7eb5 f708f697
......@@ -384,6 +384,7 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
......
......@@ -14472,7 +14472,7 @@ MEMORY MAPPING
M: Andrew Morton <akpm@linux-foundation.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Lorenzo Stoakes <lstoakes@gmail.com>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
......
......@@ -19,7 +19,7 @@
struct task_struct;
static inline struct task_struct *get_current(void)
static __always_inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
......
......@@ -91,7 +91,7 @@ struct thread_info {
}
/* how to get the thread information struct from C */
static inline struct thread_info *current_thread_info(void)
static __always_inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
......
......@@ -383,11 +383,39 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
struct folio *folio;
struct nilfs_dir_entry *de, *next_de;
size_t limit;
char *msg;
de = nilfs_get_folio(dir, 0, &folio);
if (IS_ERR(de))
return NULL;
return nilfs_next_entry(de);
limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
!nilfs_match(1, ".", de))) {
msg = "missing '.'";
goto fail;
}
next_de = nilfs_next_entry(de);
/*
* If "next_de" has not reached the end of the chunk, there is
* at least one more record. Check whether it matches "..".
*/
if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
!nilfs_match(2, "..", next_de))) {
msg = "missing '..'";
goto fail;
}
*foliop = folio;
return next_de;
fail:
nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
folio_release_kmap(folio, de);
return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
......
......@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
goto out;
features = uffdio_api.features;
ret = -EINVAL;
if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
if (uffdio_api.api != UFFD_API)
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
......@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
ret = -EINVAL;
if (features & ~uffdio_api.features)
goto err_out;
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
......
......@@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn)
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
struct mem_section_usage *usage = READ_ONCE(ms->usage);
return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
return usage ? test_bit(idx, usage->subsection_map) : 0;
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
......
......@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
bool ret = atomic_add_unless(&page->_refcount, nr, u);
bool ret = false;
rcu_read_lock();
/* avoid writing to the vmemmap area being remapped */
if (!page_is_fake_head(page) && page_ref_count(page) != u)
ret = atomic_add_unless(&page->_refcount, nr, u);
rcu_read_unlock();
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
......@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
return folio_ref_add_unless(folio, 1, 0);
}
static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
{
#ifdef CONFIG_TINY_RCU
/*
* The caller guarantees the folio will not be freed from interrupt
* context, so (on !SMP) we only need preemption to be disabled
* and TINY_RCU does that for us.
*/
# ifdef CONFIG_PREEMPT_COUNT
VM_BUG_ON(!in_atomic() && !irqs_disabled());
# endif
VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
folio_ref_add(folio, count);
#else
if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
/* Either the folio has been freed, or will be freed. */
return false;
}
#endif
return true;
}
/**
* folio_try_get_rcu - Attempt to increase the refcount on a folio.
* @folio: The folio.
*
* This is a version of folio_try_get() optimised for non-SMP kernels.
* If you are still holding the rcu_read_lock() after looking up the
* page and know that the page cannot have its refcount decreased to
* zero in interrupt context, you can use this instead of folio_try_get().
*
* Example users include get_user_pages_fast() (as pages are not unmapped
* from interrupt context) and the page cache lookups (as pages are not
* truncated from interrupt context). We also know that pages are not
* frozen in interrupt context for the purposes of splitting or migration.
*
* You can also use this function if you're holding a lock that prevents
* pages being frozen & removed; eg the i_pages lock for the page cache
* or the mmap_lock or page table lock for page tables. In this case,
* it will always succeed, and you could have used a plain folio_get(),
* but it's sometimes more convenient to have a common function called
* from both locked and RCU-protected contexts.
*
* Return: True if the reference count was successfully incremented.
*/
static inline bool folio_try_get_rcu(struct folio *folio)
static inline bool folio_ref_try_add(struct folio *folio, int count)
{
return folio_ref_try_add_rcu(folio, 1);
return folio_ref_add_unless(folio, count, 0);
}
static inline int page_ref_freeze(struct page *page, int count)
......
......@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER 8
#define PREFERRED_MAX_PAGECACHE_ORDER 8
#endif
/*
* xas_split_alloc() does not support arbitrary orders. This implies no
* 512MB THP on ARM64 with 64KB base page size.
*/
#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
......
......@@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#ifdef CONFIG_MEM_ALLOC_PROFILING
static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
swap(current->alloc_tag, tag);
return tag;
}
static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
......
......@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}
/* linux/mm/workingset.c */
bool workingset_test_recent(void *shadow, bool file, bool *workingset);
bool workingset_test_recent(void *shadow, bool file, bool *workingset,
bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
......
......@@ -38,7 +38,9 @@ close IN_FILE || die;
#
open C_FILE, ">$ARGV[1]" or die;
print C_FILE "/*\n";
print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
my $scriptname = $0;
$scriptname =~ s#^\Q$abs_srctree/\E##;
print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
print C_FILE " */\n";
#
......
......@@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
* access frequencies are similar. This is for minimizing the monitoring
* overhead under the dynamically changeable access pattern. If a merge was
* unnecessarily made, later 'kdamond_split_regions()' will revert it.
*
* The total number of regions could be higher than the user-defined limit,
* max_nr_regions for some cases. For example, the user can update
* max_nr_regions to a number that is lower than the current number of regions
* while DAMON is running. For such a case, repeat merging until the limit is
* met while increasing @threshold up to possible maximum level.
*/
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
damon_for_each_target(t, c)
damon_merge_regions_of(t, threshold, sz_limit);
unsigned int nr_regions;
unsigned int max_thres;
max_thres = c->attrs.aggr_interval /
(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
do {
nr_regions = 0;
damon_for_each_target(t, c) {
damon_merge_regions_of(t, threshold, sz_limit);
nr_regions += damon_nr_regions(t);
}
threshold = max(1, threshold * 2);
} while (nr_regions > c->attrs.max_nr_regions &&
threshold / 2 < max_thres);
}
/*
......
......@@ -1847,7 +1847,7 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
if (!folio || xa_is_value(folio))
goto out;
if (!folio_try_get_rcu(folio))
if (!folio_try_get(folio))
goto repeat;
if (unlikely(folio != xas_reload(&xas))) {
......@@ -2001,7 +2001,7 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
if (!folio || xa_is_value(folio))
return folio;
if (!folio_try_get_rcu(folio))
if (!folio_try_get(folio))
goto reset;
if (unlikely(folio != xas_reload(xas))) {
......@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (xa_is_value(folio))
goto update_start;
if (!folio_try_get_rcu(folio))
if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
......@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
break;
if (xa_is_sibling(folio))
break;
if (!folio_try_get_rcu(folio))
if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
......@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
if (vm_flags & VM_HUGEPAGE) {
if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
......@@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
ptep = pte_offset_map(vmf->pmd, vmf->address);
ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
......@@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
continue;
if (folio_test_locked(folio))
continue;
if (!folio_try_get_rcu(folio))
if (!folio_try_get(folio))
continue;
/* Has the page moved or been split? */
if (unlikely(folio != xas_reload(xas)))
......@@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;
/* Flush stats (and potentially sleep) outside the RCU read section. */
mem_cgroup_flush_stats_ratelimited(NULL);
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
......@@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
if (workingset_test_recent(shadow, true, &workingset))
if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;
goto resched;
......
This diff is collapsed.
......@@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
ret = try_grab_page(page, flags);
ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
......
......@@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
* folio appears as just a compound page. Otherwise, wait until after
* allocating vmemmap to clear the flag.
*
* A reference is held on the folio, except in the case of demote.
*
* Must be called with hugetlb lock held.
*/
static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus,
bool demote)
static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
int nid = folio_nid(folio);
......@@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
list_del(&folio->lru);
if (folio_test_hugetlb_freed(folio)) {
folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
}
......@@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
if (!folio_test_hugetlb_vmemmap_optimized(folio))
__folio_clear_hugetlb(folio);
/*
* In the case of demote we do not ref count the page as it will soon
* be turned into a page of smaller size.
*/
if (!demote)
folio_ref_unfreeze(folio, 1);
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
}
static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
__remove_hugetlb_folio(h, folio, adjust_surplus, false);
}
static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
__remove_hugetlb_folio(h, folio, adjust_surplus, true);
}
static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
int zeroed;
int nid = folio_nid(folio);
VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
......@@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
*/
folio_set_hugetlb_vmemmap_optimized(folio);
/*
* This folio is about to be managed by the hugetlb allocator and
* should have no users. Drop our reference, and check for others
* just in case.
*/
zeroed = folio_put_testzero(folio);
if (unlikely(!zeroed))
/*
* It is VERY unlikely soneone else has taken a ref
* on the folio. In this case, we simply return as
* free_huge_folio() will be called when this other ref
* is dropped.
*/
return;
arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
}
......@@ -1762,13 +1725,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
return;
}
/*
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
if (unlikely(folio_test_hwpoison(folio)))
folio_clear_hugetlb_hwpoison(folio);
/*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
......@@ -1779,6 +1735,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
spin_unlock_irq(&hugetlb_lock);
}
/*
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
if (unlikely(folio_test_hwpoison(folio)))
folio_clear_hugetlb_hwpoison(folio);
folio_ref_unfreeze(folio, 1);
/*
* Non-gigantic pages demoted from CMA allocated gigantic pages
* need to be given back to CMA in free_gigantic_folio.
......@@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
nid = numa_mem_id();
retry:
folio = __folio_alloc(gfp_mask, order, nid, nmask);
/* Ensure hugetlb folio won't have large_rmappable flag set. */
if (folio)
folio_clear_large_rmappable(folio);
if (folio && !folio_ref_freeze(folio, 1)) {
folio_put(folio);
......@@ -3079,11 +3047,8 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
free_new:
spin_unlock_irq(&hugetlb_lock);
if (new_folio) {
/* Folio has a zero ref count, but needs a ref to be freed */
folio_ref_unfreeze(new_folio, 1);
if (new_folio)
update_and_free_hugetlb_folio(h, new_folio, false);
}
return ret;
}
......@@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
remove_hugetlb_folio_for_demote(h, folio, false);
remove_hugetlb_folio(h, folio, false);
spin_unlock_irq(&hugetlb_lock);
/*
......@@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
if (rc) {
/* Allocation of vmemmap failed, so we cannot demote the folio */
spin_lock_irq(&hugetlb_lock);
folio_ref_unfreeze(folio, 1);
add_hugetlb_folio(h, folio, false);
return rc;
}
......
......@@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
if (!folio_test_hugetlb_vmemmap_optimized(folio))
return 0;
......@@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
*/
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
synchronize_rcu();
return __hugetlb_vmemmap_restore_folio(h, folio, 0);
}
......@@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
long restored = 0;
long ret = 0;
/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
synchronize_rcu();
list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
if (folio_test_hugetlb_vmemmap_optimized(folio)) {
ret = __hugetlb_vmemmap_restore_folio(h, folio,
......@@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
if (!vmemmap_should_optimize_folio(h, folio))
return ret;
......@@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
LIST_HEAD(vmemmap_pages);
/* avoid writes from page_ref_add_unless() while folding vmemmap */
synchronize_rcu();
__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
free_vmemmap_page_list(&vmemmap_pages);
}
......@@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
flush_tlb_all();
/* avoid writes from page_ref_add_unless() while folding vmemmap */
synchronize_rcu();
list_for_each_entry(folio, folio_list, lru) {
int ret;
......
......@@ -1182,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page);
/*
* mm/gup.c
*/
struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
int __must_check try_grab_page(struct page *page, unsigned int flags);
int __must_check try_grab_folio(struct folio *folio, int refs,
unsigned int flags);
/*
* mm/huge_memory.c
......
......@@ -7823,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
/* Transfer the charge and the css ref */
commit_charge(new, memcg);
/*
* If the old folio is a large folio and is in the split queue, it needs
* to be removed from the split queue now, in case getting an incorrect
* split queue in destroy_large_folio() after the memcg of the old folio
* is cleared.
*
* In addition, the old folio is about to be freed after migration, so
* removing from the split queue a bit earlier seems reasonable.
*/
if (folio_test_large(old) && folio_test_large_rmappable(old))
folio_undo_large_rmappable(old);
old->memcg_data = 0;
}
......
......@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
/* Take off deferred split queue while frozen and memcg set */
if (folio_test_large(folio) &&
folio_test_large_rmappable(folio)) {
if (!folio_ref_freeze(folio, expected_count))
return -EAGAIN;
folio_undo_large_rmappable(folio);
folio_ref_unfreeze(folio, expected_count);
}
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
......@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping,
return -EAGAIN;
}
/* Take off deferred split queue while frozen and memcg set */
if (folio_test_large(folio) && folio_test_large_rmappable(folio))
folio_undo_large_rmappable(folio);
/*
* Now we know that no one else is looking at the folio:
* no turning back from here.
......
......@@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
limit = min(limit, index + ra->size - 1);
if (new_order < MAX_PAGECACHE_ORDER) {
if (new_order < MAX_PAGECACHE_ORDER)
new_order += 2;
new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
new_order = min_t(unsigned int, new_order, ilog2(ra->size));
}
new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
new_order = min_t(unsigned int, new_order, ilog2(ra->size));
/* See comment in page_cache_ra_unbounded() */
nofs = memalloc_nofs_save();
......
......@@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags)
static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
bool shmem_huge_force, struct mm_struct *mm,
unsigned long vm_flags)
{
loff_t i_size;
......@@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
}
}
bool shmem_is_huge(struct inode *inode, pgoff_t index,
bool shmem_huge_force, struct mm_struct *mm,
unsigned long vm_flags)
{
if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
return false;
return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags);
}
#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
......
......@@ -2543,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
static struct xarray *
addr_to_vb_xa(unsigned long addr)
{
int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
/*
* Please note, nr_cpu_ids - 1 is the highest set bit in
* cpu_possible_mask, i.e. we never invoke cpumask_next()
* when the index is nr_cpu_ids - 1.
*/
if (!cpu_possible(index))
index = cpumask_next(index, cpu_possible_mask);
return &per_cpu(vmap_block_queue, index).vmap_blocks;
}
......
......@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
* @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
bool workingset_test_recent(void *shadow, bool file, bool *workingset)
bool workingset_test_recent(void *shadow, bool file, bool *workingset,
bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
......@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
/*
* Flush stats (and potentially sleep) outside the RCU read section.
*
* Note that workingset_test_recent() itself might be called in RCU read
* section (e.g., in cachestat) - these callers need to skip flushing
* stats (via the flush argument).
*
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
mem_cgroup_flush_stats_ratelimited(eviction_memcg);
if (flush)
mem_cgroup_flush_stats_ratelimited(eviction_memcg);
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
......@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
if (!workingset_test_recent(shadow, file, &workingset))
if (!workingset_test_recent(shadow, file, &workingset, true))
return;
folio_set_active(folio);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment