Commit 839c4f59 authored by Linus Torvalds

Merge tag 'mm-hotfixes-stable-2024-09-19-00-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc hotfixes from Andrew Morton:
 "12 hotfixes, 11 of which are cc:stable.

  Four fixes for longstanding ocfs2 issues and the remainder address
  random MM things"

* tag 'mm-hotfixes-stable-2024-09-19-00-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/madvise: process_madvise() drop capability check if same mm
  mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
  mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
  mm: change vmf_anon_prepare() to __vmf_anon_prepare()
  resource: fix region_intersects() vs add_memory_driver_managed()
  zsmalloc: use unique zsmalloc caches names
  mm/damon/vaddr: protect vma traversal in __damon_va_three_regions() with rcu read lock
  mm: vmscan.c: fix OOM on swap stress test
  ocfs2: cancel dqi_sync_work before freeing oinfo
  ocfs2: fix possible null-ptr-deref in ocfs2_set_buffer_uptodate
  ocfs2: remove unreasonable unlock in ocfs2_read_blocks
  ocfs2: fix null-ptr-deref when journal load failed.
parents a1d1eb2f 22af8caf
@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		if (bhs[i] == NULL) {
 			bhs[i] = sb_getblk(sb, block++);
 			if (bhs[i] == NULL) {
-				ocfs2_metadata_cache_io_unlock(ci);
 				status = -ENOMEM;
 				mlog_errno(status);
 				/* Don't forget to put previous bh! */
@@ -389,7 +388,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		/* Always set the buffer in the cache, even if it was
 		 * a forced read, or read-ahead which hasn't yet
 		 * completed. */
-		ocfs2_set_buffer_uptodate(ci, bh);
+		if (bh)
+			ocfs2_set_buffer_uptodate(ci, bh);
 	}

 	ocfs2_metadata_cache_io_unlock(ci);
......
@@ -1055,7 +1055,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 	if (!igrab(inode))
 		BUG();

-	num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+	num_running_trans = atomic_read(&(journal->j_num_trans));
 	trace_ocfs2_journal_shutdown(num_running_trans);

 	/* Do a commit_cache here. It will flush our journal, *and*
@@ -1074,9 +1074,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 		osb->commit_task = NULL;
 	}

-	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+	BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);

-	if (ocfs2_mount_local(osb)) {
+	if (ocfs2_mount_local(osb) &&
+	    (journal->j_journal->j_flags & JBD2_LOADED)) {
 		jbd2_journal_lock_updates(journal->j_journal);
 		status = jbd2_journal_flush(journal->j_journal, 0);
 		jbd2_journal_unlock_updates(journal->j_journal);
......
@@ -692,7 +692,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	int status;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_quota_recovery *rec;
-	int locked = 0;
+	int locked = 0, global_read = 0;

 	info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
 	info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
@@ -700,6 +700,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	if (!oinfo) {
 		mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
 		     " info.");
+		status = -ENOMEM;
 		goto out_err;
 	}
 	info->dqi_priv = oinfo;
@@ -712,6 +713,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 	status = ocfs2_global_read_info(sb, type);
 	if (status < 0)
 		goto out_err;
+	global_read = 1;

 	status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
 	if (status < 0) {
@@ -782,10 +784,12 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
 		if (locked)
 			ocfs2_inode_unlock(lqinode, 1);
 		ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+		if (global_read)
+			cancel_delayed_work_sync(&oinfo->dqi_sync_work);
 		kfree(oinfo);
 	}
 	brelse(bh);
-	return -1;
+	return status;
 }

 /* Write local info to quota file */
......
@@ -540,20 +540,62 @@ static int __region_intersects(struct resource *parent, resource_size_t start,
 			       size_t size, unsigned long flags,
 			       unsigned long desc)
 {
-	struct resource res;
+	resource_size_t ostart, oend;
 	int type = 0; int other = 0;
-	struct resource *p;
+	struct resource *p, *dp;
+	bool is_type, covered;
+	struct resource res;

 	res.start = start;
 	res.end = start + size - 1;

 	for (p = parent->child; p ; p = p->sibling) {
-		bool is_type = (((p->flags & flags) == flags) &&
-				((desc == IORES_DESC_NONE) ||
-				 (desc == p->desc)));
-
-		if (resource_overlaps(p, &res))
-			is_type ? type++ : other++;
+		if (!resource_overlaps(p, &res))
+			continue;
+		is_type = (p->flags & flags) == flags &&
+			(desc == IORES_DESC_NONE || desc == p->desc);
+		if (is_type) {
+			type++;
+			continue;
+		}
+		/*
+		 * Continue to search in descendant resources as if the
+		 * matched descendant resources cover some ranges of 'p'.
+		 *
+		 * |------------- "CXL Window 0" ------------|
+		 * |-- "System RAM" --|
+		 *
+		 * will behave similar as the following fake resource
+		 * tree when searching "System RAM".
+		 *
+		 * |-- "System RAM" --||-- "CXL Window 0a" --|
+		 */
+		covered = false;
+		ostart = max(res.start, p->start);
+		oend = min(res.end, p->end);
+		for_each_resource(p, dp, false) {
+			if (!resource_overlaps(dp, &res))
+				continue;
+			is_type = (dp->flags & flags) == flags &&
+				(desc == IORES_DESC_NONE || desc == dp->desc);
+			if (is_type) {
+				type++;
+				/*
+				 * Range from 'ostart' to 'dp->start'
+				 * isn't covered by matched resource.
+				 */
+				if (dp->start > ostart)
+					break;
+				if (dp->end >= oend) {
+					covered = true;
+					break;
+				}
+				/* Remove covered range */
+				ostart = max(ostart, dp->end + 1);
+			}
+		}
+		if (!covered)
+			other++;
 	}

 	if (type == 0)
......
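The heart of the change above is an interval-subtraction walk: a non-matching resource only counts as "other" if no run of matching descendants fully covers the overlapping range. Below is a minimal userspace sketch of that coverage test, with hypothetical structs and sample values (not the kernel API), assuming matching children are visited in ascending address order as for_each_resource() yields them:

#include <stdbool.h>
#include <stdio.h>

struct range { unsigned long start, end; };     /* inclusive bounds, like struct resource */

/* Return true if the sorted child ranges fully cover the window [win.start, win.end]. */
static bool covered_by_children(struct range win, const struct range *kids, int n)
{
        unsigned long ostart = win.start, oend = win.end;

        for (int i = 0; i < n; i++) {
                if (kids[i].end < ostart || kids[i].start > oend)
                        continue;               /* child does not overlap the window */
                if (kids[i].start > ostart)
                        return false;           /* gap before this child: not covered */
                if (kids[i].end >= oend)
                        return true;            /* window exhausted: fully covered */
                ostart = kids[i].end + 1;       /* trim off the covered prefix */
        }
        return false;                           /* children ran out with range left over */
}

int main(void)
{
        struct range cxl_win = { 0x100000, 0x1fffff };          /* "CXL Window 0" */
        struct range sys_ram[] = { { 0x100000, 0x17ffff } };    /* "System RAM" child */

        printf("window is %s covered\n",
               covered_by_children(cxl_win, sys_ram, 1) ? "fully" : "only partly");
        return 0;
}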
@@ -126,6 +126,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
 	 * If this is too slow, it can be optimised to examine the maple
 	 * tree gaps.
 	 */
+	rcu_read_lock();
 	for_each_vma(vmi, vma) {
 		unsigned long gap;
@@ -146,6 +147,7 @@ static int __damon_va_three_regions(struct mm_struct *mm,
 next:
 		prev = vma;
 	}
+	rcu_read_unlock();

 	if (!sz_range(&second_gap) || !sz_range(&first_gap))
 		return -EINVAL;
......
@@ -220,6 +220,8 @@ static bool get_huge_zero_page(void)
 		count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
 		return false;
 	}
+	/* Ensure zero folio won't have large_rmappable flag set. */
+	folio_clear_large_rmappable(zero_folio);
 	preempt_disable();
 	if (cmpxchg(&huge_zero_folio, NULL, zero_folio)) {
 		preempt_enable();
......
@@ -6048,7 +6048,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 		 * When the original hugepage is shared one, it does not have
 		 * anon_vma prepared.
 		 */
-		ret = vmf_anon_prepare(vmf);
+		ret = __vmf_anon_prepare(vmf);
 		if (unlikely(ret))
 			goto out_release_all;
@@ -6247,7 +6247,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	}

 	if (!(vma->vm_flags & VM_MAYSHARE)) {
-		ret = vmf_anon_prepare(vmf);
+		ret = __vmf_anon_prepare(vmf);
 		if (unlikely(ret))
 			goto out;
 	}
@@ -6378,6 +6378,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	folio_unlock(folio);
 out:
 	hugetlb_vma_unlock_read(vma);
+
+	/*
+	 * We must check to release the per-VMA lock. __vmf_anon_prepare() is
+	 * the only way ret can be set to VM_FAULT_RETRY.
+	 */
+	if (unlikely(ret & VM_FAULT_RETRY))
+		vma_end_read(vma);
+
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	return ret;
@@ -6599,6 +6607,14 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 out_mutex:
 	hugetlb_vma_unlock_read(vma);
+
+	/*
+	 * We must check to release the per-VMA lock. __vmf_anon_prepare() in
+	 * hugetlb_wp() is the only way ret can be set to VM_FAULT_RETRY.
+	 */
+	if (unlikely(ret & VM_FAULT_RETRY))
+		vma_end_read(vma);
+
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	/*
 	 * Generally it's safe to hold refcount during waiting page lock. But
......
@@ -310,7 +310,16 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
 		wake_up(wqh);
 }

-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf);
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf);
+static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+	vm_fault_t ret = __vmf_anon_prepare(vmf);
+
+	if (unlikely(ret & VM_FAULT_RETRY))
+		vma_end_read(vmf->vma);
+	return ret;
+}
+
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 void folio_rotate_reclaimable(struct folio *folio);
 bool __folio_end_writeback(struct folio *folio);
......
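The split into __vmf_anon_prepare() plus an inline wrapper follows a common shape: ordinary callers get the per-VMA lock dropped for them on retry, while hugetlb calls the raw helper and postpones vma_end_read() until it has finished using the VMA, which is what closes the use-after-free. A toy standalone sketch of that raw-helper/wrapper pattern, with hypothetical names (not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct toy_vma { bool read_locked; };

enum { TOY_OK = 0, TOY_RETRY = 1 };

static void toy_vma_end_read(struct toy_vma *vma)
{
        vma->read_locked = false;               /* stand-in for vma_end_read() */
}

static int toy_raw_prepare(struct toy_vma *vma)
{
        (void)vma;
        return TOY_RETRY;                       /* pretend the trylock failed */
}

/* Wrapper for ordinary callers: the lock is released for them on retry. */
static int toy_prepare(struct toy_vma *vma)
{
        int ret = toy_raw_prepare(vma);

        if (ret == TOY_RETRY)
                toy_vma_end_read(vma);
        return ret;
}

int main(void)
{
        struct toy_vma a = { .read_locked = true }, b = { .read_locked = true };

        toy_prepare(&a);                        /* ordinary path: unlocked for us */
        toy_raw_prepare(&b);                    /* hugetlb-style path: stays locked... */
        toy_vma_end_read(&b);                   /* ...caller unlocks once it is done */
        printf("a locked=%d, b locked=%d\n", a.read_locked, b.read_locked);
        return 0;
}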
@@ -1527,7 +1527,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 	 * Require CAP_SYS_NICE for influencing process performance. Note that
 	 * only non-destructive hints are currently supported.
 	 */
-	if (!capable(CAP_SYS_NICE)) {
+	if (mm != current->mm && !capable(CAP_SYS_NICE)) {
 		ret = -EPERM;
 		goto release_mm;
 	}
......
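With the check relaxed as above, a process may pass hints for its own mm through process_madvise() without CAP_SYS_NICE; the capability is still required when targeting another process. A small usage sketch via raw syscalls (needs a recent kernel and libc headers; the mapping size and the MADV_COLD hint are just illustrative):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        size_t len = 2 * 1024 * 1024;
        void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        memset(buf, 0, len);                    /* fault the pages in */

        /* pidfd for ourselves: the same-mm case that no longer needs CAP_SYS_NICE */
        int pidfd = (int)syscall(SYS_pidfd_open, getpid(), 0);
        struct iovec iov = { .iov_base = buf, .iov_len = len };

        if (syscall(SYS_process_madvise, pidfd, &iov, 1, MADV_COLD, 0) < 0)
                perror("process_madvise");
        else
                puts("MADV_COLD applied to our own mapping");
        return 0;
}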
@@ -3276,7 +3276,7 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
 }

 /**
- * vmf_anon_prepare - Prepare to handle an anonymous fault.
+ * __vmf_anon_prepare - Prepare to handle an anonymous fault.
  * @vmf: The vm_fault descriptor passed from the fault handler.
  *
  * When preparing to insert an anonymous page into a VMA from a
@@ -3290,7 +3290,7 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
  * Return: 0 if fault handling can proceed. Any other value should be
  * returned to the caller.
  */
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	vm_fault_t ret = 0;
@@ -3298,10 +3298,8 @@ vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
 	if (likely(vma->anon_vma))
 		return 0;
 	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-		if (!mmap_read_trylock(vma->vm_mm)) {
-			vma_end_read(vma);
+		if (!mmap_read_trylock(vma->vm_mm))
 			return VM_FAULT_RETRY;
-		}
 	}
 	if (__anon_vma_prepare(vma))
 		ret = VM_FAULT_OOM;
......
@@ -4300,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	}

 	/* ineligible */
-	if (zone > sc->reclaim_idx) {
+	if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
 		gen = folio_inc_gen(lruvec, folio, false);
 		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
 		return true;
......
@@ -54,6 +54,7 @@
 #include <linux/vmalloc.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
+#include <linux/sprintf.h>
 #include <linux/shrinker.h>
 #include <linux/types.h>
 #include <linux/debugfs.h>
@@ -293,17 +294,27 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}

 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
-					0, 0, NULL);
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "zs_handle-%s", pool->name);
+	if (!name)
+		return -ENOMEM;
+	pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE,
+						0, 0, NULL);
+	kfree(name);
 	if (!pool->handle_cachep)
-		return 1;
+		return -EINVAL;

-	pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
-					0, 0, NULL);
+	name = kasprintf(GFP_KERNEL, "zspage-%s", pool->name);
+	if (!name)
+		return -ENOMEM;
+	pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage),
+						0, 0, NULL);
+	kfree(name);
 	if (!pool->zspage_cachep) {
 		kmem_cache_destroy(pool->handle_cachep);
 		pool->handle_cachep = NULL;
-		return 1;
+		return -EINVAL;
 	}

 	return 0;
......