Commit 190bf7b1 authored by Linus Torvalds

Merge tag 'mm-hotfixes-stable-2023-08-11-13-44' of...

Merge tag 'mm-hotfixes-stable-2023-08-11-13-44' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "14 hotfixes. 11 of these are cc:stable and the remainder address
  post-6.4 issues, or are not considered suitable for -stable
  backporting"

* tag 'mm-hotfixes-stable-2023-08-11-13-44' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm/damon/core: initialize damo_filter->list from damos_new_filter()
  nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
  selftests: cgroup: fix test_kmem_basic false positives
  fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
  MAINTAINERS: add maple tree mailing list
  mm: compaction: fix endless looping over same migrate block
  selftests: mm: ksm: fix incorrect evaluation of parameter
  hugetlb: do not clear hugetlb dtor until allocating vmemmap
  mm: memory-failure: avoid false hwpoison page mapped error info
  mm: memory-failure: fix potential unexpected return value from unpoison_memory()
  mm/swapfile: fix wrong swap entry type for hwpoisoned swapcache page
  radix tree test suite: fix incorrect allocation size for pthreads
  crypto, cifs: fix error handling in extract_iter_to_sg()
  zsmalloc: fix races between modifications of fullness and isolated
parents 29d99aae 5f1fc67f
......@@ -12480,6 +12480,7 @@ F: net/mctp/
MAPLE TREE
M: Liam R. Howlett <Liam.Howlett@oracle.com>
L: maple-tree@lists.infradead.org
L: linux-mm@kvack.org
S: Supported
F: Documentation/core-api/maple_tree.rst
......
......@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
/*
* Do not dirty inodes after the log writer has been detached
* and its nilfs_root struct has been freed.
*/
if (unlikely(nilfs_purging(nilfs)))
return 0;
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_warn(inode->i_sb,
......
......@@ -2845,6 +2845,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
......@@ -2857,4 +2858,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
clear_nilfs_purging(nilfs);
}
......@@ -29,6 +29,7 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
......@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
......
......@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
char *buf = file->private_data;
loff_t *fpos = &iocb->ki_pos;
size_t phdrs_offset, notes_offset, data_offset;
size_t page_offline_frozen = 1;
......@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
case KCORE_VMEMMAP:
case KCORE_TEXT:
/*
* We use _copy_to_iter() to bypass usermode hardening
* which would otherwise prevent this operation.
* Sadly we must use a bounce buffer here to be able to
* make use of copy_from_kernel_nofault(), as these
* memory regions might not always be mapped on all
* architectures.
*/
if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
if (iov_iter_zero(tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
}
/*
* We know the bounce buffer is safe to copy from, so
* use _copy_to_iter() directly.
*/
} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
}
......@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
if (ret)
return ret;
filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!filp->private_data)
return -ENOMEM;
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
......@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
/*
 * Release handler for the kcore proc file: frees the buffer stored in
 * file->private_data (open_kcore() kmalloc()s PAGE_SIZE bytes there and
 * read_kcore_iter() uses it as its bounce buffer). Always returns 0.
 */
static int release_kcore(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
/*
 * proc_ops table for the kcore file. .proc_release is wired to
 * release_kcore() so the per-open buffer allocated in open_kcore() is freed.
 */
static const struct proc_ops kcore_proc_ops = {
.proc_read_iter = read_kcore_iter,
.proc_open = open_kcore,
.proc_release = release_kcore,
.proc_lseek = default_llseek,
};
......
......@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
failed:
while (sgtable->nents > sgtable->orig_nents)
put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
return res;
}
......
......@@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/*
* Check if the pageblock has already been marked skipped.
* Only the aligned PFN is checked as the caller isolates
* Only the first PFN is checked as the caller isolates
* COMPACT_CLUSTER_MAX at a time so the second call must
* not falsely conclude that the block should be skipped.
*/
if (!valid_page && pageblock_aligned(low_pfn)) {
if (!valid_page && (pageblock_aligned(low_pfn) ||
low_pfn == cc->zone->zone_start_pfn)) {
if (!isolation_suitable(cc, page)) {
low_pfn = end_pfn;
folio = NULL;
......@@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
* before making it "skip" so other compaction instances do
* not scan the same block.
*/
if (pageblock_aligned(low_pfn) &&
if ((pageblock_aligned(low_pfn) ||
low_pfn == cc->zone->zone_start_pfn) &&
!fast_find_block && !isolation_suitable(cc, page))
continue;
......
......@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
return NULL;
filter->type = type;
filter->matching = matching;
INIT_LIST_HEAD(&filter->list);
return filter;
}
......
......@@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order) { }
#endif
/*
 * Set @folio's compound destructor to a non-hugetlb value, selected by
 * whether @h is a gigantic hstate (NULL_COMPOUND_DTOR) or not
 * (COMPOUND_PAGE_DTOR). The caller must hold hugetlb_lock; this is
 * asserted via lockdep.
 */
static inline void __clear_hugetlb_destructor(struct hstate *h,
struct folio *folio)
{
lockdep_assert_held(&hugetlb_lock);
/*
* Very subtle
*
* For non-gigantic pages set the destructor to the normal compound
* page dtor. This is needed in case someone takes an additional
* temporary ref to the page, and freeing is delayed until they drop
* their reference.
*
* For gigantic pages set the destructor to the null dtor. This
* destructor will never be called. Before freeing the gigantic
* page destroy_compound_gigantic_folio will turn the folio into a
* simple group of pages. After this the destructor does not
* apply.
*
*/
if (hstate_is_gigantic(h))
folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
else
folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
}
/*
* Remove hugetlb folio from lists, and update dtor so that the folio appears
* as just a compound page.
* Remove hugetlb folio from lists.
* If vmemmap exists for the folio, update dtor so that the folio appears
* as just a compound page. Otherwise, wait until after allocating vmemmap
* to update dtor.
*
* A reference is held on the folio, except in the case of demote.
*
......@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
}
/*
* Very subtle
*
* For non-gigantic pages set the destructor to the normal compound
* page dtor. This is needed in case someone takes an additional
* temporary ref to the page, and freeing is delayed until they drop
* their reference.
*
* For gigantic pages set the destructor to the null dtor. This
* destructor will never be called. Before freeing the gigantic
* page destroy_compound_gigantic_folio will turn the folio into a
* simple group of pages. After this the destructor does not
* apply.
*
* This handles the case where more than one ref is held when and
* after update_and_free_hugetlb_folio is called.
*
* We can only clear the hugetlb destructor after allocating vmemmap
* pages. Otherwise, someone (memory error handling) may try to write
* to tail struct pages.
*/
if (!folio_test_hugetlb_vmemmap_optimized(folio))
__clear_hugetlb_destructor(h, folio);
/*
* In the case of demote we do not ref count the page as it will soon
* be turned into a page of smaller size.
*/
if (!demote)
folio_ref_unfreeze(folio, 1);
if (hstate_is_gigantic(h))
folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
else
folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
......@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
{
int i;
struct page *subpage;
bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
......@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
if (unlikely(folio_test_hwpoison(folio)))
folio_clear_hugetlb_hwpoison(folio);
/*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb destructor under the hugetlb lock.
*/
if (clear_dtor) {
spin_lock_irq(&hugetlb_lock);
__clear_hugetlb_destructor(h, folio);
spin_unlock_irq(&hugetlb_lock);
}
for (i = 0; i < pages_per_huge_page(h); i++) {
subpage = folio_page(folio, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
......
......@@ -2784,6 +2784,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
anon_vma->root == vma->anon_vma->root) {
return page; /* still no need to copy it */
}
if (PageHWPoison(page))
return ERR_PTR(-EHWPOISON);
if (!PageUptodate(page))
return page; /* let do_swap_page report the error */
......
......@@ -2466,7 +2466,7 @@ int unpoison_memory(unsigned long pfn)
{
struct folio *folio;
struct page *p;
int ret = -EBUSY;
int ret = -EBUSY, ghp;
unsigned long count = 1;
bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
......@@ -2499,6 +2499,13 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
goto unlock_mutex;
/*
* Note that folio->_mapcount is overloaded in SLAB, so the simple test
* in folio_mapped() has to be done after folio_test_slab() is checked.
*/
if (folio_mapped(folio)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
......@@ -2511,32 +2518,28 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
goto unlock_mutex;
ret = get_hwpoison_page(p, MF_UNPOISON);
if (!ret) {
ghp = get_hwpoison_page(p, MF_UNPOISON);
if (!ghp) {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
ret = -EBUSY;
if (count == 0)
goto unlock_mutex;
}
}
ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
} else if (ret < 0) {
if (ret == -EHWPOISON) {
} else if (ghp < 0) {
if (ghp == -EHWPOISON) {
ret = put_page_back_buddy(p) ? 0 : -EBUSY;
} else
} else {
ret = ghp;
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
}
} else {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
ret = -EBUSY;
folio_put(folio);
goto unlock_mutex;
}
......
......@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte, old_pte;
bool hwposioned = false;
bool hwpoisoned = PageHWPoison(page);
int ret = 1;
swapcache = page;
......@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
if (unlikely(!page))
return -ENOMEM;
else if (unlikely(PTR_ERR(page) == -EHWPOISON))
hwposioned = true;
hwpoisoned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
......@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
old_pte = ptep_get(pte);
if (unlikely(hwposioned || !PageUptodate(page))) {
if (unlikely(hwpoisoned || !PageUptodate(page))) {
swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
if (hwposioned) {
if (hwpoisoned) {
swp_entry = make_hwpoison_entry(swapcache);
page = swapcache;
} else {
......
......@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{
struct zs_pool *pool;
struct zspage *zspage;
/*
......@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
VM_BUG_ON_PAGE(PageIsolated(page), page);
zspage = get_zspage(page);
migrate_write_lock(zspage);
pool = zspage->pool;
spin_lock(&pool->lock);
inc_zspage_isolation(zspage);
migrate_write_unlock(zspage);
spin_unlock(&pool->lock);
return true;
}
......@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
kunmap_atomic(s_addr);
replace_sub_page(class, zspage, newpage, page);
dec_zspage_isolation(zspage);
/*
* Since we complete the data copy and set up new zspage structure,
* it's okay to release the pool's lock.
*/
spin_unlock(&pool->lock);
dec_zspage_isolation(zspage);
migrate_write_unlock(zspage);
get_page(newpage);
......@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
static void zs_page_putback(struct page *page)
{
struct zs_pool *pool;
struct zspage *zspage;
VM_BUG_ON_PAGE(!PageIsolated(page), page);
zspage = get_zspage(page);
migrate_write_lock(zspage);
pool = zspage->pool;
spin_lock(&pool->lock);
dec_zspage_isolation(zspage);
migrate_write_unlock(zspage);
spin_unlock(&pool->lock);
}
static const struct movable_operations zsmalloc_mops = {
......
......@@ -177,7 +177,7 @@ void regression1_test(void)
nr_threads = 2;
pthread_barrier_init(&worker_barrier, NULL, nr_threads);
threads = malloc(nr_threads * sizeof(pthread_t *));
threads = malloc(nr_threads * sizeof(*threads));
for (i = 0; i < nr_threads; i++) {
arg = i;
......
......@@ -70,6 +70,10 @@ static int test_kmem_basic(const char *root)
goto cleanup;
cg_write(cg, "memory.high", "1M");
/* wait for RCU freeing */
sleep(1);
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
if (slab1 <= 0)
goto cleanup;
......
......@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
printf("Size must be greater than 0\n");
return KSFT_FAIL;
}
break;
case 't':
{
int tmp = atoi(optarg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment