Commit 2c04d67e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "11 patches.

  Subsystems affected by this patch series: mm (memcg, memory-failure,
  oom-kill, secretmem, vmalloc, hugetlb, damon, and tools), and ocfs2"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer
  mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()'
  mm: khugepaged: skip huge page collapse for special files
  mm, thp: bail out early in collapse_file for writeback page
  mm/vmalloc: fix numa spreading for large hash tables
  mm/secretmem: avoid letting secretmem_users drop to zero
  ocfs2: fix race between searching chunks and release journal_head from buffer_head
  mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
  mm: filemap: check if THP has hwpoisoned subpage for PMD page fault
  mm: hwpoison: remove the unnecessary THP check
  memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT
parents f25a5481 9c7516d6
......@@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
int ret;
int ret = 1;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
......@@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
if (!buffer_jbd(bg_bh))
return 1;
jh = bh2jh(bg_bh);
spin_lock(&jh->b_state_lock);
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
if (bg)
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
else
ret = 1;
spin_unlock(&jh->b_state_lock);
jbd_lock_bh_journal_head(bg_bh);
if (buffer_jbd(bg_bh)) {
jh = bh2jh(bg_bh);
spin_lock(&jh->b_state_lock);
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
if (bg)
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
else
ret = 1;
spin_unlock(&jh->b_state_lock);
}
jbd_unlock_bh_journal_head(bg_bh);
return ret;
}
......
......@@ -171,6 +171,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,
#ifdef CONFIG_MEMORY_FAILURE
/*
* Compound pages. Stored in first tail page's flags.
* Indicates that at least one subpage is hwpoisoned in the
* THP.
*/
PG_has_hwpoisoned = PG_mappedtodisk,
#endif
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
......@@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap)
TESTSCFLAG_FALSE(DoubleMap)
#endif
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
/*
* PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
* compound page.
*
* This flag is set by hwpoison handler. Cleared by THP split or free page.
*/
PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
#else
PAGEFLAG_FALSE(HasHWPoisoned)
TESTSCFLAG_FALSE(HasHWPoisoned)
#endif
/*
* Check if a page is currently marked HWPoisoned. Note that this check is
* best effort only and inherently racy: there is no way to synchronize with
......
......@@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test)
r = damon_new_region(0, 22);
damon_add_region(r, t);
damon_split_regions_of(c, t, 2);
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u);
KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
damon_free_target(t);
t = damon_new_target(42);
r = damon_new_region(0, 220);
damon_add_region(r, t);
damon_split_regions_of(c, t, 4);
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u);
KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
damon_free_target(t);
damon_destroy_ctx(c);
}
......
......@@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
lruvec = lock_page_lruvec(head);
ClearPageHasHWPoisoned(head);
for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond EOF: drop them from page cache */
......
......@@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
if (!transhuge_vma_enabled(vma, vm_flags))
return false;
if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
vma->vm_pgoff, HPAGE_PMD_NR))
return false;
/* Enabled via shmem mount options or sysfs settings. */
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
}
if (shmem_file(vma->vm_file))
return shmem_huge_enabled(vma);
/* THP settings require madvise. */
if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
return false;
/* Read-only file mappings need to be aligned for THP to work. */
/* Only regular file is valid */
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
!inode_is_open_for_write(vma->vm_file->f_inode) &&
(vm_flags & VM_EXEC)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
struct inode *inode = vma->vm_file->f_inode;
return !inode_is_open_for_write(inode) &&
S_ISREG(inode->i_mode);
}
if (!vma->anon_vma || vma->vm_ops)
......@@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm,
filemap_flush(mapping);
result = SCAN_FAIL;
goto xa_unlocked;
} else if (PageWriteback(page)) {
xas_unlock_irq(&xas);
result = SCAN_FAIL;
goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
......@@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock;
}
if (!is_shmem && PageDirty(page)) {
if (!is_shmem && (PageDirty(page) ||
PageWriteback(page))) {
/*
* khugepaged only works on read-only fd, so this
* page is dirty because it hasn't been flushed
......
......@@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page)
if (!HWPoisonHandlable(head))
return -EBUSY;
if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
* This should be better than triggering BUG_ON when kernel
* tries to touch the "partially handled" page.
*/
if (!PageAnon(head)) {
pr_err("Memory failure: %#lx: non anonymous thp\n",
page_to_pfn(page));
return 0;
}
}
if (get_page_unless_zero(head)) {
if (head == compound_head(page))
return 1;
......@@ -1708,6 +1694,20 @@ int memory_failure(unsigned long pfn, int flags)
}
if (PageTransHuge(hpage)) {
/*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
* And the flag can't be set in get_hwpoison_page() since
* it is called by soft offline too and it is just called
* for !MF_COUNT_INCREASE. So here seems to be the best
* place.
*
* Don't need care about the above error handling paths for
* get_hwpoison_page() since they handle either free page
* or unhandlable page. The refcount is bumped iff the
* page is a valid handlable page.
*/
SetPageHasHWPoisoned(hpage);
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
res = -EBUSY;
......
......@@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
if (compound_order(page) != HPAGE_PMD_ORDER)
return ret;
/*
* Just backoff if any subpage of a THP is corrupted otherwise
* the corrupted page may mapped by PMD silently to escape the
* check. This kind of THP just can be PTE mapped. Access to
* the corrupted subpage should trigger SIGBUS as expected.
*/
if (unlikely(PageHasHWPoisoned(page)))
return ret;
/*
* Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that.
......
......@@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
struct task_struct *task;
struct task_struct *p;
unsigned int f_flags;
bool reap = true;
bool reap = false;
struct pid *pid;
long ret = 0;
......@@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}
mm = p->mm;
mmgrab(mm);
/* If the work has been done already, just exit with success */
if (test_bit(MMF_OOM_SKIP, &mm->flags))
reap = false;
else if (!task_will_free_mem(p)) {
reap = false;
ret = -EINVAL;
if (mmget_not_zero(p->mm)) {
mm = p->mm;
if (task_will_free_mem(p))
reap = true;
else {
/* Error only if the work has not been done already */
if (!test_bit(MMF_OOM_SKIP, &mm->flags))
ret = -EINVAL;
}
}
task_unlock(p);
......@@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
mmap_read_unlock(mm);
drop_mm:
mmdrop(mm);
if (mm)
mmput(mm);
put_task:
put_task_struct(task);
put_pid:
......
......@@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
if (compound)
if (compound) {
ClearPageDoubleMap(page);
ClearPageHasHWPoisoned(page);
}
for (i = 1; i < (1 << order); i++) {
if (compound)
bad += free_tail_pages_check(page, page + i);
......@@ -5223,6 +5225,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(page_array && nr_pages - nr_populated == 0))
goto out;
/* Bulk allocator does not support memcg accounting. */
if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
goto failed;
/* Use the single page allocator for one page. */
if (nr_pages - nr_populated == 1)
goto failed;
......
......@@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
file->f_flags |= O_LARGEFILE;
fd_install(fd, file);
atomic_inc(&secretmem_users);
fd_install(fd, file);
return fd;
err_put_fd:
......
......@@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
unsigned int order, unsigned int nr_pages, struct page **pages)
{
unsigned int nr_allocated = 0;
struct page *page;
int i;
/*
* For order-0 pages we make use of bulk allocator, if
......@@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* to fails, fallback to a single page allocator that is
* more permissive.
*/
if (!order) {
if (!order && nid != NUMA_NO_NODE) {
while (nr_allocated < nr_pages) {
unsigned int nr, nr_pages_request;
......@@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request)
break;
}
} else
} else if (order)
/*
* Compound pages required for remap_vmalloc_page if
* high-order pages.
......@@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
gfp |= __GFP_COMP;
/* High-order pages or fallback path if "bulk" fails. */
while (nr_allocated < nr_pages) {
struct page *page;
int i;
page = alloc_pages_node(nid, gfp, order);
while (nr_allocated < nr_pages) {
if (nid == NUMA_NO_NODE)
page = alloc_pages(gfp, order);
else
page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
break;
......
......@@ -341,7 +341,7 @@ void split_file_backed_thp(void)
}
/* write something to the file, so a file-backed THP can be allocated */
num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
close(fd);
if (num_written < 1) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment