Commit 325d0eab authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "15 patches.

  Subsystems affected by this patch series: mailmap, mm/hotfixes,
  mm/thp, mm/memory-hotplug, misc, kcsan"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  kcsan: kconfig: move to menu 'Generic Kernel Debugging Instruments'
  fs/fs-writeback.c: adjust dirtytime_interval_handler definition to match prototype
  stackleak: let stack_erasing_sysctl take a kernel pointer buffer
  ftrace: let ftrace_enable_sysctl take a kernel pointer buffer
  mm/memory_hotplug: drain per-cpu pages again during memory offline
  selftests/vm: fix display of page size in map_hugetlb
  mm/thp: fix __split_huge_pmd_locked() for migration PMD
  kprobes: fix kill kprobe which has been marked as gone
  tmpfs: restore functionality of nr_inodes=0
  mlock: fix unevictable_pgs event counts on THP
  mm: fix check_move_unevictable_pages() on THP
  mm: migration of hugetlbfs page skip memcg
  ksm: reinstate memcg charge on copied pages
  mailmap: add older email addresses for Kees Cook
parents c8d1a46f 2645d432
...@@ -169,6 +169,10 @@ Juha Yrjola <juha.yrjola@solidboot.com> ...@@ -169,6 +169,10 @@ Juha Yrjola <juha.yrjola@solidboot.com>
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com> Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
Kamil Konieczny <k.konieczny@samsung.com> <k.konieczny@partner.samsung.com> Kamil Konieczny <k.konieczny@samsung.com> <k.konieczny@partner.samsung.com>
Kay Sievers <kay.sievers@vrfy.org> Kay Sievers <kay.sievers@vrfy.org>
Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
Kees Cook <keescook@chromium.org> <keescook@google.com>
Kees Cook <keescook@chromium.org> <kees@outflux.net>
Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
Kenneth W Chen <kenneth.w.chen@intel.com> Kenneth W Chen <kenneth.w.chen@intel.com>
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru> Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com> Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
......
...@@ -2184,7 +2184,7 @@ static int __init start_dirtytime_writeback(void) ...@@ -2184,7 +2184,7 @@ static int __init start_dirtytime_writeback(void)
__initcall(start_dirtytime_writeback); __initcall(start_dirtytime_writeback);
int dirtytime_interval_handler(struct ctl_table *table, int write, int dirtytime_interval_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos) void *buffer, size_t *lenp, loff_t *ppos)
{ {
int ret; int ret;
......
...@@ -85,8 +85,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val ...@@ -85,8 +85,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
extern int ftrace_enabled; extern int ftrace_enabled;
extern int extern int
ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_enable_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, void *buffer, size_t *lenp, loff_t *ppos);
loff_t *ppos);
struct ftrace_ops; struct ftrace_ops;
......
...@@ -25,7 +25,7 @@ static inline void stackleak_task_init(struct task_struct *t) ...@@ -25,7 +25,7 @@ static inline void stackleak_task_init(struct task_struct *t)
#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
int stack_erasing_sysctl(struct ctl_table *table, int write, int stack_erasing_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos); void *buffer, size_t *lenp, loff_t *ppos);
#endif #endif
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
......
...@@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p) ...@@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p)
lockdep_assert_held(&kprobe_mutex); lockdep_assert_held(&kprobe_mutex);
if (WARN_ON_ONCE(kprobe_gone(p)))
return;
p->flags |= KPROBE_FLAG_GONE; p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) { if (kprobe_aggrprobe(p)) {
/* /*
...@@ -2419,7 +2422,10 @@ static int kprobes_module_callback(struct notifier_block *nb, ...@@ -2419,7 +2422,10 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex); mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) { for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i]; head = &kprobe_table[i];
hlist_for_each_entry(p, head, hlist) hlist_for_each_entry(p, head, hlist) {
if (kprobe_gone(p))
continue;
if (within_module_init((unsigned long)p->addr, mod) || if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore && (checkcore &&
within_module_core((unsigned long)p->addr, mod))) { within_module_core((unsigned long)p->addr, mod))) {
...@@ -2437,6 +2443,7 @@ static int kprobes_module_callback(struct notifier_block *nb, ...@@ -2437,6 +2443,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
kill_kprobe(p); kill_kprobe(p);
} }
} }
}
if (val == MODULE_STATE_GOING) if (val == MODULE_STATE_GOING)
remove_module_kprobe_blacklist(mod); remove_module_kprobe_blacklist(mod);
mutex_unlock(&kprobe_mutex); mutex_unlock(&kprobe_mutex);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
int stack_erasing_sysctl(struct ctl_table *table, int write, int stack_erasing_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos) void *buffer, size_t *lenp, loff_t *ppos)
{ {
int ret = 0; int ret = 0;
int state = !static_branch_unlikely(&stack_erasing_bypass); int state = !static_branch_unlikely(&stack_erasing_bypass);
......
...@@ -7531,8 +7531,7 @@ static bool is_permanent_ops_registered(void) ...@@ -7531,8 +7531,7 @@ static bool is_permanent_ops_registered(void)
int int
ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_enable_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, void *buffer, size_t *lenp, loff_t *ppos)
loff_t *ppos)
{ {
int ret = -ENODEV; int ret = -ENODEV;
......
...@@ -520,8 +520,8 @@ config DEBUG_FS_ALLOW_NONE ...@@ -520,8 +520,8 @@ config DEBUG_FS_ALLOW_NONE
endchoice endchoice
source "lib/Kconfig.kgdb" source "lib/Kconfig.kgdb"
source "lib/Kconfig.ubsan" source "lib/Kconfig.ubsan"
source "lib/Kconfig.kcsan"
endmenu endmenu
...@@ -1620,8 +1620,6 @@ config PROVIDE_OHCI1394_DMA_INIT ...@@ -1620,8 +1620,6 @@ config PROVIDE_OHCI1394_DMA_INIT
source "samples/Kconfig" source "samples/Kconfig"
source "lib/Kconfig.kcsan"
config ARCH_HAS_DEVMEM_IS_ALLOWED config ARCH_HAS_DEVMEM_IS_ALLOWED
bool bool
......
...@@ -2022,7 +2022,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2022,7 +2022,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
put_page(page); put_page(page);
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return; return;
} else if (is_huge_zero_pmd(*pmd)) { } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
/* /*
* FIXME: Do we want to invalidate secondary mmu by calling * FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside * mmu_notifier_invalidate_range() see comments below inside
...@@ -2116,15 +2116,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2116,15 +2116,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pte = pte_offset_map(&_pmd, addr); pte = pte_offset_map(&_pmd, addr);
BUG_ON(!pte_none(*pte)); BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, entry); set_pte_at(mm, addr, pte, entry);
if (!pmd_migration)
atomic_inc(&page[i]._mapcount); atomic_inc(&page[i]._mapcount);
pte_unmap(pte); pte_unmap(pte);
} }
if (!pmd_migration) {
/* /*
* Set PG_double_map before dropping compound_mapcount to avoid * Set PG_double_map before dropping compound_mapcount to avoid
* false-negative page_mapped(). * false-negative page_mapped().
*/ */
if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) { if (compound_mapcount(page) > 1 &&
!TestSetPageDoubleMap(page)) {
for (i = 0; i < HPAGE_PMD_NR; i++) for (i = 0; i < HPAGE_PMD_NR; i++)
atomic_inc(&page[i]._mapcount); atomic_inc(&page[i]._mapcount);
} }
...@@ -2140,6 +2143,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2140,6 +2143,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
} }
} }
unlock_page_memcg(page); unlock_page_memcg(page);
}
smp_wmb(); /* make pte visible before pmd */ smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable); pmd_populate(mm, pmd, pgtable);
......
...@@ -2586,6 +2586,10 @@ struct page *ksm_might_need_to_copy(struct page *page, ...@@ -2586,6 +2586,10 @@ struct page *ksm_might_need_to_copy(struct page *page,
return page; /* let do_swap_page report the error */ return page; /* let do_swap_page report the error */
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) {
put_page(new_page);
new_page = NULL;
}
if (new_page) { if (new_page) {
copy_user_highpage(new_page, page, address, vma); copy_user_highpage(new_page, page, address, vma);
......
...@@ -1575,6 +1575,20 @@ static int __ref __offline_pages(unsigned long start_pfn, ...@@ -1575,6 +1575,20 @@ static int __ref __offline_pages(unsigned long start_pfn,
/* check again */ /* check again */
ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
NULL, check_pages_isolated_cb); NULL, check_pages_isolated_cb);
/*
* per-cpu pages are drained in start_isolate_page_range, but if
* there are still pages that are not free, make sure that we
* drain again, because when we isolated range we might
* have raced with another thread that was adding pages to pcp
* list.
*
* Forward progress should be still guaranteed because
* pages on the pcp list can only belong to MOVABLE_ZONE
* because has_unmovable_pages explicitly checks for
* PageBuddy on freed pages on other zones.
*/
if (ret)
drain_all_pages(zone);
} while (ret); } while (ret);
/* Ok, all of our target is isolated. /* Ok, all of our target is isolated.
......
...@@ -668,6 +668,7 @@ void migrate_page_states(struct page *newpage, struct page *page) ...@@ -668,6 +668,7 @@ void migrate_page_states(struct page *newpage, struct page *page)
copy_page_owner(page, newpage); copy_page_owner(page, newpage);
if (!PageHuge(page))
mem_cgroup_migrate(page, newpage); mem_cgroup_migrate(page, newpage);
} }
EXPORT_SYMBOL(migrate_page_states); EXPORT_SYMBOL(migrate_page_states);
......
...@@ -58,11 +58,14 @@ EXPORT_SYMBOL(can_do_mlock); ...@@ -58,11 +58,14 @@ EXPORT_SYMBOL(can_do_mlock);
*/ */
void clear_page_mlock(struct page *page) void clear_page_mlock(struct page *page)
{ {
int nr_pages;
if (!TestClearPageMlocked(page)) if (!TestClearPageMlocked(page))
return; return;
mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page)); nr_pages = thp_nr_pages(page);
count_vm_event(UNEVICTABLE_PGCLEARED); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
/* /*
* The previous TestClearPageMlocked() corresponds to the smp_mb() * The previous TestClearPageMlocked() corresponds to the smp_mb()
* in __pagevec_lru_add_fn(). * in __pagevec_lru_add_fn().
...@@ -76,7 +79,7 @@ void clear_page_mlock(struct page *page) ...@@ -76,7 +79,7 @@ void clear_page_mlock(struct page *page)
* We lost the race. the page already moved to evictable list. * We lost the race. the page already moved to evictable list.
*/ */
if (PageUnevictable(page)) if (PageUnevictable(page))
count_vm_event(UNEVICTABLE_PGSTRANDED); count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
} }
} }
...@@ -93,9 +96,10 @@ void mlock_vma_page(struct page *page) ...@@ -93,9 +96,10 @@ void mlock_vma_page(struct page *page)
VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
if (!TestSetPageMlocked(page)) { if (!TestSetPageMlocked(page)) {
mod_zone_page_state(page_zone(page), NR_MLOCK, int nr_pages = thp_nr_pages(page);
thp_nr_pages(page));
count_vm_event(UNEVICTABLE_PGMLOCKED); mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
if (!isolate_lru_page(page)) if (!isolate_lru_page(page))
putback_lru_page(page); putback_lru_page(page);
} }
...@@ -138,7 +142,7 @@ static void __munlock_isolated_page(struct page *page) ...@@ -138,7 +142,7 @@ static void __munlock_isolated_page(struct page *page)
/* Did try_to_unlock() succeed or punt? */ /* Did try_to_unlock() succeed or punt? */
if (!PageMlocked(page)) if (!PageMlocked(page))
count_vm_event(UNEVICTABLE_PGMUNLOCKED); count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));
putback_lru_page(page); putback_lru_page(page);
} }
...@@ -154,10 +158,12 @@ static void __munlock_isolated_page(struct page *page) ...@@ -154,10 +158,12 @@ static void __munlock_isolated_page(struct page *page)
*/ */
static void __munlock_isolation_failed(struct page *page) static void __munlock_isolation_failed(struct page *page)
{ {
int nr_pages = thp_nr_pages(page);
if (PageUnevictable(page)) if (PageUnevictable(page))
__count_vm_event(UNEVICTABLE_PGSTRANDED); __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
else else
__count_vm_event(UNEVICTABLE_PGMUNLOCKED); __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
} }
/** /**
......
...@@ -170,6 +170,14 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) ...@@ -170,6 +170,14 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* pageblocks we may have modified and return -EBUSY to caller. This * pageblocks we may have modified and return -EBUSY to caller. This
* prevents two threads from simultaneously working on overlapping ranges. * prevents two threads from simultaneously working on overlapping ranges.
* *
* Please note that there is no strong synchronization with the page allocator
* either. Pages might be freed while their page blocks are marked ISOLATED.
* In some cases pages might still end up on pcp lists and that would allow
* for their allocation even when they are in fact isolated already. Depending
* on how strong of a guarantee the caller needs drain_all_pages might be needed
* (e.g. __offline_pages will need to call it after check for isolated range for
* a next retry).
*
* Return: the number of isolated pageblocks on success and -EBUSY if any part * Return: the number of isolated pageblocks on success and -EBUSY if any part
* of range cannot be isolated. * of range cannot be isolated.
*/ */
......
...@@ -279,11 +279,13 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) ...@@ -279,11 +279,13 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
if (!(sb->s_flags & SB_KERNMOUNT)) { if (!(sb->s_flags & SB_KERNMOUNT)) {
spin_lock(&sbinfo->stat_lock); spin_lock(&sbinfo->stat_lock);
if (sbinfo->max_inodes) {
if (!sbinfo->free_inodes) { if (!sbinfo->free_inodes) {
spin_unlock(&sbinfo->stat_lock); spin_unlock(&sbinfo->stat_lock);
return -ENOSPC; return -ENOSPC;
} }
sbinfo->free_inodes--; sbinfo->free_inodes--;
}
if (inop) { if (inop) {
ino = sbinfo->next_ino++; ino = sbinfo->next_ino++;
if (unlikely(is_zero_ino(ino))) if (unlikely(is_zero_ino(ino)))
......
...@@ -494,14 +494,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, ...@@ -494,14 +494,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
if (unlikely(unevictable) && !TestSetPageMlocked(page)) { if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
int nr_pages = thp_nr_pages(page);
/* /*
* We use the irq-unsafe __mod_zone_page_stat because this * We use the irq-unsafe __mod_zone_page_stat because this
* counter is not modified from interrupt context, and the pte * counter is not modified from interrupt context, and the pte
* lock is held(spinlock), which implies preemption disabled. * lock is held(spinlock), which implies preemption disabled.
*/ */
__mod_zone_page_state(page_zone(page), NR_MLOCK, __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
thp_nr_pages(page)); count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
count_vm_event(UNEVICTABLE_PGMLOCKED);
} }
lru_cache_add(page); lru_cache_add(page);
} }
......
...@@ -4268,8 +4268,14 @@ void check_move_unevictable_pages(struct pagevec *pvec) ...@@ -4268,8 +4268,14 @@ void check_move_unevictable_pages(struct pagevec *pvec)
for (i = 0; i < pvec->nr; i++) { for (i = 0; i < pvec->nr; i++) {
struct page *page = pvec->pages[i]; struct page *page = pvec->pages[i];
struct pglist_data *pagepgdat = page_pgdat(page); struct pglist_data *pagepgdat = page_pgdat(page);
int nr_pages;
if (PageTransTail(page))
continue;
nr_pages = thp_nr_pages(page);
pgscanned += nr_pages;
pgscanned++;
if (pagepgdat != pgdat) { if (pagepgdat != pgdat) {
if (pgdat) if (pgdat)
spin_unlock_irq(&pgdat->lru_lock); spin_unlock_irq(&pgdat->lru_lock);
...@@ -4288,7 +4294,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) ...@@ -4288,7 +4294,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
ClearPageUnevictable(page); ClearPageUnevictable(page);
del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
add_page_to_lru_list(page, lruvec, lru); add_page_to_lru_list(page, lruvec, lru);
pgrescued++; pgrescued += nr_pages;
} }
} }
......
...@@ -83,7 +83,7 @@ int main(int argc, char **argv) ...@@ -83,7 +83,7 @@ int main(int argc, char **argv)
} }
if (shift) if (shift)
printf("%u kB hugepages\n", 1 << shift); printf("%u kB hugepages\n", 1 << (shift - 10));
else else
printf("Default size hugepages\n"); printf("Default size hugepages\n");
printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment