Commit 5bc52f64 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge more fixes from Andrew Morton:
 "The usual shower of hotfixes and some followups to the recently merged
  page_owner enhancements"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once
  mm/slab.c: fix kernel-doc warning for __ksize()
  xarray.h: fix kernel-doc warning
  bitmap.h: fix kernel-doc warning and typo
  fs/fs-writeback.c: fix kernel-doc warning
  fs/libfs.c: fix kernel-doc warning
  fs/direct-io.c: fix kernel-doc warning
  mm, compaction: fix wrong pfn handling in __reset_isolation_pfn()
  mm, hugetlb: allow hugepage allocations to reclaim as needed
  lib/test_meminit: add a kmem_cache_alloc_bulk() test
  mm/slub.c: init_on_free=1 should wipe freelist ptr for bulk allocations
  lib/generic-radix-tree.c: add kmemleak annotations
  mm/slub: fix a deadlock in show_slab_objects()
  mm, page_owner: rename flag indicating that page is allocated
  mm, page_owner: decouple freeing stack trace from debug_pagealloc
  mm, page_owner: fix off-by-one error in __set_page_owner_handle()
parents 2abd839a 3d7fed4a
......@@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster.
Both KASAN modes work with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
To augment reports with last allocation and freeing stack of the physical page,
it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
To disable instrumentation for specific files or directories, add a line
similar to the following to the respective kernel Makefile:
......
......@@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp)
}
}
/**
/*
* dio_complete() - called when all DIO BIO I/O has been completed
* @offset: the byte offset in the file of the completed operation
*
* This drops i_dio_count, lets interested parties know that a DIO operation
* has completed, and calculates the resulting return code for the operation.
......
......@@ -905,7 +905,7 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
* @bdi_id: target bdi id
* @memcg_id: target memcg css id
* @nr_pages: number of pages to write, 0 for best-effort dirty flushing
* @nr: number of pages to write, 0 for best-effort dirty flushing
* @reason: reason why some writeback work initiated
* @done: target wb_completion
*
......
......@@ -473,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin);
/**
* simple_write_end - .write_end helper for non-block-device FSes
* @available: See .write_end of address_space_operations
* @file: "
* @file: See .write_end of address_space_operations
* @mapping: "
* @pos: "
* @len: "
......
......@@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1,
}
/**
* bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
* bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
* @src1: Pointer to bitmap 1
* @src2: Pointer to bitmap 2 will be or'ed with bitmap 1
* @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
* @nbits: number of bits in each of these bitmaps
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
......
......@@ -18,7 +18,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
PAGE_EXT_OWNER_ACTIVE,
PAGE_EXT_OWNER_ALLOCATED,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
......@@ -36,6 +36,7 @@ struct page_ext {
unsigned long flags;
};
extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
......@@ -52,6 +53,13 @@ static inline void page_ext_init(void)
struct page_ext *lookup_page_ext(const struct page *page);
static inline struct page_ext *page_ext_next(struct page_ext *curr)
{
void *next = curr;
next += page_ext_size;
return next;
}
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
......
......@@ -2,6 +2,7 @@
#include <linux/export.h>
#include <linux/generic-radix-tree.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
......@@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
}
EXPORT_SYMBOL(__genradix_ptr);
static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
{
struct genradix_node *node;
node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
/*
* We're using pages (not slab allocations) directly for kernel data
* structures, so we need to explicitly inform kmemleak of them in order
* to avoid false positive memory leak reports.
*/
kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
return node;
}
static inline void genradix_free_node(struct genradix_node *node)
{
kmemleak_free(node);
free_page((unsigned long)node);
}
/*
* Returns pointer to the specified byte @offset within @radix, allocating it if
* necessary - newly allocated slots are always zeroed out:
......@@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
break;
if (!new_node) {
new_node = (void *)
__get_free_page(gfp_mask|__GFP_ZERO);
new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
......@@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
n = READ_ONCE(*p);
if (!n) {
if (!new_node) {
new_node = (void *)
__get_free_page(gfp_mask|__GFP_ZERO);
new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
......@@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
}
if (new_node)
free_page((unsigned long) new_node);
genradix_free_node(new_node);
return &n->data[offset];
}
......@@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
genradix_free_recurse(n->children[i], level - 1);
}
free_page((unsigned long) n);
genradix_free_node(n);
}
int __genradix_prealloc(struct __genradix *radix, size_t size,
......
......@@ -297,6 +297,32 @@ static int __init do_kmem_cache_rcu_persistent(int size, int *total_failures)
return 1;
}
static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
{
struct kmem_cache *c;
int i, iter, maxiter = 1024;
int num, bytes;
bool fail = false;
void *objects[10];
c = kmem_cache_create("test_cache", size, size, 0, NULL);
for (iter = 0; (iter < maxiter) && !fail; iter++) {
num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
objects);
for (i = 0; i < num; i++) {
bytes = count_nonzero_bytes(objects[i], size);
if (bytes)
fail = true;
fill_with_garbage(objects[i], size);
}
if (num)
kmem_cache_free_bulk(c, num, objects);
}
*total_failures += fail;
return 1;
}
/*
* Test kmem_cache allocation by creating caches of different sizes, with and
* without constructors, with and without SLAB_TYPESAFE_BY_RCU.
......@@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures)
num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
&failures);
}
num_tests += do_kmem_cache_size_bulk(size, &failures);
}
REPORT_FAILURES_IN_FN();
*total_failures += failures;
......
......@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
/* Ensure the start of the pageblock or zone is online and valid */
block_pfn = pageblock_start_pfn(pfn);
block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
block_pfn = max(block_pfn, zone->zone_start_pfn);
block_page = pfn_to_online_page(block_pfn);
if (block_page) {
page = block_page;
pfn = block_pfn;
}
/* Ensure the end of the pageblock or zone is online and valid */
block_pfn += pageblock_nr_pages;
block_pfn = pageblock_end_pfn(pfn) - 1;
block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
end_page = pfn_to_online_page(block_pfn);
if (!end_page)
......@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
} while (page < end_page);
} while (page <= end_page);
return false;
}
......
......@@ -199,7 +199,6 @@ struct to_kill {
struct task_struct *tsk;
unsigned long addr;
short size_shift;
char addr_valid;
};
/*
......@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
}
}
tk->addr = page_address_in_vma(p, vma);
tk->addr_valid = 1;
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/*
* In theory we don't have to kill when the page was
* munmaped. But it could be also a mremap. Since that's
* likely very rare kill anyways just out of paranoia, but use
* a SIGKILL because the error is not contained anymore.
*/
if (tk->addr == -EFAULT || tk->size_shift == 0) {
* Send SIGKILL if "tk->addr == -EFAULT". Also, as
* "tk->size_shift" is always non-zero for !is_zone_device_page(),
* so "tk->size_shift == 0" effectively checks no mapping on
* ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
* to a process' address space, it's possible not all N VMAs
* contain mappings for the page, but at least one VMA does.
* Only deliver SIGBUS with payload derived from the VMA that
* has a mapping for the page.
*/
if (tk->addr == -EFAULT) {
pr_info("Memory failure: Unable to find user space address %lx in %s\n",
page_to_pfn(p), tsk->comm);
tk->addr_valid = 0;
} else if (tk->size_shift == 0) {
kfree(tk);
return;
}
get_task_struct(tsk);
tk->tsk = tsk;
......@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* make sure the process doesn't catch the
* signal and then access the memory. Just kill it.
*/
if (fail || tk->addr_valid == 0) {
if (fail || tk->addr == -EFAULT) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
......
......@@ -4473,12 +4473,14 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
if (page)
goto got_pg;
if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
!(gfp_mask & __GFP_RETRY_MAYFAIL)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
* or is prohibited because it recently failed at this
* order, fail immediately.
* order, fail immediately unless the allocator has
* requested compaction and reclaim retry.
*
* Reclaim is
* - potentially very expensive because zones are far
......
......@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
#endif
};
unsigned long page_ext_size = sizeof(struct page_ext);
static unsigned long total_usage;
static unsigned long extra_mem;
static bool __init invoke_need_callbacks(void)
{
......@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
for (i = 0; i < entries; i++) {
if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
page_ext_ops[i]->offset = sizeof(struct page_ext) +
extra_mem;
extra_mem += page_ext_ops[i]->size;
page_ext_ops[i]->offset = page_ext_size;
page_ext_size += page_ext_ops[i]->size;
need = true;
}
}
......@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
}
}
static unsigned long get_entry_size(void)
{
return sizeof(struct page_ext) + extra_mem;
}
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
return base + get_entry_size() * index;
return base + page_ext_size * index;
}
#if !defined(CONFIG_SPARSEMEM)
......@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
nr_pages += MAX_ORDER_NR_PAGES;
table_size = get_entry_size() * nr_pages;
table_size = page_ext_size * nr_pages;
base = memblock_alloc_try_nid(
table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
......@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
if (section->page_ext)
return 0;
table_size = get_entry_size() * PAGES_PER_SECTION;
table_size = page_ext_size * PAGES_PER_SECTION;
base = alloc_page_ext(table_size, nid);
/*
......@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
* we need to apply a mask.
*/
pfn &= PAGE_SECTION_MASK;
section->page_ext = (void *)base - get_entry_size() * pfn;
section->page_ext = (void *)base - page_ext_size * pfn;
total_usage += table_size;
return 0;
}
......@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
struct page *page = virt_to_page(addr);
size_t table_size;
table_size = get_entry_size() * PAGES_PER_SECTION;
table_size = page_ext_size * PAGES_PER_SECTION;
BUG_ON(PageReserved(page));
kmemleak_free(addr);
......
......@@ -24,12 +24,10 @@ struct page_owner {
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t free_handle;
#endif
};
static bool page_owner_disabled = true;
static bool page_owner_enabled = false;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
static depot_stack_handle_t dummy_handle;
......@@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
return -EINVAL;
if (strcmp(buf, "on") == 0)
page_owner_disabled = false;
page_owner_enabled = true;
return 0;
}
......@@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
static bool need_page_owner(void)
{
if (page_owner_disabled)
return false;
return true;
return page_owner_enabled;
}
static __always_inline depot_stack_handle_t create_dummy_stack(void)
......@@ -84,7 +79,7 @@ static noinline void register_early_stack(void)
static void init_page_owner(void)
{
if (page_owner_disabled)
if (!page_owner_enabled)
return;
register_dummy_stack();
......@@ -148,25 +143,19 @@ void __reset_page_owner(struct page *page, unsigned int order)
{
int i;
struct page_ext *page_ext;
#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t handle = 0;
struct page_owner *page_owner;
if (debug_pagealloc_enabled())
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
#endif
for (i = 0; i < (1 << order); i++) {
page_ext = lookup_page_ext(page + i);
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
__clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled()) {
return;
for (i = 0; i < (1 << order); i++) {
__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_owner = get_page_owner(page_ext);
page_owner->free_handle = handle;
}
#endif
page_ext = page_ext_next(page_ext);
}
}
......@@ -184,9 +173,9 @@ static inline void __set_page_owner_handle(struct page *page,
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = -1;
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
__set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
page_ext = lookup_page_ext(page + i);
page_ext = page_ext_next(page_ext);
}
}
......@@ -224,12 +213,10 @@ void __split_page_owner(struct page *page, unsigned int order)
if (unlikely(!page_ext))
return;
for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
for (i = 1; i < (1 << order); i++) {
page_ext = lookup_page_ext(page + i);
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
page_ext = page_ext_next(page_ext);
}
}
......@@ -260,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
__set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
......@@ -320,7 +307,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
......@@ -435,7 +422,7 @@ void __dump_page_owner(struct page *page)
return;
}
if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
pr_alert("page_owner tracks the page as allocated\n");
else
pr_alert("page_owner tracks the page as freed\n");
......@@ -451,7 +438,6 @@ void __dump_page_owner(struct page *page)
stack_trace_print(entries, nr_entries, 0);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
handle = READ_ONCE(page_owner->free_handle);
if (!handle) {
pr_alert("page_owner free stack trace missing\n");
......@@ -460,7 +446,6 @@ void __dump_page_owner(struct page *page)
pr_alert("page last free stack trace:\n");
stack_trace_print(entries, nr_entries, 0);
}
#endif
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
......@@ -527,7 +512,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
* Although we do have the info about past allocation of free
* pages, it's not relevant for current memory usage.
*/
if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
......
......@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
/**
* __ksize -- Uninstrumented ksize.
* @objp: pointer to the object
*
* Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
* safety checks as ksize() with KASAN instrumentation enabled.
*
* Return: size of the actual memory used by @objp in bytes
*/
size_t __ksize(const void *objp)
{
......
......@@ -2671,6 +2671,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return p;
}
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
if (unlikely(slab_want_init_on_free(s)) && obj)
memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
}
/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
......@@ -2759,12 +2770,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
/*
* If the object has been wiped upon free, make sure it's fully
* initialized by zeroing out freelist pointer.
*/
if (unlikely(slab_want_init_on_free(s)) && object)
memset(object + s->offset, 0, sizeof(void *));
maybe_wipe_obj_freeptr(s, object);
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
......@@ -3178,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
goto error;
c = this_cpu_ptr(s->cpu_slab);
maybe_wipe_obj_freeptr(s, p[i]);
continue; /* goto for-loop */
}
c->freelist = get_freepointer(s, object);
p[i] = object;
maybe_wipe_obj_freeptr(s, p[i]);
}
c->tid = next_tid(c->tid);
local_irq_enable();
......@@ -4846,7 +4856,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
}
get_online_mems();
/*
* It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
* already held which will conflict with an existing lock order:
*
* mem_hotplug_lock->slab_mutex->kernfs_mutex
*
* We don't really need mem_hotplug_lock (to hold off
* slab_mem_going_offline_callback) here because slab's memory hot
* unplug code doesn't destroy the kmem_cache->node[] data.
*/
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
struct kmem_cache_node *n;
......@@ -4887,7 +4907,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
#endif
put_online_mems();
kfree(nodes);
return x + sprintf(buf + x, "\n");
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment