Commit 1f1183c4 authored by Andrew Morton

merge mm-hotfixes-stable into mm-nonmm-stable to pick up stackdepot changes

parents 7d8cebb9 720da1e5
...@@ -14111,6 +14111,17 @@ F: mm/ ...@@ -14111,6 +14111,17 @@ F: mm/
F: tools/mm/ F: tools/mm/
F: tools/testing/selftests/mm/ F: tools/testing/selftests/mm/
MEMORY MAPPING
M: Andrew Morton <akpm@linux-foundation.org>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Lorenzo Stoakes <lstoakes@gmail.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
F: mm/mmap.c
MEMORY TECHNOLOGY DEVICES (MTD) MEMORY TECHNOLOGY DEVICES (MTD)
M: Miquel Raynal <miquel.raynal@bootlin.com> M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at> M: Richard Weinberger <richard@nod.at>
......
...@@ -92,4 +92,7 @@ ...@@ -92,4 +92,7 @@
/********** VFS **********/ /********** VFS **********/
#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
/********** lib/stackdepot.c **********/
#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
#endif #endif
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
...@@ -43,17 +44,7 @@ ...@@ -43,17 +44,7 @@
#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
STACK_DEPOT_EXTRA_BITS) STACK_DEPOT_EXTRA_BITS)
#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32
/*
* KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack
* traces. As KMSAN does not support evicting stack traces from the stack
* depot, the stack depot capacity might be reached quickly with large stack
* records. Adjust the maximum number of stack depot pools for this case.
*/
#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16))
#else
#define DEPOT_POOLS_CAP 8192 #define DEPOT_POOLS_CAP 8192
#endif
#define DEPOT_MAX_POOLS \ #define DEPOT_MAX_POOLS \
(((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
(1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
...@@ -93,9 +84,6 @@ struct stack_record { ...@@ -93,9 +84,6 @@ struct stack_record {
}; };
}; };
#define DEPOT_STACK_RECORD_SIZE \
ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)
static bool stack_depot_disabled; static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata; static bool __stack_depot_early_init_passed __initdata;
...@@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS]; ...@@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS];
static void *new_pool; static void *new_pool;
/* Number of pools in stack_pools. */ /* Number of pools in stack_pools. */
static int pools_num; static int pools_num;
/* Offset to the unused space in the currently used pool. */
static size_t pool_offset = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */ /* Freelist of stack records within stack_pools. */
static LIST_HEAD(free_stacks); static LIST_HEAD(free_stacks);
/*
* Stack depot tries to keep an extra pool allocated even before it runs out
* of space in the currently used pool. This flag marks whether this extra pool
* needs to be allocated. It has the value 0 when either an extra pool is not
* yet allocated or if the limit on the number of pools is reached.
*/
static bool new_pool_required = true;
/* The lock must be held when performing pool or freelist modifications. */ /* The lock must be held when performing pool or freelist modifications. */
static DEFINE_RAW_SPINLOCK(pool_lock); static DEFINE_RAW_SPINLOCK(pool_lock);
/* Statistics counters for debugfs. */ /* Statistics counters for debugfs. */
enum depot_counter_id { enum depot_counter_id {
DEPOT_COUNTER_ALLOCS, DEPOT_COUNTER_REFD_ALLOCS,
DEPOT_COUNTER_FREES, DEPOT_COUNTER_REFD_FREES,
DEPOT_COUNTER_INUSE, DEPOT_COUNTER_REFD_INUSE,
DEPOT_COUNTER_FREELIST_SIZE, DEPOT_COUNTER_FREELIST_SIZE,
DEPOT_COUNTER_PERSIST_COUNT,
DEPOT_COUNTER_PERSIST_BYTES,
DEPOT_COUNTER_COUNT, DEPOT_COUNTER_COUNT,
}; };
static long counters[DEPOT_COUNTER_COUNT]; static long counters[DEPOT_COUNTER_COUNT];
static const char *const counter_names[] = { static const char *const counter_names[] = {
[DEPOT_COUNTER_ALLOCS] = "allocations", [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
[DEPOT_COUNTER_FREES] = "frees", [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
[DEPOT_COUNTER_INUSE] = "in_use", [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
}; };
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
...@@ -294,48 +281,52 @@ int stack_depot_init(void) ...@@ -294,48 +281,52 @@ int stack_depot_init(void)
EXPORT_SYMBOL_GPL(stack_depot_init); EXPORT_SYMBOL_GPL(stack_depot_init);
/* /*
* Initializes new stack depot @pool, release all its entries to the freelist, * Initializes new stack pool, and updates the list of pools.
* and update the list of pools.
*/ */
static void depot_init_pool(void *pool) static bool depot_init_pool(void **prealloc)
{ {
int offset;
lockdep_assert_held(&pool_lock); lockdep_assert_held(&pool_lock);
/* Initialize handles and link stack records into the freelist. */ if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; /* Bail out if we reached the pool limit. */
offset += DEPOT_STACK_RECORD_SIZE) { WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
struct stack_record *stack = pool + offset; WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
WARN_ONCE(1, "Stack depot reached limit capacity");
stack->handle.pool_index = pools_num; return false;
stack->handle.offset = offset >> DEPOT_STACK_ALIGN; }
stack->handle.extra = 0;
/*
* Stack traces of size 0 are never saved, and we can simply use
* the size field as an indicator if this is a new unused stack
* record in the freelist.
*/
stack->size = 0;
INIT_LIST_HEAD(&stack->hash_list); if (!new_pool && *prealloc) {
/* /* We have preallocated memory, use it. */
* Add to the freelist front to prioritize never-used entries: WRITE_ONCE(new_pool, *prealloc);
* required in case there are entries in the freelist, but their *prealloc = NULL;
* RCU cookie still belongs to the current RCU grace period
* (there can still be concurrent readers).
*/
list_add(&stack->free_list, &free_stacks);
counters[DEPOT_COUNTER_FREELIST_SIZE]++;
} }
if (!new_pool)
return false; /* new_pool and *prealloc are NULL */
/* Save reference to the pool to be used by depot_fetch_stack(). */ /* Save reference to the pool to be used by depot_fetch_stack(). */
stack_pools[pools_num] = pool; stack_pools[pools_num] = new_pool;
/*
* Stack depot tries to keep an extra pool allocated even before it runs
* out of space in the currently used pool.
*
* To indicate that a new preallocation is needed new_pool is reset to
* NULL; do not reset to NULL if we have reached the maximum number of
* pools.
*/
if (pools_num < DEPOT_MAX_POOLS)
WRITE_ONCE(new_pool, NULL);
else
WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
/* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
WRITE_ONCE(pools_num, pools_num + 1); WRITE_ONCE(pools_num, pools_num + 1);
ASSERT_EXCLUSIVE_WRITER(pools_num); ASSERT_EXCLUSIVE_WRITER(pools_num);
pool_offset = 0;
return true;
} }
/* Keeps the preallocated memory to be used for a new stack depot pool. */ /* Keeps the preallocated memory to be used for a new stack depot pool. */
...@@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc) ...@@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc)
* If a new pool is already saved or the maximum number of * If a new pool is already saved or the maximum number of
* pools is reached, do not use the preallocated memory. * pools is reached, do not use the preallocated memory.
*/ */
if (!new_pool_required) if (new_pool)
return; return;
/* WRITE_ONCE(new_pool, *prealloc);
* Use the preallocated memory for the new pool *prealloc = NULL;
* as long as we do not exceed the maximum number of pools.
*/
if (pools_num < DEPOT_MAX_POOLS) {
new_pool = *prealloc;
*prealloc = NULL;
}
/*
* At this point, either a new pool is kept or the maximum
* number of pools is reached. In either case, take note that
* keeping another pool is not required.
*/
WRITE_ONCE(new_pool_required, false);
} }
/* /*
* Try to initialize a new stack depot pool from either a previous or the * Try to initialize a new stack record from the current pool, a cached pool, or
* current pre-allocation, and release all its entries to the freelist. * the current pre-allocation.
*/ */
static bool depot_try_init_pool(void **prealloc) static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
{ {
struct stack_record *stack;
void *current_pool;
u32 pool_index;
lockdep_assert_held(&pool_lock); lockdep_assert_held(&pool_lock);
/* Check if we have a new pool saved and use it. */ if (pool_offset + size > DEPOT_POOL_SIZE) {
if (new_pool) { if (!depot_init_pool(prealloc))
depot_init_pool(new_pool); return NULL;
new_pool = NULL; }
/* Take note that we might need a new new_pool. */ if (WARN_ON_ONCE(pools_num < 1))
if (pools_num < DEPOT_MAX_POOLS) return NULL;
WRITE_ONCE(new_pool_required, true); pool_index = pools_num - 1;
current_pool = stack_pools[pool_index];
if (WARN_ON_ONCE(!current_pool))
return NULL;
return true; stack = current_pool + pool_offset;
}
/* Bail out if we reached the pool limit. */ /* Pre-initialize handle once. */
if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { stack->handle.pool_index = pool_index;
WARN_ONCE(1, "Stack depot reached limit capacity"); stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
return false; stack->handle.extra = 0;
} INIT_LIST_HEAD(&stack->hash_list);
/* Check if we have preallocated memory and use it. */ pool_offset += size;
if (*prealloc) {
depot_init_pool(*prealloc);
*prealloc = NULL;
return true;
}
return false; return stack;
} }
/* Try to find next free usable entry. */ /* Try to find next free usable entry from the freelist. */
static struct stack_record *depot_pop_free(void) static struct stack_record *depot_pop_free(void)
{ {
struct stack_record *stack; struct stack_record *stack;
...@@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void) ...@@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void)
* check the first entry. * check the first entry.
*/ */
stack = list_first_entry(&free_stacks, struct stack_record, free_list); stack = list_first_entry(&free_stacks, struct stack_record, free_list);
if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) if (!poll_state_synchronize_rcu(stack->rcu_state))
return NULL; return NULL;
list_del(&stack->free_list); list_del(&stack->free_list);
...@@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void) ...@@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void)
return stack; return stack;
} }
/*
 * Returns the size in bytes of a stack record sized for @nr_entries frames:
 * sizeof(struct stack_record) minus the unused tail of the entries array,
 * rounded up to a multiple of 1 << DEPOT_STACK_ALIGN.
 *
 * NOTE(review): @s appears to be used only for type/size computation (via
 * sizeof() and flex_array_size()) and not dereferenced - confirm against the
 * flex_array_size() definition.
 */
static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
{
/* Presumably the bytes of the entries array taken up by nr_entries frames. */
const size_t used = flex_array_size(s, entries, nr_entries);
/* Remainder of the entries array; this size_t subtraction wraps if used is larger. */
const size_t unused = sizeof(s->entries) - used;
/* Flag a request that does not fit into the entries array. */
WARN_ON_ONCE(sizeof(s->entries) < used);
return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
}
/* Allocates a new stack in a stack depot pool. */ /* Allocates a new stack in a stack depot pool. */
static struct stack_record * static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
{ {
struct stack_record *stack; struct stack_record *stack = NULL;
size_t record_size;
lockdep_assert_held(&pool_lock); lockdep_assert_held(&pool_lock);
/* This should already be checked by public API entry points. */ /* This should already be checked by public API entry points. */
if (WARN_ON_ONCE(!size)) if (WARN_ON_ONCE(!nr_entries))
return NULL; return NULL;
/* Check if we have a stack record to save the stack trace. */ /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
stack = depot_pop_free(); if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
if (!stack) { nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
/* No usable entries on the freelist - try to refill the freelist. */
if (!depot_try_init_pool(prealloc)) if (flags & STACK_DEPOT_FLAG_GET) {
return NULL; /*
* Evictable entries have to allocate the max. size so they may
* safely be re-used by differently sized allocations.
*/
record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
stack = depot_pop_free(); stack = depot_pop_free();
if (WARN_ON(!stack)) } else {
return NULL; record_size = depot_stack_record_size(stack, nr_entries);
} }
/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (!stack) {
if (size > CONFIG_STACKDEPOT_MAX_FRAMES) stack = depot_pop_free_pool(prealloc, record_size);
size = CONFIG_STACKDEPOT_MAX_FRAMES; if (!stack)
return NULL;
}
/* Save the stack trace. */ /* Save the stack trace. */
stack->hash = hash; stack->hash = hash;
stack->size = size; stack->size = nr_entries;
/* stack->handle is already filled in by depot_init_pool(). */ /* stack->handle is already filled in by depot_pop_free_pool(). */
refcount_set(&stack->count, 1); memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
if (flags & STACK_DEPOT_FLAG_GET) {
refcount_set(&stack->count, 1);
counters[DEPOT_COUNTER_REFD_ALLOCS]++;
counters[DEPOT_COUNTER_REFD_INUSE]++;
} else {
/* Warn on attempts to switch to refcounting this entry. */
refcount_set(&stack->count, REFCOUNT_SATURATED);
counters[DEPOT_COUNTER_PERSIST_COUNT]++;
counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
}
/* /*
* Let KMSAN know the stored stack record is initialized. This shall * Let KMSAN know the stored stack record is initialized. This shall
* prevent false positive reports if instrumented code accesses it. * prevent false positive reports if instrumented code accesses it.
*/ */
kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); kmsan_unpoison_memory(stack, record_size);
counters[DEPOT_COUNTER_ALLOCS]++;
counters[DEPOT_COUNTER_INUSE]++;
return stack; return stack;
} }
...@@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack) ...@@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack)
list_add_tail(&stack->free_list, &free_stacks); list_add_tail(&stack->free_list, &free_stacks);
counters[DEPOT_COUNTER_FREELIST_SIZE]++; counters[DEPOT_COUNTER_FREELIST_SIZE]++;
counters[DEPOT_COUNTER_FREES]++; counters[DEPOT_COUNTER_REFD_FREES]++;
counters[DEPOT_COUNTER_INUSE]--; counters[DEPOT_COUNTER_REFD_INUSE]--;
printk_deferred_exit(); printk_deferred_exit();
raw_spin_unlock_irqrestore(&pool_lock, flags); raw_spin_unlock_irqrestore(&pool_lock, flags);
...@@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, ...@@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
* Allocate memory for a new pool if required now: * Allocate memory for a new pool if required now:
* we won't be able to do that under the lock. * we won't be able to do that under the lock.
*/ */
if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
/* /*
* Zero out zone modifiers, as we don't have specific zone * Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic * requirements. Keep the flags related to allocation in atomic
...@@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, ...@@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
found = find_stack(bucket, entries, nr_entries, hash, depot_flags); found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (!found) { if (!found) {
struct stack_record *new = struct stack_record *new =
depot_alloc_stack(entries, nr_entries, hash, &prealloc); depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
if (new) { if (new) {
/* /*
......
...@@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) ...@@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
vaddr &= HPAGE_PUD_MASK; vaddr &= HPAGE_PUD_MASK;
pud = pfn_pud(args->pud_pfn, args->page_prot); pud = pfn_pud(args->pud_pfn, args->page_prot);
/*
* Some architectures have debug checks to make sure
* huge pud mappings are only found with devmap entries.
* For now, test with only devmap entries.
*/
pud = pud_mkdevmap(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud); set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page); flush_dcache_page(page);
pudp_set_wrprotect(args->mm, vaddr, args->pudp); pudp_set_wrprotect(args->mm, vaddr, args->pudp);
...@@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) ...@@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
WARN_ON(!pud_none(pud)); WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */ #endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot); pud = pfn_pud(args->pud_pfn, args->page_prot);
pud = pud_mkdevmap(pud);
pud = pud_wrprotect(pud); pud = pud_wrprotect(pud);
pud = pud_mkclean(pud); pud = pud_mkclean(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud); set_pud_at(args->mm, vaddr, args->pudp, pud);
...@@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) ...@@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
#endif /* __PAGETABLE_PMD_FOLDED */ #endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot); pud = pfn_pud(args->pud_pfn, args->page_prot);
pud = pud_mkdevmap(pud);
pud = pud_mkyoung(pud); pud = pud_mkyoung(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud); set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page); flush_dcache_page(page);
......
...@@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping, ...@@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping,
rcu_read_lock(); rcu_read_lock();
xas_for_each(&xas, folio, last_index) { xas_for_each(&xas, folio, last_index) {
int order;
unsigned long nr_pages; unsigned long nr_pages;
pgoff_t folio_first_index, folio_last_index; pgoff_t folio_first_index, folio_last_index;
/*
* Don't deref the folio. It is not pinned, and might
* get freed (and reused) underneath us.
*
* We *could* pin it, but that would be expensive for
* what should be a fast and lightweight syscall.
*
* Instead, derive all information of interest from
* the rcu-protected xarray.
*/
if (xas_retry(&xas, folio)) if (xas_retry(&xas, folio))
continue; continue;
order = xa_get_order(xas.xa, xas.xa_index);
nr_pages = 1 << order;
folio_first_index = round_down(xas.xa_index, 1 << order);
folio_last_index = folio_first_index + nr_pages - 1;
/* Folios might straddle the range boundaries, only count covered pages */
if (folio_first_index < first_index)
nr_pages -= first_index - folio_first_index;
if (folio_last_index > last_index)
nr_pages -= folio_last_index - last_index;
if (xa_is_value(folio)) { if (xa_is_value(folio)) {
/* page is evicted */ /* page is evicted */
void *shadow = (void *)folio; void *shadow = (void *)folio;
bool workingset; /* not used */ bool workingset; /* not used */
int order = xa_get_order(xas.xa, xas.xa_index);
nr_pages = 1 << order;
folio_first_index = round_down(xas.xa_index, 1 << order);
folio_last_index = folio_first_index + nr_pages - 1;
/* Folios might straddle the range boundaries, only count covered pages */
if (folio_first_index < first_index)
nr_pages -= first_index - folio_first_index;
if (folio_last_index > last_index)
nr_pages -= folio_last_index - last_index;
cs->nr_evicted += nr_pages; cs->nr_evicted += nr_pages;
...@@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping, ...@@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched; goto resched;
} }
nr_pages = folio_nr_pages(folio);
folio_first_index = folio_pgoff(folio);
folio_last_index = folio_first_index + nr_pages - 1;
/* Folios might straddle the range boundaries, only count covered pages */
if (folio_first_index < first_index)
nr_pages -= first_index - folio_first_index;
if (folio_last_index > last_index)
nr_pages -= folio_last_index - last_index;
/* page is in cache */ /* page is in cache */
cs->nr_cache += nr_pages; cs->nr_cache += nr_pages;
if (folio_test_dirty(folio)) if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY))
cs->nr_dirty += nr_pages; cs->nr_dirty += nr_pages;
if (folio_test_writeback(folio)) if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK))
cs->nr_writeback += nr_pages; cs->nr_writeback += nr_pages;
resched: resched:
......
...@@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags) ...@@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags)
{ {
depot_stack_handle_t stack; depot_stack_handle_t stack;
stack = kasan_save_stack(flags, stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC);
STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
kasan_set_track(track, stack); kasan_set_track(track, stack);
} }
...@@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, ...@@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
return true; return true;
/* /*
* If the object is not put into quarantine, it will likely be quickly * Note: Keep per-object metadata to allow KASAN print stack traces for
* reallocated. Thus, release its metadata now. * use-after-free-before-realloc bugs.
*/ */
kasan_release_object_meta(cache, object);
/* Let slab put the object onto the freelist. */ /* Let slab put the object onto the freelist. */
return false; return false;
......
...@@ -485,16 +485,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) ...@@ -485,16 +485,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
if (alloc_meta) { if (alloc_meta) {
/* Zero out alloc meta to mark it as invalid. */ /* Zero out alloc meta to mark it as invalid. */
__memset(alloc_meta, 0, sizeof(*alloc_meta)); __memset(alloc_meta, 0, sizeof(*alloc_meta));
/*
* Prepare the lock for saving auxiliary stack traces.
* Temporarily disable KASAN bug reporting to allow instrumented
* raw_spin_lock_init to access aux_lock, which resides inside
* of a redzone.
*/
kasan_disable_current();
raw_spin_lock_init(&alloc_meta->aux_lock);
kasan_enable_current();
} }
/* /*
...@@ -506,18 +496,8 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object) ...@@ -506,18 +496,8 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
static void release_alloc_meta(struct kasan_alloc_meta *meta) static void release_alloc_meta(struct kasan_alloc_meta *meta)
{ {
/* Evict the stack traces from stack depot. */ /* Zero out alloc meta to mark it as invalid. */
stack_depot_put(meta->alloc_track.stack); __memset(meta, 0, sizeof(*meta));
stack_depot_put(meta->aux_stack[0]);
stack_depot_put(meta->aux_stack[1]);
/*
* Zero out alloc meta to mark it as invalid but keep aux_lock
* initialized to avoid having to reinitialize it when another object
* is allocated in the same slot.
*/
__memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
__memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
} }
static void release_free_meta(const void *object, struct kasan_free_meta *meta) static void release_free_meta(const void *object, struct kasan_free_meta *meta)
...@@ -529,27 +509,10 @@ static void release_free_meta(const void *object, struct kasan_free_meta *meta) ...@@ -529,27 +509,10 @@ static void release_free_meta(const void *object, struct kasan_free_meta *meta)
if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META) if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META)
return; return;
/* Evict the stack trace from the stack depot. */
stack_depot_put(meta->free_track.stack);
/* Mark free meta as invalid. */ /* Mark free meta as invalid. */
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE; *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
} }
/*
 * Releases the per-object KASAN metadata of @object in @cache: looks up the
 * alloc meta and the free meta and, for each one that is present, passes it
 * to release_alloc_meta() or release_free_meta() respectively.
 */
void kasan_release_object_meta(struct kmem_cache *cache, const void *object)
{
struct kasan_alloc_meta *alloc_meta;
struct kasan_free_meta *free_meta;
/* A NULL lookup result means there is no alloc meta to release. */
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta)
release_alloc_meta(alloc_meta);
/* Likewise, skip the free meta when the lookup returns NULL. */
free_meta = kasan_get_free_meta(cache, object);
if (free_meta)
release_free_meta(object, free_meta);
}
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object) size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{ {
struct kasan_cache *info = &cache->kasan_info; struct kasan_cache *info = &cache->kasan_info;
...@@ -574,8 +537,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) ...@@ -574,8 +537,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
struct kmem_cache *cache; struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta; struct kasan_alloc_meta *alloc_meta;
void *object; void *object;
depot_stack_handle_t new_handle, old_handle;
unsigned long flags;
if (is_kfence_address(addr) || !slab) if (is_kfence_address(addr) || !slab)
return; return;
...@@ -586,33 +547,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags) ...@@ -586,33 +547,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
if (!alloc_meta) if (!alloc_meta)
return; return;
new_handle = kasan_save_stack(0, depot_flags);
/*
* Temporarily disable KASAN bug reporting to allow instrumented
* spinlock functions to access aux_lock, which resides inside of a
* redzone.
*/
kasan_disable_current();
raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags);
old_handle = alloc_meta->aux_stack[1];
alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
alloc_meta->aux_stack[0] = new_handle; alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags);
raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags);
kasan_enable_current();
stack_depot_put(old_handle);
} }
void kasan_record_aux_stack(void *addr) void kasan_record_aux_stack(void *addr)
{ {
return __kasan_record_aux_stack(addr, return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC);
STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
} }
void kasan_record_aux_stack_noalloc(void *addr) void kasan_record_aux_stack_noalloc(void *addr)
{ {
return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET); return __kasan_record_aux_stack(addr, 0);
} }
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
...@@ -623,7 +569,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags) ...@@ -623,7 +569,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
if (!alloc_meta) if (!alloc_meta)
return; return;
/* Evict previous stack traces (might exist for krealloc or mempool). */ /* Invalidate previous stack traces (might exist for krealloc or mempool). */
release_alloc_meta(alloc_meta); release_alloc_meta(alloc_meta);
kasan_save_track(&alloc_meta->alloc_track, flags); kasan_save_track(&alloc_meta->alloc_track, flags);
...@@ -637,7 +583,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object) ...@@ -637,7 +583,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object)
if (!free_meta) if (!free_meta)
return; return;
/* Evict previous stack trace (might exist for mempool). */ /* Invalidate previous stack trace (might exist for mempool). */
release_free_meta(object, free_meta); release_free_meta(object, free_meta);
kasan_save_track(&free_meta->free_track, 0); kasan_save_track(&free_meta->free_track, 0);
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include <linux/kasan.h> #include <linux/kasan.h>
#include <linux/kasan-tags.h> #include <linux/kasan-tags.h>
#include <linux/kfence.h> #include <linux/kfence.h>
#include <linux/spinlock.h>
#include <linux/stackdepot.h> #include <linux/stackdepot.h>
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
...@@ -265,13 +264,6 @@ struct kasan_global { ...@@ -265,13 +264,6 @@ struct kasan_global {
struct kasan_alloc_meta { struct kasan_alloc_meta {
struct kasan_track alloc_track; struct kasan_track alloc_track;
/* Free track is stored in kasan_free_meta. */ /* Free track is stored in kasan_free_meta. */
/*
* aux_lock protects aux_stack from accesses from concurrent
* kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping
* on RT kernels, as kasan_record_aux_stack_noalloc can be called from
* non-sleepable contexts.
*/
raw_spinlock_t aux_lock;
depot_stack_handle_t aux_stack[2]; depot_stack_handle_t aux_stack[2];
}; };
...@@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache, ...@@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache, struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
const void *object); const void *object);
void kasan_init_object_meta(struct kmem_cache *cache, const void *object); void kasan_init_object_meta(struct kmem_cache *cache, const void *object);
void kasan_release_object_meta(struct kmem_cache *cache, const void *object);
#else #else
static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { } static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { }
static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { }
#endif #endif
depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags); depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags);
......
...@@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) ...@@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
void *object = qlink_to_object(qlink, cache); void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object); struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object);
kasan_release_object_meta(cache, object); /*
* Note: Keep per-object metadata to allow KASAN to print stack traces for
* use-after-free-before-realloc bugs.
*/
/* /*
* If init_on_free is enabled and KASAN's free metadata is stored in * If init_on_free is enabled and KASAN's free metadata is stored in
......
...@@ -2522,6 +2522,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio) ...@@ -2522,6 +2522,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
if (managed_zone(pgdat->node_zones + z)) if (managed_zone(pgdat->node_zones + z))
break; break;
} }
/*
* If there are no managed zones, it should not proceed
* further.
*/
if (z < 0)
return 0;
wakeup_kswapd(pgdat->node_zones + z, 0, wakeup_kswapd(pgdat->node_zones + z, 0,
folio_order(folio), ZONE_MOVABLE); folio_order(folio), ZONE_MOVABLE);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment