Commit 5959725a authored by Vlastimil Babka

Merge branch 'slab/for-6.1/slub_validation_locking' into slab/for-next

My series [1] to fix validation races for caches with debugging enabled.

By decoupling the debug cache operations further from the non-debug fastpaths,
additional locking simplifications became possible and were done afterwards.

Additional cleanup of PREEMPT_RT-specific code on top, by Thomas Gleixner.

[1] https://lore.kernel.org/all/20220823170400.26546-1-vbabka@suse.cz/
parents 3662c13e 1f04b07d
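The key abstraction the series adds in the diff below is the USE_LOCKLESS_FAST_PATH() helper, which replaces open-coded IS_ENABLED(CONFIG_PREEMPT_RT) checks wherever the code must choose between the lockless cmpxchg-style fastpath and the path that takes the local lock. What follows is only a minimal userspace sketch of that compile-time selection pattern; the toy_cache struct, toy_free() and the pthread/stdatomic stand-ins are invented for illustration and are not kernel code.

/*
 * Userspace sketch of the USE_LOCKLESS_FAST_PATH() selection pattern
 * (illustration only; not mm/slub.c code).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

#ifndef CONFIG_PREEMPT_RT
#define USE_LOCKLESS_FAST_PATH()        (true)
#else
#define USE_LOCKLESS_FAST_PATH()        (false)
#endif

struct toy_cache {
        _Atomic long counter;           /* stands in for the cpu freelist/tid pair */
        pthread_mutex_t lock;           /* stands in for the local lock */
};

static void toy_free(struct toy_cache *c)
{
        if (USE_LOCKLESS_FAST_PATH()) {
                /* lockless variant: a single atomic RMW, like the cmpxchg fastpath */
                atomic_fetch_add_explicit(&c->counter, 1, memory_order_relaxed);
        } else {
                /* PREEMPT_RT-style variant: always take the lock, still cheap */
                pthread_mutex_lock(&c->lock);
                c->counter++;
                pthread_mutex_unlock(&c->lock);
        }
}

int main(void)
{
        struct toy_cache c = { .counter = 0, .lock = PTHREAD_MUTEX_INITIALIZER };

        toy_free(&c);
        printf("counter = %ld\n", (long)atomic_load(&c.counter));
        return 0;
}

Built with e.g. "gcc -pthread toy.c" the lockless branch is compiled in; adding -DCONFIG_PREEMPT_RT selects the locked branch instead, mirroring how the kernel picks one variant per configuration at build time.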
@@ -50,7 +50,7 @@
  *   1. slab_mutex (Global Mutex)
  *   2. node->list_lock (Spinlock)
  *   3. kmem_cache->cpu_slab->lock (Local lock)
- *   4. slab_lock(slab) (Only on some arches or for debugging)
+ *   4. slab_lock(slab) (Only on some arches)
  *   5. object_map_lock (Only for debugging)
  *
  *   slab_mutex
@@ -64,8 +64,9 @@
  *   The slab_lock is a wrapper around the page lock, thus it is a bit
  *   spinlock.
  *
- *   The slab_lock is only used for debugging and on arches that do not
- *   have the ability to do a cmpxchg_double. It only protects:
+ *   The slab_lock is only used on arches that do not have the ability
+ *   to do a cmpxchg_double. It only protects:
+ *
  *   A. slab->freelist   -> List of free objects in a slab
  *   B. slab->inuse      -> Number of objects in use
  *   C. slab->objects    -> Number of objects in slab
@@ -94,15 +95,20 @@
  *   allocating a long series of objects that fill up slabs does not require
  *   the list lock.
  *
+ *   For debug caches, all allocations are forced to go through a list_lock
+ *   protected region to serialize against concurrent validation.
+ *
  *   cpu_slab->lock local lock
  *
  *   This locks protect slowpath manipulation of all kmem_cache_cpu fields
  *   except the stat counters. This is a percpu structure manipulated only by
  *   the local cpu, so the lock protects against being preempted or interrupted
  *   by an irq. Fast path operations rely on lockless operations instead.
- *   On PREEMPT_RT, the local lock does not actually disable irqs (and thus
- *   prevent the lockless operations), so fastpath operations also need to take
- *   the lock and are no longer lockless.
+ *
+ *   On PREEMPT_RT, the local lock neither disables interrupts nor preemption
+ *   which means the lockless fastpath cannot be used as it might interfere with
+ *   an in-progress slow path operations. In this case the local lock is always
+ *   taken but it still utilizes the freelist for the common operations.
  *
  *   lockless fastpaths
  *
@@ -163,8 +169,9 @@
  * function call even on !PREEMPT_RT, use inline preempt_disable() there.
  */
 #ifndef CONFIG_PREEMPT_RT
 #define slub_get_cpu_ptr(var)   get_cpu_ptr(var)
 #define slub_put_cpu_ptr(var)   put_cpu_ptr(var)
+#define USE_LOCKLESS_FAST_PATH()        (true)
 #else
 #define slub_get_cpu_ptr(var)           \
 ({                                      \
@@ -176,6 +183,7 @@ do { \
         (void)(var);            \
         migrate_enable();       \
 } while (0)
+#define USE_LOCKLESS_FAST_PATH()        (false)
 #endif
 #ifdef CONFIG_SLUB_DEBUG
@@ -447,7 +455,7 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
 /*
  * Per slab locking using the pagelock
  */
-static __always_inline void __slab_lock(struct slab *slab)
+static __always_inline void slab_lock(struct slab *slab)
 {
         struct page *page = slab_page(slab);
@@ -455,7 +463,7 @@ static __always_inline void __slab_lock(struct slab *slab)
         bit_spin_lock(PG_locked, &page->flags);
 }
-static __always_inline void __slab_unlock(struct slab *slab)
+static __always_inline void slab_unlock(struct slab *slab)
 {
         struct page *page = slab_page(slab);
@@ -463,31 +471,19 @@ static __always_inline void __slab_unlock(struct slab *slab)
         __bit_spin_unlock(PG_locked, &page->flags);
 }
-static __always_inline void slab_lock(struct slab *slab, unsigned long *flags)
-{
-        if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                local_irq_save(*flags);
-        __slab_lock(slab);
-}
-static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags)
-{
-        __slab_unlock(slab);
-        if (IS_ENABLED(CONFIG_PREEMPT_RT))
-                local_irq_restore(*flags);
-}
 /*
  * Interrupts must be disabled (for the fallback code to work right), typically
- * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different
- * so we disable interrupts as part of slab_[un]lock().
+ * by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is
+ * part of bit_spin_lock(), is sufficient because the policy is not to allow any
+ * allocation/ free operation in hardirq context. Therefore nothing can
+ * interrupt the operation.
  */
 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
                 void *freelist_old, unsigned long counters_old,
                 void *freelist_new, unsigned long counters_new,
                 const char *n)
 {
-        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+        if (USE_LOCKLESS_FAST_PATH())
                 lockdep_assert_irqs_disabled();
 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -499,18 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab
         } else
 #endif
         {
-                /* init to 0 to prevent spurious warnings */
-                unsigned long flags = 0;
-                slab_lock(slab, &flags);
+                slab_lock(slab);
                 if (slab->freelist == freelist_old &&
                                         slab->counters == counters_old) {
                         slab->freelist = freelist_new;
                         slab->counters = counters_new;
-                        slab_unlock(slab, &flags);
+                        slab_unlock(slab);
                         return true;
                 }
-                slab_unlock(slab, &flags);
+                slab_unlock(slab);
         }
         cpu_relax();
@@ -541,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
                 unsigned long flags;
                 local_irq_save(flags);
-                __slab_lock(slab);
+                slab_lock(slab);
                 if (slab->freelist == freelist_old &&
                                         slab->counters == counters_old) {
                         slab->freelist = freelist_new;
                         slab->counters = counters_new;
-                        __slab_unlock(slab);
+                        slab_unlock(slab);
                         local_irq_restore(flags);
                         return true;
                 }
-                __slab_unlock(slab);
+                slab_unlock(slab);
                 local_irq_restore(flags);
         }
@@ -566,7 +559,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
 #ifdef CONFIG_SLUB_DEBUG
 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
-static DEFINE_RAW_SPINLOCK(object_map_lock);
+static DEFINE_SPINLOCK(object_map_lock);
 static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
                        struct slab *slab)
@@ -600,30 +593,6 @@ static bool slab_add_kunit_errors(void)
 static inline bool slab_add_kunit_errors(void) { return false; }
 #endif
-/*
- * Determine a map of objects in use in a slab.
- *
- * Node listlock must be held to guarantee that the slab does
- * not vanish from under us.
- */
-static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
-        __acquires(&object_map_lock)
-{
-        VM_BUG_ON(!irqs_disabled());
-        raw_spin_lock(&object_map_lock);
-        __fill_map(object_map, s, slab);
-        return object_map;
-}
-static void put_map(unsigned long *map) __releases(&object_map_lock)
-{
-        VM_BUG_ON(map != object_map);
-        raw_spin_unlock(&object_map_lock);
-}
 static inline unsigned int size_from_object(struct kmem_cache *s)
 {
         if (s->flags & SLAB_RED_ZONE)
@@ -1329,17 +1298,14 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
 }
 static noinline int alloc_debug_processing(struct kmem_cache *s,
-                        struct slab *slab,
-                        void *object, unsigned long addr)
+                        struct slab *slab, void *object)
 {
         if (s->flags & SLAB_CONSISTENCY_CHECKS) {
                 if (!alloc_consistency_checks(s, slab, object))
                         goto bad;
         }
-        /* Success perform special debug activities for allocs */
-        if (s->flags & SLAB_STORE_USER)
-                set_track(s, object, TRACK_ALLOC, addr);
+        /* Success. Perform special debug activities for allocs */
         trace(s, slab, object, 1);
         init_object(s, object, SLUB_RED_ACTIVE);
         return 1;
@@ -1390,63 +1356,6 @@ static inline int free_consistency_checks(struct kmem_cache *s,
         return 1;
 }
-/* Supports checking bulk free of a constructed freelist */
-static noinline int free_debug_processing(
-        struct kmem_cache *s, struct slab *slab,
-        void *head, void *tail, int bulk_cnt,
-        unsigned long addr)
-{
-        struct kmem_cache_node *n = get_node(s, slab_nid(slab));
-        void *object = head;
-        int cnt = 0;
-        unsigned long flags, flags2;
-        int ret = 0;
-        depot_stack_handle_t handle = 0;
-        if (s->flags & SLAB_STORE_USER)
-                handle = set_track_prepare();
-        spin_lock_irqsave(&n->list_lock, flags);
-        slab_lock(slab, &flags2);
-        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
-                if (!check_slab(s, slab))
-                        goto out;
-        }
-next_object:
-        cnt++;
-        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
-                if (!free_consistency_checks(s, slab, object, addr))
-                        goto out;
-        }
-        if (s->flags & SLAB_STORE_USER)
-                set_track_update(s, object, TRACK_FREE, addr, handle);
-        trace(s, slab, object, 0);
-        /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
-        init_object(s, object, SLUB_RED_INACTIVE);
-        /* Reached end of constructed freelist yet? */
-        if (object != tail) {
-                object = get_freepointer(s, object);
-                goto next_object;
-        }
-        ret = 1;
-out:
-        if (cnt != bulk_cnt)
-                slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
-                         bulk_cnt, cnt);
-        slab_unlock(slab, &flags2);
-        spin_unlock_irqrestore(&n->list_lock, flags);
-        if (!ret)
-                slab_fix(s, "Object at 0x%p not freed", object);
-        return ret;
-}
 /*
  * Parse a block of slub_debug options. Blocks are delimited by ';'
  *
@@ -1666,16 +1575,18 @@ static inline
 void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
 static inline int alloc_debug_processing(struct kmem_cache *s,
-        struct slab *slab, void *object, unsigned long addr) { return 0; }
-static inline int free_debug_processing(
+        struct slab *slab, void *object) { return 0; }
+static inline void free_debug_processing(
         struct kmem_cache *s, struct slab *slab,
         void *head, void *tail, int bulk_cnt,
-        unsigned long addr) { return 0; }
+        unsigned long addr) {}
 static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
 static inline int check_object(struct kmem_cache *s, struct slab *slab,
                         void *object, u8 val) { return 1; }
+static inline void set_track(struct kmem_cache *s, void *object,
+                             enum track_item alloc, unsigned long addr) {}
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
                             struct slab *slab) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -1967,11 +1878,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
          */
                 slab = alloc_slab_page(alloc_gfp, node, oo);
                 if (unlikely(!slab))
-                        goto out;
+                        return NULL;
                 stat(s, ORDER_FALLBACK);
         }
         slab->objects = oo_objects(oo);
+        slab->inuse = 0;
+        slab->frozen = 0;
         account_slab(slab, oo_order(oo), s, flags);
@@ -1998,15 +1911,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
                 set_freepointer(s, p, NULL);
         }
-        slab->inuse = slab->objects;
-        slab->frozen = 1;
-out:
-        if (!slab)
-                return NULL;
-        inc_slabs_node(s, slab_nid(slab), slab->objects);
         return slab;
 }
@@ -2093,6 +1997,75 @@ static inline void remove_partial(struct kmem_cache_node *n,
         n->nr_partial--;
 }
+/*
+ * Called only for kmem_cache_debug() caches instead of acquire_slab(), with a
+ * slab from the n->partial list. Remove only a single object from the slab, do
+ * the alloc_debug_processing() checks and leave the slab on the list, or move
+ * it to full list if it was the last free object.
+ */
+static void *alloc_single_from_partial(struct kmem_cache *s,
+                struct kmem_cache_node *n, struct slab *slab)
+{
+        void *object;
+        lockdep_assert_held(&n->list_lock);
+        object = slab->freelist;
+        slab->freelist = get_freepointer(s, object);
+        slab->inuse++;
+        if (!alloc_debug_processing(s, slab, object)) {
+                remove_partial(n, slab);
+                return NULL;
+        }
+        if (slab->inuse == slab->objects) {
+                remove_partial(n, slab);
+                add_full(s, n, slab);
+        }
+        return object;
+}
+/*
+ * Called only for kmem_cache_debug() caches to allocate from a freshly
+ * allocated slab. Allocate a single object instead of whole freelist
+ * and put the slab to the partial (or full) list.
+ */
+static void *alloc_single_from_new_slab(struct kmem_cache *s,
+                                        struct slab *slab)
+{
+        int nid = slab_nid(slab);
+        struct kmem_cache_node *n = get_node(s, nid);
+        unsigned long flags;
+        void *object;
+        object = slab->freelist;
+        slab->freelist = get_freepointer(s, object);
+        slab->inuse = 1;
+        if (!alloc_debug_processing(s, slab, object))
+                /*
+                 * It's not really expected that this would fail on a
+                 * freshly allocated slab, but a concurrent memory
+                 * corruption in theory could cause that.
+                 */
+                return NULL;
+        spin_lock_irqsave(&n->list_lock, flags);
+        if (slab->inuse == slab->objects)
+                add_full(s, n, slab);
+        else
+                add_partial(n, slab, DEACTIVATE_TO_HEAD);
+        inc_slabs_node(s, nid, slab->objects);
+        spin_unlock_irqrestore(&n->list_lock, flags);
+        return object;
+}
 /*
  * Remove slab from the partial list, freeze it and
  * return the pointer to the freelist.
@@ -2173,6 +2146,13 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
                 if (!pfmemalloc_match(slab, gfpflags))
                         continue;
+                if (kmem_cache_debug(s)) {
+                        object = alloc_single_from_partial(s, n, slab);
+                        if (object)
+                                break;
+                        continue;
+                }
                 t = acquire_slab(s, n, slab, object == NULL);
                 if (!t)
                         break;
@@ -2779,6 +2759,110 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
 {
         return atomic_long_read(&n->total_objects);
 }
+/* Supports checking bulk free of a constructed freelist */
+static noinline void free_debug_processing(
+        struct kmem_cache *s, struct slab *slab,
+        void *head, void *tail, int bulk_cnt,
+        unsigned long addr)
+{
+        struct kmem_cache_node *n = get_node(s, slab_nid(slab));
+        struct slab *slab_free = NULL;
+        void *object = head;
+        int cnt = 0;
+        unsigned long flags;
+        bool checks_ok = false;
+        depot_stack_handle_t handle = 0;
+        if (s->flags & SLAB_STORE_USER)
+                handle = set_track_prepare();
+        spin_lock_irqsave(&n->list_lock, flags);
+        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+                if (!check_slab(s, slab))
+                        goto out;
+        }
+        if (slab->inuse < bulk_cnt) {
+                slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
+                         slab->inuse, bulk_cnt);
+                goto out;
+        }
+next_object:
+        if (++cnt > bulk_cnt)
+                goto out_cnt;
+        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
+                if (!free_consistency_checks(s, slab, object, addr))
+                        goto out;
+        }
+        if (s->flags & SLAB_STORE_USER)
+                set_track_update(s, object, TRACK_FREE, addr, handle);
+        trace(s, slab, object, 0);
+        /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
+        init_object(s, object, SLUB_RED_INACTIVE);
+        /* Reached end of constructed freelist yet? */
+        if (object != tail) {
+                object = get_freepointer(s, object);
+                goto next_object;
+        }
+        checks_ok = true;
+out_cnt:
+        if (cnt != bulk_cnt)
+                slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
+                         bulk_cnt, cnt);
+out:
+        if (checks_ok) {
+                void *prior = slab->freelist;
+                /* Perform the actual freeing while we still hold the locks */
+                slab->inuse -= cnt;
+                set_freepointer(s, tail, prior);
+                slab->freelist = head;
+                /* Do we need to remove the slab from full or partial list? */
+                if (!prior) {
+                        remove_full(s, n, slab);
+                } else if (slab->inuse == 0 &&
+                           n->nr_partial >= s->min_partial) {
+                        remove_partial(n, slab);
+                        stat(s, FREE_REMOVE_PARTIAL);
+                }
+                /* Do we need to discard the slab or add to partial list? */
+                if (slab->inuse == 0 && n->nr_partial >= s->min_partial) {
+                        slab_free = slab;
+                } else if (!prior) {
+                        add_partial(n, slab, DEACTIVATE_TO_TAIL);
+                        stat(s, FREE_ADD_PARTIAL);
+                }
+        }
+        if (slab_free) {
+                /*
+                 * Update the counters while still holding n->list_lock to
+                 * prevent spurious validation warnings
+                 */
+                dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
+        }
+        spin_unlock_irqrestore(&n->list_lock, flags);
+        if (!checks_ok)
+                slab_fix(s, "Object at 0x%p not freed", object);
+        if (slab_free) {
+                stat(s, FREE_SLAB);
+                free_slab(s, slab_free);
+        }
+}
 #endif /* CONFIG_SLUB_DEBUG */
 #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
@@ -3027,36 +3111,52 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                 return NULL;
         }
+        stat(s, ALLOC_SLAB);
+        if (kmem_cache_debug(s)) {
+                freelist = alloc_single_from_new_slab(s, slab);
+                if (unlikely(!freelist))
+                        goto new_objects;
+                if (s->flags & SLAB_STORE_USER)
+                        set_track(s, freelist, TRACK_ALLOC, addr);
+                return freelist;
+        }
         /*
          * No other reference to the slab yet so we can
          * muck around with it freely without cmpxchg
          */
         freelist = slab->freelist;
         slab->freelist = NULL;
+        slab->inuse = slab->objects;
+        slab->frozen = 1;
-        stat(s, ALLOC_SLAB);
+        inc_slabs_node(s, slab_nid(slab), slab->objects);
 check_new_slab:
         if (kmem_cache_debug(s)) {
-                if (!alloc_debug_processing(s, slab, freelist, addr)) {
-                        /* Slab failed checks. Next slab needed */
-                        goto new_slab;
-                } else {
-                        /*
-                         * For debug case, we don't load freelist so that all
-                         * allocations go through alloc_debug_processing()
-                         */
-                        goto return_single;
-                }
+                /*
+                 * For debug caches here we had to go through
+                 * alloc_single_from_partial() so just store the tracking info
+                 * and return the object
+                 */
+                if (s->flags & SLAB_STORE_USER)
+                        set_track(s, freelist, TRACK_ALLOC, addr);
+                return freelist;
         }
-        if (unlikely(!pfmemalloc_match(slab, gfpflags)))
+        if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
                 /*
                  * For !pfmemalloc_match() case we don't load freelist so that
                  * we don't make further mismatched allocations easier.
                  */
-                goto return_single;
+                deactivate_slab(s, slab, get_freepointer(s, freelist));
+                return freelist;
+        }
 retry_load_slab:
@@ -3080,11 +3180,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
         c->slab = slab;
         goto load_freelist;
-return_single:
-        deactivate_slab(s, slab, get_freepointer(s, freelist));
-        return freelist;
 }
 /*
@@ -3188,14 +3283,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
         object = c->freelist;
         slab = c->slab;
-        /*
-         * We cannot use the lockless fastpath on PREEMPT_RT because if a
-         * slowpath has taken the local_lock_irqsave(), it is not protected
-         * against a fast path operation in an irq handler. So we need to take
-         * the slow path which uses local_lock. It is still relatively fast if
-         * there is a suitable cpu freelist.
-         */
-        if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
+        if (!USE_LOCKLESS_FAST_PATH() ||
             unlikely(!object || !slab || !node_match(slab, node))) {
                 object = __slab_alloc(s, gfpflags, node, addr, c);
         } else {
@@ -3309,9 +3398,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
         if (kfence_free(head))
                 return;
-        if (kmem_cache_debug(s) &&
-            !free_debug_processing(s, slab, head, tail, cnt, addr))
+        if (kmem_cache_debug(s)) {
+                free_debug_processing(s, slab, head, tail, cnt, addr);
                 return;
+        }
         do {
                 if (unlikely(n)) {
@@ -3431,6 +3521,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
         void *tail_obj = tail ? : head;
         struct kmem_cache_cpu *c;
         unsigned long tid;
+        void **freelist;
 redo:
         /*
@@ -3445,9 +3536,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
         /* Same with comment on barrier() in slab_alloc_node() */
         barrier();
-        if (likely(slab == c->slab)) {
-#ifndef CONFIG_PREEMPT_RT
-                void **freelist = READ_ONCE(c->freelist);
+        if (unlikely(slab != c->slab)) {
+                __slab_free(s, slab, head, tail_obj, cnt, addr);
+                return;
+        }
+        if (USE_LOCKLESS_FAST_PATH()) {
+                freelist = READ_ONCE(c->freelist);
                 set_freepointer(s, tail_obj, freelist);
@@ -3459,16 +3554,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
                         note_cmpxchg_failure("slab_free", s, tid);
                         goto redo;
                 }
-#else /* CONFIG_PREEMPT_RT */
-                /*
-                 * We cannot use the lockless fastpath on PREEMPT_RT because if
-                 * a slowpath has taken the local_lock_irqsave(), it is not
-                 * protected against a fast path operation in an irq handler. So
-                 * we need to take the local_lock. We shouldn't simply defer to
-                 * __slab_free() as that wouldn't use the cpu freelist at all.
-                 */
-                void **freelist;
+        } else {
+                /* Update the free list under the local lock */
                 local_lock(&s->cpu_slab->lock);
                 c = this_cpu_ptr(s->cpu_slab);
                 if (unlikely(slab != c->slab)) {
@@ -3483,11 +3570,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
                 c->tid = next_tid(tid);
                 local_unlock(&s->cpu_slab->lock);
-#endif
+        }
         stat(s, FREE_FASTPATH);
-        } else
-                __slab_free(s, slab, head, tail_obj, cnt, addr);
 }
 static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
@@ -3896,6 +3980,7 @@ static void early_kmem_cache_node_alloc(int node)
         slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
         BUG_ON(!slab);
+        inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects);
         if (slab_nid(slab) != node) {
                 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
                 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
@@ -3910,7 +3995,6 @@ static void early_kmem_cache_node_alloc(int node)
         n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
         slab->freelist = get_freepointer(kmem_cache_node, n);
         slab->inuse = 1;
-        slab->frozen = 0;
         kmem_cache_node->node[node] = n;
         init_kmem_cache_node(n);
         inc_slabs_node(kmem_cache_node, node, slab->objects);
@@ -4197,23 +4281,21 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
 {
 #ifdef CONFIG_SLUB_DEBUG
         void *addr = slab_address(slab);
-        unsigned long flags;
-        unsigned long *map;
         void *p;
         slab_err(s, slab, text, s->name);
-        slab_lock(slab, &flags);
-        map = get_map(s, slab);
+        spin_lock(&object_map_lock);
+        __fill_map(object_map, s, slab);
         for_each_object(p, s, addr, slab->objects) {
-                if (!test_bit(__obj_to_index(s, addr, p), map)) {
+                if (!test_bit(__obj_to_index(s, addr, p), object_map)) {
                         pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
                         print_tracking(s, p);
                 }
         }
-        put_map(map);
-        slab_unlock(slab, &flags);
+        spin_unlock(&object_map_lock);
 #endif
 }
@@ -4462,6 +4544,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
                 if (free == slab->objects) {
                         list_move(&slab->slab_list, &discard);
                         n->nr_partial--;
+                        dec_slabs_node(s, node, slab->objects);
                 } else if (free <= SHRINK_PROMOTE_MAX)
                         list_move(&slab->slab_list, promote + free - 1);
         }
@@ -4477,7 +4560,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
         /* Release empty slabs */
         list_for_each_entry_safe(slab, t, &discard, slab_list)
-                discard_slab(s, slab);
+                free_slab(s, slab);
         if (slabs_node(s, node))
                 ret = 1;
@@ -4779,12 +4862,9 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab,
 {
         void *p;
         void *addr = slab_address(slab);
-        unsigned long flags;
-        slab_lock(slab, &flags);
         if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
-                goto unlock;
+                return;
         /* Now we know that a valid freelist exists */
         __fill_map(obj_map, s, slab);
@@ -4795,8 +4875,6 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab,
                 if (!check_object(s, slab, p, val))
                         break;
         }
-unlock:
-        slab_unlock(slab, &flags);
 }
 static int validate_slab_node(struct kmem_cache *s,
@@ -5400,7 +5478,7 @@ static ssize_t validate_store(struct kmem_cache *s,
 {
         int ret = -EINVAL;
-        if (buf[0] == '1') {
+        if (buf[0] == '1' && kmem_cache_debug(s)) {
                 ret = validate_slab_cache(s);
                 if (ret >= 0)
                         ret = length;
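Taken together, the hunks above make every allocation and free for a kmem_cache_debug() cache go through a region protected by n->list_lock, so a validation pass holding the same lock can no longer observe a half-updated slab. The sketch below is only a userspace illustration of that idea; the toy_node / toy_free_one() / toy_validate() names and the pthread mutex standing in for n->list_lock are invented for the example and are not kernel code.

/*
 * Userspace illustration (not mm/slub.c code): when updates to the freelist
 * and its counter happen under the same lock that a validator takes, the
 * validator always sees a consistent snapshot.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_node { struct toy_node *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for n->list_lock */
static struct toy_node *freelist;                             /* stands in for a slab freelist */
static int nr_free;                                           /* stands in for the inuse/free counters */

static void toy_free_one(struct toy_node *obj)
{
        pthread_mutex_lock(&list_lock);
        obj->next = freelist;   /* list update ...                      */
        freelist = obj;
        nr_free++;              /* ... and counter update stay together */
        pthread_mutex_unlock(&list_lock);
}

static void *toy_validate(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&list_lock);
        int walked = 0;
        for (struct toy_node *p = freelist; p; p = p->next)
                walked++;
        /* Under the lock, the walk always matches the counter. */
        printf("validate: counted %d, expected %d\n", walked, nr_free);
        pthread_mutex_unlock(&list_lock);
        return NULL;
}

int main(void)
{
        pthread_t v;

        pthread_create(&v, NULL, toy_validate, NULL);   /* runs concurrently with the frees */
        for (int i = 0; i < 100000; i++)
                toy_free_one(malloc(sizeof(struct toy_node)));
        pthread_join(v, NULL);
        return 0;
}

Because both the updates and the walk happen under list_lock, the printed count always matches the counter; dropping the lock from either side reintroduces exactly the kind of race against validation that this series removes.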