Commit ca1a46d6 authored by Linus Torvalds

Merge tag 'slab-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Separate struct slab from struct page - an offshoot of the page folio
   work.

   Struct page fields used by the slab allocators are moved from struct
   page to a new struct slab, which uses the same physical storage.
   Similar to struct folio, it is always a head page. This brings better
   type safety, separation of large kmalloc allocations from true slabs,
   and cleanup of the related objcg code; a short sketch of the resulting
   conversion pattern follows below.

 - A SLAB_MERGE_DEFAULT config optimization.
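
The conversion pattern that callers across the tree are moved to boils down
to a short sketch (illustration only, not code from this series; it assumes
the virt_to_folio(), folio_test_slab() and folio_slab() helpers added in the
diff below, and the function name is made up):

    static void report_backing(const void *objp)
    {
            struct folio *folio = virt_to_folio(objp);

            if (folio_test_slab(folio)) {
                    /* Typed view of the same memory the folio occupies. */
                    struct slab *slab = folio_slab(folio);

                    pr_debug("%p lives in cache %s\n", objp,
                             slab->slab_cache->name);
            } else {
                    /* Large kmalloc: page allocator memory, no struct slab. */
                    pr_debug("%p is not slab-backed\n", objp);
            }
    }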

* tag 'slab-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (33 commits)
  mm/slob: Remove unnecessary page_mapcount_reset() function call
  bootmem: Use page->index instead of page->freelist
  zsmalloc: Stop using slab fields in struct page
  mm/slub: Define struct slab fields for CONFIG_SLUB_CPU_PARTIAL only when enabled
  mm/slub: Simplify struct slab slabs field definition
  mm/sl*b: Differentiate struct slab fields by sl*b implementations
  mm/kfence: Convert kfence_guarded_alloc() to struct slab
  mm/kasan: Convert to struct folio and struct slab
  mm/slob: Convert SLOB to use struct slab and struct folio
  mm/memcg: Convert slab objcgs from struct page to struct slab
  mm: Convert struct page to struct slab in functions used by other subsystems
  mm/slab: Finish struct page to struct slab conversion
  mm/slab: Convert most struct page to struct slab by spatch
  mm/slab: Convert kmem_getpages() and kmem_freepages() to struct slab
  mm/slub: Finish struct page to struct slab conversion
  mm/slub: Convert most struct page to struct slab by spatch
  mm/slub: Convert pfmemalloc_match() to take a struct slab
  mm/slub: Convert __free_slab() to use struct slab
  mm/slub: Convert alloc_slab_page() to return a struct slab
  mm/slub: Convert print_page_info() to print_slab_info()
  ...
parents d93aebbd 9d6c59c1
......@@ -981,7 +981,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
magic = (unsigned long)page->freelist;
magic = page->index;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
......
......@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
*/
static inline void free_bootmem_page(struct page *page)
{
unsigned long magic = (unsigned long)page->freelist;
unsigned long magic = page->index;
/*
* The reserve_bootmem_region sets the reserved flag on bootmem
......
......@@ -9,6 +9,7 @@
struct kmem_cache;
struct page;
struct slab;
struct vm_struct;
struct task_struct;
......@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
return 0;
}
void __kasan_poison_slab(struct page *page);
static __always_inline void kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab);
static __always_inline void kasan_poison_slab(struct slab *slab)
{
if (kasan_enabled())
__kasan_poison_slab(page);
__kasan_poison_slab(slab);
}
void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
......@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
......
......@@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
* page_objcgs - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function assumes that the page is known to have an
* associated object cgroups vector. It's not safe to call this function
* against pages, which might have an associated memory cgroup: e.g.
* kernel stack pages.
*/
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_objcgs_check - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function is safe to use if the page can be directly associated
* with a memory cgroup.
*/
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
return NULL;
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
......@@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return false;
}
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
}
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
#endif
static inline bool PageMemcgKmem(struct page *page)
......
......@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
return compound_head(page);
}
static inline struct folio *virt_to_folio(const void *x)
{
struct page *page = virt_to_page(x);
return page_folio(page);
}
void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
......@@ -1753,6 +1760,11 @@ void page_address_init(void);
#define page_address_init() do { } while(0)
#endif
static inline void *folio_address(const struct folio *folio)
{
return page_address(&folio->page);
}
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
......
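
virt_to_folio() and folio_address() added here are the generic building
blocks; the slab-specific virt_to_slab() and slab_address() later in this
series are thin wrappers over them. A minimal sketch (the helper name is
hypothetical):

    static inline void *allocation_base(const void *p)
    {
            /* Base address of the (possibly compound) page backing p. */
            return folio_address(virt_to_folio(p));
    }
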
......@@ -56,11 +56,11 @@ struct mem_cgroup;
* in each subpage, but you may need to restore some of their values
* afterwards.
*
* SLUB uses cmpxchg_double() to atomically update its freelist and
* counters. That requires that freelist & counters be adjacent and
* double-word aligned. We align all struct pages to double-word
* boundaries, and ensure that 'freelist' is aligned within the
* struct.
* SLUB uses cmpxchg_double() to atomically update its freelist and counters.
* That requires that freelist & counters in struct slab be adjacent and
* double-word aligned. Because struct slab currently just reinterprets the
* bits of struct page, we align all struct pages to double-word boundaries,
* and ensure that 'freelist' is aligned within struct slab.
*/
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
......
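
The adjacency and alignment requirement spelled out in the comment above can
be expressed as compile-time checks; a sketch assuming the 64-bit SLUB layout
of struct slab (these asserts are not part of the patch):

    /* 'freelist' starts on a double-word boundary within struct slab... */
    static_assert(offsetof(struct slab, freelist) % (2 * sizeof(void *)) == 0);
    /*
     * ...and 'counters' sits directly after it, so cmpxchg_double() can
     * update the pair atomically.
     */
    static_assert(offsetof(struct slab, counters) ==
                  offsetof(struct slab, freelist) + sizeof(void *));
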
......@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user);
#else
static inline void __check_heap_object(const void *ptr, unsigned long n,
struct page *page, bool to_user) { }
#endif
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
......
......@@ -87,11 +87,11 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
......@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
u32 offset = (obj - page->s_mem);
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(page_address(page)))
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
......
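
For SLAB, both helpers above are plain pointer arithmetic on slab->s_mem; a
worked example with invented numbers:

    /*
     * Suppose slab->s_mem == 0x1000, cache->size == 0x100 and cache->num == 8.
     *
     *   obj_to_index(cache, slab, (void *)0x1280)
     *           = (0x1280 - 0x1000) / 0x100            = 2
     *   nearest_obj(cache, slab, (void *)0x12c0)
     *           = 0x12c0 - ((0x12c0 - 0x1000) % 0x100) = (void *)0x1200
     *
     * i.e. an interior pointer is mapped back to the start of the object
     * (here object #2) that contains it.
     */
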
......@@ -48,9 +48,9 @@ enum stat_item {
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
......@@ -99,8 +99,8 @@ struct kmem_cache {
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial pages to keep around */
unsigned int cpu_partial_pages;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
......@@ -156,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
}
#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
......@@ -181,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, page_address(page), obj);
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -1933,6 +1933,7 @@ endchoice
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
depends on SLAB || SLUB
help
For reduced kernel memory fragmentation, slab caches can be
merged when they share the same size and other characteristics.
......
......@@ -15,7 +15,7 @@
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
page->freelist = (void *)type;
page->index = type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
......@@ -23,14 +23,13 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
void put_page_bootmem(struct page *page)
{
unsigned long type;
unsigned long type = page->index;
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
page->freelist = NULL;
page->index = 0;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
......
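
Sketched pairing of the two calls above (caller-side names such as section_nr
are placeholders): the bootmem type now round-trips through page->index
instead of page->freelist.

    /* Tag the page as section metadata; the type lands in page->index. */
    get_page_bootmem(section_nr, page, SECTION_INFO);

    /* ... */

    /*
     * Drop the reference; put_page_bootmem() reads the type back from
     * page->index and clears it once the last reference goes away.
     */
    put_page_bootmem(page);
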
......@@ -247,8 +247,9 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
}
#endif
void __kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab)
{
struct page *page = slab_page(slab);
unsigned long i;
for (i = 0; i < compound_nr(page); i++)
......@@ -298,7 +299,7 @@ static inline u8 assign_tag(struct kmem_cache *cache,
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object);
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
......@@ -341,7 +342,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
if (is_kfence_address(object))
return false;
if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
return true;
......@@ -401,9 +402,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip)
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
struct page *page;
struct folio *folio;
page = virt_to_head_page(ptr);
folio = virt_to_folio(ptr);
/*
* Even though this function is only called for kmem_cache_alloc and
......@@ -411,12 +412,14 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* !PageSlab() when the size provided to kmalloc is larger than
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
if (unlikely(!PageSlab(page))) {
if (unlikely(!folio_test_slab(folio))) {
if (____kasan_kfree_large(ptr, ip))
return;
kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false);
kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false);
} else {
____kasan_slab_free(page->slab_cache, ptr, ip, false, false);
struct slab *slab = folio_slab(folio);
____kasan_slab_free(slab->slab_cache, ptr, ip, false, false);
}
}
......@@ -560,7 +563,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;
struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
......@@ -572,13 +575,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
*/
kasan_unpoison(object, size, false);
page = virt_to_head_page(object);
slab = virt_to_slab(object);
/* Piggy-back on kmalloc() instrumentation to poison the redzone. */
if (unlikely(!PageSlab(page)))
if (unlikely(!slab))
return __kasan_kmalloc_large(object, size, flags);
else
return ____kasan_kmalloc(page->slab_cache, object, size, flags);
return ____kasan_kmalloc(slab->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
......
......@@ -330,16 +330,16 @@ DEFINE_ASAN_SET_SHADOW(f8);
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
{
struct page *page = kasan_addr_to_page(addr);
struct slab *slab = kasan_addr_to_slab(addr);
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
if (is_kfence_address(addr) || !(page && PageSlab(page)))
if (is_kfence_address(addr) || !slab)
return;
cache = page->slab_cache;
object = nearest_obj(cache, page, addr);
cache = slab->slab_cache;
object = nearest_obj(cache, slab, addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (!alloc_meta)
return;
......
......@@ -265,6 +265,7 @@ bool kasan_report(unsigned long addr, size_t size,
void kasan_report_invalid_free(void *object, unsigned long ip);
struct page *kasan_addr_to_page(const void *addr);
struct slab *kasan_addr_to_slab(const void *addr);
depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc);
void kasan_set_track(struct kasan_track *track, gfp_t flags);
......
......@@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size;
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
return virt_to_head_page(qlink)->slab_cache;
return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
......
......@@ -150,6 +150,14 @@ struct page *kasan_addr_to_page(const void *addr)
return NULL;
}
struct slab *kasan_addr_to_slab(const void *addr)
{
if ((addr >= (void *)PAGE_OFFSET) &&
(addr < high_memory))
return virt_to_slab(addr);
return NULL;
}
static void describe_object_addr(struct kmem_cache *cache, void *object,
const void *addr)
{
......@@ -248,8 +256,9 @@ static void print_address_description(void *addr, u8 tag)
pr_err("\n");
if (page && PageSlab(page)) {
struct kmem_cache *cache = page->slab_cache;
void *object = nearest_obj(cache, page, addr);
struct slab *slab = page_slab(page);
struct kmem_cache *cache = slab->slab_cache;
void *object = nearest_obj(cache, slab, addr);
describe_object(cache, object, addr, tag);
}
......
......@@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
struct kasan_alloc_meta *alloc_meta;
struct kmem_cache *cache;
struct page *page;
struct slab *slab;
const void *addr;
void *object;
u8 tag;
......@@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
tag = get_tag(info->access_addr);
addr = kasan_reset_tag(info->access_addr);
page = kasan_addr_to_page(addr);
if (page && PageSlab(page)) {
cache = page->slab_cache;
object = nearest_obj(cache, page, (void *)addr);
slab = kasan_addr_to_slab(addr);
if (slab) {
cache = slab->slab_cache;
object = nearest_obj(cache, slab, (void *)addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta) {
......
......@@ -360,7 +360,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
struct page *page;
struct slab *slab;
void *addr;
/* Try to obtain a free object. */
......@@ -424,13 +424,14 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);
/* Set required struct page fields. */
page = virt_to_page(meta->addr);
page->slab_cache = cache;
if (IS_ENABLED(CONFIG_SLUB))
page->objects = 1;
if (IS_ENABLED(CONFIG_SLAB))
page->s_mem = addr;
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
for_each_canary(meta, set_canary_byte);
......
......@@ -282,7 +282,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
alloc = kmalloc(size, gfp);
if (is_kfence_address(alloc)) {
struct page *page = virt_to_head_page(alloc);
struct slab *slab = virt_to_slab(alloc);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
......@@ -291,8 +291,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* even for KFENCE objects; these are required so that
* memcg accounting works correctly.
*/
KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
if (policy == ALLOCATE_ANY)
return alloc;
......
......@@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
rcu_read_unlock();
}
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab_page(s, page);
unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
if (new_page) {
if (new_slab) {
/*
* If the slab page is brand new and nobody can yet access
* it's memcg_data, no synchronization is required and
* memcg_data can be simply assigned.
* If the slab is brand new and nobody can yet access its
* memcg_data, no synchronization is required and memcg_data can
* be simply assigned.
*/
page->memcg_data = memcg_data;
} else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
slab->memcg_data = memcg_data;
} else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
* If the slab page is already in use, somebody can allocate
* and assign obj_cgroups in parallel. In this case the existing
* If the slab is already in use, somebody can allocate and
* assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
......@@ -2865,38 +2865,43 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
struct page *page;
struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
page = virt_to_head_page(p);
folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* the page->obj_cgroups.
* slab->memcg_data.
*/
if (page_objcgs_check(page)) {
struct obj_cgroup *objcg;
if (folio_test_slab(folio)) {
struct obj_cgroup **objcgs;
struct slab *slab;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
objcg = page_objcgs(page)[off];
if (objcg)
return obj_cgroup_memcg(objcg);
slab = folio_slab(folio);
objcgs = slab_objcgs(slab);
if (!objcgs)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
if (objcgs[off])
return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
* page_memcg_check() is used here, because page_has_obj_cgroups()
* check above could fail because the object cgroups vector wasn't set
* at that moment, but it can be set concurrently.
* page_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->memcg_data has not been freed yet
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
return page_memcg_check(page);
return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
......
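
Condensed, the per-object lookup that mem_cgroup_from_obj() now performs
looks like this (sketch; the helper name objcg_of() is made up, while
slab_objcgs() and obj_to_index() are the helpers from mm/slab.h):

    static struct obj_cgroup *objcg_of(void *p)
    {
            struct slab *slab = virt_to_slab(p);
            struct obj_cgroup **vec;

            if (!slab)
                    return NULL;            /* not a slab object */

            vec = slab_objcgs(slab);
            if (!vec)
                    return NULL;            /* cache is not accounted */

            /* One obj_cgroup pointer per object, indexed by object number. */
            return vec[obj_to_index(slab->slab_cache, slab, p)];
    }
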
......@@ -218,7 +218,7 @@ static void cache_reap(struct work_struct *unused);
static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
void **list);
static inline void fixup_slab_list(struct kmem_cache *cachep,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
void **list);
static int slab_early_init = 1;
......@@ -372,10 +372,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;
static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
unsigned int idx)
static inline void *index_to_obj(struct kmem_cache *cache,
const struct slab *slab, unsigned int idx)
{
return page->s_mem + cache->size * idx;
return slab->s_mem + cache->size * idx;
}
#define BOOT_CPUCACHE_ENTRIES 1
......@@ -550,17 +550,17 @@ static struct array_cache *alloc_arraycache(int node, int entries,
}
static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
struct page *page, void *objp)
struct slab *slab, void *objp)
{
struct kmem_cache_node *n;
int page_node;
int slab_node;
LIST_HEAD(list);
page_node = page_to_nid(page);
n = get_node(cachep, page_node);
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
......@@ -761,7 +761,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
}
static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
int node, int page_node)
int node, int slab_node)
{
struct kmem_cache_node *n;
struct alien_cache *alien = NULL;
......@@ -770,21 +770,21 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[page_node]) {
alien = n->alien[page_node];
if (n->alien && n->alien[slab_node]) {
alien = n->alien[slab_node];
ac = &alien->ac;
spin_lock(&alien->lock);
if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep, ac, page_node, &list);
__drain_alien_cache(cachep, ac, slab_node, &list);
}
__free_one(ac, objp);
spin_unlock(&alien->lock);
slabs_destroy(cachep, &list);
} else {
n = get_node(cachep, page_node);
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
......@@ -793,16 +793,16 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
int page_node = page_to_nid(virt_to_page(objp));
int slab_node = slab_nid(virt_to_slab(objp));
int node = numa_mem_id();
/*
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
if (likely(node == page_node))
if (likely(node == slab_node))
return 0;
return __cache_free_alien(cachep, objp, node, page_node);
return __cache_free_alien(cachep, objp, node, slab_node);
}
/*
......@@ -1367,57 +1367,60 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct page *page;
struct folio *folio;
struct slab *slab;
flags |= cachep->allocflags;
page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page) {
folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!folio) {
slab_out_of_memory(cachep, flags, nodeid);
return NULL;
}
account_slab_page(page, cachep->gfporder, cachep, flags);
__SetPageSlab(page);
slab = folio_slab(folio);
account_slab(slab, cachep->gfporder, cachep, flags);
__folio_set_slab(folio);
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
if (sk_memalloc_socks() && page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
return page;
return slab;
}
/*
* Interface to system's page release.
*/
static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
{
int order = cachep->gfporder;
struct folio *folio = slab_folio(slab);
BUG_ON(!PageSlab(page));
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
page_mapcount_reset(page);
/* In union with page->mapping where page allocator expects NULL */
page->slab_cache = NULL;
BUG_ON(!folio_test_slab(folio));
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
page_mapcount_reset(folio_page(folio, 0));
folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
unaccount_slab_page(page, order, cachep);
__free_pages(page, order);
unaccount_slab(slab, order, cachep);
__free_pages(folio_page(folio, 0), order);
}
static void kmem_rcu_free(struct rcu_head *head)
{
struct kmem_cache *cachep;
struct page *page;
struct slab *slab;
page = container_of(head, struct page, rcu_head);
cachep = page->slab_cache;
slab = container_of(head, struct slab, rcu_head);
cachep = slab->slab_cache;
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
}
#if DEBUG
......@@ -1553,18 +1556,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
struct page *page = virt_to_head_page(objp);
struct slab *slab = virt_to_slab(objp);
unsigned int objnr;
objnr = obj_to_index(cachep, page, objp);
objnr = obj_to_index(cachep, slab, objp);
if (objnr) {
objp = index_to_obj(cachep, page, objnr - 1);
objp = index_to_obj(cachep, slab, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
objp = index_to_obj(cachep, page, objnr + 1);
objp = index_to_obj(cachep, slab, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Next obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
......@@ -1575,17 +1578,17 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
int i;
if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
poison_obj(cachep, page->freelist - obj_offset(cachep),
poison_obj(cachep, slab->freelist - obj_offset(cachep),
POISON_FREE);
}
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);
......@@ -1601,7 +1604,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
}
#endif
......@@ -1609,22 +1612,22 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
* @page: page pointer being destroyed
* @slab: slab being destroyed
*
* Destroy all the objs in a slab page, and release the mem back to the system.
* Before calling the slab page must have been unlinked from the cache. The
* Destroy all the objs in a slab, and release the mem back to the system.
* Before calling the slab must have been unlinked from the cache. The
* kmem_cache_node ->list_lock is not held/needed.
*/
static void slab_destroy(struct kmem_cache *cachep, struct page *page)
static void slab_destroy(struct kmem_cache *cachep, struct slab *slab)
{
void *freelist;
freelist = page->freelist;
slab_destroy_debugcheck(cachep, page);
freelist = slab->freelist;
slab_destroy_debugcheck(cachep, slab);
if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&page->rcu_head, kmem_rcu_free);
call_rcu(&slab->rcu_head, kmem_rcu_free);
else
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
/*
* From now on, we don't use freelist
......@@ -1640,11 +1643,11 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
*/
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
{
struct page *page, *n;
struct slab *slab, *n;
list_for_each_entry_safe(page, n, list, slab_list) {
list_del(&page->slab_list);
slab_destroy(cachep, page);
list_for_each_entry_safe(slab, n, list, slab_list) {
list_del(&slab->slab_list);
slab_destroy(cachep, slab);
}
}
......@@ -2194,7 +2197,7 @@ static int drain_freelist(struct kmem_cache *cache,
{
struct list_head *p;
int nr_freed;
struct page *page;
struct slab *slab;
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
......@@ -2206,8 +2209,8 @@ static int drain_freelist(struct kmem_cache *cache,
goto out;
}
page = list_entry(p, struct page, slab_list);
list_del(&page->slab_list);
slab = list_entry(p, struct slab, slab_list);
list_del(&slab->slab_list);
n->free_slabs--;
n->total_slabs--;
/*
......@@ -2216,7 +2219,7 @@ static int drain_freelist(struct kmem_cache *cache,
*/
n->free_objects -= cache->num;
spin_unlock_irq(&n->list_lock);
slab_destroy(cache, page);
slab_destroy(cache, slab);
nr_freed++;
}
out:
......@@ -2291,14 +2294,14 @@ void __kmem_cache_release(struct kmem_cache *cachep)
* which are all initialized during kmem_cache_init().
*/
static void *alloc_slabmgmt(struct kmem_cache *cachep,
struct page *page, int colour_off,
struct slab *slab, int colour_off,
gfp_t local_flags, int nodeid)
{
void *freelist;
void *addr = page_address(page);
void *addr = slab_address(slab);
page->s_mem = addr + colour_off;
page->active = 0;
slab->s_mem = addr + colour_off;
slab->active = 0;
if (OBJFREELIST_SLAB(cachep))
freelist = NULL;
......@@ -2315,24 +2318,24 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
return freelist;
}
static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx)
{
return ((freelist_idx_t *)page->freelist)[idx];
return ((freelist_idx_t *) slab->freelist)[idx];
}
static inline void set_free_obj(struct page *page,
static inline void set_free_obj(struct slab *slab,
unsigned int idx, freelist_idx_t val)
{
((freelist_idx_t *)(page->freelist))[idx] = val;
((freelist_idx_t *)(slab->freelist))[idx] = val;
}
static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
{
#if DEBUG
int i;
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
......@@ -2416,17 +2419,17 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state)
}
/* Swap two freelist entries */
static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
{
swap(((freelist_idx_t *)page->freelist)[a],
((freelist_idx_t *)page->freelist)[b]);
swap(((freelist_idx_t *) slab->freelist)[a],
((freelist_idx_t *) slab->freelist)[b]);
}
/*
* Shuffle the freelist initialization state based on pre-computed lists.
* return true if the list was successfully shuffled, false otherwise.
*/
static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
......@@ -2443,7 +2446,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
objfreelist = count - 1;
else
objfreelist = next_random_slot(&state);
page->freelist = index_to_obj(cachep, page, objfreelist) +
slab->freelist = index_to_obj(cachep, slab, objfreelist) +
obj_offset(cachep);
count--;
}
......@@ -2454,51 +2457,51 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
*/
if (!precomputed) {
for (i = 0; i < count; i++)
set_free_obj(page, i, i);
set_free_obj(slab, i, i);
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(&state.rnd_state);
rand %= (i + 1);
swap_free_obj(page, i, rand);
swap_free_obj(slab, i, rand);
}
} else {
for (i = 0; i < count; i++)
set_free_obj(page, i, next_random_slot(&state));
set_free_obj(slab, i, next_random_slot(&state));
}
if (OBJFREELIST_SLAB(cachep))
set_free_obj(page, cachep->num - 1, objfreelist);
set_free_obj(slab, cachep->num - 1, objfreelist);
return true;
}
#else
static inline bool shuffle_freelist(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static void cache_init_objs(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
int i;
void *objp;
bool shuffled;
cache_init_objs_debug(cachep, page);
cache_init_objs_debug(cachep, slab);
/* Try to randomize the freelist if enabled */
shuffled = shuffle_freelist(cachep, page);
shuffled = shuffle_freelist(cachep, slab);
if (!shuffled && OBJFREELIST_SLAB(cachep)) {
page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
obj_offset(cachep);
}
for (i = 0; i < cachep->num; i++) {
objp = index_to_obj(cachep, page, i);
objp = index_to_obj(cachep, slab, i);
objp = kasan_init_slab_obj(cachep, objp);
/* constructor could break poison info */
......@@ -2509,68 +2512,56 @@ static void cache_init_objs(struct kmem_cache *cachep,
}
if (!shuffled)
set_free_obj(page, i, i);
set_free_obj(slab, i, i);
}
}
static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
{
void *objp;
objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
slab->active++;
return objp;
}
static void slab_put_obj(struct kmem_cache *cachep,
struct page *page, void *objp)
struct slab *slab, void *objp)
{
unsigned int objnr = obj_to_index(cachep, page, objp);
unsigned int objnr = obj_to_index(cachep, slab, objp);
#if DEBUG
unsigned int i;
/* Verify double free bug */
for (i = page->active; i < cachep->num; i++) {
if (get_free_obj(page, i) == objnr) {
for (i = slab->active; i < cachep->num; i++) {
if (get_free_obj(slab, i) == objnr) {
pr_err("slab: double free detected in cache '%s', objp %px\n",
cachep->name, objp);
BUG();
}
}
#endif
page->active--;
if (!page->freelist)
page->freelist = objp + obj_offset(cachep);
set_free_obj(page, page->active, objnr);
}
slab->active--;
if (!slab->freelist)
slab->freelist = objp + obj_offset(cachep);
/*
* Map pages beginning at addr to the given cache and slab. This is required
* for the slab allocator to be able to lookup the cache and slab of a
* virtual address for kfree, ksize, and slab debugging.
*/
static void slab_map_pages(struct kmem_cache *cache, struct page *page,
void *freelist)
{
page->slab_cache = cache;
page->freelist = freelist;
set_free_obj(slab, slab->active, objnr);
}
/*
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
static struct page *cache_grow_begin(struct kmem_cache *cachep,
static struct slab *cache_grow_begin(struct kmem_cache *cachep,
gfp_t flags, int nodeid)
{
void *freelist;
size_t offset;
gfp_t local_flags;
int page_node;
int slab_node;
struct kmem_cache_node *n;
struct page *page;
struct slab *slab;
/*
* Be lazy and only check for valid flags here, keeping it out of the
......@@ -2590,12 +2581,12 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
page = kmem_getpages(cachep, local_flags, nodeid);
if (!page)
slab = kmem_getpages(cachep, local_flags, nodeid);
if (!slab)
goto failed;
page_node = page_to_nid(page);
n = get_node(cachep, page_node);
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
/* Get colour for the slab, and cal the next value. */
n->colour_next++;
......@@ -2613,54 +2604,55 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* page_address() in the latter returns a non-tagged pointer,
* as it should be for slab pages.
*/
kasan_poison_slab(page);
kasan_poison_slab(slab);
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, page_node);
freelist = alloc_slabmgmt(cachep, slab, offset,
local_flags & ~GFP_CONSTRAINT_MASK, slab_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
slab_map_pages(cachep, page, freelist);
slab->slab_cache = cachep;
slab->freelist = freelist;
cache_init_objs(cachep, page);
cache_init_objs(cachep, slab);
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return page;
return slab;
opps1:
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return NULL;
}
static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
{
struct kmem_cache_node *n;
void *list = NULL;
check_irq_off();
if (!page)
if (!slab)
return;
INIT_LIST_HEAD(&page->slab_list);
n = get_node(cachep, page_to_nid(page));
INIT_LIST_HEAD(&slab->slab_list);
n = get_node(cachep, slab_nid(slab));
spin_lock(&n->list_lock);
n->total_slabs++;
if (!page->active) {
list_add_tail(&page->slab_list, &n->slabs_free);
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
n->free_objects += cachep->num - slab->active;
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -2708,13 +2700,13 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
unsigned int objnr;
struct page *page;
struct slab *slab;
BUG_ON(virt_to_cache(objp) != cachep);
objp -= obj_offset(cachep);
kfree_debugcheck(objp);
page = virt_to_head_page(objp);
slab = virt_to_slab(objp);
if (cachep->flags & SLAB_RED_ZONE) {
verify_redzone_free(cachep, objp);
......@@ -2724,10 +2716,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = (void *)caller;
objnr = obj_to_index(cachep, page, objp);
objnr = obj_to_index(cachep, slab, objp);
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, page, objnr));
BUG_ON(objp != index_to_obj(cachep, slab, objnr));
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
......@@ -2757,97 +2749,97 @@ static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
}
static inline void fixup_slab_list(struct kmem_cache *cachep,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
void **list)
{
/* move slabp to correct slabp list: */
list_del(&page->slab_list);
if (page->active == cachep->num) {
list_add(&page->slab_list, &n->slabs_full);
list_del(&slab->slab_list);
if (slab->active == cachep->num) {
list_add(&slab->slab_list, &n->slabs_full);
if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
/* Poisoning will be done without holding the lock */
if (cachep->flags & SLAB_POISON) {
void **objp = page->freelist;
void **objp = slab->freelist;
*objp = *list;
*list = objp;
}
#endif
page->freelist = NULL;
slab->freelist = NULL;
}
} else
list_add(&page->slab_list, &n->slabs_partial);
list_add(&slab->slab_list, &n->slabs_partial);
}
/* Try to find non-pfmemalloc slab if needed */
static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
struct page *page, bool pfmemalloc)
static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
struct slab *slab, bool pfmemalloc)
{
if (!page)
if (!slab)
return NULL;
if (pfmemalloc)
return page;
return slab;
if (!PageSlabPfmemalloc(page))
return page;
if (!slab_test_pfmemalloc(slab))
return slab;
/* No need to keep pfmemalloc slab if we have enough free objects */
if (n->free_objects > n->free_limit) {
ClearPageSlabPfmemalloc(page);
return page;
slab_clear_pfmemalloc(slab);
return slab;
}
/* Move pfmemalloc slab to the end of list to speed up next search */
list_del(&page->slab_list);
if (!page->active) {
list_add_tail(&page->slab_list, &n->slabs_free);
list_del(&slab->slab_list);
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
list_add_tail(&page->slab_list, &n->slabs_partial);
list_add_tail(&slab->slab_list, &n->slabs_partial);
list_for_each_entry(page, &n->slabs_partial, slab_list) {
if (!PageSlabPfmemalloc(page))
return page;
list_for_each_entry(slab, &n->slabs_partial, slab_list) {
if (!slab_test_pfmemalloc(slab))
return slab;
}
n->free_touched = 1;
list_for_each_entry(page, &n->slabs_free, slab_list) {
if (!PageSlabPfmemalloc(page)) {
list_for_each_entry(slab, &n->slabs_free, slab_list) {
if (!slab_test_pfmemalloc(slab)) {
n->free_slabs--;
return page;
return slab;
}
}
return NULL;
}
static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
struct page *page;
struct slab *slab;
assert_spin_locked(&n->list_lock);
page = list_first_entry_or_null(&n->slabs_partial, struct page,
slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
slab_list);
if (!page) {
if (!slab) {
n->free_touched = 1;
page = list_first_entry_or_null(&n->slabs_free, struct page,
slab = list_first_entry_or_null(&n->slabs_free, struct slab,
slab_list);
if (page)
if (slab)
n->free_slabs--;
}
if (sk_memalloc_socks())
page = get_valid_first_slab(n, page, pfmemalloc);
slab = get_valid_first_slab(n, slab, pfmemalloc);
return page;
return slab;
}
static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
struct kmem_cache_node *n, gfp_t flags)
{
struct page *page;
struct slab *slab;
void *obj;
void *list = NULL;
......@@ -2855,16 +2847,16 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
return NULL;
spin_lock(&n->list_lock);
page = get_first_slab(n, true);
if (!page) {
slab = get_first_slab(n, true);
if (!slab) {
spin_unlock(&n->list_lock);
return NULL;
}
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
n->free_objects--;
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -2877,20 +2869,20 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
* or cache_grow_end() for new slab
*/
static __always_inline int alloc_block(struct kmem_cache *cachep,
struct array_cache *ac, struct page *page, int batchcount)
struct array_cache *ac, struct slab *slab, int batchcount)
{
/*
* There must be at least one object available for
* allocation.
*/
BUG_ON(page->active >= cachep->num);
BUG_ON(slab->active >= cachep->num);
while (page->active < cachep->num && batchcount--) {
while (slab->active < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
ac->entry[ac->avail++] = slab_get_obj(cachep, page);
ac->entry[ac->avail++] = slab_get_obj(cachep, slab);
}
return batchcount;
......@@ -2903,7 +2895,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
struct array_cache *ac, *shared;
int node;
void *list = NULL;
struct page *page;
struct slab *slab;
check_irq_off();
node = numa_mem_id();
......@@ -2936,14 +2928,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
while (batchcount > 0) {
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
check_spinlock_acquired(cachep);
batchcount = alloc_block(cachep, ac, page, batchcount);
fixup_slab_list(cachep, n, page, &list);
batchcount = alloc_block(cachep, ac, slab, batchcount);
fixup_slab_list(cachep, n, slab, &list);
}
must_grow:
......@@ -2962,16 +2954,16 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
return obj;
}
page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
slab = cache_grow_begin(cachep, gfp_exact_node(flags), node);
/*
* cache_grow_begin() can reenable interrupts,
* then ac could change.
*/
ac = cpu_cache_get(cachep);
if (!ac->avail && page)
alloc_block(cachep, ac, page, batchcount);
cache_grow_end(cachep, page);
if (!ac->avail && slab)
alloc_block(cachep, ac, slab, batchcount);
cache_grow_end(cachep, slab);
if (!ac->avail)
return NULL;
......@@ -3101,7 +3093,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(flags);
void *obj = NULL;
struct page *page;
struct slab *slab;
int nid;
unsigned int cpuset_mems_cookie;
......@@ -3137,10 +3129,10 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
page = cache_grow_begin(cache, flags, numa_mem_id());
cache_grow_end(cache, page);
if (page) {
nid = page_to_nid(page);
slab = cache_grow_begin(cache, flags, numa_mem_id());
cache_grow_end(cache, slab);
if (slab) {
nid = slab_nid(slab);
obj = ____cache_alloc_node(cache,
gfp_exact_node(flags), nid);
......@@ -3164,7 +3156,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct page *page;
struct slab *slab;
struct kmem_cache_node *n;
void *obj = NULL;
void *list = NULL;
......@@ -3175,8 +3167,8 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
check_irq_off();
spin_lock(&n->list_lock);
page = get_first_slab(n, false);
if (!page)
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
check_spinlock_acquired_node(cachep, nodeid);
......@@ -3185,12 +3177,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
BUG_ON(page->active == cachep->num);
BUG_ON(slab->active == cachep->num);
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
n->free_objects--;
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -3198,12 +3190,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
must_grow:
spin_unlock(&n->list_lock);
page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (page) {
slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (slab) {
/* This slab isn't counted yet so don't update free_objects */
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
}
cache_grow_end(cachep, page);
cache_grow_end(cachep, slab);
return obj ? obj : fallback_alloc(cachep, flags);
}
......@@ -3333,40 +3325,40 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
struct page *page;
struct slab *slab;
n->free_objects += nr_objects;
for (i = 0; i < nr_objects; i++) {
void *objp;
struct page *page;
struct slab *slab;
objp = objpp[i];
page = virt_to_head_page(objp);
list_del(&page->slab_list);
slab = virt_to_slab(objp);
list_del(&slab->slab_list);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp);
slab_put_obj(cachep, slab, objp);
STATS_DEC_ACTIVE(cachep);
/* fixup slab chains */
if (page->active == 0) {
list_add(&page->slab_list, &n->slabs_free);
if (slab->active == 0) {
list_add(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
list_add_tail(&page->slab_list, &n->slabs_partial);
list_add_tail(&slab->slab_list, &n->slabs_partial);
}
}
while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
n->free_objects -= cachep->num;
page = list_last_entry(&n->slabs_free, struct page, slab_list);
list_move(&page->slab_list, list);
slab = list_last_entry(&n->slabs_free, struct slab, slab_list);
list_move(&slab->slab_list, list);
n->free_slabs--;
n->total_slabs--;
}
......@@ -3402,10 +3394,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
#if STATS
{
int i = 0;
struct page *page;
struct slab *slab;
list_for_each_entry(page, &n->slabs_free, slab_list) {
BUG_ON(page->active);
list_for_each_entry(slab, &n->slabs_free, slab_list) {
BUG_ON(slab->active);
i++;
}
......@@ -3481,10 +3473,10 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
}
if (sk_memalloc_socks()) {
struct page *page = virt_to_head_page(objp);
struct slab *slab = virt_to_slab(objp);
if (unlikely(PageSlabPfmemalloc(page))) {
cache_free_pfmemalloc(cachep, page, objp);
if (unlikely(slab_test_pfmemalloc(slab))) {
cache_free_pfmemalloc(cachep, slab, objp);
return;
}
}
......@@ -3657,21 +3649,21 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif /* CONFIG_NUMA */
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
struct kmem_cache *cachep;
unsigned int objnr;
void *objp;
kpp->kp_ptr = object;
kpp->kp_page = page;
cachep = page->slab_cache;
kpp->kp_slab = slab;
cachep = slab->slab_cache;
kpp->kp_slab_cache = cachep;
objp = object - obj_offset(cachep);
kpp->kp_data_offset = obj_offset(cachep);
page = virt_to_head_page(objp);
objnr = obj_to_index(cachep, page, objp);
objp = index_to_obj(cachep, page, objnr);
slab = virt_to_slab(objp);
objnr = obj_to_index(cachep, slab, objp);
objp = index_to_obj(cachep, slab, objnr);
kpp->kp_objp = objp;
if (DEBUG && cachep->flags & SLAB_STORE_USER)
kpp->kp_ret = *dbg_userword(cachep, objp);
......@@ -4177,8 +4169,8 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
struct kmem_cache *cachep;
unsigned int objnr;
......@@ -4187,15 +4179,15 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find and validate object. */
cachep = page->slab_cache;
objnr = obj_to_index(cachep, page, (void *)ptr);
cachep = slab->slab_cache;
objnr = obj_to_index(cachep, slab, (void *)ptr);
BUG_ON(objnr >= cachep->num);
/* Find offset within object. */
if (is_kfence_address(ptr))
offset = ptr - kfence_object_start(ptr);
else
offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep);
/* Allow address range falling entirely within usercopy region. */
if (offset >= cachep->useroffset &&
......
......@@ -5,6 +5,197 @@
* Internal slab definitions
*/
/* Reuses the bits in struct page */
struct slab {
unsigned long __page_flags;
#if defined(CONFIG_SLAB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
};
struct kmem_cache *slab_cache;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
unsigned int active;
#elif defined(CONFIG_SLUB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
struct kmem_cache *slab_cache;
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
unsigned int __unused;
#elif defined(CONFIG_SLOB)
struct list_head slab_list;
void *__unused_1;
void *freelist; /* first free block */
long units;
unsigned int __unused_2;
#else
#error "Unexpected slab allocator configured"
#endif
atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
unsigned long memcg_data;
#endif
};
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
SLAB_MATCH(slab_list, slab_list);
#ifndef CONFIG_SLOB
SLAB_MATCH(rcu_head, rcu_head);
SLAB_MATCH(slab_cache, slab_cache);
#endif
#ifdef CONFIG_SLAB
SLAB_MATCH(s_mem, s_mem);
SLAB_MATCH(active, active);
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
/**
* folio_slab - Converts from folio to slab.
* @folio: The folio.
*
* Currently struct slab is a different representation of a folio where
* folio_test_slab() is true.
*
* Return: The slab which contains this folio.
*/
#define folio_slab(folio) (_Generic((folio), \
const struct folio *: (const struct slab *)(folio), \
struct folio *: (struct slab *)(folio)))
/**
* slab_folio - The folio allocated for a slab
* @slab: The slab.
*
* Slabs are allocated as folios that contain the individual objects and are
* using some fields in the first struct page of the folio - those fields are
* now accessed by struct slab. It is occasionally necessary to convert back to
* a folio in order to communicate with the rest of the mm. Please use this
* helper function instead of casting yourself, as the implementation may change
* in the future.
*/
#define slab_folio(s) (_Generic((s), \
const struct slab *: (const struct folio *)s, \
struct slab *: (struct folio *)s))
/**
* page_slab - Converts from first struct page to slab.
* @p: The first (either head of compound or single) page of slab.
*
* A temporary wrapper to convert struct page to struct slab in situations where
* we know the page is the compound head or a single order-0 page.
*
* Long-term ideally everything would work with struct slab directly or go
* through folio to struct slab.
*
* Return: The slab which contains this page
*/
#define page_slab(p) (_Generic((p), \
const struct page *: (const struct slab *)(p), \
struct page *: (struct slab *)(p)))
/**
* slab_page - The first struct page allocated for a slab
* @slab: The slab.
*
* A convenience wrapper for converting slab to the first struct page of the
* underlying folio, to communicate with code not yet converted to folio or
* struct slab.
*/
#define slab_page(s) folio_page(slab_folio(s), 0)
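/*
 * Illustrative sketch, not part of this commit: leaf code converts in both
 * directions through the helpers above instead of open-coded casts. The
 * example_* names are hypothetical.
 */
static inline struct slab *example_slab_from_page(struct page *page)
{
	/* @page must be the head (or only) page of a slab */
	return page_slab(page);
}

static inline struct page *example_page_from_slab(struct slab *slab)
{
	/* for interfaces that still expect a struct page */
	return slab_page(slab);
}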
/*
* If network-based swap is enabled, sl*b must keep track of whether pages
* were allocated from pfmemalloc reserves.
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
return folio_test_active((struct folio *)slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
{
folio_set_active(slab_folio(slab));
}
static inline void slab_clear_pfmemalloc(struct slab *slab)
{
folio_clear_active(slab_folio(slab));
}
static inline void __slab_clear_pfmemalloc(struct slab *slab)
{
__folio_clear_active(slab_folio(slab));
}
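/*
 * Illustrative sketch, not part of this commit: a pfmemalloc slab should only
 * satisfy allocations that are themselves allowed to use reserves, mirroring
 * SLUB's pfmemalloc_match(). example_pfmemalloc_ok() is a hypothetical name
 * and assumes gfp_pfmemalloc_allowed() is visible here.
 */
static inline bool example_pfmemalloc_ok(const struct slab *slab, gfp_t gfpflags)
{
	if (unlikely(slab_test_pfmemalloc(slab)))
		return gfp_pfmemalloc_allowed(gfpflags);

	return true;
}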
static inline void *slab_address(const struct slab *slab)
{
return folio_address(slab_folio(slab));
}
static inline int slab_nid(const struct slab *slab)
{
return folio_nid(slab_folio(slab));
}
static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
return folio_pgdat(slab_folio(slab));
}
static inline struct slab *virt_to_slab(const void *addr)
{
struct folio *folio = virt_to_folio(addr);
if (!folio_test_slab(folio))
return NULL;
return folio_slab(folio);
}
static inline int slab_order(const struct slab *slab)
{
return folio_order((struct folio *)slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
{
return PAGE_SIZE << slab_order(slab);
}
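/*
 * Illustrative sketch, not part of this commit: a rough per-slab object count
 * derived from the helpers above; it ignores per-allocator layout details
 * such as red zones or freelist storage. example_objs_per_slab() is a
 * hypothetical name.
 */
static inline unsigned int example_objs_per_slab(const struct kmem_cache *s,
						 const struct slab *slab)
{
	return slab_size(slab) / s->size;
}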
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
......@@ -245,15 +436,33 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
}
#ifdef CONFIG_MEMCG_KMEM
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page);
/*
* slab_objcgs - get the object cgroups vector associated with a slab
* @slab: a pointer to the slab struct
*
* Returns a pointer to the object cgroups vector associated with the slab,
* or NULL if no such vector has been associated yet.
*/
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
unsigned long memcg_data = READ_ONCE(slab->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
slab_page(slab));
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
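/*
 * Illustrative sketch, not part of this commit: looking up the obj_cgroup of
 * a single object through its slab, the same walk the alloc/free hooks below
 * perform. example_objcg_of() is a hypothetical name.
 */
static inline struct obj_cgroup *example_objcg_of(struct kmem_cache *s, void *p)
{
	struct slab *slab = virt_to_slab(p);
	struct obj_cgroup **objcgs;

	if (!slab)			/* e.g. a kmalloc_large() object */
		return NULL;

	objcgs = slab_objcgs(slab);
	if (!objcgs)
		return NULL;

	return objcgs[obj_to_index(s, slab, p)];
}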
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
static inline void memcg_free_page_obj_cgroups(struct page *page)
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(page_objcgs(page));
page->memcg_data = 0;
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
......@@ -298,7 +507,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
gfp_t flags, size_t size,
void **p)
{
struct page *page;
struct slab *slab;
unsigned long off;
size_t i;
......@@ -307,19 +516,19 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
if (likely(p[i])) {
page = virt_to_head_page(p[i]);
slab = virt_to_slab(p[i]);
if (!page_objcgs(page) &&
memcg_alloc_page_obj_cgroups(page, s, flags,
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, page, p[i]);
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
page_objcgs(page)[off] = objcg;
mod_objcg_state(objcg, page_pgdat(page),
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
......@@ -334,7 +543,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
struct kmem_cache *s;
struct obj_cgroup **objcgs;
struct obj_cgroup *objcg;
struct page *page;
struct slab *slab;
unsigned int off;
int i;
......@@ -345,43 +554,52 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
if (unlikely(!p[i]))
continue;
page = virt_to_head_page(p[i]);
objcgs = page_objcgs_check(page);
slab = virt_to_slab(p[i]);
/* we could be given a kmalloc_large() object, skip those */
if (!slab)
continue;
objcgs = slab_objcgs(slab);
if (!objcgs)
continue;
if (!s_orig)
s = page->slab_cache;
s = slab->slab_cache;
else
s = s_orig;
off = obj_to_index(s, page, p[i]);
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
return NULL;
}
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline int memcg_alloc_page_obj_cgroups(struct page *page,
static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
bool new_page)
bool new_slab)
{
return 0;
}
static inline void memcg_free_page_obj_cgroups(struct page *page)
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
......@@ -405,35 +623,35 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s,
}
#endif /* CONFIG_MEMCG_KMEM */
#ifndef CONFIG_SLOB
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct page *page;
struct slab *slab;
page = virt_to_head_page(obj);
if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
return page->slab_cache;
return slab->slab_cache;
}
static __always_inline void account_slab_page(struct page *page, int order,
struct kmem_cache *s,
gfp_t gfp)
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_page_obj_cgroups(page, s, gfp, true);
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab_page(struct page *page, int order,
struct kmem_cache *s)
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_enabled())
memcg_free_page_obj_cgroups(page);
memcg_free_slab_cgroups(slab);
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
......@@ -452,6 +670,7 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
print_tracking(cachep, x);
return cachep;
}
#endif /* CONFIG_SLOB */
static inline size_t slab_ksize(const struct kmem_cache *s)
{
......@@ -635,7 +854,7 @@ static inline void debugfs_slab_release(struct kmem_cache *s) { }
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
void *kp_ptr;
struct page *kp_page;
struct slab *kp_slab;
void *kp_objp;
unsigned long kp_data_offset;
struct kmem_cache *kp_slab_cache;
......@@ -643,7 +862,18 @@ struct kmem_obj_info {
void *kp_stack[KS_ADDRS_COUNT];
void *kp_free_stack[KS_ADDRS_COUNT];
};
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
#else
static inline
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
}
#endif
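/*
 * Illustrative sketch, not part of this commit: a simplified version of how a
 * hardened-usercopy caller is expected to reach __check_heap_object() after
 * this series, resolving the folio first. example_check_copy() is a
 * hypothetical name.
 */
static inline void example_check_copy(const void *ptr, unsigned long n,
				      bool to_user)
{
	struct folio *folio = virt_to_folio(ptr);

	if (folio_test_slab(folio))
		__check_heap_object(ptr, n, folio_slab(folio), to_user);
}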
#endif /* MM_SLAB_H */
......@@ -550,13 +550,13 @@ bool slab_is_available(void)
*/
bool kmem_valid_obj(void *object)
{
struct page *page;
struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
page = virt_to_head_page(object);
return PageSlab(page);
folio = virt_to_folio(object);
return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
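/*
 * Illustrative sketch, not part of this commit: typical debugging use of
 * kmem_valid_obj()/kmem_dump_obj(), assuming CONFIG_PRINTK.
 * example_report_object() is a hypothetical name.
 */
static void example_report_object(void *p)
{
	if (kmem_valid_obj(p))
		kmem_dump_obj(p);
	else
		pr_info("%p is not a slab-backed object\n", p);
}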
......@@ -579,18 +579,18 @@ void kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
struct page *page;
struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
page = virt_to_head_page(object);
if (WARN_ON_ONCE(!PageSlab(page))) {
slab = virt_to_slab(object);
if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
kmem_obj_info(&kp, object, page);
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
......
......@@ -30,7 +30,7 @@
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
* These objects are detected in kfree() because PageSlab()
* These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
......@@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct page *sp)
static inline int slob_page_free(struct slab *slab)
{
return PageSlobFree(sp);
return PageSlobFree(slab_page(slab));
}
static void set_slob_page_free(struct page *sp, struct list_head *list)
static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
list_add(&sp->slab_list, list);
__SetPageSlobFree(sp);
list_add(&slab->slab_list, list);
__SetPageSlobFree(slab_page(slab));
}
static inline void clear_slob_page_free(struct page *sp)
static inline void clear_slob_page_free(struct slab *slab)
{
list_del(&sp->slab_list);
__ClearPageSlobFree(sp);
list_del(&slab->slab_list);
__ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
......@@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order)
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
......@@ -301,7 +301,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
struct page *sp;
struct folio *folio;
struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
......@@ -323,7 +324,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
......@@ -358,8 +359,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
sp = virt_to_page(b);
__SetPageSlab(sp);
folio = virt_to_folio(b);
__folio_set_slab(folio);
sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
......@@ -381,7 +383,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
*/
static void slob_free(void *block, int size)
{
struct page *sp;
struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
......@@ -391,7 +393,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
sp = virt_to_page(block);
sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
......@@ -401,8 +403,7 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
__ClearPageSlab(sp);
page_mapcount_reset(sp);
__folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
......@@ -462,10 +463,10 @@ static void slob_free(void *block, int size)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
}
#endif
......@@ -544,7 +545,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
struct page *sp;
struct folio *sp;
trace_kfree(_RET_IP_, block);
......@@ -552,16 +553,17 @@ void kfree(const void *block)
return;
kmemleak_free(block);
sp = virt_to_page(block);
if (PageSlab(sp)) {
sp = virt_to_folio(block);
if (folio_test_slab(sp)) {
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
unsigned int order = compound_order(sp);
mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
unsigned int order = folio_order(sp);
mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(sp, order);
__free_pages(folio_page(sp, 0), order);
}
}
......@@ -570,7 +572,7 @@ EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
struct page *sp;
struct folio *folio;
int align;
unsigned int *m;
......@@ -578,9 +580,9 @@ size_t __ksize(const void *block)
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
sp = virt_to_page(block);
if (unlikely(!PageSlab(sp)))
return page_size(sp);
folio = virt_to_folio(block);
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
......
......@@ -48,7 +48,7 @@
* 1. slab_mutex (Global Mutex)
* 2. node->list_lock (Spinlock)
* 3. kmem_cache->cpu_slab->lock (Local lock)
* 4. slab_lock(page) (Only on some arches or for debugging)
* 4. slab_lock(slab) (Only on some arches or for debugging)
* 5. object_map_lock (Only for debugging)
*
* slab_mutex
......@@ -64,19 +64,19 @@
*
* The slab_lock is only used for debugging and on arches that do not
* have the ability to do a cmpxchg_double. It only protects:
* A. page->freelist -> List of object free in a page
* B. page->inuse -> Number of objects in use
* C. page->objects -> Number of objects in page
* D. page->frozen -> frozen state
* A. slab->freelist -> List of free objects in a slab
* B. slab->inuse -> Number of objects in use
* C. slab->objects -> Number of objects in slab
* D. slab->frozen -> frozen state
*
* Frozen slabs
*
* If a slab is frozen then it is exempt from list management. It is not
* on any list except per cpu partial list. The processor that froze the
* slab is the one who can perform list operations on the page. Other
* slab is the one who can perform list operations on the slab. Other
* processors may put objects onto the freelist but the processor that
* froze the slab is the only one that can retrieve the objects from the
* page's freelist.
* slab's freelist.
*
* list_lock
*
......@@ -135,7 +135,7 @@
* minimal so we rely on the page allocator's per cpu caches for
* fast frees and allocs.
*
* page->frozen The slab is frozen and exempt from list processing.
* slab->frozen The slab is frozen and exempt from list processing.
* This means that the slab is dedicated to a purpose
* such as satisfying allocations for a specific
* processor. Objects may be freed in the slab while
......@@ -250,7 +250,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */
/* Internal SLUB flags */
/* Poison object */
......@@ -417,18 +417,18 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
unsigned int nr_pages;
unsigned int nr_slabs;
s->cpu_partial = nr_objects;
/*
* We take the number of objects but actually limit the number of
* pages on the per cpu partial list, in order to limit excessive
* growth of the list. For simplicity we assume that the pages will
* slabs on the per cpu partial list, in order to limit excessive
* growth of the list. For simplicity we assume that the slabs will
* be half-full.
*/
nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_pages = nr_pages;
nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_slabs = nr_slabs;
}
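/*
 * Worked example, not part of this commit: with oo_objects(s->oo) == 30 and a
 * target of nr_objects == 120 cached objects, the half-full assumption gives
 * DIV_ROUND_UP(120 * 2, 30) == 8 partial slabs per cpu.
 */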
#else
static inline void
......@@ -440,28 +440,32 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
/*
* Per slab locking using the pagelock
*/
static __always_inline void __slab_lock(struct page *page)
static __always_inline void __slab_lock(struct slab *slab)
{
struct page *page = slab_page(slab);
VM_BUG_ON_PAGE(PageTail(page), page);
bit_spin_lock(PG_locked, &page->flags);
}
static __always_inline void __slab_unlock(struct page *page)
static __always_inline void __slab_unlock(struct slab *slab)
{
struct page *page = slab_page(slab);
VM_BUG_ON_PAGE(PageTail(page), page);
__bit_spin_unlock(PG_locked, &page->flags);
}
static __always_inline void slab_lock(struct page *page, unsigned long *flags)
static __always_inline void slab_lock(struct slab *slab, unsigned long *flags)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_save(*flags);
__slab_lock(page);
__slab_lock(slab);
}
static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags)
{
__slab_unlock(page);
__slab_unlock(slab);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_restore(*flags);
}
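/*
 * Illustrative sketch, not part of this commit: the expected calling pattern
 * for the bit-spinlock pair above, guarding a short read of the fields it
 * protects. example_peek_inuse() is a hypothetical name.
 */
static inline unsigned int example_peek_inuse(struct slab *slab)
{
	unsigned long flags;
	unsigned int inuse;

	slab_lock(slab, &flags);
	inuse = slab->inuse;
	slab_unlock(slab, &flags);

	return inuse;
}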
......@@ -471,7 +475,7 @@ static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
* by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different
* so we disable interrupts as part of slab_[un]lock().
*/
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
......@@ -481,7 +485,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
......@@ -491,15 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
/* init to 0 to prevent spurious warnings */
unsigned long flags = 0;
slab_lock(page, &flags);
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
slab_unlock(page, &flags);
slab_lock(slab, &flags);
if (slab->freelist == freelist_old &&
slab->counters == counters_old) {
slab->freelist = freelist_new;
slab->counters = counters_new;
slab_unlock(slab, &flags);
return true;
}
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
}
cpu_relax();
......@@ -512,7 +516,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
return false;
}
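/*
 * Illustrative sketch, not part of this commit: the usual calling pattern for
 * the helper above - snapshot freelist/counters, build the new pair on the
 * stack, and let the caller retry on failure - as acquire_slab() and
 * deactivate_slab() do below. The caller must have interrupts disabled.
 * example_try_freeze() is a hypothetical name.
 */
static inline bool example_try_freeze(struct kmem_cache *s, struct slab *slab)
{
	struct slab new;
	void *freelist = READ_ONCE(slab->freelist);
	unsigned long counters = READ_ONCE(slab->counters);

	new.freelist = freelist;
	new.counters = counters;
	new.frozen = 1;

	return __cmpxchg_double_slab(s, slab,
				     freelist, counters,
				     new.freelist, new.counters,
				     "example_try_freeze");
}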
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
......@@ -520,7 +524,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
......@@ -530,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
unsigned long flags;
local_irq_save(flags);
__slab_lock(page);
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
__slab_unlock(page);
__slab_lock(slab);
if (slab->freelist == freelist_old &&
slab->counters == counters_old) {
slab->freelist = freelist_new;
slab->counters = counters_new;
__slab_unlock(slab);
local_irq_restore(flags);
return true;
}
__slab_unlock(page);
__slab_unlock(slab);
local_irq_restore(flags);
}
......@@ -558,14 +562,14 @@ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_RAW_SPINLOCK(object_map_lock);
static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
struct page *page)
struct slab *slab)
{
void *addr = page_address(page);
void *addr = slab_address(slab);
void *p;
bitmap_zero(obj_map, page->objects);
bitmap_zero(obj_map, slab->objects);
for (p = page->freelist; p; p = get_freepointer(s, p))
for (p = slab->freelist; p; p = get_freepointer(s, p))
set_bit(__obj_to_index(s, addr, p), obj_map);
}
......@@ -590,19 +594,19 @@ static inline bool slab_add_kunit_errors(void) { return false; }
#endif
/*
* Determine a map of object in use on a page.
* Determine a map of objects in use in a slab.
*
* Node listlock must be held to guarantee that the page does
* Node listlock must be held to guarantee that the slab does
* not vanish from under us.
*/
static unsigned long *get_map(struct kmem_cache *s, struct page *page)
static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
__acquires(&object_map_lock)
{
VM_BUG_ON(!irqs_disabled());
raw_spin_lock(&object_map_lock);
__fill_map(object_map, s, page);
__fill_map(object_map, s, slab);
return object_map;
}
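/*
 * Illustrative sketch, not part of this commit: counting free objects via the
 * map filled above. The caller holds the node's list_lock with IRQs disabled,
 * as get_map() requires; the unlock pairs with the lock taken in get_map().
 * example_count_free() is a hypothetical name.
 */
static unsigned long example_count_free(struct kmem_cache *s, struct slab *slab)
{
	unsigned long *map = get_map(s, slab);
	unsigned long nr_free = bitmap_weight(map, slab->objects);

	raw_spin_unlock(&object_map_lock);

	return nr_free;
}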
......@@ -663,17 +667,17 @@ static inline void metadata_access_disable(void)
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, void *object)
struct slab *slab, void *object)
{
void *base;
if (!object)
return 1;
base = page_address(page);
base = slab_address(slab);
object = kasan_reset_tag(object);
object = restore_red_left(s, object);
if (object < base || object >= base + page->objects * s->size ||
if (object < base || object >= base + slab->objects * s->size ||
(object - base) % s->size) {
return 0;
}
......@@ -784,12 +788,13 @@ void print_tracking(struct kmem_cache *s, void *object)
print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
}
static void print_page_info(struct page *page)
static void print_slab_info(const struct slab *slab)
{
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
page, page->objects, page->inuse, page->freelist,
&page->flags);
struct folio *folio = (struct folio *)slab_folio(slab);
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
folio_flags(folio, 0));
}
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
......@@ -822,28 +827,14 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
void **freelist, void *nextfree)
{
if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
!check_valid_pointer(s, page, nextfree) && freelist) {
object_err(s, page, *freelist, "Freechain corrupt");
*freelist = NULL;
slab_fix(s, "Isolate corrupted freechain");
return true;
}
return false;
}
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned int off; /* Offset of last byte */
u8 *addr = page_address(page);
u8 *addr = slab_address(slab);
print_tracking(s, p);
print_page_info(page);
print_slab_info(slab);
pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
p, p - addr, get_freepointer(s, p));
......@@ -875,18 +866,32 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
dump_stack();
}
void object_err(struct kmem_cache *s, struct page *page,
static void object_err(struct kmem_cache *s, struct slab *slab,
u8 *object, char *reason)
{
if (slab_add_kunit_errors())
return;
slab_bug(s, "%s", reason);
print_trailer(s, page, object);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
!check_valid_pointer(s, slab, nextfree) && freelist) {
object_err(s, slab, *freelist, "Freechain corrupt");
*freelist = NULL;
slab_fix(s, "Isolate corrupted freechain");
return true;
}
return false;
}
static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
const char *fmt, ...)
{
va_list args;
......@@ -899,7 +904,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
slab_bug(s, "%s", buf);
print_page_info(page);
print_slab_info(slab);
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
......@@ -927,13 +932,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
memset(from, data, to - from);
}
static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
u8 *object, char *what,
u8 *start, unsigned int value, unsigned int bytes)
{
u8 *fault;
u8 *end;
u8 *addr = page_address(page);
u8 *addr = slab_address(slab);
metadata_access_enable();
fault = memchr_inv(kasan_reset_tag(start), value, bytes);
......@@ -952,7 +957,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, page, object);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
......@@ -998,7 +1003,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
* may be used with merged slabcaches.
*/
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned long off = get_info_end(s); /* The end of info */
......@@ -1011,12 +1016,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
if (size_from_object(s) == off)
return 1;
return check_bytes_and_report(s, page, p, "Object padding",
return check_bytes_and_report(s, slab, p, "Object padding",
p + off, POISON_INUSE, size_from_object(s) - off);
}
/* Check the pad bytes at the end of a slab page */
static int slab_pad_check(struct kmem_cache *s, struct page *page)
static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{
u8 *start;
u8 *fault;
......@@ -1028,8 +1033,8 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!(s->flags & SLAB_POISON))
return 1;
start = page_address(page);
length = page_size(page);
start = slab_address(slab);
length = slab_size(slab);
end = start + length;
remainder = length % s->size;
if (!remainder)
......@@ -1044,7 +1049,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
while (end > fault && end[-1] == POISON_INUSE)
end--;
slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
fault, end - 1, fault - start);
print_section(KERN_ERR, "Padding ", pad, remainder);
......@@ -1052,23 +1057,23 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
return 0;
}
static int check_object(struct kmem_cache *s, struct page *page,
static int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val)
{
u8 *p = object;
u8 *endobject = object + s->object_size;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, page, object, "Left Redzone",
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
if (!check_bytes_and_report(s, page, object, "Right Redzone",
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, page, p, "Alignment padding",
check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
s->inuse - s->object_size);
}
......@@ -1076,15 +1081,15 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (s->flags & SLAB_POISON) {
if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
(!check_bytes_and_report(s, page, p, "Poison", p,
(!check_bytes_and_report(s, slab, p, "Poison", p,
POISON_FREE, s->object_size - 1) ||
!check_bytes_and_report(s, page, p, "End Poison",
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1)))
return 0;
/*
* check_pad_bytes cleans up on its own.
*/
check_pad_bytes(s, page, p);
check_pad_bytes(s, slab, p);
}
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
......@@ -1095,8 +1100,8 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
/* Check free pointer validity */
if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
object_err(s, page, p, "Freepointer corrupt");
if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
......@@ -1108,55 +1113,55 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
}
static int check_slab(struct kmem_cache *s, struct page *page)
static int check_slab(struct kmem_cache *s, struct slab *slab)
{
int maxobj;
if (!PageSlab(page)) {
slab_err(s, page, "Not a valid slab page");
if (!folio_test_slab(slab_folio(slab))) {
slab_err(s, slab, "Not a valid slab page");
return 0;
}
maxobj = order_objects(compound_order(page), s->size);
if (page->objects > maxobj) {
slab_err(s, page, "objects %u > max %u",
page->objects, maxobj);
maxobj = order_objects(slab_order(slab), s->size);
if (slab->objects > maxobj) {
slab_err(s, slab, "objects %u > max %u",
slab->objects, maxobj);
return 0;
}
if (page->inuse > page->objects) {
slab_err(s, page, "inuse %u > max %u",
page->inuse, page->objects);
if (slab->inuse > slab->objects) {
slab_err(s, slab, "inuse %u > max %u",
slab->inuse, slab->objects);
return 0;
}
/* Slab_pad_check fixes things up after itself */
slab_pad_check(s, page);
slab_pad_check(s, slab);
return 1;
}
/*
* Determine if a certain object on a page is on the freelist. Must hold the
* Determine if a certain object in a slab is on the freelist. Must hold the
* slab lock to guarantee that the chains are in a consistent state.
*/
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
{
int nr = 0;
void *fp;
void *object = NULL;
int max_objects;
fp = page->freelist;
while (fp && nr <= page->objects) {
fp = slab->freelist;
while (fp && nr <= slab->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (!check_valid_pointer(s, slab, fp)) {
if (object) {
object_err(s, page, object,
object_err(s, slab, object,
"Freechain corrupt");
set_freepointer(s, object, NULL);
} else {
slab_err(s, page, "Freepointer corrupt");
page->freelist = NULL;
page->inuse = page->objects;
slab_err(s, slab, "Freepointer corrupt");
slab->freelist = NULL;
slab->inuse = slab->objects;
slab_fix(s, "Freelist cleared");
return 0;
}
......@@ -1167,34 +1172,34 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
nr++;
}
max_objects = order_objects(compound_order(page), s->size);
max_objects = order_objects(slab_order(slab), s->size);
if (max_objects > MAX_OBJS_PER_PAGE)
max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
page->objects, max_objects);
page->objects = max_objects;
if (slab->objects != max_objects) {
slab_err(s, slab, "Wrong number of objects. Found %d but should be %d",
slab->objects, max_objects);
slab->objects = max_objects;
slab_fix(s, "Number of objects adjusted");
}
if (page->inuse != page->objects - nr) {
slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
page->inuse, page->objects - nr);
page->inuse = page->objects - nr;
if (slab->inuse != slab->objects - nr) {
slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d",
slab->inuse, slab->objects - nr);
slab->inuse = slab->objects - nr;
slab_fix(s, "Object count adjusted");
}
return search == NULL;
}
static void trace(struct kmem_cache *s, struct page *page, void *object,
static void trace(struct kmem_cache *s, struct slab *slab, void *object,
int alloc)
{
if (s->flags & SLAB_TRACE) {
pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
s->name,
alloc ? "alloc" : "free",
object, page->inuse,
page->freelist);
object, slab->inuse,
slab->freelist);
if (!alloc)
print_section(KERN_INFO, "Object ", (void *)object,
......@@ -1208,22 +1213,22 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page)
struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
list_add(&page->slab_list, &n->full);
list_add(&slab->slab_list, &n->full);
}
static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
list_del(&page->slab_list);
list_del(&slab->slab_list);
}
/* Tracking of the number of slabs for debugging purposes */
......@@ -1263,7 +1268,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
}
/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
static void setup_object_debug(struct kmem_cache *s, struct slab *slab,
void *object)
{
if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
......@@ -1274,89 +1279,89 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
}
static
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr)
{
if (!kmem_cache_debug_flags(s, SLAB_POISON))
return;
metadata_access_enable();
memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
metadata_access_disable();
}
static inline int alloc_consistency_checks(struct kmem_cache *s,
struct page *page, void *object)
struct slab *slab, void *object)
{
if (!check_slab(s, page))
if (!check_slab(s, slab))
return 0;
if (!check_valid_pointer(s, page, object)) {
object_err(s, page, object, "Freelist Pointer check fails");
if (!check_valid_pointer(s, slab, object)) {
object_err(s, slab, object, "Freelist Pointer check fails");
return 0;
}
if (!check_object(s, page, object, SLUB_RED_INACTIVE))
if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
return 0;
return 1;
}
static noinline int alloc_debug_processing(struct kmem_cache *s,
struct page *page,
struct slab *slab,
void *object, unsigned long addr)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!alloc_consistency_checks(s, page, object))
if (!alloc_consistency_checks(s, slab, object))
goto bad;
}
/* Success perform special debug activities for allocs */
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_ALLOC, addr);
trace(s, page, object, 1);
trace(s, slab, object, 1);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
bad:
if (PageSlab(page)) {
if (folio_test_slab(slab_folio(slab))) {
/*
* If this is a slab page then let's do the best we can
* to avoid issues in the future. Marking all objects
* as used avoids touching the remaining objects.
*/
slab_fix(s, "Marking all objects used");
page->inuse = page->objects;
page->freelist = NULL;
slab->inuse = slab->objects;
slab->freelist = NULL;
}
return 0;
}
static inline int free_consistency_checks(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr)
struct slab *slab, void *object, unsigned long addr)
{
if (!check_valid_pointer(s, page, object)) {
slab_err(s, page, "Invalid object pointer 0x%p", object);
if (!check_valid_pointer(s, slab, object)) {
slab_err(s, slab, "Invalid object pointer 0x%p", object);
return 0;
}
if (on_freelist(s, page, object)) {
object_err(s, page, object, "Object already free");
if (on_freelist(s, slab, object)) {
object_err(s, slab, object, "Object already free");
return 0;
}
if (!check_object(s, page, object, SLUB_RED_ACTIVE))
if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
return 0;
if (unlikely(s != page->slab_cache)) {
if (!PageSlab(page)) {
slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
if (unlikely(s != slab->slab_cache)) {
if (!folio_test_slab(slab_folio(slab))) {
slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
object);
} else if (!page->slab_cache) {
} else if (!slab->slab_cache) {
pr_err("SLUB <none>: no slab for object 0x%p.\n",
object);
dump_stack();
} else
object_err(s, page, object,
object_err(s, slab, object,
"page slab pointer corrupt.");
return 0;
}
......@@ -1365,21 +1370,21 @@ static inline int free_consistency_checks(struct kmem_cache *s,
/* Supports checking bulk free of a constructed freelist */
static noinline int free_debug_processing(
struct kmem_cache *s, struct page *page,
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
void *object = head;
int cnt = 0;
unsigned long flags, flags2;
int ret = 0;
spin_lock_irqsave(&n->list_lock, flags);
slab_lock(page, &flags2);
slab_lock(slab, &flags2);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!check_slab(s, page))
if (!check_slab(s, slab))
goto out;
}
......@@ -1387,13 +1392,13 @@ static noinline int free_debug_processing(
cnt++;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!free_consistency_checks(s, page, object, addr))
if (!free_consistency_checks(s, slab, object, addr))
goto out;
}
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
trace(s, slab, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
......@@ -1406,10 +1411,10 @@ static noinline int free_debug_processing(
out:
if (cnt != bulk_cnt)
slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
bulk_cnt, cnt);
slab_unlock(page, &flags2);
slab_unlock(slab, &flags2);
spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
......@@ -1624,26 +1629,26 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
}
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
struct slab *slab, void *object) {}
static inline
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
struct slab *slab, void *object, unsigned long addr) { return 0; }
static inline int free_debug_processing(
struct kmem_cache *s, struct page *page,
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr) { return 0; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
struct slab *slab) {}
slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t flags, const char *name)
{
......@@ -1662,7 +1667,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
......@@ -1767,10 +1772,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
return *head != NULL;
}
static void *setup_object(struct kmem_cache *s, struct page *page,
static void *setup_object(struct kmem_cache *s, struct slab *slab,
void *object)
{
setup_object_debug(s, page, object);
setup_object_debug(s, slab, object);
object = kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
......@@ -1783,18 +1788,27 @@ static void *setup_object(struct kmem_cache *s, struct page *page,
/*
* Slab allocation and freeing
*/
static inline struct page *alloc_slab_page(struct kmem_cache *s,
static inline struct slab *alloc_slab_page(struct kmem_cache *s,
gfp_t flags, int node, struct kmem_cache_order_objects oo)
{
struct page *page;
struct folio *folio;
struct slab *slab;
unsigned int order = oo_order(oo);
if (node == NUMA_NO_NODE)
page = alloc_pages(flags, order);
folio = (struct folio *)alloc_pages(flags, order);
else
page = __alloc_pages_node(node, flags, order);
folio = (struct folio *)__alloc_pages_node(node, flags, order);
if (!folio)
return NULL;
slab = folio_slab(folio);
__folio_set_slab(folio);
if (page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
return page;
return slab;
}
#ifdef CONFIG_SLAB_FREELIST_RANDOM
......@@ -1839,7 +1853,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry on the pre-computed freelist randomized */
static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
......@@ -1861,32 +1875,32 @@ static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
}
/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
void *start;
void *cur;
void *next;
unsigned long idx, pos, page_limit, freelist_count;
if (page->objects < 2 || !s->random_seq)
if (slab->objects < 2 || !s->random_seq)
return false;
freelist_count = oo_objects(s->oo);
pos = get_random_int() % freelist_count;
page_limit = page->objects * s->size;
start = fixup_red_left(s, page_address(page));
page_limit = slab->objects * s->size;
start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
cur = next_freelist_entry(s, page, &pos, start, page_limit,
cur = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
cur = setup_object(s, page, cur);
page->freelist = cur;
cur = setup_object(s, slab, cur);
slab->freelist = cur;
for (idx = 1; idx < page->objects; idx++) {
next = next_freelist_entry(s, page, &pos, start, page_limit,
for (idx = 1; idx < slab->objects; idx++) {
next = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
next = setup_object(s, page, next);
next = setup_object(s, slab, next);
set_freepointer(s, cur, next);
cur = next;
}
......@@ -1900,15 +1914,15 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
return 0;
}
static inline void init_freelist_randomization(void) { }
static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
struct slab *slab;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
void *start, *p, *next;
......@@ -1927,63 +1941,60 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page)) {
slab = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!slab)) {
oo = s->min;
alloc_gfp = flags;
/*
* Allocation may have failed due to fragmentation.
* Try a lower order alloc if possible
*/
page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page))
slab = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!slab))
goto out;
stat(s, ORDER_FALLBACK);
}
page->objects = oo_objects(oo);
slab->objects = oo_objects(oo);
account_slab_page(page, oo_order(oo), s, flags);
account_slab(slab, oo_order(oo), s, flags);
page->slab_cache = s;
__SetPageSlab(page);
if (page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
slab->slab_cache = s;
kasan_poison_slab(page);
kasan_poison_slab(slab);
start = page_address(page);
start = slab_address(slab);
setup_page_debug(s, page, start);
setup_slab_debug(s, slab, start);
shuffle = shuffle_freelist(s, page);
shuffle = shuffle_freelist(s, slab);
if (!shuffle) {
start = fixup_red_left(s, start);
start = setup_object(s, page, start);
page->freelist = start;
for (idx = 0, p = start; idx < page->objects - 1; idx++) {
start = setup_object(s, slab, start);
slab->freelist = start;
for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
next = p + s->size;
next = setup_object(s, page, next);
next = setup_object(s, slab, next);
set_freepointer(s, p, next);
p = next;
}
set_freepointer(s, p, NULL);
}
page->inuse = page->objects;
page->frozen = 1;
slab->inuse = slab->objects;
slab->frozen = 1;
out:
if (!page)
if (!slab)
return NULL;
inc_slabs_node(s, page_to_nid(page), page->objects);
inc_slabs_node(s, slab_nid(slab), slab->objects);
return page;
return slab;
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
......@@ -1994,76 +2005,75 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
}
static void __free_slab(struct kmem_cache *s, struct page *page)
static void __free_slab(struct kmem_cache *s, struct slab *slab)
{
int order = compound_order(page);
struct folio *folio = slab_folio(slab);
int order = folio_order(folio);
int pages = 1 << order;
if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, page);
for_each_object(p, s, page_address(page),
page->objects)
check_object(s, page, p, SLUB_RED_INACTIVE);
slab_pad_check(s, slab);
for_each_object(p, s, slab_address(slab), slab->objects)
check_object(s, slab, p, SLUB_RED_INACTIVE);
}
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
/* In union with page->mapping where page allocator expects NULL */
page->slab_cache = NULL;
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
unaccount_slab_page(page, order, s);
__free_pages(page, order);
unaccount_slab(slab, order, s);
__free_pages(folio_page(folio, 0), order);
}
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page = container_of(h, struct page, rcu_head);
struct slab *slab = container_of(h, struct slab, rcu_head);
__free_slab(page->slab_cache, page);
__free_slab(slab->slab_cache, slab);
}
static void free_slab(struct kmem_cache *s, struct page *page)
static void free_slab(struct kmem_cache *s, struct slab *slab)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
call_rcu(&page->rcu_head, rcu_free_slab);
call_rcu(&slab->rcu_head, rcu_free_slab);
} else
__free_slab(s, page);
__free_slab(s, slab);
}
static void discard_slab(struct kmem_cache *s, struct page *page)
static void discard_slab(struct kmem_cache *s, struct slab *slab)
{
dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
dec_slabs_node(s, slab_nid(slab), slab->objects);
free_slab(s, slab);
}
/*
* Management of partially allocated slabs.
*/
static inline void
__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
{
n->nr_partial++;
if (tail == DEACTIVATE_TO_TAIL)
list_add_tail(&page->slab_list, &n->partial);
list_add_tail(&slab->slab_list, &n->partial);
else
list_add(&page->slab_list, &n->partial);
list_add(&slab->slab_list, &n->partial);
}
static inline void add_partial(struct kmem_cache_node *n,
struct page *page, int tail)
struct slab *slab, int tail)
{
lockdep_assert_held(&n->list_lock);
__add_partial(n, page, tail);
__add_partial(n, slab, tail);
}
static inline void remove_partial(struct kmem_cache_node *n,
struct page *page)
struct slab *slab)
{
lockdep_assert_held(&n->list_lock);
list_del(&page->slab_list);
list_del(&slab->slab_list);
n->nr_partial--;
}
......@@ -2074,12 +2084,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
* Returns a list of objects or NULL if it fails.
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
int mode)
{
void *freelist;
unsigned long counters;
struct page new;
struct slab new;
lockdep_assert_held(&n->list_lock);
......@@ -2088,11 +2098,11 @@ static inline void *acquire_slab(struct kmem_cache *s,
* The old freelist is the list of objects for the
* per cpu allocation list.
*/
freelist = page->freelist;
counters = page->counters;
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
if (mode) {
new.inuse = page->objects;
new.inuse = slab->objects;
new.freelist = NULL;
} else {
new.freelist = freelist;
......@@ -2101,35 +2111,35 @@ static inline void *acquire_slab(struct kmem_cache *s,
VM_BUG_ON(new.frozen);
new.frozen = 1;
if (!__cmpxchg_double_slab(s, page,
if (!__cmpxchg_double_slab(s, slab,
freelist, counters,
new.freelist, new.counters,
"acquire_slab"))
return NULL;
remove_partial(n, page);
remove_partial(n, slab);
WARN_ON(!freelist);
return freelist;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
int drain) { }
#endif
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
struct page **ret_page, gfp_t gfpflags)
struct slab **ret_slab, gfp_t gfpflags)
{
struct page *page, *page2;
struct slab *slab, *slab2;
void *object = NULL;
unsigned long flags;
unsigned int partial_pages = 0;
unsigned int partial_slabs = 0;
/*
* Racy check. If we mistakenly see no partial slabs then we
......@@ -2141,28 +2151,28 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
return NULL;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
void *t;
if (!pfmemalloc_match(page, gfpflags))
if (!pfmemalloc_match(slab, gfpflags))
continue;
t = acquire_slab(s, n, page, object == NULL);
t = acquire_slab(s, n, slab, object == NULL);
if (!t)
break;
if (!object) {
*ret_page = page;
*ret_slab = slab;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
put_cpu_partial(s, page, 0);
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
partial_pages++;
partial_slabs++;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
if (!kmem_cache_has_cpu_partial(s)
|| partial_pages > s->cpu_partial_pages / 2)
|| partial_slabs > s->cpu_partial_slabs / 2)
break;
#else
break;
......@@ -2174,10 +2184,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
}
/*
* Get a page from somewhere. Search in increasing NUMA distances.
* Get a slab from somewhere. Search in increasing NUMA distances.
*/
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
struct page **ret_page)
struct slab **ret_slab)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
......@@ -2219,7 +2229,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed(zone, flags) &&
n->nr_partial > s->min_partial) {
object = get_partial_node(s, n, ret_page, flags);
object = get_partial_node(s, n, ret_slab, flags);
if (object) {
/*
* Don't check read_mems_allowed_retry()
......@@ -2238,10 +2248,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
}
/*
* Get a partial page, lock it and return it.
* Get a partial slab, lock it and return it.
*/
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
struct page **ret_page)
struct slab **ret_slab)
{
void *object;
int searchnode = node;
......@@ -2249,11 +2259,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
object = get_partial_node(s, get_node(s, searchnode), ret_page, flags);
object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags);
if (object || node != NUMA_NO_NODE)
return object;
return get_any_partial(s, flags, ret_page);
return get_any_partial(s, flags, ret_slab);
}
#ifdef CONFIG_PREEMPTION
......@@ -2330,25 +2340,25 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
}
/*
* Finishes removing the cpu slab. Merges cpu's freelist with page's freelist,
* Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
* unfreezes the slabs and puts it on the proper list.
* Assumes the slab has been already safely taken away from kmem_cache_cpu
* by the caller.
*/
static void deactivate_slab(struct kmem_cache *s, struct page *page,
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int lock = 0, free_delta = 0;
enum slab_modes l = M_NONE, m = M_NONE;
void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
unsigned long flags = 0;
struct page new;
struct page old;
struct slab new;
struct slab old;
if (page->freelist) {
if (slab->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = DEACTIVATE_TO_TAIL;
}
......@@ -2367,7 +2377,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* 'freelist_iter' is already corrupted. So isolate all objects
* starting at 'freelist_iter' by skipping them.
*/
if (freelist_corrupted(s, page, &freelist_iter, nextfree))
if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
break;
freelist_tail = freelist_iter;
......@@ -2377,25 +2387,25 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
/*
* Stage two: Unfreeze the page while splicing the per-cpu
* freelist to the head of page's freelist.
* Stage two: Unfreeze the slab while splicing the per-cpu
* freelist to the head of slab's freelist.
*
* Ensure that the page is unfrozen while the list presence
* Ensure that the slab is unfrozen while the list presence
* reflects the actual number of objects during unfreeze.
*
* We setup the list membership and then perform a cmpxchg
* with the count. If there is a mismatch then the page
* is not unfrozen but the page is on the wrong list.
* with the count. If there is a mismatch then the slab
* is not unfrozen but the slab is on the wrong list.
*
* Then we restart the process which may have to remove
* the page from the list that we just put it on again
* the slab from the list that we just put it on again
* because the number of objects in the slab may have
* changed.
*/
redo:
old.freelist = READ_ONCE(page->freelist);
old.counters = READ_ONCE(page->counters);
old.freelist = READ_ONCE(slab->freelist);
old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
......@@ -2416,9 +2426,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
if (!lock) {
lock = 1;
/*
* Taking the spinlock removes the possibility
* that acquire_slab() will see a slab page that
* is frozen
* Taking the spinlock removes the possibility that
* acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
}
......@@ -2437,18 +2446,18 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
if (l != m) {
if (l == M_PARTIAL)
remove_partial(n, page);
remove_partial(n, slab);
else if (l == M_FULL)
remove_full(s, n, page);
remove_full(s, n, slab);
if (m == M_PARTIAL)
add_partial(n, page, tail);
add_partial(n, slab, tail);
else if (m == M_FULL)
add_full(s, n, page);
add_full(s, n, slab);
}
l = m;
if (!cmpxchg_double_slab(s, page,
if (!cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"))
......@@ -2463,26 +2472,26 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
stat(s, DEACTIVATE_FULL);
else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
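/*
 * Illustrative sketch only (toy types and names, not the kernel code):
 * the protocol described above reads the old freelist/counters pair,
 * builds the new state speculatively, and publishes it with
 * cmpxchg_double_slab(), retrying on any mismatch.  Here both halves are
 * packed into one 64-bit atomic word so the same retry loop can be shown
 * with plain C11 atomics.
 */
#include <stdatomic.h>
#include <stdint.h>

struct toy_slab {
	_Atomic uint64_t word;	/* high 32 bits: counters, low 32 bits: freelist cookie */
};

#define TOY_FROZEN	0x1u	/* hypothetical "frozen" bit inside the counters */

static void toy_unfreeze(struct toy_slab *s)
{
	uint64_t old = atomic_load(&s->word);
	uint64_t new;

	do {
		uint32_t counters = (uint32_t)(old >> 32);
		uint32_t freelist = (uint32_t)old;

		counters &= ~TOY_FROZEN;	/* speculatively build the new state */
		new = ((uint64_t)counters << 32) | freelist;
		/* on failure, compare_exchange_weak reloads 'old' and we retry */
	} while (!atomic_compare_exchange_weak(&s->word, &old, new));
}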
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct page *page, *discard_page = NULL;
struct slab *slab, *slab_to_discard = NULL;
unsigned long flags = 0;
while (partial_page) {
struct page new;
struct page old;
while (partial_slab) {
struct slab new;
struct slab old;
page = partial_page;
partial_page = page->next;
slab = partial_slab;
partial_slab = slab->next;
n2 = get_node(s, page_to_nid(page));
n2 = get_node(s, slab_nid(slab));
if (n != n2) {
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
......@@ -2493,8 +2502,8 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
do {
old.freelist = page->freelist;
old.counters = page->counters;
old.freelist = slab->freelist;
old.counters = slab->counters;
VM_BUG_ON(!old.frozen);
new.counters = old.counters;
......@@ -2502,16 +2511,16 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
new.frozen = 0;
} while (!__cmpxchg_double_slab(s, page,
} while (!__cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
page->next = discard_page;
discard_page = page;
slab->next = slab_to_discard;
slab_to_discard = slab;
} else {
add_partial(n, page, DEACTIVATE_TO_TAIL);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
......@@ -2519,12 +2528,12 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
while (discard_page) {
page = discard_page;
discard_page = discard_page->next;
while (slab_to_discard) {
slab = slab_to_discard;
slab_to_discard = slab_to_discard->next;
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
......@@ -2534,73 +2543,73 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
*/
static void unfreeze_partials(struct kmem_cache *s)
{
struct page *partial_page;
struct slab *partial_slab;
unsigned long flags;
local_lock_irqsave(&s->cpu_slab->lock, flags);
partial_page = this_cpu_read(s->cpu_slab->partial);
partial_slab = this_cpu_read(s->cpu_slab->partial);
this_cpu_write(s->cpu_slab->partial, NULL);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (partial_page)
__unfreeze_partials(s, partial_page);
if (partial_slab)
__unfreeze_partials(s, partial_slab);
}
static void unfreeze_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
struct page *partial_page;
struct slab *partial_slab;
partial_page = slub_percpu_partial(c);
partial_slab = slub_percpu_partial(c);
c->partial = NULL;
if (partial_page)
__unfreeze_partials(s, partial_page);
if (partial_slab)
__unfreeze_partials(s, partial_slab);
}
/*
* Put a page that was just frozen (in __slab_free|get_partial_node) into a
* partial page slot if available.
* Put a slab that was just frozen (in __slab_free|get_partial_node) into a
* partial slab slot if available.
*
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
struct page *oldpage;
struct page *page_to_unfreeze = NULL;
struct slab *oldslab;
struct slab *slab_to_unfreeze = NULL;
unsigned long flags;
int pages = 0;
int slabs = 0;
local_lock_irqsave(&s->cpu_slab->lock, flags);
oldpage = this_cpu_read(s->cpu_slab->partial);
oldslab = this_cpu_read(s->cpu_slab->partial);
if (oldpage) {
if (drain && oldpage->pages >= s->cpu_partial_pages) {
if (oldslab) {
if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
/*
* Partial array is full. Move the existing set to the
* per node partial list. Postpone the actual unfreezing
* outside of the critical section.
*/
page_to_unfreeze = oldpage;
oldpage = NULL;
slab_to_unfreeze = oldslab;
oldslab = NULL;
} else {
pages = oldpage->pages;
slabs = oldslab->slabs;
}
}
pages++;
slabs++;
page->pages = pages;
page->next = oldpage;
slab->slabs = slabs;
slab->next = oldslab;
this_cpu_write(s->cpu_slab->partial, page);
this_cpu_write(s->cpu_slab->partial, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (page_to_unfreeze) {
__unfreeze_partials(s, page_to_unfreeze);
if (slab_to_unfreeze) {
__unfreeze_partials(s, slab_to_unfreeze);
stat(s, CPU_PARTIAL_DRAIN);
}
}
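/*
 * Illustrative sketch only (invented names, no locking or per-cpu
 * machinery): the bookkeeping above keeps the chain length cached in the
 * list head; once it reaches a threshold, the whole existing chain is
 * handed back so it can be unfrozen outside the critical section.
 */
#include <stddef.h>

struct toy_slab {
	struct toy_slab *next;
	int slabs;		/* only meaningful in the list head */
};

static struct toy_slab *toy_put_partial(struct toy_slab **head,
					struct toy_slab *slab, int threshold)
{
	struct toy_slab *old = *head;
	struct toy_slab *to_unfreeze = NULL;
	int slabs = 0;

	if (old) {
		if (old->slabs >= threshold) {
			to_unfreeze = old;	/* drain the existing set */
			old = NULL;
		} else {
			slabs = old->slabs;
		}
	}

	slab->slabs = slabs + 1;
	slab->next = old;
	*head = slab;

	return to_unfreeze;	/* caller unfreezes this chain, if any */
}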
......@@ -2616,22 +2625,22 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s,
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
unsigned long flags;
struct page *page;
struct slab *slab;
void *freelist;
local_lock_irqsave(&s->cpu_slab->lock, flags);
page = c->page;
slab = c->slab;
freelist = c->freelist;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (page) {
deactivate_slab(s, page, freelist);
if (slab) {
deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
}
......@@ -2640,14 +2649,14 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
void *freelist = c->freelist;
struct page *page = c->page;
struct slab *slab = c->slab;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
if (page) {
deactivate_slab(s, page, freelist);
if (slab) {
deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
......@@ -2676,7 +2685,7 @@ static void flush_cpu_slab(struct work_struct *w)
s = sfw->s;
c = this_cpu_ptr(s->cpu_slab);
if (c->page)
if (c->slab)
flush_slab(s, c);
unfreeze_partials(s);
......@@ -2686,7 +2695,7 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->page || slub_percpu_partial(c);
return c->slab || slub_percpu_partial(c);
}
static DEFINE_MUTEX(flush_lock);
......@@ -2748,19 +2757,19 @@ static int slub_cpu_dead(unsigned int cpu)
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
static inline int node_match(struct page *page, int node)
static inline int node_match(struct slab *slab, int node)
{
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE && page_to_nid(page) != node)
if (node != NUMA_NO_NODE && slab_nid(slab) != node)
return 0;
#endif
return 1;
}
#ifdef CONFIG_SLUB_DEBUG
static int count_free(struct page *page)
static int count_free(struct slab *slab)
{
return page->objects - page->inuse;
return slab->objects - slab->inuse;
}
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
......@@ -2771,15 +2780,15 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct page *))
int (*get_count)(struct slab *))
{
unsigned long flags;
unsigned long x = 0;
struct page *page;
struct slab *slab;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
x += get_count(page);
list_for_each_entry(slab, &n->partial, slab_list)
x += get_count(slab);
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
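/*
 * Usage sketch (illustrative, not a quoted caller): count_partial() just
 * folds a per-slab accessor over the node's partial list, so callers
 * elsewhere in this file can do, for example:
 *
 *	nr_free  = count_partial(n, count_free);
 *	nr_inuse = count_partial(n, count_inuse);	// defined further down
 *	nr_total = count_partial(n, count_total);	// defined further down
 */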
......@@ -2822,54 +2831,41 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
#endif
}
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
if (unlikely(PageSlabPfmemalloc(page)))
if (unlikely(slab_test_pfmemalloc(slab)))
return gfp_pfmemalloc_allowed(gfpflags);
return true;
}
/*
* A variant of pfmemalloc_match() that tests page flags without asserting
* PageSlab. Intended for opportunistic checks before taking a lock and
* rechecking that nobody else freed the page under us.
*/
static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
{
if (unlikely(__PageSlabPfmemalloc(page)))
return gfp_pfmemalloc_allowed(gfpflags);
return true;
}
/*
* Check the page->freelist of a page and either transfer the freelist to the
* per cpu freelist or deactivate the page.
* Check the slab->freelist and either transfer the freelist to the
* per cpu freelist or deactivate the slab.
*
* The page is still frozen if the return value is not NULL.
* The slab is still frozen if the return value is not NULL.
*
* If this function returns NULL then the page has been unfrozen.
* If this function returns NULL then the slab has been unfrozen.
*/
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
{
struct page new;
struct slab new;
unsigned long counters;
void *freelist;
lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
do {
freelist = page->freelist;
counters = page->counters;
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
VM_BUG_ON(!new.frozen);
new.inuse = page->objects;
new.inuse = slab->objects;
new.frozen = freelist != NULL;
} while (!__cmpxchg_double_slab(s, page,
} while (!__cmpxchg_double_slab(s, slab,
freelist, counters,
NULL, new.counters,
"get_freelist"));
......@@ -2900,15 +2896,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
void *freelist;
struct page *page;
struct slab *slab;
unsigned long flags;
stat(s, ALLOC_SLOWPATH);
reread_page:
reread_slab:
page = READ_ONCE(c->page);
if (!page) {
slab = READ_ONCE(c->slab);
if (!slab) {
/*
* if the node is not online or has no normal memory, just
* ignore the node constraint
......@@ -2920,7 +2916,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
redo:
if (unlikely(!node_match(page, node))) {
if (unlikely(!node_match(slab, node))) {
/*
* same as above but node_match() being false already
* implies node != NUMA_NO_NODE
......@@ -2939,23 +2935,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* PFMEMALLOC but right now, we are losing the pfmemalloc
* information when the page leaves the per-cpu allocator
*/
if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
if (unlikely(!pfmemalloc_match(slab, gfpflags)))
goto deactivate_slab;
/* must check again c->page in case we got preempted and it changed */
/* must check again c->slab in case we got preempted and it changed */
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(page != c->page)) {
if (unlikely(slab != c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
freelist = c->freelist;
if (freelist)
goto load_freelist;
freelist = get_freelist(s, page);
freelist = get_freelist(s, slab);
if (!freelist) {
c->page = NULL;
c->slab = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
......@@ -2969,10 +2965,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
/*
* freelist is pointing to the list of objects to be used.
* page is pointing to the page from which the objects are obtained.
* That page must be frozen for per cpu allocations to work.
* slab is pointing to the slab from which the objects are obtained.
* That slab must be frozen for per cpu allocations to work.
*/
VM_BUG_ON(!c->page->frozen);
VM_BUG_ON(!c->slab->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
......@@ -2981,23 +2977,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
deactivate_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (page != c->page) {
if (slab != c->slab) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
freelist = c->freelist;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, page, freelist);
deactivate_slab(s, slab, freelist);
new_slab:
if (slub_percpu_partial(c)) {
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->page)) {
if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
if (unlikely(!slub_percpu_partial(c))) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
......@@ -3005,8 +3001,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto new_objects;
}
page = c->page = slub_percpu_partial(c);
slub_set_percpu_partial(c, page);
slab = c->slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
......@@ -3014,32 +3010,32 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
new_objects:
freelist = get_partial(s, gfpflags, node, &page);
freelist = get_partial(s, gfpflags, node, &slab);
if (freelist)
goto check_new_page;
goto check_new_slab;
slub_put_cpu_ptr(s->cpu_slab);
page = new_slab(s, gfpflags, node);
slab = new_slab(s, gfpflags, node);
c = slub_get_cpu_ptr(s->cpu_slab);
if (unlikely(!page)) {
if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
/*
* No other reference to the page yet so we can
* No other reference to the slab yet so we can
* muck around with it freely without cmpxchg
*/
freelist = page->freelist;
page->freelist = NULL;
freelist = slab->freelist;
slab->freelist = NULL;
stat(s, ALLOC_SLAB);
check_new_page:
check_new_slab:
if (kmem_cache_debug(s)) {
if (!alloc_debug_processing(s, page, freelist, addr)) {
if (!alloc_debug_processing(s, slab, freelist, addr)) {
/* Slab failed checks. Next slab needed */
goto new_slab;
} else {
......@@ -3051,39 +3047,39 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
}
if (unlikely(!pfmemalloc_match(page, gfpflags)))
if (unlikely(!pfmemalloc_match(slab, gfpflags)))
/*
* For !pfmemalloc_match() case we don't load freelist so that
* we don't make further mismatched allocations easier.
*/
goto return_single;
retry_load_page:
retry_load_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->page)) {
if (unlikely(c->slab)) {
void *flush_freelist = c->freelist;
struct page *flush_page = c->page;
struct slab *flush_slab = c->slab;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, flush_page, flush_freelist);
deactivate_slab(s, flush_slab, flush_freelist);
stat(s, CPUSLAB_FLUSH);
goto retry_load_page;
goto retry_load_slab;
}
c->page = page;
c->slab = slab;
goto load_freelist;
return_single:
deactivate_slab(s, page, get_freepointer(s, freelist));
deactivate_slab(s, slab, get_freepointer(s, freelist));
return freelist;
}
......@@ -3140,7 +3136,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
{
void *object;
struct kmem_cache_cpu *c;
struct page *page;
struct slab *slab;
unsigned long tid;
struct obj_cgroup *objcg = NULL;
bool init = false;
......@@ -3172,9 +3168,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
/*
* Irqless object alloc/free algorithm used here depends on sequence
* of fetching cpu_slab's data. tid should be fetched before anything
* on c to guarantee that object and page associated with previous tid
* on c to guarantee that object and slab associated with previous tid
* won't be used with current tid. If we fetch tid first, object and
* page could be one associated with next tid and our alloc/free
* slab could be one associated with next tid and our alloc/free
* request will be failed. In this case, we will retry. So, no problem.
*/
barrier();
......@@ -3187,7 +3183,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
*/
object = c->freelist;
page = c->page;
slab = c->slab;
/*
* We cannot use the lockless fastpath on PREEMPT_RT because if a
* slowpath has taken the local_lock_irqsave(), it is not protected
......@@ -3196,7 +3192,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
* there is a suitable cpu freelist.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
unlikely(!object || !page || !node_match(page, node))) {
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
} else {
void *next_object = get_freepointer_safe(s, object);
......@@ -3298,17 +3294,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
* have a longer lifetime than the cpu slabs in most processing loads.
*
* So we still attempt to reduce cache line usage. Just take the slab
* lock and free the item. If there is no additional partial page
* lock and free the item. If there is no additional partial slab
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
static void __slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
void *prior;
int was_frozen;
struct page new;
struct slab new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;
......@@ -3319,7 +3315,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
if (kmem_cache_debug(s) &&
!free_debug_processing(s, page, head, tail, cnt, addr))
!free_debug_processing(s, slab, head, tail, cnt, addr))
return;
do {
......@@ -3327,8 +3323,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
counters = page->counters;
prior = slab->freelist;
counters = slab->counters;
set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
......@@ -3347,7 +3343,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
} else { /* Needs to be taken off a list */
n = get_node(s, page_to_nid(page));
n = get_node(s, slab_nid(slab));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
......@@ -3361,7 +3357,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
}
}
} while (!cmpxchg_double_slab(s, page,
} while (!cmpxchg_double_slab(s, slab,
prior, counters,
head, new.counters,
"__slab_free"));
......@@ -3376,10 +3372,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_FROZEN);
} else if (new.frozen) {
/*
* If we just froze the page then put it onto the
* If we just froze the slab then put it onto the
* per cpu partial list.
*/
put_cpu_partial(s, page, 1);
put_cpu_partial(s, slab, 1);
stat(s, CPU_PARTIAL_FREE);
}
......@@ -3394,8 +3390,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
remove_full(s, n, page);
add_partial(n, page, DEACTIVATE_TO_TAIL);
remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
......@@ -3406,16 +3402,16 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
/*
* Slab on the partial list.
*/
remove_partial(n, page);
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
} else {
/* Slab must be on the full list */
remove_full(s, n, page);
remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
discard_slab(s, slab);
}
/*
......@@ -3430,11 +3426,11 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* with all sorts of special processing.
*
* Bulk free of a freelist with several objects (all pointing to the
* same page) possible by specifying head and tail ptr, plus objects
* same slab) possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
static __always_inline void do_slab_free(struct kmem_cache *s,
struct page *page, void *head, void *tail,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
......@@ -3457,7 +3453,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
if (likely(page == c->page)) {
if (likely(slab == c->slab)) {
#ifndef CONFIG_PREEMPT_RT
void **freelist = READ_ONCE(c->freelist);
......@@ -3483,7 +3479,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
local_lock(&s->cpu_slab->lock);
c = this_cpu_ptr(s->cpu_slab);
if (unlikely(page != c->page)) {
if (unlikely(slab != c->slab)) {
local_unlock(&s->cpu_slab->lock);
goto redo;
}
......@@ -3498,11 +3494,11 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
#endif
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, head, tail_obj, cnt, addr);
__slab_free(s, slab, head, tail_obj, cnt, addr);
}
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
......@@ -3511,13 +3507,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* to remove objects, whose reuse must be delayed.
*/
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
do_slab_free(s, page, head, tail, cnt, addr);
do_slab_free(s, slab, head, tail, cnt, addr);
}
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
}
#endif
......@@ -3527,35 +3523,36 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
if (!s)
return;
trace_kmem_cache_free(_RET_IP_, x, s->name);
slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
struct detached_freelist {
struct page *page;
struct slab *slab;
void *tail;
void *freelist;
int cnt;
struct kmem_cache *s;
};
static inline void free_nonslab_page(struct page *page, void *object)
static inline void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = compound_order(page);
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(!PageCompound(page)))
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kfree_hook(object);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
__free_pages(page, order);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
/*
* This function progressively scans the array with free objects (with
 * a limited look ahead) and extracts objects belonging to the same
* page. It builds a detached freelist directly within the given
* page/objects. This can happen without any need for
* slab. It builds a detached freelist directly within the given
* slab/objects. This can happen without any need for
* synchronization, because the objects are owned by running process.
 * The freelist is built up as a single linked list in the objects.
* The idea is, that this detached freelist can then be bulk
......@@ -3570,10 +3567,11 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
size_t first_skipped_index = 0;
int lookahead = 3;
void *object;
struct page *page;
struct folio *folio;
struct slab *slab;
/* Always re-init detached_freelist */
df->page = NULL;
df->slab = NULL;
do {
object = p[--size];
......@@ -3583,17 +3581,19 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
return 0;
page = virt_to_head_page(object);
folio = virt_to_folio(object);
if (!s) {
/* Handle kalloc'ed objects */
if (unlikely(!PageSlab(page))) {
free_nonslab_page(page, object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
p[size] = NULL; /* mark object processed */
return size;
}
/* Derive kmem_cache from object */
df->s = page->slab_cache;
slab = folio_slab(folio);
df->s = slab->slab_cache;
} else {
slab = folio_slab(folio);
df->s = cache_from_obj(s, object); /* Support for memcg */
}
......@@ -3605,7 +3605,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
}
/* Start new detached freelist */
df->page = page;
df->slab = slab;
set_freepointer(df->s, object, NULL);
df->tail = object;
df->freelist = object;
......@@ -3617,8 +3617,8 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
continue; /* Skip processed objects */
/* df->page is always set at this point */
if (df->page == virt_to_head_page(object)) {
/* df->slab is always set at this point */
if (df->slab == virt_to_slab(object)) {
/* Opportunity build freelist */
set_freepointer(df->s, object, df->freelist);
df->freelist = object;
......@@ -3650,10 +3650,10 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
struct detached_freelist df;
size = build_detached_freelist(s, size, p, &df);
if (!df.page)
if (!df.slab)
continue;
slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
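/*
 * Illustrative sketch only (toy types, an integer slab_id instead of
 * virt_to_slab(), no lookahead): the detached-freelist idea in isolation.
 * One pass links all objects that share a slab into a local list so they
 * can be freed with a single bulk call.
 */
#include <stddef.h>

struct toy_obj {
	struct toy_obj *next;
	int slab_id;			/* stand-in for the owning slab */
};

struct toy_freelist {
	int slab_id;
	struct toy_obj *head;
	size_t cnt;
};

/* Detach every object of one slab from p[]; return how many were taken. */
static size_t toy_build_freelist(struct toy_obj **p, size_t size,
				 struct toy_freelist *df)
{
	df->head = NULL;
	df->cnt = 0;

	for (size_t i = 0; i < size; i++) {
		struct toy_obj *obj = p[i];

		if (!obj)
			continue;		/* already processed earlier */
		if (!df->head)
			df->slab_id = obj->slab_id;	/* start a new freelist */
		else if (obj->slab_id != df->slab_id)
			continue;		/* different slab: leave for later */

		obj->next = df->head;		/* push onto the detached list */
		df->head = obj;
		df->cnt++;
		p[i] = NULL;			/* mark processed */
	}
	return df->cnt;
}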
......@@ -3787,7 +3787,7 @@ static unsigned int slub_min_objects;
* requested a higher minimum order then we start with that one instead of
* the smallest order which will fit the object.
*/
static inline unsigned int slab_order(unsigned int size,
static inline unsigned int calc_slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
{
......@@ -3851,7 +3851,7 @@ static inline int calculate_order(unsigned int size)
fraction = 16;
while (fraction >= 4) {
order = slab_order(size, min_objects,
order = calc_slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
......@@ -3864,14 +3864,14 @@ static inline int calculate_order(unsigned int size)
* We were unable to place multiple objects in a slab. Now
* lets see if we can place a single object there.
*/
order = slab_order(size, 1, slub_max_order, 1);
order = calc_slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
* Doh this slab cannot be placed using slub_max_order.
*/
order = slab_order(size, 1, MAX_ORDER, 1);
order = calc_slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
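/*
 * Illustrative sketch only (simplified, PAGE_SIZE assumed to be 4096,
 * boundary conditions of calc_slab_order() omitted): the smallest order
 * wins whose leftover space, after packing whole objects, is at most
 * 1/fraction of the slab.
 */
static unsigned int toy_slab_order(unsigned int size, unsigned int min_objects,
				   unsigned int max_order, unsigned int fract_leftover)
{
	unsigned int order;

	for (order = 0; order < max_order; order++) {
		unsigned int slab_size = 4096u << order;
		unsigned int rem = slab_size % size;

		if (slab_size / size < min_objects)
			continue;	/* too small to hold min_objects */
		if (rem <= slab_size / fract_leftover)
			break;		/* waste is acceptable */
	}
	return order;
}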
......@@ -3923,38 +3923,38 @@ static struct kmem_cache *kmem_cache_node;
*/
static void early_kmem_cache_node_alloc(int node)
{
struct page *page;
struct slab *slab;
struct kmem_cache_node *n;
BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
BUG_ON(!page);
if (page_to_nid(page) != node) {
BUG_ON(!slab);
if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
}
n = page->freelist;
n = slab->freelist;
BUG_ON(!n);
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
page->freelist = get_freepointer(kmem_cache_node, n);
page->inuse = 1;
page->frozen = 0;
slab->freelist = get_freepointer(kmem_cache_node, n);
slab->inuse = 1;
slab->frozen = 0;
kmem_cache_node->node[node] = n;
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, page->objects);
inc_slabs_node(kmem_cache_node, node, slab->objects);
/*
* No locks need to be taken here as it has just been
* initialized and there is no concurrent access.
*/
__add_partial(n, page, DEACTIVATE_TO_HEAD);
__add_partial(n, slab, DEACTIVATE_TO_HEAD);
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
......@@ -4212,7 +4212,7 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
#endif
/*
* The larger the object size is, the more pages we want on the partial
* The larger the object size is, the more slabs we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);
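/*
 * Worked example of the expression above (illustrative sizes): for a
 * 64-byte object ilog2(64) / 2 = 3, for 512 bytes it is 4, and for 4096
 * bytes it is 6; set_min_partial() may clamp the value further.
 */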
......@@ -4240,20 +4240,20 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
return -EINVAL;
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *addr = slab_address(slab);
unsigned long flags;
unsigned long *map;
void *p;
slab_err(s, page, text, s->name);
slab_lock(page, &flags);
slab_err(s, slab, text, s->name);
slab_lock(slab, &flags);
map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
map = get_map(s, slab);
for_each_object(p, s, addr, slab->objects) {
if (!test_bit(__obj_to_index(s, addr, p), map)) {
pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
......@@ -4261,7 +4261,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
}
}
put_map(map);
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
#endif
}
......@@ -4273,23 +4273,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
struct slab *slab, *h;
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &n->partial, slab_list) {
if (!page->inuse) {
remove_partial(n, page);
list_add(&page->slab_list, &discard);
list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
if (!slab->inuse) {
remove_partial(n, slab);
list_add(&slab->slab_list, &discard);
} else {
list_slab_objects(s, page,
list_slab_objects(s, slab,
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
list_for_each_entry_safe(slab, h, &discard, slab_list)
discard_slab(s, slab);
}
bool __kmem_cache_empty(struct kmem_cache *s)
......@@ -4322,31 +4322,32 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
void *base;
int __maybe_unused i;
unsigned int objnr;
void *objp;
void *objp0;
struct kmem_cache *s = page->slab_cache;
struct kmem_cache *s = slab->slab_cache;
struct track __maybe_unused *trackp;
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
kpp->kp_slab_cache = s;
base = page_address(page);
base = slab_address(slab);
objp0 = kasan_reset_tag(object);
#ifdef CONFIG_SLUB_DEBUG
objp = restore_red_left(s, objp0);
#else
objp = objp0;
#endif
objnr = obj_to_index(s, page, objp);
objnr = obj_to_index(s, slab, objp);
kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
objp = base + s->size * objnr;
kpp->kp_objp = objp;
if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size
|| (objp - base) % s->size) ||
!(s->flags & SLAB_STORE_USER))
return;
#ifdef CONFIG_SLUB_DEBUG
......@@ -4484,8 +4485,8 @@ EXPORT_SYMBOL(__kmalloc_node);
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
struct kmem_cache *s;
unsigned int offset;
......@@ -4494,10 +4495,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find object and usable object size. */
s = page->slab_cache;
s = slab->slab_cache;
/* Reject impossible pointers. */
if (ptr < page_address(page))
if (ptr < slab_address(slab))
usercopy_abort("SLUB object not in SLUB page?!", NULL,
to_user, 0, n);
......@@ -4505,7 +4506,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
if (is_kfence)
offset = ptr - kfence_object_start(ptr);
else
offset = (ptr - page_address(page)) % s->size;
offset = (ptr - slab_address(slab)) % s->size;
/* Adjust for redzone and reject if within the redzone. */
if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
......@@ -4527,25 +4528,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
size_t __ksize(const void *object)
{
struct page *page;
struct folio *folio;
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
page = virt_to_head_page(object);
folio = virt_to_folio(object);
if (unlikely(!PageSlab(page))) {
WARN_ON(!PageCompound(page));
return page_size(page);
}
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
return slab_ksize(page->slab_cache);
return slab_ksize(folio_slab(folio)->slab_cache);
}
EXPORT_SYMBOL(__ksize);
void kfree(const void *x)
{
struct page *page;
struct folio *folio;
struct slab *slab;
void *object = (void *)x;
trace_kfree(_RET_IP_, x);
......@@ -4553,12 +4553,13 @@ void kfree(const void *x)
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
free_nonslab_page(page, object);
folio = virt_to_folio(x);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
slab = folio_slab(folio);
slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
......@@ -4578,8 +4579,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
int node;
int i;
struct kmem_cache_node *n;
struct page *page;
struct page *t;
struct slab *slab;
struct slab *t;
struct list_head discard;
struct list_head promote[SHRINK_PROMOTE_MAX];
unsigned long flags;
......@@ -4596,22 +4597,22 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
* Build lists of slabs to discard or promote.
*
* Note that concurrent frees may occur while we hold the
* list_lock. page->inuse here is the upper limit.
* list_lock. slab->inuse here is the upper limit.
*/
list_for_each_entry_safe(page, t, &n->partial, slab_list) {
int free = page->objects - page->inuse;
list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
int free = slab->objects - slab->inuse;
/* Do not reread page->inuse */
/* Do not reread slab->inuse */
barrier();
/* We do not keep full slabs on the list */
BUG_ON(free <= 0);
if (free == page->objects) {
list_move(&page->slab_list, &discard);
if (free == slab->objects) {
list_move(&slab->slab_list, &discard);
n->nr_partial--;
} else if (free <= SHRINK_PROMOTE_MAX)
list_move(&page->slab_list, promote + free - 1);
list_move(&slab->slab_list, promote + free - 1);
}
/*
......@@ -4624,8 +4625,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, &discard, slab_list)
discard_slab(s, page);
list_for_each_entry_safe(slab, t, &discard, slab_list)
discard_slab(s, slab);
if (slabs_node(s, node))
ret = 1;
......@@ -4786,7 +4787,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_kmem_cache_node(s, node, n) {
struct page *p;
struct slab *p;
list_for_each_entry(p, &n->partial, slab_list)
p->slab_cache = s;
......@@ -4964,54 +4965,54 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
static int count_inuse(struct page *page)
static int count_inuse(struct slab *slab)
{
return page->inuse;
return slab->inuse;
}
static int count_total(struct page *page)
static int count_total(struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif
#ifdef CONFIG_SLUB_DEBUG
static void validate_slab(struct kmem_cache *s, struct page *page,
static void validate_slab(struct kmem_cache *s, struct slab *slab,
unsigned long *obj_map)
{
void *p;
void *addr = page_address(page);
void *addr = slab_address(slab);
unsigned long flags;
slab_lock(page, &flags);
slab_lock(slab, &flags);
if (!check_slab(s, page) || !on_freelist(s, page, NULL))
if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
goto unlock;
/* Now we know that a valid freelist exists */
__fill_map(obj_map, s, page);
for_each_object(p, s, addr, page->objects) {
__fill_map(obj_map, s, slab);
for_each_object(p, s, addr, slab->objects) {
u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
if (!check_object(s, page, p, val))
if (!check_object(s, slab, p, val))
break;
}
unlock:
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
}
static int validate_slab_node(struct kmem_cache *s,
struct kmem_cache_node *n, unsigned long *obj_map)
{
unsigned long count = 0;
struct page *page;
struct slab *slab;
unsigned long flags;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list) {
validate_slab(s, page, obj_map);
list_for_each_entry(slab, &n->partial, slab_list) {
validate_slab(s, slab, obj_map);
count++;
}
if (count != n->nr_partial) {
......@@ -5023,8 +5024,8 @@ static int validate_slab_node(struct kmem_cache *s,
if (!(s->flags & SLAB_STORE_USER))
goto out;
list_for_each_entry(page, &n->full, slab_list) {
validate_slab(s, page, obj_map);
list_for_each_entry(slab, &n->full, slab_list) {
validate_slab(s, slab, obj_map);
count++;
}
if (count != atomic_long_read(&n->nr_slabs)) {
......@@ -5190,15 +5191,15 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc,
struct slab *slab, enum track_item alloc,
unsigned long *obj_map)
{
void *addr = page_address(page);
void *addr = slab_address(slab);
void *p;
__fill_map(obj_map, s, page);
__fill_map(obj_map, s, slab);
for_each_object(p, s, addr, page->objects)
for_each_object(p, s, addr, slab->objects)
if (!test_bit(__obj_to_index(s, addr, p), obj_map))
add_location(t, s, get_track(s, p, alloc));
}
......@@ -5240,35 +5241,37 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
cpu);
int node;
struct page *page;
struct slab *slab;
page = READ_ONCE(c->page);
if (!page)
slab = READ_ONCE(c->slab);
if (!slab)
continue;
node = page_to_nid(page);
node = slab_nid(slab);
if (flags & SO_TOTAL)
x = page->objects;
x = slab->objects;
else if (flags & SO_OBJECTS)
x = page->inuse;
x = slab->inuse;
else
x = 1;
total += x;
nodes[node] += x;
page = slub_percpu_partial_read_once(c);
if (page) {
node = page_to_nid(page);
#ifdef CONFIG_SLUB_CPU_PARTIAL
slab = slub_percpu_partial_read_once(c);
if (slab) {
node = slab_nid(slab);
if (flags & SO_TOTAL)
WARN_ON_ONCE(1);
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
x = page->pages;
x = slab->slabs;
total += x;
nodes[node] += x;
}
#endif
}
}
......@@ -5467,33 +5470,35 @@ SLAB_ATTR_RO(objects_partial);
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
int pages = 0;
int cpu;
int slabs = 0;
int cpu __maybe_unused;
int len = 0;
#ifdef CONFIG_SLUB_CPU_PARTIAL
for_each_online_cpu(cpu) {
struct page *page;
struct slab *slab;
page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (page)
pages += page->pages;
if (slab)
slabs += slab->slabs;
}
#endif
/* Approximate half-full pages , see slub_set_cpu_partial() */
objects = (pages * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
/* Approximate half-full slabs, see slub_set_cpu_partial() */
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
#ifdef CONFIG_SMP
#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
for_each_online_cpu(cpu) {
struct page *page;
struct slab *slab;
page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (page) {
pages = READ_ONCE(page->pages);
objects = (pages * oo_objects(s->oo)) / 2;
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
slabs = READ_ONCE(slab->slabs);
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, pages);
cpu, objects, slabs);
}
}
#endif
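/*
 * Worked example for the estimate above (illustrative numbers): with
 * oo_objects(s->oo) == 32 objects per slab and 6 slabs on a CPU's partial
 * list, the value printed is (6 * 32) / 2 = 96 objects.
 */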
......@@ -6161,16 +6166,16 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
struct page *page;
struct slab *slab;
if (!atomic_long_read(&n->nr_slabs))
continue;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
process_slab(t, s, page, alloc, obj_map);
list_for_each_entry(page, &n->full, slab_list)
process_slab(t, s, page, alloc, obj_map);
list_for_each_entry(slab, &n->partial, slab_list)
process_slab(t, s, slab, alloc, obj_map);
list_for_each_entry(slab, &n->full, slab_list)
process_slab(t, s, slab, alloc, obj_map);
spin_unlock_irqrestore(&n->list_lock, flags);
}
......
......@@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
magic = (unsigned long) page->freelist;
magic = page->index;
BUG_ON(magic == NODE_INFO);
......
......@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
#include "slab.h"
/*
* Checks if a given pointer and length is contained by the current
......@@ -223,7 +224,7 @@ static inline void check_page_span(const void *ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
struct page *page;
struct folio *folio;
if (!virt_addr_valid(ptr))
return;
......@@ -231,16 +232,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
* highmem page or fallback to virt_to_page(). The following
* is effectively a highmem-aware virt_to_head_page().
* is effectively a highmem-aware virt_to_slab().
*/
page = compound_head(kmap_to_page((void *)ptr));
folio = page_folio(kmap_to_page((void *)ptr));
if (PageSlab(page)) {
if (folio_test_slab(folio)) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, page, to_user);
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
check_page_span(ptr, n, page, to_user);
check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
}
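/*
 * Background sketch (an assumption drawn from the series description, not
 * the exact helpers): struct slab reuses the storage of the head struct
 * page, so once folio_test_slab() has confirmed the type, converting a
 * folio to a slab is essentially a reinterpretation of the same address.
 */
struct toy_folio;
struct toy_slab;

static inline struct toy_slab *toy_folio_slab(struct toy_folio *folio)
{
	return (struct toy_slab *)folio;	/* same storage, different typed view */
}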
......
......@@ -17,10 +17,10 @@
*
* Usage of struct page fields:
* page->private: points to zspage
* page->freelist(index): links together all component pages of a zspage
* page->index: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
* to store handle.
* page->units: first object offset in a subpage of zspage
* page->page_type: first object offset in a subpage of zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
......@@ -489,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage)
static inline int get_first_obj_offset(struct page *page)
{
return page->units;
return page->page_type;
}
static inline void set_first_obj_offset(struct page *page, int offset)
{
page->units = offset;
page->page_type = offset;
}
static inline unsigned int get_freeobj(struct zspage *zspage)
......@@ -827,7 +827,7 @@ static struct page *get_next_page(struct page *page)
if (unlikely(PageHugeObject(page)))
return NULL;
return page->freelist;
return (struct page *)page->index;
}
/**
......@@ -901,7 +901,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
page->index = 0;
}
static int trylock_zspage(struct zspage *zspage)
......@@ -1027,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
/*
* Allocate individual pages and link them together as:
* 1. all pages are linked together using page->freelist
* 1. all pages are linked together using page->index
* 2. each sub-page point to zspage using page->private
*
* we set PG_private to identify the first page (i.e. no other sub-page
......@@ -1036,7 +1036,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
for (i = 0; i < nr_pages; i++) {
page = pages[i];
set_page_private(page, (unsigned long)zspage);
page->freelist = NULL;
page->index = 0;
if (i == 0) {
zspage->first_page = page;
SetPagePrivate(page);
......@@ -1044,7 +1044,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
class->pages_per_zspage == 1))
SetPageHugeObject(page);
} else {
prev_page->freelist = page;
prev_page->index = (unsigned long)page;
}
prev_page = page;
}
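/*
 * Illustrative sketch only (invented struct) of the chaining scheme above:
 * the link to the next page lives in an unsigned long field and is cast
 * back to a pointer when the chain is walked, with 0 terminating the list.
 */
struct toy_page {
	unsigned long index;		/* next page in the chain, or 0 */
};

static struct toy_page *toy_next_page(struct toy_page *page)
{
	return (struct toy_page *)page->index;
}

static void toy_chain_pages(struct toy_page **pages, int nr)
{
	for (int i = 0; i < nr; i++) {
		pages[i]->index = 0;
		if (i > 0)
			pages[i - 1]->index = (unsigned long)pages[i];
	}
}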
......