Commit 9d6c59c1 authored by Vlastimil Babka

Merge branch 'for-5.17/struct-slab' into for-linus

Series "Separate struct slab from struct page" v4

This series originated as an offshoot of Matthew's folio work. Among the more
complex parts of the struct page definition are the fields used by the slab
allocators. It would be good for the MM in general if struct slab were its own
data type, and it also helps to prevent tail pages from slipping in anywhere.
As Matthew requested in his proof-of-concept series, I have taken over its
development, so it's a mix of patches from him (often modified by me) and my
own.

One big difference is the use of Coccinelle to perform the relatively trivial
parts of the conversion automatically and all at once, instead of as a larger
number of small, incrementally reviewable steps. Thanks to Julia Lawall and
Luis Chamberlain for all their help!

Another notable difference, based also on review feedback, is that large
kmalloc allocations, which are not really slabs but use the page allocator
directly, are not represented by a struct slab. When going from an object
address to a struct slab, the code first tests the folio's slab flag and
converts to a struct slab only if it is set. This makes the struct slab type
stronger.

Finally, although Matthew's version didn't use any of the folio work, the
initial folio support has been merged in the meantime, so my version builds on
top of it where appropriate. This eliminates some of the redundant
compound_head() calls performed e.g. when testing the slab flag.
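
As a minimal sketch of what that saves (the wrapper names here are invented
for illustration; the real change is visible in the __kasan_slab_free_mempool()
hunk below):

  /* Before: the head page is resolved twice for a single test */
  static bool obj_is_slab_old(const void *ptr)
  {
          struct page *page = virt_to_head_page(ptr); /* compound_head() #1 */

          /* PageSlab()'s PF_NO_TAIL policy calls compound_head() again */
          return PageSlab(page);
  }

  /* After: virt_to_folio() resolves the head exactly once */
  static bool obj_is_slab_new(const void *ptr)
  {
          struct folio *folio = virt_to_folio(ptr);

          return folio_test_slab(folio); /* direct flag test, no head lookup */
  }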

To sum up: after this series, the struct page fields used by the slab
allocators are moved from struct page to a new struct slab that uses the same
physical storage (enforced at build time; see the excerpt after the list
below). Which fields are available further depends on the selected slab
allocator implementation. The advantages include:

- Similar to folios, a struct slab is always guaranteed to correspond to the
  head page, even for slabs of order > 0. Additionally, it's guaranteed to be
  an actual slab, not a large kmalloc allocation. This removes uncertainty and
  potential for bugs.
- It's not possible to accidentally use fields of a slab implementation that
  is not configured.
- Other subsystems can no longer use slab's fields in struct page (some
  existing non-slab usages had to be adjusted in this series), so slab
  implementations have more freedom to rearrange them within struct slab.
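
The "same physical storage" guarantee is not left to convention: the series
enforces the layout correspondence at compile time. An excerpt of the
build-time checks from the new mm/slab.h (the full set appears in the diff
below):

  #define SLAB_MATCH(pg, sl) \
          static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
  SLAB_MATCH(flags, __page_flags);
  SLAB_MATCH(slab_list, slab_list);
  SLAB_MATCH(_refcount, __page_refcount);
  #undef SLAB_MATCH

  static_assert(sizeof(struct slab) <= sizeof(struct page));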

Link: https://lore.kernel.org/all/20220104001046.12263-1-vbabka@suse.cz/
parents eb52c0fc b01af5c0
......@@ -981,7 +981,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
magic = (unsigned long)page->freelist;
magic = page->index;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
......
......@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
*/
static inline void free_bootmem_page(struct page *page)
{
unsigned long magic = (unsigned long)page->freelist;
unsigned long magic = page->index;
/*
* The reserve_bootmem_region sets the reserved flag on bootmem
......
......@@ -9,6 +9,7 @@
struct kmem_cache;
struct page;
struct slab;
struct vm_struct;
struct task_struct;
......@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
return 0;
}
void __kasan_poison_slab(struct page *page);
static __always_inline void kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab);
static __always_inline void kasan_poison_slab(struct slab *slab)
{
if (kasan_enabled())
__kasan_poison_slab(page);
__kasan_poison_slab(slab);
}
void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
......@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
......
......@@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
* page_objcgs - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function assumes that the page is known to have an
* associated object cgroups vector. It's not safe to call this function
* against pages, which might have an associated memory cgroup: e.g.
* kernel stack pages.
*/
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_objcgs_check - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function is safe to use if the page can be directly associated
* with a memory cgroup.
*/
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
return NULL;
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
......@@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return false;
}
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
}
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
#endif
static inline bool PageMemcgKmem(struct page *page)
......
......@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
return compound_head(page);
}
static inline struct folio *virt_to_folio(const void *x)
{
struct page *page = virt_to_page(x);
return page_folio(page);
}
void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
......@@ -1753,6 +1760,11 @@ void page_address_init(void);
#define page_address_init() do { } while(0)
#endif
static inline void *folio_address(const struct folio *folio)
{
return page_address(&folio->page);
}
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
......
......@@ -56,11 +56,11 @@ struct mem_cgroup;
* in each subpage, but you may need to restore some of their values
* afterwards.
*
* SLUB uses cmpxchg_double() to atomically update its freelist and
* counters. That requires that freelist & counters be adjacent and
* double-word aligned. We align all struct pages to double-word
* boundaries, and ensure that 'freelist' is aligned within the
* struct.
* SLUB uses cmpxchg_double() to atomically update its freelist and counters.
* That requires that freelist & counters in struct slab be adjacent and
* double-word aligned. Because struct slab currently just reinterprets the
* bits of struct page, we align all struct pages to double-word boundaries,
* and ensure that 'freelist' is aligned within struct slab.
*/
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
......
......@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user);
#else
static inline void __check_heap_object(const void *ptr, unsigned long n,
struct page *page, bool to_user) { }
#endif
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
......
......@@ -87,11 +87,11 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
......@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
u32 offset = (obj - page->s_mem);
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(page_address(page)))
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
......
......@@ -48,9 +48,9 @@ enum stat_item {
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
......@@ -99,8 +99,8 @@ struct kmem_cache {
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial pages to keep around */
unsigned int cpu_partial_pages;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
......@@ -156,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
}
#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
......@@ -181,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, page_address(page), obj);
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -15,7 +15,7 @@
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
page->freelist = (void *)type;
page->index = type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
......@@ -23,14 +23,13 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
void put_page_bootmem(struct page *page)
{
unsigned long type;
unsigned long type = page->index;
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
page->freelist = NULL;
page->index = 0;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
......
......@@ -247,8 +247,9 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
}
#endif
void __kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab)
{
struct page *page = slab_page(slab);
unsigned long i;
for (i = 0; i < compound_nr(page); i++)
......@@ -298,7 +299,7 @@ static inline u8 assign_tag(struct kmem_cache *cache,
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object);
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
......@@ -341,7 +342,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
if (is_kfence_address(object))
return false;
if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
return true;
......@@ -401,9 +402,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip)
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
struct page *page;
struct folio *folio;
page = virt_to_head_page(ptr);
folio = virt_to_folio(ptr);
/*
* Even though this function is only called for kmem_cache_alloc and
......@@ -411,12 +412,14 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* !PageSlab() when the size provided to kmalloc is larger than
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
if (unlikely(!PageSlab(page))) {
if (unlikely(!folio_test_slab(folio))) {
if (____kasan_kfree_large(ptr, ip))
return;
kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false);
kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false);
} else {
____kasan_slab_free(page->slab_cache, ptr, ip, false, false);
struct slab *slab = folio_slab(folio);
____kasan_slab_free(slab->slab_cache, ptr, ip, false, false);
}
}
......@@ -560,7 +563,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;
struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
......@@ -572,13 +575,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
*/
kasan_unpoison(object, size, false);
page = virt_to_head_page(object);
slab = virt_to_slab(object);
/* Piggy-back on kmalloc() instrumentation to poison the redzone. */
if (unlikely(!PageSlab(page)))
if (unlikely(!slab))
return __kasan_kmalloc_large(object, size, flags);
else
return ____kasan_kmalloc(page->slab_cache, object, size, flags);
return ____kasan_kmalloc(slab->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
......
......@@ -330,16 +330,16 @@ DEFINE_ASAN_SET_SHADOW(f8);
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
{
struct page *page = kasan_addr_to_page(addr);
struct slab *slab = kasan_addr_to_slab(addr);
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
if (is_kfence_address(addr) || !(page && PageSlab(page)))
if (is_kfence_address(addr) || !slab)
return;
cache = page->slab_cache;
object = nearest_obj(cache, page, addr);
cache = slab->slab_cache;
object = nearest_obj(cache, slab, addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (!alloc_meta)
return;
......
......@@ -265,6 +265,7 @@ bool kasan_report(unsigned long addr, size_t size,
void kasan_report_invalid_free(void *object, unsigned long ip);
struct page *kasan_addr_to_page(const void *addr);
struct slab *kasan_addr_to_slab(const void *addr);
depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc);
void kasan_set_track(struct kasan_track *track, gfp_t flags);
......
......@@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size;
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
return virt_to_head_page(qlink)->slab_cache;
return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
......
......@@ -150,6 +150,14 @@ struct page *kasan_addr_to_page(const void *addr)
return NULL;
}
struct slab *kasan_addr_to_slab(const void *addr)
{
if ((addr >= (void *)PAGE_OFFSET) &&
(addr < high_memory))
return virt_to_slab(addr);
return NULL;
}
static void describe_object_addr(struct kmem_cache *cache, void *object,
const void *addr)
{
......@@ -248,8 +256,9 @@ static void print_address_description(void *addr, u8 tag)
pr_err("\n");
if (page && PageSlab(page)) {
struct kmem_cache *cache = page->slab_cache;
void *object = nearest_obj(cache, page, addr);
struct slab *slab = page_slab(page);
struct kmem_cache *cache = slab->slab_cache;
void *object = nearest_obj(cache, slab, addr);
describe_object(cache, object, addr, tag);
}
......
......@@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
struct kasan_alloc_meta *alloc_meta;
struct kmem_cache *cache;
struct page *page;
struct slab *slab;
const void *addr;
void *object;
u8 tag;
......@@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
tag = get_tag(info->access_addr);
addr = kasan_reset_tag(info->access_addr);
page = kasan_addr_to_page(addr);
if (page && PageSlab(page)) {
cache = page->slab_cache;
object = nearest_obj(cache, page, (void *)addr);
slab = kasan_addr_to_slab(addr);
if (slab) {
cache = slab->slab_cache;
object = nearest_obj(cache, slab, (void *)addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta) {
......
......@@ -360,7 +360,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
struct page *page;
struct slab *slab;
void *addr;
/* Try to obtain a free object. */
......@@ -424,13 +424,14 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);
/* Set required struct page fields. */
page = virt_to_page(meta->addr);
page->slab_cache = cache;
if (IS_ENABLED(CONFIG_SLUB))
page->objects = 1;
if (IS_ENABLED(CONFIG_SLAB))
page->s_mem = addr;
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
for_each_canary(meta, set_canary_byte);
......
......@@ -282,7 +282,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
alloc = kmalloc(size, gfp);
if (is_kfence_address(alloc)) {
struct page *page = virt_to_head_page(alloc);
struct slab *slab = virt_to_slab(alloc);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
......@@ -291,8 +291,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* even for KFENCE objects; these are required so that
* memcg accounting works correctly.
*/
KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
if (policy == ALLOCATE_ANY)
return alloc;
......
......@@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
rcu_read_unlock();
}
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab_page(s, page);
unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
if (new_page) {
if (new_slab) {
/*
* If the slab page is brand new and nobody can yet access
* it's memcg_data, no synchronization is required and
* memcg_data can be simply assigned.
* If the slab is brand new and nobody can yet access its
* memcg_data, no synchronization is required and memcg_data can
* be simply assigned.
*/
page->memcg_data = memcg_data;
} else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
slab->memcg_data = memcg_data;
} else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
* If the slab page is already in use, somebody can allocate
* and assign obj_cgroups in parallel. In this case the existing
* If the slab is already in use, somebody can allocate and
* assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
......@@ -2865,38 +2865,43 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
struct page *page;
struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
page = virt_to_head_page(p);
folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* the page->obj_cgroups.
* slab->memcg_data.
*/
if (page_objcgs_check(page)) {
struct obj_cgroup *objcg;
if (folio_test_slab(folio)) {
struct obj_cgroup **objcgs;
struct slab *slab;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
objcg = page_objcgs(page)[off];
if (objcg)
return obj_cgroup_memcg(objcg);
slab = folio_slab(folio);
objcgs = slab_objcgs(slab);
if (!objcgs)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
if (objcgs[off])
return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
* page_memcg_check() is used here, because page_has_obj_cgroups()
* check above could fail because the object cgroups vector wasn't set
* at that moment, but it can be set concurrently.
* page_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->memcg_data has not been freed yet.
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
return page_memcg_check(page);
return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
......
......@@ -218,7 +218,7 @@ static void cache_reap(struct work_struct *unused);
static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
void **list);
static inline void fixup_slab_list(struct kmem_cache *cachep,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
void **list);
static int slab_early_init = 1;
......@@ -372,10 +372,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;
static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
unsigned int idx)
static inline void *index_to_obj(struct kmem_cache *cache,
const struct slab *slab, unsigned int idx)
{
return page->s_mem + cache->size * idx;
return slab->s_mem + cache->size * idx;
}
#define BOOT_CPUCACHE_ENTRIES 1
......@@ -550,17 +550,17 @@ static struct array_cache *alloc_arraycache(int node, int entries,
}
static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
struct page *page, void *objp)
struct slab *slab, void *objp)
{
struct kmem_cache_node *n;
int page_node;
int slab_node;
LIST_HEAD(list);
page_node = page_to_nid(page);
n = get_node(cachep, page_node);
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
......@@ -761,7 +761,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
}
static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
int node, int page_node)
int node, int slab_node)
{
struct kmem_cache_node *n;
struct alien_cache *alien = NULL;
......@@ -770,21 +770,21 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[page_node]) {
alien = n->alien[page_node];
if (n->alien && n->alien[slab_node]) {
alien = n->alien[slab_node];
ac = &alien->ac;
spin_lock(&alien->lock);
if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep, ac, page_node, &list);
__drain_alien_cache(cachep, ac, slab_node, &list);
}
__free_one(ac, objp);
spin_unlock(&alien->lock);
slabs_destroy(cachep, &list);
} else {
n = get_node(cachep, page_node);
n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, page_node, &list);
free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
......@@ -793,16 +793,16 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
int page_node = page_to_nid(virt_to_page(objp));
int slab_node = slab_nid(virt_to_slab(objp));
int node = numa_mem_id();
/*
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
if (likely(node == page_node))
if (likely(node == slab_node))
return 0;
return __cache_free_alien(cachep, objp, node, page_node);
return __cache_free_alien(cachep, objp, node, slab_node);
}
/*
......@@ -1367,57 +1367,60 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct page *page;
struct folio *folio;
struct slab *slab;
flags |= cachep->allocflags;
page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page) {
folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!folio) {
slab_out_of_memory(cachep, flags, nodeid);
return NULL;
}
account_slab_page(page, cachep->gfporder, cachep, flags);
__SetPageSlab(page);
slab = folio_slab(folio);
account_slab(slab, cachep->gfporder, cachep, flags);
__folio_set_slab(folio);
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
if (sk_memalloc_socks() && page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
return page;
return slab;
}
/*
* Interface to system's page release.
*/
static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
{
int order = cachep->gfporder;
struct folio *folio = slab_folio(slab);
BUG_ON(!PageSlab(page));
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
page_mapcount_reset(page);
/* In union with page->mapping where page allocator expects NULL */
page->slab_cache = NULL;
BUG_ON(!folio_test_slab(folio));
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
page_mapcount_reset(folio_page(folio, 0));
folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
unaccount_slab_page(page, order, cachep);
__free_pages(page, order);
unaccount_slab(slab, order, cachep);
__free_pages(folio_page(folio, 0), order);
}
static void kmem_rcu_free(struct rcu_head *head)
{
struct kmem_cache *cachep;
struct page *page;
struct slab *slab;
page = container_of(head, struct page, rcu_head);
cachep = page->slab_cache;
slab = container_of(head, struct slab, rcu_head);
cachep = slab->slab_cache;
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
}
#if DEBUG
......@@ -1553,18 +1556,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
struct page *page = virt_to_head_page(objp);
struct slab *slab = virt_to_slab(objp);
unsigned int objnr;
objnr = obj_to_index(cachep, page, objp);
objnr = obj_to_index(cachep, slab, objp);
if (objnr) {
objp = index_to_obj(cachep, page, objnr - 1);
objp = index_to_obj(cachep, slab, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
objp = index_to_obj(cachep, page, objnr + 1);
objp = index_to_obj(cachep, slab, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Next obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
......@@ -1575,17 +1578,17 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
int i;
if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
poison_obj(cachep, page->freelist - obj_offset(cachep),
poison_obj(cachep, slab->freelist - obj_offset(cachep),
POISON_FREE);
}
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);
......@@ -1601,7 +1604,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
}
#endif
......@@ -1609,22 +1612,22 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
* @page: page pointer being destroyed
* @slab: slab being destroyed
*
* Destroy all the objs in a slab page, and release the mem back to the system.
* Before calling the slab page must have been unlinked from the cache. The
* Destroy all the objs in a slab, and release the mem back to the system.
* Before calling the slab must have been unlinked from the cache. The
* kmem_cache_node ->list_lock is not held/needed.
*/
static void slab_destroy(struct kmem_cache *cachep, struct page *page)
static void slab_destroy(struct kmem_cache *cachep, struct slab *slab)
{
void *freelist;
freelist = page->freelist;
slab_destroy_debugcheck(cachep, page);
freelist = slab->freelist;
slab_destroy_debugcheck(cachep, slab);
if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&page->rcu_head, kmem_rcu_free);
call_rcu(&slab->rcu_head, kmem_rcu_free);
else
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
/*
* From now on, we don't use freelist
......@@ -1640,11 +1643,11 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
*/
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
{
struct page *page, *n;
struct slab *slab, *n;
list_for_each_entry_safe(page, n, list, slab_list) {
list_del(&page->slab_list);
slab_destroy(cachep, page);
list_for_each_entry_safe(slab, n, list, slab_list) {
list_del(&slab->slab_list);
slab_destroy(cachep, slab);
}
}
......@@ -2194,7 +2197,7 @@ static int drain_freelist(struct kmem_cache *cache,
{
struct list_head *p;
int nr_freed;
struct page *page;
struct slab *slab;
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
......@@ -2206,8 +2209,8 @@ static int drain_freelist(struct kmem_cache *cache,
goto out;
}
page = list_entry(p, struct page, slab_list);
list_del(&page->slab_list);
slab = list_entry(p, struct slab, slab_list);
list_del(&slab->slab_list);
n->free_slabs--;
n->total_slabs--;
/*
......@@ -2216,7 +2219,7 @@ static int drain_freelist(struct kmem_cache *cache,
*/
n->free_objects -= cache->num;
spin_unlock_irq(&n->list_lock);
slab_destroy(cache, page);
slab_destroy(cache, slab);
nr_freed++;
}
out:
......@@ -2291,14 +2294,14 @@ void __kmem_cache_release(struct kmem_cache *cachep)
* which are all initialized during kmem_cache_init().
*/
static void *alloc_slabmgmt(struct kmem_cache *cachep,
struct page *page, int colour_off,
struct slab *slab, int colour_off,
gfp_t local_flags, int nodeid)
{
void *freelist;
void *addr = page_address(page);
void *addr = slab_address(slab);
page->s_mem = addr + colour_off;
page->active = 0;
slab->s_mem = addr + colour_off;
slab->active = 0;
if (OBJFREELIST_SLAB(cachep))
freelist = NULL;
......@@ -2315,24 +2318,24 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
return freelist;
}
static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx)
{
return ((freelist_idx_t *)page->freelist)[idx];
return ((freelist_idx_t *) slab->freelist)[idx];
}
static inline void set_free_obj(struct page *page,
static inline void set_free_obj(struct slab *slab,
unsigned int idx, freelist_idx_t val)
{
((freelist_idx_t *)(page->freelist))[idx] = val;
((freelist_idx_t *)(slab->freelist))[idx] = val;
}
static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
{
#if DEBUG
int i;
for (i = 0; i < cachep->num; i++) {
void *objp = index_to_obj(cachep, page, i);
void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
......@@ -2416,17 +2419,17 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state)
}
/* Swap two freelist entries */
static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
{
swap(((freelist_idx_t *)page->freelist)[a],
((freelist_idx_t *)page->freelist)[b]);
swap(((freelist_idx_t *) slab->freelist)[a],
((freelist_idx_t *) slab->freelist)[b]);
}
/*
* Shuffle the freelist initialization state based on pre-computed lists.
* return true if the list was successfully shuffled, false otherwise.
*/
static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
......@@ -2443,7 +2446,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
objfreelist = count - 1;
else
objfreelist = next_random_slot(&state);
page->freelist = index_to_obj(cachep, page, objfreelist) +
slab->freelist = index_to_obj(cachep, slab, objfreelist) +
obj_offset(cachep);
count--;
}
......@@ -2454,51 +2457,51 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
*/
if (!precomputed) {
for (i = 0; i < count; i++)
set_free_obj(page, i, i);
set_free_obj(slab, i, i);
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(&state.rnd_state);
rand %= (i + 1);
swap_free_obj(page, i, rand);
swap_free_obj(slab, i, rand);
}
} else {
for (i = 0; i < count; i++)
set_free_obj(page, i, next_random_slot(&state));
set_free_obj(slab, i, next_random_slot(&state));
}
if (OBJFREELIST_SLAB(cachep))
set_free_obj(page, cachep->num - 1, objfreelist);
set_free_obj(slab, cachep->num - 1, objfreelist);
return true;
}
#else
static inline bool shuffle_freelist(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static void cache_init_objs(struct kmem_cache *cachep,
struct page *page)
struct slab *slab)
{
int i;
void *objp;
bool shuffled;
cache_init_objs_debug(cachep, page);
cache_init_objs_debug(cachep, slab);
/* Try to randomize the freelist if enabled */
shuffled = shuffle_freelist(cachep, page);
shuffled = shuffle_freelist(cachep, slab);
if (!shuffled && OBJFREELIST_SLAB(cachep)) {
page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
obj_offset(cachep);
}
for (i = 0; i < cachep->num; i++) {
objp = index_to_obj(cachep, page, i);
objp = index_to_obj(cachep, slab, i);
objp = kasan_init_slab_obj(cachep, objp);
/* constructor could break poison info */
......@@ -2509,68 +2512,56 @@ static void cache_init_objs(struct kmem_cache *cachep,
}
if (!shuffled)
set_free_obj(page, i, i);
set_free_obj(slab, i, i);
}
}
static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
{
void *objp;
objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
slab->active++;
return objp;
}
static void slab_put_obj(struct kmem_cache *cachep,
struct page *page, void *objp)
struct slab *slab, void *objp)
{
unsigned int objnr = obj_to_index(cachep, page, objp);
unsigned int objnr = obj_to_index(cachep, slab, objp);
#if DEBUG
unsigned int i;
/* Verify double free bug */
for (i = page->active; i < cachep->num; i++) {
if (get_free_obj(page, i) == objnr) {
for (i = slab->active; i < cachep->num; i++) {
if (get_free_obj(slab, i) == objnr) {
pr_err("slab: double free detected in cache '%s', objp %px\n",
cachep->name, objp);
BUG();
}
}
#endif
page->active--;
if (!page->freelist)
page->freelist = objp + obj_offset(cachep);
set_free_obj(page, page->active, objnr);
}
slab->active--;
if (!slab->freelist)
slab->freelist = objp + obj_offset(cachep);
/*
* Map pages beginning at addr to the given cache and slab. This is required
* for the slab allocator to be able to lookup the cache and slab of a
* virtual address for kfree, ksize, and slab debugging.
*/
static void slab_map_pages(struct kmem_cache *cache, struct page *page,
void *freelist)
{
page->slab_cache = cache;
page->freelist = freelist;
set_free_obj(slab, slab->active, objnr);
}
/*
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
static struct page *cache_grow_begin(struct kmem_cache *cachep,
static struct slab *cache_grow_begin(struct kmem_cache *cachep,
gfp_t flags, int nodeid)
{
void *freelist;
size_t offset;
gfp_t local_flags;
int page_node;
int slab_node;
struct kmem_cache_node *n;
struct page *page;
struct slab *slab;
/*
* Be lazy and only check for valid flags here, keeping it out of the
......@@ -2590,12 +2581,12 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
page = kmem_getpages(cachep, local_flags, nodeid);
if (!page)
slab = kmem_getpages(cachep, local_flags, nodeid);
if (!slab)
goto failed;
page_node = page_to_nid(page);
n = get_node(cachep, page_node);
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
/* Get colour for the slab, and calculate the next value. */
n->colour_next++;
......@@ -2613,54 +2604,55 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* page_address() in the latter returns a non-tagged pointer,
* as it should be for slab pages.
*/
kasan_poison_slab(page);
kasan_poison_slab(slab);
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, page_node);
freelist = alloc_slabmgmt(cachep, slab, offset,
local_flags & ~GFP_CONSTRAINT_MASK, slab_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
slab_map_pages(cachep, page, freelist);
slab->slab_cache = cachep;
slab->freelist = freelist;
cache_init_objs(cachep, page);
cache_init_objs(cachep, slab);
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return page;
return slab;
opps1:
kmem_freepages(cachep, page);
kmem_freepages(cachep, slab);
failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return NULL;
}
static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
{
struct kmem_cache_node *n;
void *list = NULL;
check_irq_off();
if (!page)
if (!slab)
return;
INIT_LIST_HEAD(&page->slab_list);
n = get_node(cachep, page_to_nid(page));
INIT_LIST_HEAD(&slab->slab_list);
n = get_node(cachep, slab_nid(slab));
spin_lock(&n->list_lock);
n->total_slabs++;
if (!page->active) {
list_add_tail(&page->slab_list, &n->slabs_free);
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
n->free_objects += cachep->num - slab->active;
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -2708,13 +2700,13 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
unsigned int objnr;
struct page *page;
struct slab *slab;
BUG_ON(virt_to_cache(objp) != cachep);
objp -= obj_offset(cachep);
kfree_debugcheck(objp);
page = virt_to_head_page(objp);
slab = virt_to_slab(objp);
if (cachep->flags & SLAB_RED_ZONE) {
verify_redzone_free(cachep, objp);
......@@ -2724,10 +2716,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = (void *)caller;
objnr = obj_to_index(cachep, page, objp);
objnr = obj_to_index(cachep, slab, objp);
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, page, objnr));
BUG_ON(objp != index_to_obj(cachep, slab, objnr));
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
......@@ -2757,97 +2749,97 @@ static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
}
static inline void fixup_slab_list(struct kmem_cache *cachep,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
void **list)
{
/* move slabp to correct slabp list: */
list_del(&page->slab_list);
if (page->active == cachep->num) {
list_add(&page->slab_list, &n->slabs_full);
list_del(&slab->slab_list);
if (slab->active == cachep->num) {
list_add(&slab->slab_list, &n->slabs_full);
if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
/* Poisoning will be done without holding the lock */
if (cachep->flags & SLAB_POISON) {
void **objp = page->freelist;
void **objp = slab->freelist;
*objp = *list;
*list = objp;
}
#endif
page->freelist = NULL;
slab->freelist = NULL;
}
} else
list_add(&page->slab_list, &n->slabs_partial);
list_add(&slab->slab_list, &n->slabs_partial);
}
/* Try to find non-pfmemalloc slab if needed */
static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
struct page *page, bool pfmemalloc)
static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
struct slab *slab, bool pfmemalloc)
{
if (!page)
if (!slab)
return NULL;
if (pfmemalloc)
return page;
return slab;
if (!PageSlabPfmemalloc(page))
return page;
if (!slab_test_pfmemalloc(slab))
return slab;
/* No need to keep pfmemalloc slab if we have enough free objects */
if (n->free_objects > n->free_limit) {
ClearPageSlabPfmemalloc(page);
return page;
slab_clear_pfmemalloc(slab);
return slab;
}
/* Move pfmemalloc slab to the end of list to speed up next search */
list_del(&page->slab_list);
if (!page->active) {
list_add_tail(&page->slab_list, &n->slabs_free);
list_del(&slab->slab_list);
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
list_add_tail(&page->slab_list, &n->slabs_partial);
list_add_tail(&slab->slab_list, &n->slabs_partial);
list_for_each_entry(page, &n->slabs_partial, slab_list) {
if (!PageSlabPfmemalloc(page))
return page;
list_for_each_entry(slab, &n->slabs_partial, slab_list) {
if (!slab_test_pfmemalloc(slab))
return slab;
}
n->free_touched = 1;
list_for_each_entry(page, &n->slabs_free, slab_list) {
if (!PageSlabPfmemalloc(page)) {
list_for_each_entry(slab, &n->slabs_free, slab_list) {
if (!slab_test_pfmemalloc(slab)) {
n->free_slabs--;
return page;
return slab;
}
}
return NULL;
}
static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
struct page *page;
struct slab *slab;
assert_spin_locked(&n->list_lock);
page = list_first_entry_or_null(&n->slabs_partial, struct page,
slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
slab_list);
if (!page) {
if (!slab) {
n->free_touched = 1;
page = list_first_entry_or_null(&n->slabs_free, struct page,
slab = list_first_entry_or_null(&n->slabs_free, struct slab,
slab_list);
if (page)
if (slab)
n->free_slabs--;
}
if (sk_memalloc_socks())
page = get_valid_first_slab(n, page, pfmemalloc);
slab = get_valid_first_slab(n, slab, pfmemalloc);
return page;
return slab;
}
static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
struct kmem_cache_node *n, gfp_t flags)
{
struct page *page;
struct slab *slab;
void *obj;
void *list = NULL;
......@@ -2855,16 +2847,16 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
return NULL;
spin_lock(&n->list_lock);
page = get_first_slab(n, true);
if (!page) {
slab = get_first_slab(n, true);
if (!slab) {
spin_unlock(&n->list_lock);
return NULL;
}
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
n->free_objects--;
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -2877,20 +2869,20 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
* or cache_grow_end() for new slab
*/
static __always_inline int alloc_block(struct kmem_cache *cachep,
struct array_cache *ac, struct page *page, int batchcount)
struct array_cache *ac, struct slab *slab, int batchcount)
{
/*
* There must be at least one object available for
* allocation.
*/
BUG_ON(page->active >= cachep->num);
BUG_ON(slab->active >= cachep->num);
while (page->active < cachep->num && batchcount--) {
while (slab->active < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
ac->entry[ac->avail++] = slab_get_obj(cachep, page);
ac->entry[ac->avail++] = slab_get_obj(cachep, slab);
}
return batchcount;
......@@ -2903,7 +2895,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
struct array_cache *ac, *shared;
int node;
void *list = NULL;
struct page *page;
struct slab *slab;
check_irq_off();
node = numa_mem_id();
......@@ -2936,14 +2928,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
while (batchcount > 0) {
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
check_spinlock_acquired(cachep);
batchcount = alloc_block(cachep, ac, page, batchcount);
fixup_slab_list(cachep, n, page, &list);
batchcount = alloc_block(cachep, ac, slab, batchcount);
fixup_slab_list(cachep, n, slab, &list);
}
must_grow:
......@@ -2962,16 +2954,16 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
return obj;
}
page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
slab = cache_grow_begin(cachep, gfp_exact_node(flags), node);
/*
* cache_grow_begin() can reenable interrupts,
* then ac could change.
*/
ac = cpu_cache_get(cachep);
if (!ac->avail && page)
alloc_block(cachep, ac, page, batchcount);
cache_grow_end(cachep, page);
if (!ac->avail && slab)
alloc_block(cachep, ac, slab, batchcount);
cache_grow_end(cachep, slab);
if (!ac->avail)
return NULL;
......@@ -3101,7 +3093,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(flags);
void *obj = NULL;
struct page *page;
struct slab *slab;
int nid;
unsigned int cpuset_mems_cookie;
......@@ -3137,10 +3129,10 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
page = cache_grow_begin(cache, flags, numa_mem_id());
cache_grow_end(cache, page);
if (page) {
nid = page_to_nid(page);
slab = cache_grow_begin(cache, flags, numa_mem_id());
cache_grow_end(cache, slab);
if (slab) {
nid = slab_nid(slab);
obj = ____cache_alloc_node(cache,
gfp_exact_node(flags), nid);
......@@ -3164,7 +3156,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct page *page;
struct slab *slab;
struct kmem_cache_node *n;
void *obj = NULL;
void *list = NULL;
......@@ -3175,8 +3167,8 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
check_irq_off();
spin_lock(&n->list_lock);
page = get_first_slab(n, false);
if (!page)
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
check_spinlock_acquired_node(cachep, nodeid);
......@@ -3185,12 +3177,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
BUG_ON(page->active == cachep->num);
BUG_ON(slab->active == cachep->num);
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
n->free_objects--;
fixup_slab_list(cachep, n, page, &list);
fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
......@@ -3198,12 +3190,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
must_grow:
spin_unlock(&n->list_lock);
page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (page) {
slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (slab) {
/* This slab isn't counted yet so don't update free_objects */
obj = slab_get_obj(cachep, page);
obj = slab_get_obj(cachep, slab);
}
cache_grow_end(cachep, page);
cache_grow_end(cachep, slab);
return obj ? obj : fallback_alloc(cachep, flags);
}
......@@ -3333,40 +3325,40 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
struct page *page;
struct slab *slab;
n->free_objects += nr_objects;
for (i = 0; i < nr_objects; i++) {
void *objp;
struct page *page;
struct slab *slab;
objp = objpp[i];
page = virt_to_head_page(objp);
list_del(&page->slab_list);
slab = virt_to_slab(objp);
list_del(&slab->slab_list);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp);
slab_put_obj(cachep, slab, objp);
STATS_DEC_ACTIVE(cachep);
/* fixup slab chains */
if (page->active == 0) {
list_add(&page->slab_list, &n->slabs_free);
if (slab->active == 0) {
list_add(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
list_add_tail(&page->slab_list, &n->slabs_partial);
list_add_tail(&slab->slab_list, &n->slabs_partial);
}
}
while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
n->free_objects -= cachep->num;
page = list_last_entry(&n->slabs_free, struct page, slab_list);
list_move(&page->slab_list, list);
slab = list_last_entry(&n->slabs_free, struct slab, slab_list);
list_move(&slab->slab_list, list);
n->free_slabs--;
n->total_slabs--;
}
......@@ -3402,10 +3394,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
#if STATS
{
int i = 0;
struct page *page;
struct slab *slab;
list_for_each_entry(page, &n->slabs_free, slab_list) {
BUG_ON(page->active);
list_for_each_entry(slab, &n->slabs_free, slab_list) {
BUG_ON(slab->active);
i++;
}
......@@ -3481,10 +3473,10 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
}
if (sk_memalloc_socks()) {
struct page *page = virt_to_head_page(objp);
struct slab *slab = virt_to_slab(objp);
if (unlikely(PageSlabPfmemalloc(page))) {
cache_free_pfmemalloc(cachep, page, objp);
if (unlikely(slab_test_pfmemalloc(slab))) {
cache_free_pfmemalloc(cachep, slab, objp);
return;
}
}
......@@ -3657,21 +3649,21 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif /* CONFIG_NUMA */
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
struct kmem_cache *cachep;
unsigned int objnr;
void *objp;
kpp->kp_ptr = object;
kpp->kp_page = page;
cachep = page->slab_cache;
kpp->kp_slab = slab;
cachep = slab->slab_cache;
kpp->kp_slab_cache = cachep;
objp = object - obj_offset(cachep);
kpp->kp_data_offset = obj_offset(cachep);
page = virt_to_head_page(objp);
objnr = obj_to_index(cachep, page, objp);
objp = index_to_obj(cachep, page, objnr);
slab = virt_to_slab(objp);
objnr = obj_to_index(cachep, slab, objp);
objp = index_to_obj(cachep, slab, objnr);
kpp->kp_objp = objp;
if (DEBUG && cachep->flags & SLAB_STORE_USER)
kpp->kp_ret = *dbg_userword(cachep, objp);
......@@ -4177,8 +4169,8 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
struct kmem_cache *cachep;
unsigned int objnr;
......@@ -4187,15 +4179,15 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find and validate object. */
cachep = page->slab_cache;
objnr = obj_to_index(cachep, page, (void *)ptr);
cachep = slab->slab_cache;
objnr = obj_to_index(cachep, slab, (void *)ptr);
BUG_ON(objnr >= cachep->num);
/* Find offset within object. */
if (is_kfence_address(ptr))
offset = ptr - kfence_object_start(ptr);
else
offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep);
/* Allow address range falling entirely within usercopy region. */
if (offset >= cachep->useroffset &&
......
......@@ -5,6 +5,197 @@
* Internal slab definitions
*/
/* Reuses the bits in struct page */
struct slab {
unsigned long __page_flags;
#if defined(CONFIG_SLAB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
};
struct kmem_cache *slab_cache;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
unsigned int active;
#elif defined(CONFIG_SLUB)
union {
struct list_head slab_list;
struct rcu_head rcu_head;
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
struct kmem_cache *slab_cache;
/* Double-word boundary */
void *freelist; /* first free object */
union {
unsigned long counters;
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
unsigned int __unused;
#elif defined(CONFIG_SLOB)
struct list_head slab_list;
void *__unused_1;
void *freelist; /* first free block */
long units;
unsigned int __unused_2;
#else
#error "Unexpected slab allocator configured"
#endif
atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
unsigned long memcg_data;
#endif
};
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
SLAB_MATCH(slab_list, slab_list);
#ifndef CONFIG_SLOB
SLAB_MATCH(rcu_head, rcu_head);
SLAB_MATCH(slab_cache, slab_cache);
#endif
#ifdef CONFIG_SLAB
SLAB_MATCH(s_mem, s_mem);
SLAB_MATCH(active, active);
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
/**
* folio_slab - Converts from folio to slab.
* @folio: The folio.
*
* Currently struct slab is a different representation of a folio where
* folio_test_slab() is true.
*
* Return: The slab which contains this folio.
*/
#define folio_slab(folio) (_Generic((folio), \
const struct folio *: (const struct slab *)(folio), \
struct folio *: (struct slab *)(folio)))
/**
* slab_folio - The folio allocated for a slab
* @slab: The slab.
*
* Slabs are allocated as folios that contain the individual objects and are
* using some fields in the first struct page of the folio - those fields are
* now accessed by struct slab. It is occasionally necessary to convert back to
* a folio in order to communicate with the rest of the mm. Please use this
* helper function instead of casting yourself, as the implementation may change
* in the future.
*/
#define slab_folio(s) (_Generic((s), \
const struct slab *: (const struct folio *)s, \
struct slab *: (struct folio *)s))
/**
* page_slab - Converts from first struct page to slab.
* @p: The first (either compound head or single order-0) page of the slab.
*
* A temporary wrapper to convert a struct page to a struct slab in situations
* where we know the page is either the compound head or a single order-0 page.
*
* Long-term ideally everything would work with struct slab directly or go
* through folio to struct slab.
*
* Return: The slab which contains this page.
*/
#define page_slab(p) (_Generic((p), \
const struct page *: (const struct slab *)(p), \
struct page *: (struct slab *)(p)))
/**
* slab_page - The first struct page allocated for a slab
* @slab: The slab.
*
* A convenience wrapper for converting slab to the first struct page of the
* underlying folio, to communicate with code not yet converted to folio or
* struct slab.
*/
#define slab_page(s) folio_page(slab_folio(s), 0)
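The _Generic selection in these converters is worth a second look: it makes
the cast preserve constness instead of silently dropping it. A self-contained
user-space sketch of the trick, with invented mini_folio/mini_slab types:

#include <stdio.h>

struct mini_folio { unsigned long flags; };
struct mini_slab  { unsigned long __page_flags; };

/* Cast folio -> slab; a const input yields a const output. */
#define mini_folio_slab(folio) (_Generic((folio),			\
	const struct mini_folio *: (const struct mini_slab *)(folio),	\
	struct mini_folio *: (struct mini_slab *)(folio)))

int main(void)
{
	struct mini_folio f = { .flags = 0x5 };
	const struct mini_folio *cf = &f;

	/* Passing a const pointer selects the const-returning arm. */
	const struct mini_slab *s = mini_folio_slab(cf);

	printf("same storage, slab view of flags: 0x%lx\n", s->__page_flags);
	return 0;
}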
/*
* If network-based swap is enabled, sl*b must keep track of whether pages
* were allocated from pfmemalloc reserves.
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
return folio_test_active((struct folio *)slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
{
folio_set_active(slab_folio(slab));
}
static inline void slab_clear_pfmemalloc(struct slab *slab)
{
folio_clear_active(slab_folio(slab));
}
static inline void __slab_clear_pfmemalloc(struct slab *slab)
{
__folio_clear_active(slab_folio(slab));
}
static inline void *slab_address(const struct slab *slab)
{
return folio_address(slab_folio(slab));
}
static inline int slab_nid(const struct slab *slab)
{
return folio_nid(slab_folio(slab));
}
static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
return folio_pgdat(slab_folio(slab));
}
static inline struct slab *virt_to_slab(const void *addr)
{
struct folio *folio = virt_to_folio(addr);
if (!folio_test_slab(folio))
return NULL;
return folio_slab(folio);
}
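Note that virt_to_slab() is the choke point that makes the new type stronger:
it hands back a struct slab only when the folio really is a slab, and returns
NULL for memory that came straight from the page allocator (large kmalloc).
A toy user-space version of that flag-gated downcast (all names invented):

#include <stdbool.h>
#include <stdio.h>

struct toy_folio { bool is_slab; };
struct toy_slab { struct toy_folio folio; };	/* slab view wraps the folio */

static struct toy_slab *toy_virt_to_slab(struct toy_folio *folio)
{
	if (!folio->is_slab)
		return NULL;	/* e.g. a large kmalloc page, not a slab */
	return (struct toy_slab *)folio;
}

int main(void)
{
	struct toy_folio slab_backed = { .is_slab = true };
	struct toy_folio large_kmalloc = { .is_slab = false };

	printf("slab folio    -> %p\n", (void *)toy_virt_to_slab(&slab_backed));
	printf("large kmalloc -> %p\n", (void *)toy_virt_to_slab(&large_kmalloc));
	return 0;
}

Callers either handle the NULL (as memcg_slab_free_hook() does below) or treat
it as a bug (as virt_to_cache() does).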
static inline int slab_order(const struct slab *slab)
{
return folio_order((struct folio *)slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
{
return PAGE_SIZE << slab_order(slab);
}
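slab_order() and slab_size() reduce to the usual buddy arithmetic: an order-N
slab spans PAGE_SIZE << N bytes. A quick check, assuming 4 KiB pages:

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;	/* assumed PAGE_SIZE */

	for (unsigned int order = 0; order <= 3; order++)
		printf("order %u slab -> %lu bytes\n",
		       order, page_size << order);
	return 0;
}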
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
......@@ -245,15 +436,33 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
}
#ifdef CONFIG_MEMCG_KMEM
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page);
/*
* slab_objcgs - get the object cgroups vector associated with a slab
* @slab: a pointer to the slab struct
*
* Returns a pointer to the object cgroups vector associated with the slab,
* or NULL if no such vector has been associated yet.
*/
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
unsigned long memcg_data = READ_ONCE(slab->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
slab_page(slab));
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
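slab_objcgs() works because memcg_data is a tagged pointer: the objcg vector
is word-aligned, so its low bits are free to carry the MEMCG_DATA_* type flags
that the VM_BUG_ONs above verify. A user-space sketch of the same low-bit
tagging (the flag values here are made up):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DATA_OBJCGS	0x1UL	/* stand-in for MEMCG_DATA_OBJCGS */
#define DATA_FLAGS_MASK	0x3UL	/* stand-in for MEMCG_DATA_FLAGS_MASK */

int main(void)
{
	static void *vec[4];	/* word-aligned, so the low bits are zero */
	uintptr_t tagged = (uintptr_t)vec | DATA_OBJCGS;

	assert(tagged & DATA_OBJCGS);	/* is the payload an objcgs vector? */
	void **objcgs = (void **)(tagged & ~DATA_FLAGS_MASK);

	printf("stored %p, decoded %p\n", (void *)vec, (void *)objcgs);
	return 0;
}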
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
static inline void memcg_free_page_obj_cgroups(struct page *page)
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
kfree(page_objcgs(page));
page->memcg_data = 0;
kfree(slab_objcgs(slab));
slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
......@@ -298,7 +507,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
gfp_t flags, size_t size,
void **p)
{
struct page *page;
struct slab *slab;
unsigned long off;
size_t i;
......@@ -307,19 +516,19 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
if (likely(p[i])) {
page = virt_to_head_page(p[i]);
slab = virt_to_slab(p[i]);
if (!page_objcgs(page) &&
memcg_alloc_page_obj_cgroups(page, s, flags,
if (!slab_objcgs(slab) &&
memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
off = obj_to_index(s, page, p[i]);
off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
page_objcgs(page)[off] = objcg;
mod_objcg_state(objcg, page_pgdat(page),
slab_objcgs(slab)[off] = objcg;
mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
......@@ -334,7 +543,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
struct kmem_cache *s;
struct obj_cgroup **objcgs;
struct obj_cgroup *objcg;
struct page *page;
struct slab *slab;
unsigned int off;
int i;
......@@ -345,43 +554,52 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
if (unlikely(!p[i]))
continue;
page = virt_to_head_page(p[i]);
objcgs = page_objcgs_check(page);
slab = virt_to_slab(p[i]);
/* we could be given a kmalloc_large() object, skip those */
if (!slab)
continue;
objcgs = slab_objcgs(slab);
if (!objcgs)
continue;
if (!s_orig)
s = page->slab_cache;
s = slab->slab_cache;
else
s = s_orig;
off = obj_to_index(s, page, p[i]);
off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
{
return NULL;
}
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
static inline int memcg_alloc_page_obj_cgroups(struct page *page,
static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
bool new_page)
bool new_slab)
{
return 0;
}
static inline void memcg_free_page_obj_cgroups(struct page *page)
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
......@@ -405,35 +623,35 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s,
}
#endif /* CONFIG_MEMCG_KMEM */
#ifndef CONFIG_SLOB
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct page *page;
struct slab *slab;
page = virt_to_head_page(obj);
if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
slab = virt_to_slab(obj);
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
return page->slab_cache;
return slab->slab_cache;
}
static __always_inline void account_slab_page(struct page *page, int order,
struct kmem_cache *s,
gfp_t gfp)
static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
memcg_alloc_page_obj_cgroups(page, s, gfp, true);
memcg_alloc_slab_cgroups(slab, s, gfp, true);
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
static __always_inline void unaccount_slab_page(struct page *page, int order,
struct kmem_cache *s)
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
if (memcg_kmem_enabled())
memcg_free_page_obj_cgroups(page);
memcg_free_slab_cgroups(slab);
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
......@@ -452,6 +670,7 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
print_tracking(cachep, x);
return cachep;
}
#endif /* CONFIG_SLOB */
static inline size_t slab_ksize(const struct kmem_cache *s)
{
......@@ -635,7 +854,7 @@ static inline void debugfs_slab_release(struct kmem_cache *s) { }
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
void *kp_ptr;
struct page *kp_page;
struct slab *kp_slab;
void *kp_objp;
unsigned long kp_data_offset;
struct kmem_cache *kp_slab_cache;
......@@ -643,7 +862,18 @@ struct kmem_obj_info {
void *kp_stack[KS_ADDRS_COUNT];
void *kp_free_stack[KS_ADDRS_COUNT];
};
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user);
#else
static inline
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
}
#endif
#endif /* MM_SLAB_H */
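The usercopy check declared above boils down to one question: does the span
[offset, offset + n) fall inside the cache's usercopy window? A self-contained
sketch of that range test; span_ok() and the sizes are invented for
illustration:

#include <stdbool.h>
#include <stdio.h>

struct cache { unsigned long useroffset, usersize; };

/* True if [offset, offset + n) lies within the usercopy region. */
static bool span_ok(const struct cache *c, unsigned long offset,
		    unsigned long n)
{
	return offset >= c->useroffset &&
	       n <= c->usersize &&
	       offset - c->useroffset <= c->usersize - n;
}

int main(void)
{
	/* Usercopy region is bytes [16, 48) of each object. */
	struct cache c = { .useroffset = 16, .usersize = 32 };

	printf("copy 8 bytes at 16:  %s\n", span_ok(&c, 16, 8) ? "ok" : "reject");
	printf("copy 40 bytes at 16: %s\n", span_ok(&c, 16, 40) ? "ok" : "reject");
	return 0;
}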
......@@ -550,13 +550,13 @@ bool slab_is_available(void)
*/
bool kmem_valid_obj(void *object)
{
struct page *page;
struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
page = virt_to_head_page(object);
return PageSlab(page);
folio = virt_to_folio(object);
return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
......@@ -579,18 +579,18 @@ void kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
struct page *page;
struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
page = virt_to_head_page(object);
if (WARN_ON_ONCE(!PageSlab(page))) {
slab = virt_to_slab(object);
if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
kmem_obj_info(&kp, object, page);
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
......
......@@ -30,7 +30,7 @@
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
* These objects are detected in kfree() because PageSlab()
* These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
......@@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct page *sp)
static inline int slob_page_free(struct slab *slab)
{
return PageSlobFree(sp);
return PageSlobFree(slab_page(slab));
}
static void set_slob_page_free(struct page *sp, struct list_head *list)
static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
list_add(&sp->slab_list, list);
__SetPageSlobFree(sp);
list_add(&slab->slab_list, list);
__SetPageSlobFree(slab_page(slab));
}
static inline void clear_slob_page_free(struct page *sp)
static inline void clear_slob_page_free(struct slab *slab)
{
list_del(&sp->slab_list);
__ClearPageSlobFree(sp);
list_del(&slab->slab_list);
__ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
......@@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order)
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
......@@ -301,7 +301,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
struct page *sp;
struct folio *folio;
struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
......@@ -323,7 +324,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
......@@ -358,8 +359,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
sp = virt_to_page(b);
__SetPageSlab(sp);
folio = virt_to_folio(b);
__folio_set_slab(folio);
sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
......@@ -381,7 +383,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
*/
static void slob_free(void *block, int size)
{
struct page *sp;
struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
......@@ -391,7 +393,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
sp = virt_to_page(block);
sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
......@@ -401,8 +403,7 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
__ClearPageSlab(sp);
page_mapcount_reset(sp);
__folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
......@@ -462,10 +463,10 @@ static void slob_free(void *block, int size)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
}
#endif
......@@ -544,7 +545,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
struct page *sp;
struct folio *sp;
trace_kfree(_RET_IP_, block);
......@@ -552,16 +553,17 @@ void kfree(const void *block)
return;
kmemleak_free(block);
sp = virt_to_page(block);
if (PageSlab(sp)) {
sp = virt_to_folio(block);
if (folio_test_slab(sp)) {
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
unsigned int order = compound_order(sp);
mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
unsigned int order = folio_order(sp);
mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(sp, order);
__free_pages(folio_page(sp, 0), order);
}
}
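The slab branch of kfree() above recovers the allocation size from a header
that SLOB stores just below the returned pointer. A malloc-based toy showing
the same header trick (toy_kmalloc, toy_kfree and ALIGN_MIN are invented; the
real code derives the alignment from ARCH_KMALLOC_MINALIGN and
ARCH_SLAB_MINALIGN):

#include <stdio.h>
#include <stdlib.h>

#define ALIGN_MIN 8	/* assumed minimum alignment */

static void *toy_kmalloc(unsigned int size)
{
	char *m = malloc(ALIGN_MIN + size);

	if (!m)
		return NULL;
	*(unsigned int *)m = size;	/* record the size in the header */
	return m + ALIGN_MIN;
}

static void toy_kfree(void *block)
{
	unsigned int *m = (unsigned int *)((char *)block - ALIGN_MIN);

	printf("freeing %u bytes (plus %d byte header)\n", *m, ALIGN_MIN);
	free(m);
}

int main(void)
{
	void *p = toy_kmalloc(100);

	if (p)
		toy_kfree(p);
	return 0;
}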
......@@ -570,7 +572,7 @@ EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
struct page *sp;
struct folio *folio;
int align;
unsigned int *m;
......@@ -578,9 +580,9 @@ size_t __ksize(const void *block)
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
sp = virt_to_page(block);
if (unlikely(!PageSlab(sp)))
return page_size(sp);
folio = virt_to_folio(block);
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
......
......@@ -48,7 +48,7 @@
* 1. slab_mutex (Global Mutex)
* 2. node->list_lock (Spinlock)
* 3. kmem_cache->cpu_slab->lock (Local lock)
* 4. slab_lock(page) (Only on some arches or for debugging)
* 4. slab_lock(slab) (Only on some arches or for debugging)
* 5. object_map_lock (Only for debugging)
*
* slab_mutex
......@@ -64,19 +64,19 @@
*
* The slab_lock is only used for debugging and on arches that do not
* have the ability to do a cmpxchg_double. It only protects:
* A. page->freelist -> List of object free in a page
* B. page->inuse -> Number of objects in use
* C. page->objects -> Number of objects in page
* D. page->frozen -> frozen state
* A. slab->freelist -> List of free objects in a slab
* B. slab->inuse -> Number of objects in use
* C. slab->objects -> Number of objects in slab
* D. slab->frozen -> frozen state
*
* Frozen slabs
*
* If a slab is frozen then it is exempt from list management. It is not
* on any list except per cpu partial list. The processor that froze the
* slab is the one who can perform list operations on the page. Other
* slab is the one who can perform list operations on the slab. Other
* processors may put objects onto the freelist but the processor that
* froze the slab is the only one that can retrieve the objects from the
* page's freelist.
* slab's freelist.
*
* list_lock
*
......@@ -135,7 +135,7 @@
* minimal so we rely on the page allocator's per cpu caches for
* fast frees and allocs.
*
* page->frozen The slab is frozen and exempt from list processing.
* slab->frozen The slab is frozen and exempt from list processing.
* This means that the slab is dedicated to a purpose
* such as satisfying allocations for a specific
* processor. Objects may be freed in the slab while
......@@ -250,7 +250,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */
/* Internal SLUB flags */
/* Poison object */
......@@ -417,18 +417,18 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
unsigned int nr_pages;
unsigned int nr_slabs;
s->cpu_partial = nr_objects;
/*
* We take the number of objects but actually limit the number of
* pages on the per cpu partial list, in order to limit excessive
* growth of the list. For simplicity we assume that the pages will
* slabs on the per cpu partial list, in order to limit excessive
* growth of the list. For simplicity we assume that the slabs will
* be half-full.
*/
nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_pages = nr_pages;
nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_slabs = nr_slabs;
}
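A worked example of the half-full assumption above: with 32 objects per slab
(oo_objects(s->oo)) and a request to cache 120 objects, the limit becomes
DIV_ROUND_UP(120 * 2, 32) = 8 slabs. The same arithmetic as a user-space
snippet (the numbers are made up):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int objs_per_slab = 32;	/* oo_objects(s->oo) */
	unsigned int nr_objects = 120;		/* requested cpu_partial */

	/* Assume each slab is only half-full, hence the factor of two. */
	unsigned int nr_slabs = DIV_ROUND_UP(nr_objects * 2, objs_per_slab);

	printf("cpu_partial_slabs = %u\n", nr_slabs);	/* 240 / 32 -> 8 */
	return 0;
}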
#else
static inline void
......@@ -440,28 +440,32 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
/*
* Per slab locking using the pagelock
*/
static __always_inline void __slab_lock(struct page *page)
static __always_inline void __slab_lock(struct slab *slab)
{
struct page *page = slab_page(slab);
VM_BUG_ON_PAGE(PageTail(page), page);
bit_spin_lock(PG_locked, &page->flags);
}
static __always_inline void __slab_unlock(struct page *page)
static __always_inline void __slab_unlock(struct slab *slab)
{
struct page *page = slab_page(slab);
VM_BUG_ON_PAGE(PageTail(page), page);
__bit_spin_unlock(PG_locked, &page->flags);
}
static __always_inline void slab_lock(struct page *page, unsigned long *flags)
static __always_inline void slab_lock(struct slab *slab, unsigned long *flags)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_save(*flags);
__slab_lock(page);
__slab_lock(slab);
}
static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags)
{
__slab_unlock(page);
__slab_unlock(slab);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_restore(*flags);
}
......@@ -471,7 +475,7 @@ static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
* by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different
* so we disable interrupts as part of slab_[un]lock().
*/
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
......@@ -481,7 +485,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
......@@ -491,15 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
/* init to 0 to prevent spurious warnings */
unsigned long flags = 0;
slab_lock(page, &flags);
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
slab_unlock(page, &flags);
slab_lock(slab, &flags);
if (slab->freelist == freelist_old &&
slab->counters == counters_old) {
slab->freelist = freelist_new;
slab->counters = counters_new;
slab_unlock(slab, &flags);
return true;
}
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
}
cpu_relax();
......@@ -512,7 +516,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
return false;
}
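Both cmpxchg helpers hinge on updating freelist and counters as one atomic
unit. A user-space approximation that compresses the pair into a single 64-bit
word so plain C11 atomics suffice (illustrative only: SLUB needs a genuine
double-word cmpxchg for the real layout, or falls back to the slab lock as
shown above):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t word;	/* high 32 bits: inuse, low 32: freelist head */

static int pop_object(uint32_t *idx)
{
	uint64_t old = atomic_load(&word);
	uint64_t new;

	do {
		uint32_t head = (uint32_t)old;

		if (head == 0)
			return 0;	/* freelist empty */
		*idx = head;
		/* Toy next pointer is head - 1; bump inuse at the same time. */
		new = ((old >> 32) + 1) << 32 | (head - 1);
	} while (!atomic_compare_exchange_weak(&word, &old, new));
	return 1;
}

int main(void)
{
	uint32_t idx;

	atomic_store(&word, 3);	/* three free objects, none in use */
	while (pop_object(&idx))
		printf("got object %u, word now %llx\n", idx,
		       (unsigned long long)atomic_load(&word));
	return 0;
}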
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
......@@ -520,7 +524,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
......@@ -530,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
unsigned long flags;
local_irq_save(flags);
__slab_lock(page);
if (page->freelist == freelist_old &&
page->counters == counters_old) {
page->freelist = freelist_new;
page->counters = counters_new;
__slab_unlock(page);
__slab_lock(slab);
if (slab->freelist == freelist_old &&
slab->counters == counters_old) {
slab->freelist = freelist_new;
slab->counters = counters_new;
__slab_unlock(slab);
local_irq_restore(flags);
return true;
}
__slab_unlock(page);
__slab_unlock(slab);
local_irq_restore(flags);
}
......@@ -558,14 +562,14 @@ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_RAW_SPINLOCK(object_map_lock);
static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
struct page *page)
struct slab *slab)
{
void *addr = page_address(page);
void *addr = slab_address(slab);
void *p;
bitmap_zero(obj_map, page->objects);
bitmap_zero(obj_map, slab->objects);
for (p = page->freelist; p; p = get_freepointer(s, p))
for (p = slab->freelist; p; p = get_freepointer(s, p))
set_bit(__obj_to_index(s, addr, p), obj_map);
}
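__fill_map() produces a simple invariant: bits set for free objects, clear for
allocated ones. A toy version over an index-based freelist, where the array
stands in for the chained free pointers (sizes invented):

#include <stdbool.h>
#include <stdio.h>

#define NR_OBJS 8

int main(void)
{
	/* Toy freelist: objects 1 and 5 are free; -1 terminates the chain. */
	int freelist[] = { 1, 5, -1 };
	bool obj_map[NR_OBJS] = { false };

	for (int i = 0; freelist[i] >= 0; i++)
		obj_map[freelist[i]] = true;	/* mark free objects */

	for (int i = 0; i < NR_OBJS; i++)
		if (!obj_map[i])
			printf("object %d is allocated\n", i);
	return 0;
}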
......@@ -590,19 +594,19 @@ static inline bool slab_add_kunit_errors(void) { return false; }
#endif
/*
* Determine a map of object in use on a page.
* Determine a map of objects in use in a slab.
*
* Node listlock must be held to guarantee that the page does
* Node listlock must be held to guarantee that the slab does
* not vanish from under us.
*/
static unsigned long *get_map(struct kmem_cache *s, struct page *page)
static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
__acquires(&object_map_lock)
{
VM_BUG_ON(!irqs_disabled());
raw_spin_lock(&object_map_lock);
__fill_map(object_map, s, page);
__fill_map(object_map, s, slab);
return object_map;
}
......@@ -663,17 +667,17 @@ static inline void metadata_access_disable(void)
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, void *object)
struct slab *slab, void *object)
{
void *base;
if (!object)
return 1;
base = page_address(page);
base = slab_address(slab);
object = kasan_reset_tag(object);
object = restore_red_left(s, object);
if (object < base || object >= base + page->objects * s->size ||
if (object < base || object >= base + slab->objects * s->size ||
(object - base) % s->size) {
return 0;
}
......@@ -784,12 +788,13 @@ void print_tracking(struct kmem_cache *s, void *object)
print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
}
static void print_page_info(struct page *page)
static void print_slab_info(const struct slab *slab)
{
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
page, page->objects, page->inuse, page->freelist,
&page->flags);
struct folio *folio = (struct folio *)slab_folio(slab);
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
folio_flags(folio, 0));
}
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
......@@ -822,28 +827,14 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
void **freelist, void *nextfree)
{
if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
!check_valid_pointer(s, page, nextfree) && freelist) {
object_err(s, page, *freelist, "Freechain corrupt");
*freelist = NULL;
slab_fix(s, "Isolate corrupted freechain");
return true;
}
return false;
}
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned int off; /* Offset of last byte */
u8 *addr = page_address(page);
u8 *addr = slab_address(slab);
print_tracking(s, p);
print_page_info(page);
print_slab_info(slab);
pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
p, p - addr, get_freepointer(s, p));
......@@ -875,18 +866,32 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
dump_stack();
}
void object_err(struct kmem_cache *s, struct page *page,
static void object_err(struct kmem_cache *s, struct slab *slab,
u8 *object, char *reason)
{
if (slab_add_kunit_errors())
return;
slab_bug(s, "%s", reason);
print_trailer(s, page, object);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
!check_valid_pointer(s, slab, nextfree) && freelist) {
object_err(s, slab, *freelist, "Freechain corrupt");
*freelist = NULL;
slab_fix(s, "Isolate corrupted freechain");
return true;
}
return false;
}
static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
const char *fmt, ...)
{
va_list args;
......@@ -899,7 +904,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
slab_bug(s, "%s", buf);
print_page_info(page);
print_slab_info(slab);
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
......@@ -927,13 +932,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
memset(from, data, to - from);
}
static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
u8 *object, char *what,
u8 *start, unsigned int value, unsigned int bytes)
{
u8 *fault;
u8 *end;
u8 *addr = page_address(page);
u8 *addr = slab_address(slab);
metadata_access_enable();
fault = memchr_inv(kasan_reset_tag(start), value, bytes);
......@@ -952,7 +957,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, page, object);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
......@@ -998,7 +1003,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
* may be used with merged slabcaches.
*/
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned long off = get_info_end(s); /* The end of info */
......@@ -1011,12 +1016,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
if (size_from_object(s) == off)
return 1;
return check_bytes_and_report(s, page, p, "Object padding",
return check_bytes_and_report(s, slab, p, "Object padding",
p + off, POISON_INUSE, size_from_object(s) - off);
}
/* Check the pad bytes at the end of a slab page */
static int slab_pad_check(struct kmem_cache *s, struct page *page)
static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{
u8 *start;
u8 *fault;
......@@ -1028,8 +1033,8 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!(s->flags & SLAB_POISON))
return 1;
start = page_address(page);
length = page_size(page);
start = slab_address(slab);
length = slab_size(slab);
end = start + length;
remainder = length % s->size;
if (!remainder)
......@@ -1044,7 +1049,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
while (end > fault && end[-1] == POISON_INUSE)
end--;
slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
fault, end - 1, fault - start);
print_section(KERN_ERR, "Padding ", pad, remainder);
......@@ -1052,23 +1057,23 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
return 0;
}
static int check_object(struct kmem_cache *s, struct page *page,
static int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val)
{
u8 *p = object;
u8 *endobject = object + s->object_size;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, page, object, "Left Redzone",
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
if (!check_bytes_and_report(s, page, object, "Right Redzone",
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, page, p, "Alignment padding",
check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
s->inuse - s->object_size);
}
......@@ -1076,15 +1081,15 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (s->flags & SLAB_POISON) {
if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
(!check_bytes_and_report(s, page, p, "Poison", p,
(!check_bytes_and_report(s, slab, p, "Poison", p,
POISON_FREE, s->object_size - 1) ||
!check_bytes_and_report(s, page, p, "End Poison",
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1)))
return 0;
/*
* check_pad_bytes cleans up on its own.
*/
check_pad_bytes(s, page, p);
check_pad_bytes(s, slab, p);
}
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
......@@ -1095,8 +1100,8 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
/* Check free pointer validity */
if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
object_err(s, page, p, "Freepointer corrupt");
if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
......@@ -1108,55 +1113,55 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
}
static int check_slab(struct kmem_cache *s, struct page *page)
static int check_slab(struct kmem_cache *s, struct slab *slab)
{
int maxobj;
if (!PageSlab(page)) {
slab_err(s, page, "Not a valid slab page");
if (!folio_test_slab(slab_folio(slab))) {
slab_err(s, slab, "Not a valid slab page");
return 0;
}
maxobj = order_objects(compound_order(page), s->size);
if (page->objects > maxobj) {
slab_err(s, page, "objects %u > max %u",
page->objects, maxobj);
maxobj = order_objects(slab_order(slab), s->size);
if (slab->objects > maxobj) {
slab_err(s, slab, "objects %u > max %u",
slab->objects, maxobj);
return 0;
}
if (page->inuse > page->objects) {
slab_err(s, page, "inuse %u > max %u",
page->inuse, page->objects);
if (slab->inuse > slab->objects) {
slab_err(s, slab, "inuse %u > max %u",
slab->inuse, slab->objects);
return 0;
}
/* Slab_pad_check fixes things up after itself */
slab_pad_check(s, page);
slab_pad_check(s, slab);
return 1;
}
/*
* Determine if a certain object on a page is on the freelist. Must hold the
* Determine if a certain object in a slab is on the freelist. Must hold the
* slab lock to guarantee that the chains are in a consistent state.
*/
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
{
int nr = 0;
void *fp;
void *object = NULL;
int max_objects;
fp = page->freelist;
while (fp && nr <= page->objects) {
fp = slab->freelist;
while (fp && nr <= slab->objects) {
if (fp == search)
return 1;
if (!check_valid_pointer(s, page, fp)) {
if (!check_valid_pointer(s, slab, fp)) {
if (object) {
object_err(s, page, object,
object_err(s, slab, object,
"Freechain corrupt");
set_freepointer(s, object, NULL);
} else {
slab_err(s, page, "Freepointer corrupt");
page->freelist = NULL;
page->inuse = page->objects;
slab_err(s, slab, "Freepointer corrupt");
slab->freelist = NULL;
slab->inuse = slab->objects;
slab_fix(s, "Freelist cleared");
return 0;
}
......@@ -1167,34 +1172,34 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
nr++;
}
max_objects = order_objects(compound_order(page), s->size);
max_objects = order_objects(slab_order(slab), s->size);
if (max_objects > MAX_OBJS_PER_PAGE)
max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
page->objects, max_objects);
page->objects = max_objects;
if (slab->objects != max_objects) {
slab_err(s, slab, "Wrong number of objects. Found %d but should be %d",
slab->objects, max_objects);
slab->objects = max_objects;
slab_fix(s, "Number of objects adjusted");
}
if (page->inuse != page->objects - nr) {
slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
page->inuse, page->objects - nr);
page->inuse = page->objects - nr;
if (slab->inuse != slab->objects - nr) {
slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d",
slab->inuse, slab->objects - nr);
slab->inuse = slab->objects - nr;
slab_fix(s, "Object count adjusted");
}
return search == NULL;
}
static void trace(struct kmem_cache *s, struct page *page, void *object,
static void trace(struct kmem_cache *s, struct slab *slab, void *object,
int alloc)
{
if (s->flags & SLAB_TRACE) {
pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
s->name,
alloc ? "alloc" : "free",
object, page->inuse,
page->freelist);
object, slab->inuse,
slab->freelist);
if (!alloc)
print_section(KERN_INFO, "Object ", (void *)object,
......@@ -1208,22 +1213,22 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page)
struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
list_add(&page->slab_list, &n->full);
list_add(&slab->slab_list, &n->full);
}
static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
list_del(&page->slab_list);
list_del(&slab->slab_list);
}
/* Tracking of the number of slabs for debugging purposes */
......@@ -1263,7 +1268,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
}
/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
static void setup_object_debug(struct kmem_cache *s, struct slab *slab,
void *object)
{
if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
......@@ -1274,89 +1279,89 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
}
static
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr)
{
if (!kmem_cache_debug_flags(s, SLAB_POISON))
return;
metadata_access_enable();
memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
metadata_access_disable();
}
static inline int alloc_consistency_checks(struct kmem_cache *s,
struct page *page, void *object)
struct slab *slab, void *object)
{
if (!check_slab(s, page))
if (!check_slab(s, slab))
return 0;
if (!check_valid_pointer(s, page, object)) {
object_err(s, page, object, "Freelist Pointer check fails");
if (!check_valid_pointer(s, slab, object)) {
object_err(s, slab, object, "Freelist Pointer check fails");
return 0;
}
if (!check_object(s, page, object, SLUB_RED_INACTIVE))
if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
return 0;
return 1;
}
static noinline int alloc_debug_processing(struct kmem_cache *s,
struct page *page,
struct slab *slab,
void *object, unsigned long addr)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!alloc_consistency_checks(s, page, object))
if (!alloc_consistency_checks(s, slab, object))
goto bad;
}
/* Success. Perform special debug activities for allocs */
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_ALLOC, addr);
trace(s, page, object, 1);
trace(s, slab, object, 1);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
bad:
if (PageSlab(page)) {
if (folio_test_slab(slab_folio(slab))) {
/*
* If this is a slab page then let's do the best we can
* to avoid issues in the future. Marking all objects
* as used avoids touching the remaining objects.
*/
slab_fix(s, "Marking all objects used");
page->inuse = page->objects;
page->freelist = NULL;
slab->inuse = slab->objects;
slab->freelist = NULL;
}
return 0;
}
static inline int free_consistency_checks(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr)
struct slab *slab, void *object, unsigned long addr)
{
if (!check_valid_pointer(s, page, object)) {
slab_err(s, page, "Invalid object pointer 0x%p", object);
if (!check_valid_pointer(s, slab, object)) {
slab_err(s, slab, "Invalid object pointer 0x%p", object);
return 0;
}
if (on_freelist(s, page, object)) {
object_err(s, page, object, "Object already free");
if (on_freelist(s, slab, object)) {
object_err(s, slab, object, "Object already free");
return 0;
}
if (!check_object(s, page, object, SLUB_RED_ACTIVE))
if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
return 0;
if (unlikely(s != page->slab_cache)) {
if (!PageSlab(page)) {
slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
if (unlikely(s != slab->slab_cache)) {
if (!folio_test_slab(slab_folio(slab))) {
slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
object);
} else if (!page->slab_cache) {
} else if (!slab->slab_cache) {
pr_err("SLUB <none>: no slab for object 0x%p.\n",
object);
dump_stack();
} else
object_err(s, page, object,
object_err(s, slab, object,
"page slab pointer corrupt.");
return 0;
}
......@@ -1365,21 +1370,21 @@ static inline int free_consistency_checks(struct kmem_cache *s,
/* Supports checking bulk free of a constructed freelist */
static noinline int free_debug_processing(
struct kmem_cache *s, struct page *page,
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
void *object = head;
int cnt = 0;
unsigned long flags, flags2;
int ret = 0;
spin_lock_irqsave(&n->list_lock, flags);
slab_lock(page, &flags2);
slab_lock(slab, &flags2);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!check_slab(s, page))
if (!check_slab(s, slab))
goto out;
}
......@@ -1387,13 +1392,13 @@ static noinline int free_debug_processing(
cnt++;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!free_consistency_checks(s, page, object, addr))
if (!free_consistency_checks(s, slab, object, addr))
goto out;
}
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
trace(s, slab, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
......@@ -1406,10 +1411,10 @@ static noinline int free_debug_processing(
out:
if (cnt != bulk_cnt)
slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
bulk_cnt, cnt);
slab_unlock(page, &flags2);
slab_unlock(slab, &flags2);
spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
......@@ -1624,26 +1629,26 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
}
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
struct slab *slab, void *object) {}
static inline
void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
struct slab *slab, void *object, unsigned long addr) { return 0; }
static inline int free_debug_processing(
struct kmem_cache *s, struct page *page,
struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr) { return 0; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{ return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
struct slab *slab) {}
slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t flags, const char *name)
{
......@@ -1662,7 +1667,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
......@@ -1767,10 +1772,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
return *head != NULL;
}
static void *setup_object(struct kmem_cache *s, struct page *page,
static void *setup_object(struct kmem_cache *s, struct slab *slab,
void *object)
{
setup_object_debug(s, page, object);
setup_object_debug(s, slab, object);
object = kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
......@@ -1783,18 +1788,27 @@ static void *setup_object(struct kmem_cache *s, struct page *page,
/*
* Slab allocation and freeing
*/
static inline struct page *alloc_slab_page(struct kmem_cache *s,
static inline struct slab *alloc_slab_page(struct kmem_cache *s,
gfp_t flags, int node, struct kmem_cache_order_objects oo)
{
struct page *page;
struct folio *folio;
struct slab *slab;
unsigned int order = oo_order(oo);
if (node == NUMA_NO_NODE)
page = alloc_pages(flags, order);
folio = (struct folio *)alloc_pages(flags, order);
else
page = __alloc_pages_node(node, flags, order);
folio = (struct folio *)__alloc_pages_node(node, flags, order);
if (!folio)
return NULL;
slab = folio_slab(folio);
__folio_set_slab(folio);
if (page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
return page;
return slab;
}
#ifdef CONFIG_SLAB_FREELIST_RANDOM
......@@ -1839,7 +1853,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry from the pre-computed randomized freelist */
static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
......@@ -1861,32 +1875,32 @@ static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
}
/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
void *start;
void *cur;
void *next;
unsigned long idx, pos, page_limit, freelist_count;
if (page->objects < 2 || !s->random_seq)
if (slab->objects < 2 || !s->random_seq)
return false;
freelist_count = oo_objects(s->oo);
pos = get_random_int() % freelist_count;
page_limit = page->objects * s->size;
start = fixup_red_left(s, page_address(page));
page_limit = slab->objects * s->size;
start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
cur = next_freelist_entry(s, page, &pos, start, page_limit,
cur = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
cur = setup_object(s, page, cur);
page->freelist = cur;
cur = setup_object(s, slab, cur);
slab->freelist = cur;
for (idx = 1; idx < page->objects; idx++) {
next = next_freelist_entry(s, page, &pos, start, page_limit,
for (idx = 1; idx < slab->objects; idx++) {
next = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
next = setup_object(s, page, next);
next = setup_object(s, slab, next);
set_freepointer(s, cur, next);
cur = next;
}
......@@ -1900,15 +1914,15 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
return 0;
}
static inline void init_freelist_randomization(void) { }
static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
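The shuffling above walks a precomputed permutation (s->random_seq) from a
random starting position and wraps around, so each new slab gets a differently
rotated random freelist without drawing per-object random numbers. A toy walk
with an invented sequence and starting point:

#include <stdio.h>

#define NR 5

int main(void)
{
	/* Precomputed permutation of object indexes (s->random_seq). */
	unsigned int seq[NR] = { 3, 0, 4, 2, 1 };
	unsigned int pos = 2;	/* random starting point */

	printf("freelist order:");
	for (int i = 0; i < NR; i++) {
		printf(" %u", seq[pos]);
		if (++pos == NR)	/* wrap around the sequence */
			pos = 0;
	}
	printf("\n");
	return 0;
}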
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
struct slab *slab;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
void *start, *p, *next;
......@@ -1927,63 +1941,60 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page)) {
slab = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!slab)) {
oo = s->min;
alloc_gfp = flags;
/*
* Allocation may have failed due to fragmentation.
* Try a lower order alloc if possible
*/
page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page))
slab = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!slab))
goto out;
stat(s, ORDER_FALLBACK);
}
page->objects = oo_objects(oo);
slab->objects = oo_objects(oo);
account_slab_page(page, oo_order(oo), s, flags);
account_slab(slab, oo_order(oo), s, flags);
page->slab_cache = s;
__SetPageSlab(page);
if (page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
slab->slab_cache = s;
kasan_poison_slab(page);
kasan_poison_slab(slab);
start = page_address(page);
start = slab_address(slab);
setup_page_debug(s, page, start);
setup_slab_debug(s, slab, start);
shuffle = shuffle_freelist(s, page);
shuffle = shuffle_freelist(s, slab);
if (!shuffle) {
start = fixup_red_left(s, start);
start = setup_object(s, page, start);
page->freelist = start;
for (idx = 0, p = start; idx < page->objects - 1; idx++) {
start = setup_object(s, slab, start);
slab->freelist = start;
for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
next = p + s->size;
next = setup_object(s, page, next);
next = setup_object(s, slab, next);
set_freepointer(s, p, next);
p = next;
}
set_freepointer(s, p, NULL);
}
page->inuse = page->objects;
page->frozen = 1;
slab->inuse = slab->objects;
slab->frozen = 1;
out:
if (!page)
if (!slab)
return NULL;
inc_slabs_node(s, page_to_nid(page), page->objects);
inc_slabs_node(s, slab_nid(slab), slab->objects);
return page;
return slab;
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
......@@ -1994,76 +2005,75 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
}
static void __free_slab(struct kmem_cache *s, struct page *page)
static void __free_slab(struct kmem_cache *s, struct slab *slab)
{
int order = compound_order(page);
struct folio *folio = slab_folio(slab);
int order = folio_order(folio);
int pages = 1 << order;
if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
slab_pad_check(s, page);
for_each_object(p, s, page_address(page),
page->objects)
check_object(s, page, p, SLUB_RED_INACTIVE);
slab_pad_check(s, slab);
for_each_object(p, s, slab_address(slab), slab->objects)
check_object(s, slab, p, SLUB_RED_INACTIVE);
}
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
/* In union with page->mapping where page allocator expects NULL */
page->slab_cache = NULL;
__slab_clear_pfmemalloc(slab);
__folio_clear_slab(folio);
folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
unaccount_slab_page(page, order, s);
__free_pages(page, order);
unaccount_slab(slab, order, s);
__free_pages(folio_page(folio, 0), order);
}
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page = container_of(h, struct page, rcu_head);
struct slab *slab = container_of(h, struct slab, rcu_head);
__free_slab(page->slab_cache, page);
__free_slab(slab->slab_cache, slab);
}
static void free_slab(struct kmem_cache *s, struct page *page)
static void free_slab(struct kmem_cache *s, struct slab *slab)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
call_rcu(&page->rcu_head, rcu_free_slab);
call_rcu(&slab->rcu_head, rcu_free_slab);
} else
__free_slab(s, page);
__free_slab(s, slab);
}
static void discard_slab(struct kmem_cache *s, struct page *page)
static void discard_slab(struct kmem_cache *s, struct slab *slab)
{
dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
dec_slabs_node(s, slab_nid(slab), slab->objects);
free_slab(s, slab);
}
/*
* Management of partially allocated slabs.
*/
static inline void
__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
{
n->nr_partial++;
if (tail == DEACTIVATE_TO_TAIL)
list_add_tail(&page->slab_list, &n->partial);
list_add_tail(&slab->slab_list, &n->partial);
else
list_add(&page->slab_list, &n->partial);
list_add(&slab->slab_list, &n->partial);
}
static inline void add_partial(struct kmem_cache_node *n,
struct page *page, int tail)
struct slab *slab, int tail)
{
lockdep_assert_held(&n->list_lock);
__add_partial(n, page, tail);
__add_partial(n, slab, tail);
}
static inline void remove_partial(struct kmem_cache_node *n,
struct page *page)
struct slab *slab)
{
lockdep_assert_held(&n->list_lock);
list_del(&page->slab_list);
list_del(&slab->slab_list);
n->nr_partial--;
}
......@@ -2074,12 +2084,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
* Returns a list of objects or NULL if it fails.
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
struct kmem_cache_node *n, struct slab *slab,
int mode)
{
void *freelist;
unsigned long counters;
struct page new;
struct slab new;
lockdep_assert_held(&n->list_lock);
......@@ -2088,11 +2098,11 @@ static inline void *acquire_slab(struct kmem_cache *s,
* The old freelist is the list of objects for the
* per cpu allocation list.
*/
freelist = page->freelist;
counters = page->counters;
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
if (mode) {
new.inuse = page->objects;
new.inuse = slab->objects;
new.freelist = NULL;
} else {
new.freelist = freelist;
......@@ -2101,35 +2111,35 @@ static inline void *acquire_slab(struct kmem_cache *s,
VM_BUG_ON(new.frozen);
new.frozen = 1;
if (!__cmpxchg_double_slab(s, page,
if (!__cmpxchg_double_slab(s, slab,
freelist, counters,
new.freelist, new.counters,
"acquire_slab"))
return NULL;
remove_partial(n, page);
remove_partial(n, slab);
WARN_ON(!freelist);
return freelist;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
int drain) { }
#endif
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
struct page **ret_page, gfp_t gfpflags)
struct slab **ret_slab, gfp_t gfpflags)
{
struct page *page, *page2;
struct slab *slab, *slab2;
void *object = NULL;
unsigned long flags;
unsigned int partial_pages = 0;
unsigned int partial_slabs = 0;
/*
* Racy check. If we mistakenly see no partial slabs then we
......@@ -2141,28 +2151,28 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
return NULL;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
void *t;
if (!pfmemalloc_match(page, gfpflags))
if (!pfmemalloc_match(slab, gfpflags))
continue;
t = acquire_slab(s, n, page, object == NULL);
t = acquire_slab(s, n, slab, object == NULL);
if (!t)
break;
if (!object) {
*ret_page = page;
*ret_slab = slab;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
put_cpu_partial(s, page, 0);
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
partial_pages++;
partial_slabs++;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
if (!kmem_cache_has_cpu_partial(s)
|| partial_pages > s->cpu_partial_pages / 2)
|| partial_slabs > s->cpu_partial_slabs / 2)
break;
#else
break;
......@@ -2174,10 +2184,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
}
/*
* Get a page from somewhere. Search in increasing NUMA distances.
* Get a slab from somewhere. Search in increasing NUMA distances.
*/
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
struct page **ret_page)
struct slab **ret_slab)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
......@@ -2219,7 +2229,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed(zone, flags) &&
n->nr_partial > s->min_partial) {
object = get_partial_node(s, n, ret_page, flags);
object = get_partial_node(s, n, ret_slab, flags);
if (object) {
/*
* Don't check read_mems_allowed_retry()
......@@ -2238,10 +2248,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
}
/*
* Get a partial page, lock it and return it.
* Get a partial slab, lock it and return it.
*/
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
struct page **ret_page)
struct slab **ret_slab)
{
void *object;
int searchnode = node;
......@@ -2249,11 +2259,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
object = get_partial_node(s, get_node(s, searchnode), ret_page, flags);
object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags);
if (object || node != NUMA_NO_NODE)
return object;
return get_any_partial(s, flags, ret_page);
return get_any_partial(s, flags, ret_slab);
}
#ifdef CONFIG_PREEMPTION
......@@ -2330,25 +2340,25 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
}
/*
* Finishes removing the cpu slab. Merges cpu's freelist with page's freelist,
* Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
* unfreezes the slabs and puts it on the proper list.
* Assumes the slab has been already safely taken away from kmem_cache_cpu
* by the caller.
*/
static void deactivate_slab(struct kmem_cache *s, struct page *page,
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int lock = 0, free_delta = 0;
enum slab_modes l = M_NONE, m = M_NONE;
void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
unsigned long flags = 0;
struct page new;
struct page old;
struct slab new;
struct slab old;
if (page->freelist) {
if (slab->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = DEACTIVATE_TO_TAIL;
}
......@@ -2367,7 +2377,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* 'freelist_iter' is already corrupted. So isolate all objects
* starting at 'freelist_iter' by skipping them.
*/
if (freelist_corrupted(s, page, &freelist_iter, nextfree))
if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
break;
freelist_tail = freelist_iter;
......@@ -2377,25 +2387,25 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
/*
* Stage two: Unfreeze the page while splicing the per-cpu
* freelist to the head of page's freelist.
* Stage two: Unfreeze the slab while splicing the per-cpu
* freelist to the head of slab's freelist.
*
* Ensure that the page is unfrozen while the list presence
* Ensure that the slab is unfrozen while the list presence
* reflects the actual number of objects during unfreeze.
*
 * We set up the list membership and then perform a cmpxchg
* with the count. If there is a mismatch then the page
* is not unfrozen but the page is on the wrong list.
* with the count. If there is a mismatch then the slab
* is not unfrozen but the slab is on the wrong list.
*
* Then we restart the process which may have to remove
* the page from the list that we just put it on again
* the slab from the list that we just put it on again
* because the number of objects in the slab may have
* changed.
*/
redo:
old.freelist = READ_ONCE(page->freelist);
old.counters = READ_ONCE(page->counters);
old.freelist = READ_ONCE(slab->freelist);
old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
......@@ -2416,9 +2426,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
if (!lock) {
lock = 1;
/*
* Taking the spinlock removes the possibility
* that acquire_slab() will see a slab page that
* is frozen
* Taking the spinlock removes the possibility that
* acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
}
......@@ -2437,18 +2446,18 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
if (l != m) {
if (l == M_PARTIAL)
remove_partial(n, page);
remove_partial(n, slab);
else if (l == M_FULL)
remove_full(s, n, page);
remove_full(s, n, slab);
if (m == M_PARTIAL)
add_partial(n, page, tail);
add_partial(n, slab, tail);
else if (m == M_FULL)
add_full(s, n, page);
add_full(s, n, slab);
}
l = m;
if (!cmpxchg_double_slab(s, page,
if (!cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"))
......@@ -2463,26 +2472,26 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
stat(s, DEACTIVATE_FULL);
else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
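
The redo loop in deactivate_slab() is the core pattern of this file: snapshot (freelist, counters), compute the target state, and publish both words with one compare-exchange, retrying if a concurrent free changed either. A standalone sketch under stated assumptions — a C11 _Atomic struct stands in for the kernel's cmpxchg_double_slab(), the frozen bit is modeled as bit 0 of counters, and the splicing of the per-cpu tail onto the old freelist is elided:

#include <stdatomic.h>

#define SLAB_FROZEN 1UL         /* assumption: frozen modeled as bit 0 */

struct slab_state {             /* stand-in for struct slab's hot words */
        void *freelist;
        unsigned long counters;
};

/* Publish the spliced freelist and clear frozen, atomically. */
static void unfreeze(_Atomic struct slab_state *s, void *spliced_freelist)
{
        struct slab_state old = atomic_load(s), new;

        do {
                new.freelist = spliced_freelist;
                new.counters = old.counters & ~SLAB_FROZEN;
        } while (!atomic_compare_exchange_weak(s, &old, new));
}

On failure, atomic_compare_exchange_weak() refreshes `old` with the current value, so `new` is recomputed from a fresh snapshot on every retry, just as the kernel's goto redo restarts from READ_ONCE().
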
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct page *page, *discard_page = NULL;
struct slab *slab, *slab_to_discard = NULL;
unsigned long flags = 0;
while (partial_page) {
struct page new;
struct page old;
while (partial_slab) {
struct slab new;
struct slab old;
page = partial_page;
partial_page = page->next;
slab = partial_slab;
partial_slab = slab->next;
n2 = get_node(s, page_to_nid(page));
n2 = get_node(s, slab_nid(slab));
if (n != n2) {
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
......@@ -2493,8 +2502,8 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
do {
old.freelist = page->freelist;
old.counters = page->counters;
old.freelist = slab->freelist;
old.counters = slab->counters;
VM_BUG_ON(!old.frozen);
new.counters = old.counters;
......@@ -2502,16 +2511,16 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
new.frozen = 0;
} while (!__cmpxchg_double_slab(s, page,
} while (!__cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
page->next = discard_page;
discard_page = page;
slab->next = slab_to_discard;
slab_to_discard = slab;
} else {
add_partial(n, page, DEACTIVATE_TO_TAIL);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
......@@ -2519,12 +2528,12 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
while (discard_page) {
page = discard_page;
discard_page = discard_page->next;
while (slab_to_discard) {
slab = slab_to_discard;
slab_to_discard = slab_to_discard->next;
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, page);
discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
......@@ -2534,73 +2543,73 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
*/
static void unfreeze_partials(struct kmem_cache *s)
{
struct page *partial_page;
struct slab *partial_slab;
unsigned long flags;
local_lock_irqsave(&s->cpu_slab->lock, flags);
partial_page = this_cpu_read(s->cpu_slab->partial);
partial_slab = this_cpu_read(s->cpu_slab->partial);
this_cpu_write(s->cpu_slab->partial, NULL);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (partial_page)
__unfreeze_partials(s, partial_page);
if (partial_slab)
__unfreeze_partials(s, partial_slab);
}
static void unfreeze_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
struct page *partial_page;
struct slab *partial_slab;
partial_page = slub_percpu_partial(c);
partial_slab = slub_percpu_partial(c);
c->partial = NULL;
if (partial_page)
__unfreeze_partials(s, partial_page);
if (partial_slab)
__unfreeze_partials(s, partial_slab);
}
/*
* Put a page that was just frozen (in __slab_free|get_partial_node) into a
* partial page slot if available.
* Put a slab that was just frozen (in __slab_free|get_partial_node) into a
* partial slab slot if available.
*
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
struct page *oldpage;
struct page *page_to_unfreeze = NULL;
struct slab *oldslab;
struct slab *slab_to_unfreeze = NULL;
unsigned long flags;
int pages = 0;
int slabs = 0;
local_lock_irqsave(&s->cpu_slab->lock, flags);
oldpage = this_cpu_read(s->cpu_slab->partial);
oldslab = this_cpu_read(s->cpu_slab->partial);
if (oldpage) {
if (drain && oldpage->pages >= s->cpu_partial_pages) {
if (oldslab) {
if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
/*
* Partial array is full. Move the existing set to the
* per node partial list. Postpone the actual unfreezing
* outside of the critical section.
*/
page_to_unfreeze = oldpage;
oldpage = NULL;
slab_to_unfreeze = oldslab;
oldslab = NULL;
} else {
pages = oldpage->pages;
slabs = oldslab->slabs;
}
}
pages++;
slabs++;
page->pages = pages;
page->next = oldpage;
slab->slabs = slabs;
slab->next = oldslab;
this_cpu_write(s->cpu_slab->partial, page);
this_cpu_write(s->cpu_slab->partial, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (page_to_unfreeze) {
__unfreeze_partials(s, page_to_unfreeze);
if (slab_to_unfreeze) {
__unfreeze_partials(s, slab_to_unfreeze);
stat(s, CPU_PARTIAL_DRAIN);
}
}
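
Restated outside the locking and per-cpu plumbing, put_cpu_partial() is a push onto a singly linked list whose head caches the list length, so the drain-threshold check stays O(1). A standalone sketch (struct pslab is an illustrative stand-in for struct slab):

#include <stddef.h>

struct pslab {
        struct pslab *next;
        int slabs;              /* valid only on the list head */
};

struct cpu_partial {
        struct pslab *head;
        int limit;              /* models s->cpu_partial_slabs */
};

/* Returns a chain the caller must unfreeze to the node lists, or NULL. */
static struct pslab *push_partial(struct cpu_partial *pp, struct pslab *slab,
                                  int drain)
{
        struct pslab *to_unfreeze = NULL;
        int slabs = 0;

        if (pp->head) {
                if (drain && pp->head->slabs >= pp->limit) {
                        to_unfreeze = pp->head; /* list full: hand it back */
                        pp->head = NULL;
                } else {
                        slabs = pp->head->slabs;
                }
        }
        slab->slabs = slabs + 1;        /* new head caches the length */
        slab->next = pp->head;
        pp->head = slab;
        return to_unfreeze;
}
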
......@@ -2616,22 +2625,22 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s,
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
unsigned long flags;
struct page *page;
struct slab *slab;
void *freelist;
local_lock_irqsave(&s->cpu_slab->lock, flags);
page = c->page;
slab = c->slab;
freelist = c->freelist;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
if (page) {
deactivate_slab(s, page, freelist);
if (slab) {
deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
}
......@@ -2640,14 +2649,14 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
void *freelist = c->freelist;
struct page *page = c->page;
struct slab *slab = c->slab;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
if (page) {
deactivate_slab(s, page, freelist);
if (slab) {
deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
......@@ -2676,7 +2685,7 @@ static void flush_cpu_slab(struct work_struct *w)
s = sfw->s;
c = this_cpu_ptr(s->cpu_slab);
if (c->page)
if (c->slab)
flush_slab(s, c);
unfreeze_partials(s);
......@@ -2686,7 +2695,7 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->page || slub_percpu_partial(c);
return c->slab || slub_percpu_partial(c);
}
static DEFINE_MUTEX(flush_lock);
......@@ -2748,19 +2757,19 @@ static int slub_cpu_dead(unsigned int cpu)
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
static inline int node_match(struct page *page, int node)
static inline int node_match(struct slab *slab, int node)
{
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE && page_to_nid(page) != node)
if (node != NUMA_NO_NODE && slab_nid(slab) != node)
return 0;
#endif
return 1;
}
#ifdef CONFIG_SLUB_DEBUG
static int count_free(struct page *page)
static int count_free(struct slab *slab)
{
return page->objects - page->inuse;
return slab->objects - slab->inuse;
}
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
......@@ -2771,15 +2780,15 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct page *))
int (*get_count)(struct slab *))
{
unsigned long flags;
unsigned long x = 0;
struct page *page;
struct slab *slab;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
x += get_count(page);
list_for_each_entry(slab, &n->partial, slab_list)
x += get_count(slab);
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
......@@ -2822,54 +2831,41 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
#endif
}
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
if (unlikely(PageSlabPfmemalloc(page)))
if (unlikely(slab_test_pfmemalloc(slab)))
return gfp_pfmemalloc_allowed(gfpflags);
return true;
}
/*
* A variant of pfmemalloc_match() that tests page flags without asserting
* PageSlab. Intended for opportunistic checks before taking a lock and
* rechecking that nobody else freed the page under us.
*/
static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
{
if (unlikely(__PageSlabPfmemalloc(page)))
return gfp_pfmemalloc_allowed(gfpflags);
return true;
}
/*
* Check the page->freelist of a page and either transfer the freelist to the
* per cpu freelist or deactivate the page.
* Check the slab->freelist and either transfer the freelist to the
* per cpu freelist or deactivate the slab.
*
* The page is still frozen if the return value is not NULL.
* The slab is still frozen if the return value is not NULL.
*
* If this function returns NULL then the page has been unfrozen.
* If this function returns NULL then the slab has been unfrozen.
*/
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
{
struct page new;
struct slab new;
unsigned long counters;
void *freelist;
lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
do {
freelist = page->freelist;
counters = page->counters;
freelist = slab->freelist;
counters = slab->counters;
new.counters = counters;
VM_BUG_ON(!new.frozen);
new.inuse = page->objects;
new.inuse = slab->objects;
new.frozen = freelist != NULL;
} while (!__cmpxchg_double_slab(s, page,
} while (!__cmpxchg_double_slab(s, slab,
freelist, counters,
NULL, new.counters,
"get_freelist"));
......@@ -2900,15 +2896,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
void *freelist;
struct page *page;
struct slab *slab;
unsigned long flags;
stat(s, ALLOC_SLOWPATH);
reread_page:
reread_slab:
page = READ_ONCE(c->page);
if (!page) {
slab = READ_ONCE(c->slab);
if (!slab) {
/*
* if the node is not online or has no normal memory, just
* ignore the node constraint
......@@ -2920,7 +2916,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
redo:
if (unlikely(!node_match(page, node))) {
if (unlikely(!node_match(slab, node))) {
/*
* same as above but node_match() being false already
* implies node != NUMA_NO_NODE
......@@ -2939,23 +2935,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* PFMEMALLOC but right now, we are losing the pfmemalloc
* information when the page leaves the per-cpu allocator
*/
if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
if (unlikely(!pfmemalloc_match(slab, gfpflags)))
goto deactivate_slab;
/* must check again c->page in case we got preempted and it changed */
/* must check again c->slab in case we got preempted and it changed */
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(page != c->page)) {
if (unlikely(slab != c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
freelist = c->freelist;
if (freelist)
goto load_freelist;
freelist = get_freelist(s, page);
freelist = get_freelist(s, slab);
if (!freelist) {
c->page = NULL;
c->slab = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
......@@ -2969,10 +2965,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
/*
* freelist is pointing to the list of objects to be used.
* page is pointing to the page from which the objects are obtained.
* That page must be frozen for per cpu allocations to work.
* slab is pointing to the slab from which the objects are obtained.
* That slab must be frozen for per cpu allocations to work.
*/
VM_BUG_ON(!c->page->frozen);
VM_BUG_ON(!c->slab->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
......@@ -2981,23 +2977,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
deactivate_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (page != c->page) {
if (slab != c->slab) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
freelist = c->freelist;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, page, freelist);
deactivate_slab(s, slab, freelist);
new_slab:
if (slub_percpu_partial(c)) {
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->page)) {
if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_page;
goto reread_slab;
}
if (unlikely(!slub_percpu_partial(c))) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
......@@ -3005,8 +3001,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto new_objects;
}
page = c->page = slub_percpu_partial(c);
slub_set_percpu_partial(c, page);
slab = c->slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
......@@ -3014,32 +3010,32 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
new_objects:
freelist = get_partial(s, gfpflags, node, &page);
freelist = get_partial(s, gfpflags, node, &slab);
if (freelist)
goto check_new_page;
goto check_new_slab;
slub_put_cpu_ptr(s->cpu_slab);
page = new_slab(s, gfpflags, node);
slab = new_slab(s, gfpflags, node);
c = slub_get_cpu_ptr(s->cpu_slab);
if (unlikely(!page)) {
if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
/*
* No other reference to the page yet so we can
* No other reference to the slab yet so we can
* muck around with it freely without cmpxchg
*/
freelist = page->freelist;
page->freelist = NULL;
freelist = slab->freelist;
slab->freelist = NULL;
stat(s, ALLOC_SLAB);
check_new_page:
check_new_slab:
if (kmem_cache_debug(s)) {
if (!alloc_debug_processing(s, page, freelist, addr)) {
if (!alloc_debug_processing(s, slab, freelist, addr)) {
/* Slab failed checks. Next slab needed */
goto new_slab;
} else {
......@@ -3051,39 +3047,39 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
}
if (unlikely(!pfmemalloc_match(page, gfpflags)))
if (unlikely(!pfmemalloc_match(slab, gfpflags)))
/*
* For !pfmemalloc_match() case we don't load freelist so that
* we don't make further mismatched allocations easier.
*/
goto return_single;
retry_load_page:
retry_load_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->page)) {
if (unlikely(c->slab)) {
void *flush_freelist = c->freelist;
struct page *flush_page = c->page;
struct slab *flush_slab = c->slab;
c->page = NULL;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, flush_page, flush_freelist);
deactivate_slab(s, flush_slab, flush_freelist);
stat(s, CPUSLAB_FLUSH);
goto retry_load_page;
goto retry_load_slab;
}
c->page = page;
c->slab = slab;
goto load_freelist;
return_single:
deactivate_slab(s, page, get_freepointer(s, freelist));
deactivate_slab(s, slab, get_freepointer(s, freelist));
return freelist;
}
......@@ -3140,7 +3136,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
{
void *object;
struct kmem_cache_cpu *c;
struct page *page;
struct slab *slab;
unsigned long tid;
struct obj_cgroup *objcg = NULL;
bool init = false;
......@@ -3172,9 +3168,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
/*
* Irqless object alloc/free algorithm used here depends on sequence
* of fetching cpu_slab's data. tid should be fetched before anything
* on c to guarantee that object and page associated with previous tid
* on c to guarantee that object and slab associated with previous tid
* won't be used with current tid. If we fetch tid first, object and
* page could be one associated with next tid and our alloc/free
* slab could be one associated with next tid and our alloc/free
 * request will fail. In this case we will retry, so there is no problem.
*/
barrier();
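
A single-threaded illustration of that ordering contract (a standalone model, not the kernel's this_cpu_cmpxchg_double(): tid and freelist are folded into one atomic struct here, and next_tid()'s TID_STEP is simplified to +1). The final compare-exchange succeeds only if both the freelist and the tid snapshot are still current, so any slowpath or cpu migration in between forces a retry:

#include <stdatomic.h>
#include <stddef.h>

struct cpu_slab {               /* model of kmem_cache_cpu's hot fields */
        void *freelist;
        unsigned long tid;
};

/* next() stands in for get_freepointer(); tid steps by 1, not TID_STEP. */
static void *alloc_fast(_Atomic struct cpu_slab *c, void *(*next)(void *))
{
        struct cpu_slab old = atomic_load(c);   /* tid read with freelist */
        struct cpu_slab new;

        if (!old.freelist)
                return NULL;                    /* would take the slowpath */

        new.freelist = next(old.freelist);
        new.tid = old.tid + 1;
        if (!atomic_compare_exchange_strong(c, &old, new))
                return NULL;                    /* raced: caller retries */
        return old.freelist;
}
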
......@@ -3187,7 +3183,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
*/
object = c->freelist;
page = c->page;
slab = c->slab;
/*
* We cannot use the lockless fastpath on PREEMPT_RT because if a
* slowpath has taken the local_lock_irqsave(), it is not protected
......@@ -3196,7 +3192,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
* there is a suitable cpu freelist.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
unlikely(!object || !page || !node_match(page, node))) {
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
} else {
void *next_object = get_freepointer_safe(s, object);
......@@ -3298,17 +3294,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
* have a longer lifetime than the cpu slabs in most processing loads.
*
* So we still attempt to reduce cache line usage. Just take the slab
* lock and free the item. If there is no additional partial page
* lock and free the item. If there is no additional partial slab
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
static void __slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
void *prior;
int was_frozen;
struct page new;
struct slab new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;
......@@ -3319,7 +3315,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
if (kmem_cache_debug(s) &&
!free_debug_processing(s, page, head, tail, cnt, addr))
!free_debug_processing(s, slab, head, tail, cnt, addr))
return;
do {
......@@ -3327,8 +3323,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = page->freelist;
counters = page->counters;
prior = slab->freelist;
counters = slab->counters;
set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
......@@ -3347,7 +3343,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
} else { /* Needs to be taken off a list */
n = get_node(s, page_to_nid(page));
n = get_node(s, slab_nid(slab));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
......@@ -3361,7 +3357,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
}
}
} while (!cmpxchg_double_slab(s, page,
} while (!cmpxchg_double_slab(s, slab,
prior, counters,
head, new.counters,
"__slab_free"));
......@@ -3376,10 +3372,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_FROZEN);
} else if (new.frozen) {
/*
* If we just froze the page then put it onto the
* If we just froze the slab then put it onto the
* per cpu partial list.
*/
put_cpu_partial(s, page, 1);
put_cpu_partial(s, slab, 1);
stat(s, CPU_PARTIAL_FREE);
}
......@@ -3394,8 +3390,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
remove_full(s, n, page);
add_partial(n, page, DEACTIVATE_TO_TAIL);
remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
......@@ -3406,16 +3402,16 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
/*
* Slab on the partial list.
*/
remove_partial(n, page);
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
} else {
/* Slab must be on the full list */
remove_full(s, n, page);
remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
discard_slab(s, slab);
}
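
The branch structure of __slab_free() reduces to a small decision table. A compact, simplified restatement (illustrative names; stats, list-lock handling and the debug/full-list bookkeeping are elided):

#include <stdbool.h>

struct outcome { bool cpu_partial; bool discard; };

static struct outcome classify(bool was_frozen, bool now_frozen,
                               bool now_empty, bool node_has_enough_partials)
{
        struct outcome o = { false, false };

        if (was_frozen)
                return o;               /* owning cpu keeps using the slab */
        if (now_frozen) {
                o.cpu_partial = true;   /* goes onto the per-cpu partial list */
                return o;
        }
        if (now_empty && node_has_enough_partials)
                o.discard = true;       /* taken off its list and freed */
        return o;                       /* otherwise stays on / moves to partial */
}
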
/*
......@@ -3430,11 +3426,11 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* with all sorts of special processing.
*
* Bulk free of a freelist with several objects (all pointing to the
* same page) possible by specifying head and tail ptr, plus objects
 * same slab) is possible by specifying head and tail ptr, plus objects
 * count (cnt). Bulk free is indicated by the tail pointer being set.
*/
static __always_inline void do_slab_free(struct kmem_cache *s,
struct page *page, void *head, void *tail,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
......@@ -3457,7 +3453,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
if (likely(page == c->page)) {
if (likely(slab == c->slab)) {
#ifndef CONFIG_PREEMPT_RT
void **freelist = READ_ONCE(c->freelist);
......@@ -3483,7 +3479,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
local_lock(&s->cpu_slab->lock);
c = this_cpu_ptr(s->cpu_slab);
if (unlikely(page != c->page)) {
if (unlikely(slab != c->slab)) {
local_unlock(&s->cpu_slab->lock);
goto redo;
}
......@@ -3498,11 +3494,11 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
#endif
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, head, tail_obj, cnt, addr);
__slab_free(s, slab, head, tail_obj, cnt, addr);
}
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
......@@ -3511,13 +3507,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* to remove objects, whose reuse must be delayed.
*/
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
do_slab_free(s, page, head, tail, cnt, addr);
do_slab_free(s, slab, head, tail, cnt, addr);
}
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
}
#endif
......@@ -3527,35 +3523,36 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
if (!s)
return;
trace_kmem_cache_free(_RET_IP_, x, s->name);
slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
struct detached_freelist {
struct page *page;
struct slab *slab;
void *tail;
void *freelist;
int cnt;
struct kmem_cache *s;
};
static inline void free_nonslab_page(struct page *page, void *object)
static inline void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = compound_order(page);
unsigned int order = folio_order(folio);
if (WARN_ON_ONCE(!PageCompound(page)))
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kfree_hook(object);
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
__free_pages(page, order);
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(folio_page(folio, 0), order);
}
/*
* This function progressively scans the array with free objects (with
 * a limited look ahead) and extracts objects belonging to the same
* page. It builds a detached freelist directly within the given
* page/objects. This can happen without any need for
* slab. It builds a detached freelist directly within the given
* slab/objects. This can happen without any need for
 * synchronization, because the objects are owned by the running process.
 * The freelist is built up as a singly linked list in the objects.
 * The idea is that this detached freelist can then be bulk
......@@ -3570,10 +3567,11 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
size_t first_skipped_index = 0;
int lookahead = 3;
void *object;
struct page *page;
struct folio *folio;
struct slab *slab;
/* Always re-init detached_freelist */
df->page = NULL;
df->slab = NULL;
do {
object = p[--size];
......@@ -3583,17 +3581,19 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
return 0;
page = virt_to_head_page(object);
folio = virt_to_folio(object);
if (!s) {
/* Handle kmalloc'ed objects */
if (unlikely(!PageSlab(page))) {
free_nonslab_page(page, object);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
p[size] = NULL; /* mark object processed */
return size;
}
/* Derive kmem_cache from object */
df->s = page->slab_cache;
slab = folio_slab(folio);
df->s = slab->slab_cache;
} else {
slab = folio_slab(folio);
df->s = cache_from_obj(s, object); /* Support for memcg */
}
......@@ -3605,7 +3605,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
}
/* Start new detached freelist */
df->page = page;
df->slab = slab;
set_freepointer(df->s, object, NULL);
df->tail = object;
df->freelist = object;
......@@ -3617,8 +3617,8 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
continue; /* Skip processed objects */
/* df->page is always set at this point */
if (df->page == virt_to_head_page(object)) {
/* df->slab is always set at this point */
if (df->slab == virt_to_slab(object)) {
/* Opportunistically build the freelist */
set_freepointer(df->s, object, df->freelist);
df->freelist = object;
......@@ -3650,10 +3650,10 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
struct detached_freelist df;
size = build_detached_freelist(s, size, p, &df);
if (!df.page)
if (!df.slab)
continue;
slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
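
Caller's-eye usage sketch for the bulk path above (kernel-context pseudocode; cache creation and full error handling are elided): one kmem_cache_free_bulk() call accepts a mixed array, and build_detached_freelist() internally peels off the objects that share a slab, handing each group to slab_free() as a pre-linked freelist.

void drop_objects(struct kmem_cache *cache)
{
        void *objs[8];
        size_t n;

        for (n = 0; n < 8; n++) {
                objs[n] = kmem_cache_alloc(cache, GFP_KERNEL);
                if (!objs[n])
                        break;          /* free only what we got */
        }

        /* One call; internally split into per-slab detached freelists. */
        kmem_cache_free_bulk(cache, n, objs);
}
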
......@@ -3787,7 +3787,7 @@ static unsigned int slub_min_objects;
* requested a higher minimum order then we start with that one instead of
* the smallest order which will fit the object.
*/
static inline unsigned int slab_order(unsigned int size,
static inline unsigned int calc_slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
{
......@@ -3851,7 +3851,7 @@ static inline int calculate_order(unsigned int size)
fraction = 16;
while (fraction >= 4) {
order = slab_order(size, min_objects,
order = calc_slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
......@@ -3864,14 +3864,14 @@ static inline int calculate_order(unsigned int size)
* We were unable to place multiple objects in a slab. Now
 * let's see if we can place a single object there.
*/
order = slab_order(size, 1, slub_max_order, 1);
order = calc_slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
* Doh this slab cannot be placed using slub_max_order.
*/
order = slab_order(size, 1, MAX_ORDER, 1);
order = calc_slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
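
A worked example of the fit test calc_slab_order() applies (standalone sketch using the same "waste <= slab_size / fract_leftover" rule; PAGE_SIZE assumed 4096): for a 700-byte object with fract_leftover = 16, order 0 wastes 4096 % 700 = 596 bytes against a limit of 4096 / 16 = 256 and is rejected, while order 1 wastes 8192 % 700 = 492 against 8192 / 16 = 512 and is accepted.

#include <stdio.h>

#define PAGE_SIZE 4096u

static unsigned int fit_order(unsigned int size, unsigned int max_order,
                              unsigned int fract_leftover)
{
        for (unsigned int order = 0; order <= max_order; order++) {
                unsigned int slab_size = PAGE_SIZE << order;
                unsigned int rem = slab_size % size;    /* wasted bytes */

                if (rem <= slab_size / fract_leftover)
                        return order;
        }
        return max_order + 1;   /* caller relaxes fract_leftover and retries */
}

int main(void)
{
        printf("%u\n", fit_order(700, 3, 16));  /* prints 1 */
        return 0;
}
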
......@@ -3923,38 +3923,38 @@ static struct kmem_cache *kmem_cache_node;
*/
static void early_kmem_cache_node_alloc(int node)
{
struct page *page;
struct slab *slab;
struct kmem_cache_node *n;
BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
BUG_ON(!page);
if (page_to_nid(page) != node) {
BUG_ON(!slab);
if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
}
n = page->freelist;
n = slab->freelist;
BUG_ON(!n);
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
page->freelist = get_freepointer(kmem_cache_node, n);
page->inuse = 1;
page->frozen = 0;
slab->freelist = get_freepointer(kmem_cache_node, n);
slab->inuse = 1;
slab->frozen = 0;
kmem_cache_node->node[node] = n;
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, page->objects);
inc_slabs_node(kmem_cache_node, node, slab->objects);
/*
* No locks need to be taken here as it has just been
* initialized and there is no concurrent access.
*/
__add_partial(n, page, DEACTIVATE_TO_HEAD);
__add_partial(n, slab, DEACTIVATE_TO_HEAD);
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
......@@ -4212,7 +4212,7 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
#endif
/*
* The larger the object size is, the more pages we want on the partial
* The larger the object size is, the more slabs we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);
......@@ -4240,20 +4240,20 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
return -EINVAL;
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *addr = slab_address(slab);
unsigned long flags;
unsigned long *map;
void *p;
slab_err(s, page, text, s->name);
slab_lock(page, &flags);
slab_err(s, slab, text, s->name);
slab_lock(slab, &flags);
map = get_map(s, page);
for_each_object(p, s, addr, page->objects) {
map = get_map(s, slab);
for_each_object(p, s, addr, slab->objects) {
if (!test_bit(__obj_to_index(s, addr, p), map)) {
pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
......@@ -4261,7 +4261,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
}
}
put_map(map);
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
#endif
}
......@@ -4273,23 +4273,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
struct slab *slab, *h;
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &n->partial, slab_list) {
if (!page->inuse) {
remove_partial(n, page);
list_add(&page->slab_list, &discard);
list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
if (!slab->inuse) {
remove_partial(n, slab);
list_add(&slab->slab_list, &discard);
} else {
list_slab_objects(s, page,
list_slab_objects(s, slab,
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
list_for_each_entry_safe(slab, h, &discard, slab_list)
discard_slab(s, slab);
}
bool __kmem_cache_empty(struct kmem_cache *s)
......@@ -4322,31 +4322,32 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
void *base;
int __maybe_unused i;
unsigned int objnr;
void *objp;
void *objp0;
struct kmem_cache *s = page->slab_cache;
struct kmem_cache *s = slab->slab_cache;
struct track __maybe_unused *trackp;
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
kpp->kp_slab_cache = s;
base = page_address(page);
base = slab_address(slab);
objp0 = kasan_reset_tag(object);
#ifdef CONFIG_SLUB_DEBUG
objp = restore_red_left(s, objp0);
#else
objp = objp0;
#endif
objnr = obj_to_index(s, page, objp);
objnr = obj_to_index(s, slab, objp);
kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
objp = base + s->size * objnr;
kpp->kp_objp = objp;
if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size
|| (objp - base) % s->size) ||
!(s->flags & SLAB_STORE_USER))
return;
#ifdef CONFIG_SLUB_DEBUG
......@@ -4484,8 +4485,8 @@ EXPORT_SYMBOL(__kmalloc_node);
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user)
void __check_heap_object(const void *ptr, unsigned long n,
const struct slab *slab, bool to_user)
{
struct kmem_cache *s;
unsigned int offset;
......@@ -4494,10 +4495,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find object and usable object size. */
s = page->slab_cache;
s = slab->slab_cache;
/* Reject impossible pointers. */
if (ptr < page_address(page))
if (ptr < slab_address(slab))
usercopy_abort("SLUB object not in SLUB page?!", NULL,
to_user, 0, n);
......@@ -4505,7 +4506,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
if (is_kfence)
offset = ptr - kfence_object_start(ptr);
else
offset = (ptr - page_address(page)) % s->size;
offset = (ptr - slab_address(slab)) % s->size;
/* Adjust for redzone and reject if within the redzone. */
if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
......@@ -4527,25 +4528,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
size_t __ksize(const void *object)
{
struct page *page;
struct folio *folio;
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
page = virt_to_head_page(object);
folio = virt_to_folio(object);
if (unlikely(!PageSlab(page))) {
WARN_ON(!PageCompound(page));
return page_size(page);
}
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
return slab_ksize(page->slab_cache);
return slab_ksize(folio_slab(folio)->slab_cache);
}
EXPORT_SYMBOL(__ksize);
void kfree(const void *x)
{
struct page *page;
struct folio *folio;
struct slab *slab;
void *object = (void *)x;
trace_kfree(_RET_IP_, x);
......@@ -4553,12 +4553,13 @@ void kfree(const void *x)
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
free_nonslab_page(page, object);
folio = virt_to_folio(x);
if (unlikely(!folio_test_slab(folio))) {
free_large_kmalloc(folio, object);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
slab = folio_slab(folio);
slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
......@@ -4578,8 +4579,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
int node;
int i;
struct kmem_cache_node *n;
struct page *page;
struct page *t;
struct slab *slab;
struct slab *t;
struct list_head discard;
struct list_head promote[SHRINK_PROMOTE_MAX];
unsigned long flags;
......@@ -4596,22 +4597,22 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
* Build lists of slabs to discard or promote.
*
* Note that concurrent frees may occur while we hold the
* list_lock. page->inuse here is the upper limit.
* list_lock. slab->inuse here is the upper limit.
*/
list_for_each_entry_safe(page, t, &n->partial, slab_list) {
int free = page->objects - page->inuse;
list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
int free = slab->objects - slab->inuse;
/* Do not reread page->inuse */
/* Do not reread slab->inuse */
barrier();
/* We do not keep full slabs on the list */
BUG_ON(free <= 0);
if (free == page->objects) {
list_move(&page->slab_list, &discard);
if (free == slab->objects) {
list_move(&slab->slab_list, &discard);
n->nr_partial--;
} else if (free <= SHRINK_PROMOTE_MAX)
list_move(&page->slab_list, promote + free - 1);
list_move(&slab->slab_list, promote + free - 1);
}
/*
......@@ -4624,8 +4625,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
list_for_each_entry_safe(page, t, &discard, slab_list)
discard_slab(s, page);
list_for_each_entry_safe(slab, t, &discard, slab_list)
discard_slab(s, slab);
if (slabs_node(s, node))
ret = 1;
......@@ -4786,7 +4787,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_kmem_cache_node(s, node, n) {
struct page *p;
struct slab *p;
list_for_each_entry(p, &n->partial, slab_list)
p->slab_cache = s;
......@@ -4964,54 +4965,54 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
static int count_inuse(struct page *page)
static int count_inuse(struct slab *slab)
{
return page->inuse;
return slab->inuse;
}
static int count_total(struct page *page)
static int count_total(struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif
#ifdef CONFIG_SLUB_DEBUG
static void validate_slab(struct kmem_cache *s, struct page *page,
static void validate_slab(struct kmem_cache *s, struct slab *slab,
unsigned long *obj_map)
{
void *p;
void *addr = page_address(page);
void *addr = slab_address(slab);
unsigned long flags;
slab_lock(page, &flags);
slab_lock(slab, &flags);
if (!check_slab(s, page) || !on_freelist(s, page, NULL))
if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
goto unlock;
/* Now we know that a valid freelist exists */
__fill_map(obj_map, s, page);
for_each_object(p, s, addr, page->objects) {
__fill_map(obj_map, s, slab);
for_each_object(p, s, addr, slab->objects) {
u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
if (!check_object(s, page, p, val))
if (!check_object(s, slab, p, val))
break;
}
unlock:
slab_unlock(page, &flags);
slab_unlock(slab, &flags);
}
static int validate_slab_node(struct kmem_cache *s,
struct kmem_cache_node *n, unsigned long *obj_map)
{
unsigned long count = 0;
struct page *page;
struct slab *slab;
unsigned long flags;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list) {
validate_slab(s, page, obj_map);
list_for_each_entry(slab, &n->partial, slab_list) {
validate_slab(s, slab, obj_map);
count++;
}
if (count != n->nr_partial) {
......@@ -5023,8 +5024,8 @@ static int validate_slab_node(struct kmem_cache *s,
if (!(s->flags & SLAB_STORE_USER))
goto out;
list_for_each_entry(page, &n->full, slab_list) {
validate_slab(s, page, obj_map);
list_for_each_entry(slab, &n->full, slab_list) {
validate_slab(s, slab, obj_map);
count++;
}
if (count != atomic_long_read(&n->nr_slabs)) {
......@@ -5190,15 +5191,15 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc,
struct slab *slab, enum track_item alloc,
unsigned long *obj_map)
{
void *addr = page_address(page);
void *addr = slab_address(slab);
void *p;
__fill_map(obj_map, s, page);
__fill_map(obj_map, s, slab);
for_each_object(p, s, addr, page->objects)
for_each_object(p, s, addr, slab->objects)
if (!test_bit(__obj_to_index(s, addr, p), obj_map))
add_location(t, s, get_track(s, p, alloc));
}
......@@ -5240,35 +5241,37 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
cpu);
int node;
struct page *page;
struct slab *slab;
page = READ_ONCE(c->page);
if (!page)
slab = READ_ONCE(c->slab);
if (!slab)
continue;
node = page_to_nid(page);
node = slab_nid(slab);
if (flags & SO_TOTAL)
x = page->objects;
x = slab->objects;
else if (flags & SO_OBJECTS)
x = page->inuse;
x = slab->inuse;
else
x = 1;
total += x;
nodes[node] += x;
page = slub_percpu_partial_read_once(c);
if (page) {
node = page_to_nid(page);
#ifdef CONFIG_SLUB_CPU_PARTIAL
slab = slub_percpu_partial_read_once(c);
if (slab) {
node = slab_nid(slab);
if (flags & SO_TOTAL)
WARN_ON_ONCE(1);
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
x = page->pages;
x = slab->slabs;
total += x;
nodes[node] += x;
}
#endif
}
}
......@@ -5467,33 +5470,35 @@ SLAB_ATTR_RO(objects_partial);
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
int pages = 0;
int cpu;
int slabs = 0;
int cpu __maybe_unused;
int len = 0;
#ifdef CONFIG_SLUB_CPU_PARTIAL
for_each_online_cpu(cpu) {
struct page *page;
struct slab *slab;
page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (page)
pages += page->pages;
if (slab)
slabs += slab->slabs;
}
#endif
/* Approximate half-full pages , see slub_set_cpu_partial() */
objects = (pages * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
/* Approximate half-full slabs, see slub_set_cpu_partial() */
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
#ifdef CONFIG_SMP
#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
for_each_online_cpu(cpu) {
struct page *page;
struct slab *slab;
page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (page) {
pages = READ_ONCE(page->pages);
objects = (pages * oo_objects(s->oo)) / 2;
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
slabs = READ_ONCE(slab->slabs);
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, pages);
cpu, objects, slabs);
}
}
#endif
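
Since the per-cpu lists track only a slab count, the object figure reported here is an estimate that assumes each cached partial slab is half full. For example (made-up numbers): with oo_objects(s->oo) = 32 objects per slab and 6 slabs cached on one cpu, that cpu contributes 6 * 32 / 2 = 96 estimated objects, printed as 96(6).
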
......@@ -6161,16 +6166,16 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
struct page *page;
struct slab *slab;
if (!atomic_long_read(&n->nr_slabs))
continue;
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, slab_list)
process_slab(t, s, page, alloc, obj_map);
list_for_each_entry(page, &n->full, slab_list)
process_slab(t, s, page, alloc, obj_map);
list_for_each_entry(slab, &n->partial, slab_list)
process_slab(t, s, slab, alloc, obj_map);
list_for_each_entry(slab, &n->full, slab_list)
process_slab(t, s, slab, alloc, obj_map);
spin_unlock_irqrestore(&n->list_lock, flags);
}
......
......@@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
magic = (unsigned long) page->freelist;
magic = page->index;
BUG_ON(magic == NODE_INFO);
......
......@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
#include "slab.h"
/*
* Checks if a given pointer and length is contained by the current
......@@ -223,7 +224,7 @@ static inline void check_page_span(const void *ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
struct page *page;
struct folio *folio;
if (!virt_addr_valid(ptr))
return;
......@@ -231,16 +232,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
 * highmem page or fall back to virt_to_page(). The following
* is effectively a highmem-aware virt_to_head_page().
* is effectively a highmem-aware virt_to_slab().
*/
page = compound_head(kmap_to_page((void *)ptr));
folio = page_folio(kmap_to_page((void *)ptr));
if (PageSlab(page)) {
if (folio_test_slab(folio)) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, page, to_user);
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
check_page_span(ptr, n, page, to_user);
check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
}
......
......@@ -17,10 +17,10 @@
*
* Usage of struct page fields:
* page->private: points to zspage
* page->freelist(index): links together all component pages of a zspage
* page->index: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
 * to store the handle.
* page->units: first object offset in a subpage of zspage
* page->page_type: first object offset in a subpage of zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
......@@ -489,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage)
static inline int get_first_obj_offset(struct page *page)
{
return page->units;
return page->page_type;
}
static inline void set_first_obj_offset(struct page *page, int offset)
{
page->units = offset;
page->page_type = offset;
}
static inline unsigned int get_freeobj(struct zspage *zspage)
......@@ -827,7 +827,7 @@ static struct page *get_next_page(struct page *page)
if (unlikely(PageHugeObject(page)))
return NULL;
return page->freelist;
return (struct page *)page->index;
}
/**
......@@ -901,7 +901,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
page->index = 0;
}
static int trylock_zspage(struct zspage *zspage)
......@@ -1027,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
/*
* Allocate individual pages and link them together as:
* 1. all pages are linked together using page->freelist
* 1. all pages are linked together using page->index
 * 2. each sub-page points to zspage using page->private
*
* we set PG_private to identify the first page (i.e. no other sub-page
......@@ -1036,7 +1036,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
for (i = 0; i < nr_pages; i++) {
page = pages[i];
set_page_private(page, (unsigned long)zspage);
page->freelist = NULL;
page->index = 0;
if (i == 0) {
zspage->first_page = page;
SetPagePrivate(page);
......@@ -1044,7 +1044,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
class->pages_per_zspage == 1))
SetPageHugeObject(page);
} else {
prev_page->freelist = page;
prev_page->index = (unsigned long)page;
}
prev_page = page;
}
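
Sketch of walking a zspage's component pages via the repurposed page->index link described in this hunk. A standalone model: struct fake_page stands in for struct page, and index carries the next pointer exactly as the patched zsmalloc stores it (cast to unsigned long, 0 terminating the chain):

#include <stdio.h>

struct fake_page {
        unsigned long index;    /* next component page, or 0 at the tail */
};

/* Analogue of get_next_page() after this change. */
static struct fake_page *next_component(struct fake_page *page)
{
        return (struct fake_page *)page->index;
}

int main(void)
{
        struct fake_page p2 = { .index = 0 };
        struct fake_page p1 = { .index = (unsigned long)&p2 };
        int n = 0;

        for (struct fake_page *p = &p1; p; p = next_component(p))
                n++;
        printf("%d pages in chain\n", n);       /* prints 2 */
        return 0;
}
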
......