Commit 436381ea authored by Vlastimil Babka's avatar Vlastimil Babka

Merge branch 'slab/for-6.11/buckets' into slab/for-next

Merge all the slab patches previously collected on top of v6.10-rc1,
over cleanups/fixes that had to be based on rc6.
parents a52c6330 d73778e4
......@@ -145,7 +145,9 @@ smaller than page size.
The address of a chunk allocated with `kmalloc` is aligned to at least
ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
alignment is also guaranteed to be at least the respective size.
alignment is also guaranteed to be at least the respective size. For other
sizes, the alignment is guaranteed to be at least the largest power-of-two
divisor of the size.
Chunks allocated with kmalloc() can be resized with krealloc(). Similarly
to kmalloc_array(): a helper for resizing arrays is provided in the form of
......
......@@ -1110,7 +1110,7 @@ static inline unsigned int compound_order(struct page *page)
*
* Return: The order of the folio.
*/
static inline unsigned int folio_order(struct folio *folio)
static inline unsigned int folio_order(const struct folio *folio)
{
if (!folio_test_large(folio))
return 0;
......@@ -2150,7 +2150,7 @@ static inline struct folio *folio_next(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The base-2 logarithm of the size of this folio.
*/
static inline unsigned int folio_shift(struct folio *folio)
static inline unsigned int folio_shift(const struct folio *folio)
{
return PAGE_SHIFT + folio_order(folio);
}
......@@ -2163,7 +2163,7 @@ static inline unsigned int folio_shift(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The number of bytes in this folio.
*/
static inline size_t folio_size(struct folio *folio)
static inline size_t folio_size(const struct folio *folio)
{
return PAGE_SIZE << folio_order(folio);
}
......
......@@ -38,11 +38,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
......
......@@ -426,8 +426,9 @@ enum kmalloc_cache_type {
NR_KMALLOC_TYPES
};
extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
/*
* Define gfp bits that should not be set for KMALLOC_NORMAL.
......@@ -528,9 +529,6 @@ static_assert(PAGE_SHIFT <= 20);
#include <linux/alloc_tag.h>
void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
......@@ -551,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
void kmem_cache_free(struct kmem_cache *s, void *objp);
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *));
/*
* Bulk allocation and freeing operations. These are accelerated in an
* allocator specific way to avoid taking locks repeatedly or building
......@@ -568,31 +570,49 @@ static __always_inline void kfree_bulk(size_t size, void **p)
kmem_cache_free_bulk(NULL, size, p);
}
void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
__alloc_size(1);
#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_kmalloc_alignment __alloc_size(3);
/*
* These macros allow declaring a kmem_buckets * parameter alongside size, which
* can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
* sites don't have to pass NULL.
*/
#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b)
#define PASS_BUCKET_PARAM(_b) (_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b) (_size)
#define PASS_BUCKET_PARAM(_b) NULL
#endif
void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size) __assume_kmalloc_alignment
__alloc_size(4);
#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
/*
* The following functions are not to be used directly and are intended only
* for internal use from kmalloc() and kmalloc_node()
* with the exception of kunit tests
*/
#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
void *__kmalloc_noprof(size_t size, gfp_t flags)
__assume_kmalloc_alignment __alloc_size(1);
void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
__alloc_size(1);
#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
__assume_kmalloc_alignment __alloc_size(1);
void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_kmalloc_alignment __alloc_size(3);
void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
__alloc_size(1);
#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
__assume_kmalloc_alignment __alloc_size(4);
void *__kmalloc_large_noprof(size_t size, gfp_t flags)
__assume_page_alignment __alloc_size(1);
void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
__assume_page_alignment __alloc_size(1);
/**
* kmalloc - allocate kernel memory
......@@ -604,7 +624,8 @@ void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_pag
*
* The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
* bytes. For @size of power of two bytes, the alignment is also guaranteed
* to be at least to the size.
* to be at least to the size. For other sizes, the alignment is guaranteed to
* be at least the largest power-of-two divisor of @size.
*
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp_types.h and described at
......@@ -654,10 +675,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_noprof(size, flags);
return __kmalloc_large_noprof(size, flags);
index = kmalloc_index(size);
return kmalloc_trace_noprof(
return __kmalloc_cache_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
......@@ -665,20 +686,26 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
#define kmem_buckets_alloc(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node_noprof(size, flags, node);
return __kmalloc_large_node_noprof(size, flags, node);
index = kmalloc_index(size);
return kmalloc_node_trace_noprof(
return __kmalloc_cache_node_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
return __kmalloc_node_noprof(size, flags, node);
return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
......@@ -729,8 +756,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
*/
#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO)
void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...) \
alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
......@@ -756,7 +785,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
return kmalloc_node_noprof(bytes, flags, node);
return __kmalloc_node_noprof(bytes, flags, node);
return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
......@@ -780,7 +809,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_noprof(size, flags, node) \
__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE)
......@@ -788,6 +819,8 @@ extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_si
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags) \
alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
......
......@@ -42,6 +42,17 @@ struct msg_msgseg {
#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
static kmem_buckets *msg_buckets __ro_after_init;
static int __init init_msg_buckets(void)
{
msg_buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT,
sizeof(struct msg_msg),
DATALEN_MSG, NULL);
return 0;
}
subsys_initcall(init_msg_buckets);
static struct msg_msg *alloc_msg(size_t len)
{
......@@ -50,7 +61,7 @@ static struct msg_msg *alloc_msg(size_t len)
size_t alen;
alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
return NULL;
......
......@@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y
# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SLAB_BUCKETS=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_RANDOM_KMALLOC_CACHES=y
......
......@@ -233,8 +233,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \
kfree(p)); \
checker(expected_size, \
kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \
kfree(p)); \
checker(expected_size, __kmalloc(alloc_size, gfp), \
kfree(p)); \
\
orig = kmalloc(alloc_size, gfp); \
......
......@@ -140,7 +140,7 @@ static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18);
kasan_disable_current();
......
......@@ -273,6 +273,23 @@ config SLAB_FREELIST_HARDENED
sacrifices to harden the kernel slab allocator against common
freelist exploit methods.
config SLAB_BUCKETS
bool "Support allocation from separate kmalloc buckets"
depends on !SLUB_TINY
default SLAB_FREELIST_HARDENED
help
Kernel heap attacks frequently depend on being able to create
specifically-sized allocations with user-controlled contents
that will be allocated into the same kmalloc bucket as a
target object. To avoid sharing these allocation buckets,
provide an explicitly separated set of buckets to be used for
user-controlled allocations. This may very slightly increase
memory fragmentation, though in practice it's only a handful
of extra pages since the bulk of user-controlled allocations
are relatively long-lived.
If unsure, say Y.
config SLUB_STATS
default n
bool "Enable performance statistics"
......
......@@ -168,7 +168,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
return folio_test_active((struct folio *)slab_folio(slab));
return folio_test_active(slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
......@@ -213,7 +213,7 @@ static inline struct slab *virt_to_slab(const void *addr)
static inline int slab_order(const struct slab *slab)
{
return folio_order((struct folio *)slab_folio(slab));
return folio_order(slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
......@@ -405,16 +405,18 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (!b)
b = &kmalloc_caches[kmalloc_type(flags, caller)];
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
index = fls(size - 1);
return kmalloc_caches[kmalloc_type(flags, caller)][index];
return (*b)[index];
}
gfp_t kmalloc_fix_flags(gfp_t flags);
......
......@@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
}
EXPORT_SYMBOL(kmem_cache_create);
static struct kmem_cache *kmem_buckets_cache __ro_after_init;
/**
* kmem_buckets_create - Create a set of caches that handle dynamic sized
* allocations via kmem_buckets_alloc()
* @name: A prefix string which is used in /proc/slabinfo to identify this
* cache. The individual caches with have their sizes as the suffix.
* @flags: SLAB flags (see kmem_cache_create() for details).
* @useroffset: Starting offset within an allocation that may be copied
* to/from userspace.
* @usersize: How many bytes, starting at @useroffset, may be copied
* to/from userspace.
* @ctor: A constructor for the objects, run when new allocations are made.
*
* Cannot be called within an interrupt, but can be interrupted.
*
* Return: a pointer to the cache on success, NULL on failure. When
* CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and
* subsequent calls to kmem_buckets_alloc() will fall back to kmalloc().
* (i.e. callers only need to check for NULL on failure.)
*/
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
unsigned int useroffset,
unsigned int usersize,
void (*ctor)(void *))
{
kmem_buckets *b;
int idx;
/*
* When the separate buckets API is not built in, just return
* a non-NULL value for the kmem_buckets pointer, which will be
* unused when performing allocations.
*/
if (!IS_ENABLED(CONFIG_SLAB_BUCKETS))
return ZERO_SIZE_PTR;
if (WARN_ON(!kmem_buckets_cache))
return NULL;
b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
if (WARN_ON(!b))
return NULL;
flags |= SLAB_NO_MERGE;
for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
char *short_size, *cache_name;
unsigned int cache_useroffset, cache_usersize;
unsigned int size;
if (!kmalloc_caches[KMALLOC_NORMAL][idx])
continue;
size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
if (!size)
continue;
short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
if (WARN_ON(!short_size))
goto fail;
cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
if (WARN_ON(!cache_name))
goto fail;
if (useroffset >= size) {
cache_useroffset = 0;
cache_usersize = 0;
} else {
cache_useroffset = useroffset;
cache_usersize = min(size - cache_useroffset, usersize);
}
(*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
0, flags, cache_useroffset,
cache_usersize, ctor);
kfree(cache_name);
if (WARN_ON(!(*b)[idx]))
goto fail;
}
return b;
fail:
for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
kmem_cache_destroy((*b)[idx]);
kfree(b);
return NULL;
}
EXPORT_SYMBOL(kmem_buckets_create);
#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
......@@ -617,11 +709,12 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
s->size = s->object_size = size;
/*
* For power of two sizes, guarantee natural alignment for kmalloc
* caches, regardless of SL*B debugging options.
* kmalloc caches guarantee alignment of at least the largest
* power-of-two divisor of the size. For power-of-two sizes,
* it is the size itself.
*/
if (is_power_of_2(size))
align = max(align, size);
if (flags & SLAB_KMALLOC)
align = max(align, 1U << (ffs(size) - 1));
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
......@@ -653,8 +746,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
return s;
}
struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
{ /* initialization for https://llvm.org/pr42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
......@@ -703,7 +795,7 @@ size_t kmalloc_size_roundup(size_t size)
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size;
}
/* Above the smaller buckets, size is a multiple of page size. */
......@@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void)
/* Kmalloc array is now usable */
slab_state = UP;
if (IS_ENABLED(CONFIG_SLAB_BUCKETS))
kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
sizeof(kmem_buckets),
0, SLAB_NO_MERGE, NULL);
}
/**
......
......@@ -788,8 +788,24 @@ static bool slab_add_kunit_errors(void)
kunit_put_resource(resource);
return true;
}
static bool slab_in_kunit_test(void)
{
struct kunit_resource *resource;
if (!kunit_get_current_test())
return false;
resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
if (!resource)
return false;
kunit_put_resource(resource);
return true;
}
#else
static inline bool slab_add_kunit_errors(void) { return false; }
static inline bool slab_in_kunit_test(void) { return false; }
#endif
static inline unsigned int size_from_object(struct kmem_cache *s)
......@@ -962,11 +978,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_slab_info(const struct slab *slab)
{
struct folio *folio = (struct folio *)slab_folio(slab);
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
folio_flags(folio, 0));
&slab->__page_flags);
}
/*
......@@ -1192,8 +1206,6 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
restore_bytes(s, what, value, fault, end);
......@@ -1216,8 +1228,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
* Padding is extended by another word if Redzoning is enabled and
* object_size == inuse.
*
* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
* 0xcc (RED_ACTIVE) for objects in use.
* We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
* 0xcc (SLUB_RED_ACTIVE) for objects in use.
*
* object + s->inuse
* Meta data starts here.
......@@ -1302,15 +1314,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size, kasan_meta_size;
int ret = 1;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
......@@ -1319,14 +1332,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
val, s->object_size - orig_size)) {
return 0;
ret = 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, slab, p, "Alignment padding",
if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
s->inuse - s->object_size);
s->inuse - s->object_size))
ret = 0;
}
}
......@@ -1342,27 +1356,25 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
s->object_size - kasan_meta_size - 1))
return 0;
ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1))
return 0;
ret = 0;
}
/*
* check_pad_bytes cleans up on its own.
*/
check_pad_bytes(s, slab, p);
if (!check_pad_bytes(s, slab, p))
ret = 0;
}
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
/*
* Object and freepointer overlap. Cannot check
* freepointer while object is allocated.
* Cannot check freepointer while object is allocated if
* object and freepointer overlap.
*/
return 1;
/* Check free pointer validity */
if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
!check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
......@@ -1370,9 +1382,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
return 0;
ret = 0;
}
return 1;
if (!ret && !slab_in_kunit_test()) {
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
return ret;
}
static int check_slab(struct kmem_cache *s, struct slab *slab)
......@@ -2554,7 +2572,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
return folio_test_workingset((struct folio *)slab_folio(slab));
return folio_test_workingset(slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
......@@ -4063,7 +4081,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
void *ptr = NULL;
......@@ -4088,35 +4106,35 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
void *kmalloc_large_noprof(size_t size, gfp_t flags)
void *__kmalloc_large_noprof(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_noprof);
EXPORT_SYMBOL(__kmalloc_large_noprof);
void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
void *ret = ___kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node_noprof);
EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
ret = __kmalloc_large_node_noprof(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
......@@ -4125,34 +4143,34 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
s = kmalloc_slab(size, flags, caller);
s = kmalloc_slab(size, b, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node_noprof);
void *__kmalloc_noprof(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_noprof);
void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
}
EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
......@@ -4162,9 +4180,9 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace_noprof);
EXPORT_SYMBOL(__kmalloc_cache_noprof);
void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
......@@ -4174,7 +4192,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace_noprof);
EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
......@@ -5159,10 +5177,9 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
if (slub_debug_orig_size(s) ||
(flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
s->ctor) {
if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor ||
((flags & SLAB_RED_ZONE) &&
(s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
......@@ -5170,7 +5187,9 @@ static int calculate_sizes(struct kmem_cache *s)
*
* This is the case if we do RCU, have a constructor or
* destructor, are poisoning the objects, or are
* redzoning an object smaller than sizeof(void *).
* redzoning an object smaller than sizeof(void *) or are
* redzoning an object with slub_debug_orig_size() enabled,
* in which case the right redzone may be extended.
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
......
......@@ -198,6 +198,16 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
static kmem_buckets *user_buckets __ro_after_init;
static int __init init_user_buckets(void)
{
user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);
return 0;
}
subsys_initcall(init_user_buckets);
/**
* memdup_user - duplicate memory region from user space
*
......@@ -211,7 +221,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
......@@ -237,7 +247,7 @@ void *vmemdup_user(const void __user *src, size_t len)
{
void *p;
p = kvmalloc(len, GFP_USER);
p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
if (!p)
return ERR_PTR(-ENOMEM);
......@@ -594,9 +604,10 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
EXPORT_SYMBOL(vm_mmap);
/**
* kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* failure, fall back to non-contiguous (vmalloc) allocation.
* @size: size of the request.
* @b: which set of kmalloc buckets to allocate from.
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
* @node: numa node to allocate from
*
......@@ -609,7 +620,7 @@ EXPORT_SYMBOL(vm_mmap);
*
* Return: pointer to the allocated memory of %NULL in case of failure
*/
void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;
......@@ -631,7 +642,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
kmalloc_flags &= ~__GFP_NOFAIL;
}
ret = kmalloc_node_noprof(size, kmalloc_flags, node);
ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b), kmalloc_flags, node);
/*
* It doesn't really make sense to fallback to vmalloc for sub page
......@@ -660,7 +671,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node_noprof);
EXPORT_SYMBOL(__kvmalloc_node_noprof);
/**
* kvfree() - Free memory.
......
......@@ -18,23 +18,16 @@ pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: F
// Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
let layout = new_layout.pad_to_align();
let mut size = layout.size();
if layout.align() > bindings::ARCH_SLAB_MINALIGN {
// The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
// to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
// more information).
//
// Note that `layout.size()` (after padding) is guaranteed to be a multiple of
// `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
size = size.next_power_of_two();
}
// Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()`
// which together with the slab guarantees means the `krealloc` will return a properly aligned
// object (see comments in `kmalloc()` for more information).
let size = layout.size();
// SAFETY:
// - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
// function safety requirement.
// - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
// according to the function safety requirement) or a result from `next_power_of_two()`.
// - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according
// to the function safety requirement)
unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 }
}
......
......@@ -1729,6 +1729,7 @@ sub dump_function($$) {
$prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
$prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
$prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
$prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/;
my $define = $prototype =~ s/^#\s*define\s+//; #ak added
$prototype =~ s/__attribute_const__ +//;
$prototype =~ s/__attribute__\s*\(\(
......
......@@ -47,11 +47,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment