Commit fb26504d authored by Inada Naoki, committed by GitHub

bpo-37543: optimize pymalloc (#14674)

PyObject_Malloc() and PyObject_Free() now partially inline pymalloc_alloc
and pymalloc_free.
But when PGO is not used, the compiler doesn't know which parts of
pymalloc_alloc and pymalloc_free are hot.
parent 7036e1de
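
To make the intent concrete, here is a minimal standalone sketch (not part of this patch) of the two ingredients the change relies on: __builtin_expect()-based LIKELY/UNLIKELY hints so the compiler can lay out cold branches away from the hot path even without profile data, and a small static inline fast path that pushes rare cases into a separate helper. The alloc_fast/alloc_slow names, the 512-byte threshold, and the noinline attribute are invented for this example; the patch itself simply splits the cold code into separate non-inline functions.

#include <stdio.h>
#include <stdlib.h>

#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#  define LIKELY(value) __builtin_expect((value), 1)
#else
#  define UNLIKELY(value) (value)
#  define LIKELY(value) (value)
#endif

/* Cold path: kept in a separate, non-inlined function so it does not
   bloat the callers of the fast path. */
#if defined(__GNUC__)
__attribute__((noinline))
#endif
static void *
alloc_slow(size_t nbytes)
{
    return malloc(nbytes ? nbytes : 1);
}

/* Hot path: because this is static inline and the branch is annotated,
   the compiler keeps the common case short even without PGO data. */
static inline void *
alloc_fast(size_t nbytes)
{
    if (UNLIKELY(nbytes == 0 || nbytes > 512)) {
        return alloc_slow(nbytes);
    }
    return malloc(nbytes);   /* stand-in for the pymalloc pool fast path */
}

int main(void)
{
    void *small = alloc_fast(64);
    void *large = alloc_fast(100000);
    printf("%p %p\n", small, large);
    free(small);
    free(large);
    return 0;
}
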
@@ -710,19 +710,21 @@ PyObject_Free(void *ptr)
}
#ifdef WITH_PYMALLOC
#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>
/* If we're using GCC, use __builtin_expect() to reduce overhead of
the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
# define UNLIKELY(value) __builtin_expect((value), 0)
# define LIKELY(value) __builtin_expect((value), 1)
#else
# define UNLIKELY(value) (value)
# define LIKELY(value) (value)
#endif
#ifdef WITH_PYMALLOC
#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif
@@ -1424,96 +1426,48 @@ address_in_range(void *p, poolp pool)
/*==========================================================================*/
/* pymalloc allocator
The basic blocks are ordered by decreasing execution frequency,
which minimizes the number of jumps in the most common cases,
improves branching prediction and instruction scheduling (small
block allocations typically result in a couple of instructions).
Unless the optimizer reorders everything, being too smart...
Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
Return 0 if pymalloc failed to allocate the memory block: on bigger
requests, on error in the code below (as a last chance to serve the request)
or when the max memory limit has been reached. */
static int
pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
// Called when the freelist is exhausted. Extend the freelist if there is
// space for a block. Otherwise, remove this pool from usedpools.
static void
pymalloc_pool_extend(poolp pool, uint size)
{
block *bp;
poolp pool;
poolp next;
uint size;
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1)) {
running_on_valgrind = RUNNING_ON_VALGRIND;
}
if (UNLIKELY(running_on_valgrind)) {
return 0;
}
#endif
if (nbytes == 0) {
return 0;
}
if (nbytes > SMALL_REQUEST_THRESHOLD) {
return 0;
if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
/* There is room for another block. */
pool->freeblock = (block*)pool + pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(block **)(pool->freeblock) = NULL;
return;
}
/*
* Most frequent paths first
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = usedpools[size + size];
if (pool != pool->nextpool) {
/*
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(block **)bp) != NULL) {
goto success;
}
/*
* Reached the end of the free list, try to extend it.
*/
if (pool->nextoffset <= pool->maxnextoffset) {
/* There is room for another block. */
pool->freeblock = (block*)pool +
pool->nextoffset;
pool->nextoffset += INDEX2SIZE(size);
*(block **)(pool->freeblock) = NULL;
goto success;
}
/* Pool is full, unlink from used pools. */
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
goto success;
}
/* Pool is full, unlink from used pools. */
poolp next;
next = pool->nextpool;
pool = pool->prevpool;
next->prevpool = pool;
pool->nextpool = next;
}
/* Called when pymalloc_alloc cannot allocate a block from usedpool.
 * This function takes a new pool and allocates a block from it.
 */
static void*
allocate_from_new_pool(uint size)
{
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
if (usable_arenas == NULL) {
if (UNLIKELY(usable_arenas == NULL)) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (narenas_currently_allocated >= MAX_ARENAS) {
goto failed;
return NULL;
}
#endif
usable_arenas = new_arena();
if (usable_arenas == NULL) {
goto failed;
return NULL;
}
usable_arenas->nextarena =
usable_arenas->prevarena = NULL;
usable_arenas->nextarena = usable_arenas->prevarena = NULL;
assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
}
@@ -1536,12 +1490,12 @@ pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
}
/* Try to get a cached free pool. */
pool = usable_arenas->freepools;
if (pool != NULL) {
poolp pool = usable_arenas->freepools;
if (LIKELY(pool != NULL)) {
/* Unlink from cached pools. */
usable_arenas->freepools = pool->nextpool;
--usable_arenas->nfreepools;
if (usable_arenas->nfreepools == 0) {
usable_arenas->nfreepools--;
if (UNLIKELY(usable_arenas->nfreepools == 0)) {
/* Wholly allocated: remove. */
assert(usable_arenas->freepools == NULL);
assert(usable_arenas->nextarena == NULL ||
@@ -1564,73 +1518,123 @@ pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
(block*)usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
}
else {
/* Carve off a new pool. */
assert(usable_arenas->nfreepools > 0);
assert(usable_arenas->freepools == NULL);
pool = (poolp)usable_arenas->pool_address;
assert((block*)pool <= (block*)usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(usable_arenas - arenas);
assert(&arenas[pool->arenaindex] == usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
usable_arenas->pool_address += POOL_SIZE;
--usable_arenas->nfreepools;
init_pool:
/* Frontlink to used pools. */
next = usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(block **)bp;
goto success;
if (usable_arenas->nfreepools == 0) {
assert(usable_arenas->nextarena == NULL ||
usable_arenas->nextarena->prevarena ==
usable_arenas);
/* Unlink the arena: it is completely allocated. */
usable_arenas = usable_arenas->nextarena;
if (usable_arenas != NULL) {
usable_arenas->prevarena = NULL;
assert(usable_arenas->address != 0);
}
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
}
/* Frontlink to used pools. */
block *bp;
poolp next = usedpools[size + size]; /* == prev */
pool->nextpool = next;
pool->prevpool = next;
next->nextpool = pool;
next->prevpool = pool;
pool->ref.count = 1;
if (pool->szidx == size) {
/* Luckily, this pool last contained blocks
* of the same size class, so its header
* and free list are already initialized.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (block *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
*(block **)(pool->freeblock) = NULL;
goto success;
bp = pool->freeblock;
assert(bp != NULL);
pool->freeblock = *(block **)bp;
return bp;
}
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (block *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
*(block **)(pool->freeblock) = NULL;
return bp;
}
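
The initialization above relies on obmalloc's core trick: the pool's free list is threaded through the free blocks themselves (each free block stores the pointer to the next free block in its first bytes), and never-used blocks are carved lazily by advancing nextoffset. Below is a minimal standalone sketch of that technique; the demo_* names, POOL_BYTES and BLOCK_BYTES are invented for illustration and do not match obmalloc's layout or sizes.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define POOL_BYTES  4096
#define BLOCK_BYTES 32          /* must be >= sizeof(void *) */

typedef struct {
    unsigned char mem[POOL_BYTES];
    void *freeblock;            /* head of the embedded free list */
    size_t nextoffset;          /* offset of the next never-used block */
} demo_pool;

static void
demo_pool_init(demo_pool *pool)
{
    /* The free list initially holds just the first block; further
       blocks are carved lazily by advancing nextoffset. */
    pool->freeblock = pool->mem;
    *(void **)pool->freeblock = NULL;
    pool->nextoffset = BLOCK_BYTES;
}

static void *
demo_pool_alloc(demo_pool *pool)
{
    void *bp = pool->freeblock;
    if (bp != NULL) {
        /* Pop the head of the free list. */
        pool->freeblock = *(void **)bp;
        if (pool->freeblock == NULL
            && pool->nextoffset + BLOCK_BYTES <= POOL_BYTES) {
            /* Free list exhausted: extend it by one never-used block. */
            pool->freeblock = pool->mem + pool->nextoffset;
            *(void **)pool->freeblock = NULL;
            pool->nextoffset += BLOCK_BYTES;
        }
    }
    return bp;
}

static void
demo_pool_free(demo_pool *pool, void *p)
{
    /* Push the block back on the free list; the link pointer lives in
       the first bytes of the freed block itself. */
    *(void **)p = pool->freeblock;
    pool->freeblock = p;
}

int main(void)
{
    demo_pool pool;
    demo_pool_init(&pool);
    void *a = demo_pool_alloc(&pool);
    void *b = demo_pool_alloc(&pool);
    assert(a != NULL && b != NULL && a != b);
    demo_pool_free(&pool, a);
    assert(demo_pool_alloc(&pool) == a);   /* LIFO reuse of freed blocks */
    printf("ok\n");
    return 0;
}
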
/* pymalloc allocator
Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
Return 0 if pymalloc failed to allocate the memory block: on bigger
requests, on error in the code below (as a last chance to serve the request)
or when the max memory limit has been reached.
*/
static inline int
pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
{
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1)) {
running_on_valgrind = RUNNING_ON_VALGRIND;
}
if (UNLIKELY(running_on_valgrind)) {
return 0;
}
#endif
/* Carve off a new pool. */
assert(usable_arenas->nfreepools > 0);
assert(usable_arenas->freepools == NULL);
pool = (poolp)usable_arenas->pool_address;
assert((block*)pool <= (block*)usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
pool->arenaindex = (uint)(usable_arenas - arenas);
assert(&arenas[pool->arenaindex] == usable_arenas);
pool->szidx = DUMMY_SIZE_IDX;
usable_arenas->pool_address += POOL_SIZE;
--usable_arenas->nfreepools;
if (usable_arenas->nfreepools == 0) {
assert(usable_arenas->nextarena == NULL ||
usable_arenas->nextarena->prevarena ==
usable_arenas);
/* Unlink the arena: it is completely allocated. */
usable_arenas = usable_arenas->nextarena;
if (usable_arenas != NULL) {
usable_arenas->prevarena = NULL;
assert(usable_arenas->address != 0);
}
if (UNLIKELY(nbytes == 0)) {
return 0;
}
if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
return 0;
}
goto init_pool;
uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
poolp pool = usedpools[size + size];
block *bp;
if (LIKELY(pool != pool->nextpool)) {
/*
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
++pool->ref.count;
bp = pool->freeblock;
if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
// Reached the end of the free list, try to extend it.
pymalloc_pool_extend(pool, size);
}
}
else {
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
bp = allocate_from_new_pool(size);
if (UNLIKELY(bp == NULL)) {
return 0;
}
}
success:
assert(bp != NULL);
*ptr_p = (void *)bp;
return 1;
failed:
return 0;
}
@@ -1638,7 +1642,7 @@ static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
void* ptr;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
if (LIKELY(pymalloc_alloc(ctx, &ptr, nbytes))) {
return ptr;
}
@@ -1658,7 +1662,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
size_t nbytes = nelem * elsize;
if (pymalloc_alloc(ctx, &ptr, nbytes)) {
if (LIKELY(pymalloc_alloc(ctx, &ptr, nbytes))) {
memset(ptr, 0, nbytes);
return ptr;
}
@@ -1671,88 +1675,37 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
}
/* Free a memory block allocated by pymalloc_alloc().
Return 1 if it was freed.
Return 0 if the block was not allocated by pymalloc_alloc(). */
static int
pymalloc_free(void *ctx, void *p)
static void
insert_to_usedpool(poolp pool)
{
poolp pool;
block *lastfree;
poolp next, prev;
uint size;
assert(p != NULL);
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
assert(pool->ref.count > 0); /* else the pool is empty */
pool = POOL_ADDR(p);
if (!address_in_range(p, pool)) {
return 0;
}
/* We allocated this address. */
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
*(block **)p = lastfree = pool->freeblock;
pool->freeblock = (block *)p;
if (!lastfree) {
/* Pool was full, so doesn't currently live in any list:
* link it to the front of the appropriate usedpools[] list.
* This mimics LRU pool usage for new allocations and
* targets optimal filling when several pools contain
* blocks of the same size class.
*/
--pool->ref.count;
assert(pool->ref.count > 0); /* else the pool is empty */
size = pool->szidx;
next = usedpools[size + size];
prev = next->prevpool;
/* insert pool before next: prev <-> pool <-> next */
pool->nextpool = next;
pool->prevpool = prev;
next->prevpool = pool;
prev->nextpool = pool;
goto success;
}
uint size = pool->szidx;
poolp next = usedpools[size + size];
poolp prev = next->prevpool;
struct arena_object* ao;
uint nf; /* ao->nfreepools */
/* insert pool before next: prev <-> pool <-> next */
pool->nextpool = next;
pool->prevpool = prev;
next->prevpool = pool;
prev->nextpool = pool;
}
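
For context on the list manipulation above: each entry in usedpools is the header of a circular doubly-linked list of partly-used pools for one size class, and the header doubles as a sentinel, so an empty list is one whose header points at itself and insertion needs no special cases. A generic standalone sketch of such a sentinel list follows; the node and list_* names are invented for the example and are not obmalloc's.

#include <assert.h>
#include <stdio.h>

/* A circular doubly-linked list with a sentinel node: an empty list is
   one whose sentinel points at itself, so insert/unlink never branch. */
typedef struct node {
    struct node *next;
    struct node *prev;
} node;

static void
list_init(node *sentinel)
{
    sentinel->next = sentinel;
    sentinel->prev = sentinel;
}

static int
list_empty(const node *sentinel)
{
    return sentinel->next == sentinel;
}

/* Frontlink: insert n right after the sentinel. */
static void
list_push_front(node *sentinel, node *n)
{
    node *next = sentinel->next;
    n->prev = sentinel;
    n->next = next;
    next->prev = n;
    sentinel->next = n;
}

/* Unlink n from whatever list it is on. */
static void
list_unlink(node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
}

int main(void)
{
    node head, a, b;
    list_init(&head);
    assert(list_empty(&head));
    list_push_front(&head, &a);
    list_push_front(&head, &b);     /* head -> b -> a -> head */
    assert(head.next == &b && b.next == &a && a.next == &head);
    list_unlink(&b);
    assert(head.next == &a && !list_empty(&head));
    printf("ok\n");
    return 0;
}
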
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (--pool->ref.count != 0) {
/* pool isn't empty: leave it in usedpools */
goto success;
}
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
next = pool->nextpool;
prev = pool->prevpool;
static void
insert_to_freepool(poolp pool)
{
poolp next = pool->nextpool;
poolp prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
ao = &arenas[pool->arenaindex];
struct arena_object *ao = &arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
nf = ao->nfreepools;
uint nf = ao->nfreepools;
/* If this is the rightmost arena with this number of free pools,
* nfp2lasta[nf] needs to change. Caution: if nf is 0, there
* are no arenas in usable_arenas with that value.
@@ -1826,7 +1779,7 @@ pymalloc_free(void *ctx, void *p)
ao->address = 0; /* mark unassociated */
--narenas_currently_allocated;
goto success;
return;
}
if (nf == 1) {
@@ -1845,7 +1798,7 @@ pymalloc_free(void *ctx, void *p)
nfp2lasta[1] = ao;
}
goto success;
return;
}
/* If this arena is now out of order, we need to keep
@@ -1862,7 +1815,7 @@ pymalloc_free(void *ctx, void *p)
/* If this was the rightmost of the old size, it remains in place. */
if (ao == lastnf) {
/* Case 4. Nothing to do. */
goto success;
return;
}
/* If ao were the only arena in the list, the last block would have
* gotten us out.
@@ -1898,10 +1851,65 @@ pymalloc_free(void *ctx, void *p)
assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
assert((usable_arenas == ao && ao->prevarena == NULL)
|| ao->prevarena->nextarena == ao);
}
/* Free a memory block allocated by pymalloc_alloc().
Return 1 if it was freed.
Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(void *ctx, void *p)
{
assert(p != NULL);
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind > 0)) {
return 0;
}
#endif
poolp pool = POOL_ADDR(p);
if (UNLIKELY(!address_in_range(p, pool))) {
return 0;
}
/* We allocated this address. */
/* Link p to the start of the pool's freeblock list. Since
* the pool had at least the p block outstanding, the pool
* wasn't empty (so it's already in a usedpools[] list, or
* was full and is in no list -- it's not in the freeblocks
* list in any case).
*/
assert(pool->ref.count > 0); /* else it was empty */
block *lastfree = pool->freeblock;
*(block **)p = lastfree;
pool->freeblock = (block *)p;
pool->ref.count--;
if (UNLIKELY(lastfree == NULL)) {
/* Pool was full, so doesn't currently live in any list:
* link it to the front of the appropriate usedpools[] list.
* This mimics LRU pool usage for new allocations and
* targets optimal filling when several pools contain
* blocks of the same size class.
*/
insert_to_usedpool(pool);
return 1;
}
goto success;
/* freeblock wasn't NULL, so the pool wasn't full,
* and the pool is in a usedpools[] list.
*/
if (LIKELY(pool->ref.count != 0)) {
/* pool isn't empty: leave it in usedpools */
return 1;
}
success:
/* Pool is now empty: unlink from usedpools, and
* link to the front of freepools. This ensures that
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
insert_to_freepool(pool);
return 1;
}
@@ -1914,7 +1922,7 @@ _PyObject_Free(void *ctx, void *p)
return;
}
if (!pymalloc_free(ctx, p)) {
if (UNLIKELY(!pymalloc_free(ctx, p))) {
/* pymalloc didn't allocate this address */
PyMem_RawFree(p);
raw_allocated_blocks--;