Commit 919dd3df authored by Chris Toshok

Add a third size class (between small/large) to the gc.

Port over sgen's idea of LOSSections as a mid-sized arena, so that we now have:

SmallArena (original non-large allocator; free bitmaps, segregated-fit allocation)
  handles objects where size <= 3584 bytes
LargeArena (new code; size-specific free lists)
  handles objects where 3584 < size <= ~1 meg
HugeArena (original large allocator; 1 mmap per object)
  handles objects where size > ~1 meg
parent 4afb0656
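To summarize the new layout before the diff, here is a minimal, hedged sketch of the three-way size dispatch described above. It is not the committed code: the constant names SMALL_LIMIT and LARGE_LIMIT and the pickArena helper are illustrative stand-ins; the real dispatch (Heap::alloc, further down in the diff) compares against sizes[NUM_BUCKETS - 1] and LargeArena::ALLOC_SIZE_LIMIT.

#include <cstddef>
#include <cstdio>

// Illustrative thresholds: 3584 is the largest small-object bucket; the
// large/huge boundary is roughly 1 MB (the commit's actual limit is
// LargeArena::ALLOC_SIZE_LIMIT, slightly under one 1 MB block).
static constexpr size_t SMALL_LIMIT = 3584;
static constexpr size_t LARGE_LIMIT = 1024 * 1024;

enum class ArenaKind { Small, Large, Huge };

// Route an allocation request to the arena that would service it.
static ArenaKind pickArena(size_t bytes) {
    if (bytes > LARGE_LIMIT)
        return ArenaKind::Huge;   // one mmap per object
    if (bytes > SMALL_LIMIT)
        return ArenaKind::Large;  // size-specific free lists inside 1 MB blocks
    return ArenaKind::Small;      // segregated-fit blocks with per-block free bitmaps
}

int main() {
    // 64 bytes -> Small, 64 KB -> Large, 4 MB -> Huge
    printf("%d %d %d\n", (int)pickArena(64), (int)pickArena(64 * 1024), (int)pickArena(4u << 20));
    return 0;
}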
import time

PIDIGITS_LEN = 1500


def pidigits(length):
    i = k = ns = 0
    k1 = 1
    n,a,d,t,u = 1,0,1,0,0
    while(True):
        k += 1
        t = n<<1
        n *= k
        a += t
        k1 += 2
        a *= k1
        d *= k1
        if a >= n:
            t,u = divmod(n*3 + a,d)
            u += n
            if d > u:
                ns = ns*10 + t
                i += 1
                if i % 10 == 0:
                    ns = 0
                if i >= length:
                    break
                a -= d*t
                a *= 10
                n *= 10


def main(n):
    l = []
    for i in range(n):
        t0 = time.time()
        pidigits(PIDIGITS_LEN)
        l.append(time.time() - t0)
    return l

main(100)
@@ -17,7 +17,6 @@
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <sys/mman.h>
#include "core/common.h"
#include "core/util.h"
@@ -34,6 +33,35 @@
namespace pyston {
namespace gc {
void _doFree(GCAllocation* al);
// These template functions are shared by the large and huge arenas
template <class ListT> inline void unlinkNode(ListT* node) {
*node->prev = node->next;
if (node->next)
node->next->prev = node->prev;
}
template <class ListT, typename Free>
inline void sweepHeap(ListT* head, std::function<void(GCAllocation*)> __free, Free free_func) {
auto cur = head;
while (cur) {
GCAllocation* al = cur->data;
if (isMarked(al)) {
clearMark(al);
cur = cur->next;
} else {
__free(al);
unlinkNode(cur);
auto to_free = cur;
cur = cur->next;
free_func(to_free);
}
}
}
static unsigned bytesAllocatedSinceCollection;
static __thread unsigned thread_bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000
@@ -64,73 +92,384 @@ void registerGCManagedBytes(size_t bytes) {
Heap global_heap;
#define PAGE_SIZE 4096
class Arena {
private:
void* start;
void* cur;
GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
Block* b = Block::forPointer(al);
size_t size = b->size;
if (size >= bytes && size < bytes * 2)
return al;
GCAllocation* rtn = heap->alloc(bytes);
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
memcpy(rtn, al, std::min(bytes, size));
VALGRIND_ENABLE_ERROR_REPORTING;
#else
memcpy(rtn, al, std::min(bytes, size));
#endif
_free(al, b);
return rtn;
}
GCAllocation* SmallArena::allocationFrom(void* ptr) {
Block* b = Block::forPointer(ptr);
size_t size = b->size;
int offset = (char*)ptr - (char*)b;
int obj_idx = offset / size;
if (obj_idx < b->minObjIndex() || obj_idx >= b->numObjects())
return NULL;
int atom_idx = obj_idx * b->atomsPerObj();
if (b->isfree.isSet(atom_idx))
return NULL;
return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
}
SmallArena::Block** SmallArena::freeChain(Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
public:
constexpr Arena(void* start) : start(start), cur(start) {}
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
void* doMmap(size_t size) {
assert(size % PAGE_SIZE == 0);
// printf("mmap %ld\n", size);
if (b->isfree.isSet(atom_idx))
continue;
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
cur = (uint8_t*)cur + size;
return mrtn;
if (isMarked(al)) {
clearMark(al);
} else {
_doFree(al);
// assert(p != (void*)0x127000d960); // the main module
b->isfree.set(atom_idx);
}
}
bool contains(void* addr) { return start <= addr && addr < cur; }
};
head = &b->next;
}
return head;
}
static Arena small_arena((void*)0x1270000000L);
static Arena large_arena((void*)0x2270000000L);
struct LargeObj {
LargeObj* next, **prev;
size_t obj_size;
GCAllocation data[0];
void SmallArena::freeUnmarked() {
thread_caches.forEachValue([this](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
// Try to limit the amount of unused memory a thread can hold onto;
// currently pretty dumb, just limit the number of blocks in the free-list
// to 50. (blocks in the full list don't need to be limited, since we're sure
// that the thread had just actively used those.)
// Eventually may want to come up with some scrounging system.
// TODO does this thread locality even help at all?
for (int i = 0; i < 50; i++) {
if (h)
h = h->next;
else
break;
}
if (h) {
removeFromLL(h);
insertIntoLL(&heads[bidx], h);
}
int mmap_size() {
size_t total_size = obj_size + sizeof(LargeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
return total_size;
Block** chain_end = freeChain(&cache->cache_free_heads[bidx]);
freeChain(&cache->cache_full_heads[bidx]);
while (Block* b = cache->cache_full_heads[bidx]) {
removeFromLL(b);
insertIntoLL(chain_end, b);
}
}
});
int capacity() { return mmap_size() - sizeof(LargeObj); }
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block** chain_end = freeChain(&heads[bidx]);
freeChain(&full_heads[bidx]);
static LargeObj* fromAllocation(GCAllocation* alloc) {
char* rtn = (char*)alloc - offsetof(LargeObj, data);
assert((uintptr_t)rtn % PAGE_SIZE == 0);
return reinterpret_cast<LargeObj*>(rtn);
while (Block* b = full_heads[bidx]) {
removeFromLL(b);
insertIntoLL(chain_end, b);
}
};
}
}
#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)
#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)
int64_t los_memory_usage = 0;
static int64_t large_object_count = 0;
static int large_block_count = 0;
void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
size_t num_chunks = size >> CHUNK_BITS;
free_chunks->size = size;
if (num_chunks >= NUM_FREE_LISTS)
num_chunks = 0;
free_chunks->next_size = free_lists[num_chunks];
free_lists[num_chunks] = free_chunks;
}
LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
LargeFreeChunk* free_chunks = NULL;
LargeBlock* section;
size_t i, num_chunks, start_index;
assert((size & (CHUNK_SIZE - 1)) == 0);
while (*list) {
free_chunks = *list;
if (free_chunks->size >= size)
break;
list = &(*list)->next_size;
}
if (!*list)
return NULL;
*list = free_chunks->next_size;
if (free_chunks->size > size)
add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);
num_chunks = size >> CHUNK_BITS;
section = LARGE_BLOCK_FOR_OBJ(free_chunks);
start_index = LARGE_CHUNK_INDEX(free_chunks, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(section->free_chunk_map[i]);
section->free_chunk_map[i] = 0;
}
section->num_free_chunks -= size >> CHUNK_BITS;
assert(section->num_free_chunks >= 0);
return free_chunks;
}
LargeArena::LargeObj* LargeArena::_allocInternal(size_t size) {
LargeBlock* section;
LargeFreeChunk* free_chunks;
size_t num_chunks;
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
num_chunks = size >> CHUNK_BITS;
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
retry:
if (num_chunks >= NUM_FREE_LISTS) {
free_chunks = get_from_size_list(&free_lists[0], size);
} else {
size_t i;
for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
free_chunks = get_from_size_list(&free_lists[i], size);
if (free_chunks)
break;
}
if (!free_chunks)
free_chunks = get_from_size_list(&free_lists[0], size);
}
if (free_chunks)
return (LargeObj*)free_chunks;
section = (LargeBlock*)doMmap(BLOCK_SIZE);
if (!section)
return NULL;
GCAllocation* Heap::allocLarge(size_t size) {
free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
free_chunks->next_size = free_lists[0];
free_lists[0] = free_chunks;
section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;
section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
section->free_chunk_map[0] = 0;
memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);
section->next = blocks;
blocks = section;
++large_block_count;
goto retry;
}
void LargeArena::_freeInternal(LargeObj* obj, size_t size) {
LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
size_t num_chunks, i, start_index;
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
num_chunks = size >> CHUNK_BITS;
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
section->num_free_chunks += num_chunks;
assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);
/*
* We could free the LOS section here if it's empty, but we
* can't unless we also remove its free chunks from the fast
* free lists. Instead, we do it in los_sweep().
*/
start_index = LARGE_CHUNK_INDEX(obj, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(!section->free_chunk_map[i]);
section->free_chunk_map[i] = 1;
}
add_free_chunk((LargeFreeChunk*)obj, size);
}
void LargeArena::_free(LargeObj* obj) {
unlinkNode(obj);
_freeInternal(obj, obj->size);
}
void LargeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](LargeObj* ptr) { _freeInternal(ptr, ptr->size); });
}
GCAllocation* LargeArena::alloc(size_t size) {
registerGCManagedBytes(size);
LOCK_REGION(heap->lock);
// printf ("allocLarge %zu\n", size);
LargeObj* obj = _allocInternal(size + sizeof(GCAllocation) + sizeof(LargeObj));
obj->size = size;
obj->next = head;
if (obj->next)
obj->next->prev = &obj->next;
obj->prev = &head;
head = obj;
large_object_count++;
return obj->data;
}
GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
int size = obj->size;
if (size >= bytes && size < bytes * 2)
return al;
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, obj->size));
_free(obj);
return rtn;
}
void LargeArena::free(GCAllocation* al) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
_free(obj);
}
GCAllocation* LargeArena::allocationFrom(void* ptr) {
LargeObj* obj = NULL;
for (obj = head; obj; obj = obj->next) {
char* end = (char*)&obj->data + obj->size;
if (ptr >= obj->data && ptr < end) {
return &obj->data[0];
}
}
return NULL;
}
void HugeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
GCAllocation* HugeArena::alloc(size_t size) {
registerGCManagedBytes(size);
LOCK_REGION(lock);
LOCK_REGION(heap->lock);
size_t total_size = size + sizeof(LargeObj);
size_t total_size = size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
LargeObj* rtn = (LargeObj*)large_arena.doMmap(total_size);
HugeObj* rtn = (HugeObj*)doMmap(total_size);
rtn->obj_size = size;
rtn->next = large_head;
rtn->next = head;
if (rtn->next)
rtn->next->prev = &rtn->next;
rtn->prev = &large_head;
large_head = rtn;
rtn->prev = &head;
head = rtn;
return rtn->data;
}
static Block* alloc_block(uint64_t size, Block** prev) {
Block* rtn = (Block*)small_arena.doMmap(sizeof(Block));
GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
HugeObj* lobj = HugeObj::fromAllocation(al);
int capacity = lobj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, lobj->obj_size));
_freeHugeObj(lobj);
return rtn;
}
void HugeArena::_freeHugeObj(HugeObj* lobj) {
unlinkNode(lobj);
int r = munmap(lobj, lobj->mmap_size());
assert(r == 0);
}
void HugeArena::free(GCAllocation* al) {
HugeObj* lobj = HugeObj::fromAllocation(al);
_freeHugeObj(lobj);
}
GCAllocation* HugeArena::allocationFrom(void* ptr) {
HugeObj* cur = head;
while (cur) {
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
cur = cur->next;
}
return NULL;
}
SmallArena::Block* SmallArena::alloc_block(uint64_t size, Block** prev) {
Block* rtn = (Block*)doMmap(sizeof(Block));
assert(rtn);
rtn->size = size;
rtn->num_obj = BLOCK_SIZE / size;
@@ -165,7 +504,7 @@ static Block* alloc_block(uint64_t size, Block** prev) {
return rtn;
}
static void insertIntoLL(Block** next_pointer, Block* next) {
void SmallArena::insertIntoLL(Block** next_pointer, Block* next) {
assert(next_pointer);
assert(next);
assert(!next->next);
@@ -178,32 +517,29 @@ static void insertIntoLL(Block** next_pointer, Block* next) {
next->prev = next_pointer;
}
static void removeFromLL(Block* b) {
if (b->next)
b->next->prev = b->prev;
*b->prev = b->next;
void SmallArena::removeFromLL(Block* b) {
unlinkNode(b);
b->next = NULL;
b->prev = NULL;
}
Heap::ThreadBlockCache::~ThreadBlockCache() {
SmallArena::ThreadBlockCache::~ThreadBlockCache() {
LOCK_REGION(heap->lock);
for (int i = 0; i < NUM_BUCKETS; i++) {
while (Block* b = cache_free_heads[i]) {
removeFromLL(b);
insertIntoLL(&heap->heads[i], b);
small->removeFromLL(b);
small->insertIntoLL(&small->heads[i], b);
}
while (Block* b = cache_full_heads[i]) {
removeFromLL(b);
insertIntoLL(&heap->full_heads[i], b);
small->removeFromLL(b);
small->insertIntoLL(&small->full_heads[i], b);
}
}
}
static GCAllocation* allocFromBlock(Block* b) {
GCAllocation* SmallArena::allocFromBlock(Block* b) {
int idx = b->isfree.scanForNext(b->next_to_check);
if (idx == -1)
return NULL;
@@ -212,7 +548,7 @@ static GCAllocation* allocFromBlock(Block* b) {
return reinterpret_cast<GCAllocation*>(rtn);
}
static Block* claimBlock(size_t rounded_size, Block** free_head) {
SmallArena::Block* SmallArena::claimBlock(size_t rounded_size, Block** free_head) {
Block* free_block = *free_head;
if (free_block) {
removeFromLL(free_block);
@@ -222,7 +558,7 @@ static Block* claimBlock(size_t rounded_size, Block** free_head) {
return alloc_block(rounded_size, NULL);
}
GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
registerGCManagedBytes(rounded_size);
Block** free_head = &heads[bucket_idx];
@@ -253,7 +589,7 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
// static StatCounter sc_fallback("gc_allocs_cachemiss");
// sc_fallback.log();
LOCK_REGION(lock);
LOCK_REGION(heap->lock);
assert(*cache_head == NULL);
@@ -269,7 +605,7 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
}
}
void _freeFrom(GCAllocation* alloc, Block* b) {
void SmallArena::_free(GCAllocation* alloc, Block* b) {
assert(b == Block::forPointer(alloc));
size_t size = b->size;
@@ -285,16 +621,7 @@ void _freeFrom(GCAllocation* alloc, Block* b) {
#endif
}
static void _freeLargeObj(LargeObj* lobj) {
*lobj->prev = lobj->next;
if (lobj->next)
lobj->next->prev = lobj->prev;
int r = munmap(lobj, lobj->mmap_size());
assert(r == 0);
}
static void _doFree(GCAllocation* al) {
void _doFree(GCAllocation* al) {
if (VERBOSITY() >= 2)
printf("Freeing %p\n", al->user_data);
@@ -307,178 +634,8 @@ static void _doFree(GCAllocation* al) {
}
}
void Heap::free(GCAllocation* al) {
_doFree(al);
if (large_arena.contains(al)) {
LargeObj* lobj = LargeObj::fromAllocation(al);
_freeLargeObj(lobj);
return;
}
assert(small_arena.contains(al));
Block* b = Block::forPointer(al);
_freeFrom(al, b);
}
GCAllocation* Heap::realloc(GCAllocation* al, size_t bytes) {
if (large_arena.contains(al)) {
LargeObj* lobj = LargeObj::fromAllocation(al);
int capacity = lobj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
GCAllocation* rtn = alloc(bytes);
memcpy(rtn, al, std::min(bytes, lobj->obj_size));
_freeLargeObj(lobj);
return rtn;
}
assert(small_arena.contains(al));
Block* b = Block::forPointer(al);
size_t size = b->size;
if (size >= bytes && size < bytes * 2)
return al;
GCAllocation* rtn = alloc(bytes);
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
memcpy(rtn, al, std::min(bytes, size));
VALGRIND_ENABLE_ERROR_REPORTING;
#else
memcpy(rtn, al, std::min(bytes, size));
#endif
_freeFrom(al, b);
return rtn;
}
GCAllocation* Heap::getAllocationFromInteriorPointer(void* ptr) {
if (large_arena.contains(ptr)) {
LargeObj* cur = large_head;
while (cur) {
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
cur = cur->next;
}
return NULL;
}
if (!small_arena.contains(ptr))
return NULL;
Block* b = Block::forPointer(ptr);
size_t size = b->size;
int offset = (char*)ptr - (char*)b;
int obj_idx = offset / size;
if (obj_idx < b->minObjIndex() || obj_idx >= b->numObjects())
return NULL;
int atom_idx = obj_idx * b->atomsPerObj();
if (b->isfree.isSet(atom_idx))
return NULL;
return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
}
static Block** freeChain(Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
if (b->isfree.isSet(atom_idx))
continue;
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
if (isMarked(al)) {
clearMark(al);
} else {
_doFree(al);
// assert(p != (void*)0x127000d960); // the main module
b->isfree.set(atom_idx);
}
}
head = &b->next;
}
return head;
}
void Heap::freeUnmarked() {
thread_caches.forEachValue([this](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
// Try to limit the amount of unused memory a thread can hold onto;
// currently pretty dumb, just limit the number of blocks in the free-list
// to 50. (blocks in the full list don't need to be limited, since we're sure
// that the thread had just actively used those.)
// Eventually may want to come up with some scrounging system.
// TODO does this thread locality even help at all?
for (int i = 0; i < 50; i++) {
if (h)
h = h->next;
else
break;
}
if (h) {
removeFromLL(h);
insertIntoLL(&heads[bidx], h);
}
Block** chain_end = freeChain(&cache->cache_free_heads[bidx]);
freeChain(&cache->cache_full_heads[bidx]);
while (Block* b = cache->cache_full_heads[bidx]) {
removeFromLL(b);
insertIntoLL(chain_end, b);
}
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block** chain_end = freeChain(&heads[bidx]);
freeChain(&full_heads[bidx]);
while (Block* b = full_heads[bidx]) {
removeFromLL(b);
insertIntoLL(chain_end, b);
}
}
LargeObj* cur = large_head;
while (cur) {
GCAllocation* al = cur->data;
if (isMarked(al)) {
clearMark(al);
} else {
void Heap::destroyContents(GCAllocation* al) {
_doFree(al);
*cur->prev = cur->next;
if (cur->next)
cur->next->prev = cur->prev;
LargeObj* to_free = cur;
cur = cur->next;
_freeLargeObj(to_free);
continue;
}
cur = cur->next;
}
}
void dumpHeapStatistics() {
@@ -527,7 +684,7 @@ void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
}
// TODO: copy-pasted from freeChain
void getChainStatistics(HeapStatistics* stats, Block** head) {
void SmallArena::getChainStatistics(HeapStatistics* stats, Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
@@ -550,32 +707,50 @@ void getChainStatistics(HeapStatistics* stats, Block** head) {
}
// TODO: copy-pasted from freeUnmarked()
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
thread_caches.forEachValue([this, &stats](ThreadBlockCache* cache) {
void SmallArena::getStatistics(HeapStatistics* stats) {
thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
getChainStatistics(&stats, &cache->cache_free_heads[bidx]);
getChainStatistics(&stats, &cache->cache_full_heads[bidx]);
getChainStatistics(stats, &cache->cache_free_heads[bidx]);
getChainStatistics(stats, &cache->cache_full_heads[bidx]);
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
getChainStatistics(&stats, &heads[bidx]);
getChainStatistics(&stats, &full_heads[bidx]);
getChainStatistics(stats, &heads[bidx]);
getChainStatistics(stats, &full_heads[bidx]);
}
}
LargeObj* cur = large_head;
void LargeArena::getStatistics(HeapStatistics* stats) {
LargeObj* cur = head;
while (cur) {
GCAllocation* al = cur->data;
addStatistic(&stats, al, cur->capacity());
addStatistic(stats, al, cur->size);
cur = cur->next;
}
}
void HugeArena::getStatistics(HeapStatistics* stats) {
HugeObj* cur = head;
while (cur) {
GCAllocation* al = cur->data;
addStatistic(stats, al, cur->capacity());
cur = cur->next;
}
}
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
small_arena.getStatistics(&stats);
large_arena.getStatistics(&stats);
huge_arena.getStatistics(&stats);
stats.conservative.print("conservative");
stats.untracked.print("untracked");
......
@@ -17,6 +17,7 @@
#include <cstddef>
#include <cstdint>
#include <sys/mman.h>
#include "core/common.h"
#include "core/threading.h"
@@ -24,6 +25,9 @@
namespace pyston {
namespace gc {
class Heap;
struct HeapStatistics;
typedef uint8_t kindid_t;
struct GCAllocation {
unsigned int gc_flags : 8;
@@ -59,14 +63,57 @@ inline void clearMark(GCAllocation* header) {
#undef MARK_BIT
#define PAGE_SIZE 4096
template <int N> class Bitmap {
static_assert(N % 64 == 0, "");
template <uintptr_t start> class Arena {
private:
void* cur;
protected:
Arena() : cur((void*)start) {}
public:
void* doMmap(size_t size) {
assert(size % PAGE_SIZE == 0);
void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
cur = (uint8_t*)cur + size;
return mrtn;
}
bool contains(void* addr) { return (void*)start <= addr && addr < cur; }
};
constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;
//
// The SmallArena allocates objects <= 3584 bytes.
//
// It uses segregated-fit allocation, and each block contains a free
// bitmap for objects of its assigned size.
//
static const size_t sizes[] = {
16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,
448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072, 3584, // 4096,
};
static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);
class SmallArena : public Arena<SMALL_ARENA_START> {
public:
private:
template <int N> class Bitmap {
static_assert(N % 64 == 0, "");
private:
uint64_t data[N / 64];
public:
public:
void setAllZero() { memset(data, 0, sizeof(data)); }
struct Scanner {
@@ -113,25 +160,26 @@ public:
int idx = first + i * 64;
return idx;
}
};
};
#define BLOCK_SIZE (4 * 4096)
static constexpr size_t BLOCK_SIZE = 4 * 4096;
#define ATOM_SIZE 16
static_assert(BLOCK_SIZE % ATOM_SIZE == 0, "");
static_assert(BLOCK_SIZE % ATOM_SIZE == 0, "");
#define ATOMS_PER_BLOCK (BLOCK_SIZE / ATOM_SIZE)
static_assert(ATOMS_PER_BLOCK % 64 == 0, "");
static_assert(ATOMS_PER_BLOCK % 64 == 0, "");
#define BITFIELD_SIZE (ATOMS_PER_BLOCK / 8)
#define BITFIELD_ELTS (BITFIELD_SIZE / 8)
#define BLOCK_HEADER_SIZE (BITFIELD_SIZE + 4 * sizeof(void*))
#define BLOCK_HEADER_ATOMS ((BLOCK_HEADER_SIZE + ATOM_SIZE - 1) / ATOM_SIZE)
struct Atoms {
struct Atoms {
char _data[ATOM_SIZE];
};
};
struct Block {
struct Block {
union {
struct {
Block* next, **prev;
@@ -153,79 +201,263 @@ struct Block {
inline int atomsPerObj() const { return atoms_per_obj; }
static Block* forPointer(void* ptr) { return (Block*)((uintptr_t)ptr & ~(BLOCK_SIZE - 1)); }
};
static_assert(sizeof(Block) == BLOCK_SIZE, "bad size");
static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");
constexpr const size_t sizes[] = {
16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256,
320, 384, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048,
// 2560, 3072, 3584, // 4096,
};
#define NUM_BUCKETS (sizeof(sizes) / sizeof(sizes[0]))
struct LargeObj;
class Heap {
private:
Block* heads[NUM_BUCKETS];
Block* full_heads[NUM_BUCKETS];
LargeObj* large_head = NULL;
GCAllocation* __attribute__((__malloc__)) allocSmall(size_t rounded_size, int bucket_idx);
GCAllocation* __attribute__((__malloc__)) allocLarge(size_t bytes);
// DS_DEFINE_MUTEX(lock);
DS_DEFINE_SPINLOCK(lock);
};
static_assert(sizeof(Block) == BLOCK_SIZE, "bad size");
static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");
// forward (public) definition of ThreadBlockCache so we can reference it both in this class (privately) and in Heap
// (for a friend ref).
struct ThreadBlockCache {
Heap* heap;
SmallArena* small;
Block* cache_free_heads[NUM_BUCKETS];
Block* cache_full_heads[NUM_BUCKETS];
ThreadBlockCache(Heap* heap) : heap(heap) {
ThreadBlockCache(Heap* heap, SmallArena* small) : heap(heap), small(small) {
memset(cache_free_heads, 0, sizeof(cache_free_heads));
memset(cache_full_heads, 0, sizeof(cache_full_heads));
}
~ThreadBlockCache();
};
Block* heads[NUM_BUCKETS];
Block* full_heads[NUM_BUCKETS];
friend struct ThreadBlockCache;
Heap* heap;
// TODO only use thread caches if we're in GRWL mode?
threading::PerThreadSet<ThreadBlockCache, Heap*> thread_caches;
threading::PerThreadSet<ThreadBlockCache, Heap*, SmallArena*> thread_caches;
public:
Heap() : thread_caches(this) {}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
Block* alloc_block(uint64_t size, Block** prev);
GCAllocation* allocFromBlock(Block* b);
Block* claimBlock(size_t rounded_size, Block** free_head);
void insertIntoLL(Block** next_pointer, Block* next);
void removeFromLL(Block* b);
Block** freeChain(Block** head);
void getChainStatistics(HeapStatistics* stats, Block** head);
GCAllocation* __attribute__((__malloc__)) _alloc(size_t bytes, int bucket_idx);
void _free(GCAllocation* al, Block* b);
public:
SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
GCAllocation* rtn;
// assert(bytes >= 16);
if (bytes <= 16)
rtn = allocSmall(16, 0);
return _alloc(16, 0);
else if (bytes <= 32)
rtn = allocSmall(32, 1);
else if (bytes > sizes[NUM_BUCKETS - 1])
rtn = allocLarge(bytes);
return _alloc(32, 1);
else {
rtn = NULL;
for (int i = 2; i < NUM_BUCKETS; i++) {
if (sizes[i] >= bytes) {
rtn = allocSmall(sizes[i], i);
break;
return _alloc(sizes[i], i);
}
}
return NULL;
}
}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
return rtn;
void free(GCAllocation* al) {
Block* b = Block::forPointer(al);
_free(al, b);
}
void getStatistics(HeapStatistics* stats);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
};
//
// The LargeArena allocates objects where 3584 < size < 1024*1024 bytes.
//
// It maintains a set of size-segregated free lists, and a special
// free list for larger chunks. If the free list specific to a given
// size has no entries, we search the free list for larger chunks.
//
class LargeArena : public Arena<LARGE_ARENA_START> {
struct LargeFreeChunk {
LargeFreeChunk* next_size;
size_t size;
};
struct LargeBlock {
LargeBlock* next;
size_t num_free_chunks;
unsigned char* free_chunk_map;
};
struct LargeObj {
LargeObj* next, **prev;
size_t size;
GCAllocation data[0];
};
/*
* This shouldn't be much smaller or larger than the largest small size bucket.
* Must be at least sizeof (LargeBlock).
*/
static constexpr size_t CHUNK_SIZE = 4096;
static constexpr int CHUNK_BITS = 12;
static_assert(CHUNK_SIZE > sizeof(LargeBlock), "bad large block size");
static constexpr int BLOCK_SIZE = 1024 * 1024;
static constexpr int NUM_FREE_LISTS = 32;
void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
LargeObj* _allocInternal(size_t size);
void _freeInternal(LargeObj* obj, size_t size);
void _free(LargeObj* obj);
LargeObj* head;
LargeBlock* blocks;
LargeFreeChunk* free_lists[NUM_FREE_LISTS]; /* 0 is for larger sizes */
Heap* heap;
public:
LargeArena(Heap* heap) : head(NULL), blocks(NULL), heap(heap) {}
/* Largest object that can be allocated in a large block. */
static constexpr size_t ALLOC_SIZE_LIMIT = BLOCK_SIZE - CHUNK_SIZE - sizeof(LargeObj);
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
// The HugeArena allocates objects where size > 1024*1024 bytes.
//
// Objects are allocated with individual mmap() calls and kept in a
// linked list; on free they are munmap()ed rather than reused.
class HugeArena : public Arena<HUGE_ARENA_START> {
struct HugeObj {
HugeObj* next, **prev;
size_t obj_size;
GCAllocation data[0];
int mmap_size() {
size_t total_size = obj_size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
return total_size;
}
int capacity() { return mmap_size() - sizeof(HugeObj); }
static HugeObj* fromAllocation(GCAllocation* alloc) {
char* rtn = (char*)alloc - offsetof(HugeObj, data);
assert((uintptr_t)rtn % PAGE_SIZE == 0);
return reinterpret_cast<HugeObj*>(rtn);
}
};
void _freeHugeObj(HugeObj* lobj);
HugeObj* head;
Heap* heap;
public:
HugeArena(Heap* heap) : heap(heap) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
class Heap {
private:
SmallArena small_arena;
LargeArena large_arena;
HugeArena huge_arena;
friend class SmallArena;
friend class LargeArena;
friend class HugeArena;
// DS_DEFINE_MUTEX(lock);
DS_DEFINE_SPINLOCK(lock);
public:
Heap() : small_arena(this), large_arena(this), huge_arena(this) {}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes) {
if (large_arena.contains(alloc)) {
return large_arena.realloc(alloc, bytes);
} else if (huge_arena.contains(alloc)) {
return huge_arena.realloc(alloc, bytes);
}
assert(small_arena.contains(alloc));
return small_arena.realloc(alloc, bytes);
}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
if (bytes > LargeArena::ALLOC_SIZE_LIMIT)
return huge_arena.alloc(bytes);
else if (bytes > sizes[NUM_BUCKETS - 1])
return large_arena.alloc(bytes);
else
return small_arena.alloc(bytes);
}
void destroyContents(GCAllocation* alloc);
void free(GCAllocation* alloc) {
destroyContents(alloc);
if (large_arena.contains(alloc)) {
large_arena.free(alloc);
return;
}
if (huge_arena.contains(alloc)) {
huge_arena.free(alloc);
return;
}
assert(small_arena.contains(alloc));
small_arena.free(alloc);
}
// not thread safe:
GCAllocation* getAllocationFromInteriorPointer(void* ptr);
GCAllocation* getAllocationFromInteriorPointer(void* ptr) {
if (large_arena.contains(ptr)) {
return large_arena.allocationFrom(ptr);
} else if (huge_arena.contains(ptr)) {
return huge_arena.allocationFrom(ptr);
} else if (small_arena.contains(ptr)) {
return small_arena.allocationFrom(ptr);
}
return NULL;
}
// not thread safe:
void freeUnmarked();
void freeUnmarked() {
small_arena.freeUnmarked();
large_arena.freeUnmarked();
huge_arena.freeUnmarked();
}
void dumpHeapStatistics();
};
......
@@ -68,6 +68,9 @@ TEST(alloc, alloc64) { testAlloc(64); }
TEST(alloc, alloc128) { testAlloc(128); }
TEST(alloc, alloc258) { testAlloc(258); }
TEST(alloc, alloc3584) { testAlloc(3584); }
TEST(alloc, alloc4096) { testAlloc(4096); }
TEST(alloc, alloc8192) { testAlloc(8192); }
TEST(alloc, alloc16384) { testAlloc(16384); }
TEST(alloc, largeallocs) {
int s1 = 1 << 20;
......