Commit 7cd29d79 authored by Chris Toshok

mucho cleanup

parent 1a268096
......@@ -35,15 +35,40 @@ namespace gc {
void _doFree(GCAllocation* al);
// these template functions are for both large and huge sections
template <class ListT> inline void unlinkNode(ListT* node) {
// lots of linked lists around here, so let's just use template functions for them all
// Clears both link fields of `node`.  Only the node's own `next` and `prev`
// members are written; whatever they pointed at is left alone.
template <class ListT> inline void nullNextPrev(ListT* node) {
    node->prev = nullptr;
    node->next = nullptr;
}
// Unlinks `node` from the list it is on.  `node->prev` is the address of the
// pointer that currently refers to `node` (a list head or the previous
// element's `next`), so that slot is redirected to the successor and the
// successor's back-link is pointed at the same slot.  The node's own
// `next`/`prev` fields are left as they were.
template <class ListT> inline void removeFromLL(ListT* node) {
    auto* const successor = node->next;
    auto const incoming_slot = node->prev;

    *incoming_slot = successor;
    if (successor != nullptr)
        successor->prev = incoming_slot;
}
// Unlinks `node` from its list and then clears its own `next`/`prev` fields,
// which is the state insertIntoLL() asserts on before re-inserting a node.
// Built from removeFromLL() and nullNextPrev() so the unlink logic exists in
// exactly one place.
template <class ListT> inline void removeFromLLAndNull(ListT* node) {
    removeFromLL(node);
    nullNextPrev(node);
}
// Pushes `next` in front of whatever `*next_pointer` currently refers to.
//   next_pointer - address of the slot to link through (a list head or some
//                  element's `next` field); must be non-null.
//   next         - the node to insert; must be non-null and fully detached
//                  (both of its link fields null).
// Afterwards `*next_pointer == next`, `next->prev == next_pointer`, and the
// displaced element (if any) has its back-link pointing at `next->next`.
template <class ListT> inline void insertIntoLL(ListT** next_pointer, ListT* next) {
    assert(next_pointer);
    assert(next);
    assert(!next->next);
    assert(!next->prev);

    ListT* const displaced = *next_pointer;

    next->prev = next_pointer;
    next->next = displaced;
    if (displaced != nullptr)
        displaced->prev = &next->next;

    *next_pointer = next;
}
template <class ListT, typename Free>
inline void sweepHeap(ListT* head, std::function<void(GCAllocation*)> __free, Free free_func) {
template <class ListT, typename Free> inline void sweepList(ListT* head, Free free_func) {
auto cur = head;
while (cur) {
GCAllocation* al = cur->data;
......@@ -51,9 +76,9 @@ inline void sweepHeap(ListT* head, std::function<void(GCAllocation*)> __free, Fr
clearMark(al);
cur = cur->next;
} else {
__free(al);
_doFree(al);
unlinkNode(cur);
removeFromLL(cur);
auto to_free = cur;
cur = cur->next;
......@@ -92,6 +117,121 @@ void registerGCManagedBytes(size_t bytes) {
Heap global_heap;
// Runs the teardown hook for the object stored in `al`.
// Nothing in this function releases memory: it reads the allocation header and,
// for GCKind::PYTHON allocations, calls the class's simple_destructor when one
// is set.  Other allocation kinds are left untouched.
void _doFree(GCAllocation* al) {
if (VERBOSITY() >= 2)
printf("Freeing %p\n", al->user_data);
// Valgrind error reporting is switched off only for the single read of kind_id.
// NOTE(review): presumably valgrind may consider this header not addressable/defined
// at this point -- confirm.
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
GCKind alloc_kind = al->kind_id;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
if (alloc_kind == GCKind::PYTHON) {
// Same treatment for forming the Box pointer from user_data.
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
Box* b = (Box*)al->user_data;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
// A class with a tp_dealloc is not expected to reach this path; the type name is
// reported if one does.
ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
// simple_destructor is optional.
if (b->cls->simple_destructor)
b->cls->simple_destructor(b);
}
}
// Heap-level entry point that forwards `al` to _doFree().
void Heap::destructContents(GCAllocation* al) {
_doFree(al);
}
// Accumulator filled in by addStatistic() and printed by Heap::dumpHeapStatistics().
struct HeapStatistics {
    // Allocation count and byte total for one category of objects.
    struct TypeStats {
        int64_t nallocs;
        int64_t nbytes;

        TypeStats() : nallocs(0), nbytes(0) {}

        // Prints "<name>: <count> allocations for <size>", where the size is shown in
        // MB above 2^20 bytes, in KB above 2^10 bytes, and in plain bytes otherwise.
        void print(const char* name) const {
            // int64_t is not `long` on every platform, so the values are converted
            // to long long explicitly to match the %lld conversions below.
            const long long count = static_cast<long long>(nallocs);
            if (nbytes > (1 << 20))
                printf("%s: %lld allocations for %.1f MB\n", name, count, nbytes * 1.0 / (1 << 20));
            else if (nbytes > (1 << 10))
                printf("%s: %lld allocations for %.1f KB\n", name, count, nbytes * 1.0 / (1 << 10));
            else
                printf("%s: %lld allocations for %lld bytes\n", name, count, static_cast<long long>(nbytes));
        }
    };

    // Per-class totals for GCKind::PYTHON allocations, keyed by the object's class.
    std::unordered_map<BoxedClass*, TypeStats> by_cls;
    // Totals for GCKind::CONSERVATIVE and GCKind::UNTRACKED allocations.
    TypeStats conservative, untracked;
    // Totals across every allocation recorded.
    TypeStats total;
};
// Records one allocation of `nbytes` bytes in `stats`.
// The grand total is always updated; the allocation is then attributed to its
// class (GCKind::PYTHON), to the conservative bucket, or to the untracked
// bucket.  Any other kind trips a RELEASE_ASSERT reporting the numeric kind.
void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
    stats->total.nallocs++;
    stats->total.nbytes += nbytes;

    switch (al->kind_id) {
        case GCKind::PYTHON: {
            Box* boxed = (Box*)al->user_data;
            // operator[] creates a zeroed entry the first time a class is seen
            HeapStatistics::TypeStats& per_class = stats->by_cls[boxed->cls];
            per_class.nallocs++;
            per_class.nbytes += nbytes;
            break;
        }
        case GCKind::CONSERVATIVE:
            stats->conservative.nallocs++;
            stats->conservative.nbytes += nbytes;
            break;
        case GCKind::UNTRACKED:
            stats->untracked.nallocs++;
            stats->untracked.nbytes += nbytes;
            break;
        default:
            RELEASE_ASSERT(0, "%d", (int)al->kind_id);
    }
}
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
small_arena.getStatistics(&stats);
large_arena.getStatistics(&stats);
huge_arena.getStatistics(&stats);
stats.conservative.print("conservative");
stats.untracked.print("untracked");
for (const auto& p : stats.by_cls) {
p.second.print(getFullNameOfClass(p.first).c_str());
}
stats.total.print("Total");
printf("\n");
}
// Free-function convenience wrapper: dumps statistics for global_heap.
void dumpHeapStatistics() {
global_heap.dumpHeapStatistics();
}
//////
/// Small Arena
// Allocates `bytes` from the small arena.
// The request is reported to registerGCManagedBytes() and then served from the
// first bucket in `sizes` that is large enough.  Returns NULL when `bytes`
// exceeds the largest bucket.
GCAllocation* SmallArena::alloc(size_t bytes) {
    registerGCManagedBytes(bytes);

    // The two smallest buckets are special-cased ahead of the table scan.
    if (bytes <= 16)
        return _alloc(16, 0);
    if (bytes <= 32)
        return _alloc(32, 1);

    int bucket = 2;
    while (bucket < NUM_BUCKETS && sizes[bucket] < bytes)
        bucket++;

    if (bucket < NUM_BUCKETS)
        return _alloc(sizes[bucket], bucket);
    return NULL;
}
GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
Block* b = Block::forPointer(al);
......@@ -110,10 +250,25 @@ GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
memcpy(rtn, al, std::min(bytes, size));
#endif
_free(al, b);
free(al);
return rtn;
}
// Returns a single small-arena allocation to its block by setting the
// corresponding bit in the block's `isfree` bitmap.  In debug builds this
// checks that `alloc` sits on an object boundary for the block's object size
// and that the slot is not already marked free.
void SmallArena::free(GCAllocation* alloc) {
    Block* owner = Block::forPointer(alloc);

    size_t obj_size = owner->size;
    int byte_offset = (char*)alloc - (char*)owner;
    assert(byte_offset % obj_size == 0);

    int atom = byte_offset / ATOM_SIZE;
    assert(!owner->isfree.isSet(atom));
    owner->isfree.set(atom);

#ifndef NVALGRIND
// VALGRIND_MEMPOOL_FREE(b, ptr);
#endif
}
GCAllocation* SmallArena::allocationFrom(void* ptr) {
Block* b = Block::forPointer(ptr);
size_t size = b->size;
......@@ -131,37 +286,6 @@ GCAllocation* SmallArena::allocationFrom(void* ptr) {
return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
}
// Sweeps every block on the chain starting at `*head`.
// For each object slot that is currently in use: a marked object just has its
// mark cleared; an unmarked one is passed to _doFree() and its slot is flagged
// free in the block's bitmap.
// Returns the address of the last block's `next` field (or `head` itself when
// the chain is empty), i.e. the slot a caller can append further blocks to.
SmallArena::Block** SmallArena::freeChain(Block** head) {
    for (Block* blk = *head; blk != NULL; blk = *head) {
        int obj_end = blk->numObjects();
        int obj_begin = blk->minObjIndex();
        int atoms_each = blk->atomsPerObj();

        for (int obj = obj_begin; obj < obj_end; obj++) {
            int atom = obj * atoms_each;

            if (!blk->isfree.isSet(atom)) {
                void* slot = &blk->atoms[atom];
                GCAllocation* al = reinterpret_cast<GCAllocation*>(slot);

                if (isMarked(al)) {
                    clearMark(al);
                } else {
                    _doFree(al);

                    // assert(p != (void*)0x127000d960); // the main module
                    blk->isfree.set(atom);
                }
            }
        }

        head = &blk->next;
    }

    return head;
}
void SmallArena::freeUnmarked() {
thread_caches.forEachValue([this](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
......@@ -179,184 +303,226 @@ void SmallArena::freeUnmarked() {
break;
}
if (h) {
removeFromLL(h);
removeFromLLAndNull(h);
insertIntoLL(&heads[bidx], h);
}
Block** chain_end = freeChain(&cache->cache_free_heads[bidx]);
freeChain(&cache->cache_full_heads[bidx]);
Block** chain_end = _freeChain(&cache->cache_free_heads[bidx]);
_freeChain(&cache->cache_full_heads[bidx]);
while (Block* b = cache->cache_full_heads[bidx]) {
removeFromLL(b);
removeFromLLAndNull(b);
insertIntoLL(chain_end, b);
}
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block** chain_end = freeChain(&heads[bidx]);
freeChain(&full_heads[bidx]);
Block** chain_end = _freeChain(&heads[bidx]);
_freeChain(&full_heads[bidx]);
while (Block* b = full_heads[bidx]) {
removeFromLL(b);
removeFromLLAndNull(b);
insertIntoLL(chain_end, b);
}
}
}
// TODO: copy-pasted from freeUnmarked()
void SmallArena::getStatistics(HeapStatistics* stats) {
thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)
_getChainStatistics(stats, &cache->cache_free_heads[bidx]);
_getChainStatistics(stats, &cache->cache_full_heads[bidx]);
}
});
#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
_getChainStatistics(stats, &heads[bidx]);
_getChainStatistics(stats, &full_heads[bidx]);
}
}
int64_t los_memory_usage = 0;
static int64_t large_object_count = 0;
static int large_block_count = 0;
SmallArena::Block** SmallArena::_freeChain(Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
size_t num_chunks = size >> CHUNK_BITS;
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
free_chunks->size = size;
if (b->isfree.isSet(atom_idx))
continue;
if (num_chunks >= NUM_FREE_LISTS)
num_chunks = 0;
free_chunks->next_size = free_lists[num_chunks];
free_lists[num_chunks] = free_chunks;
}
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
LargeFreeChunk* free_chunks = NULL;
LargeBlock* section;
size_t i, num_chunks, start_index;
if (isMarked(al)) {
clearMark(al);
} else {
_doFree(al);
assert((size & (CHUNK_SIZE - 1)) == 0);
// assert(p != (void*)0x127000d960); // the main module
b->isfree.set(atom_idx);
}
}
while (*list) {
free_chunks = *list;
if (free_chunks->size >= size)
break;
list = &(*list)->next_size;
head = &b->next;
}
return head;
}
if (!*list)
return NULL;
*list = free_chunks->next_size;
SmallArena::Block* SmallArena::_allocBlock(uint64_t size, Block** prev) {
Block* rtn = (Block*)doMmap(sizeof(Block));
assert(rtn);
rtn->size = size;
rtn->num_obj = BLOCK_SIZE / size;
rtn->min_obj_index = (BLOCK_HEADER_SIZE + size - 1) / size;
rtn->atoms_per_obj = size / ATOM_SIZE;
rtn->prev = prev;
rtn->next = NULL;
if (free_chunks->size > size)
add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);
#ifndef NVALGRIND
// Not sure if this mempool stuff is better than the malloc-like interface:
// VALGRIND_CREATE_MEMPOOL(rtn, 0, true);
#endif
num_chunks = size >> CHUNK_BITS;
// printf("Allocated new block %p\n", rtn);
section = LARGE_BLOCK_FOR_OBJ(free_chunks);
// Don't think I need to do this:
rtn->isfree.setAllZero();
rtn->next_to_check.reset();
start_index = LARGE_CHUNK_INDEX(free_chunks, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(section->free_chunk_map[i]);
section->free_chunk_map[i] = 0;
int num_objects = rtn->numObjects();
int num_lost = rtn->minObjIndex();
int atoms_per_object = rtn->atomsPerObj();
for (int i = num_lost * atoms_per_object; i < num_objects * atoms_per_object; i += atoms_per_object) {
rtn->isfree.set(i);
// printf("%d %d\n", idx, bit);
}
section->num_free_chunks -= size >> CHUNK_BITS;
assert(section->num_free_chunks >= 0);
return free_chunks;
}
// printf("%d %d %d\n", num_objects, num_lost, atoms_per_object);
// for (int i =0; i < BITFIELD_ELTS; i++) {
// printf("%d: %lx\n", i, rtn->isfree[i]);
//}
return rtn;
}
LargeArena::LargeObj* LargeArena::_allocInternal(size_t size) {
LargeBlock* section;
LargeFreeChunk* free_chunks;
size_t num_chunks;
SmallArena::ThreadBlockCache::~ThreadBlockCache() {
LOCK_REGION(heap->lock);
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
for (int i = 0; i < NUM_BUCKETS; i++) {
while (Block* b = cache_free_heads[i]) {
removeFromLLAndNull(b);
insertIntoLL(&small->heads[i], b);
}
num_chunks = size >> CHUNK_BITS;
while (Block* b = cache_full_heads[i]) {
removeFromLLAndNull(b);
insertIntoLL(&small->full_heads[i], b);
}
}
}
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
GCAllocation* SmallArena::_allocFromBlock(Block* b) {
int idx = b->isfree.scanForNext(b->next_to_check);
if (idx == -1)
return NULL;
retry:
if (num_chunks >= NUM_FREE_LISTS) {
free_chunks = get_from_size_list(&free_lists[0], size);
} else {
size_t i;
for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
free_chunks = get_from_size_list(&free_lists[i], size);
if (free_chunks)
break;
}
if (!free_chunks)
free_chunks = get_from_size_list(&free_lists[0], size);
void* rtn = &b->atoms[idx];
return reinterpret_cast<GCAllocation*>(rtn);
}
SmallArena::Block* SmallArena::_claimBlock(size_t rounded_size, Block** free_head) {
Block* free_block = *free_head;
if (free_block) {
removeFromLLAndNull(free_block);
return free_block;
}
if (free_chunks)
return (LargeObj*)free_chunks;
return _allocBlock(rounded_size, NULL);
}
section = (LargeBlock*)doMmap(BLOCK_SIZE);
GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
Block** free_head = &heads[bucket_idx];
Block** full_head = &full_heads[bucket_idx];
if (!section)
return NULL;
ThreadBlockCache* cache = thread_caches.get();
free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
free_chunks->next_size = free_lists[0];
free_lists[0] = free_chunks;
Block** cache_head = &cache->cache_free_heads[bucket_idx];
section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;
// static __thread int gc_allocs = 0;
// if (++gc_allocs == 128) {
// static StatCounter sc_total("gc_allocs");
// sc_total.log(128);
// gc_allocs = 0;
//}
section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
section->free_chunk_map[0] = 0;
memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);
while (true) {
while (Block* cache_block = *cache_head) {
GCAllocation* rtn = _allocFromBlock(cache_block);
if (rtn)
return rtn;
section->next = blocks;
blocks = section;
removeFromLLAndNull(cache_block);
insertIntoLL(&cache->cache_full_heads[bucket_idx], cache_block);
}
++large_block_count;
// Not very useful to count the cache misses if we don't count the total attempts:
// static StatCounter sc_fallback("gc_allocs_cachemiss");
// sc_fallback.log();
goto retry;
}
LOCK_REGION(heap->lock);
void LargeArena::_freeInternal(LargeObj* obj, size_t size) {
LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
size_t num_chunks, i, start_index;
assert(*cache_head == NULL);
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
// should probably be called allocBlock:
Block* myblock = _claimBlock(rounded_size, &heads[bucket_idx]);
assert(myblock);
assert(!myblock->next);
assert(!myblock->prev);
num_chunks = size >> CHUNK_BITS;
// printf("%d claimed new block %p with %d objects\n", threading::gettid(), myblock, myblock->numObjects());
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
insertIntoLL(cache_head, myblock);
}
}
section->num_free_chunks += num_chunks;
assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);
// TODO: copy-pasted from _freeChain
void SmallArena::_getChainStatistics(HeapStatistics* stats, Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
/*
* We could free the LOS section here if it's empty, but we
* can't unless we also remove its free chunks from the fast
* free lists. Instead, we do it in los_sweep().
*/
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
start_index = LARGE_CHUNK_INDEX(obj, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(!section->free_chunk_map[i]);
section->free_chunk_map[i] = 1;
if (b->isfree.isSet(atom_idx))
continue;
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
addStatistic(stats, al, b->size);
}
add_free_chunk((LargeFreeChunk*)obj, size);
head = &b->next;
}
}
// Releases one large object: first unlinks it from the object list, then hands
// its storage (obj->size bytes as recorded on the object) to _freeInternal().
void LargeArena::_free(LargeObj* obj) {
unlinkNode(obj);
_freeInternal(obj, obj->size);
}
//////
/// Large Arena
// Sweeps the large-object list starting at `head` via sweepHeap(), passing
// _doFree as the per-allocation teardown and a callback that returns each
// node's storage through _freeInternal().
void LargeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](LargeObj* ptr) { _freeInternal(ptr, ptr->size); });
}
// Number of CHUNK-sized pieces in a block, minus one.
// NOTE(review): the "- 1" presumably accounts for the first chunk, which holds the
// LargeBlock header and its free_chunk_map -- confirm.
#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)
// Masks off the low bits of an address to get the BLOCK_SIZE-aligned start of the
// block containing it.  The mask arithmetic relies on BLOCK_SIZE being a power of two.
#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
// Index of the chunk that `obj` starts in, counted from the start of `section`.
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)
GCAllocation* LargeArena::alloc(size_t size) {
registerGCManagedBytes(size);
......@@ -365,22 +531,18 @@ GCAllocation* LargeArena::alloc(size_t size) {
// printf ("allocLarge %zu\n", size);
LargeObj* obj = _allocInternal(size + sizeof(GCAllocation) + sizeof(LargeObj));
LargeObj* obj = _alloc(size + sizeof(GCAllocation) + sizeof(LargeObj));
obj->size = size;
obj->next = head;
if (obj->next)
obj->next->prev = &obj->next;
obj->prev = &head;
head = obj;
large_object_count++;
nullNextPrev(obj);
insertIntoLL(&head, obj);
return obj->data;
}
GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
LargeObj* obj = LargeObj::fromAllocation(al);
int size = obj->size;
if (size >= bytes && size < bytes * 2)
return al;
......@@ -388,13 +550,12 @@ GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, obj->size));
_free(obj);
_freeLargeObj(obj);
return rtn;
}
void LargeArena::free(GCAllocation* al) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
_free(obj);
_freeLargeObj(LargeObj::fromAllocation(al));
}
GCAllocation* LargeArena::allocationFrom(void* ptr) {
......@@ -410,341 +571,206 @@ GCAllocation* LargeArena::allocationFrom(void* ptr) {
return NULL;
}
void HugeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
// Sweeps the large-object list starting at `head` via sweepList(); the callback
// releases each node it is handed through _freeLargeObj().
void LargeArena::freeUnmarked() {
sweepList(head, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
}
GCAllocation* HugeArena::alloc(size_t size) {
registerGCManagedBytes(size);
LOCK_REGION(heap->lock);
size_t total_size = size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
HugeObj* rtn = (HugeObj*)doMmap(total_size);
rtn->obj_size = size;
void LargeArena::getStatistics(HeapStatistics* stats) {
LargeObj* cur = head;
while (cur) {
GCAllocation* al = cur->data;
addStatistic(stats, al, cur->size);
rtn->next = head;
if (rtn->next)
rtn->next->prev = &rtn->next;
rtn->prev = &head;
head = rtn;
return rtn->data;
cur = cur->next;
}
}
GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
HugeObj* lobj = HugeObj::fromAllocation(al);
int capacity = lobj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, lobj->obj_size));
void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
size_t num_chunks = size >> CHUNK_BITS;
_freeHugeObj(lobj);
return rtn;
}
free_chunks->size = size;
void HugeArena::_freeHugeObj(HugeObj* lobj) {
unlinkNode(lobj);
int r = munmap(lobj, lobj->mmap_size());
assert(r == 0);
if (num_chunks >= NUM_FREE_LISTS)
num_chunks = 0;
free_chunks->next_size = free_lists[num_chunks];
free_lists[num_chunks] = free_chunks;
}
LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
LargeFreeChunk* free_chunks = NULL;
LargeBlock* section;
size_t i, num_chunks, start_index;
void HugeArena::free(GCAllocation* al) {
HugeObj* lobj = HugeObj::fromAllocation(al);
_freeHugeObj(lobj);
}
assert((size & (CHUNK_SIZE - 1)) == 0);
GCAllocation* HugeArena::allocationFrom(void* ptr) {
HugeObj* cur = head;
while (cur) {
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
cur = cur->next;
while (*list) {
free_chunks = *list;
if (free_chunks->size >= size)
break;
list = &(*list)->next_size;
}
if (!*list)
return NULL;
}
SmallArena::Block* SmallArena::alloc_block(uint64_t size, Block** prev) {
Block* rtn = (Block*)doMmap(sizeof(Block));
assert(rtn);
rtn->size = size;
rtn->num_obj = BLOCK_SIZE / size;
rtn->min_obj_index = (BLOCK_HEADER_SIZE + size - 1) / size;
rtn->atoms_per_obj = size / ATOM_SIZE;
rtn->prev = prev;
rtn->next = NULL;
*list = free_chunks->next_size;
#ifndef NVALGRIND
// Not sure if this mempool stuff is better than the malloc-like interface:
// VALGRIND_CREATE_MEMPOOL(rtn, 0, true);
#endif
if (free_chunks->size > size)
add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);
// printf("Allocated new block %p\n", rtn);
num_chunks = size >> CHUNK_BITS;
// Don't think I need to do this:
rtn->isfree.setAllZero();
rtn->next_to_check.reset();
section = LARGE_BLOCK_FOR_OBJ(free_chunks);
int num_objects = rtn->numObjects();
int num_lost = rtn->minObjIndex();
int atoms_per_object = rtn->atomsPerObj();
for (int i = num_lost * atoms_per_object; i < num_objects * atoms_per_object; i += atoms_per_object) {
rtn->isfree.set(i);
// printf("%d %d\n", idx, bit);
start_index = LARGE_CHUNK_INDEX(free_chunks, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(section->free_chunk_map[i]);
section->free_chunk_map[i] = 0;
}
// printf("%d %d %d\n", num_objects, num_lost, atoms_per_object);
// for (int i =0; i < BITFIELD_ELTS; i++) {
// printf("%d: %lx\n", i, rtn->isfree[i]);
//}
return rtn;
}
// Pushes block `next` in front of whatever `*next_pointer` currently refers to.
// `next` must be fully detached (both link fields null) and both arguments
// must be non-null.  Afterwards `*next_pointer == next` and
// `next->prev == next_pointer`; a displaced block, if any, gets its back-link
// pointed at `next->next`.
void SmallArena::insertIntoLL(Block** next_pointer, Block* next) {
    assert(next_pointer);
    assert(next);
    assert(!next->next);
    assert(!next->prev);

    Block* const displaced = *next_pointer;

    next->prev = next_pointer;
    next->next = displaced;
    if (displaced != NULL)
        displaced->prev = &next->next;

    *next_pointer = next;
}
section->num_free_chunks -= size >> CHUNK_BITS;
assert(section->num_free_chunks >= 0);
void SmallArena::removeFromLL(Block* b) {
unlinkNode(b);
b->next = NULL;
b->prev = NULL;
return free_chunks;
}
SmallArena::ThreadBlockCache::~ThreadBlockCache() {
LOCK_REGION(heap->lock);
for (int i = 0; i < NUM_BUCKETS; i++) {
while (Block* b = cache_free_heads[i]) {
small->removeFromLL(b);
small->insertIntoLL(&small->heads[i], b);
}
LargeArena::LargeObj* LargeArena::_alloc(size_t size) {
LargeBlock* section;
LargeFreeChunk* free_chunks;
size_t num_chunks;
while (Block* b = cache_full_heads[i]) {
small->removeFromLL(b);
small->insertIntoLL(&small->full_heads[i], b);
}
}
}
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
GCAllocation* SmallArena::allocFromBlock(Block* b) {
int idx = b->isfree.scanForNext(b->next_to_check);
if (idx == -1)
return NULL;
num_chunks = size >> CHUNK_BITS;
void* rtn = &b->atoms[idx];
return reinterpret_cast<GCAllocation*>(rtn);
}
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
SmallArena::Block* SmallArena::claimBlock(size_t rounded_size, Block** free_head) {
Block* free_block = *free_head;
if (free_block) {
removeFromLL(free_block);
return free_block;
retry:
if (num_chunks >= NUM_FREE_LISTS) {
free_chunks = get_from_size_list(&free_lists[0], size);
} else {
size_t i;
for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
free_chunks = get_from_size_list(&free_lists[i], size);
if (free_chunks)
break;
}
if (!free_chunks)
free_chunks = get_from_size_list(&free_lists[0], size);
}
return alloc_block(rounded_size, NULL);
}
GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
registerGCManagedBytes(rounded_size);
Block** free_head = &heads[bucket_idx];
Block** full_head = &full_heads[bucket_idx];
ThreadBlockCache* cache = thread_caches.get();
Block** cache_head = &cache->cache_free_heads[bucket_idx];
// static __thread int gc_allocs = 0;
// if (++gc_allocs == 128) {
// static StatCounter sc_total("gc_allocs");
// sc_total.log(128);
// gc_allocs = 0;
//}
while (true) {
while (Block* cache_block = *cache_head) {
GCAllocation* rtn = allocFromBlock(cache_block);
if (rtn)
return rtn;
if (free_chunks)
return (LargeObj*)free_chunks;
removeFromLL(cache_block);
insertIntoLL(&cache->cache_full_heads[bucket_idx], cache_block);
}
section = (LargeBlock*)doMmap(BLOCK_SIZE);
// Not very useful to count the cache misses if we don't count the total attempts:
// static StatCounter sc_fallback("gc_allocs_cachemiss");
// sc_fallback.log();
if (!section)
return NULL;
LOCK_REGION(heap->lock);
free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
free_chunks->next_size = free_lists[0];
free_lists[0] = free_chunks;
assert(*cache_head == NULL);
section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;
// should probably be called allocBlock:
Block* myblock = claimBlock(rounded_size, &heads[bucket_idx]);
assert(myblock);
assert(!myblock->next);
assert(!myblock->prev);
section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
section->free_chunk_map[0] = 0;
memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);
// printf("%d claimed new block %p with %d objects\n", threading::gettid(), myblock, myblock->numObjects());
section->next = blocks;
blocks = section;
insertIntoLL(cache_head, myblock);
}
goto retry;
}
void SmallArena::_free(GCAllocation* alloc, Block* b) {
assert(b == Block::forPointer(alloc));
void LargeArena::_freeLargeObj(LargeObj* obj) {
removeFromLL(obj);
size_t size = b->size;
int offset = (char*)alloc - (char*)b;
assert(offset % size == 0);
int atom_idx = offset / ATOM_SIZE;
size_t size = obj->size;
LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
size_t num_chunks, i, start_index;
assert(!b->isfree.isSet(atom_idx));
b->isfree.set(atom_idx);
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
#ifndef NVALGRIND
// VALGRIND_MEMPOOL_FREE(b, ptr);
#endif
}
num_chunks = size >> CHUNK_BITS;
void _doFree(GCAllocation* al) {
if (VERBOSITY() >= 2)
printf("Freeing %p\n", al->user_data);
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
GCKind alloc_kind = al->kind_id;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
section->num_free_chunks += num_chunks;
assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);
if (alloc_kind == GCKind::PYTHON) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
Box* b = (Box*)al->user_data;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
/*
* We could free the LOS section here if it's empty, but we
* can't unless we also remove its free chunks from the fast
* free lists. Instead, we do it in los_sweep().
*/
ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
if (b->cls->simple_destructor)
b->cls->simple_destructor(b);
start_index = LARGE_CHUNK_INDEX(obj, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(!section->free_chunk_map[i]);
section->free_chunk_map[i] = 1;
}
}
void Heap::destroyContents(GCAllocation* al) {
_doFree(al);
add_free_chunk((LargeFreeChunk*)obj, size);
}
// Free-function convenience wrapper: dumps statistics for global_heap.
void dumpHeapStatistics() {
global_heap.dumpHeapStatistics();
}
//////
/// Huge Arena
// Accumulator filled in by addStatistic() and printed by Heap::dumpHeapStatistics().
struct HeapStatistics {
    // Allocation count and byte total for one category of objects.
    struct TypeStats {
        int64_t nallocs;
        int64_t nbytes;

        TypeStats() : nallocs(0), nbytes(0) {}

        // Prints "<name>: <count> allocations for <size>", where the size is shown in
        // MB above 2^20 bytes, in KB above 2^10 bytes, and in plain bytes otherwise.
        void print(const char* name) const {
            // int64_t is not `long` on every platform, so the values are converted
            // to long long explicitly to match the %lld conversions below.
            const long long count = static_cast<long long>(nallocs);
            if (nbytes > (1 << 20))
                printf("%s: %lld allocations for %.1f MB\n", name, count, nbytes * 1.0 / (1 << 20));
            else if (nbytes > (1 << 10))
                printf("%s: %lld allocations for %.1f KB\n", name, count, nbytes * 1.0 / (1 << 10));
            else
                printf("%s: %lld allocations for %lld bytes\n", name, count, static_cast<long long>(nbytes));
        }
    };

    // Per-class totals for GCKind::PYTHON allocations, keyed by the object's class.
    std::unordered_map<BoxedClass*, TypeStats> by_cls;
    // Totals for GCKind::CONSERVATIVE and GCKind::UNTRACKED allocations.
    TypeStats conservative, untracked;
    // Totals across every allocation recorded.
    TypeStats total;
};
void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
stats->total.nallocs++;
stats->total.nbytes += nbytes;
GCAllocation* HugeArena::alloc(size_t size) {
registerGCManagedBytes(size);
if (al->kind_id == GCKind::PYTHON) {
Box* b = (Box*)al->user_data;
auto& t = stats->by_cls[b->cls];
LOCK_REGION(heap->lock);
t.nallocs++;
t.nbytes += nbytes;
} else if (al->kind_id == GCKind::CONSERVATIVE) {
stats->conservative.nallocs++;
stats->conservative.nbytes += nbytes;
} else if (al->kind_id == GCKind::UNTRACKED) {
stats->untracked.nallocs++;
stats->untracked.nbytes += nbytes;
} else {
RELEASE_ASSERT(0, "%d", (int)al->kind_id);
}
}
size_t total_size = size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
HugeObj* rtn = (HugeObj*)doMmap(total_size);
rtn->obj_size = size;
// TODO: copy-pasted from freeChain
void SmallArena::getChainStatistics(HeapStatistics* stats, Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
nullNextPrev(rtn);
insertIntoLL(&head, rtn);
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
return rtn->data;
}
if (b->isfree.isSet(atom_idx))
continue;
GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
HugeObj* obj = HugeObj::fromAllocation(al);
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
int capacity = obj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
addStatistic(stats, al, b->size);
}
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, obj->obj_size));
head = &b->next;
}
_freeHugeObj(obj);
return rtn;
}
// TODO: copy-pasted from freeUnmarked()
void SmallArena::getStatistics(HeapStatistics* stats) {
thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
getChainStatistics(stats, &cache->cache_free_heads[bidx]);
getChainStatistics(stats, &cache->cache_full_heads[bidx]);
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
getChainStatistics(stats, &heads[bidx]);
getChainStatistics(stats, &full_heads[bidx]);
}
// Frees a huge allocation: maps `al` back to its owning HugeObj and releases
// that object through _freeHugeObj().
void HugeArena::free(GCAllocation* al) {
_freeHugeObj(HugeObj::fromAllocation(al));
}
void LargeArena::getStatistics(HeapStatistics* stats) {
LargeObj* cur = head;
GCAllocation* HugeArena::allocationFrom(void* ptr) {
HugeObj* cur = head;
while (cur) {
GCAllocation* al = cur->data;
addStatistic(stats, al, cur->size);
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
cur = cur->next;
}
return NULL;
}
// Sweeps the huge-object list starting at `head` via sweepList(); the callback
// releases each node it is handed through _freeHugeObj().
void HugeArena::freeUnmarked() {
sweepList(head, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
void HugeArena::getStatistics(HeapStatistics* stats) {
......@@ -757,23 +783,12 @@ void HugeArena::getStatistics(HeapStatistics* stats) {
}
}
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
small_arena.getStatistics(&stats);
large_arena.getStatistics(&stats);
huge_arena.getStatistics(&stats);
stats.conservative.print("conservative");
stats.untracked.print("untracked");
for (const auto& p : stats.by_cls) {
p.second.print(getFullNameOfClass(p.first).c_str());
}
stats.total.print("Total");
printf("\n");
// Unlinks `lobj` from the huge-object list and unmaps its memory.
void HugeArena::_freeHugeObj(HugeObj* lobj) {
removeFromLL(lobj);
// removeFromLL() leaves the node's own fields intact, so lobj can still be read here.
int r = munmap(lobj, lobj->mmap_size());
// NOTE(review): with NDEBUG defined the assert disappears, so a failed munmap would
// go unnoticed and `r` would be unused -- confirm that is acceptable.
assert(r == 0);
}
} // namespace gc
} // namespace pyston
......@@ -65,17 +65,20 @@ inline void clearMark(GCAllocation* header) {
#define PAGE_SIZE 4096
template <uintptr_t start> class Arena {
template <uintptr_t arena_start, uintptr_t arena_size> class Arena {
private:
void* cur;
void* end;
protected:
Arena() : cur((void*)start) {}
Arena() : cur((void*)arena_start), end((void*)(arena_start + arena_size)) {}
public:
void* doMmap(size_t size) {
assert(size % PAGE_SIZE == 0);
assert(((uint8_t*)cur + size) < end && "arena full");
void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
......@@ -83,9 +86,10 @@ public:
return mrtn;
}
bool contains(void* addr) { return (void*)start <= addr && addr < cur; }
bool contains(void* addr) { return (void*)arena_start <= addr && addr < cur; }
};
// Fixed address layout for the arenas.  ARENA_SIZE is 0x1000000000 bytes (64 GiB),
// and each *_ARENA_START below is exactly ARENA_SIZE above the previous one.
constexpr uintptr_t ARENA_SIZE = 0x1000000000L;
constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;
......@@ -94,8 +98,8 @@ constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;
//
// The SmallArena allocates objects <= 3584 bytes.
//
// it uses segregated-fit allocation, and each block contains free
// bitmap for objects of a given size (assigned to the block)
// it uses segregated-fit allocation, and each block contains a free
// bitmap for objects of a given size (constant for the block)
//
static const size_t sizes[] = {
16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,
......@@ -103,8 +107,20 @@ static const size_t sizes[] = {
};
static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);
class SmallArena : public Arena<SMALL_ARENA_START> {
class SmallArena : public Arena<SMALL_ARENA_START, ARENA_SIZE> {
public:
SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* al);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
void getStatistics(HeapStatistics* stats);
private:
template <int N> class Bitmap {
static_assert(N % 64 == 0, "");
......@@ -205,8 +221,7 @@ private:
static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");
// forward (public) definition of ThreadBlockCache so we can reference it both in this class (privately) and in Heap
// (for a friend ref).
struct ThreadBlockCache {
Heap* heap;
SmallArena* small;
......@@ -221,7 +236,6 @@ private:
};
Block* heads[NUM_BUCKETS];
Block* full_heads[NUM_BUCKETS];
......@@ -231,71 +245,46 @@ private:
// TODO only use thread caches if we're in GRWL mode?
threading::PerThreadSet<ThreadBlockCache, Heap*, SmallArena*> thread_caches;
Block* alloc_block(uint64_t size, Block** prev);
GCAllocation* allocFromBlock(Block* b);
Block* claimBlock(size_t rounded_size, Block** free_head);
void insertIntoLL(Block** next_pointer, Block* next);
void removeFromLL(Block* b);
Block** freeChain(Block** head);
void getChainStatistics(HeapStatistics* stats, Block** head);
Block* _allocBlock(uint64_t size, Block** prev);
GCAllocation* _allocFromBlock(Block* b);
Block* _claimBlock(size_t rounded_size, Block** free_head);
Block** _freeChain(Block** head);
void _getChainStatistics(HeapStatistics* stats, Block** head);
GCAllocation* __attribute__((__malloc__)) _alloc(size_t bytes, int bucket_idx);
void _free(GCAllocation* al, Block* b);
public:
SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}
// Rounds the request up to a bucket size from sizes[] and allocates from that bucket.
// Returns NULL when bytes is larger than every bucket size.
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
    // The two smallest buckets are checked directly, without scanning the table.
    if (bytes <= 16)
        return _alloc(16, 0);
    if (bytes <= 32)
        return _alloc(32, 1);

    // Scan the remaining buckets in index order and take the first one that fits.
    int bucket = 2;
    while (bucket < NUM_BUCKETS) {
        if (bytes <= sizes[bucket])
            return _alloc(sizes[bucket], bucket);
        ++bucket;
    }

    // No bucket is big enough for this request.
    return NULL;
}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
// Frees al by looking up the Block associated with its address and delegating to _free.
void free(GCAllocation* al) {
    _free(al, Block::forPointer(al));
}
void getStatistics(HeapStatistics* stats);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
};
//
// The LargeArena allocates objects where 3584 < size < 1024*1024 bytes.
// The LargeArena allocates objects where 3584 < size < 1024*1024 - CHUNK_SIZE - sizeof(LargeObj) bytes.
//
// it maintains a set of size-segregated free lists, and a special
// free list for larger objects. If the free list specific to a given
// size has no entries, we search the large free list.
//
class LargeArena : public Arena<LARGE_ARENA_START> {
struct LargeFreeChunk {
LargeFreeChunk* next_size;
size_t size;
};
// Blocks of 1meg are mmap'ed individually, and carved up as needed.
//
class LargeArena : public Arena<LARGE_ARENA_START, ARENA_SIZE> {
private:
// Bookkeeping record for one block of the LargeArena; records are chained
// through `next` only (singly linked).
// NOTE(review): the layout of free_chunk_map is not visible in this header —
// presumably one entry per chunk in the block; confirm against the allocator code.
struct LargeBlock {
// next block in the chain, or NULL at the end
LargeBlock* next;
// number of chunks in this block that are free (per the field name — confirm)
size_t num_free_chunks;
// map describing which chunks of this block are free
unsigned char* free_chunk_map;
};
// Node type of the LargeArena free lists (free_lists[] holds pointers to these).
struct LargeFreeChunk {
// next free chunk on the same list
LargeFreeChunk* next_size;
// size of this free chunk (unit not visible here — presumably bytes; confirm)
size_t size;
};
// Header placed in front of every allocation made by the LargeArena.
// `prev` holds the address of the pointer that refers to this node (either a
// list head or the previous node's `next`), matching the linked-list helpers
// used throughout this allocator.
struct LargeObj {
    LargeObj* next;
    LargeObj** prev;
    size_t size;
    // Zero-length trailing array: the GCAllocation starts right after the header fields.
    GCAllocation data[0];

    // Recovers the enclosing LargeObj from a pointer to its `data` member.
    static LargeObj* fromAllocation(GCAllocation* alloc) {
        return reinterpret_cast<LargeObj*>((char*)alloc - offsetof(LargeObj, data));
    }
};
/*
......@@ -311,20 +300,18 @@ class LargeArena : public Arena<LARGE_ARENA_START> {
static constexpr int NUM_FREE_LISTS = 32;
void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
LargeObj* _allocInternal(size_t size);
void _freeInternal(LargeObj* obj, size_t size);
void _free(LargeObj* obj);
Heap* heap;
LargeObj* head;
LargeBlock* blocks;
LargeFreeChunk* free_lists[NUM_FREE_LISTS]; /* 0 is for larger sizes */
Heap* heap;
void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
LargeObj* _alloc(size_t size);
void _freeLargeObj(LargeObj* obj);
public:
LargeArena(Heap* heap) : head(NULL), blocks(NULL), heap(heap) {}
LargeArena(Heap* heap) : heap(heap), head(NULL), blocks(NULL) {}
/* Largest object that can be allocated in a large block. */
static constexpr size_t ALLOC_SIZE_LIMIT = BLOCK_SIZE - CHUNK_SIZE - sizeof(LargeObj);
......@@ -333,9 +320,9 @@ public:
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
......@@ -343,7 +330,20 @@ public:
//
// Objects are allocated with individual mmap() calls, and kept in a
// linked list. They are not reused.
class HugeArena : public Arena<HUGE_ARENA_START> {
class HugeArena : public Arena<HUGE_ARENA_START, ARENA_SIZE> {
public:
HugeArena(Heap* heap) : heap(heap) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
void getStatistics(HeapStatistics* stats);
private:
struct HugeObj {
HugeObj* next, **prev;
size_t obj_size;
......@@ -369,18 +369,6 @@ class HugeArena : public Arena<HUGE_ARENA_START> {
HugeObj* head;
Heap* heap;
public:
HugeArena(Heap* heap) : heap(heap) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
......@@ -420,10 +408,10 @@ public:
return small_arena.alloc(bytes);
}
void destroyContents(GCAllocation* alloc);
void destructContents(GCAllocation* alloc);
void free(GCAllocation* alloc) {
destroyContents(alloc);
destructContents(alloc);
if (large_arena.contains(alloc)) {
large_arena.free(alloc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment