Commit 012d6d50 authored by Kevin Modzelewski

Merge pull request #298 from toshok/three-arenas

Add third GC arena
parents 7cf92757 d85c9893
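For orientation (this sketch is not part of the commit): the merge splits the GC heap into three fixed address-range arenas (small, large, huge) and routes each allocation by size. The standalone snippet below only restates the thresholds that appear in the heap.h changes further down; arenaFor and LARGE_ALLOC_LIMIT are illustrative names, and the limit is an approximation of LargeArena::ALLOC_SIZE_LIMIT.

// Standalone sketch: which arena serves a request of a given size.
#include <cstddef>
#include <cstdio>

// Largest small-arena bucket (last entry of sizes[] in the new heap.h).
constexpr size_t LARGEST_SMALL_BUCKET = 3584;
// Approximation of LargeArena::ALLOC_SIZE_LIMIT: a 1 MB block minus one
// 4 KB chunk and the LargeObj header (next, prev, size).
constexpr size_t LARGE_ALLOC_LIMIT = 1024 * 1024 - 4096 - 3 * sizeof(void*);

const char* arenaFor(size_t bytes) {
    if (bytes > LARGE_ALLOC_LIMIT)
        return "huge";   // one mmap per object
    if (bytes > LARGEST_SMALL_BUCKET)
        return "large";  // carved out of shared 1 MB blocks
    return "small";      // segregated-fit size buckets
}

int main() {
    const size_t requests[] = { 16, 3584, 4096, 1 << 19, 2 << 20 };
    for (size_t n : requests)
        std::printf("%8zu bytes -> %s arena\n", n, arenaFor(n));
}

Requests up to the largest small bucket stay in the segregated-fit small arena; anything too big for a shared 1 MB large-arena block falls through to per-object mmap in the huge arena.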
import time

PIDIGITS_LEN = 1500

def pidigits(length):
    i = k = ns = 0
    k1 = 1
    n, a, d, t, u = 1, 0, 1, 0, 0
    while(True):
        k += 1
        t = n << 1
        n *= k
        a += t
        k1 += 2
        a *= k1
        d *= k1
        if a >= n:
            t, u = divmod(n * 3 + a, d)
            u += n
            if d > u:
                ns = ns * 10 + t
                i += 1
                if i % 10 == 0:
                    ns = 0
                if i >= length:
                    break
                a -= d * t
                a *= 10
                n *= 10

def main(n):
    l = []
    for i in range(n):
        t0 = time.time()
        pidigits(PIDIGITS_LEN)
        l.append(time.time() - t0)
    return l

main(100)
@@ -51,6 +51,11 @@ extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
#endif
    GCAllocation* alloc = global_heap.alloc(alloc_bytes);

#ifndef NVALGRIND
    VALGRIND_DISABLE_ERROR_REPORTING;
#endif

    alloc->kind_id = kind_id;
    alloc->gc_flags = 0;

@@ -67,7 +72,10 @@ extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
    }

    void* r = alloc->user_data;

#ifndef NVALGRIND
    VALGRIND_ENABLE_ERROR_REPORTING;
    if (ENABLE_REDZONES) {
        r = ((char*)r) + REDZONE_SIZE;
    }
...
@@ -17,7 +17,6 @@
#include <cstdlib>
#include <cstring>
#include <stdint.h>

#include "core/common.h"
#include "core/util.h"

@@ -34,6 +33,67 @@
namespace pyston {
namespace gc {
void _doFree(GCAllocation* al);

// lots of linked lists around here, so let's just use template functions for operations on them.
template <class ListT> inline void nullNextPrev(ListT* node) {
    node->next = NULL;
    node->prev = NULL;
}

template <class ListT> inline void removeFromLL(ListT* node) {
    *node->prev = node->next;
    if (node->next)
        node->next->prev = node->prev;
}

template <class ListT> inline void removeFromLLAndNull(ListT* node) {
    *node->prev = node->next;
    if (node->next)
        node->next->prev = node->prev;
    nullNextPrev(node);
}

template <class ListT> inline void insertIntoLL(ListT** next_pointer, ListT* next) {
    assert(next_pointer);
    assert(next);
    assert(!next->next);
    assert(!next->prev);

    next->next = *next_pointer;
    if (next->next)
        next->next->prev = &next->next;
    *next_pointer = next;
    next->prev = next_pointer;
}

template <class ListT, typename Func> inline void forEach(ListT* list, Func func) {
    auto cur = list;
    while (cur) {
        func(cur);
        cur = cur->next;
    }
}

template <class ListT, typename Free> inline void sweepList(ListT* head, Free free_func) {
    auto cur = head;
    while (cur) {
        GCAllocation* al = cur->data;
        if (isMarked(al)) {
            clearMark(al);
            cur = cur->next;
        } else {
            _doFree(al);

            removeFromLL(cur);

            auto to_free = cur;
            cur = cur->next;
            free_func(to_free);
        }
    }
}
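These list helpers rely on an intrusive node layout in which prev does not point at the previous node but at the previous node's next field (or at the list head pointer), so removal needs no special case for the first element. Below is a standalone sketch of the same pattern with a throwaway Node type and freshly named helpers; none of it comes from the Pyston tree.

// Standalone illustration of the "prev points at the previous next-field"
// list used above; Node and the helper names are throwaway.
#include <cassert>
#include <cstdio>

struct Node {
    Node* next = nullptr;
    Node** prev = nullptr;
    int value = 0;
};

// Same shape as insertIntoLL(): link node in front of *head_ptr.
void insertFront(Node** head_ptr, Node* node) {
    assert(!node->next && !node->prev);
    node->next = *head_ptr;
    if (node->next)
        node->next->prev = &node->next;
    *head_ptr = node;
    node->prev = head_ptr;
}

// Same shape as removeFromLLAndNull(): unlink without caring whether the
// node is first, middle, or last.
void removeNode(Node* node) {
    *node->prev = node->next;
    if (node->next)
        node->next->prev = node->prev;
    node->next = nullptr;
    node->prev = nullptr;
}

int main() {
    Node a, b, c;
    a.value = 1;
    b.value = 2;
    c.value = 3;

    Node* head = nullptr;
    insertFront(&head, &a);  // list: a
    insertFront(&head, &b);  // list: b -> a
    insertFront(&head, &c);  // list: c -> b -> a

    removeNode(&b);          // no head/tail special cases needed

    for (Node* cur = head; cur; cur = cur->next)
        std::printf("%d ", cur->value);  // prints: 3 1
    std::printf("\n");
}

sweepList() above layers the mark check on top of the same unlink step, which is what lets LargeArena and HugeArena share one sweep routine later in this file.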
static unsigned bytesAllocatedSinceCollection;
static __thread unsigned thread_bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000

@@ -64,73 +124,268 @@ void registerGCManagedBytes(size_t bytes) {
Heap global_heap;

void _doFree(GCAllocation* al) {
    if (VERBOSITY() >= 2)
        printf("Freeing %p\n", al->user_data);

#ifndef NVALGRIND
    VALGRIND_DISABLE_ERROR_REPORTING;
#endif
    GCKind alloc_kind = al->kind_id;
#ifndef NVALGRIND
    VALGRIND_ENABLE_ERROR_REPORTING;
#endif

    if (alloc_kind == GCKind::PYTHON) {
#ifndef NVALGRIND
        VALGRIND_DISABLE_ERROR_REPORTING;
#endif
        Box* b = (Box*)al->user_data;
#ifndef NVALGRIND
        VALGRIND_ENABLE_ERROR_REPORTING;
#endif
        ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
        if (b->cls->simple_destructor)
            b->cls->simple_destructor(b);
    }
}

void Heap::destructContents(GCAllocation* al) {
    _doFree(al);
}

struct HeapStatistics {
    struct TypeStats {
        int64_t nallocs;
        int64_t nbytes;
        TypeStats() : nallocs(0), nbytes(0) {}

        void print(const char* name) const {
            if (nbytes > (1 << 20))
                printf("%s: %ld allocations for %.1f MB\n", name, nallocs, nbytes * 1.0 / (1 << 20));
            else if (nbytes > (1 << 10))
                printf("%s: %ld allocations for %.1f KB\n", name, nallocs, nbytes * 1.0 / (1 << 10));
            else
                printf("%s: %ld allocations for %ld bytes\n", name, nallocs, nbytes);
        }
    };
    std::unordered_map<BoxedClass*, TypeStats> by_cls;
    TypeStats conservative, untracked;
    TypeStats total;
};

void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
    stats->total.nallocs++;
    stats->total.nbytes += nbytes;

    if (al->kind_id == GCKind::PYTHON) {
        Box* b = (Box*)al->user_data;
        auto& t = stats->by_cls[b->cls];

        t.nallocs++;
        t.nbytes += nbytes;
    } else if (al->kind_id == GCKind::CONSERVATIVE) {
        stats->conservative.nallocs++;
        stats->conservative.nbytes += nbytes;
    } else if (al->kind_id == GCKind::UNTRACKED) {
        stats->untracked.nallocs++;
        stats->untracked.nbytes += nbytes;
    } else {
        RELEASE_ASSERT(0, "%d", (int)al->kind_id);
    }
}

void Heap::dumpHeapStatistics() {
    threading::GLPromoteRegion _lock;

    HeapStatistics stats;

    small_arena.getStatistics(&stats);
    large_arena.getStatistics(&stats);
    huge_arena.getStatistics(&stats);

    stats.conservative.print("conservative");
    stats.untracked.print("untracked");
    for (const auto& p : stats.by_cls) {
        p.second.print(getFullNameOfClass(p.first).c_str());
    }
    stats.total.print("Total");
    printf("\n");
}

void dumpHeapStatistics() {
    global_heap.dumpHeapStatistics();
}
//////
/// Small Arena

GCAllocation* SmallArena::alloc(size_t bytes) {
    registerGCManagedBytes(bytes);
    if (bytes <= 16)
        return _alloc(16, 0);
    else if (bytes <= 32)
        return _alloc(32, 1);
    else {
        for (int i = 2; i < NUM_BUCKETS; i++) {
            if (sizes[i] >= bytes) {
                return _alloc(sizes[i], i);
            }
        }
        return NULL;
    }
}

GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
    Block* b = Block::forPointer(al);

    size_t size = b->size;

    if (size >= bytes && size < bytes * 2)
        return al;

    GCAllocation* rtn = heap->alloc(bytes);

#ifndef NVALGRIND
    VALGRIND_DISABLE_ERROR_REPORTING;
    memcpy(rtn, al, std::min(bytes, size));
    VALGRIND_ENABLE_ERROR_REPORTING;
#else
    memcpy(rtn, al, std::min(bytes, size));
#endif

    free(al);
    return rtn;
}

void SmallArena::free(GCAllocation* alloc) {
    Block* b = Block::forPointer(alloc);
    size_t size = b->size;
    int offset = (char*)alloc - (char*)b;
    assert(offset % size == 0);
    int atom_idx = offset / ATOM_SIZE;

    assert(!b->isfree.isSet(atom_idx));
    b->isfree.set(atom_idx);

#ifndef NVALGRIND
// VALGRIND_MEMPOOL_FREE(b, ptr);
#endif
}

GCAllocation* SmallArena::allocationFrom(void* ptr) {
    Block* b = Block::forPointer(ptr);
    size_t size = b->size;
    int offset = (char*)ptr - (char*)b;
    int obj_idx = offset / size;

    if (obj_idx < b->minObjIndex() || obj_idx >= b->numObjects())
        return NULL;

    int atom_idx = obj_idx * b->atomsPerObj();

    if (b->isfree.isSet(atom_idx))
        return NULL;

    return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
}

void SmallArena::freeUnmarked() {
    thread_caches.forEachValue([this](ThreadBlockCache* cache) {
        for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
            Block* h = cache->cache_free_heads[bidx];
            // Try to limit the amount of unused memory a thread can hold onto;
            // currently pretty dumb, just limit the number of blocks in the free-list
            // to 50.  (blocks in the full list don't need to be limited, since we're sure
            // that the thread had just actively used those.)
            // Eventually may want to come up with some scrounging system.
            // TODO does this thread locality even help at all?
            for (int i = 0; i < 50; i++) {
                if (h)
                    h = h->next;
                else
                    break;
            }
            if (h) {
                removeFromLLAndNull(h);
                insertIntoLL(&heads[bidx], h);
            }

            Block** chain_end = _freeChain(&cache->cache_free_heads[bidx]);
            _freeChain(&cache->cache_full_heads[bidx]);

            while (Block* b = cache->cache_full_heads[bidx]) {
                removeFromLLAndNull(b);
                insertIntoLL(chain_end, b);
            }
        }
    });

    for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
        Block** chain_end = _freeChain(&heads[bidx]);
        _freeChain(&full_heads[bidx]);

        while (Block* b = full_heads[bidx]) {
            removeFromLLAndNull(b);
            insertIntoLL(chain_end, b);
        }
    }
}

// TODO: copy-pasted from freeUnmarked()
void SmallArena::getStatistics(HeapStatistics* stats) {
    thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
        for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
            Block* h = cache->cache_free_heads[bidx];

            _getChainStatistics(stats, &cache->cache_free_heads[bidx]);
            _getChainStatistics(stats, &cache->cache_full_heads[bidx]);
        }
    });

    for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
        _getChainStatistics(stats, &heads[bidx]);
        _getChainStatistics(stats, &full_heads[bidx]);
    }
}
SmallArena::Block** SmallArena::_freeChain(Block** head) {
    while (Block* b = *head) {
        int num_objects = b->numObjects();
        int first_obj = b->minObjIndex();
        int atoms_per_obj = b->atomsPerObj();

        for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
            int atom_idx = obj_idx * atoms_per_obj;

            if (b->isfree.isSet(atom_idx))
                continue;

            void* p = &b->atoms[atom_idx];
            GCAllocation* al = reinterpret_cast<GCAllocation*>(p);

            if (isMarked(al)) {
                clearMark(al);
            } else {
                _doFree(al);

                // assert(p != (void*)0x127000d960); // the main module
                b->isfree.set(atom_idx);
            }
        }

        head = &b->next;
    }
    return head;
}

SmallArena::Block* SmallArena::_allocBlock(uint64_t size, Block** prev) {
    Block* rtn = (Block*)doMmap(sizeof(Block));
    assert(rtn);
    rtn->size = size;
    rtn->num_obj = BLOCK_SIZE / size;

@@ -165,45 +420,23 @@ static Block* alloc_block(uint64_t size, Block** prev) {
    return rtn;
}

SmallArena::ThreadBlockCache::~ThreadBlockCache() {
    LOCK_REGION(heap->lock);

    for (int i = 0; i < NUM_BUCKETS; i++) {
        while (Block* b = cache_free_heads[i]) {
            removeFromLLAndNull(b);
            insertIntoLL(&small->heads[i], b);
        }

        while (Block* b = cache_full_heads[i]) {
            removeFromLLAndNull(b);
            insertIntoLL(&small->full_heads[i], b);
        }
    }
}

GCAllocation* SmallArena::_allocFromBlock(Block* b) {
    int idx = b->isfree.scanForNext(b->next_to_check);
    if (idx == -1)
        return NULL;

@@ -212,19 +445,17 @@ static GCAllocation* allocFromBlock(Block* b) {
    return reinterpret_cast<GCAllocation*>(rtn);
}

SmallArena::Block* SmallArena::_claimBlock(size_t rounded_size, Block** free_head) {
    Block* free_block = *free_head;
    if (free_block) {
        removeFromLLAndNull(free_block);
        return free_block;
    }

    return _allocBlock(rounded_size, NULL);
}

GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
    Block** free_head = &heads[bucket_idx];
    Block** full_head = &full_heads[bucket_idx];

@@ -241,11 +472,11 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
    while (true) {
        while (Block* cache_block = *cache_head) {
            GCAllocation* rtn = _allocFromBlock(cache_block);
            if (rtn)
                return rtn;

            removeFromLLAndNull(cache_block);
            insertIntoLL(&cache->cache_full_heads[bucket_idx], cache_block);
        }

@@ -253,12 +484,12 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
        // static StatCounter sc_fallback("gc_allocs_cachemiss");
        // sc_fallback.log();

        LOCK_REGION(heap->lock);

        assert(*cache_head == NULL);

        // should probably be called allocBlock:
        Block* myblock = _claimBlock(rounded_size, &heads[bucket_idx]);
        assert(myblock);
        assert(!myblock->next);
        assert(!myblock->prev);

@@ -269,322 +500,290 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
    }
}

// TODO: copy-pasted from _freeChain
void SmallArena::_getChainStatistics(HeapStatistics* stats, Block** head) {
    while (Block* b = *head) {
        int num_objects = b->numObjects();
        int first_obj = b->minObjIndex();
        int atoms_per_obj = b->atomsPerObj();

        for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
            int atom_idx = obj_idx * atoms_per_obj;

            if (b->isfree.isSet(atom_idx))
                continue;

            void* p = &b->atoms[atom_idx];
            GCAllocation* al = reinterpret_cast<GCAllocation*>(p);

            addStatistic(stats, al, b->size);
        }

        head = &b->next;
    }
}
//////
/// Large Arena

#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)

#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)

GCAllocation* LargeArena::alloc(size_t size) {
    registerGCManagedBytes(size);

    LOCK_REGION(heap->lock);

    // printf ("allocLarge %zu\n", size);

    LargeObj* obj = _alloc(size + sizeof(GCAllocation) + sizeof(LargeObj));

    obj->size = size;

    nullNextPrev(obj);
    insertIntoLL(&head, obj);

    return obj->data;
}

GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
    LargeObj* obj = LargeObj::fromAllocation(al);

    size_t size = obj->size;
    if (size >= bytes && size < bytes * 2)
        return al;

    GCAllocation* rtn = heap->alloc(bytes);

#ifndef NVALGRIND
    VALGRIND_DISABLE_ERROR_REPORTING;
    memcpy(rtn, al, std::min(bytes, size));
    VALGRIND_ENABLE_ERROR_REPORTING;
#else
    memcpy(rtn, al, std::min(bytes, size));
#endif

    _freeLargeObj(obj);
    return rtn;
}

void LargeArena::free(GCAllocation* al) {
    _freeLargeObj(LargeObj::fromAllocation(al));
}

GCAllocation* LargeArena::allocationFrom(void* ptr) {
    LargeObj* obj = NULL;

    for (obj = head; obj; obj = obj->next) {
        char* end = (char*)&obj->data + obj->size;

        if (ptr >= obj->data && ptr < end) {
            return &obj->data[0];
        }
    }
    return NULL;
}

void LargeArena::freeUnmarked() {
    sweepList(head, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
}

void LargeArena::getStatistics(HeapStatistics* stats) {
    forEach(head, [stats](LargeObj* obj) { addStatistic(stats, obj->data, obj->size); });
}

void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
    size_t num_chunks = size >> CHUNK_BITS;

    free_chunks->size = size;

    if (num_chunks >= NUM_FREE_LISTS)
        num_chunks = 0;
    free_chunks->next_size = free_lists[num_chunks];
    free_lists[num_chunks] = free_chunks;
}

LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
    LargeFreeChunk* free_chunks = NULL;
    LargeBlock* section;
    size_t i, num_chunks, start_index;

    assert((size & (CHUNK_SIZE - 1)) == 0);

    while (*list) {
        free_chunks = *list;
        if (free_chunks->size >= size)
            break;
        list = &(*list)->next_size;
    }

    if (!*list)
        return NULL;

    *list = free_chunks->next_size;

    if (free_chunks->size > size)
        add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);

    num_chunks = size >> CHUNK_BITS;

    section = LARGE_BLOCK_FOR_OBJ(free_chunks);

    start_index = LARGE_CHUNK_INDEX(free_chunks, section);
    for (i = start_index; i < start_index + num_chunks; ++i) {
        assert(section->free_chunk_map[i]);
        section->free_chunk_map[i] = 0;
    }

    section->num_free_chunks -= size >> CHUNK_BITS;
    assert(section->num_free_chunks >= 0);

    return free_chunks;
}

LargeArena::LargeObj* LargeArena::_alloc(size_t size) {
    LargeBlock* section;
    LargeFreeChunk* free_chunks;
    size_t num_chunks;

    size += CHUNK_SIZE - 1;
    size &= ~(CHUNK_SIZE - 1);

    num_chunks = size >> CHUNK_BITS;

    assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
    assert(num_chunks > 0);

retry:
    if (num_chunks >= NUM_FREE_LISTS) {
        free_chunks = get_from_size_list(&free_lists[0], size);
    } else {
        size_t i;
        for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
            free_chunks = get_from_size_list(&free_lists[i], size);
            if (free_chunks)
                break;
        }
        if (!free_chunks)
            free_chunks = get_from_size_list(&free_lists[0], size);
    }

    if (free_chunks)
        return (LargeObj*)free_chunks;

    section = (LargeBlock*)doMmap(BLOCK_SIZE);

    if (!section)
        return NULL;

    free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
    free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
    free_chunks->next_size = free_lists[0];
    free_lists[0] = free_chunks;

    section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;

    section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
    assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
    section->free_chunk_map[0] = 0;
    memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);

    section->next = blocks;
    blocks = section;

    goto retry;
}

void LargeArena::_freeLargeObj(LargeObj* obj) {
    removeFromLL(obj);

    size_t size = obj->size;
    LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
    size_t num_chunks, i, start_index;

    size += CHUNK_SIZE - 1;
    size &= ~(CHUNK_SIZE - 1);

    num_chunks = size >> CHUNK_BITS;

    assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
    assert(num_chunks > 0);

    section->num_free_chunks += num_chunks;
    assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);

    /*
     * We could free the LOS section here if it's empty, but we
     * can't unless we also remove its free chunks from the fast
     * free lists. Instead, we do it in los_sweep().
     */

    start_index = LARGE_CHUNK_INDEX(obj, section);
    for (i = start_index; i < start_index + num_chunks; ++i) {
        assert(!section->free_chunk_map[i]);
        section->free_chunk_map[i] = 1;
    }

    add_free_chunk((LargeFreeChunk*)obj, size);
}
//////
/// Huge Arena

GCAllocation* HugeArena::alloc(size_t size) {
    registerGCManagedBytes(size);

    LOCK_REGION(heap->lock);

    size_t total_size = size + sizeof(HugeObj);
    total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
    HugeObj* rtn = (HugeObj*)doMmap(total_size);
    rtn->obj_size = size;

    nullNextPrev(rtn);
    insertIntoLL(&head, rtn);

    return rtn->data;
}

GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
    HugeObj* obj = HugeObj::fromAllocation(al);

    int capacity = obj->capacity();
    if (capacity >= bytes && capacity < bytes * 2)
        return al;

    GCAllocation* rtn = heap->alloc(bytes);
    memcpy(rtn, al, std::min(bytes, obj->obj_size));

    _freeHugeObj(obj);
    return rtn;
}

void HugeArena::free(GCAllocation* al) {
    _freeHugeObj(HugeObj::fromAllocation(al));
}

GCAllocation* HugeArena::allocationFrom(void* ptr) {
    HugeObj* cur = head;
    while (cur) {
        if (ptr >= cur && ptr < &cur->data[cur->obj_size])
            return &cur->data[0];
        cur = cur->next;
    }
    return NULL;
}

void HugeArena::freeUnmarked() {
    sweepList(head, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}

void HugeArena::getStatistics(HeapStatistics* stats) {
    forEach(head, [stats](HugeObj* obj) { addStatistic(stats, obj->data, obj->capacity()); });
}

void HugeArena::_freeHugeObj(HugeObj* lobj) {
    removeFromLL(lobj);

    int r = munmap(lobj, lobj->mmap_size());
    assert(r == 0);
}
} // namespace gc
} // namespace pyston
@@ -17,6 +17,7 @@
#include <cstddef>
#include <cstdint>
#include <sys/mman.h>

#include "core/common.h"
#include "core/threading.h"

@@ -24,6 +25,9 @@
namespace pyston {
namespace gc {
class Heap;
struct HeapStatistics;

typedef uint8_t kindid_t;
struct GCAllocation {
    unsigned int gc_flags : 8;

@@ -59,173 +63,394 @@ inline void clearMark(GCAllocation* header) {
#undef MARK_BIT
#define PAGE_SIZE 4096

template <uintptr_t arena_start, uintptr_t arena_size> class Arena {
private:
    void* cur;
    void* end;

protected:
    Arena() : cur((void*)arena_start), end((void*)(arena_start + arena_size)) {}

public:
    void* doMmap(size_t size) {
        assert(size % PAGE_SIZE == 0);

        assert(((uint8_t*)cur + size) < end && "arena full");

        void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
        ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
        cur = (uint8_t*)cur + size;
        return mrtn;
    }

    bool contains(void* addr) { return (void*)arena_start <= addr && addr < cur; }
};

constexpr uintptr_t ARENA_SIZE = 0x1000000000L;
constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;

//
// The SmallArena allocates objects <= 3584 bytes.
//
// it uses segregated-fit allocation, and each block contains a free
// bitmap for objects of a given size (constant for the block)
//
static const size_t sizes[] = {
    16,  32,  48,  64,  80,  96,  112, 128,  160,  192,  224,  256,  320, 384,
    448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072, 3584, // 4096,
};
static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);
class SmallArena : public Arena<SMALL_ARENA_START, ARENA_SIZE> {
public:
    SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* al);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);

private:
    template <int N> class Bitmap {
        static_assert(N % 64 == 0, "");

    private:
        uint64_t data[N / 64];

    public:
        void setAllZero() { memset(data, 0, sizeof(data)); }

        struct Scanner {
        private:
            int next_to_check;
            friend class Bitmap<N>;

        public:
            void reset() { next_to_check = 0; }
        };

        bool isSet(int idx) { return (data[idx / 64] >> (idx % 64)) & 1; }

        void set(int idx) { data[idx / 64] |= 1UL << (idx % 64); }

        void toggle(int idx) { data[idx / 64] ^= 1UL << (idx % 64); }

        void clear(int idx) { data[idx / 64] &= ~(1UL << (idx % 64)); }

        int scanForNext(Scanner& sc) {
            uint64_t mask = data[sc.next_to_check];

            if (unlikely(mask == 0L)) {
                while (true) {
                    sc.next_to_check++;
                    if (sc.next_to_check == N / 64) {
                        sc.next_to_check = 0;
                        return -1;
                    }
                    mask = data[sc.next_to_check];
                    if (likely(mask != 0L)) {
                        break;
                    }
                }
            }

            int i = sc.next_to_check;
            int first = __builtin_ctzll(mask);
            assert(first < 64);
            assert(data[i] & (1L << first));
            data[i] ^= (1L << first);
            int idx = first + i * 64;
            return idx;
        }
    };

    static constexpr size_t BLOCK_SIZE = 4 * 4096;

#define ATOM_SIZE 16
    static_assert(BLOCK_SIZE % ATOM_SIZE == 0, "");
#define ATOMS_PER_BLOCK (BLOCK_SIZE / ATOM_SIZE)
    static_assert(ATOMS_PER_BLOCK % 64 == 0, "");
#define BITFIELD_SIZE (ATOMS_PER_BLOCK / 8)
#define BITFIELD_ELTS (BITFIELD_SIZE / 8)

#define BLOCK_HEADER_SIZE (BITFIELD_SIZE + 4 * sizeof(void*))
#define BLOCK_HEADER_ATOMS ((BLOCK_HEADER_SIZE + ATOM_SIZE - 1) / ATOM_SIZE)

    struct Atoms {
        char _data[ATOM_SIZE];
    };

    struct Block {
        union {
            struct {
                Block* next, **prev;
                uint32_t size;
                uint16_t num_obj;
                uint8_t min_obj_index;
                uint8_t atoms_per_obj;
                Bitmap<ATOMS_PER_BLOCK> isfree;
                Bitmap<ATOMS_PER_BLOCK>::Scanner next_to_check;
                void* _header_end[0];
            };
            Atoms atoms[ATOMS_PER_BLOCK];
        };

        inline int minObjIndex() const { return min_obj_index; }

        inline int numObjects() const { return num_obj; }

        inline int atomsPerObj() const { return atoms_per_obj; }

        static Block* forPointer(void* ptr) { return (Block*)((uintptr_t)ptr & ~(BLOCK_SIZE - 1)); }
    };
    static_assert(sizeof(Block) == BLOCK_SIZE, "bad size");
    static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
    static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");

    struct ThreadBlockCache {
        Heap* heap;
        SmallArena* small;
        Block* cache_free_heads[NUM_BUCKETS];
        Block* cache_full_heads[NUM_BUCKETS];

        ThreadBlockCache(Heap* heap, SmallArena* small) : heap(heap), small(small) {
            memset(cache_free_heads, 0, sizeof(cache_free_heads));
            memset(cache_full_heads, 0, sizeof(cache_full_heads));
        }
        ~ThreadBlockCache();
    };

    Block* heads[NUM_BUCKETS];
    Block* full_heads[NUM_BUCKETS];

    friend struct ThreadBlockCache;

    Heap* heap;
    // TODO only use thread caches if we're in GRWL mode?
    threading::PerThreadSet<ThreadBlockCache, Heap*, SmallArena*> thread_caches;

    Block* _allocBlock(uint64_t size, Block** prev);
    GCAllocation* _allocFromBlock(Block* b);
    Block* _claimBlock(size_t rounded_size, Block** free_head);
    Block** _freeChain(Block** head);
    void _getChainStatistics(HeapStatistics* stats, Block** head);

    GCAllocation* __attribute__((__malloc__)) _alloc(size_t bytes, int bucket_idx);
};
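Because each block is BLOCK_SIZE-aligned and serves a single size class, mapping a pointer to its block, object index, and free-bitmap (atom) index is plain arithmetic; this is what SmallArena::free() and allocationFrom() do in heap.cpp above. The sketch below redoes that math for a hypothetical 48-byte bucket; the SKETCH_-prefixed constants and the example addresses are local to this illustration.

// Standalone sketch of the small-arena block/atom math (illustrative only).
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t SKETCH_BLOCK_SIZE = 4 * 4096;  // 16 KB per block
constexpr size_t SKETCH_ATOM_SIZE = 16;         // granularity of the isfree bitmap

int main() {
    size_t obj_size = 48;  // one size bucket; constant for the whole block

    size_t atoms_per_obj = obj_size / SKETCH_ATOM_SIZE;    // 3 atoms per object
    size_t objs_per_block = SKETCH_BLOCK_SIZE / obj_size;  // 341 slots (the first few hold the header)

    // A block start (always BLOCK_SIZE-aligned) and a pointer 5 objects in:
    uintptr_t block = 0x1270000000UL;
    uintptr_t ptr = block + 5 * obj_size;

    // Mirrors Block::forPointer() and the index math in allocationFrom()/free():
    uintptr_t block_of_ptr = ptr & ~(uintptr_t)(SKETCH_BLOCK_SIZE - 1);
    size_t offset = ptr - block_of_ptr;
    size_t obj_idx = offset / obj_size;          // which object slot
    size_t atom_idx = obj_idx * atoms_per_obj;   // index into the isfree bitmap

    std::printf("atoms/obj=%zu objs/block=%zu obj_idx=%zu atom_idx=%zu\n",
                atoms_per_obj, objs_per_block, obj_idx, atom_idx);
}

The header occupies the first few atoms of every block, which is why allocationFrom() checks the index against minObjIndex() before treating it as a live object.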
//
// The LargeArena allocates objects where 3584 < size < 1024*1024 - CHUNK_SIZE - sizeof(LargeObj) bytes.
//
// it maintains a set of size-segregated free lists, and a special
// free list for larger objects. If the free list specific to a given
// size has no entries, we search the large free list.
//
// Blocks of 1meg are mmap'ed individually, and carved up as needed.
//
class LargeArena : public Arena<LARGE_ARENA_START, ARENA_SIZE> {
private:
    struct LargeBlock {
        LargeBlock* next;
        size_t num_free_chunks;
        unsigned char* free_chunk_map;
    };

    struct LargeFreeChunk {
        LargeFreeChunk* next_size;
        size_t size;
    };

    struct LargeObj {
        LargeObj* next, **prev;
        size_t size;
        GCAllocation data[0];

        static LargeObj* fromAllocation(GCAllocation* alloc) {
            char* rtn = (char*)alloc - offsetof(LargeObj, data);
            return reinterpret_cast<LargeObj*>(rtn);
        }
    };

    /*
     * This shouldn't be much smaller or larger than the largest small size bucket.
     * Must be at least sizeof (LargeBlock).
     */
    static constexpr size_t CHUNK_SIZE = 4096;
    static constexpr int CHUNK_BITS = 12;

    static_assert(CHUNK_SIZE > sizeof(LargeBlock), "bad large block size");

    static constexpr int BLOCK_SIZE = 1024 * 1024;
    static constexpr int NUM_FREE_LISTS = 32;

    Heap* heap;
    LargeObj* head;
    LargeBlock* blocks;
    LargeFreeChunk* free_lists[NUM_FREE_LISTS]; /* 0 is for larger sizes */

    void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
    LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
    LargeObj* _alloc(size_t size);
    void _freeLargeObj(LargeObj* obj);

public:
    LargeArena(Heap* heap) : heap(heap), head(NULL), blocks(NULL) {}

    /* Largest object that can be allocated in a large block. */
    static constexpr size_t ALLOC_SIZE_LIMIT = BLOCK_SIZE - CHUNK_SIZE - sizeof(LargeObj);

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* alloc);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);
};
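The large arena hands out memory in CHUNK_SIZE (4 KB) units carved from 1 MB blocks: requests are rounded up to whole chunks, free runs sit on free lists indexed by their chunk count, and list 0 doubles as the catch-all for runs of NUM_FREE_LISTS (32) chunks or more; the first chunk of each block holds the LargeBlock header and free map, which is where LARGE_BLOCK_NUM_CHUNKS = 255 comes from in heap.cpp. A standalone sketch of that accounting, with copied constants and made-up helper names:

// Standalone sketch of large-arena chunk accounting (made-up helper names).
#include <cstddef>
#include <cstdio>

constexpr size_t SKETCH_CHUNK_SIZE = 4096;
constexpr int SKETCH_CHUNK_BITS = 12;
constexpr int SKETCH_NUM_FREE_LISTS = 32;

// Round a request up to whole chunks, as LargeArena::_alloc() does.
size_t roundToChunks(size_t size) {
    return (size + SKETCH_CHUNK_SIZE - 1) & ~(SKETCH_CHUNK_SIZE - 1);
}

// Which free list a run of 'size' bytes lives on: lists are indexed by chunk
// count, and list 0 doubles as the catch-all for very large runs.
int freeListIndex(size_t size) {
    size_t num_chunks = size >> SKETCH_CHUNK_BITS;
    return num_chunks >= (size_t)SKETCH_NUM_FREE_LISTS ? 0 : (int)num_chunks;
}

int main() {
    const size_t requests[] = { 4000, 5000, 70000, 200000 };
    for (size_t request : requests) {
        size_t rounded = roundToChunks(request);
        std::printf("request %6zu -> %6zu bytes = %2zu chunks, free list %d\n",
                    request, rounded, rounded >> SKETCH_CHUNK_BITS, freeListIndex(rounded));
    }
}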
// The HugeArena allocates objects where size > 1024*1024 bytes.
//
// Objects are allocated with individual mmap() calls, and kept in a
// linked list. They are not reused.
class HugeArena : public Arena<HUGE_ARENA_START, ARENA_SIZE> {
public:
    HugeArena(Heap* heap) : heap(heap) {}

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* alloc);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);

private:
    struct HugeObj {
        HugeObj* next, **prev;
        size_t obj_size;
        GCAllocation data[0];

        int mmap_size() {
            size_t total_size = obj_size + sizeof(HugeObj);
            total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
            return total_size;
        }

        int capacity() { return mmap_size() - sizeof(HugeObj); }

        static HugeObj* fromAllocation(GCAllocation* alloc) {
            char* rtn = (char*)alloc - offsetof(HugeObj, data);
            assert((uintptr_t)rtn % PAGE_SIZE == 0);
            return reinterpret_cast<HugeObj*>(rtn);
        }
    };

    void _freeHugeObj(HugeObj* lobj);

    HugeObj* head;

    Heap* heap;
};
class Heap {
private:
    SmallArena small_arena;
    LargeArena large_arena;
    HugeArena huge_arena;

    friend class SmallArena;
    friend class LargeArena;
    friend class HugeArena;

    // DS_DEFINE_MUTEX(lock);
    DS_DEFINE_SPINLOCK(lock);

public:
    Heap() : small_arena(this), large_arena(this), huge_arena(this) {}

    GCAllocation* realloc(GCAllocation* alloc, size_t bytes) {
        // TODO(toshok): there is duplicate code in each of the
        // ::realloc methods to test whether the allocation can be
        // reused.  Would be nice to factor it all out here into this
        // method.
        if (large_arena.contains(alloc)) {
            return large_arena.realloc(alloc, bytes);
        } else if (huge_arena.contains(alloc)) {
            return huge_arena.realloc(alloc, bytes);
        }

        assert(small_arena.contains(alloc));
        return small_arena.realloc(alloc, bytes);
    }

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
        if (bytes > LargeArena::ALLOC_SIZE_LIMIT)
            return huge_arena.alloc(bytes);
        else if (bytes > sizes[NUM_BUCKETS - 1])
            return large_arena.alloc(bytes);
        else
            return small_arena.alloc(bytes);
    }

    void destructContents(GCAllocation* alloc);

    void free(GCAllocation* alloc) {
        destructContents(alloc);

        if (large_arena.contains(alloc)) {
            large_arena.free(alloc);
            return;
        }

        if (huge_arena.contains(alloc)) {
            huge_arena.free(alloc);
            return;
        }

        assert(small_arena.contains(alloc));
        small_arena.free(alloc);
    }

    // not thread safe:
    GCAllocation* getAllocationFromInteriorPointer(void* ptr) {
        if (large_arena.contains(ptr)) {
            return large_arena.allocationFrom(ptr);
        } else if (huge_arena.contains(ptr)) {
            return huge_arena.allocationFrom(ptr);
        } else if (small_arena.contains(ptr)) {
            return small_arena.allocationFrom(ptr);
        }

        return NULL;
    }

    // not thread safe:
    void freeUnmarked() {
        small_arena.freeUnmarked();
        large_arena.freeUnmarked();
        huge_arena.freeUnmarked();
    }

    void dumpHeapStatistics();
};
...
@@ -68,6 +68,9 @@ TEST(alloc, alloc64) { testAlloc(64); }
TEST(alloc, alloc128) { testAlloc(128); }
TEST(alloc, alloc258) { testAlloc(258); }
TEST(alloc, alloc3584) { testAlloc(3584); }
TEST(alloc, alloc4096) { testAlloc(4096); }
TEST(alloc, alloc8192) { testAlloc(8192); }
TEST(alloc, alloc16384) { testAlloc(16384); }

TEST(alloc, largeallocs) {
    int s1 = 1 << 20;
...