Commit a80e0725 authored by Kevin Modzelewski

Merge pull request #621 from kmod/microoptimizations

Some small micro-optimizations
parents 9067a589 cf19a38c
......@@ -997,7 +997,7 @@ endef
.PHONY: perf_report
perf_report:
perf report -v -n -g flat,1000 | bash $(TOOLS_DIR)/cumulate.sh | less -S
perf report -n
.PHONY: run run_% dbg_% debug_% perf_%
run: run_dbg
......
......@@ -103,7 +103,7 @@ public:
// This must not be inlined, because we rely on being able to detect when we're inside of it (by checking whether
// %rip is inside its instruction range) during a stack-trace in order to produce tracebacks inside interpreted
// code.
__attribute__((__no_inline__)) static Value
__attribute__((__no_inline__)) __attribute__((noinline)) static Value
executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at, RegisterHelper* reg);
......
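The comment above hinges on checking whether %rip falls inside executeInner's instruction range. As a minimal, hypothetical sketch of why inlining would break that (the bounds variables and frameIsInterpreted are illustrative, not Pyston's actual unwinder code):

```cpp
#include <cstdint>

// Hypothetical bounds for executeInner's machine code; in the real tracer these
// would come from symbol/unwind information, not hard-coded values.
static uintptr_t interpreter_code_start = 0;
static uintptr_t interpreter_code_end = 0;

// If executeInner were inlined, the saved %rip of an interpreter frame would lie
// inside the caller's range instead, and this check would silently miss it.
static bool frameIsInterpreted(uintptr_t rip) {
    return rip >= interpreter_code_start && rip < interpreter_code_end;
}
```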
......@@ -107,43 +107,22 @@ inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, Free fr
}
}
static unsigned bytesAllocatedSinceCollection;
static __thread unsigned thread_bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000
unsigned bytesAllocatedSinceCollection;
static StatCounter gc_registered_bytes("gc_registered_bytes");
void _bytesAllocatedTripped() {
gc_registered_bytes.log(bytesAllocatedSinceCollection);
bytesAllocatedSinceCollection = 0;
void registerGCManagedBytes(size_t bytes) {
thread_bytesAllocatedSinceCollection += bytes;
if (unlikely(thread_bytesAllocatedSinceCollection > ALLOCBYTES_PER_COLLECTION / 4)) {
gc_registered_bytes.log(thread_bytesAllocatedSinceCollection);
bytesAllocatedSinceCollection += thread_bytesAllocatedSinceCollection;
thread_bytesAllocatedSinceCollection = 0;
if (bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION) {
if (!gcIsEnabled())
return;
// bytesAllocatedSinceCollection = 0;
// threading::GLPromoteRegion _lock;
// runCollection();
threading::GLPromoteRegion _lock;
if (bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION) {
runCollection();
bytesAllocatedSinceCollection = 0;
}
}
}
}
if (!gcIsEnabled())
return;
threading::GLPromoteRegion _lock;
runCollection();
}
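The new accounting path above splits the work into a cheap per-thread counter and a rarer flush into the shared counter. A self-contained sketch of that batching pattern, with made-up names standing in for thread_bytesAllocatedSinceCollection and _bytesAllocatedTripped(), might look like this:

```cpp
#include <cstddef>

static unsigned global_bytes = 0;            // shared counter (guarded by the GC lock in real code)
static __thread unsigned local_bytes = 0;    // per-thread fast-path counter
static const unsigned kTripEvery = 10000000; // plays the role of ALLOCBYTES_PER_COLLECTION

static void slowPathMaybeCollect() {
    // Stand-in for _bytesAllocatedTripped(): in Pyston this takes the GC lock
    // and runs a collection; here it just resets the shared counter.
    global_bytes = 0;
}

inline void noteAllocated(size_t bytes) {
    local_bytes += bytes;
    // Only touch the shared counter once per ~2.5 MB allocated on this thread,
    // so the common allocation path is a single thread-local add.
    if (__builtin_expect(local_bytes > kTripEvery / 4, 0)) {
        global_bytes += local_bytes;
        local_bytes = 0;
        if (global_bytes >= kTripEvery)
            slowPathMaybeCollect();
    }
}
```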
Heap global_heap;
bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
if (VERBOSITY() >= 4)
printf("Freeing %p\n", al->user_data);
__attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
......@@ -459,6 +438,12 @@ SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weak
for (int atom_idx = first_obj * atoms_per_obj; atom_idx < num_objects * atoms_per_obj;
atom_idx += atoms_per_obj) {
// Note(kmod): it seems like there are some optimizations that could happen in this
// function -- isSet() and set() do roughly the same computation, and set() will
// load the value again before or'ing it and storing it back.
// I tried looking into a bunch of that and it didn't seem to make much
// of a difference; my guess is that this function is memory-bound, so a few
// extra shifts don't hurt.
if (b->isfree.isSet(atom_idx))
continue;
......
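For reference, the fused isSet()+set() that the note above considers (and found not to matter much, presumably because the loop is memory-bound) would look roughly like this; SimpleBitmap and testAndSet are illustrative, not Pyston's Bitmap API:

```cpp
#include <cstdint>

struct SimpleBitmap {
    uint64_t words[16] = {};

    // Returns whether the bit was already set and sets it, loading the word only once
    // instead of once in isSet() and again in set().
    bool testAndSet(int idx) {
        uint64_t& w = words[idx / 64];
        uint64_t mask = 1ull << (idx % 64);
        bool was_set = (w & mask) != 0;
        w |= mask;
        return was_set;
    }
};
```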
......@@ -75,12 +75,22 @@ class conservative_unordered_map
namespace gc {
extern unsigned bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000
void _bytesAllocatedTripped();
// Notify the GC that n bytes are under GC management.
// This is called internally for anything allocated through gc_alloc,
// but it can also be called by clients to say that they have memory that
// is ultimately GC managed but did not get allocated through gc_alloc,
// such as memory that will get freed by a gc destructor.
void registerGCManagedBytes(size_t bytes);
inline void registerGCManagedBytes(size_t bytes) {
bytesAllocatedSinceCollection += bytes;
if (unlikely(bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION)) {
_bytesAllocatedTripped();
}
}
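As a hypothetical example of the "clients" case the comment above describes, a GC-managed object that owns a malloc'd buffer freed by its destructor could report that memory like this (BoxedBlob is invented for the example; registerGCManagedBytes is the function declared above):

```cpp
#include <cstddef>
#include <cstdlib>

class BoxedBlob {
public:
    char* data;
    size_t size;

    explicit BoxedBlob(size_t n) : data((char*)malloc(n)), size(n) {
        // The buffer is not allocated through gc_alloc, so tell the GC about it
        // so that heavy blob allocation still drives collections.
        gc::registerGCManagedBytes(n);
    }
    ~BoxedBlob() { free(data); }
};
```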
class Heap;
struct HeapStatistics;
......@@ -212,7 +222,7 @@ private:
struct Scanner {
private:
int next_to_check;
int64_t next_to_check;
friend class Bitmap<N>;
public:
......
......@@ -47,12 +47,5 @@ i64 unboxInt(Box* b) {
return ((BoxedInt*)b)->n;
}
Box* boxInt(int64_t n) {
if (0 <= n && n < NUM_INTERNED_INTS) {
return interned_ints[n];
}
return new BoxedInt(n);
}
// BoxedInt::BoxedInt(int64_t n) : Box(int_cls), n(n) {}
}
......@@ -64,9 +64,6 @@ extern "C" Box* intNew1(Box* cls);
extern "C" Box* intNew2(Box* cls, Box* val);
extern "C" Box* intInit1(Box* self);
extern "C" Box* intInit2(BoxedInt* self, Box* val);
#define NUM_INTERNED_INTS 100
extern BoxedInt* interned_ints[NUM_INTERNED_INTS];
}
#endif
......@@ -105,7 +105,7 @@ extern BoxedModule* sys_module, *builtins_module, *math_module, *time_module, *t
}
extern "C" Box* boxBool(bool);
extern "C" Box* boxInt(i64);
extern "C" Box* boxInt(i64) __attribute__((visibility("default")));
extern "C" i64 unboxInt(Box*);
extern "C" Box* boxFloat(double d);
extern "C" Box* boxInstanceMethod(Box* obj, Box* func, Box* type);
......@@ -924,6 +924,15 @@ inline BoxedString* boxString(llvm::StringRef s) {
}
return new (s.size()) BoxedString(s);
}
#define NUM_INTERNED_INTS 100
extern BoxedInt* interned_ints[NUM_INTERNED_INTS];
extern "C" inline Box* boxInt(int64_t n) {
if (0 <= n && n < NUM_INTERNED_INTS) {
return interned_ints[n];
}
return new BoxedInt(n);
}
}
#endif
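A small, hypothetical check of the interning behavior introduced above (it relies on the declarations in this header and on interned_ints being populated at startup; it is not part of Pyston):

```cpp
#include <cassert>

inline void checkSmallIntInterning() {
    assert(boxInt(7) == boxInt(7));      // 0 <= n < 100: both calls return the shared interned object
    assert(boxInt(500) != boxInt(500));  // larger values get a fresh BoxedInt each time
}
```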