Commit a80e0725 authored by Kevin Modzelewski

Merge pull request #621 from kmod/microoptimizations

Some small micro-optimizations
parents 9067a589 cf19a38c
@@ -997,7 +997,7 @@ endef
 .PHONY: perf_report
 perf_report:
-	perf report -v -n -g flat,1000 | bash $(TOOLS_DIR)/cumulate.sh | less -S
+	perf report -n

 .PHONY: run run_% dbg_% debug_% perf_%
 run: run_dbg
...
@@ -103,7 +103,7 @@ public:
     // This must not be inlined, because we rely on being able to detect when we're inside of it (by checking whether
     // %rip is inside its instruction range) during a stack-trace in order to produce tracebacks inside interpreted
     // code.
-    __attribute__((__no_inline__)) static Value
+    __attribute__((__no_inline__)) __attribute__((noinline)) static Value
     executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at, RegisterHelper* reg);
...
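As a side note on the no-inline requirement above: keeping executeInner() out of line means its machine code occupies one contiguous address range, so the unwinder can classify a frame with a simple range check on the saved %rip. A minimal sketch of that kind of check (a hypothetical helper, not Pyston's actual traceback code):

    #include <cstdint>

    // Hypothetical range check: given a frame's instruction pointer and the start
    // and size of executeInner's machine code, decide whether the frame belongs to
    // the interpreter. This only works if the function is never inlined.
    static bool ripIsInFunction(uintptr_t rip, uintptr_t func_start, size_t func_size) {
        return rip >= func_start && rip < func_start + func_size;
    }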
@@ -107,43 +107,22 @@ inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, Free fr
     }
 }

-static unsigned bytesAllocatedSinceCollection;
-static __thread unsigned thread_bytesAllocatedSinceCollection;
-#define ALLOCBYTES_PER_COLLECTION 10000000
+unsigned bytesAllocatedSinceCollection;

 static StatCounter gc_registered_bytes("gc_registered_bytes");
-void registerGCManagedBytes(size_t bytes) {
-    thread_bytesAllocatedSinceCollection += bytes;
-    if (unlikely(thread_bytesAllocatedSinceCollection > ALLOCBYTES_PER_COLLECTION / 4)) {
-        gc_registered_bytes.log(thread_bytesAllocatedSinceCollection);
-        bytesAllocatedSinceCollection += thread_bytesAllocatedSinceCollection;
-        thread_bytesAllocatedSinceCollection = 0;
-
-        if (bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION) {
-            if (!gcIsEnabled())
-                return;
-
-            // bytesAllocatedSinceCollection = 0;
-            // threading::GLPromoteRegion _lock;
-            // runCollection();
-
-            threading::GLPromoteRegion _lock;
-            if (bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION) {
-                runCollection();
-                bytesAllocatedSinceCollection = 0;
-            }
-        }
-    }
-}
+void _bytesAllocatedTripped() {
+    gc_registered_bytes.log(bytesAllocatedSinceCollection);
+    bytesAllocatedSinceCollection = 0;
+
+    if (!gcIsEnabled())
+        return;
+
+    threading::GLPromoteRegion _lock;
+    runCollection();
+}

 Heap global_heap;

-bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
-    if (VERBOSITY() >= 4)
-        printf("Freeing %p\n", al->user_data);
+__attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {

 #ifndef NVALGRIND
     VALGRIND_DISABLE_ERROR_REPORTING;
 #endif
@@ -459,6 +438,12 @@ SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weak
     for (int atom_idx = first_obj * atoms_per_obj; atom_idx < num_objects * atoms_per_obj;
          atom_idx += atoms_per_obj) {

+        // Note(kmod): it seems like there are some optimizations that could happen in this
+        // function -- isSet() and set() do roughly the same computation, and set() will
+        // load the value again before or'ing it and storing it back.
+        // I tried looking into a bunch of that and it didn't seem to make that much
+        // of a difference; my guess is that this function is memory-bound, so a few
+        // extra shifts don't hurt.
         if (b->isfree.isSet(atom_idx))
             continue;
...
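To make the note above concrete, here is a rough sketch of what a bitmap's isSet()/set() typically compute (illustrative only, not Pyston's Bitmap class): both derive the same word index and bit mask from the atom index, and set() reads the word again before or'ing the bit back in.

    #include <cstdint>

    struct BitmapSketch {
        uint64_t words[64] = {};

        // Both members repeat the same index/mask computation; set() also re-loads
        // the word before storing it back, which is the redundancy the note refers to.
        bool isSet(int idx) const { return (words[idx / 64] >> (idx % 64)) & 1; }
        void set(int idx) { words[idx / 64] |= (uint64_t(1) << (idx % 64)); }
    };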
@@ -75,12 +75,22 @@ class conservative_unordered_map
 namespace gc {

+extern unsigned bytesAllocatedSinceCollection;
+#define ALLOCBYTES_PER_COLLECTION 10000000
+
+void _bytesAllocatedTripped();
+
 // Notify the gc of n bytes as being under GC management.
 // This is called internally for anything allocated through gc_alloc,
 // but it can also be called by clients to say that they have memory that
 // is ultimately GC managed but did not get allocated through gc_alloc,
 // such as memory that will get freed by a gc destructor.
-void registerGCManagedBytes(size_t bytes);
+inline void registerGCManagedBytes(size_t bytes) {
+    bytesAllocatedSinceCollection += bytes;
+    if (unlikely(bytesAllocatedSinceCollection >= ALLOCBYTES_PER_COLLECTION)) {
+        _bytesAllocatedTripped();
+    }
+}

 class Heap;
 struct HeapStatistics;
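The change above is the common inline-fast-path / out-of-line-slow-path split: the cheap increment-and-compare is inlined at every allocation site, and only the rare threshold case pays for a call into _bytesAllocatedTripped(). A self-contained sketch of the pattern under assumed names (not Pyston's actual code):

    #include <cstdio>

    static const size_t kBytesPerCollection = 10000000;
    static size_t bytes_since_collection = 0;

    void collectionTripped();  // out-of-line slow path, defined below

    // Inline fast path: one add and one predictable branch per allocation.
    inline void noteAllocatedBytes(size_t bytes) {
        bytes_since_collection += bytes;
        if (__builtin_expect(bytes_since_collection >= kBytesPerCollection, 0))
            collectionTripped();
    }

    // Slow path: pays for the call, the logging, and (in the real code) a collection.
    void collectionTripped() {
        std::printf("threshold reached after %zu bytes\n", bytes_since_collection);
        bytes_since_collection = 0;
    }

    int main() {
        for (int i = 0; i < 1000; i++)
            noteAllocatedBytes(16 * 1024);
        return 0;
    }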
@@ -212,7 +222,7 @@ private:
     struct Scanner {
     private:
-        int next_to_check;
+        int64_t next_to_check;
         friend class Bitmap<N>;

     public:
...
@@ -47,12 +47,5 @@ i64 unboxInt(Box* b) {
     return ((BoxedInt*)b)->n;
 }

-Box* boxInt(int64_t n) {
-    if (0 <= n && n < NUM_INTERNED_INTS) {
-        return interned_ints[n];
-    }
-    return new BoxedInt(n);
-}
-
 // BoxedInt::BoxedInt(int64_t n) : Box(int_cls), n(n) {}
 }
@@ -64,9 +64,6 @@ extern "C" Box* intNew1(Box* cls);
 extern "C" Box* intNew2(Box* cls, Box* val);
 extern "C" Box* intInit1(Box* self);
 extern "C" Box* intInit2(BoxedInt* self, Box* val);

-#define NUM_INTERNED_INTS 100
-extern BoxedInt* interned_ints[NUM_INTERNED_INTS];
-
 }
 #endif
@@ -105,7 +105,7 @@ extern BoxedModule* sys_module, *builtins_module, *math_module, *time_module, *t
 }

 extern "C" Box* boxBool(bool);
-extern "C" Box* boxInt(i64);
+extern "C" Box* boxInt(i64) __attribute__((visibility("default")));
 extern "C" i64 unboxInt(Box*);
 extern "C" Box* boxFloat(double d);
 extern "C" Box* boxInstanceMethod(Box* obj, Box* func, Box* type);
@@ -924,6 +924,15 @@ inline BoxedString* boxString(llvm::StringRef s) {
     }
     return new (s.size()) BoxedString(s);
 }

+#define NUM_INTERNED_INTS 100
+extern BoxedInt* interned_ints[NUM_INTERNED_INTS];
+
+extern "C" inline Box* boxInt(int64_t n) {
+    if (0 <= n && n < NUM_INTERNED_INTS) {
+        return interned_ints[n];
+    }
+    return new BoxedInt(n);
+}
+
 }
 #endif
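Moving boxInt() into the header as an inline function (together with the interned-int table) lets callers in other translation units avoid the call overhead, while values 0..99 all share a single preallocated object. A simplified, self-contained illustration of the interning idea (types and names are placeholders, not Pyston's):

    #include <cstdint>

    struct IntObjSketch { int64_t n; };

    static const int kNumInterned = 100;
    static IntObjSketch* interned[kNumInterned];

    // Small values return a shared, lazily created object; everything else allocates.
    IntObjSketch* boxIntSketch(int64_t n) {
        if (0 <= n && n < kNumInterned) {
            if (!interned[n])
                interned[n] = new IntObjSketch{ n };
            return interned[n];
        }
        return new IntObjSketch{ n };
    }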