Pyston · Commits · Commit 012d6d50

Merge pull request #298 from toshok/three-arenas

    Add third GC arena

Authored Feb 13, 2015 by Kevin Modzelewski
Parents: 7cf92757, d85c9893

Showing 5 changed files with 901 additions and 427 deletions.
    minibenchmarks/pidigits.py   +39   -0
    src/gc/gc_alloc.h            +8    -0
    src/gc/heap.cpp              +521  -322
    src/gc/heap.h                +330  -105
    test/unittests/gc.cpp        +3    -0
minibenchmarks/pidigits.py (new file, mode 100644)
import time

PIDIGITS_LEN = 1500

def pidigits(length):
    i = k = ns = 0
    k1 = 1
    n, a, d, t, u = 1, 0, 1, 0, 0
    while (True):
        k += 1
        t = n << 1
        n *= k
        a += t
        k1 += 2
        a *= k1
        d *= k1
        if a >= n:
            t, u = divmod(n * 3 + a, d)
            u += n
            if d > u:
                ns = ns * 10 + t
                i += 1
                if i % 10 == 0:
                    ns = 0
                if i >= length:
                    break
                a -= d * t
                a *= 10
                n *= 10

def main(n):
    l = []
    for i in range(n):
        t0 = time.time()
        pidigits(PIDIGITS_LEN)
        l.append(time.time() - t0)
    return l

main(100)
src/gc/gc_alloc.h
@@ -51,6 +51,11 @@ extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
#endif
    GCAllocation* alloc = global_heap.alloc(alloc_bytes);
#ifndef NVALGRIND
    VALGRIND_DISABLE_ERROR_REPORTING;
#endif
    alloc->kind_id = kind_id;
    alloc->gc_flags = 0;

@@ -67,7 +72,10 @@ extern "C" inline void* gc_alloc(size_t bytes, GCKind kind_id) {
    }
    void* r = alloc->user_data;
#ifndef NVALGRIND
    VALGRIND_ENABLE_ERROR_REPORTING;
    if (ENABLE_REDZONES) {
        r = ((char*)r) + REDZONE_SIZE;
    }
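For readers unfamiliar with the redzone scheme referenced here: when redzones are enabled, the allocator pads each allocation and hands the caller a pointer past the leading pad, so out-of-bounds accesses land in memory the tooling has been told is off-limits. A minimal layout sketch under assumed names (REDZONE_SIZE's value and the allocWithRedzones helper are illustrative, not the diff's API):

    #include <cstdio>
    #include <cstdlib>

    static constexpr size_t REDZONE_SIZE = 16; // illustrative value

    // Allocates [redzone][user bytes][redzone] and returns a pointer to the user region.
    void* allocWithRedzones(size_t user_bytes) {
        char* base = (char*)malloc(user_bytes + 2 * REDZONE_SIZE);
        return base + REDZONE_SIZE; // the caller never sees the leading pad
    }

    int main() {
        void* p = allocWithRedzones(64);
        printf("user pointer: %p\n", p);
        free((char*)p - REDZONE_SIZE); // undo the offset before freeing
    }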
src/gc/heap.cpp
@@ -17,7 +17,6 @@
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <sys/mman.h>

#include "core/common.h"
#include "core/util.h"

[The removed line in this hunk is the <sys/mman.h> include, which moves to heap.h along with doMmap().]
@@ -34,6 +33,67 @@
namespace pyston {
namespace gc {

void _doFree(GCAllocation* al);

// lots of linked lists around here, so let's just use template functions for operations on them.
template <class ListT> inline void nullNextPrev(ListT* node) {
    node->next = NULL;
    node->prev = NULL;
}

template <class ListT> inline void removeFromLL(ListT* node) {
    *node->prev = node->next;
    if (node->next)
        node->next->prev = node->prev;
}

template <class ListT> inline void removeFromLLAndNull(ListT* node) {
    *node->prev = node->next;
    if (node->next)
        node->next->prev = node->prev;
    nullNextPrev(node);
}

template <class ListT> inline void insertIntoLL(ListT** next_pointer, ListT* next) {
    assert(next_pointer);
    assert(next);
    assert(!next->next);
    assert(!next->prev);

    next->next = *next_pointer;
    if (next->next)
        next->next->prev = &next->next;
    *next_pointer = next;
    next->prev = next_pointer;
}

template <class ListT, typename Func> inline void forEach(ListT* list, Func func) {
    auto cur = list;
    while (cur) {
        func(cur);
        cur = cur->next;
    }
}

template <class ListT, typename Free> inline void sweepList(ListT* head, Free free_func) {
    auto cur = head;
    while (cur) {
        GCAllocation* al = cur->data;
        if (isMarked(al)) {
            clearMark(al);
            cur = cur->next;
        } else {
            _doFree(al);

            removeFromLL(cur);

            auto to_free = cur;
            cur = cur->next;
            free_func(to_free);
        }
    }
}

static unsigned bytesAllocatedSinceCollection;
static __thread unsigned thread_bytesAllocatedSinceCollection;
#define ALLOCBYTES_PER_COLLECTION 10000000
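A reading aid for the prev-as-pointer-to-pointer idiom these helpers rely on: each node stores not a pointer to the previous node but a pointer to whichever pointer currently points at it (the list head or the previous node's next field), so unlinking needs no head special case. A minimal standalone sketch under assumed types (the Node struct and main() are illustrative; only insertIntoLL/removeFromLL mirror the templates above):

    #include <cassert>
    #include <cstdio>

    // Hypothetical node type; any struct with next/prev members of this shape works.
    struct Node {
        Node* next;
        Node** prev; // points at whichever pointer currently points at us
        int value;
    };

    template <class ListT> inline void insertIntoLL(ListT** next_pointer, ListT* next) {
        assert(next_pointer && next && !next->next && !next->prev);
        next->next = *next_pointer;
        if (next->next)
            next->next->prev = &next->next;
        *next_pointer = next;
        next->prev = next_pointer;
    }

    template <class ListT> inline void removeFromLL(ListT* node) {
        *node->prev = node->next; // works whether node is the head or an interior node
        if (node->next)
            node->next->prev = node->prev;
    }

    int main() {
        Node* head = nullptr;
        Node a{nullptr, nullptr, 1}, b{nullptr, nullptr, 2};
        insertIntoLL(&head, &a);
        insertIntoLL(&head, &b); // list is now b -> a
        removeFromLL(&b);        // unlinking the current head needs no special case
        printf("head->value = %d\n", head->value); // prints 1
    }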
@@ -64,73 +124,268 @@ void registerGCManagedBytes(size_t bytes) {
[Side-by-side hunk.
Removed: the file-local "#define PAGE_SIZE 4096"; the old class Arena (constructor taking a start address, doMmap(), contains()); the static Arena small_arena((void*)0x1270000000L) and large_arena((void*)0x2270000000L) instances; the old LargeObj struct (next/prev, obj_size, mmap_size(), capacity(), fromAllocation()); and the old GCAllocation* Heap::allocLarge(size_t size).
Added: void _doFree(GCAllocation* al), which logs when VERBOSITY() >= 2, wraps header access in Valgrind disable/enable-error-reporting guards, and for GCKind::PYTHON objects asserts b->cls->tp_dealloc == NULL and runs b->cls->simple_destructor(b); void Heap::destructContents(GCAllocation* al), which calls _doFree(al); the HeapStatistics/TypeStats struct and the addStatistic() helper, moved up from later in the file; the new Heap::dumpHeapStatistics(), which takes a threading::GLPromoteRegion lock and gathers statistics from small_arena, large_arena and huge_arena before printing them; the dumpHeapStatistics() free function; and the "Small Arena" section: SmallArena::alloc() (registers the managed bytes, then dispatches to _alloc() with the matching size bucket), SmallArena::realloc(), SmallArena::free(), SmallArena::allocationFrom(), SmallArena::freeUnmarked(), SmallArena::getStatistics(), SmallArena::_freeChain(), and the start of SmallArena::_allocBlock().]
@@ -165,45 +420,23 @@ static Block* alloc_block(uint64_t size, Block** prev) {
[Side-by-side hunk. Removed: the file-scope static void insertIntoLL(Block** next_pointer, Block* next) and static void removeFromLL(Block* b) helpers, which are superseded by the template versions above. Changed: Heap::ThreadBlockCache::~ThreadBlockCache() becomes SmallArena::ThreadBlockCache::~ThreadBlockCache(); it still takes LOCK_REGION(heap->lock), but now calls removeFromLLAndNull(b) instead of removeFromLL(b) and returns cached blocks to small->heads[i] and small->full_heads[i] rather than heap->heads[i] and heap->full_heads[i]. static GCAllocation* allocFromBlock(Block* b) becomes GCAllocation* SmallArena::_allocFromBlock(Block* b).]
@@ -212,19 +445,17 @@ static GCAllocation* allocFromBlock(Block* b) {
[Side-by-side hunk. static Block* claimBlock(size_t rounded_size, Block** free_head) becomes SmallArena::Block* SmallArena::_claimBlock(size_t rounded_size, Block** free_head), calling removeFromLLAndNull(free_block) instead of removeFromLL(free_block) and _allocBlock(rounded_size, NULL) instead of alloc_block(rounded_size, NULL). GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) becomes GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) and no longer calls registerGCManagedBytes() itself; the public SmallArena::alloc() does that now.]
@@ -241,11 +472,11 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
@@ -253,12 +484,12 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
[Side-by-side hunks inside the renamed _alloc(): allocFromBlock(cache_block) becomes _allocFromBlock(cache_block), removeFromLL(cache_block) becomes removeFromLLAndNull(cache_block), LOCK_REGION(lock) becomes LOCK_REGION(heap->lock), and claimBlock(rounded_size, &heads[bucket_idx]) becomes _claimBlock(rounded_size, &heads[bucket_idx]). The "// should probably be called allocBlock:" comment and the surrounding thread-cache logic are unchanged.]
@@ -269,322 +500,290 @@ GCAllocation* Heap::allocSmall(size_t rounded_size, int bucket_idx) {
[Side-by-side hunk covering the rest of the file.
Removed: _freeFrom(GCAllocation* alloc, Block* b); the old static _freeLargeObj(LargeObj* lobj) (unlink plus munmap); the old static _doFree(GCAllocation* al); Heap::free(); Heap::realloc(); Heap::getAllocationFromInteriorPointer(); the old static Block** freeChain(Block** head); Heap::freeUnmarked(); getChainStatistics(); the old HeapStatistics/addStatistic definitions; the old Heap::dumpHeapStatistics(); and the old dumpHeapStatistics() free function.
Added: SmallArena::_getChainStatistics() (marked "// TODO: copy-pasted from _freeChain"); the "Large Arena" section: the LARGE_BLOCK_NUM_CHUNKS, LARGE_BLOCK_FOR_OBJ(obj) and LARGE_CHUNK_INDEX(obj, section) macros, LargeArena::alloc(), LargeArena::realloc(), LargeArena::free(), LargeArena::allocationFrom(), LargeArena::freeUnmarked() (sweepList over the object list), LargeArena::getStatistics(), LargeArena::add_free_chunk(), LargeArena::get_from_size_list(), LargeArena::_alloc() (rounds the request up to a multiple of CHUNK_SIZE, searches the chunk-count-indexed free lists with list 0 as the overflow list, and on a retry: path mmaps a fresh 1 MB LargeBlock and carves its chunks into the free lists when nothing fits), and LargeArena::_freeLargeObj() (marks the object's chunks free in the block's free_chunk_map and returns them with add_free_chunk(); a comment notes that freeing an empty section is deferred to the sweep);
and the "Huge Arena" section: HugeArena::alloc() (page-rounds size + sizeof(HugeObj), mmaps it directly, and links the object into a list), HugeArena::realloc(), HugeArena::free(), HugeArena::allocationFrom(), HugeArena::freeUnmarked(), HugeArena::getStatistics(), and HugeArena::_freeHugeObj() (removeFromLL plus munmap). The hunk ends by closing the gc and pyston namespaces.]
src/gc/heap.h
@@ -17,6 +17,7 @@
#include <cstddef>
#include <cstdint>
#include <sys/mman.h>

#include "core/common.h"
#include "core/threading.h"

[The added line in this hunk is the <sys/mman.h> include, needed now that doMmap() lives in this header.]
@@ -24,6 +25,9 @@
namespace pyston {
namespace gc {

class Heap;
struct HeapStatistics;

typedef uint8_t kindid_t;
struct GCAllocation {
    unsigned int gc_flags : 8;
@@ -59,173 +63,394 @@ inline void clearMark(GCAllocation* header) {
#undef MARK_BIT

#define PAGE_SIZE 4096

template <uintptr_t arena_start, uintptr_t arena_size> class Arena {
private:
    void* cur;
    void* end;

protected:
    Arena() : cur((void*)arena_start), end((void*)(arena_start + arena_size)) {}

public:
    void* doMmap(size_t size) {
        assert(size % PAGE_SIZE == 0);
        assert(((uint8_t*)cur + size) < end && "arena full");

        void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
        ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
        cur = (uint8_t*)cur + size;
        return mrtn;
    }

    bool contains(void* addr) { return (void*)arena_start <= addr && addr < cur; }
};

constexpr uintptr_t ARENA_SIZE = 0x1000000000L;
constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;

[Also in this part of the hunk: the old file-scope template <int N> class Bitmap begins on the removed side; it reappears as a private nested class of SmallArena below.]
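Not from the diff, but the intent behind these constants is worth spelling out: each arena reserves a fixed, disjoint 0x1000000000-byte stretch of virtual address space and only ever maps pages inside it with MAP_FIXED, so deciding which arena (if any) owns a pointer is a couple of integer comparisons with no metadata lookup. A toy sketch of that classification (simplified: the real contains() compares against the arena's current high-water mark, not the full reserved range):

    #include <cstdint>
    #include <cstdio>

    // Same constants as the diff; the classification below is the whole trick.
    constexpr uintptr_t ARENA_SIZE = 0x1000000000L;
    constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
    constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
    constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;

    enum class Which { Small, Large, Huge, NotOurs };

    Which classify(void* p) {
        uintptr_t a = (uintptr_t)p;
        if (a >= SMALL_ARENA_START && a < SMALL_ARENA_START + ARENA_SIZE) return Which::Small;
        if (a >= LARGE_ARENA_START && a < LARGE_ARENA_START + ARENA_SIZE) return Which::Large;
        if (a >= HUGE_ARENA_START  && a < HUGE_ARENA_START + ARENA_SIZE)  return Which::Huge;
        return Which::NotOurs;
    }

    int main() {
        printf("%d\n", (int)classify((void*)0x1270001000L)); // 0: small arena range
        printf("%d\n", (int)classify((void*)0x4000000000L)); // 3: not a GC pointer
    }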
//
// The SmallArena allocates objects <= 3584 bytes.
//
// it uses segregated-fit allocation, and each block contains a free
// bitmap for objects of a given size (constant for the block)
//
static const size_t sizes[] = {
    16,  32,  48,  64,  80,  96,  112,  128,  160,  192,  224,  256,  320, 384,
    448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072, 3584, // 4096,
};
static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);
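As a reading aid (not part of the diff): segregated fit here means every request is rounded up to one of the sizes[] buckets and served from blocks dedicated to that bucket. A minimal sketch of the bucket lookup, mirroring the loop in SmallArena::alloc() in heap.cpp; bucketFor() is an illustrative name:

    #include <cstddef>
    #include <cstdio>

    static const size_t sizes[] = { 16,  32,  48,  64,  80,  96,  112,  128,  160,  192,  224,  256,  320, 384,
                                    448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072, 3584 };
    static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);

    // Returns the bucket index whose size is the smallest one >= bytes, or -1 if
    // the request is too large for the small arena.
    int bucketFor(size_t bytes) {
        for (size_t i = 0; i < NUM_BUCKETS; i++) {
            if (sizes[i] >= bytes)
                return (int)i;
        }
        return -1;
    }

    int main() {
        printf("%d\n", bucketFor(24));   // 1  -> the 32-byte bucket
        printf("%d\n", bucketFor(3585)); // -1 -> goes to the LargeArena instead
    }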
class SmallArena : public Arena<SMALL_ARENA_START, ARENA_SIZE> {
public:
    SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* al);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);

private:
    template <int N> class Bitmap {
        static_assert(N % 64 == 0, "");

    private:
        uint64_t data[N / 64];

    public:
        struct Scanner {
        private:
            int next_to_check;
            friend class Bitmap<N>;

        public:
            void reset() { next_to_check = 0; }
        };

        void setAllZero() { memset(data, 0, sizeof(data)); }
        bool isSet(int idx) { return (data[idx / 64] >> (idx % 64)) & 1; }
        void set(int idx) { data[idx / 64] |= 1UL << (idx % 64); }
        void toggle(int idx) { data[idx / 64] ^= 1UL << (idx % 64); }
        void clear(int idx) { data[idx / 64] &= ~(1UL << (idx % 64)); }

        int scanForNext(Scanner& sc) {
            uint64_t mask = data[sc.next_to_check];

            if (unlikely(mask == 0L)) {
                while (true) {
                    sc.next_to_check++;
                    if (sc.next_to_check == N / 64) {
                        sc.next_to_check = 0;
                        return -1;
                    }
                    mask = data[sc.next_to_check];
                    if (likely(mask != 0L)) {
                        break;
                    }
                }
            }

            int i = sc.next_to_check;
            int first = __builtin_ctzll(mask);
            assert(first < 64);
            assert(data[i] & (1L << first));
            data[i] ^= (1L << first);
            int idx = first + i * 64;
            return idx;
        }
    };

    static constexpr size_t BLOCK_SIZE = 4 * 4096;
#define ATOM_SIZE 16
    static_assert(BLOCK_SIZE % ATOM_SIZE == 0, "");
#define ATOMS_PER_BLOCK (BLOCK_SIZE / ATOM_SIZE)
    static_assert(ATOMS_PER_BLOCK % 64 == 0, "");
#define BITFIELD_SIZE (ATOMS_PER_BLOCK / 8)
#define BITFIELD_ELTS (BITFIELD_SIZE / 8)
#define BLOCK_HEADER_SIZE (BITFIELD_SIZE + 4 * sizeof(void*))
#define BLOCK_HEADER_ATOMS ((BLOCK_HEADER_SIZE + ATOM_SIZE - 1) / ATOM_SIZE)

    struct Atoms {
        char _data[ATOM_SIZE];
    };

    struct Block {
        union {
            struct {
                Block* next, **prev;
                uint32_t size;
                uint16_t num_obj;
                uint8_t min_obj_index;
                uint8_t atoms_per_obj;
                Bitmap<ATOMS_PER_BLOCK> isfree;
                Bitmap<ATOMS_PER_BLOCK>::Scanner next_to_check;
                void* _header_end[0];
            };
            Atoms atoms[ATOMS_PER_BLOCK];
        };

        inline int minObjIndex() const { return min_obj_index; }
        inline int numObjects() const { return num_obj; }
        inline int atomsPerObj() const { return atoms_per_obj; }

        static Block* forPointer(void* ptr) { return (Block*)((uintptr_t)ptr & ~(BLOCK_SIZE - 1)); }
    };
    static_assert(sizeof(Block) == BLOCK_SIZE, "bad size");
    static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
    static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");

    struct ThreadBlockCache {
        Heap* heap;
        SmallArena* small;
        Block* cache_free_heads[NUM_BUCKETS];
        Block* cache_full_heads[NUM_BUCKETS];

        ThreadBlockCache(Heap* heap, SmallArena* small) : heap(heap), small(small) {
            memset(cache_free_heads, 0, sizeof(cache_free_heads));
            memset(cache_full_heads, 0, sizeof(cache_full_heads));
        }
        ~ThreadBlockCache();
    };

    Block* heads[NUM_BUCKETS];
    Block* full_heads[NUM_BUCKETS];

    friend struct ThreadBlockCache;

    Heap* heap;
    // TODO only use thread caches if we're in GRWL mode?
    threading::PerThreadSet<ThreadBlockCache, Heap*, SmallArena*> thread_caches;

    Block* _allocBlock(uint64_t size, Block** prev);
    GCAllocation* _allocFromBlock(Block* b);
    Block* _claimBlock(size_t rounded_size, Block** free_head);
    Block** _freeChain(Block** head);
    void _getChainStatistics(HeapStatistics* stats, Block** head);

    GCAllocation* __attribute__((__malloc__)) _alloc(size_t bytes, int bucket_idx);
};

[Removed across this region on the old side: the file-scope Bitmap, Atoms and Block definitions, the "#define BLOCK_SIZE (4 * 4096)" macro, the old sizes[] table (which stopped at 2048, with 2560/3072/3584 commented out) and its "#define NUM_BUCKETS" macro, the "struct LargeObj;" forward declaration, and the old class Heap with its heads[]/full_heads[]/large_head members, allocSmall()/allocLarge() declarations, spinlock, and ThreadBlockCache(Heap*).]
//
// The LargeArena allocates objects where 3584 < size <1024*1024-CHUNK_SIZE-sizeof(LargeObject) bytes.
//
// it maintains a set of size-segregated free lists, and a special
// free list for larger objects.  If the free list specific to a given
// size has no entries, we search the large free list.
//
// Blocks of 1meg are mmap'ed individually, and carved up as needed.
//
class LargeArena : public Arena<LARGE_ARENA_START, ARENA_SIZE> {
private:
    struct LargeBlock {
        LargeBlock* next;
        size_t num_free_chunks;
        unsigned char* free_chunk_map;
    };

    struct LargeFreeChunk {
        LargeFreeChunk* next_size;
        size_t size;
    };

    struct LargeObj {
        LargeObj* next, **prev;
        size_t size;
        GCAllocation data[0];

        static LargeObj* fromAllocation(GCAllocation* alloc) {
            char* rtn = (char*)alloc - offsetof(LargeObj, data);
            return reinterpret_cast<LargeObj*>(rtn);
        }
    };

    /*
     * This shouldn't be much smaller or larger than the largest small size bucket.
     * Must be at least sizeof (LargeBlock).
     */
    static constexpr size_t CHUNK_SIZE = 4096;
    static constexpr int CHUNK_BITS = 12;
    static_assert(CHUNK_SIZE > sizeof(LargeBlock), "bad large block size");

    static constexpr int BLOCK_SIZE = 1024 * 1024;
    static constexpr int NUM_FREE_LISTS = 32;

    Heap* heap;
    LargeObj* head;
    LargeBlock* blocks;
    LargeFreeChunk* free_lists[NUM_FREE_LISTS]; /* 0 is for larger sizes */

    void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
    LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
    LargeObj* _alloc(size_t size);
    void _freeLargeObj(LargeObj* obj);

public:
    LargeArena(Heap* heap) : heap(heap), head(NULL), blocks(NULL) {}

    /* Largest object that can be allocated in a large block. */
    static constexpr size_t ALLOC_SIZE_LIMIT = BLOCK_SIZE - CHUNK_SIZE - sizeof(LargeObj);

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* alloc);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);
};
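A reading aid for the free-list scheme described in the comment above (standalone sketch, not the diff's API): chunk-aligned sizes are bucketed by their chunk count, and anything with NUM_FREE_LISTS or more chunks shares list 0, which also serves as the fallback searched when the exact-size lists are empty:

    #include <cstddef>
    #include <cstdio>

    static constexpr int CHUNK_BITS = 12;                 // 4 KB chunks
    static constexpr size_t CHUNK_SIZE = 1 << CHUNK_BITS;
    static constexpr int NUM_FREE_LISTS = 32;

    // Mirrors the indexing in add_free_chunk()/_alloc(): sizes are rounded up to a
    // whole number of chunks, and chunk counts >= NUM_FREE_LISTS share list 0.
    size_t freeListIndex(size_t size) {
        size = (size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
        size_t num_chunks = size >> CHUNK_BITS;
        return num_chunks >= NUM_FREE_LISTS ? 0 : num_chunks;
    }

    int main() {
        printf("%zu\n", freeListIndex(5000));    // 2 chunks  -> list 2
        printf("%zu\n", freeListIndex(200000));  // 49 chunks -> list 0 (overflow list)
    }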
// The HugeArena allocates objects where size > 1024*1024 bytes.
//
// Objects are allocated with individual mmap() calls, and kept in a
// linked list.  They are not reused.
class HugeArena : public Arena<HUGE_ARENA_START, ARENA_SIZE> {
public:
    HugeArena(Heap* heap) : heap(heap) {}

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
    GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
    void free(GCAllocation* alloc);

    GCAllocation* allocationFrom(void* ptr);
    void freeUnmarked();

    void getStatistics(HeapStatistics* stats);

private:
    struct HugeObj {
        HugeObj* next, **prev;
        size_t obj_size;
        GCAllocation data[0];

        int mmap_size() {
            size_t total_size = obj_size + sizeof(HugeObj);
            total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
            return total_size;
        }

        int capacity() { return mmap_size() - sizeof(HugeObj); }

        static HugeObj* fromAllocation(GCAllocation* alloc) {
            char* rtn = (char*)alloc - offsetof(HugeObj, data);
            assert((uintptr_t)rtn % PAGE_SIZE == 0);
            return reinterpret_cast<HugeObj*>(rtn);
        }
    };

    void _freeHugeObj(HugeObj* lobj);

    HugeObj* head;

    Heap* heap;
};
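A side note on the rounding idiom used by mmap_size() above and by the allocation paths in heap.cpp: adding PAGE_SIZE - 1 and then masking with ~(PAGE_SIZE - 1) rounds a size up to the next multiple of the page size. A tiny self-contained check (roundUpToPage is an illustrative helper name):

    #include <cstddef>
    #include <cstdio>

    static constexpr size_t PAGE_SIZE = 4096;

    // (n + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1) clears the low bits after bumping
    // past the previous page boundary, i.e. it rounds n up to a page multiple.
    size_t roundUpToPage(size_t n) { return (n + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); }

    int main() {
        printf("%zu\n", roundUpToPage(1));     // 4096
        printf("%zu\n", roundUpToPage(4096));  // 4096 (already aligned)
        printf("%zu\n", roundUpToPage(4097));  // 8192
    }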
class Heap {
private:
    SmallArena small_arena;
    LargeArena large_arena;
    HugeArena huge_arena;

    friend class SmallArena;
    friend class LargeArena;
    friend class HugeArena;

    // DS_DEFINE_MUTEX(lock);
    DS_DEFINE_SPINLOCK(lock);

public:
    Heap() : small_arena(this), large_arena(this), huge_arena(this) {}

    GCAllocation* realloc(GCAllocation* alloc, size_t bytes) {
        // TODO(toshok): there is duplicate code in each of the
        // ::realloc methods to test whether the allocation can be
        // reused.  Would be nice to factor it all out here into this
        // method.
        if (large_arena.contains(alloc)) {
            return large_arena.realloc(alloc, bytes);
        } else if (huge_arena.contains(alloc)) {
            return huge_arena.realloc(alloc, bytes);
        }

        assert(small_arena.contains(alloc));
        return small_arena.realloc(alloc, bytes);
    }

    GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
        if (bytes > LargeArena::ALLOC_SIZE_LIMIT)
            return huge_arena.alloc(bytes);
        else if (bytes > sizes[NUM_BUCKETS - 1])
            return large_arena.alloc(bytes);
        else
            return small_arena.alloc(bytes);
    }

    void destructContents(GCAllocation* alloc);

    void free(GCAllocation* alloc) {
        destructContents(alloc);

        if (large_arena.contains(alloc)) {
            large_arena.free(alloc);
            return;
        }

        if (huge_arena.contains(alloc)) {
            huge_arena.free(alloc);
            return;
        }

        assert(small_arena.contains(alloc));
        small_arena.free(alloc);
    }

    // not thread safe:
    GCAllocation* getAllocationFromInteriorPointer(void* ptr) {
        if (large_arena.contains(ptr)) {
            return large_arena.allocationFrom(ptr);
        } else if (huge_arena.contains(ptr)) {
            return huge_arena.allocationFrom(ptr);
        } else if (small_arena.contains(ptr)) {
            return small_arena.allocationFrom(ptr);
        }
        return NULL;
    }

    // not thread safe:
    void freeUnmarked() {
        small_arena.freeUnmarked();
        large_arena.freeUnmarked();
        huge_arena.freeUnmarked();
    }

    void dumpHeapStatistics();
};

[Removed on the old side of this region: the remaining declarations of the old Heap class, including Heap() : thread_caches(this) {}, the bucket-loop alloc() built on allocSmall()/allocLarge(), and the out-of-line free(), getAllocationFromInteriorPointer() and freeUnmarked() declarations.]
test/unittests/gc.cpp
@@ -68,6 +68,9 @@ TEST(alloc, alloc64) { testAlloc(64); }
TEST(alloc, alloc128) { testAlloc(128); }
TEST(alloc, alloc258) { testAlloc(258); }
TEST(alloc, alloc3584) { testAlloc(3584); }
TEST(alloc, alloc4096) { testAlloc(4096); }
TEST(alloc, alloc8192) { testAlloc(8192); }
TEST(alloc, alloc16384) { testAlloc(16384); }

TEST(alloc, largeallocs) {
    int s1 = 1 << 20;