Commit ff3fa1b3 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: make the GC bitmap a byte array

Half the code in the garbage collector accesses the bitmap
as an array of bytes instead of as an array of uintptrs.
This is tricky to do correctly in a portable fashion,
it breaks on big-endian systems.
Make the bitmap a byte array.
Simplifies markallocated, scanblock and span sweep along the way,
as we don't need to recalculate bitmap position for each word.

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/125250043
parent fb44fb6c
...@@ -619,6 +619,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12 ...@@ -619,6 +619,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
XADDL AX, (SP) XADDL AX, (SP)
RET RET
// void runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), AX
MOVB val+4(FP), BX
LOCK
ORB BX, (AX)
RET
// void jmpdefer(fn, sp); // void jmpdefer(fn, sp);
// called from deferreturn. // called from deferreturn.
// 1. pop the caller // 1. pop the caller
......
...@@ -702,6 +702,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16 ...@@ -702,6 +702,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
XCHGQ AX, 0(BX) XCHGQ AX, 0(BX)
RET RET
// void runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-16
MOVQ ptr+0(FP), AX
MOVB val+8(FP), BX
LOCK
ORB BX, (AX)
RET
// void jmpdefer(fn, sp); // void jmpdefer(fn, sp);
// called from deferreturn. // called from deferreturn.
// 1. pop the caller // 1. pop the caller
......
...@@ -167,3 +167,19 @@ runtime·atomicstore64(uint64 volatile *addr, uint64 v) ...@@ -167,3 +167,19 @@ runtime·atomicstore64(uint64 volatile *addr, uint64 v)
*addr = v; *addr = v;
runtime·unlock(LOCK(addr)); runtime·unlock(LOCK(addr));
} }
#pragma textflag NOSPLIT
void
runtime·atomicor8(byte volatile *addr, byte v)
{
uint32 *addr32, old, word, shift;
// Align down to 4 bytes and use 32-bit CAS.
addr32 = (uint32*)((uintptr)addr & ~3);
word = ((uint32)v) << (((uintptr)addr & 3) * 8);
for(;;) {
old = *addr32;
if(runtime·cas(addr32, old, old|word))
break;
}
}
...@@ -820,7 +820,8 @@ dumpbvtypes(BitVector *bv, byte *base) ...@@ -820,7 +820,8 @@ dumpbvtypes(BitVector *bv, byte *base)
static BitVector static BitVector
makeheapobjbv(byte *p, uintptr size) makeheapobjbv(byte *p, uintptr size)
{ {
uintptr off, shift, *bitp, bits, nptr, i; uintptr off, nptr, i;
byte shift, *bitp, bits;
bool mw; bool mw;
// Extend the temp buffer if necessary. // Extend the temp buffer if necessary.
...@@ -838,13 +839,13 @@ makeheapobjbv(byte *p, uintptr size) ...@@ -838,13 +839,13 @@ makeheapobjbv(byte *p, uintptr size)
mw = false; mw = false;
for(i = 0; i < nptr; i++) { for(i = 0; i < nptr; i++) {
off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start; off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp >> (shift + 2)) & 3; bits = (*bitp >> (shift + 2)) & BitsMask;
if(!mw && bits == BitsDead) if(!mw && bits == BitsDead)
break; // end of heap object break; // end of heap object
mw = !mw && bits == BitsMultiWord; mw = !mw && bits == BitsMultiWord;
tmpbuf[i*BitsPerPointer/8] &= ~(3<<((i*BitsPerPointer)%8)); tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8); tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
} }
return (BitVector){i*BitsPerPointer, (uint32*)tmpbuf}; return (BitVector){i*BitsPerPointer, (uint32*)tmpbuf};
......
...@@ -22,8 +22,8 @@ const ( ...@@ -22,8 +22,8 @@ const (
pageSize = 1 << pageShift pageSize = 1 << pageShift
pageMask = pageSize - 1 pageMask = pageSize - 1
wordsPerBitmapWord = ptrSize * 8 / 4
gcBits = 4 gcBits = 4
wordsPerBitmapByte = 8 / gcBits
bitsPerPointer = 2 bitsPerPointer = 2
bitsMask = 1<<bitsPerPointer - 1 bitsMask = 1<<bitsPerPointer - 1
pointersPerByte = 8 / bitsPerPointer pointersPerByte = 8 / bitsPerPointer
...@@ -211,8 +211,8 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer { ...@@ -211,8 +211,8 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
{ {
arena_start := uintptr(unsafe.Pointer(mheap_.arena_start)) arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
off := (uintptr(x) - arena_start) / ptrSize off := (uintptr(x) - arena_start) / ptrSize
xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize)) xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
shift := (off % wordsPerBitmapWord) * gcBits shift := (off % wordsPerBitmapByte) * gcBits
if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary { if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask)) println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
gothrow("bad bits in markallocated") gothrow("bad bits in markallocated")
...@@ -260,8 +260,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer { ...@@ -260,8 +260,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
} }
if size == 2*ptrSize { if size == 2*ptrSize {
xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) *xbits = *ptrmask | bitBoundary
*xbitsb = *ptrmask | bitBoundary
goto marked goto marked
} }
te = uintptr(typ.size) / ptrSize te = uintptr(typ.size) / ptrSize
...@@ -283,19 +282,12 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer { ...@@ -283,19 +282,12 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
v &^= uint8(bitPtrMask << 4) v &^= uint8(bitPtrMask << 4)
} }
off := (uintptr(x) + i - arena_start) / ptrSize *xbits = v
xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize)) xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
shift := (off % wordsPerBitmapWord) * gcBits
xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
*xbitsb = v
} }
if size0%(2*ptrSize) == 0 && size0 < size { if size0%(2*ptrSize) == 0 && size0 < size {
// Mark the word after last object's word as bitsDead. // Mark the word after last object's word as bitsDead.
off := (uintptr(x) + size0 - arena_start) / ptrSize *xbits = bitsDead << 2
xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
shift := (off % wordsPerBitmapWord) * gcBits
xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
*xbitsb = bitsDead << 2
} }
} }
marked: marked:
......
...@@ -212,8 +212,8 @@ static struct { ...@@ -212,8 +212,8 @@ static struct {
static void static void
scanblock(byte *b, uintptr n, byte *ptrmask) scanblock(byte *b, uintptr n, byte *ptrmask)
{ {
byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], bits8; byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached;
uintptr i, nobj, size, idx, *bitp, bits, xbits, shift, x, off, cached, scanbufpos; uintptr i, nobj, size, idx, x, off, scanbufpos;
intptr ncached; intptr ncached;
Workbuf *wbuf; Workbuf *wbuf;
String *str; String *str;
...@@ -237,6 +237,10 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -237,6 +237,10 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
for(i = 0; i < nelem(scanbuf); i++) for(i = 0; i < nelem(scanbuf); i++)
scanbuf[i] = nil; scanbuf[i] = nil;
ptrbitp = nil;
cached = 0;
ncached = 0;
// ptrmask can have 3 possible values: // ptrmask can have 3 possible values:
// 1. nil - obtain pointer mask from GC bitmap. // 1. nil - obtain pointer mask from GC bitmap.
// 2. ScanConservatively - don't use any mask, scan conservatively. // 2. ScanConservatively - don't use any mask, scan conservatively.
...@@ -295,8 +299,15 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -295,8 +299,15 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
} }
ptrmask = ScanConservatively; ptrmask = ScanConservatively;
} }
cached = 0; // Find bits of the beginning of the object.
ncached = 0; if(ptrmask == nil) {
off = (uintptr*)b - (uintptr*)arena_start;
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapByte) * gcBits;
cached = *ptrbitp >> shift;
cached &= ~bitBoundary;
ncached = (8 - shift)/gcBits;
}
for(i = 0; i < n; i += PtrSize) { for(i = 0; i < n; i += PtrSize) {
obj = nil; obj = nil;
// Find bits for this word. // Find bits for this word.
...@@ -308,16 +319,13 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -308,16 +319,13 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
// Consult GC bitmap. // Consult GC bitmap.
if(ncached <= 0) { if(ncached <= 0) {
// Refill cache. // Refill cache.
off = (uintptr*)(b+i) - (uintptr*)arena_start; cached = *--ptrbitp;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; ncached = 2;
shift = (off % wordsPerBitmapWord) * gcBits;
cached = *bitp >> shift;
ncached = (PtrSize*8 - shift)/gcBits;
} }
bits = cached; bits = cached;
cached >>= gcBits; cached >>= gcBits;
ncached--; ncached--;
if(i != 0 && (bits&bitBoundary) != 0) if((bits&bitBoundary) != 0)
break; // reached beginning of the next object break; // reached beginning of the next object
bits = (bits>>2)&BitsMask; bits = (bits>>2)&BitsMask;
if(bits == BitsDead) if(bits == BitsDead)
...@@ -336,11 +344,9 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -336,11 +344,9 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
// Find the next pair of bits. // Find the next pair of bits.
if(ptrmask == nil) { if(ptrmask == nil) {
if(ncached <= 0) { if(ncached <= 0) {
off = (uintptr*)(b+i+PtrSize) - (uintptr*)arena_start; // Refill cache.
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; cached = *--ptrbitp;
shift = (off % wordsPerBitmapWord) * gcBits; ncached = 2;
cached = *bitp >> shift;
ncached = (PtrSize*8 - shift)/gcBits;
} }
bits = (cached>>2)&BitsMask; bits = (cached>>2)&BitsMask;
} else } else
...@@ -383,8 +389,14 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -383,8 +389,14 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
if(bits == BitsSlice) { if(bits == BitsSlice) {
i += 2*PtrSize; i += 2*PtrSize;
cached >>= 2*gcBits; if(ncached == 2)
ncached -= 2; ncached = 0;
else if(ptrmask == nil) {
// Refill cache and consume one quadruple.
cached = *--ptrbitp;
cached >>= gcBits;
ncached = 1;
}
} else { } else {
i += PtrSize; i += PtrSize;
cached >>= gcBits; cached >>= gcBits;
...@@ -398,20 +410,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -398,20 +410,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
continue; continue;
// Mark the object. // Mark the object.
off = (uintptr*)obj - (uintptr*)arena_start; off = (uintptr*)obj - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
xbits = *bitp; xbits = *bitp;
bits = (xbits >> shift) & bitMask; bits = (xbits >> shift) & bitMask;
if((bits&bitBoundary) == 0) { if((bits&bitBoundary) == 0) {
// Not a beginning of a block, check if we have block boundary in xbits. // Not a beginning of a block, consult span table to find the block beginning.
while(shift > 0) {
obj -= PtrSize;
shift -= gcBits;
bits = (xbits >> shift) & bitMask;
if((bits&bitBoundary) != 0)
goto havebits;
}
// Otherwise consult span table to find the block beginning.
k = (uintptr)obj>>PageShift; k = (uintptr)obj>>PageShift;
x = k; x = k;
x -= (uintptr)arena_start>>PageShift; x -= (uintptr)arena_start>>PageShift;
...@@ -433,7 +437,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -433,7 +437,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
goto markobj; goto markobj;
} }
havebits:
// Now we have bits, bitp, and shift correct for // Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object. // obj pointing at the base of the object.
// Only care about not marked objects. // Only care about not marked objects.
...@@ -449,22 +452,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask) ...@@ -449,22 +452,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
// For 8-byte objects we use non-atomic store, if the other // For 8-byte objects we use non-atomic store, if the other
// quadruple is already marked. Otherwise we resort to CAS // quadruple is already marked. Otherwise we resort to CAS
// loop for marking. // loop for marking.
bits8 = xbits>>(shift&~7); if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
if((bits8&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
work.nproc == 1) work.nproc == 1)
((uint8*)bitp)[shift/8] = bits8 | (bitMarked<<(shift&7)); *bitp = xbits | (bitMarked<<shift);
else { else
for(;;) { runtime·atomicor8(bitp, bitMarked<<shift);
if(runtime·casp((void**)bitp, (void*)xbits, (void*)(xbits|(bitMarked<<shift))))
break;
xbits = *bitp;
bits = (xbits>>shift) & bitMask;
if((bits&bitMarked) != 0)
break;
}
if((bits&bitMarked) != 0)
continue;
}
if(((xbits>>(shift+2))&BitsMask) == BitsDead) if(((xbits>>(shift+2))&BitsMask) == BitsDead)
continue; // noscan object continue; // noscan object
...@@ -901,9 +894,9 @@ bool ...@@ -901,9 +894,9 @@ bool
runtime·MSpan_Sweep(MSpan *s, bool preserve) runtime·MSpan_Sweep(MSpan *s, bool preserve)
{ {
int32 cl, n, npages, nfree; int32 cl, n, npages, nfree;
uintptr size, off, *bitp, shift, xbits, bits; uintptr size, off, step;
uint32 sweepgen; uint32 sweepgen;
byte *p; byte *p, *bitp, shift, xbits, bits;
MCache *c; MCache *c;
byte *arena_start; byte *arena_start;
MLink head, *end, *link; MLink head, *end, *link;
...@@ -939,8 +932,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve) ...@@ -939,8 +932,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
// Mark any free objects in this span so we don't collect them. // Mark any free objects in this span so we don't collect them.
for(link = s->freelist; link != nil; link = link->next) { for(link = s->freelist; link != nil; link = link->next) {
off = (uintptr*)link - (uintptr*)arena_start; off = (uintptr*)link - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
*bitp |= bitMarked<<shift; *bitp |= bitMarked<<shift;
} }
...@@ -951,8 +944,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve) ...@@ -951,8 +944,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
// A finalizer can be set for an inner byte of an object, find object beginning. // A finalizer can be set for an inner byte of an object, find object beginning.
p = (byte*)(s->start << PageShift) + special->offset/size*size; p = (byte*)(s->start << PageShift) + special->offset/size*size;
off = (uintptr*)p - (uintptr*)arena_start; off = (uintptr*)p - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*bitp>>shift) & bitMask; bits = (*bitp>>shift) & bitMask;
if((bits&bitMarked) == 0) { if((bits&bitMarked) == 0) {
// Find the exact byte for which the special was setup // Find the exact byte for which the special was setup
...@@ -977,10 +970,27 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve) ...@@ -977,10 +970,27 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
// This thread owns the span now, so it can manipulate // This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations. // the block bitmap without atomic operations.
p = (byte*)(s->start << PageShift); p = (byte*)(s->start << PageShift);
// Find bits for the beginning of the span.
off = (uintptr*)p - (uintptr*)arena_start;
bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = 0;
step = size/(PtrSize*wordsPerBitmapByte);
// Rewind to the previous quadruple as we move to the next
// in the beginning of the loop.
bitp += step;
if(step == 0) {
// 8-byte objects.
bitp++;
shift = gcBits;
}
for(; n > 0; n--, p += size) { for(; n > 0; n--, p += size) {
off = (uintptr*)p - (uintptr*)arena_start; bitp -= step;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; if(step == 0) {
shift = (off % wordsPerBitmapWord) * gcBits; if(shift != 0)
bitp--;
shift = gcBits - shift;
}
xbits = *bitp; xbits = *bitp;
bits = (xbits>>shift) & bitMask; bits = (xbits>>shift) & bitMask;
...@@ -1759,8 +1769,8 @@ runtime·wakefing(void) ...@@ -1759,8 +1769,8 @@ runtime·wakefing(void)
static byte* static byte*
unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse) unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
{ {
uintptr *b, off, shift, pos, siz, i; uintptr pos, siz, i, off;
byte *arena_start, *prog1, v; byte *arena_start, *prog1, v, *bitp, shift;
arena_start = runtime·mheap.arena_start; arena_start = runtime·mheap.arena_start;
pos = *ppos; pos = *ppos;
...@@ -1777,11 +1787,11 @@ unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse) ...@@ -1777,11 +1787,11 @@ unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
if(inplace) { if(inplace) {
// Store directly into GC bitmap. // Store directly into GC bitmap.
off = (uintptr*)(mask+pos) - (uintptr*)arena_start; off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
if((shift%8)==0) if(shift==0)
((byte*)b)[shift/8] = 0; *bitp = 0;
((byte*)b)[shift/8] |= v<<((shift%8)+2); *bitp |= v<<(shift+2);
pos += PtrSize; pos += PtrSize;
} else if(sparse) { } else if(sparse) {
// 4-bits per word // 4-bits per word
...@@ -1847,8 +1857,8 @@ unrollglobgcprog(byte *prog, uintptr size) ...@@ -1847,8 +1857,8 @@ unrollglobgcprog(byte *prog, uintptr size)
void void
runtime·unrollgcproginplace_m(void) runtime·unrollgcproginplace_m(void)
{ {
uintptr size, size0, *b, off, shift, pos; uintptr size, size0, pos, off;
byte *arena_start, *prog; byte *arena_start, *prog, *bitp, shift;
Type *typ; Type *typ;
void *v; void *v;
...@@ -1866,15 +1876,15 @@ runtime·unrollgcproginplace_m(void) ...@@ -1866,15 +1876,15 @@ runtime·unrollgcproginplace_m(void)
// Mark first word as bitAllocated. // Mark first word as bitAllocated.
arena_start = runtime·mheap.arena_start; arena_start = runtime·mheap.arena_start;
off = (uintptr*)v - (uintptr*)arena_start; off = (uintptr*)v - (uintptr*)arena_start;
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
*b |= bitBoundary<<shift; *bitp |= bitBoundary<<shift;
// Mark word after last as BitsDead. // Mark word after last as BitsDead.
if(size0 < size) { if(size0 < size) {
off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start; off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; bitp = arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
*b &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2)); *bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
} }
} }
...@@ -1916,60 +1926,67 @@ runtime·unrollgcprog_m(void) ...@@ -1916,60 +1926,67 @@ runtime·unrollgcprog_m(void)
void void
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
{ {
uintptr *b, *b0, off, shift, x; uintptr i, off, step;
byte *p; byte *b;
if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markspan: bad pointer"); runtime·throw("markspan: bad pointer");
p = v; // Find bits of the beginning of the span.
if(leftover) // mark a boundary just past end of last block too off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
n++; b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
if((off%wordsPerBitmapByte) != 0)
b0 = nil; runtime·throw("markspan: unaligned length");
x = 0;
for(; n-- > 0; p += size) { // Okay to use non-atomic ops here, because we control
// Okay to use non-atomic ops here, because we control // the entire span, and each bitmap byte has bits for only
// the entire span, and each bitmap word has bits for only // one span, so no other goroutines are changing these bitmap words.
// one span, so no other goroutines are changing these
// bitmap words. if(size == PtrSize) {
off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset // Possible only on 64-bits (minimal size class is 8 bytes).
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; // Poor man's memset(0x11).
shift = (off % wordsPerBitmapWord) * gcBits; if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
if(b0 != b) { runtime·throw("markspan: bad bits");
if(b0 != nil) if((n%(wordsPerBitmapByte*PtrSize)) != 0)
*b0 = x; runtime·throw("markspan: unaligned length");
b0 = b; b = b - n/wordsPerBitmapByte + 1; // find first byte
x = 0; if(((uintptr)b%PtrSize) != 0)
} runtime·throw("markspan: unaligned pointer");
x |= (bitBoundary<<shift) | ((uintptr)BitsDead<<(shift+2)); for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
*(uintptr*)b = (uintptr)0x1111111111111111ULL; // bitBoundary+BitsDead
return;
} }
*b0 = x;
if(leftover)
n++; // mark a boundary just past end of last block too
step = size/(PtrSize*wordsPerBitmapByte);
for(i = 0; i != n; i++, b -= step)
*b = bitBoundary|(BitsDead<<2);
} }
// unmark the span of memory at v of length n bytes. // unmark the span of memory at v of length n bytes.
void void
runtime·unmarkspan(void *v, uintptr n) runtime·unmarkspan(void *v, uintptr n)
{ {
uintptr *p, *b, off; uintptr off;
byte *b;
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markspan: bad pointer"); runtime·throw("markspan: bad pointer");
p = v; off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
off = p - (uintptr*)runtime·mheap.arena_start; // word offset if((off % (PtrSize*wordsPerBitmapByte)) != 0)
if((off % wordsPerBitmapWord) != 0)
runtime·throw("markspan: unaligned pointer"); runtime·throw("markspan: unaligned pointer");
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
n /= PtrSize; n /= PtrSize;
if(n%wordsPerBitmapWord != 0) if(n%(PtrSize*wordsPerBitmapByte) != 0)
runtime·throw("unmarkspan: unaligned length"); runtime·throw("unmarkspan: unaligned length");
// Okay to use non-atomic ops here, because we control // Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap word has bits for only // the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these // one span, so no other goroutines are changing these
// bitmap words. // bitmap words.
n /= wordsPerBitmapWord; n /= wordsPerBitmapByte;
runtime·memclr((byte*)(b - n + 1), n*PtrSize); runtime·memclr(b - n + 1, n);
} }
void void
...@@ -1983,7 +2000,7 @@ runtime·MHeap_MapBits(MHeap *h) ...@@ -1983,7 +2000,7 @@ runtime·MHeap_MapBits(MHeap *h)
}; };
uintptr n; uintptr n;
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
n = ROUND(n, bitmapChunk); n = ROUND(n, bitmapChunk);
n = ROUND(n, PhysPageSize); n = ROUND(n, PhysPageSize);
if(h->bitmap_mapped >= n) if(h->bitmap_mapped >= n)
...@@ -2011,8 +2028,8 @@ void ...@@ -2011,8 +2028,8 @@ void
runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len) runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
{ {
Stkframe frame; Stkframe frame;
uintptr i, n, off, bits, shift, *b; uintptr i, n, off;
byte *base; byte *base, bits, shift, *b;
*mask = nil; *mask = nil;
*len = 0; *len = 0;
...@@ -2047,8 +2064,8 @@ runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len) ...@@ -2047,8 +2064,8 @@ runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
*mask = runtime·mallocgc(*len, nil, 0); *mask = runtime·mallocgc(*len, nil, 0);
for(i = 0; i < n; i += PtrSize) { for(i = 0; i < n; i += PtrSize) {
off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start; off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
shift = (off % wordsPerBitmapWord) * gcBits; shift = (off % wordsPerBitmapByte) * gcBits;
bits = (*b >> (shift+2))&BitsMask; bits = (*b >> (shift+2))&BitsMask;
(*mask)[i/PtrSize] = bits; (*mask)[i/PtrSize] = bits;
} }
......
...@@ -8,8 +8,8 @@ enum { ...@@ -8,8 +8,8 @@ enum {
ScanStackByFrames = 1, ScanStackByFrames = 1,
// Four bits per word (see #defines below). // Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
gcBits = 4, gcBits = 4,
wordsPerBitmapByte = 8/gcBits,
// GC type info programs. // GC type info programs.
// The programs allow to store type info required for GC in a compact form. // The programs allow to store type info required for GC in a compact form.
......
...@@ -913,6 +913,7 @@ void runtime·atomicstore64(uint64 volatile*, uint64); ...@@ -913,6 +913,7 @@ void runtime·atomicstore64(uint64 volatile*, uint64);
uint64 runtime·atomicload64(uint64 volatile*); uint64 runtime·atomicload64(uint64 volatile*);
void* runtime·atomicloadp(void* volatile*); void* runtime·atomicloadp(void* volatile*);
void runtime·atomicstorep(void* volatile*, void*); void runtime·atomicstorep(void* volatile*, void*);
void runtime·atomicor8(byte volatile*, byte);
void runtime·setg(G*); void runtime·setg(G*);
void runtime·newextram(void); void runtime·newextram(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment