Commit 45233734 authored by Elias Naur's avatar Elias Naur Committed by Russ Cox

runtime,cmd/ld: Add ARM external linking and implement -shared in terms of external linking

This CL is an aggregate of 10271047, 10499043, 9733044. Descriptions of each follow:

10499043
runtime,cmd/ld: Merge TLS symbols and teach 5l about ARM TLS

This CL prepares for external linking support to ARM.

The pseudo-symbols runtime.g and runtime.m are merged into a single
runtime.tlsgm symbol. When external linking, the offset of a thread local
variable is stored at a memory location instead of being embedded into an offset
of a ldr instruction. With a single runtime.tlsgm symbol for both g and m, only
one such offset is needed.

The larger part of this CL moves TLS code from gcc compiled to internally
compiled. The TLS code now uses the modern MRC instruction, and 5l is taught
about TLS fallbacks in case the instruction is not available or appropriate.

10271047
This CL adds support for -linkmode external to 5l.

For 5l itself, use addrel to allow for D_CALL relocations to be handled by the
host linker. Of the cases listed in rsc's comment in issue 4069, only cases 5 and
63 needed an update. One of the TODO: addrel cases was since replaced, and the
rest of the cases are either covered by indirection through addpool (cases with
LTO or LFROM flags) or stubs (case 74). The addpool cases are covered because
addpool emits AWORD instructions, which in turn are handled by case 11.

In the runtime, change the argv argument in the rt0* functions slightly to be a
pointer to the argv list, instead of relying on a particular location of argv.

9733044
The -shared flag to 6l outputs a shared library, implemented in Go
and callable from non-Go programs such as C.

The main part of this CL changes the thread local storage model.
Go uses the fastest and least general mode, local exec. TLS data in shared
libraries normally requires at least the local dynamic mode, however, this CL
instead opts for using the initial exec mode. Initial exec mode is faster than
local dynamic mode and can be used in linux since the linker has reserved a
limited amount of TLS space for performance sensitive TLS code.

Initial exec mode requires an extra load from the GOT table to determine the
TLS offset. This penalty will not be paid if ld is not in -shared mode, since
TLS accesses will be reduced to local exec.

The elf sections .init_array and .rela.init_array are added to register the Go
runtime entry with cgo at library load time.

The "hidden" attribute is added to Cgo functions called from Go, since Go
does not generate calls through the GOT table, and adding non-GOT relocations for
a global function is not supported by gcc. Cgo symbols don't need to be global
and avoiding the GOT table is also faster.

The changes to 8l only remove code relevant to the old -shared mode where
internal linking was used.

This CL only addresses the low level linker work. It can be submitted by itself,
but to be useful, the runtime changes in CL 9738047 are also needed.

Design discussion at
https://groups.google.com/forum/?fromgroups#!topic/golang-nuts/zmjXkGrEx6Q

Fixes #5590.

R=rsc
CC=golang-dev
https://golang.org/cl/12871044
parent c9228768
......@@ -273,7 +273,7 @@ enum as
#define D_PLT1 (D_NONE+44) // R_ARM_PLT32, 2nd inst: add ip, ip, #0xNN000
#define D_PLT2 (D_NONE+45) // R_ARM_PLT32, 3rd inst: ldr pc, [ip, #0xNNN]!
#define D_CALL (D_NONE+46) // R_ARM_PLT32/R_ARM_CALL/R_ARM_JUMP24, bl xxxxx or b yyyyy
#define D_TLS (D_NONE+47)
#define D_TLS (D_NONE+47) // R_ARM_TLS_LE32
/*
* this is the ranlib header
......
......@@ -93,12 +93,6 @@ braddoff(int32 a, int32 b)
return (((uint32)a) & 0xff000000U) | (0x00ffffffU & (uint32)(a + b));
}
Sym *
lookuprel(void)
{
return lookup(".rel", 0);
}
void
adddynrela(Sym *rel, Sym *s, Reloc *r)
{
......@@ -264,6 +258,26 @@ elfreloc1(Reloc *r, vlong sectoff)
else
return -1;
break;
case D_CALL:
if(r->siz == 4) {
if((r->add & 0xff000000) == 0xeb000000) // BL
LPUT(R_ARM_CALL | elfsym<<8);
else
LPUT(R_ARM_JUMP24 | elfsym<<8);
} else
return -1;
break;
case D_TLS:
if(r->siz == 4) {
if(flag_shared)
LPUT(R_ARM_TLS_IE32 | elfsym<<8);
else
LPUT(R_ARM_TLS_LE32 | elfsym<<8);
} else
return -1;
break;
}
return 0;
......@@ -308,6 +322,34 @@ machoreloc1(Reloc *r, vlong sectoff)
int
archreloc(Reloc *r, Sym *s, vlong *val)
{
Sym *rs;
if(linkmode == LinkExternal) {
switch(r->type) {
case D_CALL:
r->done = 0;
// set up addend for eventual relocation via outer symbol.
rs = r->sym;
r->xadd = r->add;
if(r->xadd & 0x800000)
r->xadd |= ~0xffffff;
r->xadd *= 4;
while(rs->outer != nil) {
r->xadd += symaddr(rs) - symaddr(rs->outer);
rs = rs->outer;
}
if(rs->type != SHOSTOBJ && rs->sect == nil)
diag("missing section for %s", rs->name);
r->xsym = rs;
*val = braddoff((0xff000000U & (uint32)r->add),
(0xffffff & (uint32)(r->xadd / 4)));
return 0;
}
return -1;
}
switch(r->type) {
case D_CONST:
*val = r->add;
......@@ -766,7 +808,7 @@ nopstat(char *f, Count *c)
}
void
asmout(Prog *p, Optab *o, int32 *out)
asmout(Prog *p, Optab *o, int32 *out, Sym *gmsym)
{
int32 o1, o2, o3, o4, o5, o6, v;
int r, rf, rt, rt2;
......@@ -849,11 +891,19 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p-
break;
case 5: /* bra s */
o1 = opbra(p->as, p->scond);
v = -8;
// TODO: Use addrel.
if(p->to.sym != S && p->to.sym->type != 0) {
rel = addrel(cursym);
rel->off = pc - cursym->value;
rel->siz = 4;
rel->sym = p->to.sym;
rel->add = o1 | ((v >> 2) & 0xffffff);
rel->type = D_CALL;
break;
}
if(p->cond != P)
v = (p->cond->pc - pc) - 8;
o1 = opbra(p->as, p->scond);
o1 |= (v >> 2) & 0xffffff;
break;
......@@ -911,7 +961,13 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p-
rel->siz = 4;
rel->sym = p->to.sym;
rel->add = p->to.offset;
if(flag_shared) {
if(rel->sym == gmsym) {
rel->type = D_TLS;
if(flag_shared)
rel->add += pc - p->pcrel->pc - 8 - rel->siz;
rel->xadd = rel->add;
rel->xsym = rel->sym;
} else if(flag_shared) {
rel->type = D_PCREL;
rel->add += pc - p->pcrel->pc - 8;
} else
......@@ -1242,9 +1298,22 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p-
case 63: /* bcase */
if(p->cond != P) {
o1 = p->cond->pc;
if(flag_shared)
o1 = o1 - p->pcrel->pc - 16;
rel = addrel(cursym);
rel->off = pc - cursym->value;
rel->siz = 4;
if(p->to.sym != S && p->to.sym->type != 0) {
rel->sym = p->to.sym;
rel->add = p->to.offset;
} else {
rel->sym = cursym;
rel->add = p->cond->pc - cursym->value;
}
if(o->flag & LPCREL) {
rel->type = D_PCREL;
rel->add += pc - p->pcrel->pc - 16 + rel->siz;
} else
rel->type = D_ADDR;
o1 = 0;
}
break;
......
......@@ -183,7 +183,6 @@ struct Sym
Reloc* r;
int32 nr;
int32 maxr;
int rel_ro;
};
#define SIGNINTERN (1729*325*1729)
......@@ -293,7 +292,6 @@ EXTERN int32 INITDAT; /* data location */
EXTERN int32 INITRND; /* data round above text location */
EXTERN int32 INITTEXT; /* text location */
EXTERN char* INITENTRY; /* entry point */
EXTERN char* LIBINITENTRY; /* shared library entry point */
EXTERN int32 autosize;
EXTERN Auto* curauto;
EXTERN Auto* curhist;
......@@ -364,7 +362,7 @@ int aclass(Adr*);
void addhist(int32, int);
Prog* appendp(Prog*);
void asmb(void);
void asmout(Prog*, Optab*, int32*);
void asmout(Prog*, Optab*, int32*, Sym*);
int32 atolwhex(char*);
Prog* brloop(Prog*);
void buildop(void);
......
......@@ -60,13 +60,14 @@ noops(void)
int o;
int32 arg;
Prog *pmorestack;
Sym *symmorestack;
Sym *symmorestack, *tlsfallback, *gmsym;
/*
* find leaf subroutines
* strip NOPs
* expand RET
* expand BECOME pseudo
* fixup TLS
*/
if(debug['v'])
......@@ -81,6 +82,10 @@ noops(void)
pmorestack = symmorestack->text;
pmorestack->reg |= NOSPLIT;
tlsfallback = lookup("runtime.read_tls_fallback", 0);
gmsym = S;
if(linkmode == LinkExternal)
gmsym = lookup("runtime.tlsgm", 0);
q = P;
for(cursym = textp; cursym != nil; cursym = cursym->next) {
for(p = cursym->text; p != P; p = p->link) {
......@@ -145,6 +150,82 @@ noops(void)
}
}
break;
case AWORD:
// Rewrite TLS register fetch: MRC 15, 0, <reg>, C13, C0, 3
if((p->to.offset & 0xffff0fff) == 0xee1d0f70) {
if(HEADTYPE == Hopenbsd) {
p->as = ARET;
} else if(goarm < 7) {
if(tlsfallback->type != STEXT) {
diag("runtime·read_tls_fallback not defined");
errorexit();
}
// BL runtime.read_tls_fallback(SB)
p->as = ABL;
p->to.type = D_BRANCH;
p->to.sym = tlsfallback;
p->cond = tlsfallback->text;
p->to.offset = 0;
cursym->text->mark &= ~LEAF;
}
if(linkmode == LinkExternal) {
// runtime.tlsgm is relocated with R_ARM_TLS_LE32
// and $runtime.tlsgm will contain the TLS offset.
//
// MOV $runtime.tlsgm+tlsoffset(SB), REGTMP
// ADD REGTMP, <reg>
//
// In shared mode, runtime.tlsgm is relocated with
// R_ARM_TLS_IE32 and runtime.tlsgm(SB) will point
// to the GOT entry containing the TLS offset.
//
// MOV runtime.tlsgm(SB), REGTMP
// ADD REGTMP, <reg>
// SUB -tlsoffset, <reg>
//
// The SUB compensates for tlsoffset
// used in runtime.save_gm and runtime.load_gm.
q = p;
p = appendp(p);
p->as = AMOVW;
p->scond = 14;
p->reg = NREG;
if(flag_shared) {
p->from.type = D_OREG;
p->from.offset = 0;
} else {
p->from.type = D_CONST;
p->from.offset = tlsoffset;
}
p->from.sym = gmsym;
p->from.name = D_EXTERN;
p->to.type = D_REG;
p->to.reg = REGTMP;
p->to.offset = 0;
p = appendp(p);
p->as = AADD;
p->scond = 14;
p->reg = NREG;
p->from.type = D_REG;
p->from.reg = REGTMP;
p->to.type = D_REG;
p->to.reg = (q->to.offset & 0xf000) >> 12;
p->to.offset = 0;
if(flag_shared) {
p = appendp(p);
p->as = ASUB;
p->scond = 14;
p->reg = NREG;
p->from.type = D_CONST;
p->from.offset = -tlsoffset;
p->to.type = D_REG;
p->to.reg = (q->to.offset & 0xf000) >> 12;
p->to.offset = 0;
}
}
}
}
q = p;
}
......
......@@ -81,8 +81,7 @@ main(int argc, char *argv[])
INITDAT = -1;
INITRND = -1;
INITENTRY = 0;
LIBINITENTRY = 0;
linkmode = LinkInternal; // TODO: LinkAuto once everything works.
linkmode = LinkAuto;
nuxiinit();
p = getgoarm();
......@@ -126,34 +125,43 @@ main(int argc, char *argv[])
flagstr("r", "dir1:dir2:...: set ELF dynamic linker search path", &rpath);
flagcount("race", "enable race detector", &flag_race);
flagcount("s", "disable symbol table", &debug['s']);
flagcount("shared", "generate shared object (implies -linkmode external)", &flag_shared);
flagstr("tmpdir", "leave temporary files in this directory", &tmpdir);
flagcount("u", "reject unsafe packages", &debug['u']);
flagcount("v", "print link trace", &debug['v']);
flagcount("w", "disable DWARF generation", &debug['w']);
flagcount("shared", "generate shared object", &flag_shared);
// TODO: link mode flag
flagparse(&argc, &argv, usage);
if(argc != 1)
usage();
if(flag_shared)
linkmode = LinkExternal;
mywhatsys();
if(HEADTYPE == -1)
HEADTYPE = headtype(goos);
// getgoextlinkenabled is based on GO_EXTLINK_ENABLED when
// Go was built; see ../../make.bash.
if(linkmode == LinkAuto && strcmp(getgoextlinkenabled(), "0") == 0)
linkmode = LinkInternal;
if(linkmode == LinkExternal) {
diag("only -linkmode=internal is supported");
errorexit();
} else if(linkmode == LinkAuto) {
linkmode = LinkInternal;
switch(HEADTYPE) {
default:
if(linkmode == LinkAuto)
linkmode = LinkInternal;
if(linkmode == LinkExternal && strcmp(getgoextlinkenabled(), "1") != 0)
sysfatal("cannot use -linkmode=external with -H %s", headstr(HEADTYPE));
break;
case Hlinux:
break;
}
libinit();
if(HEADTYPE == -1)
HEADTYPE = headtype(goos);
switch(HEADTYPE) {
default:
diag("unknown -H option");
......@@ -208,7 +216,7 @@ main(int argc, char *argv[])
case Hnetbsd:
debug['d'] = 0; // with dynamic linking
tlsoffset = -8; // hardcoded number, first 4-byte word for g, and then 4-byte word for m
// this number is known to ../../pkg/runtime/cgo/gcc_linux_arm.c
// this number is known to ../../pkg/runtime/rt0_*_arm.s
elfinit();
HEADR = ELFRESERVE;
if(INITTEXT == -1)
......@@ -253,6 +261,7 @@ main(int argc, char *argv[])
// mark some functions that are only referenced after linker code editing
if(debug['F'])
mark(rlookup("_sfloat", 0));
mark(lookup("runtime.read_tls_fallback", 0));
deadcode();
if(textp == nil) {
diag("no code");
......
......@@ -191,7 +191,7 @@ Optab optab[] =
{ AMOVBU, C_REG, C_NONE, C_SHIFT, 61, 4, 0 },
{ ACASE, C_REG, C_NONE, C_NONE, 62, 4, 0, LPCREL, 8 },
{ ABCASE, C_NONE, C_NONE, C_SBRA, 63, 4, 0 },
{ ABCASE, C_NONE, C_NONE, C_SBRA, 63, 4, 0, LPCREL, 0 },
{ AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0 },
{ AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0 },
......
......@@ -246,6 +246,13 @@ patch(void)
p->cond = q;
}
}
if(flag_shared) {
s = lookup("init_array", 0);
s->type = SINITARR;
s->reachable = 1;
s->hide = 1;
addaddr(s, lookup(INITENTRY, 0));
}
for(cursym = textp; cursym != nil; cursym = cursym->next) {
for(p = cursym->text; p != P; p = p->link) {
......
......@@ -90,7 +90,7 @@ span(void)
int32 c, otxt, out[6];
Section *sect;
uchar *bp;
Sym *sub;
Sym *sub, *gmsym;
if(debug['v'])
Bprint(&bso, "%5.2f span\n", cputime());
......@@ -237,6 +237,9 @@ span(void)
* code references to be relocated too, and then
* perhaps we'd be able to parallelize the span loop above.
*/
gmsym = S;
if(linkmode == LinkExternal)
gmsym = lookup("runtime.tlsgm", 0);
for(cursym = textp; cursym != nil; cursym = cursym->next) {
p = cursym->text;
if(p == P || p->link == P)
......@@ -249,7 +252,7 @@ span(void)
pc = p->pc;
curp = p;
o = oplook(p);
asmout(p, o, out);
asmout(p, o, out, gmsym);
for(i=0; i<o->size/4; i++) {
v = out[i];
*bp++ = v;
......@@ -574,10 +577,7 @@ aclass(Adr *a)
if(s == S)
break;
instoffset = 0; // s.b. unused but just in case
if(flag_shared)
return C_LCONADDR;
else
return C_LCON;
return C_LCONADDR;
case D_AUTO:
instoffset = autosize + a->offset;
......
......@@ -518,6 +518,15 @@ omem:
$$.scale = $8;
checkscale($$.scale);
}
| con '(' LLREG ')' '(' LSREG '*' con ')'
{
$$ = nullgen;
$$.type = D_INDIR+$3;
$$.offset = $1;
$$.index = $6;
$$.scale = $8;
checkscale($$.scale);
}
| '(' LLREG ')'
{
$$ = nullgen;
......
This diff is collapsed.
......@@ -99,12 +99,6 @@ int nelfsym = 1;
static void addpltsym(Sym*);
static void addgotsym(Sym*);
Sym *
lookuprel(void)
{
return lookup(".rela", 0);
}
void
adddynrela(Sym *rela, Sym *s, Reloc *r)
{
......@@ -312,9 +306,12 @@ elfreloc1(Reloc *r, vlong sectoff)
break;
case D_TLS:
if(r->siz == 4)
VPUT(R_X86_64_TPOFF32 | (uint64)elfsym<<32);
else
if(r->siz == 4) {
if(flag_shared)
VPUT(R_X86_64_GOTTPOFF | (uint64)elfsym<<32);
else
VPUT(R_X86_64_TPOFF32 | (uint64)elfsym<<32);
} else
return -1;
break;
}
......
......@@ -187,7 +187,6 @@ struct Sym
Reloc* r;
int32 nr;
int32 maxr;
int rel_ro;
};
struct Optab
{
......@@ -329,7 +328,6 @@ EXTERN int32 INITRND;
EXTERN int64 INITTEXT;
EXTERN int64 INITDAT;
EXTERN char* INITENTRY; /* entry point */
EXTERN char* LIBINITENTRY; /* shared library entry point */
EXTERN char* pcstr;
EXTERN Auto* curauto;
EXTERN Auto* curhist;
......
......@@ -82,7 +82,6 @@ main(int argc, char *argv[])
INITDAT = -1;
INITRND = -1;
INITENTRY = 0;
LIBINITENTRY = 0;
linkmode = LinkAuto;
nuxiinit();
......@@ -119,7 +118,7 @@ main(int argc, char *argv[])
flagstr("r", "dir1:dir2:...: set ELF dynamic linker search path", &rpath);
flagcount("race", "enable race detector", &flag_race);
flagcount("s", "disable symbol table", &debug['s']);
flagcount("shared", "generate shared object", &flag_shared);
flagcount("shared", "generate shared object (implies -linkmode external)", &flag_shared);
flagstr("tmpdir", "leave temporary files in this directory", &tmpdir);
flagcount("u", "reject unsafe packages", &debug['u']);
flagcount("v", "print link trace", &debug['v']);
......@@ -140,6 +139,9 @@ main(int argc, char *argv[])
if(linkmode == LinkAuto && strcmp(getgoextlinkenabled(), "0") == 0)
linkmode = LinkInternal;
if(flag_shared)
linkmode = LinkExternal;
switch(HEADTYPE) {
default:
if(linkmode == LinkAuto)
......@@ -365,7 +367,7 @@ zaddr(char *pn, Biobuf *f, Adr *a, Sym *h[])
adrgotype = zsym(pn, f, h);
s = a->sym;
t = a->type;
if(t == D_INDIR+D_GS)
if(t == D_INDIR+D_GS || a->index == D_GS)
a->offset += tlsoffset;
if(t != D_AUTO && t != D_PARAM) {
if(s && adrgotype)
......
......@@ -271,7 +271,7 @@ patch(void)
{
int32 c;
Prog *p, *q;
Sym *s;
Sym *s, *gmsym;
int32 vexit;
if(debug['v'])
......@@ -282,6 +282,17 @@ patch(void)
Bprint(&bso, "%5.2f patch\n", cputime());
Bflush(&bso);
if(flag_shared) {
s = lookup("init_array", 0);
s->type = SINITARR;
s->reachable = 1;
s->hide = 1;
addaddr(s, lookup(INITENTRY, 0));
}
gmsym = lookup("runtime.tlsgm", 0);
if(linkmode != LinkExternal)
gmsym->reachable = 0;
s = lookup("exit", 0);
vexit = s->value;
for(cursym = textp; cursym != nil; cursym = cursym->next)
......@@ -317,6 +328,59 @@ patch(void)
p->from.type = D_INDIR+D_FS;
if(p->to.type == D_INDIR+D_GS)
p->to.type = D_INDIR+D_FS;
if(p->from.index == D_GS)
p->from.index = D_FS;
if(p->to.index == D_GS)
p->to.index = D_FS;
}
if(!flag_shared) {
// Convert g() or m() accesses of the form
// op n(reg)(GS*1), reg
// to
// op n(GS*1), reg
if(p->from.index == D_FS || p->from.index == D_GS) {
p->from.type = D_INDIR + p->from.index;
p->from.index = D_NONE;
}
// Convert g() or m() accesses of the form
// op reg, n(reg)(GS*1)
// to
// op reg, n(GS*1)
if(p->to.index == D_FS || p->to.index == D_GS) {
p->to.type = D_INDIR + p->to.index;
p->to.index = D_NONE;
}
// Convert get_tls access of the form
// op runtime.tlsgm(SB), reg
// to
// NOP
if(gmsym != S && p->from.sym == gmsym) {
p->as = ANOP;
p->from.type = D_NONE;
p->to.type = D_NONE;
p->from.sym = nil;
p->to.sym = nil;
continue;
}
} else {
// Convert TLS reads of the form
// op n(GS), reg
// to
// MOVQ $runtime.tlsgm(SB), reg
// op n(reg)(GS*1), reg
if((p->from.type == D_INDIR+D_FS || p->from.type == D_INDIR + D_GS) && p->to.type >= D_AX && p->to.type <= D_DI) {
q = appendp(p);
q->to = p->to;
q->as = p->as;
q->from.type = D_INDIR+p->to.type;
q->from.index = p->from.type - D_INDIR;
q->from.scale = 1;
q->from.offset = p->from.offset;
p->as = AMOVQ;
p->from.type = D_EXTERN;
p->from.sym = gmsym;
p->from.offset = 0;
}
}
if(p->as == ACALL || (p->as == AJMP && p->to.type != D_BRANCH) || (p->as == ARET && p->to.sym != nil)) {
s = p->to.sym;
......@@ -411,7 +475,10 @@ dostkoff(void)
int32 autoffset, deltasp;
int a, pcsize;
uint32 moreconst1, moreconst2, i;
Sym *gmsym;
gmsym = lookup("runtime.tlsgm", 0);
for(i=0; i<nelem(morename); i++) {
symmorestack[i] = lookup(morename[i], 0);
if(symmorestack[i]->type != STEXT)
......@@ -443,6 +510,14 @@ dostkoff(void)
diag("nosplit func likely to overflow stack");
if(!(p->from.scale & NOSPLIT)) {
if(flag_shared) {
// Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
p = appendp(p);
p->as = AMOVQ;
p->from.type = D_EXTERN;
p->from.sym = gmsym;
p->to.type = D_CX;
}
p = appendp(p); // load g into CX
p->as = AMOVQ;
if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd
......@@ -451,6 +526,11 @@ dostkoff(void)
p->from.type = D_INDIR+D_FS;
else
p->from.type = D_INDIR+D_GS;
if(flag_shared) {
// Add TLS offset stored in CX
p->from.index = p->from.type - D_INDIR;
p->from.type = D_INDIR + D_CX;
}
p->from.offset = tlsoffset+0;
p->to.type = D_CX;
if(HEADTYPE == Hwindows) {
......
......@@ -358,6 +358,18 @@ prefixof(Adr *a)
case D_INDIR+D_GS:
return 0x65;
}
switch(a->index) {
case D_CS:
return 0x2e;
case D_DS:
return 0x3e;
case D_ES:
return 0x26;
case D_FS:
return 0x64;
case D_GS:
return 0x65;
}
return 0;
}
......@@ -735,15 +747,20 @@ vaddr(Adr *a, Reloc *r)
diag("need reloc for %D", a);
errorexit();
}
if(flag_shared)
r->type = D_PCREL;
else
r->type = D_ADDR;
r->siz = 4; // TODO: 8 for external symbols
r->off = -1; // caller must fill in
r->sym = s;
r->add = v;
v = 0;
if(flag_shared) {
if(s->type == STLSBSS) {
r->xadd = r->add - r->siz;
r->type = D_TLS;
r->xsym = s;
} else
r->type = D_PCREL;
} else
r->type = D_ADDR;
}
return v;
}
......@@ -760,7 +777,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
v = a->offset;
t = a->type;
rel.siz = 0;
if(a->index != D_NONE) {
if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) {
if(t < D_INDIR) {
switch(t) {
default:
......@@ -888,18 +905,11 @@ putrelv:
r = addrel(cursym);
r->off = curp->pc + andptr - and;
r->add = 0;
r->xadd = 0;
r->add = a->offset-tlsoffset;
r->xadd = r->add;
r->siz = 4;
r->type = D_TLS;
if(a->offset == tlsoffset+0)
s = lookup("runtime.g", 0);
else
s = lookup("runtime.m", 0);
s->type = STLSBSS;
s->reachable = 1;
s->size = PtrSize;
s->hide = 1;
s = lookup("runtime.tlsgm", 0);
r->sym = s;
r->xsym = s;
v = 0;
......
......@@ -95,12 +95,6 @@ int nelfsym = 1;
static void addpltsym(Sym*);
static void addgotsym(Sym*);
Sym *
lookuprel(void)
{
return lookup(".rel", 0);
}
void
adddynrela(Sym *rela, Sym *s, Reloc *r)
{
......@@ -366,6 +360,8 @@ int
archreloc(Reloc *r, Sym *s, vlong *val)
{
USED(s);
if(linkmode == LinkExternal)
return -1;
switch(r->type) {
case D_CONST:
*val = r->add;
......
......@@ -169,7 +169,6 @@ struct Sym
Reloc* r;
int32 nr;
int32 maxr;
int rel_ro;
};
struct Optab
{
......@@ -285,7 +284,6 @@ EXTERN int32 INITRND;
EXTERN int32 INITTEXT;
EXTERN int32 INITDAT;
EXTERN char* INITENTRY; /* entry point */
EXTERN char* LIBINITENTRY; /* shared library entry point */
EXTERN char* pcstr;
EXTERN Auto* curauto;
EXTERN Auto* curhist;
......
......@@ -89,7 +89,6 @@ main(int argc, char *argv[])
INITDAT = -1;
INITRND = -1;
INITENTRY = 0;
LIBINITENTRY = 0;
linkmode = LinkAuto;
nuxiinit();
......
......@@ -695,18 +695,11 @@ putrelv:
r = addrel(cursym);
r->off = curp->pc + andptr - and;
r->add = 0;
r->xadd = 0;
r->add = a->offset-tlsoffset;
r->xadd = r->add;
r->siz = 4;
r->type = D_TLS;
if(a->offset == tlsoffset+0)
s = lookup("runtime.g", 0);
else
s = lookup("runtime.m", 0);
s->type = STLSBSS;
s->reachable = 1;
s->hide = 1;
s->size = PtrSize;
s = lookup("runtime.tlsgm", 0);
r->sym = s;
r->xsym = s;
v = 0;
......
......@@ -494,7 +494,7 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) {
// Gcc wrapper unpacks the C argument struct
// and calls the actual C function.
fmt.Fprintf(fgcc, "void\n")
fmt.Fprintf(fgcc, "__attribute__ ((visibility (\"hidden\"))) void\n")
fmt.Fprintf(fgcc, "_cgo%s%s(void *v)\n", cPrefix, n.Mangle)
fmt.Fprintf(fgcc, "{\n")
if n.AddError {
......
......@@ -162,15 +162,19 @@ static struct {
"#define m(r) 8(GS)\n"
"#define procid(r) 16(GS)\n"
},
// The TLS accessors here are defined to use the initial exec model.
// If the linker is not outputting a shared library, it will reduce
// the TLS accessors to the local exec model, effectively removing
// get_tls().
{"amd64", "",
"// The offsets 0 and 8 are known to:\n"
"// ../../cmd/6l/pass.c:/D_GS\n"
"// cgo/gcc_linux_amd64.c:/^threadentry\n"
"// cgo/gcc_darwin_amd64.c:/^threadentry\n"
"//\n"
"#define get_tls(r)\n"
"#define g(r) 0(GS)\n"
"#define m(r) 8(GS)\n"
"#define get_tls(r) MOVQ runtime·tlsgm(SB), r\n"
"#define g(r) 0(r)(GS*1)\n"
"#define m(r) 8(r)(GS*1)\n"
},
{"arm", "",
......
......@@ -178,12 +178,14 @@ relocsym(Sym *s)
switch(r->type) {
default:
o = 0;
if(linkmode == LinkExternal || archreloc(r, s, &o) < 0)
if(archreloc(r, s, &o) < 0)
diag("unknown reloc %d", r->type);
break;
case D_TLS:
r->done = 0;
o = 0;
if(thechar != '6')
o = r->add;
break;
case D_ADDR:
if(linkmode == LinkExternal && r->sym->type != SCONST) {
......@@ -305,8 +307,6 @@ void
dynrelocsym(Sym *s)
{
Reloc *r;
Sym *rel;
Sym *got;
if(HEADTYPE == Hwindows) {
Sym *rel, *targ;
......@@ -343,22 +343,9 @@ dynrelocsym(Sym *s)
return;
}
got = rel = nil;
if(flag_shared) {
rel = lookuprel();
got = lookup(".got", 0);
}
s->rel_ro = 0;
for(r=s->r; r<s->r+s->nr; r++) {
if(r->sym != S && r->sym->type == SDYNIMPORT || r->type >= 256)
adddynrel(s, r);
if(flag_shared && r->sym != S && s->type != SDYNIMPORT && r->type == D_ADDR
&& (s == got || s->type == SDATA || s->type == SGOSTRING || s->type == STYPE || s->type == SRODATA)) {
// Create address based RELATIVE relocation
adddynrela(rel, s, r);
if(s->type < SNOPTRDATA)
s->rel_ro = 1;
}
}
}
......@@ -1099,12 +1086,6 @@ dodata(void)
}
*l = nil;
if(flag_shared) {
for(s=datap; s != nil; s = s->next) {
if(s->rel_ro)
s->type = SDATARELRO;
}
}
datap = listsort(datap, datcmp, offsetof(Sym, next));
/*
......@@ -1138,12 +1119,12 @@ dodata(void)
/* pointer-free data */
sect = addsection(&segdata, ".noptrdata", 06);
sect->align = maxalign(s, SDATARELRO-1);
sect->align = maxalign(s, SINITARR-1);
datsize = rnd(datsize, sect->align);
sect->vaddr = datsize;
lookup("noptrdata", 0)->sect = sect;
lookup("enoptrdata", 0)->sect = sect;
for(; s != nil && s->type < SDATARELRO; s = s->next) {
for(; s != nil && s->type < SINITARR; s = s->next) {
datsize = aligndatsize(datsize, s);
s->sect = sect;
s->type = SDATA;
......@@ -1152,18 +1133,15 @@ dodata(void)
}
sect->len = datsize - sect->vaddr;
/* dynamic relocated rodata */
/* shared library initializer */
if(flag_shared) {
sect = addsection(&segdata, ".data.rel.ro", 06);
sect->align = maxalign(s, SDATARELRO);
sect = addsection(&segdata, ".init_array", 06);
sect->align = maxalign(s, SINITARR);
datsize = rnd(datsize, sect->align);
sect->vaddr = datsize;
lookup("datarelro", 0)->sect = sect;
lookup("edatarelro", 0)->sect = sect;
for(; s != nil && s->type == SDATARELRO; s = s->next) {
for(; s != nil && s->type == SINITARR; s = s->next) {
datsize = aligndatsize(datsize, s);
s->sect = sect;
s->type = SDATA;
s->value = datsize - sect->vaddr;
growdatsize(&datsize, s);
}
......@@ -1178,7 +1156,7 @@ dodata(void)
lookup("data", 0)->sect = sect;
lookup("edata", 0)->sect = sect;
for(; s != nil && s->type < SBSS; s = s->next) {
if(s->type == SDATARELRO) {
if(s->type == SINITARR) {
cursym = s;
diag("unexpected symbol type %d", s->type);
}
......@@ -1423,7 +1401,7 @@ textaddress(void)
void
address(void)
{
Section *s, *text, *data, *rodata, *symtab, *pclntab, *noptr, *bss, *noptrbss, *datarelro;
Section *s, *text, *data, *rodata, *symtab, *pclntab, *noptr, *bss, *noptrbss;
Section *typelink;
Sym *sym, *sub;
uvlong va;
......@@ -1473,7 +1451,6 @@ address(void)
noptr = nil;
bss = nil;
noptrbss = nil;
datarelro = nil;
for(s=segdata.sect; s != nil; s=s->next) {
vlen = s->len;
if(s->next)
......@@ -1489,8 +1466,6 @@ address(void)
bss = s;
if(strcmp(s->name, ".noptrbss") == 0)
noptrbss = s;
if(strcmp(s->name, ".data.rel.ro") == 0)
datarelro = s;
}
segdata.filelen = bss->vaddr - segdata.vaddr;
......@@ -1516,10 +1491,6 @@ address(void)
xdefine("erodata", SRODATA, rodata->vaddr + rodata->len);
xdefine("typelink", SRODATA, typelink->vaddr);
xdefine("etypelink", SRODATA, typelink->vaddr + typelink->len);
if(datarelro != nil) {
xdefine("datarelro", SRODATA, datarelro->vaddr);
xdefine("edatarelro", SRODATA, datarelro->vaddr + datarelro->len);
}
sym = lookup("gcdata", 0);
xdefine("egcdata", SRODATA, symaddr(sym) + sym->size);
......
......@@ -905,8 +905,6 @@ doelf(void)
addstring(shstrtab, ".elfdata");
addstring(shstrtab, ".rodata");
addstring(shstrtab, ".typelink");
if(flag_shared)
addstring(shstrtab, ".data.rel.ro");
addstring(shstrtab, ".gosymtab");
addstring(shstrtab, ".gopclntab");
......@@ -936,6 +934,14 @@ doelf(void)
addstring(shstrtab, ".note.GNU-stack");
}
if(flag_shared) {
addstring(shstrtab, ".init_array");
if(thechar == '6')
addstring(shstrtab, ".rela.init_array");
else
addstring(shstrtab, ".rel.init_array");
}
if(!debug['s']) {
addstring(shstrtab, ".symtab");
addstring(shstrtab, ".strtab");
......@@ -1064,13 +1070,6 @@ doelf(void)
elfwritedynent(s, DT_DEBUG, 0);
if(flag_shared) {
Sym *init_sym = lookup(LIBINITENTRY, 0);
if(init_sym->type != STEXT)
diag("entry not text: %s", init_sym->name);
elfwritedynentsym(s, DT_INIT, init_sym);
}
// Do not write DT_NULL. elfdynhash will finish it.
}
}
......@@ -1469,9 +1468,7 @@ elfobj:
eh->ident[EI_DATA] = ELFDATA2LSB;
eh->ident[EI_VERSION] = EV_CURRENT;
if(flag_shared)
eh->type = ET_DYN;
else if(linkmode == LinkExternal)
if(linkmode == LinkExternal)
eh->type = ET_REL;
else
eh->type = ET_EXEC;
......
......@@ -569,6 +569,8 @@ typedef struct {
#define R_ARM_GOT_PREL 96
#define R_ARM_GNU_VTENTRY 100
#define R_ARM_GNU_VTINHERIT 101
#define R_ARM_TLS_IE32 107
#define R_ARM_TLS_LE32 108
#define R_ARM_RSBREL32 250
#define R_ARM_THM_RPC22 251
#define R_ARM_RREL32 252
......@@ -576,7 +578,7 @@ typedef struct {
#define R_ARM_RPC24 254
#define R_ARM_RBASE 255
#define R_ARM_COUNT 37 /* Count of defined relocation types. */
#define R_ARM_COUNT 38 /* Count of defined relocation types. */
#define R_386_NONE 0 /* No relocation. */
......
......@@ -499,6 +499,9 @@ loadcgo(char *file, char *pkg, char *p, int n)
local = expandpkg(local, pkg);
s = lookup(local, 0);
if(flag_shared && s == lookup("main", 0))
continue;
// export overrides import, for openbsd/cgo.
// see issue 4878.
if(s->dynimplib != nil) {
......@@ -680,8 +683,6 @@ deadcode(void)
Bprint(&bso, "%5.2f deadcode\n", cputime());
mark(lookup(INITENTRY, 0));
if(flag_shared)
mark(lookup(LIBINITENTRY, 0));
for(i=0; i<nelem(markextra); i++)
mark(lookup(markextra[i], 0));
......
......@@ -119,17 +119,14 @@ libinit(void)
}
if(INITENTRY == nil) {
INITENTRY = mal(strlen(goarch)+strlen(goos)+10);
sprint(INITENTRY, "_rt0_%s_%s", goarch, goos);
}
lookup(INITENTRY, 0)->type = SXREF;
if(flag_shared) {
if(LIBINITENTRY == nil) {
LIBINITENTRY = mal(strlen(goarch)+strlen(goos)+20);
sprint(LIBINITENTRY, "_rt0_%s_%s_lib", goarch, goos);
INITENTRY = mal(strlen(goarch)+strlen(goos)+20);
if(!flag_shared) {
sprint(INITENTRY, "_rt0_%s_%s", goarch, goos);
} else {
sprint(INITENTRY, "_rt0_%s_%s_lib", goarch, goos);
}
lookup(LIBINITENTRY, 0)->type = SXREF;
}
lookup(INITENTRY, 0)->type = SXREF;
}
void
......@@ -308,7 +305,13 @@ void
loadlib(void)
{
int i, w, x;
Sym *s;
Sym *s, *gmsym;
if(flag_shared) {
s = lookup("runtime.islibrary", 0);
s->dupok = 1;
adduint8(s, 1);
}
loadinternal("runtime");
if(thechar == '5')
......@@ -357,7 +360,15 @@ loadlib(void)
} else
s->type = 0;
}
}
}
gmsym = lookup("runtime.tlsgm", 0);
gmsym->type = STLSBSS;
gmsym->size = 2*PtrSize;
gmsym->hide = 1;
if(linkmode == LinkExternal)
gmsym->reachable = 1;
else
gmsym->reachable = 0;
// Now that we know the link mode, trim the dynexp list.
x = CgoExportDynamic;
......@@ -669,7 +680,7 @@ hostlink(void)
p = strchr(p + 1, ' ');
}
argv = malloc((10+nhostobj+nldflag+c)*sizeof argv[0]);
argv = malloc((13+nhostobj+nldflag+c)*sizeof argv[0]);
argc = 0;
if(extld == nil)
extld = "gcc";
......@@ -682,7 +693,7 @@ hostlink(void)
argv[argc++] = "-m64";
break;
case '5':
// nothing required for arm
argv[argc++] = "-marm";
break;
}
if(!debug['s'] && !debug_s) {
......@@ -696,6 +707,10 @@ hostlink(void)
if(iself && AssumeGoldLinker)
argv[argc++] = "-Wl,--rosegment";
if(flag_shared) {
argv[argc++] = "-Wl,-Bsymbolic";
argv[argc++] = "-shared";
}
argv[argc++] = "-o";
argv[argc++] = outfile;
......
......@@ -55,7 +55,7 @@ enum
SMACHO, /* Mach-O __nl_symbol_ptr */
SMACHOGOT,
SNOPTRDATA,
SDATARELRO,
SINITARR,
SDATA,
SWINDOWS,
SBSS,
......@@ -224,7 +224,6 @@ void Lflag(char *arg);
void usage(void);
void adddynrel(Sym*, Reloc*);
void adddynrela(Sym*, Sym*, Reloc*);
Sym* lookuprel(void);
void ldobj1(Biobuf *f, char*, int64 len, char *pn);
void ldobj(Biobuf*, char*, int64, char*, char*, int);
void ldelf(Biobuf*, char*, int64, char*);
......
......@@ -181,22 +181,13 @@ asmelfsym(void)
genasmsym(putelfsym);
if(linkmode == LinkExternal && HEADTYPE != Hopenbsd) {
s = lookup("runtime.m", 0);
s = lookup("runtime.tlsgm", 0);
if(s->sect == nil) {
cursym = nil;
diag("missing section for %s", s->name);
errorexit();
}
putelfsyment(putelfstr(s->name), 0, PtrSize, (STB_LOCAL<<4)|STT_TLS, s->sect->elfsect->shnum, 0);
s->elfsym = numelfsym++;
s = lookup("runtime.g", 0);
if(s->sect == nil) {
cursym = nil;
diag("missing section for %s", s->name);
errorexit();
}
putelfsyment(putelfstr(s->name), PtrSize, PtrSize, (STB_LOCAL<<4)|STT_TLS, s->sect->elfsect->shnum, 0);
putelfsyment(putelfstr(s->name), 0, 2*PtrSize, (STB_LOCAL<<4)|STT_TLS, s->sect->elfsect->shnum, 0);
s->elfsym = numelfsym++;
}
......@@ -478,10 +469,6 @@ symtab(void)
xdefine("etypelink", SRODATA, 0);
xdefine("rodata", SRODATA, 0);
xdefine("erodata", SRODATA, 0);
if(flag_shared) {
xdefine("datarelro", SDATARELRO, 0);
xdefine("edatarelro", SDATARELRO, 0);
}
xdefine("noptrdata", SNOPTRDATA, 0);
xdefine("enoptrdata", SNOPTRDATA, 0);
xdefine("data", SDATA, 0);
......
......@@ -13,7 +13,7 @@ TEXT _rt0_go(SB),NOSPLIT,$-4
// copy arguments forward on an even stack
// use R13 instead of SP to avoid linker rewriting the offsets
MOVW 0(R13), R0 // argc
MOVW $4(R13), R1 // argv
MOVW 4(R13), R1 // argv
SUB $64, R13 // plenty of scratch
AND $~7, R13
MOVW R0, 60(R13) // save argc, argv away
......@@ -35,10 +35,15 @@ TEXT _rt0_go(SB),NOSPLIT,$-4
BL runtime·emptyfunc(SB) // fault if stack check is wrong
// if there is an _cgo_init, call it.
MOVW _cgo_init(SB), R2
CMP $0, R2
MOVW.NE g, R0 // first argument of _cgo_init is g
BL.NE (R2) // will clobber R0-R3
MOVW _cgo_init(SB), R4
CMP $0, R4
B.EQ nocgo
BL runtime·save_gm(SB);
MOVW g, R0 // first argument of _cgo_init is g
MOVW $setmg_gcc<>(SB), R1 // second argument is address of save_gm
BL (R4) // will clobber R0-R3
nocgo:
// update stackguard after _cgo_init
MOVW g_stackguard0(g), R0
MOVW R0, g_stackguard(g)
......@@ -119,9 +124,9 @@ TEXT runtime·gogo(SB), NOSPLIT, $-4-4
MOVW 0(FP), R1 // gobuf
MOVW gobuf_g(R1), g
MOVW 0(g), R2 // make sure g != nil
MOVW _cgo_save_gm(SB), R2
MOVB runtime·iscgo(SB), R2
CMP $0, R2 // if in Cgo, we have to save g and m
BL.NE (R2) // this call will clobber R0
BL.NE runtime·save_gm(SB) // this call will clobber R0
MOVW gobuf_sp(R1), SP // restore SP
MOVW gobuf_lr(R1), LR
MOVW gobuf_ret(R1), R0
......@@ -437,9 +442,9 @@ TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$8-12
// Load m and g from thread-local storage.
MOVW _cgo_load_gm(SB), R0
MOVB runtime·iscgo(SB), R0
CMP $0, R0
BL.NE (R0)
BL.NE runtime·load_gm(SB)
// If m is nil, Go did not create the current thread.
// Call needm to obtain one for temporary use.
......@@ -519,9 +524,9 @@ TEXT runtime·setmg(SB), NOSPLIT, $0-8
MOVW gg+4(FP), g
// Save m and g to thread-local storage.
MOVW _cgo_save_gm(SB), R0
MOVB runtime·iscgo(SB), R0
CMP $0, R0
BL.NE (R0)
BL.NE runtime·save_gm(SB)
RET
......@@ -615,6 +620,34 @@ _next:
MOVW $0, R0
RET
// We have to resort to TLS variables to save g(R10) and
// m(R9). One reason is that external code might trigger
// SIGSEGV, and our runtime.sigtramp doesn't even know we
// are in external code and will continue to use R10/R9,
// which might well result in another SIGSEGV.
// Note: all three functions will clobber R0, and the last
// two can be called from 5c ABI code.
// g (R10) at 8(TP), m (R9) at 12(TP)
// runtime·save_gm stores the current g and m registers into
// thread-local storage: it fetches the TLS base into R0, then
// writes g at offset 8 and m at offset 12 from that base.
// Clobbers R0.
// NOTE(review): per the commit description the linker may substitute
// a TLS fallback for the MRC instruction where it is unavailable;
// confirm in 5l.
TEXT runtime·save_gm(SB),NOSPLIT,$0
MRC 15, 0, R0, C13, C0, 3 // Fetch TLS register
MOVW g, 8(R0) // g is kept at 8(TP)
MOVW m, 12(R0) // m is kept at 12(TP)
RET
// runtime·load_gm is the inverse of runtime·save_gm: it fetches the
// TLS base into R0, then reloads g from offset 8 and m from offset 12.
// Clobbers R0.
TEXT runtime·load_gm(SB),NOSPLIT,$0
MRC 15, 0, R0, C13, C0, 3 // Fetch TLS register
MOVW 8(R0), g // g is kept at 8(TP)
MOVW 12(R0), m // m is kept at 12(TP)
RET
// void setmg_gcc(M*, G*); set m and g called from gcc.
// The caller passes m in R0 and g in R1. After copying them into the
// m and g registers, control branches (B, not BL) to runtime·save_gm,
// which stores both in TLS and returns to setmg_gcc's caller.
TEXT setmg_gcc<>(SB),NOSPLIT,$0
MOVW R0, m
MOVW R1, g
B runtime·save_gm(SB)
// TODO: share code with memeq?
TEXT bytes·Equal(SB),NOSPLIT,$0
MOVW a_len+4(FP), R1
......
......@@ -14,12 +14,11 @@ TEXT crosscall2(SB),NOSPLIT,$-4
* push 2 args for fn (R1 and R2).
* Also note that at procedure entry in 5c/5g world, 4(R13) will be the
* first arg, so we must push another dummy reg (R0) for 0(R13).
* Additionally, cgo_tls_set_gm will clobber R0, so we need to save R0
* Additionally, runtime·load_gm will clobber R0, so we need to save R0
* nevertheless.
*/
MOVM.WP [R0, R1, R2, R4, R5, R6, R7, R8, m, g, R11, R12, R14], (R13)
MOVW _cgo_load_gm(SB), R0
BL (R0)
BL runtime·load_gm(SB)
MOVW PC, R14
MOVW 0(R13), PC
MOVM.IAW (R13), [R0, R1, R2, R4, R5, R6, R7, R8, m, g, R11, R12, PC]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#pragma cgo_import_static x_cgo_load_gm
extern void x_cgo_load_gm(void);
void (*_cgo_load_gm)(void) = x_cgo_load_gm;
#pragma cgo_import_static x_cgo_save_gm
extern void x_cgo_save_gm(void);
void (*_cgo_save_gm)(void) = x_cgo_save_gm;
......@@ -12,7 +12,7 @@
#endif
/*
* void crosscall_arm2(void (*fn)(void), void *g, void *m)
* void crosscall_arm2(void (*fn)(void), void (*setmg_gcc)(void *m, void *g), void *m, void *g)
*
* Calling into the 5c tool chain, where all registers are caller save.
* Called from standard ARM EABI, where r4-r11 are callee-save, so they
......@@ -21,12 +21,12 @@
.globl EXT(crosscall_arm2)
EXT(crosscall_arm2):
push {r4, r5, r6, r7, r8, r9, r10, r11, ip, lr}
mov r10, r1 // g
mov r9, r2 // m
mov r3, r0 // save r0, cgo_tls_set_gm will clobber it
bl EXT(x_cgo_save_gm) // save current g and m into TLS variable
mov lr, pc
mov pc, r3
mov r4, r0
mov r5, r1
mov r0, r2
mov r1, r3
blx r5 // setmg(m, g)
blx r4 // fn()
pop {r4, r5, r6, r7, r8, r9, r10, r11, ip, pc}
.globl EXT(__stack_chk_fail_local)
......
......@@ -8,72 +8,26 @@
#include <string.h>
#include "libcgo.h"
static void *threadentry(void*);
// We have to resort to TLS variable to save g(R10) and
// m(R9). One reason is that external code might trigger
// SIGSEGV, and our runtime.sigtramp don't even know we
// are in external code, and will continue to use R10/R9,
// this might as well result in another SIGSEGV.
// Note: all three functions will clobber R0, and the last
// two can be called from 5c ABI code.
void __aeabi_read_tp(void) __attribute__((naked));
void x_cgo_save_gm(void) __attribute__((naked));
void x_cgo_load_gm(void) __attribute__((naked));
void
__aeabi_read_tp(void)
{
__asm__ __volatile__ (
#ifdef ARM_TP_ADDRESS
// ARM_TP_ADDRESS is (ARM_VECTORS_HIGH + 0x1000) or 0xffff1000
// GCC inline asm doesn't provide a way to provide a constant
// to "ldr r0, =??" pseudo instruction, so we hardcode the value
// and check it with cpp.
// ARM_TP_ADDRESS is (ARM_VECTORS_HIGH + 0x1000) or 0xffff1000
// and is known to runtime.read_tls_fallback. Verify it with
// cpp.
#if ARM_TP_ADDRESS != 0xffff1000
#error Wrong ARM_TP_ADDRESS!
#endif
"ldr r0, =0xffff1000\n\t"
"ldr r0, [r0]\n\t"
#else
"mrc p15, 0, r0, c13, c0, 3\n\t"
#endif
"mov pc, lr\n\t"
);
}
// g (R10) at 8(TP), m (R9) at 12(TP)
void
x_cgo_load_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"ldr r10, [r0, #8]\n\t"
"ldr r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
static void *threadentry(void*);
void
x_cgo_save_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"str r10, [r0, #8]\n\t"
"str r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
static void (*setmg_gcc)(void*, void*);
void
x_cgo_init(G *g)
x_cgo_init(G *g, void (*setmg)(void*, void*))
{
pthread_attr_t attr;
size_t size;
x_cgo_save_gm(); // save g and m for the initial thread
setmg_gcc = setmg;
pthread_attr_init(&attr);
pthread_attr_getstacksize(&attr, &size);
g->stackguard = (uintptr)&attr - size + 4096;
......@@ -104,7 +58,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
}
}
extern void crosscall_arm2(void (*fn)(void), void *g, void *m);
extern void crosscall_arm2(void (*fn)(void), void (*setmg_gcc)(void*, void*), void *g, void *m);
static void*
threadentry(void *v)
{
......@@ -121,6 +75,6 @@ threadentry(void *v)
*/
ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m);
crosscall_arm2(ts.fn, setmg_gcc, (void*)ts.m, (void*)ts.g);
return nil;
}
......@@ -8,60 +8,15 @@
static void *threadentry(void*);
// We have to resort to TLS variable to save g(R10) and
// m(R9). One reason is that external code might trigger
// SIGSEGV, and our runtime.sigtramp don't even know we
// are in external code, and will continue to use R10/R9,
// this might as well result in another SIGSEGV.
// Note: all three functions will clobber R0, and the last
// two can be called from 5c ABI code.
void __aeabi_read_tp(void) __attribute__((naked));
void x_cgo_save_gm(void) __attribute__((naked));
void x_cgo_load_gm(void) __attribute__((naked));
static void (*setmg_gcc)(void*, void*);
void
__aeabi_read_tp(void)
{
// b __kuser_get_tls @ 0xffff0fe0
__asm__ __volatile__ (
"mvn r0, #0xf000\n\t"
"sub pc, r0, #31\n\t"
"nop\n\tnop\n\t"
);
}
// g (R10) at 8(TP), m (R9) at 12(TP)
void
x_cgo_load_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"ldr r10, [r0, #8]\n\t"
"ldr r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
void
x_cgo_save_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"str r10, [r0, #8]\n\t"
"str r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
void
x_cgo_init(G *g)
x_cgo_init(G *g, void (*setmg)(void*, void*))
{
pthread_attr_t attr;
size_t size;
x_cgo_save_gm(); // save g and m for the initial thread
setmg_gcc = setmg;
pthread_attr_init(&attr);
pthread_attr_getstacksize(&attr, &size);
g->stackguard = (uintptr)&attr - size + 4096;
......@@ -92,7 +47,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
}
}
extern void crosscall_arm2(void (*fn)(void), void *g, void *m);
extern void crosscall_arm2(void (*fn)(void), void (*setmg_gcc)(void*, void*), void*, void*);
static void*
threadentry(void *v)
{
......@@ -109,6 +64,6 @@ threadentry(void *v)
*/
ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m);
crosscall_arm2(ts.fn, setmg_gcc, (void*)ts.m, (void*)ts.g);
return nil;
}
......@@ -10,64 +10,15 @@
static void *threadentry(void*);
// We have to resort to TLS variable to save g(R10) and
// m(R9). One reason is that external code might trigger
// SIGSEGV, and our runtime.sigtramp don't even know we
// are in external code, and will continue to use R10/R9,
// this might as well result in another SIGSEGV.
// Note: all three functions will clobber R0, and the last
// two can be called from 5c ABI code.
void __aeabi_read_tp(void) __attribute__((naked));
void x_cgo_save_gm(void) __attribute__((naked));
void x_cgo_load_gm(void) __attribute__((naked));
static void (*setmg_gcc)(void*, void*);
void
__aeabi_read_tp(void)
{
// this function is only allowed to clobber r0
__asm__ __volatile__ (
"mrc p15, 0, r0, c13, c0, 3\n\t"
"cmp r0, #0\n\t"
"movne pc, lr\n\t"
"push {r1,r2,r3,r12}\n\t"
"svc 0x00a0013c\n\t" // _lwp_getprivate
"pop {r1,r2,r3,r12}\n\t"
"mov pc, lr\n\t"
);
}
// g (R10) at 8(TP), m (R9) at 12(TP)
void
x_cgo_load_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"ldr r10, [r0, #8]\n\t"
"ldr r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
void
x_cgo_save_gm(void)
{
__asm__ __volatile__ (
"push {lr}\n\t"
"bl __aeabi_read_tp\n\t"
"str r10, [r0, #8]\n\t"
"str r9, [r0, #12]\n\t"
"pop {pc}\n\t"
);
}
void
x_cgo_init(G *g)
x_cgo_init(G *g, void (*setmg)(void*, void*))
{
pthread_attr_t attr;
size_t size;
x_cgo_save_gm(); // save g and m for the initial thread
setmg_gcc = setmg;
pthread_attr_init(&attr);
pthread_attr_getstacksize(&attr, &size);
g->stackguard = (uintptr)&attr - size + 4096;
......@@ -100,7 +51,7 @@ _cgo_sys_thread_start(ThreadStart *ts)
}
}
extern void crosscall_arm2(void (*fn)(void), void *g, void *m);
extern void crosscall_arm2(void (*fn)(void), void (*setmg_gcc)(void*, void*), void *g, void *m);
static void*
threadentry(void *v)
{
......@@ -117,6 +68,6 @@ threadentry(void *v)
*/
ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m);
crosscall_arm2(ts.fn, setmg_gcc, (void*)ts.m, (void*)ts.g);
return nil;
}
......@@ -87,11 +87,6 @@
void *_cgo_init; /* filled in by dynamic linker when Cgo is available */
static int64 cgosync; /* represents possible synchronization in C code */
// These two are only used by the architecture where TLS based storage isn't
// the default for g and m (e.g., ARM)
void *_cgo_load_gm; /* filled in by dynamic linker when Cgo is available */
void *_cgo_save_gm; /* filled in by dynamic linker when Cgo is available */
static void unwindm(void);
// Call from Go to C.
......
......@@ -35,16 +35,13 @@ runtime·checkgoarm(void)
#pragma textflag NOSPLIT
void
runtime·setup_auxv(int32 argc, void *argv_list)
runtime·setup_auxv(int32 argc, byte **argv)
{
byte **argv;
byte **envp;
byte *rnd;
uint32 *auxv;
uint32 t;
argv = &argv_list;
// skip envp to get to ELF auxiliary vector.
for(envp = &argv[argc+1]; *envp != nil; envp++)
;
......
......@@ -7,4 +7,7 @@
// FreeBSD and Linux use the same linkage to main
// _rt0_arm_freebsd reads argc and the address of the argv list from
// the initial stack, pushes both, and continues in _rt0_go.
TEXT _rt0_arm_freebsd(SB),NOSPLIT,$-4
MOVW (R13), R0 // argc
MOVW $4(R13), R1 // argv (address of the argv list, not its first element)
MOVM.DB.W [R0-R1], (R13) // push argc and argv so _rt0_go finds them at 0(R13) and 4(R13)
B _rt0_go(SB)
......@@ -5,6 +5,12 @@
#include "../../cmd/ld/textflag.h"
// _rt0_arm_linux loads argc into R0 and the address of the argv list
// into R1, then jumps to _rt0_arm_linux1 through R4.
// NOTE(review): the indirect jump via R4 (rather than a direct B) is
// presumably related to external linking; confirm.
TEXT _rt0_arm_linux(SB),NOSPLIT,$-4
MOVW (R13), R0 // argc
MOVW $4(R13), R1 // argv
MOVW $_rt0_arm_linux1(SB), R4
B (R4)
TEXT _rt0_arm_linux1(SB),NOSPLIT,$-4
// We first need to detect the kernel ABI, and warn the user
// if the system only supports OABI
// The strategy here is to call some EABI syscall to see if
......@@ -14,6 +20,8 @@ TEXT _rt0_arm_linux(SB),NOSPLIT,$-4
// we don't know the kernel ABI... Oh, not really, we can do
// syscall in Thumb mode.
// Save argc and argv
MOVM.DB.W [R0-R1], (R13)
// set up sa_handler
MOVW $bad_abi<>(SB), R0 // sa_handler
MOVW $0, R1 // sa_flags
......@@ -72,3 +80,7 @@ TEXT oabi_syscall<>(SB),NOSPLIT,$-4
// TODO(minux): only supports little-endian CPUs
WORD $0x4770df01 // swi $1; bx lr
// main jumps straight to _rt0_arm_linux1 through R4 without loading
// argc/argv itself.
// NOTE(review): presumably used when a C runtime calls main with argc
// in R0 and argv in R1 (external linking); confirm.
TEXT main(SB),NOSPLIT,$-4
MOVW $_rt0_arm_linux1(SB), R4
B (R4)
......@@ -7,4 +7,7 @@
// FreeBSD/NetBSD and Linux use the same linkage to main
// _rt0_arm_netbsd reads argc and the address of the argv list from
// the initial stack, pushes both, and continues in _rt0_go.
TEXT _rt0_arm_netbsd(SB),NOSPLIT,$-4
MOVW (R13), R0 // argc
MOVW $4(R13), R1 // argv (address of the argv list, not its first element)
MOVM.DB.W [R0-R1], (R13) // push argc and argv so _rt0_go finds them at 0(R13) and 4(R13)
B _rt0_go(SB)
......@@ -282,3 +282,7 @@ TEXT runtime·casp(SB),NOSPLIT,$0
// return 0;
TEXT runtime·cas(SB),NOSPLIT,$0
B runtime·armcas(SB)
// runtime·read_tls_fallback returns the thread pointer in R0 without
// using the MRC instruction. It reads the word stored at the fixed
// address 0xffff1000 (ARM_TP_ADDRESS, i.e. ARM_VECTORS_HIGH + 0x1000).
TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
MOVW $0xffff1000, R0
MOVW (R0), R0
RET // return to the caller; without it execution runs off the end of the function
......@@ -286,11 +286,11 @@ TEXT runtime·sigaltstack(SB),NOSPLIT,$0
TEXT runtime·sigtramp(SB),NOSPLIT,$24
// this might be called in external code context,
// where g and m are not set.
// first save R0, because _cgo_load_gm will clobber it
// first save R0, because runtime·load_gm will clobber it
MOVW R0, 4(R13)
MOVW _cgo_load_gm(SB), R0
MOVB runtime·iscgo(SB), R0
CMP $0, R0
BL.NE (R0)
BL.NE runtime·load_gm(SB)
CMP $0, m
BNE 4(PC)
......@@ -441,3 +441,8 @@ TEXT runtime·closeonexec(SB),NOSPLIT,$0
MOVW $SYS_fcntl, R7
SWI $0
RET
// b __kuser_get_tls @ 0xffff0fe0
// Branches to the fixed address 0xffff0fe0. Because this is B rather
// than BL, LR is left untouched.
// NOTE(review): presumably the helper leaves the TLS pointer in R0 and
// returns directly to our caller; confirm against the kernel docs.
TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
MOVW $0xffff0fe0, R0
B (R0)
......@@ -301,3 +301,9 @@ TEXT runtime·casp(SB),NOSPLIT,$0
// return 0;
TEXT runtime·cas(SB),NOSPLIT,$0
B runtime·armcas(SB)
// runtime·read_tls_fallback issues the _lwp_getprivate system call,
// saving R1-R3 and R12 before it and restoring them afterwards.
// NOTE(review): the syscall result (the TLS pointer) is presumably
// returned in R0, the only register not preserved here; confirm.
TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
MOVM.WP [R1, R2, R3, R12], (R13) // save registers the syscall may clobber
SWI $0x00a0013c // _lwp_getprivate
MOVM.IAW (R13), [R1, R2, R3, R12] // restore saved registers
RET
......@@ -118,7 +118,7 @@ darwin-386 | darwin-amd64)
*) go test -ldflags '-linkmode=external' || exit 1;;
esac
;;
freebsd-386 | freebsd-amd64 | linux-386 | linux-amd64 | netbsd-386 | netbsd-amd64)
freebsd-386 | freebsd-amd64 | linux-386 | linux-amd64 | linux-arm | netbsd-386 | netbsd-amd64)
go test -ldflags '-linkmode=external' || exit 1
go test -ldflags '-linkmode=auto' ../testtls || exit 1
go test -ldflags '-linkmode=external' ../testtls || exit 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment