Commit 3d40062c authored by Russ Cox

cmd/gc, cmd/ld: struct field tracking

This is an experiment in static analysis of Go programs
to understand which struct fields a program might use.
It is not part of the Go language specification; it must
be enabled explicitly when building the toolchain,
and it may be removed at any time.

After building the toolchain with GOEXPERIMENT=fieldtrack,
a specific field can be marked for tracking by including
`go:"track"` in the field tag:

        package pkg

        type T struct {
                F int `go:"track"`
                G int // untracked
        }

To simplify usage, only named struct types can have
tracked fields, and only exported fields can be tracked.

The implementation works by making each function begin
with a sequence of no-op USEFIELD instructions declaring
which tracked fields are accessed by a specific function.
After the linker's dead code elimination removes unused
functions, the fields referred to by the remaining
USEFIELD instructions are the ones reported as used by
the binary.

The -k option to the linker specifies the fully qualified
symbol name (such as my/pkg.list) of a string variable that
should be initialized with the field tracking information
for the program. The field tracking string is a sequence
of lines, each terminated by a \n and describing a single
tracked field referred to by the program. Each line is made
up of one or more tab-separated fields. The first field is
the name of the tracked field, fully qualified, as in
"my/pkg.T.F". Subsequent fields give a shortest path of
reverse references from that field to a global variable or
function, corresponding to one way in which the program
might reach that field.

A common source of false positives in field tracking is
types with large method sets, because a reference to the
type descriptor carries with it references to all methods.
To address this problem, the CL also introduces a comment
annotation

        //go:nointerface

that marks an upcoming method declaration as unavailable
for use in satisfying interfaces, both statically and
dynamically. Such a method is also invisible to package
reflect.

Again, all of this is disabled by default. It only turns on
if you have GOEXPERIMENT=fieldtrack set during make.bash.

R=iant, ken
CC=golang-dev
https://golang.org/cl/6749064
parent 84e20465
......@@ -413,6 +413,8 @@ struct
"MULAWT", LTYPEN, AMULAWT,
"MULAWB", LTYPEN, AMULAWB,
"USEFIELD", LTYPEN, AUSEFIELD,
0
};
......
......@@ -142,6 +142,7 @@ void datagostring(Strlit*, Addr*);
void split64(Node*, Node*, Node*);
void splitclean(void);
Node* ncon(uint32 i);
void gtrack(Sym*);
/*
* obj.c
......
......@@ -225,6 +225,17 @@ ggloblsym(Sym *s, int32 width, int dupok, int rodata)
p->reg |= RODATA;
}
// gtrack generates an AUSEFIELD instruction whose source operand
// refers to the tracking symbol s.
void
gtrack(Sym *s)
{
	Prog *use;

	use = gins(AUSEFIELD, N, N);
	// describe the operand as a reference to s
	use->from.sym = s;
	use->from.name = D_EXTERN;
	use->from.type = D_OREG;
}
int
isfat(Type *t)
{
......
......@@ -195,6 +195,8 @@ enum as
AMULWB,
AMULAWT,
AMULAWB,
AUSEFIELD,
ALAST,
};
......
......@@ -153,6 +153,8 @@ struct Sym
Sym* sub; // in SSUB list
Sym* outer; // container of sub
Sym* gotype;
Sym* reachparent;
Sym* queue;
char* file;
char* dynimpname;
char* dynimplib;
......
......@@ -147,6 +147,9 @@ main(int argc, char *argv[])
val = EARGF(usage());
addbuildinfo(val);
break;
case 'k':
tracksym = EARGF(usage());
break;
} ARGEND
USED(argc);
......
......@@ -242,5 +242,7 @@ Optab optab[] =
{ AMULWT, C_REG, C_REG, C_REG, 98, 4, 0 },
{ AMULAWT, C_REG, C_REG, C_REGREG2, 99, 4, 0 },
{ AUSEFIELD, C_ADDR, C_NONE, C_NONE, 0, 0, 0 },
{ AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0 },
};
......@@ -812,6 +812,7 @@ buildop(void)
case AMOVM:
case ARFE:
case ATEXT:
case AUSEFIELD:
case ACASE:
case ABCASE:
break;
......
......@@ -1014,7 +1014,8 @@ struct
"AESDECLAST", LTYPE3, AAESDECLAST,
"AESIMC", LTYPE3, AAESIMC,
"AESKEYGENASSIST", LTYPEX, AAESKEYGENASSIST,
"PSHUFD", LTYPEX, APSHUFD,
"PSHUFD", LTYPEX, APSHUFD,
"USEFIELD", LTYPEN, AUSEFIELD,
0
};
......
......@@ -125,6 +125,7 @@ void sudoclean(void);
int sudoaddable(int, Node*, Addr*);
void afunclit(Addr*);
void nodfconst(Node*, Type*, Mpflt*);
void gtrack(Sym*);
/*
* cplx.c
......
......@@ -205,6 +205,17 @@ ggloblnod(Node *nam, int32 width)
p->from.scale |= NOPTR;
}
// gtrack generates an AUSEFIELD instruction whose source operand
// refers to the tracking symbol s.
void
gtrack(Sym *s)
{
	Prog *use;

	use = gins(AUSEFIELD, N, N);
	// describe the operand as a reference to s, with no index
	use->from.sym = s;
	use->from.index = D_NONE;
	use->from.type = D_EXTERN;
}
void
ggloblsym(Sym *s, int32 width, int dupok, int rodata)
{
......
......@@ -756,6 +756,8 @@ enum as
AAESKEYGENASSIST,
APSHUFD,
AUSEFIELD,
ALAST
};
......
......@@ -158,6 +158,8 @@ struct Sym
Sym* next; // in text or data list
Sym* sub; // in SSUB list
Sym* outer; // container of sub
Sym* reachparent;
Sym* queue;
vlong value;
vlong size;
Sym* gotype;
......
......@@ -144,6 +144,9 @@ main(int argc, char *argv[])
val = EARGF(usage());
addbuildinfo(val);
break;
case 'k':
tracksym = EARGF(usage());
break;
} ARGEND
if(argc != 1)
......
......@@ -1315,6 +1315,8 @@ Optab optab[] =
{ APSHUFD, yaes2, Pq, 0x70,(0) },
{ AUSEFIELD, ynop, Px, 0,0 },
{ AEND },
0
};
......
......@@ -781,6 +781,7 @@ struct
"UNPCKLPS", LTYPE3, AUNPCKLPS,
"XORPD", LTYPE3, AXORPD,
"XORPS", LTYPE3, AXORPS,
"USEFIELD", LTYPEN, AUSEFIELD,
0
};
......
......@@ -147,6 +147,7 @@ void afunclit(Addr*);
void split64(Node*, Node*, Node*);
void splitclean(void);
void nswap(Node*, Node*);
void gtrack(Sym*);
/*
* cplx.c
......
......@@ -224,6 +224,17 @@ ggloblsym(Sym *s, int32 width, int dupok, int rodata)
p->from.scale |= RODATA;
}
// gtrack generates an AUSEFIELD instruction whose source operand
// refers to the tracking symbol s.
void
gtrack(Sym *s)
{
	Prog *use;

	use = gins(AUSEFIELD, N, N);
	// describe the operand as a reference to s, with no index
	use->from.sym = s;
	use->from.index = D_NONE;
	use->from.type = D_EXTERN;
}
int
isfat(Type *t)
{
......
......@@ -566,6 +566,8 @@ enum as
AUNPCKLPS,
AXORPD,
AXORPS,
AUSEFIELD,
ALAST
};
......
......@@ -143,6 +143,8 @@ struct Sym
Sym* sub; // in sub list
Sym* outer; // container of sub
Sym* gotype;
Sym* reachparent;
Sym* queue;
char* file;
char* dynimpname;
char* dynimplib;
......
......@@ -149,6 +149,9 @@ main(int argc, char *argv[])
val = EARGF(usage());
addbuildinfo(val);
break;
case 'k':
tracksym = EARGF(usage());
break;
} ARGEND
if(argc != 1)
......
......@@ -960,5 +960,7 @@ Optab optab[] =
{ AXORPD, yxm, Pe, 0x57 },
{ AXORPS, yxm, Pm, 0x57 },
{ AUSEFIELD, ynop, Px, 0,0 },
0
};
......@@ -1269,7 +1269,7 @@ methodname1(Node *n, Node *t)
* n is fieldname, pa is base type, t is function type
*/
void
addmethod(Sym *sf, Type *t, int local)
addmethod(Sym *sf, Type *t, int local, int nointerface)
{
Type *f, *d, *pa;
Node *n;
......@@ -1352,6 +1352,7 @@ addmethod(Sym *sf, Type *t, int local)
}
f = structfield(n);
f->nointerface = nointerface;
// during import unexported method names should be in the type's package
if(importpkg && f->sym && !exportname(f->sym->name) && f->sym->pkg != structpkg)
......
......@@ -278,6 +278,8 @@ dumpexporttype(Type *t)
// currently that can leave unresolved ONONAMEs in import-dot-ed packages in the wrong package
if(debug['l'] < 2)
typecheckinl(f->type->nname);
if(f->nointerface)
Bprint(bout, "\t//go:nointerface\n");
Bprint(bout, "\tfunc (%#T) %#hhS%#hT { %#H }\n", getthisx(f->type)->type, f->sym, f->type, f->type->nname->inl);
reexportdeplist(f->type->nname->inl);
} else
......
......@@ -137,6 +137,7 @@ typedef struct Label Label;
struct Type
{
uchar etype;
uchar nointerface;
uchar chan;
uchar trecur; // to detect loops
uchar printed;
......@@ -175,6 +176,7 @@ struct Type
// TFIELD
Type* down; // next struct field, also key type in TMAP
Type* outer; // outer struct
Strlit* note; // literal string annotation
// TARRAY
......@@ -185,6 +187,9 @@ struct Type
// for TFORW, where to copy the eventual value to
NodeList *copyto;
// for usefield
Node *lastfn;
};
#define T ((Type*)0)
......@@ -236,6 +241,7 @@ struct Node
NodeList* rlist;
uchar op;
uchar nointerface;
uchar ullman; // sethi/ullman number
uchar addable; // type of addressability - 0 is not addressable
uchar trecur; // to detect loops
......@@ -284,7 +290,7 @@ struct Node
Node* defn; // ONAME: initializing assignment; OLABEL: labeled statement
Node* pack; // real package for import . names
Node* curfn; // function for local variables
Type* paramfld; // TFIELD for this PPARAM
Type* paramfld; // TFIELD for this PPARAM; also for ODOT, curfn
// ONAME func param with PHEAP
Node* heapaddr; // temp holding heap address of param
......@@ -849,6 +855,7 @@ EXTERN Pkg* stringpkg; // fake package for C strings
EXTERN Pkg* typepkg; // fake package for runtime type info
EXTERN Pkg* weaktypepkg; // weak references to runtime type info
EXTERN Pkg* unsafepkg; // package unsafe
EXTERN Pkg* trackpkg; // fake package for field tracking
EXTERN Pkg* phash[128];
EXTERN int tptr; // either TPTR32 or TPTR64
extern char* runtimeimport;
......@@ -929,6 +936,9 @@ EXTERN int typecheckok;
EXTERN int compiling_runtime;
EXTERN int compiling_wrappers;
EXTERN int nointerface;
EXTERN int fieldtrack_enabled;
/*
* y.tab.c
*/
......@@ -1004,7 +1014,7 @@ void nodfconst(Node *n, Type *t, Mpflt* fval);
/*
* dcl.c
*/
void addmethod(Sym *sf, Type *t, int local);
void addmethod(Sym *sf, Type *t, int local, int nointerface);
void addvar(Node *n, Type *t, int ctxt);
NodeList* checkarglist(NodeList *all, int input);
Node* colas(NodeList *left, NodeList *right, int32 lno);
......@@ -1200,8 +1210,10 @@ void dumptypestructs(void);
Type* methodfunc(Type *f, Type*);
Node* typename(Type *t);
Sym* typesym(Type *t);
Sym* tracksym(Type *t);
Sym* typesymprefix(char *prefix, Type *t);
int haspointers(Type *t);
void usefield(Node*);
/*
* select.c
......
......@@ -1275,6 +1275,7 @@ fndcl:
$$->nname = methodname1($$->shortname, rcvr->right);
$$->nname->defn = $$;
$$->nname->ntype = t;
$$->nname->nointerface = nointerface;
declare($$->nname, PFUNC);
funchdr($$);
......@@ -1312,7 +1313,8 @@ hidden_fndcl:
$$->type = functype($2->n, $6, $8);
checkwidth($$->type);
addmethod($4, $$->type, 0);
addmethod($4, $$->type, 0, nointerface);
nointerface = 0;
funchdr($$);
// inl.c's inlnode in on a dotmeth node expects to find the inlineable body as
......@@ -1389,6 +1391,7 @@ xdcl_list:
$$ = concat($1, $2);
if(nsyntaxerrors == 0)
testdclstack();
nointerface = 0;
}
vardcl_list:
......
......@@ -40,6 +40,7 @@ static struct {
int *val;
} exper[] = {
// {"rune32", &rune32},
{"fieldtrack", &fieldtrack_enabled},
{nil, nil},
};
......@@ -199,9 +200,19 @@ main(int argc, char *argv[])
localpkg = mkpkg(strlit(""));
localpkg->prefix = "\"\"";
// pseudo-package, for scoping
builtinpkg = mkpkg(strlit("go.builtin"));
// pseudo-package, accessed by import "unsafe"
unsafepkg = mkpkg(strlit("unsafe"));
unsafepkg->name = "unsafe";
// real package, referred to by generated runtime calls
runtimepkg = mkpkg(strlit("runtime"));
runtimepkg->name = "runtime";
// pseudo-packages used in symbol tables
gostringpkg = mkpkg(strlit("go.string"));
gostringpkg->name = "go.string";
gostringpkg->prefix = "go.string"; // not go%2estring
......@@ -210,18 +221,16 @@ main(int argc, char *argv[])
itabpkg->name = "go.itab";
itabpkg->prefix = "go.itab"; // not go%2eitab
runtimepkg = mkpkg(strlit("runtime"));
runtimepkg->name = "runtime";
typepkg = mkpkg(strlit("type"));
typepkg->name = "type";
weaktypepkg = mkpkg(strlit("go.weak.type"));
weaktypepkg->name = "go.weak.type";
weaktypepkg->prefix = "go.weak.type"; // not go%2eweak%2etype
unsafepkg = mkpkg(strlit("unsafe"));
unsafepkg->name = "unsafe";
trackpkg = mkpkg(strlit("go.track"));
trackpkg->name = "go.track";
trackpkg->prefix = "go.track"; // not go%2etrack
typepkg = mkpkg(strlit("type"));
typepkg->name = "type";
goroot = getgoroot();
goos = getgoos();
......@@ -1443,7 +1452,12 @@ getlinepragma(void)
char *cp, *ep, *linep;
Hist *h;
for(i=0; i<5; i++) {
c = getr();
if(c == 'g' && fieldtrack_enabled)
goto go;
if(c != 'l')
goto out;
for(i=1; i<5; i++) {
c = getr();
if(c != "line "[i])
goto out;
......@@ -1491,6 +1505,20 @@ getlinepragma(void)
}
}
linehist(strdup(lexbuf), n, 0);
goto out;
go:
for(i=1; i<11; i++) {
c = getr();
if(c != "go:nointerface"[i])
goto out;
}
nointerface = 1;
for(;;) {
c = getr();
if(c == EOF || c == '\n')
break;
}
out:
return c;
......
......@@ -83,6 +83,10 @@ compile(Node *fn)
afunclit(&ptxt->from);
ginit();
for(t=curfn->paramfld; t; t=t->down)
gtrack(tracksym(t->type));
genlist(curfn->enter);
retpc = nil;
......@@ -115,6 +119,7 @@ compile(Node *fn)
gclean();
if(nerrors != 0)
goto ret;
pc->as = ARET; // overwrite AEND
pc->lineno = lineno;
......
......@@ -172,6 +172,8 @@ methods(Type *t)
fatal("non-method on %T method %S %T\n", mt, f->sym, f);
if (!getthisx(f->type)->type)
fatal("receiver with no type on %T method %S %T\n", mt, f->sym, f);
if(f->nointerface)
continue;
method = f->sym;
if(method == nil)
......@@ -622,6 +624,18 @@ typesym(Type *t)
return s;
}
// tracksym returns the symbol in trackpkg that stands for field t.
// The symbol name is the formatted t->outer type, a dot, and the
// field's own name.
Sym*
tracksym(Type *t)
{
	char *name;
	Sym *sym;

	name = smprint("%-T.%s", t->outer, t->sym->name);
	sym = pkglookup(name, trackpkg);
	// the formatted name is not used after the lookup
	free(name);
	return sym;
}
Sym*
typesymprefix(char *prefix, Type *t)
{
......@@ -1155,4 +1169,3 @@ dgcsym(Type *t)
return s;
}
......@@ -3040,7 +3040,7 @@ implements(Type *t, Type *iface, Type **m, Type **samename, int *ptr)
for(im=iface->type; im; im=im->down) {
imtype = methodfunc(im->type, 0);
tm = ifacelookdot(im->sym, t, &followptr, 0);
if(tm == T || !eqtype(methodfunc(tm->type, 0), imtype)) {
if(tm == T || tm->nointerface || !eqtype(methodfunc(tm->type, 0), imtype)) {
if(tm == T)
tm = ifacelookdot(im->sym, t, &followptr, 1);
*m = im;
......
......@@ -1833,6 +1833,7 @@ lookdot(Node *n, Type *t, int dostrcmp)
fatal("lookdot badwidth %T %p", f1, f1);
n->xoffset = f1->width;
n->type = f1->type;
n->paramfld = f1;
if(t->etype == TINTER) {
if(isptr[n->left->type->etype]) {
n->left = nod(OIND, n->left, N); // implicitstar
......@@ -2637,7 +2638,7 @@ typecheckfunc(Node *n)
t->nname = n->nname;
rcvr = getthisx(t)->type;
if(rcvr != nil && n->shortname != N && !isblank(n->shortname))
addmethod(n->shortname->sym, t, 1);
addmethod(n->shortname->sym, t, 1, n->nname->nointerface);
}
static void
......
......@@ -429,14 +429,19 @@ walkexpr(Node **np, NodeList **init)
case OCOM:
case OREAL:
case OIMAG:
case ODOT:
case ODOTPTR:
case ODOTMETH:
case ODOTINTER:
case OIND:
walkexpr(&n->left, init);
goto ret;
case ODOT:
case ODOTPTR:
usefield(n);
walkexpr(&n->left, init);
goto ret;
case OEFACE:
walkexpr(&n->left, init);
walkexpr(&n->right, init);
......@@ -2897,3 +2902,44 @@ bounded(Node *n, int64 max)
return 0;
}
// usefield records, for the function being compiled, that selector
// expression n (ODOT or ODOTPTR) accesses a struct field whose tag
// contains go:"track".  It does nothing unless field tracking is
// enabled.  The first time a tracked field is seen in curfn, a new
// list entry pointing at it is pushed onto the front of curfn->paramfld.
void
usefield(Node *n)
{
	Type *fld, *owner, *entry;

	if(!fieldtrack_enabled)
		return;
	if(n->op != ODOT && n->op != ODOTPTR)
		fatal("usefield %O", n->op);

	fld = n->paramfld;
	if(fld == T)
		fatal("usefield %T %S without paramfld", n->left->type, n->right->sym);

	// untagged fields, and tags without go:"track", are ignored
	if(fld->note == nil)
		return;
	if(strstr(fld->note->s, "go:\"track\"") == nil)
		return;

	// dedup on list: this field was already recorded for curfn
	if(fld->lastfn == curfn)
		return;
	fld->lastfn = curfn;

	// the containing type is the operand's type, minus one pointer level
	owner = n->left->type;
	if(isptr[owner->etype])
		owner = owner->type;
	fld->outer = owner;

	if(owner->sym == S)
		yyerror("tracked field must be in named struct type");
	if(!exportname(fld->sym->name))
		yyerror("tracked field must be exported (upper case)");

	// push a fresh list cell for this field onto curfn's list
	entry = typ(0);
	entry->type = fld;
	entry->down = curfn->paramfld;
	curfn->paramfld = entry;
}
This diff is collapsed.
......@@ -602,11 +602,15 @@ addstrdata(char *name, char *value)
addstring(sp, value);
s = lookup(name, 0);
s->size = 0;
s->dupok = 1;
addaddr(s, sp);
adduint32(s, strlen(value));
if(PtrSize == 8)
adduint32(s, 0); // round struct to pointer width
// in case reachability has already been computed
sp->reachable = s->reachable;
}
vlong
......
......@@ -431,10 +431,16 @@ parsemethod(char **pp, char *ep, char **methp)
if(p == ep)
return 0;
// might be a comment about the method
if(p + 2 < ep && strncmp(p, "//", 2) == 0)
goto useline;
// if it says "func (", it's a method
if(p + 6 >= ep || strncmp(p, "func (", 6) != 0)
return 0;
if(p + 6 < ep && strncmp(p, "func (", 6) == 0)
goto useline;
return 0;
useline:
// definition to end of line
*methp = p;
while(p < ep && *p != '\n')
......@@ -612,50 +618,56 @@ err:
nerrors++;
}
static int markdepth;
static Sym *markq;
static Sym *emarkq;
static void
marktext(Sym *s)
mark1(Sym *s, Sym *parent)
{
Auto *a;
Prog *p;
if(s == S)
if(s == S || s->reachable)
return;
markdepth++;
if(debug['v'] > 1)
Bprint(&bso, "%d marktext %s\n", markdepth, s->name);
for(a=s->autom; a; a=a->link)
mark(a->gotype);
for(p=s->text; p != P; p=p->link) {
if(p->from.sym)
mark(p->from.sym);
if(p->to.sym)
mark(p->to.sym);
}
markdepth--;
if(strncmp(s->name, "go.weak.", 8) == 0)
return;
s->reachable = 1;
s->reachparent = parent;
if(markq == nil)
markq = s;
else
emarkq->queue = s;
emarkq = s;
}
// mark hands s to mark1 with a nil parent, i.e. as a starting point
// that was not reached through any other symbol.
void
mark(Sym *s)
{
int i;
mark1(s, nil);
}
if(s == S || s->reachable)
return;
if(strncmp(s->name, "go.weak.", 8) == 0)
return;
s->reachable = 1;
if(s->text)
marktext(s);
for(i=0; i<s->nr; i++)
mark(s->r[i].sym);
if(s->gotype)
mark(s->gotype);
if(s->sub)
mark(s->sub);
if(s->outer)
mark(s->outer);
// markflood walks the queue that starts at markq.  For each queued
// symbol it hands every symbol referenced from it to mark1, with the
// queued symbol as parent: for symbols with text, the gotype of each
// auto and both operand symbols of each instruction; then, for every
// symbol, each r[] entry's symbol, its gotype, sub and outer.
static void
markflood(void)
{
	Sym *cur;
	Auto *av;
	Prog *ins;
	int ri;

	// the queue link is followed only after cur has been fully
	// processed, so symbols queued by the mark1 calls below are visited
	for(cur=markq; cur!=S; cur=cur->queue) {
		if(cur->text) {
			if(debug['v'] > 1)
				Bprint(&bso, "marktext %s\n", cur->name);
			av = cur->autom;
			while(av) {
				mark1(av->gotype, cur);
				av = av->link;
			}
			ins = cur->text;
			while(ins != P) {
				mark1(ins->from.sym, cur);
				mark1(ins->to.sym, cur);
				ins = ins->link;
			}
		}
		ri = 0;
		while(ri < cur->nr) {
			mark1(cur->r[ri].sym, cur);
			ri++;
		}
		mark1(cur->gotype, cur);
		mark1(cur->sub, cur);
		mark1(cur->outer, cur);
	}
}
static char*
......@@ -712,8 +724,9 @@ void
deadcode(void)
{
int i;
Sym *s, *last;
Sym *s, *last, *p;
Auto *z;
Fmt fmt;
if(debug['v'])
Bprint(&bso, "%5.2f deadcode\n", cputime());
......@@ -724,6 +737,8 @@ deadcode(void)
for(i=0; i<ndynexp; i++)
mark(dynexp[i]);
markflood();
// remove dead text but keep file information (z symbols).
last = nil;
......@@ -756,6 +771,29 @@ deadcode(void)
s->reachable = 1;
s->hide = 1;
}
// record field tracking references
fmtstrinit(&fmt);
for(s = allsym; s != S; s = s->allsym) {
if(strncmp(s->name, "go.track.", 9) == 0) {
s->special = 1; // do not lay out in data segment
s->hide = 1;
if(s->reachable) {
fmtprint(&fmt, "%s", s->name+9);
for(p=s->reachparent; p; p=p->reachparent)
fmtprint(&fmt, "\t%s", p->name);
fmtprint(&fmt, "\n");
}
s->type = SCONST;
s->value = 0;
}
}
if(tracksym == nil)
return;
s = lookup(tracksym, 0);
if(!s->reachable)
return;
addstrdata(tracksym, fmtstrflush(&fmt));
}
void
......
......@@ -132,6 +132,7 @@ EXTERN char* thestring;
EXTERN int ndynexp;
EXTERN int havedynamic;
EXTERN int iscgo;
EXTERN char* tracksym;
EXTERN Segment segtext;
EXTERN Segment segdata;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment