popt.c 25 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// "Portable" optimizations.
// Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h.
// Must code to the intersection of the three back ends.

#include	<u.h>
#include	<libc.h>
Russ Cox's avatar
Russ Cox committed
37
#include	"go.h"
Russ Cox's avatar
Russ Cox committed
38
#include	"popt.h"
39 40 41 42 43 44 45 46 47 48 49 50 51

// p is a call instruction. Does the call fail to return?
int
noreturn(Prog *p)
{
	Sym *s;
	int i;
	static Sym*	symlist[10];

	if(symlist[0] == S) {
		symlist[0] = pkglookup("panicindex", runtimepkg);
		symlist[1] = pkglookup("panicslice", runtimepkg);
		symlist[2] = pkglookup("throwinit", runtimepkg);
52
		symlist[3] = pkglookup("gopanic", runtimepkg);
53
		symlist[4] = pkglookup("panicwrap", runtimepkg);
54 55 56
		symlist[5] = pkglookup("throwreturn", runtimepkg);
		symlist[6] = pkglookup("selectgo", runtimepkg);
		symlist[7] = pkglookup("block", runtimepkg);
57 58
	}

Russ Cox's avatar
Russ Cox committed
59 60
	if(p->to.node == nil)
		return 0;
Russ Cox's avatar
Russ Cox committed
61
	s = ((Node*)(p->to.node))->sym;
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
	if(s == S)
		return 0;
	for(i=0; symlist[i]!=S; i++)
		if(s == symlist[i])
			return 1;
	return 0;
}

// JMP chasing and removal.
//
// The code generator depends on being able to write out jump
// instructions that it can jump to now but fill in later.
// the linker will resolve them nicely, but they make the code
// longer and more difficult to follow during debugging.
// Remove them.

/* what instruction does a JMP to p eventually land on? */
static Prog*
chasejmp(Prog *p, int *jmploop)
{
	int n;

	n = 0;
85
	while(p != P && p->as == AJMP && p->to.type == TYPE_BRANCH) {
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
		if(++n > 10) {
			*jmploop = 1;
			break;
		}
		p = p->to.u.branch;
	}
	return p;
}

/*
 * reuse reg pointer for mark/sweep state.
 * leave reg==nil at end because alive==nil.
 */
#define alive ((void*)0)
#define dead ((void*)1)
101 102 103 104
/*c2go
extern void *alive;
extern void *dead;
*/
105 106 107 108 109 110 111 112 113 114 115

/* mark all code reachable from firstp as alive */
static void
mark(Prog *firstp)
{
	Prog *p;
	
	for(p=firstp; p; p=p->link) {
		if(p->opt != dead)
			break;
		p->opt = alive;
116
		if(p->as != ACALL && p->to.type == TYPE_BRANCH && p->to.u.branch)
117
			mark(p->to.u.branch);
118
		if(p->as == AJMP || p->as == ARET || p->as == AUNDEF)
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
			break;
	}
}

void
fixjmp(Prog *firstp)
{
	int jmploop;
	Prog *p, *last;
	
	if(debug['R'] && debug['v'])
		print("\nfixjmp\n");

	// pass 1: resolve jump to jump, mark all code as dead.
	jmploop = 0;
	for(p=firstp; p; p=p->link) {
		if(debug['R'] && debug['v'])
			print("%P\n", p);
137
		if(p->as != ACALL && p->to.type == TYPE_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) {
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
			p->to.u.branch = chasejmp(p->to.u.branch, &jmploop);
			if(debug['R'] && debug['v'])
				print("->%P\n", p);
		}
		p->opt = dead;
	}
	if(debug['R'] && debug['v'])
		print("\n");

	// pass 2: mark all reachable code alive
	mark(firstp);
	
	// pass 3: delete dead code (mostly JMPs).
	last = nil;
	for(p=firstp; p; p=p->link) {
		if(p->opt == dead) {
154 155
			if(p->link == P && p->as == ARET && last && last->as != ARET) {
				// This is the final ARET, and the code so far doesn't have one.
156 157 158 159 160 161
				// Let it stay. The register allocator assumes that all live code in
				// the function can be traversed by starting at all the RET instructions
				// and following predecessor links. If we remove the final RET,
				// this assumption will not hold in the case of an infinite loop
				// at the end of a function.
				// Keep the RET but mark it dead for the liveness analysis.
162
				p->mode = 1;
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
			} else {
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
		}
		if(last)
			last->link = p;
		last = p;
	}
	last->link = P;
	
	// pass 4: elide JMP to next instruction.
	// only safe if there are no jumps to JMPs anymore.
	if(!jmploop) {
		last = nil;
		for(p=firstp; p; p=p->link) {
180
			if(p->as == AJMP && p->to.type == TYPE_BRANCH && p->to.u.branch == p->link) {
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
				if(debug['R'] && debug['v'])
					print("del %P\n", p);
				continue;
			}
			if(last)
				last->link = p;
			last = p;
		}
		last->link = P;
	}
	
	if(debug['R'] && debug['v']) {
		print("\n");
		for(p=firstp; p; p=p->link)
			print("%P\n", p);
		print("\n");
	}
}
199

200 201 202
#undef alive
#undef dead

203 204 205
// Control flow analysis. The Flow structures hold predecessor and successor
// information as well as basic loop analysis.
//
206
//	graph = flowstart(firstp, 0);
207 208 209 210 211 212 213 214 215 216 217
//	... use flow graph ...
//	flowend(graph); // free graph
//
// Typical uses of the flow graph are to iterate over all the flow-relevant instructions:
//
//	for(f = graph->start; f != nil; f = f->link)
//
// or, given an instruction f, to iterate over all the predecessors, which is
// f->p1 and this list:
//
//	for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
218 219 220 221
//
// The size argument to flowstart specifies an amount of zeroed memory
// to allocate in every f->data field, for use by the client.
// If size == 0, f->data will be nil.
222 223 224 225

Graph*
flowstart(Prog *firstp, int size)
{
226
	int id, nf;
227 228 229 230
	Flow *f, *f1, *start, *last;
	Graph *graph;
	Prog *p;
	ProgInfo info;
231
	char *data;
232 233 234 235 236

	// Count and mark instructions to annotate.
	nf = 0;
	for(p = firstp; p != P; p = p->link) {
		p->opt = nil; // should be already, but just in case
Russ Cox's avatar
Russ Cox committed
237
		thearch.proginfo(&info, p);
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
		if(info.flags & Skip)
			continue;
		p->opt = (void*)1;
		nf++;
	}
	
	if(nf == 0)
		return nil;

	if(nf >= 20000) {
		// fatal("%S is too big (%d instructions)", curfn->nname->sym, nf);
		return nil;
	}

	// Allocate annotations and assign to instructions.
253
	graph = calloc(sizeof *graph + sizeof(Flow)*nf + size*nf, 1);
254 255 256 257 258
	if(graph == nil)
		fatal("out of memory");
	start = (Flow*)(graph+1);
	last = nil;
	f = start;
259 260 261 262
	data = (char*)(f+nf);
	if(size == 0)
		data = nil;
	id = 0;
263 264 265 266 267 268 269 270
	for(p = firstp; p != P; p = p->link) {
		if(p->opt == nil)
			continue;
		p->opt = f;
		f->prog = p;
		if(last)
			last->link = f;
		last = f;
271 272 273 274 275
		f->data = data;
		f->id = id;
		f++;
		id++;
		data += size;
276 277 278 279 280
	}

	// Fill in pred/succ information.
	for(f = start; f != nil; f = f->link) {
		p = f->prog;
Russ Cox's avatar
Russ Cox committed
281
		thearch.proginfo(&info, p);
282 283 284 285 286
		if(!(info.flags & Break)) {
			f1 = f->link;
			f->s1 = f1;
			f1->p1 = f;
		}
287
		if(p->to.type == TYPE_BRANCH) {
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
			if(p->to.u.branch == P)
				fatal("pnil %P", p);
			f1 = p->to.u.branch->opt;
			if(f1 == nil)
				fatal("fnil %P / %P", p, p->to.u.branch);
			if(f1 == f) {
				//fatal("self loop %P", p);
				continue;
			}
			f->s2 = f1;
			f->p2link = f1->p2;
			f1->p2 = f;
		}
	}
	
	graph->start = start;
	graph->num = nf;
	return graph;
}

void
flowend(Graph *graph)
{
	Flow *f;
	
	for(f = graph->start; f != nil; f = f->link)
		f->prog->opt = nil;
	free(graph);
}

/*
 * find looping structure
 *
 * 1) find reverse postordering
 * 2) find approximate dominators,
 *	the actual dominators if the flow graph is reducible
 *	otherwise, dominators plus some other non-dominators.
 *	See Matthew S. Hecht and Jeffrey D. Ullman,
 *	"Analysis of a Simple Algorithm for Global Data Flow Problems",
 *	Conf.  Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
 *	Oct. 1-3, 1973, pp.  207-217.
 * 3) find all nodes with a predecessor dominated by the current node.
 *	such a node is a loop head.
 *	recursively, all preds with a greater rpo number are in the loop
 */
static int32
postorder(Flow *r, Flow **rpo2r, int32 n)
{
	Flow *r1;

	r->rpo = 1;
	r1 = r->s1;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	r1 = r->s2;
	if(r1 && !r1->rpo)
		n = postorder(r1, rpo2r, n);
	rpo2r[n] = r;
	n++;
	return n;
}

static int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 t;

	if(rpo1 == -1)
		return rpo2;
	while(rpo1 != rpo2){
		if(rpo1 > rpo2){
			t = rpo2;
			rpo2 = rpo1;
			rpo1 = t;
		}
		while(rpo1 < rpo2){
			t = idom[rpo2];
			if(t >= rpo2)
				fatal("bad idom");
			rpo2 = t;
		}
	}
	return rpo1;
}

static int
doms(int32 *idom, int32 r, int32 s)
{
	while(s > r)
		s = idom[s];
	return s == r;
}

static int
loophead(int32 *idom, Flow *r)
{
	int32 src;

	src = r->rpo;
	if(r->p1 != nil && doms(idom, src, r->p1->rpo))
		return 1;
	for(r = r->p2; r != nil; r = r->p2link)
		if(doms(idom, src, r->rpo))
			return 1;
	return 0;
}

static void
loopmark(Flow **rpo2r, int32 head, Flow *r)
{
	if(r->rpo < head || r->active == head)
		return;
	r->active = head;
	r->loop += LOOP;
	if(r->p1 != nil)
		loopmark(rpo2r, head, r->p1);
	for(r = r->p2; r != nil; r = r->p2link)
		loopmark(rpo2r, head, r);
}

void
flowrpo(Graph *g)
{
	Flow *r1;
	int32 i, d, me, nr, *idom;
	Flow **rpo2r;

	free(g->rpo);
	g->rpo = calloc(g->num*sizeof g->rpo[0], 1);
	idom = calloc(g->num*sizeof idom[0], 1);
	if(g->rpo == nil || idom == nil)
		fatal("out of memory");

421 422 423
	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
	rpo2r = g->rpo;
	d = postorder(g->start, rpo2r, 0);
	nr = g->num;
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;
	for(i = 0; i < nr / 2; i++) {
		r1 = rpo2r[i];
		rpo2r[i] = rpo2r[nr - 1 - i];
		rpo2r[nr - 1 - i] = r1;
	}
	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		me = r1->rpo;
		d = -1;
		// rpo2r[r->rpo] == r protects against considering dead code,
		// which has r->rpo == 0.
		if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me)
			d = r1->p1->rpo;
		for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
			if(rpo2r[r1->rpo] == r1 && r1->rpo < me)
				d = rpolca(idom, d, r1->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		r1 = rpo2r[i];
		r1->loop++;
		if(r1->p2 != nil && loophead(idom, r1))
			loopmark(rpo2r, i, r1);
	}
	free(idom);
460 461 462

	for(r1 = g->start; r1 != nil; r1 = r1->link)
		r1->active = 0;
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
}

Flow*
uniqp(Flow *r)
{
	Flow *r1;

	r1 = r->p1;
	if(r1 == nil) {
		r1 = r->p2;
		if(r1 == nil || r1->p2link != nil)
			return nil;
	} else
		if(r->p2 != nil)
			return nil;
	return r1;
}

Flow*
uniqs(Flow *r)
{
	Flow *r1;

	r1 = r->s1;
	if(r1 == nil) {
		r1 = r->s2;
		if(r1 == nil)
			return nil;
	} else
		if(r->s2 != nil)
			return nil;
	return r1;
}

497 498 499 500 501 502 503 504 505 506 507 508 509
// The compilers assume they can generate temporary variables
// as needed to preserve the right semantics or simplify code
// generation and the back end will still generate good code.
// This results in a large number of ephemeral temporary variables.
// Merge temps with non-overlapping lifetimes and equal types using the
// greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation",
// ACM TOPLAS 1999.

typedef struct TempVar TempVar;

struct TempVar
{
	Node *node;
510 511
	Flow *def; // definition of temp var
	Flow *use; // use list, chained through Flow.data
512 513
	TempVar *freelink; // next free temp in Type.opt list
	TempVar *merge; // merge var with this one
514 515
	vlong start; // smallest Prog.pc in live range
	vlong end; // largest Prog.pc in live range
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
	uchar addr; // address taken - no accurate end
	uchar removed; // removed from program
};

static int
startcmp(const void *va, const void *vb)
{
	TempVar *a, *b;
	
	a = *(TempVar**)va;
	b = *(TempVar**)vb;

	if(a->start < b->start)
		return -1;
	if(a->start > b->start)
		return +1;
532 533 534 535 536 537 538 539
	// Order what's left by id or symbol name,
	// just so that sort is forced into a specific ordering,
	// so that the result of the sort does not depend on
	// the sort implementation.
	if(a->def != b->def)
		return a->def->id - b->def->id;
	if(a->node != b->node)
		return strcmp(a->node->sym->name, b->node->sym->name);
540 541 542 543 544 545 546
	return 0;
}

// Is n available for merging?
static int
canmerge(Node *n)
{
547
	return n->class == PAUTO && strncmp(n->sym->name, "autotmp", 7) == 0;
548 549
}

550 551
static void mergewalk(TempVar*, Flow*, uint32);
static void varkillwalk(TempVar*, Flow*, uint32);
552 553 554 555 556 557

void
mergetemp(Prog *firstp)
{
	int i, j, nvar, ninuse, nfree, nkill;
	TempVar *var, *v, *v1, **bystart, **inuse;
558
	Flow *f;
559 560 561 562 563 564 565 566
	NodeList *l, **lp;
	Node *n;
	Prog *p, *p1;
	Type *t;
	ProgInfo info, info1;
	int32 gen;
	Graph *g;

567
	enum { debugmerge = 1 };
568

569
	g = flowstart(firstp, 0);
570 571
	if(g == nil)
		return;
572
	
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
	// Build list of all mergeable variables.
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next)
		if(canmerge(l->n))
			nvar++;
	
	var = calloc(nvar*sizeof var[0], 1);
	nvar = 0;
	for(l = curfn->dcl; l != nil; l = l->next) {
		n = l->n;
		if(canmerge(n)) {
			v = &var[nvar++];
			n->opt = v;
			v->node = n;
		}
	}
	
	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
594 595
	for(f = g->start; f != nil; f = f->link) {
		p = f->prog;
Russ Cox's avatar
Russ Cox committed
596
		thearch.proginfo(&info, p);
597

Russ Cox's avatar
Russ Cox committed
598
		if(p->from.node != N && ((Node*)(p->from.node))->opt && p->to.node != N && ((Node*)(p->to.node))->opt)
599
			fatal("double node %P", p);
Russ Cox's avatar
Russ Cox committed
600 601 602 603 604 605
		v = nil;
		if((n = p->from.node) != N)
			v = n->opt;
		if(v == nil && (n = p->to.node) != N)
			v = n->opt;
		if(v != nil) {
606
		   	if(v->def == nil)
607 608 609
		   		v->def = f;
		   	f->data = v->use;
			v->use = f;
610 611 612 613 614
			if(n == p->from.node && (info.flags & LeftAddr))
				v->addr = 1;
		}
	}
	
615
	if(debugmerge > 1 && debug['v'])
Russ Cox's avatar
Russ Cox committed
616
		dumpit("before", g->start, 0);
617 618 619 620
	
	nkill = 0;

	// Special case.
621 622
	for(i = 0; i < nvar; i++) {
		v = &var[i];
623 624 625
		if(v->addr)
			continue;
		// Used in only one instruction, which had better be a write.
626 627
		if((f = v->use) != nil && (Flow*)f->data == nil) {
			p = f->prog;
Russ Cox's avatar
Russ Cox committed
628
			thearch.proginfo(&info, p);
629
			if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) {
630
				p->as = ANOP;
631
				p->to = zprog.to;
632
				v->removed = 1;
633
				if(debugmerge > 0 && debug['v'])
634 635 636 637 638 639 640 641 642
					print("drop write-only %S\n", v->node->sym);
			} else
				fatal("temp used and not set: %P", p);
			nkill++;
			continue;
		}
		
		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
643 644
		if((f = v->use) != nil && f->link == (Flow*)f->data && (Flow*)((Flow*)f->data)->data == nil && uniqp(f->link) == f) {
			p = f->prog;
Russ Cox's avatar
Russ Cox committed
645
			thearch.proginfo(&info, p);
646
			p1 = f->link->prog;
Russ Cox's avatar
Russ Cox committed
647
			thearch.proginfo(&info1, p1);
648 649 650 651 652 653 654
			enum {
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD,
			};
			if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) &&
			   !((info.flags|info1.flags) & (LeftAddr|RightAddr)) &&
			   (info.flags & SizeAny) == (info1.flags & SizeAny)) {
				p1->from = p->from;
Russ Cox's avatar
Russ Cox committed
655
				thearch.excise(f);
656
				v->removed = 1;
657
				if(debugmerge > 0 && debug['v'])
658 659 660 661 662 663 664 665 666
					print("drop immediate-use %S\n", v->node->sym);
			}
			nkill++;
			continue;
		}			   
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
667
	// to clear all the r->active words after each variable.
668
	gen = 0;
669 670
	for(i = 0; i < nvar; i++) {
		v = &var[i];
671
		gen++;
672 673
		for(f = v->use; f != nil; f = (Flow*)f->data)
			mergewalk(v, f, gen);
674 675
		if(v->addr) {
			gen++;
676 677
			for(f = v->use; f != nil; f = (Flow*)f->data)
				varkillwalk(v, f, gen);
678
		}
679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
	}

	// Sort variables by start.
	bystart = malloc(nvar*sizeof bystart[0]);
	for(i=0; i<nvar; i++)
		bystart[i] = &var[i];
	qsort(bystart, nvar, sizeof bystart[0], startcmp);

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse = malloc(nvar*sizeof inuse[0]);
	ninuse = 0;
	nfree = nvar;
	for(i=0; i<nvar; i++) {
		v = bystart[i];
698 699 700
		if(debugmerge > 0 && debug['v'])
			print("consider %#N: removed=%d\n", v->node, v->removed);
			
701
		if(v->removed)
702 703 704 705 706 707 708 709
			continue;

		// Expire no longer in use.
		while(ninuse > 0 && inuse[ninuse-1]->end < v->start) {
			v1 = inuse[--ninuse];
			inuse[--nfree] = v1;
		}

710 711
		if(debugmerge > 0 && debug['v'])
			print("consider %#N: removed=%d nfree=%d nvar=%d\n", v->node, v->removed, nfree, nvar);
712 713 714 715
		// Find old temp to reuse if possible.
		t = v->node->type;
		for(j=nfree; j<nvar; j++) {
			v1 = inuse[j];
716 717
			if(debugmerge > 0 && debug['v'])
				print("consider %#N: maybe %#N: type=%T,%T addrtaken=%d,%d\n", v->node, v1->node, t, v1->node->type, v->node->addrtaken, v1->node->addrtaken);
718 719 720 721 722 723
			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if(eqtype(t, v1->node->type) && v->node->addrtaken == v1->node->addrtaken) {
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
				inuse[j] = inuse[nfree++];
				if(v1->merge)
					v->merge = v1->merge;
				else
					v->merge = v1;
				nkill++;
				break;
			}
		}

		// Sort v into inuse.
		j = ninuse++;
		while(j > 0 && inuse[j-1]->end < v->end) {
			inuse[j] = inuse[j-1];
			j--;
		}
		inuse[j] = v;
	}

743
	if(debugmerge > 0 && debug['v']) {
744
		print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill);
745 746
		for(i = 0; i < nvar; i++) {
			v = &var[i];
747
			print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end);
748 749 750 751 752 753
			if(v->addr)
				print(" addr=1");
			if(v->removed)
				print(" dead=1");
			if(v->merge)
				print(" merge %#N", v->merge->node);
754
			if(v->start == v->end && v->def != nil)
755
				print(" %P", v->def->prog);
756 757 758
			print("\n");
		}
	
759
		if(debugmerge > 1 && debug['v'])
Russ Cox's avatar
Russ Cox committed
760
			dumpit("after", g->start, 0);
761 762 763
	}

	// Update node references to use merged temporaries.
764 765
	for(f = g->start; f != nil; f = f->link) {
		p = f->prog;
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
		if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->from.node = v->merge->node;
		if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil)
			p->to.node = v->merge->node;
	}

	// Delete merged nodes from declaration list.
	for(lp = &curfn->dcl; (l = *lp); ) {
		curfn->dcl->end = l;
		n = l->n;
		v = n->opt;
		if(v && (v->merge || v->removed)) {
			*lp = l->next;
			continue;
		}
		lp = &l->next;
	}

	// Clear aux structures.
785 786 787
	for(i = 0; i < nvar; i++)
		var[i].node->opt = nil;

788 789 790 791 792 793 794
	free(var);
	free(bystart);
	free(inuse);
	flowend(g);
}

static void
795
mergewalk(TempVar *v, Flow *f0, uint32 gen)
796 797
{
	Prog *p;
798
	Flow *f1, *f, *f2;
799
	
800 801
	for(f1 = f0; f1 != nil; f1 = f1->p1) {
		if(f1->active == gen)
802
			break;
803 804
		f1->active = gen;
		p = f1->prog;
805 806
		if(v->end < p->pc)
			v->end = p->pc;
807
		if(f1 == v->def) {
808
			v->start = p->pc;
809 810 811 812
			break;
		}
	}
	
813 814 815
	for(f = f0; f != f1; f = f->p1)
		for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
			mergewalk(v, f2, gen);
816
}
817

818
static void
819
varkillwalk(TempVar *v, Flow *f0, uint32 gen)
820 821
{
	Prog *p;
822
	Flow *f1, *f;
823
	
824 825
	for(f1 = f0; f1 != nil; f1 = f1->s1) {
		if(f1->active == gen)
826
			break;
827 828
		f1->active = gen;
		p = f1->prog;
829 830 831 832
		if(v->end < p->pc)
			v->end = p->pc;
		if(v->start > p->pc)
			v->start = p->pc;
833
		if(p->as == ARET || (p->as == AVARKILL && p->to.node == v->node))
834 835 836
			break;
	}
	
837 838
	for(f = f0; f != f1; f = f->s1)
		varkillwalk(v, f->s2, gen);
839 840
}

841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
// Eliminate redundant nil pointer checks.
//
// The code generation pass emits a CHECKNIL for every possibly nil pointer.
// This pass removes a CHECKNIL if every predecessor path has already
// checked this value for nil.
//
// Simple backwards flood from check to definition.
// Run prog loop backward from end of program to beginning to avoid quadratic
// behavior removing a run of checks.
//
// Assume that stack variables with address not taken can be loaded multiple times
// from memory without being rechecked. Other variables need to be checked on
// each load.
	
typedef struct NilVar NilVar;

857 858
static void nilwalkback(Flow *rcheck);
static void nilwalkfwd(Flow *rcheck);
859

860
static int killed; // f->data is either nil or &killed
861 862 863 864

void
nilopt(Prog *firstp)
{
865
	Flow *f;
866 867 868 869
	Prog *p;
	Graph *g;
	int ncheck, nkill;

870
	g = flowstart(firstp, 0);
871 872 873 874
	if(g == nil)
		return;

	if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */)
Russ Cox's avatar
Russ Cox committed
875
		dumpit("nilopt", g->start, 0);
876 877 878

	ncheck = 0;
	nkill = 0;
879 880
	for(f = g->start; f != nil; f = f->link) {
		p = f->prog;
Russ Cox's avatar
Russ Cox committed
881
		if(p->as != ACHECKNIL || !thearch.regtyp(&p->from))
882 883
			continue;
		ncheck++;
Russ Cox's avatar
Russ Cox committed
884
		if(thearch.stackaddr(&p->from)) {
885 886
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check of SP address");
887
			f->data = &killed;
888 889
			continue;
		}
890 891
		nilwalkfwd(f);
		if(f->data != nil) {
892 893 894 895
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed nil check before indirect");
			continue;
		}
896 897
		nilwalkback(f);
		if(f->data != nil) {
898 899 900 901 902 903
			if(debug_checknil && p->lineno > 1)
				warnl(p->lineno, "removed repeated nil check");
			continue;
		}
	}
	
904 905
	for(f = g->start; f != nil; f = f->link) {
		if(f->data != nil) {
906
			nkill++;
Russ Cox's avatar
Russ Cox committed
907
			thearch.excise(f);
908 909 910 911 912 913 914 915 916 917
		}
	}

	flowend(g);
	
	if(debug_checknil > 1)
		print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck);
}

static void
918
nilwalkback(Flow *fcheck)
919 920 921
{
	Prog *p;
	ProgInfo info;
922
	Flow *f;
923
	
924 925
	for(f = fcheck; f != nil; f = uniqp(f)) {
		p = f->prog;
Russ Cox's avatar
Russ Cox committed
926 927
		thearch.proginfo(&info, p);
		if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) {
928 929 930 931
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
			return;
		}
Russ Cox's avatar
Russ Cox committed
932
		if(f != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from)) {
933
			fcheck->data = &killed;
934 935 936 937 938
			return;
		}
	}

	// Here is a more complex version that scans backward across branches.
939 940
	// It assumes fcheck->kill = 1 has been set on entry, and its job is to find a reason
	// to keep the check (setting fcheck->kill = 0).
941 942 943 944
	// It doesn't handle copying of aggregates as well as I would like,
	// nor variables with their address taken,
	// and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3.
	/*
945 946
	for(f1 = f0; f1 != nil; f1 = f1->p1) {
		if(f1->active == gen)
947
			break;
948 949
		f1->active = gen;
		p = f1->prog;
950 951 952
		
		// If same check, stop this loop but still check
		// alternate predecessors up to this point.
Russ Cox's avatar
Russ Cox committed
953
		if(f1 != fcheck && p->as == ACHECKNIL && thearch.sameaddr(&p->from, &fcheck->prog->from))
954 955
			break;

Russ Cox's avatar
Russ Cox committed
956 957
		thearch.proginfo(&info, p);
		if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from)) {
958 959
			// Found initialization of value we're checking for nil.
			// without first finding the check, so this one is unchecked.
960
			fcheck->kill = 0;
961 962 963
			return;
		}
		
964 965 966
		if(f1->p1 == nil && f1->p2 == nil) {
			print("lost pred for %P\n", fcheck->prog);
			for(f1=f0; f1!=nil; f1=f1->p1) {
Russ Cox's avatar
Russ Cox committed
967 968
				thearch.proginfo(&info, f1->prog);
				print("\t%P %d %d %D %D\n", r1->prog, info.flags&RightWrite, thearch.sameaddr(&f1->prog->to, &fcheck->prog->from), &f1->prog->to, &fcheck->prog->from);
969 970 971 972 973
			}
			fatal("lost pred trail");
		}
	}

974 975 976
	for(f = f0; f != f1; f = f->p1)
		for(f2 = f->p2; f2 != nil; f2 = f2->p2link)
			nilwalkback(fcheck, f2, gen);
977 978 979 980
	*/
}

static void
981
nilwalkfwd(Flow *fcheck)
982
{
983
	Flow *f, *last;
984 985 986 987 988 989 990 991 992 993
	Prog *p;
	ProgInfo info;
	
	// If the path down from rcheck dereferences the address
	// (possibly with a small offset) before writing to memory
	// and before any subsequent checks, it's okay to wait for
	// that implicit check. Only consider this basic block to
	// avoid problems like:
	//	_ = *x // should panic
	//	for {} // no writes but infinite loop may be considered visible
994
	last = nil;
995 996
	for(f = uniqs(fcheck); f != nil; f = uniqs(f)) {
		p = f->prog;
Russ Cox's avatar
Russ Cox committed
997
		thearch.proginfo(&info, p);
998
		
Russ Cox's avatar
Russ Cox committed
999
		if((info.flags & LeftRead) && thearch.smallindir(&p->from, &fcheck->prog->from)) {
1000
			fcheck->data = &killed;
1001 1002
			return;
		}
Russ Cox's avatar
Russ Cox committed
1003
		if((info.flags & (RightRead|RightWrite)) && thearch.smallindir(&p->to, &fcheck->prog->from)) {
1004
			fcheck->data = &killed;
1005 1006 1007 1008
			return;
		}
		
		// Stop if another nil check happens.
1009
		if(p->as == ACHECKNIL)
1010 1011
			return;
		// Stop if value is lost.
Russ Cox's avatar
Russ Cox committed
1012
		if((info.flags & RightWrite) && thearch.sameaddr(&p->to, &fcheck->prog->from))
1013 1014
			return;
		// Stop if memory write.
Russ Cox's avatar
Russ Cox committed
1015
		if((info.flags & RightWrite) && !thearch.regtyp(&p->to))
1016
			return;
1017
		// Stop if we jump backward.
1018
		if(last != nil && f->id <= last->id)
1019
			return;
1020
		last = f;
1021 1022
	}
}