// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

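// defframe fills in the argument and frame sizes on the function's TEXT
// instruction and emits code to zero any ambiguously live stack slots,
// so the garbage collector never scans uninitialized words for pointers.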
func defframe(ptxt *obj.Prog) {
	// fill in argument size, stack size
	ptxt.To.Type = obj.TYPE_TEXTSIZE

	ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.Argwid, int64(gc.Widthptr)))
	frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
	ptxt.To.Offset = int64(frame)

	// insert code to zero ambiguously live variables
	// so that the garbage collector only sees initialized values
	// when it looks for pointers.
	p := ptxt

	hi := int64(0)
	lo := hi
	ax := uint32(0)
	for _, n := range gc.Curfn.Func.Dcl {
		if !n.Name.Needzero {
			continue
		}
		if n.Class != gc.PAUTO {
			gc.Fatalf("needzero class %d", n.Class)
		}
		if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
			gc.Fatalf("var %v has size %d offset %d", gc.Nconv(n, obj.FmtLong), int(n.Type.Width), int(n.Xoffset))
		}
		if lo != hi && n.Xoffset+n.Type.Width == lo-int64(2*gc.Widthptr) {
			// merge with range we already have
			lo = n.Xoffset

			continue
		}

		// zero old range
		p = zerorange(p, int64(frame), lo, hi, &ax)

		// set new range
		hi = n.Xoffset + n.Type.Width

		lo = n.Xoffset
	}

	// zero final range
	zerorange(p, int64(frame), lo, hi, &ax)
}

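// zerorange emits instructions after p to zero the stack range
// [frame+lo, frame+hi) relative to SP. *ax records whether AX already
// holds zero, so the constant load is emitted at most once per frame.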
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *ax == 0 {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
		*ax = 1
	}

	if cnt <= int64(4*gc.Widthreg) {
		for i := int64(0); i < cnt; i += int64(gc.Widthreg) {
			p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i)
		}
	} else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) {
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg)))
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
	} else {
		p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
		p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0)
		p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
		p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
	}

	return p
}

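// appendpp allocates a new instruction with the given opcode and
// operands, links it into the instruction list immediately after p,
// and returns it.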
func appendpp(p *obj.Prog, as int, ftype obj.AddrType, freg int, foffset int64, ttype obj.AddrType, treg int, toffset int64) *obj.Prog {
	q := gc.Ctxt.NewProg()
	gc.Clearp(q)
	q.As = int16(as)
	q.Lineno = p.Lineno
	q.From.Type = ftype
	q.From.Reg = int16(freg)
	q.From.Offset = foffset
	q.To.Type = ttype
	q.To.Reg = int16(treg)
	q.To.Offset = toffset
	q.Link = p.Link
	p.Link = q
	return q
}

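// clearfat zeroes a "fat" (multi-word) value nl in place: small values
// get an unrolled sequence of MOVs, medium ones a DUFFZERO call, and
// large ones a REP STOSL loop.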
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // bytes
	q := w / 4 // quads

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSL.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSL loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)

		gc.Agen(nl, &n1)
		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
	gc.Agen(nl, &n1)
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || (q >= 4 && gc.Nacl) {
		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
	} else if q >= 4 {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
		p.To.Offset = 1 * (128 - int64(q))
	} else {
		for q > 0 {
			gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
			q--
		}
	}

	for c > 0 {
		gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
		c--
	}
}

var panicdiv *gc.Node

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = false
	}

	var t1 gc.Node
	gc.Tempname(&t1, t)
	var t2 gc.Node
	gc.Tempname(&t2, t)
	if t0 != t {
		var t3 gc.Node
		gc.Tempname(&t3, t0)
		var t4 gc.Node
		gc.Tempname(&t4, t0)
		gc.Cgen(nl, &t3)
		gc.Cgen(nr, &t4)

		// Convert.
		gmove(&t3, &t1)

		gmove(&t4, &t2)
	} else {
		gc.Cgen(nl, &t1)
		gc.Cgen(nr, &t2)
	}

	var n1 gc.Node
	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
		gc.Regalloc(&n1, t, res)
	} else {
		gc.Regalloc(&n1, t, nil)
	}
	gmove(&t2, &n1)
	gmove(&t1, ax)
	var p2 *obj.Prog
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	if check {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, ax)

			gmove(ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	if !gc.Issigned[t.Etype] {
		var nz gc.Node
		gc.Nodconst(&nz, t, 0)
		gmove(&nz, dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(optoas(op, t), &n1, nil)
	gc.Regfree(&n1)

	if op == gc.ODIV {
		gmove(ax, res)
	} else {
		gmove(dx, res)
	}
	if check {
		gc.Patch(p2, gc.Pc)
	}
}

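// savex claims the hardware register dr (AX or DX here) in *x. If the
// register is live and is not the destination, its current value is
// spilled to the temporary *oldx so that restx can restore it later.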
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := gc.GetReg(dr)
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}

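// restx frees the register claimed by savex and, if its previous value
// was spilled to oldx, moves that value back.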
func restx(x *gc.Node, oldx *gc.Node) {
	gc.Regfree(x)

	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT32]
		gmove(oldx, x)
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
func cgen_div(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if gc.Is64(nl.Type) {
		gc.Fatalf("cgen_div %v", nl.Type)
	}

	var t *gc.Type
	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	} else {
		t = gc.Types[gc.TUINT32]
	}
	var ax gc.Node
	var oldax gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, t)
	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	dodiv(op, nl, nr, res, &ax, &dx)
	restx(&dx, &olddx)
	restx(&ax, &oldax)
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatalf("cgen_shift %v", nl.Type)
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	var oldcx gc.Node
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	}

	var n2 gc.Node
414
	if gc.Samereg(&cx, res) {
415
		gc.Regalloc(&n2, nl.Type, nil)
416
	} else {
417
		gc.Regalloc(&n2, nl.Type, res)
418 419
	}
	if nl.Ullman >= nr.Ullman {
420 421
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
422
	} else {
423 424
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
425 426 427
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}

/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)
	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)
	gc.Cgen(nr, &n2)

	var ax, oldax, dx, olddx gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT32])
	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT32])

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		var byteAH, byteDX gc.Node
		gc.Nodreg(&byteAH, t, x86.REG_AH)
		gc.Nodreg(&byteDX, t, x86.REG_DX)
		gmove(&byteAH, &byteDX)
	}

	gmove(&dx, res)

	restx(&ax, &oldax)
	restx(&dx, &olddx)
}

/*
 * generate floating-point operation.
 */
func cgen_float(n *gc.Node, res *gc.Node) {
	nl := n.Left
	switch n.Op {
	case gc.OEQ,
		gc.ONE,
		gc.OLT,
		gc.OLE,
		gc.OGE:
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gc.Bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		gc.Cgen(nl, res)
		return

	case gc.OCONV:
		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
			gc.Cgen(nl, res)
			return
		}

		var n2 gc.Node
		gc.Tempname(&n2, n.Type)
		var n1 gc.Node
		gc.Mgen(nl, &n1, res)
		gmove(&n1, &n2)
		gmove(&n2, res)
		gc.Mfree(&n1)
		return
	}

	if gc.Thearch.Use387 {
		cgen_float387(n, res)
	} else {
		cgen_floatsse(n, res)
	}
}

// floating-point.  387 (not SSE2)
func cgen_float387(n *gc.Node, res *gc.Node) {
	var f0 gc.Node
	var f1 gc.Node

	nl := n.Left
	nr := n.Right
	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
	if nr != nil {
		// binary
		if nl.Ullman >= nr.Ullman {
			gc.Cgen(nl, &f0)
			if nr.Addable {
				gins(foptoas(n.Op, n.Type, 0), nr, &f0)
			} else {
				gc.Cgen(nr, &f0)
				gins(foptoas(n.Op, n.Type, Fpop), &f0, &f1)
			}
		} else {
			gc.Cgen(nr, &f0)
			if nl.Addable {
				gins(foptoas(n.Op, n.Type, Frev), nl, &f0)
			} else {
				gc.Cgen(nl, &f0)
				gins(foptoas(n.Op, n.Type, Frev|Fpop), &f0, &f1)
			}
		}

		gmove(&f0, res)
		return
	}

	// unary
	gc.Cgen(nl, &f0)

	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
		gins(foptoas(n.Op, n.Type, 0), nil, nil)
	}
	gmove(&f0, res)
	return
}

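// cgen_floatsse generates SSE code for the floating-point operation n,
// leaving the result in res.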
func cgen_floatsse(n *gc.Node, res *gc.Node) {
	var a int

	nl := n.Left
	nr := n.Right
	switch n.Op {
	default:
		gc.Dump("cgen_floatsse", n)
		gc.Fatalf("cgen_floatsse %v", gc.Oconv(int(n.Op), 0))
		return

	case gc.OMINUS,
		gc.OCOM:
		nr = gc.Nodintconst(-1)
		gc.Convlit(&nr, n.Type)
		a = foptoas(gc.OMUL, nl.Type, 0)
		goto sbop

		// symmetric binary
	case gc.OADD,
		gc.OMUL:
		a = foptoas(n.Op, nl.Type, 0)

		goto sbop

		// asymmetric binary
	case gc.OSUB,
		gc.OMOD,
		gc.ODIV:
		a = foptoas(n.Op, nl.Type, 0)

		goto abop
	}

sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
		nl, nr = nr, nl
	}

abop: // asymmetric binary
	if nl.Ullman >= nr.Ullman {
		var nt gc.Node
		gc.Tempname(&nt, nl.Type)
		gc.Cgen(nl, &nt)
		var n2 gc.Node
		gc.Mgen(nr, &n2, nil)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&nt, &n1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		gc.Regfree(&n1)
		gc.Mfree(&n2)
	} else {
		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, res)
		gc.Cgen(nr, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, nil)
		gc.Cgen(nl, &n1)
		gins(a, &n2, &n1)
		gc.Regfree(&n2)
		gmove(&n1, res)
		gc.Regfree(&n1)
	}

	return
}

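// bgen_float generates a branch to "to" if the floating-point comparison
// n has truth value wantTrue. Simple branch inversion is not valid in the
// presence of NaN, so the !wantTrue case recurses with swapped targets.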
func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	op := n.Op
	if !wantTrue {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, true, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	if gc.Thearch.Use387 {
		op = gc.Brrev(op) // because the args are stacked
		if op == gc.OGE || op == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			op = gc.Brrev(op)
		}

		var ax, n2, tmp gc.Node
		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
			if nl.Ullman > nr.Ullman {
				gc.Cgen(nl, &tmp)
				gc.Cgen(nr, &tmp)
				gins(x86.AFXCHD, &tmp, &n2)
			} else {
				gc.Cgen(nr, &tmp)
				gc.Cgen(nl, &tmp)
			}
			gins(x86.AFUCOMPP, &tmp, &n2)
		} else {
			// TODO(rsc): The moves back and forth to memory
			// here are for truncating the value to 32 bits.
			// This handles 32-bit comparison but presumably
			// all the other ops have the same problem.
			// We need to figure out what the right general
			// solution is, besides telling people to use float64.
			var t1 gc.Node
			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

			var t2 gc.Node
			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
			gc.Cgen(nr, &t1)
			gc.Cgen(nl, &t2)
			gmove(&t2, &tmp)
			gins(x86.AFCOMFP, &t1, &tmp)
		}
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
	} else {
		// Not 387
		if !nl.Addable {
			nl = gc.CgenTemp(nl)
		}
		if !nr.Addable {
			nr = gc.CgenTemp(nr)
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if op == gc.OGE || op == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			op = gc.Brrev(op)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
	}

	switch op {
	case gc.OEQ:
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	case gc.ONE:
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	default:
		gc.Patch(gc.Gbranch(optoas(op, nr.Type), nil, likely), to)
	}
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()

		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = x86.ACMPL
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL

		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
		}
		p2.To.Offset = 0
	}
}

// addr += index*width if possible.
func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
	switch width {
	case 1, 2, 4, 8:
		p1 := gins(x86.ALEAL, index, addr)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Scale = int16(width)
		p1.From.Index = p1.From.Reg
		p1.From.Reg = p1.To.Reg
		return true
	}
	return false
}

// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}