Commit 2a5e6c47 authored by Keith Randall

[dev.ssa] cmd/compile/internal/ssa: redo how sign extension is handled

For integer types smaller than a machine register, we have to decide
what the invariants are for the high bits of the register.  We used
to set the high bits to the correct extension (sign or zero, as
determined by the type) of the low bits.

This CL makes the compiler ignore the high bits of the register
altogether (they are junk).

On the plus side, this means ops that generate subword results don't
have to worry about correctly extending them.  On the minus side,
ops that consume subword arguments have to deal with the input
registers not being correctly extended.

For x86, this tradeoff is probably worth it.  Almost all opcodes
have versions that use only the correct subword piece of their
inputs.  (The one big exception is array indexing.)  Not many opcodes
can correctly sign extend on output.
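
To see why indexing is the exception, here is a minimal standalone
sketch (not part of this CL; the values are made up): address
arithmetic consumes the whole 64-bit register, so junk high bits in a
subword index would corrupt the computed address unless the index is
explicitly extended first, which is what the extendIndex helper added
below does.

    package main

    import "fmt"

    func main() {
        // Pretend register value: the low 32 bits hold index 5, the high
        // 32 bits are junk left over from an earlier computation.
        reg := uint64(0xdeadbeef00000005)

        base := uint64(0x10000) // hypothetical base address of a []uint64

        wrong := base + reg*8 // using the whole register corrupts the address

        idx := uint64(uint32(reg)) // zero-extend the low 32 bits (what MOVLQZX does)
        right := base + idx*8

        fmt.Printf("wrong: %#x  right: %#x\n", wrong, right)
    }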

For other architectures, the tradeoff is probably not so clear, as
they don't have many subword-safe opcodes (e.g. 16-bit compare,
ignoring the high 16/48 bits).  Fortunately we can make this decision
per architecture.

For the machine-independent opcodes, we pretend that the "register"
size is equal to the type width, so sign extension is immaterial.
Opcodes that care about the signedness of the input (e.g. compare,
right shift) have two different variants.
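
For example, a signed and an unsigned right shift of the same 8-bit
pattern give different results, so the generic op (rather than any
register invariant) has to carry the signedness.  A small standalone
sketch, not taken from this CL:

    package main

    import "fmt"

    func main() {
        bits := uint8(0xF0) // same 8-bit pattern, two interpretations

        signed := int8(bits) >> 4 // arithmetic shift: the sign bit is replicated
        unsigned := bits >> 4     // logical shift: zeros are shifted in

        fmt.Printf("signed: %d (%#x)  unsigned: %d (%#x)\n",
            signed, uint8(signed), unsigned, unsigned)
    }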

Change-Id: I465484c5734545ee697afe83bc8bf4b53bd9df8d
Reviewed-on: https://go-review.googlesource.com/12600
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
parent 9ca24fcd
...@@ -662,51 +662,51 @@ type opAndType struct { ...@@ -662,51 +662,51 @@ type opAndType struct {
var opToSSA = map[opAndType]ssa.Op{ var opToSSA = map[opAndType]ssa.Op{
opAndType{OADD, TINT8}: ssa.OpAdd8, opAndType{OADD, TINT8}: ssa.OpAdd8,
opAndType{OADD, TUINT8}: ssa.OpAdd8U, opAndType{OADD, TUINT8}: ssa.OpAdd8,
opAndType{OADD, TINT16}: ssa.OpAdd16, opAndType{OADD, TINT16}: ssa.OpAdd16,
opAndType{OADD, TUINT16}: ssa.OpAdd16U, opAndType{OADD, TUINT16}: ssa.OpAdd16,
opAndType{OADD, TINT32}: ssa.OpAdd32, opAndType{OADD, TINT32}: ssa.OpAdd32,
opAndType{OADD, TUINT32}: ssa.OpAdd32U, opAndType{OADD, TUINT32}: ssa.OpAdd32,
opAndType{OADD, TINT64}: ssa.OpAdd64, opAndType{OADD, TINT64}: ssa.OpAdd64,
opAndType{OADD, TUINT64}: ssa.OpAdd64U, opAndType{OADD, TUINT64}: ssa.OpAdd64,
opAndType{OSUB, TINT8}: ssa.OpSub8, opAndType{OSUB, TINT8}: ssa.OpSub8,
opAndType{OSUB, TUINT8}: ssa.OpSub8U, opAndType{OSUB, TUINT8}: ssa.OpSub8,
opAndType{OSUB, TINT16}: ssa.OpSub16, opAndType{OSUB, TINT16}: ssa.OpSub16,
opAndType{OSUB, TUINT16}: ssa.OpSub16U, opAndType{OSUB, TUINT16}: ssa.OpSub16,
opAndType{OSUB, TINT32}: ssa.OpSub32, opAndType{OSUB, TINT32}: ssa.OpSub32,
opAndType{OSUB, TUINT32}: ssa.OpSub32U, opAndType{OSUB, TUINT32}: ssa.OpSub32,
opAndType{OSUB, TINT64}: ssa.OpSub64, opAndType{OSUB, TINT64}: ssa.OpSub64,
opAndType{OSUB, TUINT64}: ssa.OpSub64U, opAndType{OSUB, TUINT64}: ssa.OpSub64,
opAndType{ONOT, TBOOL}: ssa.OpNot, opAndType{ONOT, TBOOL}: ssa.OpNot,
opAndType{OMINUS, TINT8}: ssa.OpNeg8, opAndType{OMINUS, TINT8}: ssa.OpNeg8,
opAndType{OMINUS, TUINT8}: ssa.OpNeg8U, opAndType{OMINUS, TUINT8}: ssa.OpNeg8,
opAndType{OMINUS, TINT16}: ssa.OpNeg16, opAndType{OMINUS, TINT16}: ssa.OpNeg16,
opAndType{OMINUS, TUINT16}: ssa.OpNeg16U, opAndType{OMINUS, TUINT16}: ssa.OpNeg16,
opAndType{OMINUS, TINT32}: ssa.OpNeg32, opAndType{OMINUS, TINT32}: ssa.OpNeg32,
opAndType{OMINUS, TUINT32}: ssa.OpNeg32U, opAndType{OMINUS, TUINT32}: ssa.OpNeg32,
opAndType{OMINUS, TINT64}: ssa.OpNeg64, opAndType{OMINUS, TINT64}: ssa.OpNeg64,
opAndType{OMINUS, TUINT64}: ssa.OpNeg64U, opAndType{OMINUS, TUINT64}: ssa.OpNeg64,
opAndType{OMUL, TINT8}: ssa.OpMul8, opAndType{OMUL, TINT8}: ssa.OpMul8,
opAndType{OMUL, TUINT8}: ssa.OpMul8U, opAndType{OMUL, TUINT8}: ssa.OpMul8,
opAndType{OMUL, TINT16}: ssa.OpMul16, opAndType{OMUL, TINT16}: ssa.OpMul16,
opAndType{OMUL, TUINT16}: ssa.OpMul16U, opAndType{OMUL, TUINT16}: ssa.OpMul16,
opAndType{OMUL, TINT32}: ssa.OpMul32, opAndType{OMUL, TINT32}: ssa.OpMul32,
opAndType{OMUL, TUINT32}: ssa.OpMul32U, opAndType{OMUL, TUINT32}: ssa.OpMul32,
opAndType{OMUL, TINT64}: ssa.OpMul64, opAndType{OMUL, TINT64}: ssa.OpMul64,
opAndType{OMUL, TUINT64}: ssa.OpMul64U, opAndType{OMUL, TUINT64}: ssa.OpMul64,
opAndType{OAND, TINT8}: ssa.OpAnd8, opAndType{OAND, TINT8}: ssa.OpAnd8,
opAndType{OAND, TUINT8}: ssa.OpAnd8U, opAndType{OAND, TUINT8}: ssa.OpAnd8,
opAndType{OAND, TINT16}: ssa.OpAnd16, opAndType{OAND, TINT16}: ssa.OpAnd16,
opAndType{OAND, TUINT16}: ssa.OpAnd16U, opAndType{OAND, TUINT16}: ssa.OpAnd16,
opAndType{OAND, TINT32}: ssa.OpAnd32, opAndType{OAND, TINT32}: ssa.OpAnd32,
opAndType{OAND, TUINT32}: ssa.OpAnd32U, opAndType{OAND, TUINT32}: ssa.OpAnd32,
opAndType{OAND, TINT64}: ssa.OpAnd64, opAndType{OAND, TINT64}: ssa.OpAnd64,
opAndType{OAND, TUINT64}: ssa.OpAnd64U, opAndType{OAND, TUINT64}: ssa.OpAnd64,
opAndType{OLSH, TINT8}: ssa.OpLsh8, opAndType{OLSH, TINT8}: ssa.OpLsh8,
opAndType{OLSH, TUINT8}: ssa.OpLsh8, opAndType{OLSH, TUINT8}: ssa.OpLsh8,
...@@ -797,20 +797,31 @@ var opToSSA = map[opAndType]ssa.Op{ ...@@ -797,20 +797,31 @@ var opToSSA = map[opAndType]ssa.Op{
opAndType{OGE, TUINT64}: ssa.OpGeq64U, opAndType{OGE, TUINT64}: ssa.OpGeq64U,
} }
func (s *state) ssaOp(op uint8, t *Type) ssa.Op { func (s *state) concreteEtype(t *Type) uint8 {
etype := t.Etype e := t.Etype
switch etype { switch e {
default:
return e
case TINT: case TINT:
etype = TINT32 if s.config.IntSize == 8 {
if s.config.PtrSize == 8 { return TINT64
etype = TINT64
} }
return TINT32
case TUINT: case TUINT:
etype = TUINT32 if s.config.IntSize == 8 {
return TUINT64
}
return TUINT32
case TUINTPTR:
if s.config.PtrSize == 8 { if s.config.PtrSize == 8 {
etype = TUINT64 return TUINT64
} }
return TUINT32
} }
}
func (s *state) ssaOp(op uint8, t *Type) ssa.Op {
etype := s.concreteEtype(t)
x, ok := opToSSA[opAndType{op, etype}] x, ok := opToSSA[opAndType{op, etype}]
if !ok { if !ok {
s.Unimplementedf("unhandled binary op %s etype=%s", opnames[op], Econv(int(etype), 0)) s.Unimplementedf("unhandled binary op %s etype=%s", opnames[op], Econv(int(etype), 0))
...@@ -854,7 +865,71 @@ func (s *state) expr(n *Node) *ssa.Value { ...@@ -854,7 +865,71 @@ func (s *state) expr(n *Node) *ssa.Value {
return s.newValue1(ssa.OpConvNop, n.Type, x) return s.newValue1(ssa.OpConvNop, n.Type, x)
case OCONV: case OCONV:
x := s.expr(n.Left) x := s.expr(n.Left)
return s.newValue1(ssa.OpConvert, n.Type, x) ft := n.Left.Type // from type
tt := n.Type // to type
if ft.IsInteger() && tt.IsInteger() {
var op ssa.Op
if tt.Size() == ft.Size() {
op = ssa.OpConvNop
} else if tt.Size() < ft.Size() {
// truncation
switch 10*ft.Size() + tt.Size() {
case 21:
op = ssa.OpTrunc16to8
case 41:
op = ssa.OpTrunc32to8
case 42:
op = ssa.OpTrunc32to16
case 81:
op = ssa.OpTrunc64to8
case 82:
op = ssa.OpTrunc64to16
case 84:
op = ssa.OpTrunc64to32
default:
s.Fatalf("weird integer truncation %s -> %s", ft, tt)
}
} else if ft.IsSigned() {
// sign extension
switch 10*ft.Size() + tt.Size() {
case 12:
op = ssa.OpSignExt8to16
case 14:
op = ssa.OpSignExt8to32
case 18:
op = ssa.OpSignExt8to64
case 24:
op = ssa.OpSignExt16to32
case 28:
op = ssa.OpSignExt16to64
case 48:
op = ssa.OpSignExt32to64
default:
s.Fatalf("bad integer sign extension %s -> %s", ft, tt)
}
} else {
// zero extension
switch 10*ft.Size() + tt.Size() {
case 12:
op = ssa.OpZeroExt8to16
case 14:
op = ssa.OpZeroExt8to32
case 18:
op = ssa.OpZeroExt8to64
case 24:
op = ssa.OpZeroExt16to32
case 28:
op = ssa.OpZeroExt16to64
case 48:
op = ssa.OpZeroExt32to64
default:
s.Fatalf("weird integer sign extension %s -> %s", ft, tt)
}
}
return s.newValue1(op, n.Type, x)
}
s.Unimplementedf("unhandled OCONV %s -> %s", n.Left.Type, n.Type)
return nil
// binary ops // binary ops
case OLT, OEQ, ONE, OLE, OGE, OGT: case OLT, OEQ, ONE, OLE, OGE, OGT:
...@@ -933,6 +1008,7 @@ func (s *state) expr(n *Node) *ssa.Value { ...@@ -933,6 +1008,7 @@ func (s *state) expr(n *Node) *ssa.Value {
if n.Left.Type.Bound >= 0 { // array or string if n.Left.Type.Bound >= 0 { // array or string
a := s.expr(n.Left) a := s.expr(n.Left)
i := s.expr(n.Right) i := s.expr(n.Right)
i = s.extendIndex(i)
var elemtype *Type var elemtype *Type
var len *ssa.Value var len *ssa.Value
if n.Left.Type.IsString() { if n.Left.Type.IsString() {
...@@ -1099,6 +1175,7 @@ func (s *state) addr(n *Node) *ssa.Value { ...@@ -1099,6 +1175,7 @@ func (s *state) addr(n *Node) *ssa.Value {
if n.Left.Type.IsSlice() { if n.Left.Type.IsSlice() {
a := s.expr(n.Left) a := s.expr(n.Left)
i := s.expr(n.Right) i := s.expr(n.Right)
i = s.extendIndex(i)
len := s.newValue1(ssa.OpSliceLen, s.config.Uintptr, a) len := s.newValue1(ssa.OpSliceLen, s.config.Uintptr, a)
s.boundsCheck(i, len) s.boundsCheck(i, len)
p := s.newValue1(ssa.OpSlicePtr, Ptrto(n.Left.Type.Type), a) p := s.newValue1(ssa.OpSlicePtr, Ptrto(n.Left.Type.Type), a)
...@@ -1106,6 +1183,7 @@ func (s *state) addr(n *Node) *ssa.Value { ...@@ -1106,6 +1183,7 @@ func (s *state) addr(n *Node) *ssa.Value {
} else { // array } else { // array
a := s.addr(n.Left) a := s.addr(n.Left)
i := s.expr(n.Right) i := s.expr(n.Right)
i = s.extendIndex(i)
len := s.constInt(s.config.Uintptr, n.Left.Type.Bound) len := s.constInt(s.config.Uintptr, n.Left.Type.Bound)
s.boundsCheck(i, len) s.boundsCheck(i, len)
return s.newValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), a, i) return s.newValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), a, i)
...@@ -1623,7 +1701,7 @@ func genValue(v *ssa.Value) { ...@@ -1623,7 +1701,7 @@ func genValue(v *ssa.Value) {
p.From.Offset = v.AuxInt p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = x p.To.Reg = x
case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload: case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = regnum(v.Args[0]) p.From.Reg = regnum(v.Args[0])
...@@ -1646,7 +1724,7 @@ func genValue(v *ssa.Value) { ...@@ -1646,7 +1724,7 @@ func genValue(v *ssa.Value) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = regnum(v.Args[0]) p.To.Reg = regnum(v.Args[0])
addAux(&p.To, v) addAux(&p.To, v)
case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX: case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX:
p := Prog(v.Op.Asm()) p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[0]) p.From.Reg = regnum(v.Args[0])
...@@ -1868,6 +1946,55 @@ func addAux(a *obj.Addr, v *ssa.Value) { ...@@ -1868,6 +1946,55 @@ func addAux(a *obj.Addr, v *ssa.Value) {
} }
} }
// extendIndex extends v to a full pointer width.
func (s *state) extendIndex(v *ssa.Value) *ssa.Value {
size := v.Type.Size()
if size == s.config.PtrSize {
return v
}
if size > s.config.PtrSize {
// TODO: truncate 64-bit indexes on 32-bit pointer archs. We'd need to test
// the high word and branch to out-of-bounds failure if it is not 0.
s.Unimplementedf("64->32 index truncation not implemented")
return v
}
// Extend value to the required size
var op ssa.Op
if v.Type.IsSigned() {
switch 10*size + s.config.PtrSize {
case 14:
op = ssa.OpSignExt8to32
case 18:
op = ssa.OpSignExt8to64
case 24:
op = ssa.OpSignExt16to32
case 28:
op = ssa.OpSignExt16to64
case 48:
op = ssa.OpSignExt32to64
default:
s.Fatalf("bad signed index extension %s", v.Type)
}
} else {
switch 10*size + s.config.PtrSize {
case 14:
op = ssa.OpZeroExt8to32
case 18:
op = ssa.OpZeroExt8to64
case 24:
op = ssa.OpZeroExt16to32
case 28:
op = ssa.OpZeroExt16to64
case 48:
op = ssa.OpZeroExt32to64
default:
s.Fatalf("bad unsigned index extension %s", v.Type)
}
}
return s.newValue1(op, s.config.Uintptr, v)
}
// ssaRegToReg maps ssa register numbers to obj register numbers. // ssaRegToReg maps ssa register numbers to obj register numbers.
var ssaRegToReg = [...]int16{ var ssaRegToReg = [...]int16{
x86.REG_AX, x86.REG_AX,
......
...@@ -6,6 +6,7 @@ package ssa ...@@ -6,6 +6,7 @@ package ssa
type Config struct { type Config struct {
arch string // "amd64", etc. arch string // "amd64", etc.
IntSize int64 // 4 or 8
PtrSize int64 // 4 or 8 PtrSize int64 // 4 or 8
Uintptr Type // pointer arithmetic type Uintptr Type // pointer arithmetic type
Int Type Int Type
...@@ -36,10 +37,12 @@ func NewConfig(arch string, fe Frontend) *Config { ...@@ -36,10 +37,12 @@ func NewConfig(arch string, fe Frontend) *Config {
c := &Config{arch: arch, fe: fe} c := &Config{arch: arch, fe: fe}
switch arch { switch arch {
case "amd64": case "amd64":
c.IntSize = 8
c.PtrSize = 8 c.PtrSize = 8
c.lowerBlock = rewriteBlockAMD64 c.lowerBlock = rewriteBlockAMD64
c.lowerValue = rewriteValueAMD64 c.lowerValue = rewriteValueAMD64
case "386": case "386":
c.IntSize = 4
c.PtrSize = 4 c.PtrSize = 4
c.lowerBlock = rewriteBlockAMD64 c.lowerBlock = rewriteBlockAMD64
c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support
...@@ -52,6 +55,8 @@ func NewConfig(arch string, fe Frontend) *Config { ...@@ -52,6 +55,8 @@ func NewConfig(arch string, fe Frontend) *Config {
c.Int = TypeInt32 c.Int = TypeInt32
if c.PtrSize == 8 { if c.PtrSize == 8 {
c.Uintptr = TypeUInt64 c.Uintptr = TypeUInt64
}
if c.IntSize == 8 {
c.Int = TypeInt64 c.Int = TypeInt64
} }
......
...@@ -3,10 +3,7 @@ ...@@ -3,10 +3,7 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// x86 register conventions: // x86 register conventions:
// - Integer types live in the low portion of registers. // - Integer types live in the low portion of registers. Upper portions are junk.
// Upper portions are correctly extended.
// TODO: reconsider? The current choice means we need no extension for indexing,
// but we do need extension for e.g. 32-bit signed adds.
// - Boolean types use the low-order byte of a register. Upper bytes are junk. // - Boolean types use the low-order byte of a register. Upper bytes are junk.
// - We do not use AH,BH,CH,DH registers. // - We do not use AH,BH,CH,DH registers.
// - Floating-point types will live in the low natural slot of an sse2 register. // - Floating-point types will live in the low natural slot of an sse2 register.
...@@ -14,78 +11,75 @@ ...@@ -14,78 +11,75 @@
// Lowering arithmetic // Lowering arithmetic
(Add64 x y) -> (ADDQ x y) (Add64 x y) -> (ADDQ x y)
(Add64U x y) -> (ADDQ x y)
(AddPtr x y) -> (ADDQ x y) (AddPtr x y) -> (ADDQ x y)
(Add32U x y) -> (ADDL x y) (Add32 x y) -> (ADDL x y)
(Add32 x y) -> (MOVLQSX (ADDL <v.Type> x y)) (Add16 x y) -> (ADDW x y)
(Add16U x y) -> (ADDW x y) (Add8 x y) -> (ADDB x y)
(Add16 x y) -> (MOVWQSX (ADDW <v.Type> x y))
(Add8U x y) -> (ADDB x y)
(Add8 x y) -> (MOVBQSX (ADDB <v.Type> x y))
(And64 x y) -> (ANDQ x y) (And64 x y) -> (ANDQ x y)
(And64U x y) -> (ANDQ x y) (And32 x y) -> (ANDL x y)
(And32U x y) -> (ANDL x y) (And16 x y) -> (ANDW x y)
(And32 x y) -> (MOVLQSX (ANDL <v.Type> x y)) (And8 x y) -> (ANDB x y)
(And16U x y) -> (ANDW x y)
(And16 x y) -> (MOVWQSX (ANDW <v.Type> x y))
(And8U x y) -> (ANDB x y)
(And8 x y) -> (MOVBQSX (ANDB <v.Type> x y))
(Sub64 x y) -> (SUBQ x y) (Sub64 x y) -> (SUBQ x y)
(Sub64U x y) -> (SUBQ x y) (Sub32 x y) -> (SUBL x y)
(Sub32U x y) -> (SUBL x y) (Sub16 x y) -> (SUBW x y)
(Sub32 x y) -> (MOVLQSX (SUBL <v.Type> x y)) (Sub8 x y) -> (SUBB x y)
(Sub16U x y) -> (SUBW x y)
(Sub16 x y) -> (MOVWQSX (SUBW <v.Type> x y))
(Sub8U x y) -> (SUBB x y)
(Sub8 x y) -> (MOVBQSX (SUBB <v.Type> x y))
(Neg64 x) -> (NEGQ x) (Neg64 x) -> (NEGQ x)
(Neg64U x) -> (NEGQ x) (Neg32 x) -> (NEGL x)
(Neg32U x) -> (NEGL x) (Neg16 x) -> (NEGW x)
(Neg32 x) -> (MOVLQSX (NEGL <v.Type> x)) (Neg8 x) -> (NEGB x)
(Neg16U x) -> (NEGW x)
(Neg16 x) -> (MOVWQSX (NEGW <v.Type> x))
(Neg8U x) -> (NEGB x)
(Neg8 x) -> (MOVBQSX (NEGB <v.Type> x))
(Mul64 x y) -> (MULQ x y) (Mul64 x y) -> (MULQ x y)
(Mul64U x y) -> (MULQ x y)
(MulPtr x y) -> (MULQ x y) (MulPtr x y) -> (MULQ x y)
(Mul32 x y) -> (MOVLQSX (MULL <v.Type> x y)) (Mul32 x y) -> (MULL x y)
(Mul32U x y) -> (MULL x y) (Mul16 x y) -> (MULW x y)
(Mul16 x y) -> (MOVWQSX (MULW <v.Type> x y))
(Mul16U x y) -> (MULW x y)
// Note: we use 16-bit multiply instructions for 8-bit multiplies because // Note: we use 16-bit multiply instructions for 8-bit multiplies because
// the 16-bit multiply instructions are more forgiving (they operate on // the 16-bit multiply instructions are more forgiving (they operate on
// any register instead of just AX/DX). // any register instead of just AX/DX).
(Mul8 x y) -> (MOVBQSX (MULW <TypeInt16> x y)) (Mul8 x y) -> (MULW x y)
(Mul8U x y) -> (MOVBQZX (MULW <TypeUInt16> x y))
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 x) -> (MOVBQSX x)
(SignExt8to32 x) -> (MOVBQSX x)
(SignExt8to64 x) -> (MOVBQSX x)
(SignExt16to32 x) -> (MOVWQSX x)
(SignExt16to64 x) -> (MOVWQSX x)
(SignExt32to64 x) -> (MOVLQSX x)
(ZeroExt8to16 x) -> (MOVBQZX x)
(ZeroExt8to32 x) -> (MOVBQZX x)
(ZeroExt8to64 x) -> (MOVBQZX x)
(ZeroExt16to32 x) -> (MOVWQZX x)
(ZeroExt16to64 x) -> (MOVWQZX x)
(ZeroExt32to64 x) -> (MOVLQZX x)
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 x) -> (Copy x)
(Trunc32to8 x) -> (Copy x)
(Trunc32to16 x) -> (Copy x)
(Trunc64to8 x) -> (Copy x)
(Trunc64to16 x) -> (Copy x)
(Trunc64to32 x) -> (Copy x)
(MOVLstore ptr (MOVLQSX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQSX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQSX x) mem) -> (MOVBstore ptr x mem)
(MOVLstore ptr (MOVLQZX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQZX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQZX x) mem) -> (MOVBstore ptr x mem)
(Convert <t> x) && t.IsInteger() && x.Type.IsInteger() -> (Copy x)
(ConvNop <t> x) && t == x.Type -> (Copy x) (ConvNop <t> x) && t == x.Type -> (Copy x)
(ConvNop <t> x) && t.IsInteger() && x.Type.IsInteger() && t.Size() == x.Type.Size() -> (Copy x)
// TODO: other ConvNops are safe? Maybe all of them?
// Lowering shifts // Lowering shifts
// Note: unsigned shifts need to return 0 if shift amount is >= 64. // Note: unsigned shifts need to return 0 if shift amount is >= 64.
// mask = shift >= 64 ? 0 : 0xffffffffffffffff // mask = shift >= 64 ? 0 : 0xffffffffffffffff
// result = mask & arg << shift // result = mask & arg << shift
(Lsh64 <t> x y) -> (Lsh64 <t> x y) && y.Type.Size() == 8 ->
(ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y))) (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y)))
(Rsh64U <t> x y) -> (Rsh64U <t> x y) && y.Type.Size() == 8 ->
(ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y))) (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y)))
// Note: signed right shift needs to return 0/-1 if shift amount is >= 64. // Note: signed right shift needs to return 0/-1 if shift amount is >= 64.
// if shift > 63 { shift = 63 } // if shift > 63 { shift = 63 }
// result = arg >> shift // result = arg >> shift
(Rsh64 <t> x y) -> (Rsh64 <t> x y) && y.Type.Size() == 8 ->
(SARQ <t> x (CMOVQCC <t> (SARQ <t> x (CMOVQCC <t>
(CMPQconst <TypeFlags> [64] y) (CMPQconst <TypeFlags> [64] y)
(Const <t> [63]) (Const <t> [63])
...@@ -187,6 +181,19 @@ ...@@ -187,6 +181,19 @@
(SETL (InvertFlags x)) -> (SETG x) (SETL (InvertFlags x)) -> (SETG x)
(SETG (InvertFlags x)) -> (SETL x) (SETG (InvertFlags x)) -> (SETL x)
// sign extended loads
(MOVBQSX (MOVBload ptr mem)) -> (MOVBQSXload ptr mem)
(MOVBQZX (MOVBload ptr mem)) -> (MOVBQZXload ptr mem)
// TODO: more
// Don't extend before storing
(MOVLstore ptr (MOVLQSX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQSX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQSX x) mem) -> (MOVBstore ptr x mem)
(MOVLstore ptr (MOVLQZX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQZX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQZX x) mem) -> (MOVBstore ptr x mem)
// fold constants into memory operations // fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of // Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now // the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
......
...@@ -146,8 +146,8 @@ func init() { ...@@ -146,8 +146,8 @@ func init() {
{name: "LEAQ8", reg: gp21sb}, // arg0 + 8*arg1 + auxint {name: "LEAQ8", reg: gp21sb}, // arg0 + 8*arg1 + auxint
{name: "MOVBload", reg: gpload, asm: "MOVB"}, // load byte from arg0+auxint. arg1=mem {name: "MOVBload", reg: gpload, asm: "MOVB"}, // load byte from arg0+auxint. arg1=mem
{name: "MOVBQZXload", reg: gpload}, // ditto, extend to uint64 {name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"}, // ditto, extend to int64
{name: "MOVBQSXload", reg: gpload}, // ditto, extend to int64 {name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"}, // ditto, extend to uint64
{name: "MOVWload", reg: gpload, asm: "MOVW"}, // load 2 bytes from arg0+auxint. arg1=mem {name: "MOVWload", reg: gpload, asm: "MOVW"}, // load 2 bytes from arg0+auxint. arg1=mem
{name: "MOVLload", reg: gpload, asm: "MOVL"}, // load 4 bytes from arg0+auxint. arg1=mem {name: "MOVLload", reg: gpload, asm: "MOVL"}, // load 4 bytes from arg0+auxint. arg1=mem
{name: "MOVQload", reg: gpload, asm: "MOVQ"}, // load 8 bytes from arg0+auxint. arg1=mem {name: "MOVQload", reg: gpload, asm: "MOVQ"}, // load 8 bytes from arg0+auxint. arg1=mem
......
...@@ -21,10 +21,8 @@ ...@@ -21,10 +21,8 @@
// constant folding // constant folding
(Add64 (Const [c]) (Const [d])) -> (Const [c+d]) (Add64 (Const [c]) (Const [d])) -> (Const [c+d])
(Add64U (Const [c]) (Const [d])) -> (Const [c+d])
(AddPtr (Const [c]) (Const [d])) -> (Const [c+d]) (AddPtr (Const [c]) (Const [d])) -> (Const [c+d])
(Mul64 (Const [c]) (Const [d])) -> (Const [c*d]) (Mul64 (Const [c]) (Const [d])) -> (Const [c*d])
(Mul64U (Const [c]) (Const [d])) -> (Const [c*d])
(MulPtr (Const [c]) (Const [d])) -> (Const [c*d]) (MulPtr (Const [c]) (Const [d])) -> (Const [c*d])
(IsInBounds (Const [c]) (Const [d])) -> (Const {inBounds(c,d)}) (IsInBounds (Const [c]) (Const [d])) -> (Const {inBounds(c,d)})
......
...@@ -12,10 +12,6 @@ var genericOps = []opData{ ...@@ -12,10 +12,6 @@ var genericOps = []opData{
{name: "Add16"}, {name: "Add16"},
{name: "Add32"}, {name: "Add32"},
{name: "Add64"}, {name: "Add64"},
{name: "Add8U"},
{name: "Add16U"},
{name: "Add32U"},
{name: "Add64U"},
{name: "AddPtr"}, {name: "AddPtr"},
// TODO: Add32F, Add64F, Add64C, Add128C // TODO: Add32F, Add64F, Add64C, Add128C
...@@ -23,30 +19,18 @@ var genericOps = []opData{ ...@@ -23,30 +19,18 @@ var genericOps = []opData{
{name: "Sub16"}, {name: "Sub16"},
{name: "Sub32"}, {name: "Sub32"},
{name: "Sub64"}, {name: "Sub64"},
{name: "Sub8U"},
{name: "Sub16U"},
{name: "Sub32U"},
{name: "Sub64U"},
// TODO: Sub32F, Sub64F, Sub64C, Sub128C // TODO: Sub32F, Sub64F, Sub64C, Sub128C
{name: "Mul8"}, // arg0 * arg1 {name: "Mul8"}, // arg0 * arg1
{name: "Mul16"}, {name: "Mul16"},
{name: "Mul32"}, {name: "Mul32"},
{name: "Mul64"}, {name: "Mul64"},
{name: "Mul8U"},
{name: "Mul16U"},
{name: "Mul32U"},
{name: "Mul64U"},
{name: "MulPtr"}, // MulPtr is used for address calculations {name: "MulPtr"}, // MulPtr is used for address calculations
{name: "And8"}, // arg0 & arg1 {name: "And8"}, // arg0 & arg1
{name: "And16"}, {name: "And16"},
{name: "And32"}, {name: "And32"},
{name: "And64"}, {name: "And64"},
{name: "And8U"},
{name: "And16U"},
{name: "And32U"},
{name: "And64U"},
{name: "Lsh8"}, // arg0 << arg1 {name: "Lsh8"}, // arg0 << arg1
{name: "Lsh16"}, {name: "Lsh16"},
...@@ -120,10 +104,6 @@ var genericOps = []opData{ ...@@ -120,10 +104,6 @@ var genericOps = []opData{
{name: "Neg16"}, {name: "Neg16"},
{name: "Neg32"}, {name: "Neg32"},
{name: "Neg64"}, {name: "Neg64"},
{name: "Neg8U"},
{name: "Neg16U"},
{name: "Neg32U"},
{name: "Neg64U"},
// Data movement // Data movement
{name: "Phi"}, // select an argument based on which predecessor block we came from {name: "Phi"}, // select an argument based on which predecessor block we came from
...@@ -132,9 +112,9 @@ var genericOps = []opData{ ...@@ -132,9 +112,9 @@ var genericOps = []opData{
// constants. Constant values are stored in the aux field. // constants. Constant values are stored in the aux field.
// booleans have a bool aux field, strings have a string aux // booleans have a bool aux field, strings have a string aux
// field, and so on. All integer types store their value // field, and so on. All integer types store their value
// in the aux field as an int64 (including int, uint64, etc.). // in the AuxInt field as an int64 (including int, uint64, etc.).
// We could store int8 as an int8, but that won't work for int, // For integer types smaller than 64 bits, only the low-order
// as it may be different widths on the host and target. // bits of the AuxInt field matter.
{name: "Const"}, {name: "Const"},
// Constant-like things // Constant-like things
...@@ -162,9 +142,27 @@ var genericOps = []opData{ ...@@ -162,9 +142,27 @@ var genericOps = []opData{
{name: "ClosureCall"}, // arg0=code pointer, arg1=context ptr, arg2=memory. Returns memory. {name: "ClosureCall"}, // arg0=code pointer, arg1=context ptr, arg2=memory. Returns memory.
{name: "StaticCall"}, // call function aux.(*gc.Sym), arg0=memory. Returns memory. {name: "StaticCall"}, // call function aux.(*gc.Sym), arg0=memory. Returns memory.
// Conversions // Conversions: signed extensions, zero (unsigned) extensions, truncations, and no-op (type only)
{name: "Convert"}, // convert arg0 to another type {name: "SignExt8to16"},
{name: "ConvNop"}, // interpret arg0 as another type {name: "SignExt8to32"},
{name: "SignExt8to64"},
{name: "SignExt16to32"},
{name: "SignExt16to64"},
{name: "SignExt32to64"},
{name: "ZeroExt8to16"},
{name: "ZeroExt8to32"},
{name: "ZeroExt8to64"},
{name: "ZeroExt16to32"},
{name: "ZeroExt16to64"},
{name: "ZeroExt32to64"},
{name: "Trunc16to8"},
{name: "Trunc32to8"},
{name: "Trunc32to16"},
{name: "Trunc64to8"},
{name: "Trunc64to16"},
{name: "Trunc64to32"},
{name: "ConvNop"},
// Automatically inserted safety checks // Automatically inserted safety checks
{name: "IsNonNil"}, // arg0 != nil {name: "IsNonNil"}, // arg0 != nil
......
...@@ -92,8 +92,8 @@ const ( ...@@ -92,8 +92,8 @@ const (
OpAMD64LEAQ4 OpAMD64LEAQ4
OpAMD64LEAQ8 OpAMD64LEAQ8
OpAMD64MOVBload OpAMD64MOVBload
OpAMD64MOVBQZXload
OpAMD64MOVBQSXload OpAMD64MOVBQSXload
OpAMD64MOVBQZXload
OpAMD64MOVWload OpAMD64MOVWload
OpAMD64MOVLload OpAMD64MOVLload
OpAMD64MOVQload OpAMD64MOVQload
...@@ -137,36 +137,20 @@ const ( ...@@ -137,36 +137,20 @@ const (
OpAdd16 OpAdd16
OpAdd32 OpAdd32
OpAdd64 OpAdd64
OpAdd8U
OpAdd16U
OpAdd32U
OpAdd64U
OpAddPtr OpAddPtr
OpSub8 OpSub8
OpSub16 OpSub16
OpSub32 OpSub32
OpSub64 OpSub64
OpSub8U
OpSub16U
OpSub32U
OpSub64U
OpMul8 OpMul8
OpMul16 OpMul16
OpMul32 OpMul32
OpMul64 OpMul64
OpMul8U
OpMul16U
OpMul32U
OpMul64U
OpMulPtr OpMulPtr
OpAnd8 OpAnd8
OpAnd16 OpAnd16
OpAnd32 OpAnd32
OpAnd64 OpAnd64
OpAnd8U
OpAnd16U
OpAnd32U
OpAnd64U
OpLsh8 OpLsh8
OpLsh16 OpLsh16
OpLsh32 OpLsh32
...@@ -228,10 +212,6 @@ const ( ...@@ -228,10 +212,6 @@ const (
OpNeg16 OpNeg16
OpNeg32 OpNeg32
OpNeg64 OpNeg64
OpNeg8U
OpNeg16U
OpNeg32U
OpNeg64U
OpPhi OpPhi
OpCopy OpCopy
OpConst OpConst
...@@ -246,7 +226,24 @@ const ( ...@@ -246,7 +226,24 @@ const (
OpZero OpZero
OpClosureCall OpClosureCall
OpStaticCall OpStaticCall
OpConvert OpSignExt8to16
OpSignExt8to32
OpSignExt8to64
OpSignExt16to32
OpSignExt16to64
OpSignExt32to64
OpZeroExt8to16
OpZeroExt8to32
OpZeroExt8to64
OpZeroExt16to32
OpZeroExt16to64
OpZeroExt32to64
OpTrunc16to8
OpTrunc32to8
OpTrunc32to16
OpTrunc64to8
OpTrunc64to16
OpTrunc64to32
OpConvNop OpConvNop
OpIsNonNil OpIsNonNil
OpIsInBounds OpIsInBounds
...@@ -769,7 +766,8 @@ var opcodeTable = [...]opInfo{ ...@@ -769,7 +766,8 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{ {
name: "MOVBQZXload", name: "MOVBQSXload",
asm: x86.AMOVBQSX,
reg: regInfo{ reg: regInfo{
inputs: []regMask{ inputs: []regMask{
4295032831, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB 4295032831, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
...@@ -781,7 +779,8 @@ var opcodeTable = [...]opInfo{ ...@@ -781,7 +779,8 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{ {
name: "MOVBQSXload", name: "MOVBQZXload",
asm: x86.AMOVBQZX,
reg: regInfo{ reg: regInfo{
inputs: []regMask{ inputs: []regMask{
4295032831, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB 4295032831, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
...@@ -1237,22 +1236,6 @@ var opcodeTable = [...]opInfo{ ...@@ -1237,22 +1236,6 @@ var opcodeTable = [...]opInfo{
name: "Add64", name: "Add64",
generic: true, generic: true,
}, },
{
name: "Add8U",
generic: true,
},
{
name: "Add16U",
generic: true,
},
{
name: "Add32U",
generic: true,
},
{
name: "Add64U",
generic: true,
},
{ {
name: "AddPtr", name: "AddPtr",
generic: true, generic: true,
...@@ -1273,22 +1256,6 @@ var opcodeTable = [...]opInfo{ ...@@ -1273,22 +1256,6 @@ var opcodeTable = [...]opInfo{
name: "Sub64", name: "Sub64",
generic: true, generic: true,
}, },
{
name: "Sub8U",
generic: true,
},
{
name: "Sub16U",
generic: true,
},
{
name: "Sub32U",
generic: true,
},
{
name: "Sub64U",
generic: true,
},
{ {
name: "Mul8", name: "Mul8",
generic: true, generic: true,
...@@ -1305,22 +1272,6 @@ var opcodeTable = [...]opInfo{ ...@@ -1305,22 +1272,6 @@ var opcodeTable = [...]opInfo{
name: "Mul64", name: "Mul64",
generic: true, generic: true,
}, },
{
name: "Mul8U",
generic: true,
},
{
name: "Mul16U",
generic: true,
},
{
name: "Mul32U",
generic: true,
},
{
name: "Mul64U",
generic: true,
},
{ {
name: "MulPtr", name: "MulPtr",
generic: true, generic: true,
...@@ -1341,22 +1292,6 @@ var opcodeTable = [...]opInfo{ ...@@ -1341,22 +1292,6 @@ var opcodeTable = [...]opInfo{
name: "And64", name: "And64",
generic: true, generic: true,
}, },
{
name: "And8U",
generic: true,
},
{
name: "And16U",
generic: true,
},
{
name: "And32U",
generic: true,
},
{
name: "And64U",
generic: true,
},
{ {
name: "Lsh8", name: "Lsh8",
generic: true, generic: true,
...@@ -1601,22 +1536,6 @@ var opcodeTable = [...]opInfo{ ...@@ -1601,22 +1536,6 @@ var opcodeTable = [...]opInfo{
name: "Neg64", name: "Neg64",
generic: true, generic: true,
}, },
{
name: "Neg8U",
generic: true,
},
{
name: "Neg16U",
generic: true,
},
{
name: "Neg32U",
generic: true,
},
{
name: "Neg64U",
generic: true,
},
{ {
name: "Phi", name: "Phi",
generic: true, generic: true,
...@@ -1674,7 +1593,75 @@ var opcodeTable = [...]opInfo{ ...@@ -1674,7 +1593,75 @@ var opcodeTable = [...]opInfo{
generic: true, generic: true,
}, },
{ {
name: "Convert", name: "SignExt8to16",
generic: true,
},
{
name: "SignExt8to32",
generic: true,
},
{
name: "SignExt8to64",
generic: true,
},
{
name: "SignExt16to32",
generic: true,
},
{
name: "SignExt16to64",
generic: true,
},
{
name: "SignExt32to64",
generic: true,
},
{
name: "ZeroExt8to16",
generic: true,
},
{
name: "ZeroExt8to32",
generic: true,
},
{
name: "ZeroExt8to64",
generic: true,
},
{
name: "ZeroExt16to32",
generic: true,
},
{
name: "ZeroExt16to64",
generic: true,
},
{
name: "ZeroExt32to64",
generic: true,
},
{
name: "Trunc16to8",
generic: true,
},
{
name: "Trunc32to8",
generic: true,
},
{
name: "Trunc32to16",
generic: true,
},
{
name: "Trunc64to8",
generic: true,
},
{
name: "Trunc64to16",
generic: true,
},
{
name: "Trunc64to32",
generic: true, generic: true,
}, },
{ {
......
...@@ -27,29 +27,6 @@ func rewriteValuegeneric(v *Value, config *Config) bool { ...@@ -27,29 +27,6 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
goto endd2f4bfaaf6c937171a287b73e5c2f73e goto endd2f4bfaaf6c937171a287b73e5c2f73e
endd2f4bfaaf6c937171a287b73e5c2f73e: endd2f4bfaaf6c937171a287b73e5c2f73e:
; ;
case OpAdd64U:
// match: (Add64U (Const [c]) (Const [d]))
// cond:
// result: (Const [c+d])
{
if v.Args[0].Op != OpConst {
goto endfedc373d8be0243cb5dbbc948996fe3a
}
c := v.Args[0].AuxInt
if v.Args[1].Op != OpConst {
goto endfedc373d8be0243cb5dbbc948996fe3a
}
d := v.Args[1].AuxInt
v.Op = OpConst
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AuxInt = c + d
return true
}
goto endfedc373d8be0243cb5dbbc948996fe3a
endfedc373d8be0243cb5dbbc948996fe3a:
;
case OpAddPtr: case OpAddPtr:
// match: (AddPtr (Const [c]) (Const [d])) // match: (AddPtr (Const [c]) (Const [d]))
// cond: // cond:
...@@ -261,29 +238,6 @@ func rewriteValuegeneric(v *Value, config *Config) bool { ...@@ -261,29 +238,6 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
goto endf4ba5346dc8a624781afaa68a8096a9a goto endf4ba5346dc8a624781afaa68a8096a9a
endf4ba5346dc8a624781afaa68a8096a9a: endf4ba5346dc8a624781afaa68a8096a9a:
; ;
case OpMul64U:
// match: (Mul64U (Const [c]) (Const [d]))
// cond:
// result: (Const [c*d])
{
if v.Args[0].Op != OpConst {
goto end88b6638d23b281a90172e80ab26549cb
}
c := v.Args[0].AuxInt
if v.Args[1].Op != OpConst {
goto end88b6638d23b281a90172e80ab26549cb
}
d := v.Args[1].AuxInt
v.Op = OpConst
v.AuxInt = 0
v.Aux = nil
v.resetArgs()
v.AuxInt = c * d
return true
}
goto end88b6638d23b281a90172e80ab26549cb
end88b6638d23b281a90172e80ab26549cb:
;
case OpMulPtr: case OpMulPtr:
// match: (MulPtr (Const [c]) (Const [d])) // match: (MulPtr (Const [c]) (Const [d]))
// cond: // cond:
......