[dev.ssa] cmd/compile: add some ARM optimization rewriting rules

Mostly constant folding rules, analogous to AMD64 ones. Along with some simplifications. Updates #15365. Change-Id: If83bc1188bb05acb982ef3a1c21704c187e3eb24 Reviewed-on: https://go-review.googlesource.com/24210 Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>

[dev.ssa] cmd/compile: add some ARM optimization rewriting rules
Mostly constant folding rules, analogous to AMD64 ones. Along with some simplifications. Updates #15365. Change-Id: If83bc1188bb05acb982ef3a1c21704c187e3eb24 Reviewed-on: https://go-review.googlesource.com/24210 Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
8599fdd9 · Cherry Zhang · 42181ad8 · 8599fdd9 · 8599fdd9 · 8599fdd9
Commit 8599fdd9 authored Jun 13, 2016 by Cherry Zhang
6 changed files
--- a/src/cmd/compile/internal/arm/ssa.go
+++ b/src/cmd/compile/internal/arm/ssa.go
@@ -117,7 +117,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		// input args need no code
 	case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
 		// nothing to do
-	case ssa.OpCopy, ssa.OpARMMOVWconvert:
+	case ssa.OpCopy, ssa.OpARMMOVWconvert, ssa.OpARMMOVWreg:
 		if v.Type.IsMemory() {
 			return
 		}
@@ -290,8 +290,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = r
 	case ssa.OpARMADDconst,
+		ssa.OpARMADCconst,
 		ssa.OpARMSUBconst,
+		ssa.OpARMSBCconst,
 		ssa.OpARMRSBconst,
+		ssa.OpARMRSCconst,
 		ssa.OpARMANDconst,
 		ssa.OpARMORconst,
 		ssa.OpARMXORconst,
@@ -305,6 +308,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.Reg = gc.SSARegNum(v.Args[0])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMADDSconst,
+		ssa.OpARMSUBSconst,
+		ssa.OpARMRSBSconst:
+		p := gc.Prog(v.Op.Asm())
+		p.Scond = arm.C_SBIT
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.AuxInt
+		p.Reg = gc.SSARegNum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpARMSRRconst:
 		p := gc.Prog(arm.AMOVW)
 		p.From.Type = obj.TYPE_SHIFT
@@ -710,6 +723,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMLoweredGetClosurePtr:
 		// Closure pointer is R7 (arm.REGCTXT).
 		gc.CheckLoweredGetClosurePtr(v)
+	case ssa.OpARMFlagEQ,
+		ssa.OpARMFlagLT_ULT,
+		ssa.OpARMFlagLT_UGT,
+		ssa.OpARMFlagGT_ULT,
+		ssa.OpARMFlagGT_UGT:
+		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
+	case ssa.OpARMInvertFlags:
+		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
 	default:
 		v.Unimplementedf("genValue not implemented: %s", v.LongString())
 	}

--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -399,11 +399,10 @@

 // Optimizations

-(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
-(ADD x (MOVWconst [c])) -> (ADDconst [c] x)
-
+// fold offset into address
 (ADDconst [off1] (MOVWaddr [off2] {sym} ptr)) -> (MOVWaddr [off1+off2] {sym} ptr)

+// fold address into load/store
 (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBload [off1+off2] {sym} ptr mem)
 (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBUload [off1+off2] {sym} ptr mem)
 (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHload [off1+off2] {sym} ptr mem)
@@ -444,5 +443,314 @@
 (MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
 	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)

+// replace load from same location as preceding store with copy
+(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) && isSigned(x.Type) -> x
+(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) && !isSigned(x.Type) -> x
+(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) && isSigned(x.Type) -> x
+(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) && !isSigned(x.Type) -> x
+(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVFload [off] {sym} ptr (MOVFstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+
+// fold constant into arithmatic ops
+(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
+(ADD x (MOVWconst [c])) -> (ADDconst [c] x)
+(SUB (MOVWconst [c]) x) -> (RSBconst [c] x)
+(SUB x (MOVWconst [c])) -> (SUBconst [c] x)
+(RSB (MOVWconst [c]) x) -> (SUBconst [c] x)
+(RSB x (MOVWconst [c])) -> (RSBconst [c] x)
+
+(ADDS (MOVWconst [c]) x) -> (ADDSconst [c] x)
+(ADDS x (MOVWconst [c])) -> (ADDSconst [c] x)
+(SUBS (MOVWconst [c]) x) -> (RSBSconst [c] x)
+(SUBS x (MOVWconst [c])) -> (SUBSconst [c] x)
+
+(ADC (MOVWconst [c]) x flags) -> (ADCconst [c] x flags)
+(ADC x (MOVWconst [c]) flags) -> (ADCconst [c] x flags)
+(SBC (MOVWconst [c]) x flags) -> (RSCconst [c] x flags)
+(SBC x (MOVWconst [c]) flags) -> (SBCconst [c] x flags)
+
+(AND (MOVWconst [c]) x) -> (ANDconst [c] x)
+(AND x (MOVWconst [c])) -> (ANDconst [c] x)
+(OR (MOVWconst [c]) x) -> (ORconst [c] x)
+(OR x (MOVWconst [c])) -> (ORconst [c] x)
+(XOR (MOVWconst [c]) x) -> (XORconst [c] x)
+(XOR x (MOVWconst [c])) -> (XORconst [c] x)
+(BIC x (MOVWconst [c])) -> (BICconst [c] x)
+
+(SLL x (MOVWconst [c])) -> (SLLconst x [c&31]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=32)
+(SRL x (MOVWconst [c])) -> (SRLconst x [c&31])
+(SRA x (MOVWconst [c])) -> (SRAconst x [c&31])
+
+(CMP x (MOVWconst [c])) -> (CMPconst [c] x)
+(CMP (MOVWconst [c]) x) -> (InvertFlags (CMPconst [c] x))
+
+(LoweredZeromask (MOVWconst [0])) -> (MOVWconst [0])
+(LoweredZeromask (MOVWconst [c])) && c != 0 -> (MOVWconst [0xffffffff])
+
+// don't extend after proper load
+// MOVWreg instruction is not emitted if src and dst registers are same, but it ensures the type.
+(MOVBreg x:(MOVBload _ _)) -> (MOVWreg x)
+(MOVBUreg x:(MOVBUload _ _)) -> (MOVWreg x)
+(MOVHreg x:(MOVBload _ _)) -> (MOVWreg x)
+(MOVHreg x:(MOVBUload _ _)) -> (MOVWreg x)
+(MOVHreg x:(MOVHload _ _)) -> (MOVWreg x)
+(MOVHUreg x:(MOVBUload _ _)) -> (MOVWreg x)
+(MOVHUreg x:(MOVHUload _ _)) -> (MOVWreg x)
+
+// fold extensions and ANDs together
+(MOVBUreg (ANDconst [c] x)) -> (ANDconst [c&0xff] x)
+(MOVHUreg (ANDconst [c] x)) -> (ANDconst [c&0xffff] x)
+(MOVBreg (ANDconst [c] x)) && c & 0x80 == 0 -> (ANDconst [c&0x7f] x)
+(MOVHreg (ANDconst [c] x)) && c & 0x8000 == 0 -> (ANDconst [c&0x7fff] x)
+
+// don't extend before store
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+
+// mul by constant
+(MUL x (MOVWconst [-1])) -> (RSBconst [0] x)
+(MUL _ (MOVWconst [0])) -> (MOVWconst [0])
+(MUL x (MOVWconst [1])) -> x
+(MUL x (MOVWconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+
+(MUL (MOVWconst [-1]) x) -> (RSBconst [0] x)
+(MUL (MOVWconst [0]) _) -> (MOVWconst [0])
+(MUL (MOVWconst [1]) x) -> x
+(MUL (MOVWconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+
+(MULA x (MOVWconst [-1]) a) -> (SUB a x)
+(MULA _ (MOVWconst [0]) a) -> a
+(MULA x (MOVWconst [1]) a) -> (ADD x a)
+(MULA x (MOVWconst [c]) a) && isPowerOfTwo(c) -> (ADD (SLLconst <x.Type> [log2(c)] x) a)
+
+(MULA (MOVWconst [-1]) x a) -> (SUB a x)
+(MULA (MOVWconst [0]) _ a) -> a
+(MULA (MOVWconst [1]) x a) -> (ADD x a)
+(MULA (MOVWconst [c]) x a) && isPowerOfTwo(c) -> (ADD (SLLconst <x.Type> [log2(c)] x) a)
+
+// div by constant
+(DIVU x (MOVWconst [1])) -> x
+(DIVU x (MOVWconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
+
+// constant comparisons
+(CMPconst (MOVWconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
+(CMPconst (MOVWconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
+(CMPconst (MOVWconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
+(CMPconst (MOVWconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
+(CMPconst (MOVWconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
+
+// other known comparisons
+(CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT)
+(CMPconst (MOVHUreg _) [c]) && 0xffff < c -> (FlagLT_ULT)
+(CMPconst (ANDconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
+(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint32(32-c)) <= uint32(n) -> (FlagLT_ULT)
+
+// absorb flag constants into branches
+(EQ (FlagEQ) yes no) -> (First nil yes no)
+(EQ (FlagLT_ULT) yes no) -> (First nil no yes)
+(EQ (FlagLT_UGT) yes no) -> (First nil no yes)
+(EQ (FlagGT_ULT) yes no) -> (First nil no yes)
+(EQ (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(NE (FlagEQ) yes no) -> (First nil no yes)
+(NE (FlagLT_ULT) yes no) -> (First nil yes no)
+(NE (FlagLT_UGT) yes no) -> (First nil yes no)
+(NE (FlagGT_ULT) yes no) -> (First nil yes no)
+(NE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(LT (FlagEQ) yes no) -> (First nil no yes)
+(LT (FlagLT_ULT) yes no) -> (First nil yes no)
+(LT (FlagLT_UGT) yes no) -> (First nil yes no)
+(LT (FlagGT_ULT) yes no) -> (First nil no yes)
+(LT (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(LE (FlagEQ) yes no) -> (First nil yes no)
+(LE (FlagLT_ULT) yes no) -> (First nil yes no)
+(LE (FlagLT_UGT) yes no) -> (First nil yes no)
+(LE (FlagGT_ULT) yes no) -> (First nil no yes)
+(LE (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(GT (FlagEQ) yes no) -> (First nil no yes)
+(GT (FlagLT_ULT) yes no) -> (First nil no yes)
+(GT (FlagLT_UGT) yes no) -> (First nil no yes)
+(GT (FlagGT_ULT) yes no) -> (First nil yes no)
+(GT (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(GE (FlagEQ) yes no) -> (First nil yes no)
+(GE (FlagLT_ULT) yes no) -> (First nil no yes)
+(GE (FlagLT_UGT) yes no) -> (First nil no yes)
+(GE (FlagGT_ULT) yes no) -> (First nil yes no)
+(GE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(ULT (FlagEQ) yes no) -> (First nil no yes)
+(ULT (FlagLT_ULT) yes no) -> (First nil yes no)
+(ULT (FlagLT_UGT) yes no) -> (First nil no yes)
+(ULT (FlagGT_ULT) yes no) -> (First nil yes no)
+(ULT (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(ULE (FlagEQ) yes no) -> (First nil yes no)
+(ULE (FlagLT_ULT) yes no) -> (First nil yes no)
+(ULE (FlagLT_UGT) yes no) -> (First nil no yes)
+(ULE (FlagGT_ULT) yes no) -> (First nil yes no)
+(ULE (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(UGT (FlagEQ) yes no) -> (First nil no yes)
+(UGT (FlagLT_ULT) yes no) -> (First nil no yes)
+(UGT (FlagLT_UGT) yes no) -> (First nil yes no)
+(UGT (FlagGT_ULT) yes no) -> (First nil no yes)
+(UGT (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(UGE (FlagEQ) yes no) -> (First nil yes no)
+(UGE (FlagLT_ULT) yes no) -> (First nil no yes)
+(UGE (FlagLT_UGT) yes no) -> (First nil yes no)
+(UGE (FlagGT_ULT) yes no) -> (First nil no yes)
+(UGE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+// absorb InvertFlags into branches
+(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
+(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
+(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
+(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
+(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
+(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
+(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
+(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
+(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
+(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
+
+// absorb flag constants into boolean values
+(Equal (FlagEQ)) -> (MOVWconst [1])
+(Equal (FlagLT_ULT)) -> (MOVWconst [0])
+(Equal (FlagLT_UGT)) -> (MOVWconst [0])
+(Equal (FlagGT_ULT)) -> (MOVWconst [0])
+(Equal (FlagGT_UGT)) -> (MOVWconst [0])
+
+(NotEqual (FlagEQ)) -> (MOVWconst [0])
+(NotEqual (FlagLT_ULT)) -> (MOVWconst [1])
+(NotEqual (FlagLT_UGT)) -> (MOVWconst [1])
+(NotEqual (FlagGT_ULT)) -> (MOVWconst [1])
+(NotEqual (FlagGT_UGT)) -> (MOVWconst [1])
+
+(LessThan (FlagEQ)) -> (MOVWconst [0])
+(LessThan (FlagLT_ULT)) -> (MOVWconst [1])
+(LessThan (FlagLT_UGT)) -> (MOVWconst [1])
+(LessThan (FlagGT_ULT)) -> (MOVWconst [0])
+(LessThan (FlagGT_UGT)) -> (MOVWconst [0])
+
+(LessThanU (FlagEQ)) -> (MOVWconst [0])
+(LessThanU (FlagLT_ULT)) -> (MOVWconst [1])
+(LessThanU (FlagLT_UGT)) -> (MOVWconst [0])
+(LessThanU (FlagGT_ULT)) -> (MOVWconst [1])
+(LessThanU (FlagGT_UGT)) -> (MOVWconst [0])
+
+(LessEqual (FlagEQ)) -> (MOVWconst [1])
+(LessEqual (FlagLT_ULT)) -> (MOVWconst [1])
+(LessEqual (FlagLT_UGT)) -> (MOVWconst [1])
+(LessEqual (FlagGT_ULT)) -> (MOVWconst [0])
+(LessEqual (FlagGT_UGT)) -> (MOVWconst [0])
+
+(LessEqualU (FlagEQ)) -> (MOVWconst [1])
+(LessEqualU (FlagLT_ULT)) -> (MOVWconst [1])
+(LessEqualU (FlagLT_UGT)) -> (MOVWconst [0])
+(LessEqualU (FlagGT_ULT)) -> (MOVWconst [1])
+(LessEqualU (FlagGT_UGT)) -> (MOVWconst [0])
+
+(GreaterThan (FlagEQ)) -> (MOVWconst [0])
+(GreaterThan (FlagLT_ULT)) -> (MOVWconst [0])
+(GreaterThan (FlagLT_UGT)) -> (MOVWconst [0])
+(GreaterThan (FlagGT_ULT)) -> (MOVWconst [1])
+(GreaterThan (FlagGT_UGT)) -> (MOVWconst [1])
+
+(GreaterThanU (FlagEQ)) -> (MOVWconst [0])
+(GreaterThanU (FlagLT_ULT)) -> (MOVWconst [0])
+(GreaterThanU (FlagLT_UGT)) -> (MOVWconst [1])
+(GreaterThanU (FlagGT_ULT)) -> (MOVWconst [0])
+(GreaterThanU (FlagGT_UGT)) -> (MOVWconst [1])
+
+(GreaterEqual (FlagEQ)) -> (MOVWconst [1])
+(GreaterEqual (FlagLT_ULT)) -> (MOVWconst [0])
+(GreaterEqual (FlagLT_UGT)) -> (MOVWconst [0])
+(GreaterEqual (FlagGT_ULT)) -> (MOVWconst [1])
+(GreaterEqual (FlagGT_UGT)) -> (MOVWconst [1])
+
+(GreaterEqualU (FlagEQ)) -> (MOVWconst [1])
+(GreaterEqualU (FlagLT_ULT)) -> (MOVWconst [0])
+(GreaterEqualU (FlagLT_UGT)) -> (MOVWconst [1])
+(GreaterEqualU (FlagGT_ULT)) -> (MOVWconst [0])
+(GreaterEqualU (FlagGT_UGT)) -> (MOVWconst [1])
+
+// absorb InvertFlags into boolean values
+(Equal (InvertFlags x)) -> (Equal x)
+(NotEqual (InvertFlags x)) -> (NotEqual x)
+(LessThan (InvertFlags x)) -> (GreaterThan x)
+(LessThanU (InvertFlags x)) -> (GreaterThanU x)
+(GreaterThan (InvertFlags x)) -> (LessThan x)
+(GreaterThanU (InvertFlags x)) -> (LessThanU x)
+(LessEqual (InvertFlags x)) -> (GreaterEqual x)
+(LessEqualU (InvertFlags x)) -> (GreaterEqualU x)
+(GreaterEqual (InvertFlags x)) -> (LessEqual x)
+(GreaterEqualU (InvertFlags x)) -> (LessEqualU x)
+
+// remove redundant *const ops
+(ADDconst [0] x) -> x
+(SUBconst [0] x) -> x
+(ANDconst [0] _) -> (MOVWconst [0])
+(ANDconst [c] x) && int32(c)==-1 -> x
+(ORconst [0] x) -> x
+(ORconst [c] _) && int32(c)==-1 -> (MOVWconst [-1])
+(XORconst [0] x) -> x
+(BICconst [0] x) -> x
+(BICconst [c] _) && int32(c)==-1 -> (MOVWconst [0])
+
+// generic constant folding
+(ADDconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c+d))])
+(ADDconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(c+d))] x)
+(ADDconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x)
+(ADDconst [c] (RSBconst [d] x)) -> (RSBconst [int64(int32(c+d))] x)
+(ADCconst [c] (ADDconst [d] x) flags) -> (ADCconst [int64(int32(c+d))] x flags)
+(ADCconst [c] (SUBconst [d] x) flags) -> (ADCconst [int64(int32(c-d))] x flags)
+(SUBconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(d-c))])
+(SUBconst [c] (SUBconst [d] x)) -> (ADDconst [int64(int32(-c-d))] x)
+(SUBconst [c] (ADDconst [d] x)) -> (ADDconst [int64(int32(-c+d))] x)
+(SUBconst [c] (RSBconst [d] x)) -> (RSBconst [int64(int32(-c+d))] x)
+(SBCconst [c] (ADDconst [d] x) flags) -> (SBCconst [int64(int32(c-d))] x flags)
+(SBCconst [c] (SUBconst [d] x) flags) -> (SBCconst [int64(int32(c+d))] x flags)
+(RSBconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(c-d))])
+(RSBconst [c] (RSBconst [d] x)) -> (ADDconst [int64(int32(c-d))] x)
+(RSBconst [c] (ADDconst [d] x)) -> (RSBconst [int64(int32(c-d))] x)
+(RSBconst [c] (SUBconst [d] x)) -> (RSBconst [int64(int32(c+d))] x)
+(RSCconst [c] (ADDconst [d] x) flags) -> (RSCconst [int64(int32(c-d))] x flags)
+(RSCconst [c] (SUBconst [d] x) flags) -> (RSCconst [int64(int32(c+d))] x flags)
+(SLLconst [c] (MOVWconst [d])) -> (MOVWconst [int64(uint32(d)<<uint64(c))])
+(SRLconst [c] (MOVWconst [d])) -> (MOVWconst [int64(uint32(d)>>uint64(c))])
+(SRAconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int32(d)>>uint64(c))])
+(MUL (MOVWconst [c]) (MOVWconst [d])) -> (MOVWconst [int64(int32(c*d))])
+(MULA (MOVWconst [c]) (MOVWconst [d]) a) -> (ADDconst [int64(int32(c*d))] a)
+(DIV (MOVWconst [c]) (MOVWconst [d])) -> (MOVWconst [int64(int32(c)/int32(d))])
+(DIVU (MOVWconst [c]) (MOVWconst [d])) -> (MOVWconst [int64(uint32(c)/uint32(d))])
+(ANDconst [c] (MOVWconst [d])) -> (MOVWconst [c&d])
+(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
+(ORconst [c] (MOVWconst [d])) -> (MOVWconst [c|d])
+(ORconst [c] (ORconst [d] x)) -> (ORconst [c|d] x)
+(XORconst [c] (MOVWconst [d])) -> (MOVWconst [c^d])
+(XORconst [c] (XORconst [d] x)) -> (XORconst [c^d] x)
+(BICconst [c] (MOVWconst [d])) -> (MOVWconst [d&^c])
+(MVN (MOVWconst [c])) -> (MOVWconst [^c])
+
+// generic simplifications
+(ADD x (RSBconst [0] y)) -> (SUB x y)
+(SUB x x) -> (MOVWconst [0])
+(AND x x) -> x
+(OR x x) -> x
+(XOR x x) -> (MOVWconst [0])
+(BIC x x) -> (MOVWconst [0])
+
 (ADD (MUL x y) a) -> (MULA x y a)
 (ADD a (MUL x y)) -> (MULA x y a)
+
+(AND x (MVN y)) -> (BIC x y)
--- a/src/cmd/compile/internal/ssa/gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -101,8 +101,10 @@ func init() {
 	var (
 		gp01      = regInfo{inputs: []regMask{}, outputs: []regMask{gp}}
 		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
+		gp11cf    = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}, clobbers: flags} // cf: clobbers flags
 		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
 		gp1flags  = regInfo{inputs: []regMask{gpg}, outputs: []regMask{flags}}
+		gp1flags1 = regInfo{inputs: []regMask{gp, flags}, outputs: []regMask{gp}}
 		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
 		gp21cf    = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}, clobbers: flags} // cf: clobbers flags
 		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{flags}}
@@ -137,9 +139,15 @@ func init() {
 		{name: "MODU", argLength: 2, reg: gp21cf, asm: "MODU"},                    // arg0 % arg1, unsigned

 		{name: "ADDS", argLength: 2, reg: gp21cf, asm: "ADD", commutative: true},   // arg0 + arg1, set carry flag
+		{name: "ADDSconst", argLength: 1, reg: gp11cf, asm: "ADD", aux: "Int32"},   // arg0 + auxInt, set carry flag
 		{name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags
+		{name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags
 		{name: "SUBS", argLength: 2, reg: gp21cf, asm: "SUB"},                      // arg0 - arg1, set carry flag
+		{name: "SUBSconst", argLength: 1, reg: gp11cf, asm: "SUB", aux: "Int32"},   // arg0 - auxInt, set carry flag
+		{name: "RSBSconst", argLength: 1, reg: gp11cf, asm: "RSB", aux: "Int32"},   // auxInt - arg0, set carry flag
 		{name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                    // arg0 - arg1 - carry, arg2=flags
+		{name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags
+		{name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags

 		{name: "MULLU", argLength: 2, reg: regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp &^ buildReg("R0")}, clobbers: buildReg("R0")}, asm: "MULLU", commutative: true}, // arg0 * arg1, results 64-bit, high 32-bit in R0
 		{name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                                                                                                                        // arg0 * arg1 + arg2
@@ -211,6 +219,7 @@ func init() {
 		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
 		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVHS"},  // move from arg0, sign-extended from half
 		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
+		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0

 		{name: "MOVWF", argLength: 1, reg: gpfp, asm: "MOVWF"},  // int32 -> float32
 		{name: "MOVWD", argLength: 1, reg: gpfp, asm: "MOVWD"},  // int32 -> float64
@@ -366,6 +375,23 @@ func init() {
 		// gets correctly ordered with respect to GC safepoints.
 		// arg0=ptr/int arg1=mem, output=int/ptr
 		{name: "MOVWconvert", argLength: 2, reg: gp11, asm: "MOVW"},
+
+		// Constant flag values. For any comparison, there are 5 possible
+		// outcomes: the three from the signed total order (<,==,>) and the
+		// three from the unsigned total order. The == cases overlap.
+		// Note: there's a sixth "unordered" outcome for floating-point
+		// comparisons, but we don't use such a beast yet.
+		// These ops are for temporary use by rewrite rules. They
+		// cannot appear in the generated assembly.
+		{name: "FlagEQ"},     // equal
+		{name: "FlagLT_ULT"}, // signed < and unsigned <
+		{name: "FlagLT_UGT"}, // signed < and unsigned >
+		{name: "FlagGT_UGT"}, // signed > and unsigned <
+		{name: "FlagGT_ULT"}, // signed > and unsigned >
+
+		// (InvertFlags (CMP a b)) == (CMP b a)
+		// InvertFlags is a pseudo-op which can't appear in assembly output.
+		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
 	}

 	blocks := []blockData{

--- a/src/cmd/compile/internal/ssa/op.go
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -156,7 +156,8 @@ func MakeSizeAndAlign(size, align int64) SizeAndAlign {
 func (op Op) isTupleGenerator() bool {
 	switch op {
 	case OpAdd32carry, OpSub32carry, OpMul32uhilo,
-		OpARMADDS, OpARMSUBS, OpARMMULLU:
+		OpARMADDS, OpARMSUBS, OpARMMULLU,
+		OpARMADDSconst, OpARMSUBSconst, OpARMRSBSconst:
 		return true
 	}
 	return false

--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -360,9 +360,15 @@ const (
 	OpARMMOD
 	OpARMMODU
 	OpARMADDS
+	OpARMADDSconst
 	OpARMADC
+	OpARMADCconst
 	OpARMSUBS
+	OpARMSUBSconst
+	OpARMRSBSconst
 	OpARMSBC
+	OpARMSBCconst
+	OpARMRSCconst
 	OpARMMULLU
 	OpARMMULA
 	OpARMADDF
@@ -420,6 +426,7 @@ const (
 	OpARMMOVBUreg
 	OpARMMOVHreg
 	OpARMMOVHUreg
+	OpARMMOVWreg
 	OpARMMOVWF
 	OpARMMOVWD
 	OpARMMOVWUF
@@ -458,6 +465,12 @@ const (
 	OpARMLoweredMoveU
 	OpARMLoweredGetClosurePtr
 	OpARMMOVWconvert
+	OpARMFlagEQ
+	OpARMFlagLT_ULT
+	OpARMFlagLT_UGT
+	OpARMFlagGT_UGT
+	OpARMFlagGT_ULT
+	OpARMInvertFlags

 	OpPPC64ADD
 	OpPPC64ADDconst
@@ -4212,6 +4225,21 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "ADDSconst",
+		auxType: auxInt32,
+		argLen:  1,
+		asm:     arm.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
+			},
+			clobbers: 4294967296, // FLAGS
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
 	{
 		name:        "ADC",
 		argLen:      3,
@@ -4228,6 +4256,21 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "ADCconst",
+		auxType: auxInt32,
+		argLen:  2,
+		asm:     arm.AADC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 4294967296}, // FLAGS
+				{0, 5119},       // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
 	{
 		name:   "SUBS",
 		argLen: 2,
@@ -4243,6 +4286,36 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "SUBSconst",
+		auxType: auxInt32,
+		argLen:  1,
+		asm:     arm.ASUB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
+			},
+			clobbers: 4294967296, // FLAGS
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
+	{
+		name:    "RSBSconst",
+		auxType: auxInt32,
+		argLen:  1,
+		asm:     arm.ARSB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
+			},
+			clobbers: 4294967296, // FLAGS
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
 	{
 		name:   "SBC",
 		argLen: 3,
@@ -4258,6 +4331,36 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "SBCconst",
+		auxType: auxInt32,
+		argLen:  2,
+		asm:     arm.ASBC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 4294967296}, // FLAGS
+				{0, 5119},       // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
+	{
+		name:    "RSCconst",
+		auxType: auxInt32,
+		argLen:  2,
+		asm:     arm.ARSC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 4294967296}, // FLAGS
+				{0, 5119},       // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
 	{
 		name:        "MULLU",
 		argLen:      2,
@@ -5049,6 +5152,19 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "MOVWreg",
+		argLen: 1,
+		asm:    arm.AMOVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 6143}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12
+			},
+			outputs: []regMask{
+				5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
+			},
+		},
+	},
 	{
 		name:   "MOVWF",
 		argLen: 1,
@@ -5492,6 +5608,36 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "FlagEQ",
+		argLen: 0,
+		reg:    regInfo{},
+	},
+	{
+		name:   "FlagLT_ULT",
+		argLen: 0,
+		reg:    regInfo{},
+	},
+	{
+		name:   "FlagLT_UGT",
+		argLen: 0,
+		reg:    regInfo{},
+	},
+	{
+		name:   "FlagGT_UGT",
+		argLen: 0,
+		reg:    regInfo{},
+	},
+	{
+		name:   "FlagGT_ULT",
+		argLen: 0,
+		reg:    regInfo{},
+	},
+	{
+		name:   "InvertFlags",
+		argLen: 1,
+		reg:    regInfo{},
+	},

 	{
 		name:        "ADD",

--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go