Commit a0418063, authored by Ben Shi, committed by Cherry Zhang

cmd/internal/obj/arm: use new form of MOVW introduced in ARMv7

As discussed in issue #18293, "MOVW $Imm-16, Reg" was introduced in
ARMv7. It encodes the 16-bit immediate directly into the instruction
instead of putting it in the constant pool.

This patch makes the arm assembler choose this form of MOVW if available.

Besides saving 4 bytes in the constant pool, the go1 benchmark test
also shows a slight improvement.

name                     old time/op    new time/op    delta
BinaryTree17-4              42.7s ± 1%     42.7s ± 1%    ~     (p=0.304 n=50+50)
Fannkuch11-4                24.8s ± 1%     24.8s ± 0%    ~     (p=0.757 n=50+49)
FmtFprintfEmpty-4           875ns ± 1%     873ns ± 2%    ~     (p=0.066 n=44+46)
FmtFprintfString-4         1.43µs ± 1%    1.45µs ± 1%  +1.68%  (p=0.000 n=44+44)
FmtFprintfInt-4            1.52µs ± 1%    1.52µs ± 1%  +0.26%  (p=0.009 n=41+45)
FmtFprintfIntInt-4         2.19µs ± 1%    2.20µs ± 1%  +0.76%  (p=0.000 n=43+46)
FmtFprintfPrefixedInt-4    2.56µs ± 2%    2.53µs ± 1%  -1.03%  (p=0.000 n=45+44)
FmtFprintfFloat-4          4.41µs ± 1%    4.39µs ± 1%  -0.52%  (p=0.000 n=44+44)
FmtManyArgs-4              9.02µs ± 2%    9.04µs ± 1%  +0.27%  (p=0.000 n=46+44)
GobDecode-4                 106ms ± 1%     106ms ± 1%    ~     (p=0.310 n=45+43)
GobEncode-4                88.1ms ± 2%    88.0ms ± 2%    ~     (p=0.648 n=49+50)
Gzip-4                      4.31s ± 1%     4.27s ± 1%  -1.01%  (p=0.000 n=50+50)
Gunzip-4                    618ms ± 1%     608ms ± 1%  -1.65%  (p=0.000 n=45+47)
HTTPClientServer-4          689µs ± 6%     692µs ± 4%  +0.52%  (p=0.038 n=50+47)
JSONEncode-4                282ms ± 2%     280ms ± 1%  -0.75%  (p=0.000 n=46+43)
JSONDecode-4                945ms ± 2%     940ms ± 1%  -0.47%  (p=0.000 n=47+47)
Mandelbrot200-4            49.4ms ± 1%    49.3ms ± 1%    ~     (p=0.163 n=45+45)
GoParse-4                  46.0ms ± 3%    45.5ms ± 2%  -0.95%  (p=0.000 n=49+40)
RegexpMatchEasy0_32-4      1.29µs ± 1%    1.28µs ± 1%  -0.14%  (p=0.005 n=38+45)
RegexpMatchEasy0_1K-4      7.92µs ± 8%    7.75µs ± 6%  -2.12%  (p=0.000 n=47+50)
RegexpMatchEasy1_32-4      1.31µs ± 1%    1.31µs ± 0%    ~     (p=0.282 n=45+48)
RegexpMatchEasy1_1K-4      10.4µs ± 5%    10.4µs ± 3%    ~     (p=0.771 n=50+49)
RegexpMatchMedium_32-4     2.06µs ± 1%    2.07µs ± 1%  +0.35%  (p=0.001 n=44+49)
RegexpMatchMedium_1K-4      533µs ± 1%     532µs ± 1%    ~     (p=0.710 n=43+47)
RegexpMatchHard_32-4       29.7µs ± 1%    29.6µs ± 1%  -0.34%  (p=0.002 n=43+46)
RegexpMatchHard_1K-4        893µs ± 2%     885µs ± 1%  -0.85%  (p=0.000 n=50+45)
Revcomp-4                  85.6ms ± 4%    85.5ms ± 2%    ~     (p=0.683 n=50+50)
Template-4                  1.05s ± 3%     1.04s ± 1%  -1.06%  (p=0.000 n=50+44)
TimeParse-4                7.19µs ± 2%    7.11µs ± 2%  -1.10%  (p=0.000 n=48+46)
TimeFormat-4               13.4µs ± 1%    13.5µs ± 1%    ~     (p=0.056 n=46+49)
[Geo mean]                  747µs          745µs       -0.28%

name                     old speed      new speed      delta
GobDecode-4              7.23MB/s ± 1%  7.22MB/s ± 1%    ~     (p=0.062 n=45+39)
GobEncode-4              8.71MB/s ± 2%  8.72MB/s ± 2%    ~     (p=0.656 n=49+50)
Gzip-4                   4.50MB/s ± 1%  4.55MB/s ± 1%  +1.03%  (p=0.000 n=50+50)
Gunzip-4                 31.4MB/s ± 1%  31.9MB/s ± 1%  +1.67%  (p=0.000 n=45+47)
JSONEncode-4             6.89MB/s ± 2%  6.94MB/s ± 1%  +0.76%  (p=0.000 n=46+43)
JSONDecode-4             2.05MB/s ± 2%  2.06MB/s ± 2%  +0.32%  (p=0.017 n=47+50)
GoParse-4                1.26MB/s ± 3%  1.27MB/s ± 1%  +0.68%  (p=0.000 n=50+48)
RegexpMatchEasy0_32-4    24.9MB/s ± 1%  24.9MB/s ± 1%  +0.13%  (p=0.004 n=38+45)
RegexpMatchEasy0_1K-4     129MB/s ± 7%   132MB/s ± 6%  +2.34%  (p=0.000 n=46+50)
RegexpMatchEasy1_32-4    24.5MB/s ± 1%  24.4MB/s ± 1%    ~     (p=0.252 n=45+48)
RegexpMatchEasy1_1K-4    98.8MB/s ± 4%  98.7MB/s ± 3%    ~     (p=0.771 n=50+49)
RegexpMatchMedium_32-4    485kB/s ± 3%   480kB/s ± 0%  -0.95%  (p=0.000 n=50+38)
RegexpMatchMedium_1K-4   1.92MB/s ± 1%  1.92MB/s ± 1%    ~     (p=0.129 n=43+47)
RegexpMatchHard_32-4     1.08MB/s ± 2%  1.08MB/s ± 1%  +0.38%  (p=0.017 n=46+46)
RegexpMatchHard_1K-4     1.15MB/s ± 2%  1.16MB/s ± 1%  +0.67%  (p=0.001 n=50+49)
Revcomp-4                29.7MB/s ± 4%  29.7MB/s ± 2%    ~     (p=0.682 n=50+50)
Template-4               1.85MB/s ± 3%  1.87MB/s ± 1%  +1.04%  (p=0.000 n=50+44)
[Geo mean]               6.56MB/s       6.60MB/s       +0.47%


Change-Id: Ic2cca90133c27a08d9f1a23c65b0eed5fbd02684
Reviewed-on: https://go-review.googlesource.com/41190
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 34ee8ec1
...@@ -123,12 +123,17 @@ var optab = []Optab{ ...@@ -123,12 +123,17 @@ var optab = []Optab{
{AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0}, {AWORD, C_NONE, C_NONE, C_TLS_LE, 103, 4, 0, 0, 0},
{AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0}, {AWORD, C_NONE, C_NONE, C_TLS_IE, 104, 4, 0, 0, 0},
{AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0}, {AMOVW, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
{AMOVW, C_SCON, C_NONE, C_REG, 12, 4, 0, 0, 0},
{AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0}, {AMOVW, C_LCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
{AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4}, {AMOVW, C_LCONADDR, C_NONE, C_REG, 12, 4, 0, LFROM | LPCREL, 4},
{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0}, {AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0},
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0}, {AMVN, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0}, {ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
{AADD, C_SCON, C_REG, C_REG, 13, 8, 0, 0, 0},
{AADD, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{AMVN, C_SCON, C_NONE, C_REG, 13, 8, 0, 0, 0},
{ACMP, C_SCON, C_REG, C_NONE, 13, 8, 0, 0, 0},
{AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0}, {AADD, C_LCON, C_REG, C_REG, 13, 8, 0, LFROM, 0},
{AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, {AADD, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
{AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0}, {AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM, 0},
...@@ -1123,6 +1128,9 @@ func (c *ctxt5) aclass(a *obj.Addr) int { ...@@ -1123,6 +1128,9 @@ func (c *ctxt5) aclass(a *obj.Addr) int {
if immrot(^uint32(c.instoffset)) != 0 { if immrot(^uint32(c.instoffset)) != 0 {
return C_NCON return C_NCON
} }
if uint32(c.instoffset) <= 0xffff && objabi.GOARM == 7 {
return C_SCON
}
return C_LCON return C_LCON
case obj.NAME_EXTERN, case obj.NAME_EXTERN,
...@@ -1217,7 +1225,7 @@ func cmp(a int, b int) bool { ...@@ -1217,7 +1225,7 @@ func cmp(a int, b int) bool {
} }
switch a { switch a {
case C_LCON: case C_LCON:
if b == C_RCON || b == C_NCON { if b == C_RCON || b == C_NCON || b == C_SCON {
return true return true
} }
...@@ -1674,14 +1682,22 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -1674,14 +1682,22 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
} }
case 12: /* movw $lcon, reg */ case 12: /* movw $lcon, reg */
o1 = c.omvl(p, &p.From, int(p.To.Reg)) if o.a1 == C_SCON {
o1 = c.omvs(p, &p.From, int(p.To.Reg))
} else {
o1 = c.omvl(p, &p.From, int(p.To.Reg))
}
if o.flag&LPCREL != 0 { if o.flag&LPCREL != 0 {
o2 = c.oprrr(p, AADD, int(p.Scond)) | (uint32(p.To.Reg)&15)<<0 | (REGPC&15)<<16 | (uint32(p.To.Reg)&15)<<12 o2 = c.oprrr(p, AADD, int(p.Scond)) | (uint32(p.To.Reg)&15)<<0 | (REGPC&15)<<16 | (uint32(p.To.Reg)&15)<<12
} }
case 13: /* op $lcon, [R], R */ case 13: /* op $lcon, [R], R */
o1 = c.omvl(p, &p.From, REGTMP) if o.a1 == C_SCON {
o1 = c.omvs(p, &p.From, REGTMP)
} else {
o1 = c.omvl(p, &p.From, REGTMP)
}
if o1 == 0 { if o1 == 0 {
break break
...@@ -2827,6 +2843,17 @@ func (c *ctxt5) ofsr(a obj.As, r int, v int32, b int, sc int, p *obj.Prog) uint3 ...@@ -2827,6 +2843,17 @@ func (c *ctxt5) ofsr(a obj.As, r int, v int32, b int, sc int, p *obj.Prog) uint3
return o return o
} }
// omvs assembles the short-constant form MOVW $"lower 16-bit", Reg.
// The low 16 bits of a.Offset are packed into the returned instruction
// word itself, and dr selects the destination register.
func (c *ctxt5) omvs(p *obj.Prog, a *obj.Addr, dr int) uint32 {
	imm := uint32(a.Offset)
	cond := ((uint32(p.Scond) & C_SCOND) ^ C_SCOND_XOR) << 28
	return cond |
		0x30<<20 | // fixed bits identifying this MOVW form
		(imm&0xf000)<<4 | // imm[15:12] -> bits 19:16
		(uint32(dr)&15)<<12 | // destination register -> bits 15:12
		imm&0x0fff // imm[11:0] -> bits 11:0
}
func (c *ctxt5) omvl(p *obj.Prog, a *obj.Addr, dr int) uint32 { func (c *ctxt5) omvl(p *obj.Prog, a *obj.Addr, dr int) uint32 {
var o1 uint32 var o1 uint32
if p.Pcond == nil { if p.Pcond == nil {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment