Commit 9c99512d authored by Ilya Tocar

cmd/compile/internal/ssa: combine consecutive loads and stores on amd64

Sometimes (often for calls) we generate code like this:

MOVQ  (addr),AX
MOVQ  8(addr),BX
MOVQ  AX,(otheraddr)
MOVQ  BX,8(otheraddr)

Replace it with

MOVUPS (addr),X0
MOVUPS X0,(otheraddr)

For completeness, do the same for 8-, 16-, and 32-bit loads/stores too.
Shaves 1% from code sections of go tool.

/localdisk/itocar/golang/bin/go 10293917
go_old 10334877 [40960 bytes]

read-only data = 682 bytes (0.040769%)
global text (code) = 38961 bytes (1.036503%)
Total difference 39643 bytes (0.674628%)

Updates #6853

Change-Id: I1f0d2f60273a63a079b58927cd1c4e3429d2e7ae
Reviewed-on: https://go-review.googlesource.com/57130
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent b40831b1
...@@ -24,9 +24,10 @@ import ( ...@@ -24,9 +24,10 @@ import (
// architecture-specific, and they are grouped in arrays of tests, one // architecture-specific, and they are grouped in arrays of tests, one
// for each architecture. // for each architecture.
// //
// Each asmTest consists in a function to be compiled and an array of // Each asmTest consists of a function to compile, an array of
// regexps that will be matched to the generated assembly. For // positiveRegexps that will be matched to the generated assembly and
// example, the following amd64 test // an array of negativeRegexps that must not match generated assembly.
// For example, the following amd64 test
// //
// { // {
// ` // `
...@@ -35,10 +36,11 @@ import ( ...@@ -35,10 +36,11 @@ import (
// } // }
// `, // `,
// []string{"\tSHLQ\t[$]6,"}, // []string{"\tSHLQ\t[$]6,"},
// []string{"MULQ"}
// } // }
// //
// verifies that the code the compiler generates for a multiplication // verifies that the code the compiler generates for a multiplication
// by 64 contains a 'SHLQ' instruction. // by 64 contains a 'SHLQ' instruction and does not contain a MULQ.
// //
// Since all the tests for a given architecture are dumped in the same // Since all the tests for a given architecture are dumped in the same
// file, the function names must be unique. As a workaround for this // file, the function names must be unique. As a workaround for this
...@@ -52,6 +54,7 @@ import ( ...@@ -52,6 +54,7 @@ import (
// } // }
// `, // `,
// []string{"\tSHLQ\t[$]6,"}, // []string{"\tSHLQ\t[$]6,"},
// []string{"MULQ"}
// } // }
// //
// Each '$'-function will be given a unique name of form f<N>_<arch>, // Each '$'-function will be given a unique name of form f<N>_<arch>,
...@@ -124,16 +127,22 @@ func funcAsm(t *testing.T, asm string, funcName string) string { ...@@ -124,16 +127,22 @@ func funcAsm(t *testing.T, asm string, funcName string) string {
type asmTest struct { type asmTest struct {
// function to compile // function to compile
function string function string
// regexps that must match the generated assembly // positiveRegexps that must match the generated assembly
regexps []string positiveRegexps []string
negativeRegexps []string
} }
func (at asmTest) verifyAsm(t *testing.T, fa string) { func (at asmTest) verifyAsm(t *testing.T, fa string) {
for _, r := range at.regexps { for _, r := range at.positiveRegexps {
if b, err := regexp.MatchString(r, fa); !b || err != nil { if b, err := regexp.MatchString(r, fa); !b || err != nil {
t.Errorf("expected:%s\ngo:%s\nasm:%s\n", r, at.function, fa) t.Errorf("expected:%s\ngo:%s\nasm:%s\n", r, at.function, fa)
} }
} }
for _, r := range at.negativeRegexps {
if b, err := regexp.MatchString(r, fa); b || err != nil {
t.Errorf("not expected:%s\ngo:%s\nasm:%s\n", r, at.function, fa)
}
}
} }
type asmTests struct { type asmTests struct {
...@@ -214,7 +223,7 @@ var allAsmTests = []*asmTests{ ...@@ -214,7 +223,7 @@ var allAsmTests = []*asmTests{
{ {
arch: "amd64", arch: "amd64",
os: "linux", os: "linux",
imports: []string{"encoding/binary", "math", "math/bits", "unsafe"}, imports: []string{"encoding/binary", "math", "math/bits", "unsafe", "runtime"},
tests: linuxAMD64Tests, tests: linuxAMD64Tests,
}, },
{ {
...@@ -262,6 +271,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -262,6 +271,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tSHLQ\t\\$6,"}, []string{"\tSHLQ\t\\$6,"},
[]string{},
}, },
{ {
` `
...@@ -270,6 +280,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -270,6 +280,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tSHLQ\t\\$5,", "\tLEAQ\t\\(.*\\)\\(.*\\*2\\),"}, []string{"\tSHLQ\t\\$5,", "\tLEAQ\t\\(.*\\)\\(.*\\*2\\),"},
[]string{},
}, },
// Load-combining tests. // Load-combining tests.
{ {
...@@ -279,6 +290,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -279,6 +290,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t\\(.*\\),"}, []string{"\tMOVQ\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -287,6 +299,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -287,6 +299,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVQ\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -295,6 +308,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -295,6 +308,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\t\\(.*\\),"}, []string{"\tMOVL\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -303,6 +317,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -303,6 +317,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -311,6 +326,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -311,6 +326,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPQ\t"}, []string{"\tBSWAPQ\t"},
[]string{},
}, },
{ {
` `
...@@ -319,6 +335,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -319,6 +335,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPQ\t"}, []string{"\tBSWAPQ\t"},
[]string{},
}, },
{ {
` `
...@@ -327,6 +344,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -327,6 +344,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPQ\t"}, []string{"\tBSWAPQ\t"},
[]string{},
}, },
{ {
` `
...@@ -335,6 +353,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -335,6 +353,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPQ\t"}, []string{"\tBSWAPQ\t"},
[]string{},
}, },
{ {
` `
...@@ -343,6 +362,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -343,6 +362,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPL\t"}, []string{"\tBSWAPL\t"},
[]string{},
}, },
{ {
` `
...@@ -351,6 +371,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -351,6 +371,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPL\t"}, []string{"\tBSWAPL\t"},
[]string{},
}, },
{ {
` `
...@@ -359,6 +380,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -359,6 +380,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPL\t"}, []string{"\tBSWAPL\t"},
[]string{},
}, },
{ {
` `
...@@ -367,6 +389,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -367,6 +389,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPL\t"}, []string{"\tBSWAPL\t"},
[]string{},
}, },
{ {
` `
...@@ -375,6 +398,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -375,6 +398,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t\\$8,"}, []string{"\tROLW\t\\$8,"},
[]string{},
}, },
{ {
` `
...@@ -383,6 +407,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -383,6 +407,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t\\$8,"}, []string{"\tROLW\t\\$8,"},
[]string{},
}, },
{ {
` `
...@@ -391,6 +416,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -391,6 +416,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t\\$8,"}, []string{"\tROLW\t\\$8,"},
[]string{},
}, },
{ {
` `
...@@ -399,6 +425,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -399,6 +425,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t\\$8,"}, []string{"\tROLW\t\\$8,"},
[]string{},
}, },
// Structure zeroing. See issue #18370. // Structure zeroing. See issue #18370.
{ {
...@@ -411,6 +438,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -411,6 +438,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tXORPS\tX., X", "\tMOVUPS\tX., \\(.*\\)", "\tMOVQ\t\\$0, 16\\(.*\\)"}, []string{"\tXORPS\tX., X", "\tMOVUPS\tX., \\(.*\\)", "\tMOVQ\t\\$0, 16\\(.*\\)"},
[]string{},
}, },
// SSA-able composite literal initialization. Issue 18872. // SSA-able composite literal initialization. Issue 18872.
{ {
...@@ -424,6 +452,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -424,6 +452,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t[$]1", "\tMOVQ\t[$]2", "\tMOVQ\t[$]3", "\tMOVQ\t[$]4"}, []string{"\tMOVQ\t[$]1", "\tMOVQ\t[$]2", "\tMOVQ\t[$]3", "\tMOVQ\t[$]4"},
[]string{},
}, },
// Also test struct containing pointers (this was special because of write barriers). // Also test struct containing pointers (this was special because of write barriers).
{ {
...@@ -436,6 +465,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -436,6 +465,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tXORPS\tX., X", "\tMOVUPS\tX., \\(.*\\)", "\tMOVQ\t\\$0, 16\\(.*\\)", "\tCALL\truntime\\.writebarrierptr\\(SB\\)"}, []string{"\tXORPS\tX., X", "\tMOVUPS\tX., \\(.*\\)", "\tMOVQ\t\\$0, 16\\(.*\\)", "\tCALL\truntime\\.writebarrierptr\\(SB\\)"},
[]string{},
}, },
// Rotate tests // Rotate tests
{ {
...@@ -445,6 +475,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -445,6 +475,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLQ\t[$]7,"}, []string{"\tROLQ\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -453,6 +484,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -453,6 +484,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLQ\t[$]7,"}, []string{"\tROLQ\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -461,6 +493,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -461,6 +493,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLQ\t[$]7,"}, []string{"\tROLQ\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -469,6 +502,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -469,6 +502,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLL\t[$]7,"}, []string{"\tROLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -477,6 +511,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -477,6 +511,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLL\t[$]7,"}, []string{"\tROLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -485,6 +520,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -485,6 +520,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLL\t[$]7,"}, []string{"\tROLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -493,6 +529,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -493,6 +529,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t[$]7,"}, []string{"\tROLW\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -501,6 +538,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -501,6 +538,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t[$]7,"}, []string{"\tROLW\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -509,6 +547,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -509,6 +547,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t[$]7,"}, []string{"\tROLW\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -517,6 +556,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -517,6 +556,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLB\t[$]7,"}, []string{"\tROLB\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -525,6 +565,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -525,6 +565,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLB\t[$]7,"}, []string{"\tROLB\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -533,6 +574,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -533,6 +574,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLB\t[$]7,"}, []string{"\tROLB\t[$]7,"},
[]string{},
}, },
// Rotate after inlining (see issue 18254). // Rotate after inlining (see issue 18254).
{ {
...@@ -545,6 +587,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -545,6 +587,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLL\t[$]7,"}, []string{"\tROLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -553,6 +596,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -553,6 +596,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t[$]5,"}, []string{"\tMOVQ\t[$]5,"},
[]string{},
}, },
// Direct use of constants in fast map access calls. Issue 19015. // Direct use of constants in fast map access calls. Issue 19015.
{ {
...@@ -563,6 +607,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -563,6 +607,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t[$]5,"}, []string{"\tMOVQ\t[$]5,"},
[]string{},
}, },
{ {
` `
...@@ -571,6 +616,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -571,6 +616,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\"abc\""}, []string{"\"abc\""},
[]string{},
}, },
{ {
` `
...@@ -580,6 +626,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -580,6 +626,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\"abc\""}, []string{"\"abc\""},
[]string{},
}, },
// Bit test ops on amd64, issue 18943. // Bit test ops on amd64, issue 18943.
{ {
...@@ -592,6 +639,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -592,6 +639,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBTQ\t"}, []string{"\tBTQ\t"},
[]string{},
}, },
{ {
` `
...@@ -600,6 +648,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -600,6 +648,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBTQ\t"}, []string{"\tBTQ\t"},
[]string{},
}, },
{ {
` `
...@@ -611,6 +660,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -611,6 +660,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBTQ\t\\$60"}, []string{"\tBTQ\t\\$60"},
[]string{},
}, },
{ {
` `
...@@ -619,6 +669,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -619,6 +669,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBTQ\t\\$60"}, []string{"\tBTQ\t\\$60"},
[]string{},
}, },
// Intrinsic tests for math/bits // Intrinsic tests for math/bits
{ {
...@@ -628,6 +679,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -628,6 +679,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSFQ\t", "\tMOVL\t\\$64,", "\tCMOVQEQ\t"}, []string{"\tBSFQ\t", "\tMOVL\t\\$64,", "\tCMOVQEQ\t"},
[]string{},
}, },
{ {
` `
...@@ -636,6 +688,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -636,6 +688,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSFQ\t", "\tORQ\t[^$]", "\tMOVQ\t\\$4294967296,"}, []string{"\tBSFQ\t", "\tORQ\t[^$]", "\tMOVQ\t\\$4294967296,"},
[]string{},
}, },
{ {
` `
...@@ -644,6 +697,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -644,6 +697,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSFQ\t", "\tORQ\t\\$65536,"}, []string{"\tBSFQ\t", "\tORQ\t\\$65536,"},
[]string{},
}, },
{ {
` `
...@@ -652,6 +706,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -652,6 +706,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSFQ\t", "\tORQ\t\\$256,"}, []string{"\tBSFQ\t", "\tORQ\t\\$256,"},
[]string{},
}, },
{ {
` `
...@@ -660,6 +715,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -660,6 +715,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPQ\t"}, []string{"\tBSWAPQ\t"},
[]string{},
}, },
{ {
` `
...@@ -668,6 +724,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -668,6 +724,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSWAPL\t"}, []string{"\tBSWAPL\t"},
[]string{},
}, },
{ {
` `
...@@ -676,6 +733,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -676,6 +733,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tROLW\t\\$8,"}, []string{"\tROLW\t\\$8,"},
[]string{},
}, },
{ {
` `
...@@ -684,6 +742,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -684,6 +742,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -692,6 +751,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -692,6 +751,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -700,6 +760,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -700,6 +760,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
/* see ssa.go /* see ssa.go
{ {
...@@ -709,6 +770,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -709,6 +770,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
*/ */
{ {
...@@ -718,6 +780,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -718,6 +780,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -726,6 +789,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -726,6 +789,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -734,6 +798,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -734,6 +798,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -742,6 +807,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -742,6 +807,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
/* see ssa.go /* see ssa.go
{ {
...@@ -751,6 +817,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -751,6 +817,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
*/ */
{ {
...@@ -760,6 +827,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -760,6 +827,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
[]string{},
}, },
{ {
` `
...@@ -767,6 +835,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -767,6 +835,7 @@ var linuxAMD64Tests = []*asmTest{
return bits.OnesCount64(x) return bits.OnesCount64(x)
}`, }`,
[]string{"\tPOPCNTQ\t", "support_popcnt"}, []string{"\tPOPCNTQ\t", "support_popcnt"},
[]string{},
}, },
{ {
` `
...@@ -774,6 +843,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -774,6 +843,7 @@ var linuxAMD64Tests = []*asmTest{
return bits.OnesCount32(x) return bits.OnesCount32(x)
}`, }`,
[]string{"\tPOPCNTL\t", "support_popcnt"}, []string{"\tPOPCNTL\t", "support_popcnt"},
[]string{},
}, },
{ {
` `
...@@ -781,6 +851,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -781,6 +851,7 @@ var linuxAMD64Tests = []*asmTest{
return bits.OnesCount16(x) return bits.OnesCount16(x)
}`, }`,
[]string{"\tPOPCNTL\t", "support_popcnt"}, []string{"\tPOPCNTL\t", "support_popcnt"},
[]string{},
}, },
{ {
` `
...@@ -788,6 +859,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -788,6 +859,7 @@ var linuxAMD64Tests = []*asmTest{
return bits.OnesCount(x) return bits.OnesCount(x)
}`, }`,
[]string{"\tPOPCNTQ\t", "support_popcnt"}, []string{"\tPOPCNTQ\t", "support_popcnt"},
[]string{},
}, },
// multiplication merging tests // multiplication merging tests
{ {
...@@ -796,6 +868,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -796,6 +868,7 @@ var linuxAMD64Tests = []*asmTest{
return 15*n + 31*n return 15*n + 31*n
}`, }`,
[]string{"\tIMULQ\t[$]46"}, // 46*n []string{"\tIMULQ\t[$]46"}, // 46*n
[]string{},
}, },
{ {
` `
...@@ -803,6 +876,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -803,6 +876,7 @@ var linuxAMD64Tests = []*asmTest{
return 5*n + 7*(n+1) + 11*(n+2) return 5*n + 7*(n+1) + 11*(n+2)
}`, }`,
[]string{"\tIMULQ\t[$]23", "\tADDQ\t[$]29"}, // 23*n + 29 []string{"\tIMULQ\t[$]23", "\tADDQ\t[$]29"}, // 23*n + 29
[]string{},
}, },
{ {
` `
...@@ -810,6 +884,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -810,6 +884,7 @@ var linuxAMD64Tests = []*asmTest{
return a*n + 19*n return a*n + 19*n
}`, }`,
[]string{"\tADDQ\t[$]19", "\tIMULQ"}, // (a+19)*n []string{"\tADDQ\t[$]19", "\tIMULQ"}, // (a+19)*n
[]string{},
}, },
// see issue 19595. // see issue 19595.
...@@ -821,6 +896,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -821,6 +896,7 @@ var linuxAMD64Tests = []*asmTest{
*q += x *q += x
}`, }`,
[]string{"\tADDQ\t\\("}, []string{"\tADDQ\t\\("},
[]string{},
}, },
{ {
` `
...@@ -831,6 +907,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -831,6 +907,7 @@ var linuxAMD64Tests = []*asmTest{
} }
}`, }`,
[]string{"\tADDQ\t[A-Z]"}, []string{"\tADDQ\t[A-Z]"},
[]string{},
}, },
// Floating-point strength reduction // Floating-point strength reduction
{ {
...@@ -839,6 +916,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -839,6 +916,7 @@ var linuxAMD64Tests = []*asmTest{
return f * 2.0 return f * 2.0
}`, }`,
[]string{"\tADDSD\t"}, []string{"\tADDSD\t"},
[]string{},
}, },
{ {
` `
...@@ -846,6 +924,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -846,6 +924,7 @@ var linuxAMD64Tests = []*asmTest{
return f / 16.0 return f / 16.0
}`, }`,
[]string{"\tMULSD\t"}, []string{"\tMULSD\t"},
[]string{},
}, },
{ {
` `
...@@ -853,6 +932,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -853,6 +932,7 @@ var linuxAMD64Tests = []*asmTest{
return f / 0.125 return f / 0.125
}`, }`,
[]string{"\tMULSD\t"}, []string{"\tMULSD\t"},
[]string{},
}, },
{ {
` `
...@@ -860,6 +940,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -860,6 +940,7 @@ var linuxAMD64Tests = []*asmTest{
return f / 0.5 return f / 0.5
}`, }`,
[]string{"\tADDSD\t"}, []string{"\tADDSD\t"},
[]string{},
}, },
// Check that compare to constant string uses 2/4/8 byte compares // Check that compare to constant string uses 2/4/8 byte compares
{ {
...@@ -868,6 +949,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -868,6 +949,7 @@ var linuxAMD64Tests = []*asmTest{
return a == "xx" return a == "xx"
}`, }`,
[]string{"\tCMPW\t[A-Z]"}, []string{"\tCMPW\t[A-Z]"},
[]string{},
}, },
{ {
` `
...@@ -875,6 +957,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -875,6 +957,7 @@ var linuxAMD64Tests = []*asmTest{
return a == "xxxx" return a == "xxxx"
}`, }`,
[]string{"\tCMPL\t[A-Z]"}, []string{"\tCMPL\t[A-Z]"},
[]string{},
}, },
{ {
` `
...@@ -882,6 +965,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -882,6 +965,7 @@ var linuxAMD64Tests = []*asmTest{
return a == "xxxxxxxx" return a == "xxxxxxxx"
}`, }`,
[]string{"\tCMPQ\t[A-Z]"}, []string{"\tCMPQ\t[A-Z]"},
[]string{},
}, },
// Non-constant rotate // Non-constant rotate
{ {
...@@ -890,6 +974,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -890,6 +974,7 @@ var linuxAMD64Tests = []*asmTest{
return x << z | x >> (64-z) return x << z | x >> (64-z)
}`, }`,
[]string{"\tROLQ\t"}, []string{"\tROLQ\t"},
[]string{},
}, },
{ {
`func rot64r(x uint64, y int) uint64 { `func rot64r(x uint64, y int) uint64 {
...@@ -897,6 +982,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -897,6 +982,7 @@ var linuxAMD64Tests = []*asmTest{
return x >> z | x << (64-z) return x >> z | x << (64-z)
}`, }`,
[]string{"\tRORQ\t"}, []string{"\tRORQ\t"},
[]string{},
}, },
{ {
`func rot32l(x uint32, y int) uint32 { `func rot32l(x uint32, y int) uint32 {
...@@ -904,6 +990,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -904,6 +990,7 @@ var linuxAMD64Tests = []*asmTest{
return x << z | x >> (32-z) return x << z | x >> (32-z)
}`, }`,
[]string{"\tROLL\t"}, []string{"\tROLL\t"},
[]string{},
}, },
{ {
`func rot32r(x uint32, y int) uint32 { `func rot32r(x uint32, y int) uint32 {
...@@ -911,6 +998,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -911,6 +998,7 @@ var linuxAMD64Tests = []*asmTest{
return x >> z | x << (32-z) return x >> z | x << (32-z)
}`, }`,
[]string{"\tRORL\t"}, []string{"\tRORL\t"},
[]string{},
}, },
{ {
`func rot16l(x uint16, y int) uint16 { `func rot16l(x uint16, y int) uint16 {
...@@ -918,6 +1006,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -918,6 +1006,7 @@ var linuxAMD64Tests = []*asmTest{
return x << z | x >> (16-z) return x << z | x >> (16-z)
}`, }`,
[]string{"\tROLW\t"}, []string{"\tROLW\t"},
[]string{},
}, },
{ {
`func rot16r(x uint16, y int) uint16 { `func rot16r(x uint16, y int) uint16 {
...@@ -925,6 +1014,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -925,6 +1014,7 @@ var linuxAMD64Tests = []*asmTest{
return x >> z | x << (16-z) return x >> z | x << (16-z)
}`, }`,
[]string{"\tRORW\t"}, []string{"\tRORW\t"},
[]string{},
}, },
{ {
`func rot8l(x uint8, y int) uint8 { `func rot8l(x uint8, y int) uint8 {
...@@ -932,6 +1022,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -932,6 +1022,7 @@ var linuxAMD64Tests = []*asmTest{
return x << z | x >> (8-z) return x << z | x >> (8-z)
}`, }`,
[]string{"\tROLB\t"}, []string{"\tROLB\t"},
[]string{},
}, },
{ {
`func rot8r(x uint8, y int) uint8 { `func rot8r(x uint8, y int) uint8 {
...@@ -939,6 +1030,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -939,6 +1030,7 @@ var linuxAMD64Tests = []*asmTest{
return x >> z | x << (8-z) return x >> z | x << (8-z)
}`, }`,
[]string{"\tRORB\t"}, []string{"\tRORB\t"},
[]string{},
}, },
// Check that array compare uses 2/4/8 byte compares // Check that array compare uses 2/4/8 byte compares
{ {
...@@ -947,6 +1039,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -947,6 +1039,7 @@ var linuxAMD64Tests = []*asmTest{
return a == b return a == b
}`, }`,
[]string{"\tCMPW\t[A-Z]"}, []string{"\tCMPW\t[A-Z]"},
[]string{},
}, },
{ {
` `
...@@ -954,6 +1047,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -954,6 +1047,7 @@ var linuxAMD64Tests = []*asmTest{
return a == b return a == b
}`, }`,
[]string{"\tCMPL\t[A-Z]"}, []string{"\tCMPL\t[A-Z]"},
[]string{},
}, },
{ {
` `
...@@ -961,6 +1055,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -961,6 +1055,7 @@ var linuxAMD64Tests = []*asmTest{
return a == b return a == b
}`, }`,
[]string{"\tCMPQ\t[A-Z]"}, []string{"\tCMPQ\t[A-Z]"},
[]string{},
}, },
{ {
` `
...@@ -968,6 +1063,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -968,6 +1063,7 @@ var linuxAMD64Tests = []*asmTest{
return *((*[4]byte)(a)) != *((*[4]byte)(b)) return *((*[4]byte)(a)) != *((*[4]byte)(b))
}`, }`,
[]string{"\tCMPL\t[A-Z]"}, []string{"\tCMPL\t[A-Z]"},
[]string{},
}, },
{ {
// make sure assembly output has matching offset and base register. // make sure assembly output has matching offset and base register.
...@@ -979,6 +1075,56 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -979,6 +1075,56 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"b\\+40\\(SP\\)"}, []string{"b\\+40\\(SP\\)"},
[]string{},
},
{
// check load combining
`
func f73(a, b byte) (byte,byte) {
return f73(f73(a,b))
}
`,
[]string{"\tMOVW\t"},
[]string{},
},
{
`
func f74(a, b uint16) (uint16,uint16) {
return f74(f74(a,b))
}
`,
[]string{"\tMOVL\t"},
[]string{},
},
{
`
func f75(a, b uint32) (uint32,uint32) {
return f75(f75(a,b))
}
`,
[]string{"\tMOVQ\t"},
[]string{},
},
{
`
func f76(a, b uint64) (uint64,uint64) {
return f76(f76(a,b))
}
`,
[]string{"\tMOVUPS\t"},
[]string{},
},
// Make sure we don't put pointers in SSE registers across safe points.
{
`
func $(p, q *[2]*int) {
a, b := p[0], p[1]
runtime.GC()
q[0], q[1] = a, b
}
`,
[]string{},
[]string{"MOVUPS"},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -989,6 +1135,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -989,6 +1135,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]0-8"}, []string{"TEXT\t.*, [$]0-8"},
[]string{},
}, },
// math.Abs using integer registers // math.Abs using integer registers
{ {
...@@ -998,6 +1145,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -998,6 +1145,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,"}, []string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,"},
[]string{},
}, },
// math.Copysign using integer registers // math.Copysign using integer registers
{ {
...@@ -1007,6 +1155,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -1007,6 +1155,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,", "\tSHRQ\t[$]63,", "\tSHLQ\t[$]63,", "\tORQ\t"}, []string{"\tSHLQ\t[$]1,", "\tSHRQ\t[$]1,", "\tSHRQ\t[$]63,", "\tSHLQ\t[$]63,", "\tORQ\t"},
[]string{},
}, },
// int <-> fp moves // int <-> fp moves
{ {
...@@ -1016,6 +1165,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -1016,6 +1165,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\tX.*, [^X].*"}, []string{"\tMOVQ\tX.*, [^X].*"},
[]string{},
}, },
{ {
` `
...@@ -1024,6 +1174,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -1024,6 +1174,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\tX.*, [^X].*"}, []string{"\tMOVL\tX.*, [^X].*"},
[]string{},
}, },
{ {
` `
...@@ -1032,6 +1183,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -1032,6 +1183,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVQ\t[^X].*, X.*"}, []string{"\tMOVQ\t[^X].*, X.*"},
[]string{},
}, },
{ {
` `
...@@ -1040,6 +1192,7 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -1040,6 +1192,7 @@ var linuxAMD64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\t[^X].*, X.*"}, []string{"\tMOVL\t[^X].*, X.*"},
[]string{},
}, },
} }
...@@ -1051,6 +1204,7 @@ var linux386Tests = []*asmTest{ ...@@ -1051,6 +1204,7 @@ var linux386Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\t\\(.*\\),"}, []string{"\tMOVL\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -1059,6 +1213,7 @@ var linux386Tests = []*asmTest{ ...@@ -1059,6 +1213,7 @@ var linux386Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
// multiplication merging tests // multiplication merging tests
...@@ -1068,6 +1223,7 @@ var linux386Tests = []*asmTest{ ...@@ -1068,6 +1223,7 @@ var linux386Tests = []*asmTest{
return 9*n + 14*n return 9*n + 14*n
}`, }`,
[]string{"\tIMULL\t[$]23"}, // 23*n []string{"\tIMULL\t[$]23"}, // 23*n
[]string{},
}, },
{ {
` `
...@@ -1075,6 +1231,7 @@ var linux386Tests = []*asmTest{ ...@@ -1075,6 +1231,7 @@ var linux386Tests = []*asmTest{
return 19*a + a*n return 19*a + a*n
}`, }`,
[]string{"\tADDL\t[$]19", "\tIMULL"}, // (n+19)*a []string{"\tADDL\t[$]19", "\tIMULL"}, // (n+19)*a
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1085,6 +1242,7 @@ var linux386Tests = []*asmTest{ ...@@ -1085,6 +1242,7 @@ var linux386Tests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]0-4"}, []string{"TEXT\t.*, [$]0-4"},
[]string{},
}, },
} }
...@@ -1096,6 +1254,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1096,6 +1254,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVWBR\t\\(.*\\),"}, []string{"\tMOVWBR\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -1104,6 +1263,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1104,6 +1263,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVWBR\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVWBR\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -1112,6 +1272,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1112,6 +1272,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVDBR\t\\(.*\\),"}, []string{"\tMOVDBR\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -1120,6 +1281,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1120,6 +1281,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVDBR\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVDBR\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -1128,6 +1290,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1128,6 +1290,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVWZ\t\\(.*\\),"}, []string{"\tMOVWZ\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -1136,6 +1299,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1136,6 +1299,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVWZ\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVWZ\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -1144,6 +1308,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1144,6 +1308,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVD\t\\(.*\\),"}, []string{"\tMOVD\t\\(.*\\),"},
[]string{},
}, },
{ {
` `
...@@ -1152,6 +1317,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1152,6 +1317,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVD\t\\(.*\\)\\(.*\\*1\\),"}, []string{"\tMOVD\t\\(.*\\)\\(.*\\*1\\),"},
[]string{},
}, },
{ {
` `
...@@ -1160,6 +1326,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1160,6 +1326,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLLG\t[$]7,"}, []string{"\tRLLG\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -1168,6 +1335,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1168,6 +1335,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLLG\t[$]7,"}, []string{"\tRLLG\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -1176,6 +1344,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1176,6 +1344,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLLG\t[$]7,"}, []string{"\tRLLG\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -1184,6 +1353,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1184,6 +1353,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLL\t[$]7,"}, []string{"\tRLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -1192,6 +1362,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1192,6 +1362,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLL\t[$]7,"}, []string{"\tRLL\t[$]7,"},
[]string{},
}, },
{ {
` `
...@@ -1200,6 +1371,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1200,6 +1371,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tRLL\t[$]7,"}, []string{"\tRLL\t[$]7,"},
[]string{},
}, },
// Fused multiply-add/sub instructions. // Fused multiply-add/sub instructions.
{ {
...@@ -1209,6 +1381,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1209,6 +1381,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFMADD\t"}, []string{"\tFMADD\t"},
[]string{},
}, },
{ {
` `
...@@ -1217,6 +1390,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1217,6 +1390,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFMSUB\t"}, []string{"\tFMSUB\t"},
[]string{},
}, },
{ {
` `
...@@ -1225,6 +1399,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1225,6 +1399,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFMADDS\t"}, []string{"\tFMADDS\t"},
[]string{},
}, },
{ {
` `
...@@ -1233,6 +1408,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1233,6 +1408,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFMSUBS\t"}, []string{"\tFMSUBS\t"},
[]string{},
}, },
// Intrinsic tests for math/bits // Intrinsic tests for math/bits
{ {
...@@ -1242,6 +1418,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1242,6 +1418,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1250,6 +1427,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1250,6 +1427,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t", "\tMOVWZ\t"}, []string{"\tFLOGR\t", "\tMOVWZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1258,6 +1436,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1258,6 +1436,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t", "\tOR\t\\$65536,"}, []string{"\tFLOGR\t", "\tOR\t\\$65536,"},
[]string{},
}, },
{ {
` `
...@@ -1266,6 +1445,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1266,6 +1445,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t", "\tOR\t\\$256,"}, []string{"\tFLOGR\t", "\tOR\t\\$256,"},
[]string{},
}, },
// Intrinsic tests for math/bits // Intrinsic tests for math/bits
{ {
...@@ -1275,6 +1455,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1275,6 +1455,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVDBR\t"}, []string{"\tMOVDBR\t"},
[]string{},
}, },
{ {
` `
...@@ -1283,6 +1464,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1283,6 +1464,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tMOVWBR\t"}, []string{"\tMOVWBR\t"},
[]string{},
}, },
{ {
` `
...@@ -1291,6 +1473,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1291,6 +1473,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1299,6 +1482,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1299,6 +1482,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1307,6 +1491,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1307,6 +1491,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1315,6 +1500,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1315,6 +1500,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1323,6 +1509,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1323,6 +1509,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1331,6 +1518,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1331,6 +1518,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1339,6 +1527,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1339,6 +1527,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1347,6 +1536,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1347,6 +1536,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1355,6 +1545,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1355,6 +1545,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
` `
...@@ -1363,6 +1554,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1363,6 +1554,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"\tFLOGR\t"}, []string{"\tFLOGR\t"},
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1373,6 +1565,7 @@ var linuxS390XTests = []*asmTest{ ...@@ -1373,6 +1565,7 @@ var linuxS390XTests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]0-8"}, []string{"TEXT\t.*, [$]0-8"},
[]string{},
}, },
} }
...@@ -1384,6 +1577,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1384,6 +1577,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tMOVW\tR[0-9]+@>25,"}, []string{"\tMOVW\tR[0-9]+@>25,"},
[]string{},
}, },
{ {
` `
...@@ -1392,6 +1586,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1392,6 +1586,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tMOVW\tR[0-9]+@>25,"}, []string{"\tMOVW\tR[0-9]+@>25,"},
[]string{},
}, },
{ {
` `
...@@ -1400,6 +1595,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1400,6 +1595,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tMOVW\tR[0-9]+@>25,"}, []string{"\tMOVW\tR[0-9]+@>25,"},
[]string{},
}, },
{ {
` `
...@@ -1408,6 +1604,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1408,6 +1604,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1416,6 +1613,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1416,6 +1613,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1424,6 +1622,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1424,6 +1622,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1432,6 +1631,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1432,6 +1631,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1440,6 +1640,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1440,6 +1640,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1448,6 +1649,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1448,6 +1649,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1456,6 +1658,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1456,6 +1658,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1464,6 +1667,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1464,6 +1667,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1472,6 +1676,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1472,6 +1676,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1480,6 +1685,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1480,6 +1685,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
// make sure assembly output has matching offset and base register. // make sure assembly output has matching offset and base register.
...@@ -1491,6 +1697,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1491,6 +1697,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"b\\+4\\(FP\\)"}, []string{"b\\+4\\(FP\\)"},
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1501,6 +1708,7 @@ var linuxARMTests = []*asmTest{ ...@@ -1501,6 +1708,7 @@ var linuxARMTests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]-4-4"}, []string{"TEXT\t.*, [$]-4-4"},
[]string{},
}, },
} }
...@@ -1512,6 +1720,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1512,6 +1720,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tROR\t[$]57,"}, []string{"\tROR\t[$]57,"},
[]string{},
}, },
{ {
` `
...@@ -1520,6 +1729,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1520,6 +1729,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tROR\t[$]57,"}, []string{"\tROR\t[$]57,"},
[]string{},
}, },
{ {
` `
...@@ -1528,6 +1738,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1528,6 +1738,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tROR\t[$]57,"}, []string{"\tROR\t[$]57,"},
[]string{},
}, },
{ {
` `
...@@ -1536,6 +1747,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1536,6 +1747,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tRORW\t[$]25,"}, []string{"\tRORW\t[$]25,"},
[]string{},
}, },
{ {
` `
...@@ -1544,6 +1756,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1544,6 +1756,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tRORW\t[$]25,"}, []string{"\tRORW\t[$]25,"},
[]string{},
}, },
{ {
` `
...@@ -1552,6 +1765,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1552,6 +1765,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tRORW\t[$]25,"}, []string{"\tRORW\t[$]25,"},
[]string{},
}, },
{ {
` `
...@@ -1560,6 +1774,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1560,6 +1774,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tREV\t"}, []string{"\tREV\t"},
[]string{},
}, },
{ {
` `
...@@ -1568,6 +1783,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1568,6 +1783,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tREVW\t"}, []string{"\tREVW\t"},
[]string{},
}, },
{ {
` `
...@@ -1576,6 +1792,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1576,6 +1792,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1584,6 +1801,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1584,6 +1801,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1592,6 +1810,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1592,6 +1810,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1600,6 +1819,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1600,6 +1819,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1608,6 +1828,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1608,6 +1828,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1616,6 +1837,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1616,6 +1837,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1624,6 +1846,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1624,6 +1846,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1632,6 +1855,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1632,6 +1855,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1640,6 +1864,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1640,6 +1864,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1648,6 +1873,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1648,6 +1873,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1656,6 +1882,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1656,6 +1882,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tAND\t"}, []string{"\tAND\t"},
[]string{},
}, },
{ {
` `
...@@ -1664,6 +1891,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1664,6 +1891,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tAND\t"}, []string{"\tAND\t"},
[]string{},
}, },
{ {
// make sure offsets are folded into load and store. // make sure offsets are folded into load and store.
...@@ -1674,6 +1902,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1674,6 +1902,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"\tMOVD\t\"\"\\.a\\+[0-9]+\\(FP\\), R[0-9]+", "\tMOVD\tR[0-9]+, \"\"\\.b\\+[0-9]+\\(FP\\)"}, []string{"\tMOVD\t\"\"\\.a\\+[0-9]+\\(FP\\), R[0-9]+", "\tMOVD\tR[0-9]+, \"\"\\.b\\+[0-9]+\\(FP\\)"},
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1684,6 +1913,7 @@ var linuxARM64Tests = []*asmTest{ ...@@ -1684,6 +1913,7 @@ var linuxARM64Tests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]-8-8"}, []string{"TEXT\t.*, [$]-8-8"},
[]string{},
}, },
} }
...@@ -1695,6 +1925,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1695,6 +1925,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1703,6 +1934,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1703,6 +1934,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1711,6 +1943,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1711,6 +1943,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1719,6 +1952,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1719,6 +1952,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1727,6 +1961,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1727,6 +1961,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1735,6 +1970,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1735,6 +1970,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1743,6 +1979,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1743,6 +1979,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1751,6 +1988,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1751,6 +1988,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1759,6 +1997,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1759,6 +1997,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
` `
...@@ -1767,6 +2006,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1767,6 +2006,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"\tCLZ\t"}, []string{"\tCLZ\t"},
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1777,6 +2017,7 @@ var linuxMIPSTests = []*asmTest{ ...@@ -1777,6 +2017,7 @@ var linuxMIPSTests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]-4-4"}, []string{"TEXT\t.*, [$]-4-4"},
[]string{},
}, },
} }
...@@ -1789,6 +2030,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1789,6 +2030,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tFMADD\t"}, []string{"\tFMADD\t"},
[]string{},
}, },
{ {
` `
...@@ -1797,6 +2039,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1797,6 +2039,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tFMSUB\t"}, []string{"\tFMSUB\t"},
[]string{},
}, },
{ {
` `
...@@ -1805,6 +2048,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1805,6 +2048,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tFMADDS\t"}, []string{"\tFMADDS\t"},
[]string{},
}, },
{ {
` `
...@@ -1813,6 +2057,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1813,6 +2057,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tFMSUBS\t"}, []string{"\tFMSUBS\t"},
[]string{},
}, },
{ {
` `
...@@ -1821,6 +2066,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1821,6 +2066,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTLW\t"}, []string{"\tROTLW\t"},
[]string{},
}, },
{ {
` `
...@@ -1829,6 +2075,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1829,6 +2075,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTLW\t"}, []string{"\tROTLW\t"},
[]string{},
}, },
{ {
` `
...@@ -1837,6 +2084,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1837,6 +2084,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTLW\t"}, []string{"\tROTLW\t"},
[]string{},
}, },
{ {
` `
...@@ -1845,6 +2093,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1845,6 +2093,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTL\t"}, []string{"\tROTL\t"},
[]string{},
}, },
{ {
` `
...@@ -1853,6 +2102,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1853,6 +2102,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTL\t"}, []string{"\tROTL\t"},
[]string{},
}, },
{ {
` `
...@@ -1861,6 +2111,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1861,6 +2111,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"\tROTL\t"}, []string{"\tROTL\t"},
[]string{},
}, },
{ {
// check that stack store is optimized away // check that stack store is optimized away
...@@ -1871,6 +2122,7 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -1871,6 +2122,7 @@ var linuxPPC64LETests = []*asmTest{
} }
`, `,
[]string{"TEXT\t.*, [$]0-8"}, []string{"TEXT\t.*, [$]0-8"},
[]string{},
}, },
} }
......
...@@ -2327,6 +2327,58 @@ ...@@ -2327,6 +2327,58 @@
&& clobber(x) && clobber(x)
-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem) -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
// Combine two adjacent 1-byte copies into one 2-byte copy.
// Matches a MOVBstore at offset i of a MOVBload at offset j whose memory argument is a
// MOVBstore at offset i-1 of a MOVBload at offset j-1, with the same store pointer p,
// the same load pointer p2, and both loads reading the same memory state mem.
// Each matched load and the inner store must have exactly one use; they are clobbered
// and replaced by a single MOVWload/MOVWstore at offsets j-1/i-1.
(MOVBstore [i] {s} p
x1:(MOVBload [j] {s2} p2 mem)
mem2:(MOVBstore [i-1] {s} p
x2:(MOVBload [j-1] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1)
&& clobber(x2)
&& clobber(mem2)
-> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
// Combine two adjacent 2-byte copies into one 4-byte copy.
// Matches a MOVWstore at offset i of a MOVWload at offset j whose memory argument is a
// MOVWstore at offset i-2 of a MOVWload at offset j-2, with the same store pointer p,
// the same load pointer p2, and both loads reading the same memory state mem.
// Each matched load and the inner store must have exactly one use; they are clobbered
// and replaced by a single MOVLload/MOVLstore at offsets j-2/i-2.
(MOVWstore [i] {s} p
x1:(MOVWload [j] {s2} p2 mem)
mem2:(MOVWstore [i-2] {s} p
x2:(MOVWload [j-2] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1)
&& clobber(x2)
&& clobber(mem2)
-> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
// Combine two adjacent 4-byte copies into one 8-byte copy.
// Matches a MOVLstore at offset i of a MOVLload at offset j whose memory argument is a
// MOVLstore at offset i-4 of a MOVLload at offset j-4, with the same store pointer p,
// the same load pointer p2, and both loads reading the same memory state mem.
// Each matched load and the inner store must have exactly one use; they are clobbered
// and replaced by a single MOVQload/MOVQstore at offsets j-4/i-4.
(MOVLstore [i] {s} p
x1:(MOVLload [j] {s2} p2 mem)
mem2:(MOVLstore [i-4] {s} p
x2:(MOVLload [j-4] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& clobber(x1)
&& clobber(x2)
&& clobber(mem2)
-> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
// Combine two adjacent 8-byte copies into one 16-byte copy (MOVOload/MOVOstore at
// offsets j-8/i-8). The rule only fires when config.useSSE is set, and requires the
// matched loads and the inner store to have exactly one use each.
// This is somewhat tricky: because of this rule, there may be pointers in SSE registers.
// However, those registers shouldn't be live across a GC safepoint.
(MOVQstore [i] {s} p
x1:(MOVQload [j] {s2} p2 mem)
mem2:(MOVQstore [i-8] {s} p
x2:(MOVQload [j-8] {s2} p2 mem) mem))
&& x1.Uses == 1
&& x2.Uses == 1
&& mem2.Uses == 1
&& config.useSSE
&& clobber(x1)
&& clobber(x2)
&& clobber(mem2)
-> (MOVOstore [i-8] {s} p (MOVOload [j-8] {s2} p2 mem) mem)
// amd64p32 rules // amd64p32 rules
// same as the rules above, but with 32 instead of 64 bit pointer arithmetic. // same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
// LEAQ,ADDQ -> LEAL,ADDL // LEAQ,ADDQ -> LEAL,ADDL
......
...@@ -154,7 +154,7 @@ func rewriteValueAMD64(v *Value) bool { ...@@ -154,7 +154,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpAMD64MOVQloadidx8: case OpAMD64MOVQloadidx8:
return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v) return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
case OpAMD64MOVQstore: case OpAMD64MOVQstore:
return rewriteValueAMD64_OpAMD64MOVQstore_0(v) return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
case OpAMD64MOVQstoreconst: case OpAMD64MOVQstoreconst:
return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v) return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
case OpAMD64MOVQstoreconstidx1: case OpAMD64MOVQstoreconstidx1:
...@@ -5690,6 +5690,10 @@ func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool { ...@@ -5690,6 +5690,10 @@ func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
return false return false
} }
func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
// cond: x.Uses == 1 && clobber(x) // cond: x.Uses == 1 && clobber(x)
// result: (MOVWstore [i-1] {s} p w mem) // result: (MOVWstore [i-1] {s} p w mem)
...@@ -5785,6 +5789,73 @@ func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool { ...@@ -5785,6 +5789,73 @@ func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
// cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
// result: (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
x1 := v.Args[1]
if x1.Op != OpAMD64MOVBload {
break
}
j := x1.AuxInt
s2 := x1.Aux
_ = x1.Args[1]
p2 := x1.Args[0]
mem := x1.Args[1]
mem2 := v.Args[2]
if mem2.Op != OpAMD64MOVBstore {
break
}
if mem2.AuxInt != i-1 {
break
}
if mem2.Aux != s {
break
}
_ = mem2.Args[2]
if p != mem2.Args[0] {
break
}
x2 := mem2.Args[1]
if x2.Op != OpAMD64MOVBload {
break
}
if x2.AuxInt != j-1 {
break
}
if x2.Aux != s2 {
break
}
_ = x2.Args[1]
if p2 != x2.Args[0] {
break
}
if mem != x2.Args[1] {
break
}
if mem != mem2.Args[2] {
break
}
if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
break
}
v.reset(OpAMD64MOVWstore)
v.AuxInt = i - 1
v.Aux = s
v.AddArg(p)
v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
v0.AuxInt = j - 1
v0.Aux = s2
v0.AddArg(p2)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
// cond: canMergeSym(sym1, sym2) // cond: canMergeSym(sym1, sym2)
// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
...@@ -7810,6 +7881,77 @@ func rewriteValueAMD64_OpAMD64MOVLstore_0(v *Value) bool { ...@@ -7810,6 +7881,77 @@ func rewriteValueAMD64_OpAMD64MOVLstore_0(v *Value) bool {
return false return false
} }
func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
// cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
// result: (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
x1 := v.Args[1]
if x1.Op != OpAMD64MOVLload {
break
}
j := x1.AuxInt
s2 := x1.Aux
_ = x1.Args[1]
p2 := x1.Args[0]
mem := x1.Args[1]
mem2 := v.Args[2]
if mem2.Op != OpAMD64MOVLstore {
break
}
if mem2.AuxInt != i-4 {
break
}
if mem2.Aux != s {
break
}
_ = mem2.Args[2]
if p != mem2.Args[0] {
break
}
x2 := mem2.Args[1]
if x2.Op != OpAMD64MOVLload {
break
}
if x2.AuxInt != j-4 {
break
}
if x2.Aux != s2 {
break
}
_ = x2.Args[1]
if p2 != x2.Args[0] {
break
}
if mem != x2.Args[1] {
break
}
if mem != mem2.Args[2] {
break
}
if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
break
}
v.reset(OpAMD64MOVQstore)
v.AuxInt = i - 4
v.Aux = s
v.AddArg(p)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v0.AuxInt = j - 4
v0.Aux = s2
v0.AddArg(p2)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
// cond: canMergeSym(sym1, sym2) // cond: canMergeSym(sym1, sym2)
// result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
...@@ -9345,6 +9487,10 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool { ...@@ -9345,6 +9487,10 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
return false return false
} }
func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
// match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
// cond: is32Bit(off1+off2) // cond: is32Bit(off1+off2)
// result: (MOVQstore [off1+off2] {sym} ptr val mem) // result: (MOVQstore [off1+off2] {sym} ptr val mem)
...@@ -9510,6 +9656,73 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { ...@@ -9510,6 +9656,73 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (MOVQstore [i] {s} p x1:(MOVQload [j] {s2} p2 mem) mem2:(MOVQstore [i-8] {s} p x2:(MOVQload [j-8] {s2} p2 mem) mem))
// cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && config.useSSE && clobber(x1) && clobber(x2) && clobber(mem2)
// result: (MOVOstore [i-8] {s} p (MOVOload [j-8] {s2} p2 mem) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
x1 := v.Args[1]
if x1.Op != OpAMD64MOVQload {
break
}
j := x1.AuxInt
s2 := x1.Aux
_ = x1.Args[1]
p2 := x1.Args[0]
mem := x1.Args[1]
mem2 := v.Args[2]
if mem2.Op != OpAMD64MOVQstore {
break
}
if mem2.AuxInt != i-8 {
break
}
if mem2.Aux != s {
break
}
_ = mem2.Args[2]
if p != mem2.Args[0] {
break
}
x2 := mem2.Args[1]
if x2.Op != OpAMD64MOVQload {
break
}
if x2.AuxInt != j-8 {
break
}
if x2.Aux != s2 {
break
}
_ = x2.Args[1]
if p2 != x2.Args[0] {
break
}
if mem != x2.Args[1] {
break
}
if mem != mem2.Args[2] {
break
}
if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && config.useSSE && clobber(x1) && clobber(x2) && clobber(mem2)) {
break
}
v.reset(OpAMD64MOVOstore)
v.AuxInt = i - 8
v.Aux = s
v.AddArg(p)
v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
v0.AuxInt = j - 8
v0.Aux = s2
v0.AddArg(p2)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
// cond: canMergeSym(sym1, sym2) // cond: canMergeSym(sym1, sym2)
// result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
...@@ -9602,6 +9815,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { ...@@ -9602,6 +9815,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
return false
}
func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
// match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
// cond: // cond:
// result: (MOVSDstore [off] {sym} ptr val mem) // result: (MOVSDstore [off] {sym} ptr val mem)
...@@ -12334,6 +12550,77 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool { ...@@ -12334,6 +12550,77 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
return false return false
} }
func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
// cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
// result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
for {
i := v.AuxInt
s := v.Aux
_ = v.Args[2]
p := v.Args[0]
x1 := v.Args[1]
if x1.Op != OpAMD64MOVWload {
break
}
j := x1.AuxInt
s2 := x1.Aux
_ = x1.Args[1]
p2 := x1.Args[0]
mem := x1.Args[1]
mem2 := v.Args[2]
if mem2.Op != OpAMD64MOVWstore {
break
}
if mem2.AuxInt != i-2 {
break
}
if mem2.Aux != s {
break
}
_ = mem2.Args[2]
if p != mem2.Args[0] {
break
}
x2 := mem2.Args[1]
if x2.Op != OpAMD64MOVWload {
break
}
if x2.AuxInt != j-2 {
break
}
if x2.Aux != s2 {
break
}
_ = x2.Args[1]
if p2 != x2.Args[0] {
break
}
if mem != x2.Args[1] {
break
}
if mem != mem2.Args[2] {
break
}
if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
break
}
v.reset(OpAMD64MOVLstore)
v.AuxInt = i - 2
v.Aux = s
v.AddArg(p)
v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
v0.AuxInt = j - 2
v0.Aux = s2
v0.AddArg(p2)
v0.AddArg(mem)
v.AddArg(v0)
v.AddArg(mem)
return true
}
// match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
// cond: canMergeSym(sym1, sym2) // cond: canMergeSym(sym1, sym2)
// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment