Commit 26dadbe3 authored by isharipo's avatar isharipo Committed by Ilya Tocar

cmd/asm: add amd64 PAB{SB,SD,SW}, PMADDUBSW, PMULHRSW, PSIG{NB,ND,NW}

instructions

1st change out of 3 to cover AMD64 SSSE3 instruction set in Go asm.
This commit adds instructions that do not require new named ytab sets.

Change-Id: I0c3dfd8d39c3daa8b7683ab163c63145626d042e
Reviewed-on: https://go-review.googlesource.com/56834
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
Reviewed-by: default avatarRuss Cox <rsc@golang.org>
parent 16edf0b1
...@@ -3083,14 +3083,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -3083,14 +3083,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PABSB (R11), M3 // 410f381c1b //TODO: PABSB (R11), M3 // 410f381c1b
//TODO: PABSB M2, M3 // 0f381cda //TODO: PABSB M2, M3 // 0f381cda
//TODO: PABSB M3, M3 // 0f381cdb //TODO: PABSB M3, M3 // 0f381cdb
//TODO: PABSB (BX), X2 // 660f381c13 PABSB (BX), X2 // 660f381c13
//TODO: PABSB (R11), X2 // 66410f381c13 PABSB (R11), X2 // 66410f381c13
//TODO: PABSB X2, X2 // 660f381cd2 PABSB X2, X2 // 660f381cd2
//TODO: PABSB X11, X2 // 66410f381cd3 PABSB X11, X2 // 66410f381cd3
//TODO: PABSB (BX), X11 // 66440f381c1b PABSB (BX), X11 // 66440f381c1b
//TODO: PABSB (R11), X11 // 66450f381c1b PABSB (R11), X11 // 66450f381c1b
//TODO: PABSB X2, X11 // 66440f381cda PABSB X2, X11 // 66440f381cda
//TODO: PABSB X11, X11 // 66450f381cdb PABSB X11, X11 // 66450f381cdb
//TODO: PABSD (BX), M2 // 0f381e13 //TODO: PABSD (BX), M2 // 0f381e13
//TODO: PABSD (R11), M2 // 410f381e13 //TODO: PABSD (R11), M2 // 410f381e13
//TODO: PABSD M2, M2 // 0f381ed2 //TODO: PABSD M2, M2 // 0f381ed2
...@@ -3099,14 +3099,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -3099,14 +3099,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PABSD (R11), M3 // 410f381e1b //TODO: PABSD (R11), M3 // 410f381e1b
//TODO: PABSD M2, M3 // 0f381eda //TODO: PABSD M2, M3 // 0f381eda
//TODO: PABSD M3, M3 // 0f381edb //TODO: PABSD M3, M3 // 0f381edb
//TODO: PABSD (BX), X2 // 660f381e13 PABSD (BX), X2 // 660f381e13
//TODO: PABSD (R11), X2 // 66410f381e13 PABSD (R11), X2 // 66410f381e13
//TODO: PABSD X2, X2 // 660f381ed2 PABSD X2, X2 // 660f381ed2
//TODO: PABSD X11, X2 // 66410f381ed3 PABSD X11, X2 // 66410f381ed3
//TODO: PABSD (BX), X11 // 66440f381e1b PABSD (BX), X11 // 66440f381e1b
//TODO: PABSD (R11), X11 // 66450f381e1b PABSD (R11), X11 // 66450f381e1b
//TODO: PABSD X2, X11 // 66440f381eda PABSD X2, X11 // 66440f381eda
//TODO: PABSD X11, X11 // 66450f381edb PABSD X11, X11 // 66450f381edb
//TODO: PABSW (BX), M2 // 0f381d13 //TODO: PABSW (BX), M2 // 0f381d13
//TODO: PABSW (R11), M2 // 410f381d13 //TODO: PABSW (R11), M2 // 410f381d13
//TODO: PABSW M2, M2 // 0f381dd2 //TODO: PABSW M2, M2 // 0f381dd2
...@@ -3115,14 +3115,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -3115,14 +3115,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PABSW (R11), M3 // 410f381d1b //TODO: PABSW (R11), M3 // 410f381d1b
//TODO: PABSW M2, M3 // 0f381dda //TODO: PABSW M2, M3 // 0f381dda
//TODO: PABSW M3, M3 // 0f381ddb //TODO: PABSW M3, M3 // 0f381ddb
//TODO: PABSW (BX), X2 // 660f381d13 PABSW (BX), X2 // 660f381d13
//TODO: PABSW (R11), X2 // 66410f381d13 PABSW (R11), X2 // 66410f381d13
//TODO: PABSW X2, X2 // 660f381dd2 PABSW X2, X2 // 660f381dd2
//TODO: PABSW X11, X2 // 66410f381dd3 PABSW X11, X2 // 66410f381dd3
//TODO: PABSW (BX), X11 // 66440f381d1b PABSW (BX), X11 // 66440f381d1b
//TODO: PABSW (R11), X11 // 66450f381d1b PABSW (R11), X11 // 66450f381d1b
//TODO: PABSW X2, X11 // 66440f381dda PABSW X2, X11 // 66440f381dda
//TODO: PABSW X11, X11 // 66450f381ddb PABSW X11, X11 // 66450f381ddb
PACKSSLW (BX), M2 // 0f6b13 PACKSSLW (BX), M2 // 0f6b13
PACKSSLW (R11), M2 // 410f6b13 PACKSSLW (R11), M2 // 410f6b13
PACKSSLW M2, M2 // 0f6bd2 PACKSSLW M2, M2 // 0f6bd2
...@@ -3775,14 +3775,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -3775,14 +3775,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PMADDUBSW (R11), M3 // 410f38041b //TODO: PMADDUBSW (R11), M3 // 410f38041b
//TODO: PMADDUBSW M2, M3 // 0f3804da //TODO: PMADDUBSW M2, M3 // 0f3804da
//TODO: PMADDUBSW M3, M3 // 0f3804db //TODO: PMADDUBSW M3, M3 // 0f3804db
//TODO: PMADDUBSW (BX), X2 // 660f380413 PMADDUBSW (BX), X2 // 660f380413
//TODO: PMADDUBSW (R11), X2 // 66410f380413 PMADDUBSW (R11), X2 // 66410f380413
//TODO: PMADDUBSW X2, X2 // 660f3804d2 PMADDUBSW X2, X2 // 660f3804d2
//TODO: PMADDUBSW X11, X2 // 66410f3804d3 PMADDUBSW X11, X2 // 66410f3804d3
//TODO: PMADDUBSW (BX), X11 // 66440f38041b PMADDUBSW (BX), X11 // 66440f38041b
//TODO: PMADDUBSW (R11), X11 // 66450f38041b PMADDUBSW (R11), X11 // 66450f38041b
//TODO: PMADDUBSW X2, X11 // 66440f3804da PMADDUBSW X2, X11 // 66440f3804da
//TODO: PMADDUBSW X11, X11 // 66450f3804db PMADDUBSW X11, X11 // 66450f3804db
PMADDWL (BX), M2 // 0ff513 PMADDWL (BX), M2 // 0ff513
PMADDWL (R11), M2 // 410ff513 PMADDWL (R11), M2 // 410ff513
PMADDWL M2, M2 // 0ff5d2 PMADDWL M2, M2 // 0ff5d2
...@@ -4047,14 +4047,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -4047,14 +4047,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PMULHRSW (R11), M3 // 410f380b1b //TODO: PMULHRSW (R11), M3 // 410f380b1b
//TODO: PMULHRSW M2, M3 // 0f380bda //TODO: PMULHRSW M2, M3 // 0f380bda
//TODO: PMULHRSW M3, M3 // 0f380bdb //TODO: PMULHRSW M3, M3 // 0f380bdb
//TODO: PMULHRSW (BX), X2 // 660f380b13 PMULHRSW (BX), X2 // 660f380b13
//TODO: PMULHRSW (R11), X2 // 66410f380b13 PMULHRSW (R11), X2 // 66410f380b13
//TODO: PMULHRSW X2, X2 // 660f380bd2 PMULHRSW X2, X2 // 660f380bd2
//TODO: PMULHRSW X11, X2 // 66410f380bd3 PMULHRSW X11, X2 // 66410f380bd3
//TODO: PMULHRSW (BX), X11 // 66440f380b1b PMULHRSW (BX), X11 // 66440f380b1b
//TODO: PMULHRSW (R11), X11 // 66450f380b1b PMULHRSW (R11), X11 // 66450f380b1b
//TODO: PMULHRSW X2, X11 // 66440f380bda PMULHRSW X2, X11 // 66440f380bda
//TODO: PMULHRSW X11, X11 // 66450f380bdb PMULHRSW X11, X11 // 66450f380bdb
PMULHUW (BX), M2 // 0fe413 PMULHUW (BX), M2 // 0fe413
PMULHUW (R11), M2 // 410fe413 PMULHUW (R11), M2 // 410fe413
PMULHUW M2, M2 // 0fe4d2 PMULHUW M2, M2 // 0fe4d2
...@@ -4283,14 +4283,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -4283,14 +4283,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PSIGNB (R11), M3 // 410f38081b //TODO: PSIGNB (R11), M3 // 410f38081b
//TODO: PSIGNB M2, M3 // 0f3808da //TODO: PSIGNB M2, M3 // 0f3808da
//TODO: PSIGNB M3, M3 // 0f3808db //TODO: PSIGNB M3, M3 // 0f3808db
//TODO: PSIGNB (BX), X2 // 660f380813 PSIGNB (BX), X2 // 660f380813
//TODO: PSIGNB (R11), X2 // 66410f380813 PSIGNB (R11), X2 // 66410f380813
//TODO: PSIGNB X2, X2 // 660f3808d2 PSIGNB X2, X2 // 660f3808d2
//TODO: PSIGNB X11, X2 // 66410f3808d3 PSIGNB X11, X2 // 66410f3808d3
//TODO: PSIGNB (BX), X11 // 66440f38081b PSIGNB (BX), X11 // 66440f38081b
//TODO: PSIGNB (R11), X11 // 66450f38081b PSIGNB (R11), X11 // 66450f38081b
//TODO: PSIGNB X2, X11 // 66440f3808da PSIGNB X2, X11 // 66440f3808da
//TODO: PSIGNB X11, X11 // 66450f3808db PSIGNB X11, X11 // 66450f3808db
//TODO: PSIGND (BX), M2 // 0f380a13 //TODO: PSIGND (BX), M2 // 0f380a13
//TODO: PSIGND (R11), M2 // 410f380a13 //TODO: PSIGND (R11), M2 // 410f380a13
//TODO: PSIGND M2, M2 // 0f380ad2 //TODO: PSIGND M2, M2 // 0f380ad2
...@@ -4299,14 +4299,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -4299,14 +4299,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PSIGND (R11), M3 // 410f380a1b //TODO: PSIGND (R11), M3 // 410f380a1b
//TODO: PSIGND M2, M3 // 0f380ada //TODO: PSIGND M2, M3 // 0f380ada
//TODO: PSIGND M3, M3 // 0f380adb //TODO: PSIGND M3, M3 // 0f380adb
//TODO: PSIGND (BX), X2 // 660f380a13 PSIGND (BX), X2 // 660f380a13
//TODO: PSIGND (R11), X2 // 66410f380a13 PSIGND (R11), X2 // 66410f380a13
//TODO: PSIGND X2, X2 // 660f380ad2 PSIGND X2, X2 // 660f380ad2
//TODO: PSIGND X11, X2 // 66410f380ad3 PSIGND X11, X2 // 66410f380ad3
//TODO: PSIGND (BX), X11 // 66440f380a1b PSIGND (BX), X11 // 66440f380a1b
//TODO: PSIGND (R11), X11 // 66450f380a1b PSIGND (R11), X11 // 66450f380a1b
//TODO: PSIGND X2, X11 // 66440f380ada PSIGND X2, X11 // 66440f380ada
//TODO: PSIGND X11, X11 // 66450f380adb PSIGND X11, X11 // 66450f380adb
//TODO: PSIGNW (BX), M2 // 0f380913 //TODO: PSIGNW (BX), M2 // 0f380913
//TODO: PSIGNW (R11), M2 // 410f380913 //TODO: PSIGNW (R11), M2 // 410f380913
//TODO: PSIGNW M2, M2 // 0f3809d2 //TODO: PSIGNW M2, M2 // 0f3809d2
...@@ -4315,14 +4315,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -4315,14 +4315,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: PSIGNW (R11), M3 // 410f38091b //TODO: PSIGNW (R11), M3 // 410f38091b
//TODO: PSIGNW M2, M3 // 0f3809da //TODO: PSIGNW M2, M3 // 0f3809da
//TODO: PSIGNW M3, M3 // 0f3809db //TODO: PSIGNW M3, M3 // 0f3809db
//TODO: PSIGNW (BX), X2 // 660f380913 PSIGNW (BX), X2 // 660f380913
//TODO: PSIGNW (R11), X2 // 66410f380913 PSIGNW (R11), X2 // 66410f380913
//TODO: PSIGNW X2, X2 // 660f3809d2 PSIGNW X2, X2 // 660f3809d2
//TODO: PSIGNW X11, X2 // 66410f3809d3 PSIGNW X11, X2 // 66410f3809d3
//TODO: PSIGNW (BX), X11 // 66440f38091b PSIGNW (BX), X11 // 66440f38091b
//TODO: PSIGNW (R11), X11 // 66450f38091b PSIGNW (R11), X11 // 66450f38091b
//TODO: PSIGNW X2, X11 // 66440f3809da PSIGNW X2, X11 // 66440f3809da
//TODO: PSIGNW X11, X11 // 66450f3809db PSIGNW X11, X11 // 66450f3809db
PSLLL (BX), M2 // 0ff213 PSLLL (BX), M2 // 0ff213
PSLLL (R11), M2 // 410ff213 PSLLL (R11), M2 // 410ff213
PSLLL M2, M2 // 0ff2d2 PSLLL M2, M2 // 0ff2d2
......
...@@ -188,6 +188,9 @@ const ( ...@@ -188,6 +188,9 @@ const (
AOUTSB AOUTSB
AOUTSL AOUTSL
AOUTSW AOUTSW
APABSB
APABSD
APABSW
APAUSE APAUSE
APOPAL APOPAL
APOPAW APOPAW
...@@ -652,6 +655,7 @@ const ( ...@@ -652,6 +655,7 @@ const (
APINSRD APINSRD
APINSRQ APINSRQ
APINSRW APINSRW
APMADDUBSW
APMADDWL APMADDWL
APMAXSW APMAXSW
APMAXUB APMAXUB
...@@ -671,6 +675,7 @@ const ( ...@@ -671,6 +675,7 @@ const (
APMOVZXWD APMOVZXWD
APMOVZXWQ APMOVZXWQ
APMULDQ APMULDQ
APMULHRSW
APMULHUW APMULHUW
APMULHW APMULHW
APMULLD APMULLD
...@@ -683,6 +688,9 @@ const ( ...@@ -683,6 +688,9 @@ const (
APSHUFL APSHUFL
APSHUFLW APSHUFLW
APSHUFW APSHUFW
APSIGNB
APSIGND
APSIGNW
APSLLL APSLLL
APSLLO APSLLO
APSLLQ APSLLQ
......
...@@ -150,6 +150,9 @@ var Anames = []string{ ...@@ -150,6 +150,9 @@ var Anames = []string{
"OUTSB", "OUTSB",
"OUTSL", "OUTSL",
"OUTSW", "OUTSW",
"PABSB",
"PABSD",
"PABSW",
"PAUSE", "PAUSE",
"POPAL", "POPAL",
"POPAW", "POPAW",
...@@ -595,6 +598,7 @@ var Anames = []string{ ...@@ -595,6 +598,7 @@ var Anames = []string{
"PINSRD", "PINSRD",
"PINSRQ", "PINSRQ",
"PINSRW", "PINSRW",
"PMADDUBSW",
"PMADDWL", "PMADDWL",
"PMAXSW", "PMAXSW",
"PMAXUB", "PMAXUB",
...@@ -614,6 +618,7 @@ var Anames = []string{ ...@@ -614,6 +618,7 @@ var Anames = []string{
"PMOVZXWD", "PMOVZXWD",
"PMOVZXWQ", "PMOVZXWQ",
"PMULDQ", "PMULDQ",
"PMULHRSW",
"PMULHUW", "PMULHUW",
"PMULHW", "PMULHW",
"PMULLD", "PMULLD",
...@@ -626,6 +631,9 @@ var Anames = []string{ ...@@ -626,6 +631,9 @@ var Anames = []string{
"PSHUFL", "PSHUFL",
"PSHUFLW", "PSHUFLW",
"PSHUFW", "PSHUFW",
"PSIGNB",
"PSIGND",
"PSIGNW",
"PSLLL", "PSLLL",
"PSLLO", "PSLLO",
"PSLLQ", "PSLLQ",
......
...@@ -1255,6 +1255,9 @@ var optab = ...@@ -1255,6 +1255,9 @@ var optab =
{AOUTSL, ynone, Px, [23]uint8{0x6f}}, {AOUTSL, ynone, Px, [23]uint8{0x6f}},
{AOUTSW, ynone, Pe, [23]uint8{0x6f}}, {AOUTSW, ynone, Pe, [23]uint8{0x6f}},
{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}}, {AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
{APABSB, yxm_q4, Pq4, [23]uint8{0x1c}},
{APABSD, yxm_q4, Pq4, [23]uint8{0x1e}},
{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}}, {APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}}, {APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}}, {APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
...@@ -1292,6 +1295,7 @@ var optab = ...@@ -1292,6 +1295,7 @@ var optab =
{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}}, {APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}}, {APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}}, {APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}}, {APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
{APMAXSW, yxm, Pe, [23]uint8{0xee}}, {APMAXSW, yxm, Pe, [23]uint8{0xee}},
{APMAXUB, yxm, Pe, [23]uint8{0xde}}, {APMAXUB, yxm, Pe, [23]uint8{0xde}},
...@@ -1311,6 +1315,7 @@ var optab = ...@@ -1311,6 +1315,7 @@ var optab =
{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}}, {APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}}, {APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}}, {APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
{APMULHRSW, yxm_q4, Pq4, [23]uint8{0x0b}},
{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}}, {APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}}, {APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}}, {APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
...@@ -1334,6 +1339,9 @@ var optab = ...@@ -1334,6 +1339,9 @@ var optab =
{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}}, {APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}}, {APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}}, {APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
{APSIGNB, yxm_q4, Pq4, [23]uint8{0x08}},
{APSIGND, yxm_q4, Pq4, [23]uint8{0x0a}},
{APSIGNW, yxm_q4, Pq4, [23]uint8{0x09}},
{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}}, {APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, {APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, {APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment