Commit c75f81f0 authored by Russ Cox's avatar Russ Cox

cmd/objdump: move armasm, x86asm into internal packages

For Go 1.3 these external packages were collapsed into
large single-file implementations stored in the cmd/objdump
directory.

For Go 1.4 we want pprof to be able to link against them too,
so move them into cmd/internal, where they can be shared.

The new files are copied from the repo in the file path (rsc.io/...).
Those repos were code reviewed during development
(mainly by crawshaw and minux), because we knew the
main repo would use them.

Update #8798

LGTM=bradfitz
R=crawshaw, bradfitz
CC=golang-codereviews
https://golang.org/cl/153750044
parent 7de0c315
tables.go: ../armmap/map.go ../arm.csv
go run ../armmap/map.go -fmt=decoder ../arm.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"encoding/binary"
"fmt"
)
// An instFormat describes the format of an instruction encoding.
// An instruction with 32-bit value x matches the format if x&mask == value
// and the condition matches.
// The condition matches if x>>28 == 0xF && value>>28==0xF
// or if x>>28 != 0xF and value>>28 == 0.
// If x matches the format, then the rest of the fields describe how to interpret x.
// The opBits describe bits that should be extracted from x and added to the opcode.
// For example opBits = 0x1234 means that the value
// (2 bits at offset 1) followed by (4 bits at offset 3)
// should be added to op.
// Finally the args describe how to decode the instruction arguments.
// args is stored as a fixed-size array; if there are fewer than len(args) arguments,
// args[i] == 0 marks the end of the argument list.
type instFormat struct {
mask uint32
value uint32
priority int8
op Op
opBits uint64
args instArgs
}
type instArgs [4]instArg
var (
errMode = fmt.Errorf("unsupported execution mode")
errShort = fmt.Errorf("truncated instruction")
errUnknown = fmt.Errorf("unknown instruction")
)
var decoderCover []bool
// Decode decodes the leading bytes in src as a single instruction.
func Decode(src []byte, mode Mode) (inst Inst, err error) {
if mode != ModeARM {
return Inst{}, errMode
}
if len(src) < 4 {
return Inst{}, errShort
}
if decoderCover == nil {
decoderCover = make([]bool, len(instFormats))
}
x := binary.LittleEndian.Uint32(src)
// The instFormat table contains both conditional and unconditional instructions.
// Considering only the top 4 bits, the conditional instructions use mask=0, value=0,
// while the unconditional instructions use mask=f, value=f.
// Prepare a version of x with the condition cleared to 0 in conditional instructions
// and then assume mask=f during matching.
const condMask = 0xf0000000
xNoCond := x
if x&condMask != condMask {
xNoCond &^= condMask
}
var priority int8
Search:
for i := range instFormats {
f := &instFormats[i]
if xNoCond&(f.mask|condMask) != f.value || f.priority <= priority {
continue
}
delta := uint32(0)
deltaShift := uint(0)
for opBits := f.opBits; opBits != 0; opBits >>= 16 {
n := uint(opBits & 0xFF)
off := uint((opBits >> 8) & 0xFF)
delta |= (x >> off) & (1<<n - 1) << deltaShift
deltaShift += n
}
op := f.op + Op(delta)
// Special case: BKPT encodes with condition but cannot have one.
if op&^15 == BKPT_EQ && op != BKPT {
continue Search
}
var args Args
for j, aop := range f.args {
if aop == 0 {
break
}
arg := decodeArg(aop, x)
if arg == nil { // cannot decode argument
continue Search
}
args[j] = arg
}
decoderCover[i] = true
inst = Inst{
Op: op,
Args: args,
Enc: x,
Len: 4,
}
priority = f.priority
continue Search
}
if inst.Op != 0 {
return inst, nil
}
return Inst{}, errUnknown
}
// An instArg describes the encoding of a single argument.
// In the names used for arguments, _p_ means +, _m_ means -,
// _pm_ means ± (usually keyed by the U bit).
// The _W suffix indicates a general addressing mode based on the P and W bits.
// The _offset and _postindex suffixes force the given addressing mode.
// The rest should be somewhat self-explanatory, at least given
// the decodeArg function.
type instArg uint8
const (
_ instArg = iota
arg_APSR
arg_FPSCR
arg_Dn_half
arg_R1_0
arg_R1_12
arg_R2_0
arg_R2_12
arg_R_0
arg_R_12
arg_R_12_nzcv
arg_R_16
arg_R_16_WB
arg_R_8
arg_R_rotate
arg_R_shift_R
arg_R_shift_imm
arg_SP
arg_Sd
arg_Sd_Dd
arg_Dd_Sd
arg_Sm
arg_Sm_Dm
arg_Sn
arg_Sn_Dn
arg_const
arg_endian
arg_fbits
arg_fp_0
arg_imm24
arg_imm5
arg_imm5_32
arg_imm5_nz
arg_imm_12at8_4at0
arg_imm_4at16_12at0
arg_imm_vfp
arg_label24
arg_label24H
arg_label_m_12
arg_label_p_12
arg_label_pm_12
arg_label_pm_4_4
arg_lsb_width
arg_mem_R
arg_mem_R_pm_R_W
arg_mem_R_pm_R_postindex
arg_mem_R_pm_R_shift_imm_W
arg_mem_R_pm_R_shift_imm_offset
arg_mem_R_pm_R_shift_imm_postindex
arg_mem_R_pm_imm12_W
arg_mem_R_pm_imm12_offset
arg_mem_R_pm_imm12_postindex
arg_mem_R_pm_imm8_W
arg_mem_R_pm_imm8_postindex
arg_mem_R_pm_imm8at0_offset
arg_option
arg_registers
arg_registers1
arg_registers2
arg_satimm4
arg_satimm5
arg_satimm4m1
arg_satimm5m1
arg_widthm1
)
// decodeArg decodes the arg described by aop from the instruction bits x.
// It returns nil if x cannot be decoded according to aop.
func decodeArg(aop instArg, x uint32) Arg {
switch aop {
default:
return nil
case arg_APSR:
return APSR
case arg_FPSCR:
return FPSCR
case arg_R_0:
return Reg(x & (1<<4 - 1))
case arg_R_8:
return Reg((x >> 8) & (1<<4 - 1))
case arg_R_12:
return Reg((x >> 12) & (1<<4 - 1))
case arg_R_16:
return Reg((x >> 16) & (1<<4 - 1))
case arg_R_12_nzcv:
r := Reg((x >> 12) & (1<<4 - 1))
if r == R15 {
return APSR_nzcv
}
return r
case arg_R_16_WB:
mode := AddrLDM
if (x>>21)&1 != 0 {
mode = AddrLDM_WB
}
return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode}
case arg_R_rotate:
Rm := Reg(x & (1<<4 - 1))
typ, count := decodeShift(x)
// ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1.
if typ == RotateRightExt {
return Reg(Rm)
}
return RegShift{Rm, typ, uint8(count)}
case arg_R_shift_R:
Rm := Reg(x & (1<<4 - 1))
Rs := Reg((x >> 8) & (1<<4 - 1))
typ := Shift((x >> 5) & (1<<2 - 1))
return RegShiftReg{Rm, typ, Rs}
case arg_R_shift_imm:
Rm := Reg(x & (1<<4 - 1))
typ, count := decodeShift(x)
if typ == ShiftLeft && count == 0 {
return Reg(Rm)
}
return RegShift{Rm, typ, uint8(count)}
case arg_R1_0:
return Reg((x & (1<<4 - 1)))
case arg_R1_12:
return Reg(((x >> 12) & (1<<4 - 1)))
case arg_R2_0:
return Reg((x & (1<<4 - 1)) | 1)
case arg_R2_12:
return Reg(((x >> 12) & (1<<4 - 1)) | 1)
case arg_SP:
return SP
case arg_Sd_Dd:
v := (x >> 12) & (1<<4 - 1)
vx := (x >> 22) & 1
sz := (x >> 8) & 1
if sz != 0 {
return D0 + Reg(vx<<4+v)
} else {
return S0 + Reg(v<<1+vx)
}
case arg_Dd_Sd:
return decodeArg(arg_Sd_Dd, x^(1<<8))
case arg_Sd:
v := (x >> 12) & (1<<4 - 1)
vx := (x >> 22) & 1
return S0 + Reg(v<<1+vx)
case arg_Sm_Dm:
v := (x >> 0) & (1<<4 - 1)
vx := (x >> 5) & 1
sz := (x >> 8) & 1
if sz != 0 {
return D0 + Reg(vx<<4+v)
} else {
return S0 + Reg(v<<1+vx)
}
case arg_Sm:
v := (x >> 0) & (1<<4 - 1)
vx := (x >> 5) & 1
return S0 + Reg(v<<1+vx)
case arg_Dn_half:
v := (x >> 16) & (1<<4 - 1)
vx := (x >> 7) & 1
return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)}
case arg_Sn_Dn:
v := (x >> 16) & (1<<4 - 1)
vx := (x >> 7) & 1
sz := (x >> 8) & 1
if sz != 0 {
return D0 + Reg(vx<<4+v)
} else {
return S0 + Reg(v<<1+vx)
}
case arg_Sn:
v := (x >> 16) & (1<<4 - 1)
vx := (x >> 7) & 1
return S0 + Reg(v<<1+vx)
case arg_const:
v := x & (1<<8 - 1)
rot := (x >> 8) & (1<<4 - 1) * 2
if rot > 0 && v&3 == 0 {
// could rotate less
return ImmAlt{uint8(v), uint8(rot)}
}
if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v {
// could wrap around to rot==0.
return ImmAlt{uint8(v), uint8(rot)}
}
return Imm(v>>rot | v<<(32-rot))
case arg_endian:
return Endian((x >> 9) & 1)
case arg_fbits:
return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1))
case arg_fp_0:
return Imm(0)
case arg_imm24:
return Imm(x & (1<<24 - 1))
case arg_imm5:
return Imm((x >> 7) & (1<<5 - 1))
case arg_imm5_32:
x = (x >> 7) & (1<<5 - 1)
if x == 0 {
x = 32
}
return Imm(x)
case arg_imm5_nz:
x = (x >> 7) & (1<<5 - 1)
if x == 0 {
return nil
}
return Imm(x)
case arg_imm_4at16_12at0:
return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1))
case arg_imm_12at8_4at0:
return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1))
case arg_imm_vfp:
x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1)
return Imm(x)
case arg_label24:
imm := (x & (1<<24 - 1)) << 2
return PCRel(int32(imm<<6) >> 6)
case arg_label24H:
h := (x >> 24) & 1
imm := (x&(1<<24-1))<<2 | h<<1
return PCRel(int32(imm<<6) >> 6)
case arg_label_m_12:
d := int32(x & (1<<12 - 1))
return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)}
case arg_label_p_12:
d := int32(x & (1<<12 - 1))
return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
case arg_label_pm_12:
d := int32(x & (1<<12 - 1))
u := (x >> 23) & 1
if u == 0 {
d = -d
}
return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)}
case arg_label_pm_4_4:
d := int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
u := (x >> 23) & 1
if u == 0 {
d = -d
}
return PCRel(d)
case arg_lsb_width:
lsb := (x >> 7) & (1<<5 - 1)
msb := (x >> 16) & (1<<5 - 1)
if msb < lsb || msb >= 32 {
return nil
}
return Imm(msb + 1 - lsb)
case arg_mem_R:
Rn := Reg((x >> 16) & (1<<4 - 1))
return Mem{Base: Rn, Mode: AddrOffset}
case arg_mem_R_pm_R_postindex:
// Treat [<Rn>],+/-<Rm> like [<Rn>,+/-<Rm>{,<shift>}]{!}
// by forcing shift bits to <<0 and P=0, W=0 (postindex=true).
return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21))
case arg_mem_R_pm_R_W:
// Treat [<Rn>,+/-<Rm>]{!} like [<Rn>,+/-<Rm>{,<shift>}]{!}
// by forcing shift bits to <<0.
return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5))
case arg_mem_R_pm_R_shift_imm_offset:
// Treat [<Rn>],+/-<Rm>{,<shift>} like [<Rn>,+/-<Rm>{,<shift>}]{!}
// by forcing P=1, W=0 (index=false, wback=false).
return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24)
case arg_mem_R_pm_R_shift_imm_postindex:
// Treat [<Rn>],+/-<Rm>{,<shift>} like [<Rn>,+/-<Rm>{,<shift>}]{!}
// by forcing P=0, W=0 (postindex=true).
return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21))
case arg_mem_R_pm_R_shift_imm_W:
Rn := Reg((x >> 16) & (1<<4 - 1))
Rm := Reg(x & (1<<4 - 1))
typ, count := decodeShift(x)
u := (x >> 23) & 1
w := (x >> 21) & 1
p := (x >> 24) & 1
if p == 0 && w == 1 {
return nil
}
sign := int8(+1)
if u == 0 {
sign = -1
}
mode := AddrMode(uint8(p<<1) | uint8(w^1))
return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count}
case arg_mem_R_pm_imm12_offset:
// Treat [<Rn>,#+/-<imm12>] like [<Rn>{,#+/-<imm12>}]{!}
// by forcing P=1, W=0 (index=false, wback=false).
return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24)
case arg_mem_R_pm_imm12_postindex:
// Treat [<Rn>],#+/-<imm12> like [<Rn>{,#+/-<imm12>}]{!}
// by forcing P=0, W=0 (postindex=true).
return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21))
case arg_mem_R_pm_imm12_W:
Rn := Reg((x >> 16) & (1<<4 - 1))
u := (x >> 23) & 1
w := (x >> 21) & 1
p := (x >> 24) & 1
if p == 0 && w == 1 {
return nil
}
sign := int8(+1)
if u == 0 {
sign = -1
}
imm := int16(x & (1<<12 - 1))
mode := AddrMode(uint8(p<<1) | uint8(w^1))
return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
case arg_mem_R_pm_imm8_postindex:
// Treat [<Rn>],#+/-<imm8> like [<Rn>{,#+/-<imm8>}]{!}
// by forcing P=0, W=0 (postindex=true).
return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21))
case arg_mem_R_pm_imm8_W:
Rn := Reg((x >> 16) & (1<<4 - 1))
u := (x >> 23) & 1
w := (x >> 21) & 1
p := (x >> 24) & 1
if p == 0 && w == 1 {
return nil
}
sign := int8(+1)
if u == 0 {
sign = -1
}
imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1))
mode := AddrMode(uint8(p<<1) | uint8(w^1))
return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm}
case arg_mem_R_pm_imm8at0_offset:
Rn := Reg((x >> 16) & (1<<4 - 1))
u := (x >> 23) & 1
sign := int8(+1)
if u == 0 {
sign = -1
}
imm := int16(x&(1<<8-1)) << 2
return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm}
case arg_option:
return Imm(x & (1<<4 - 1))
case arg_registers:
return RegList(x & (1<<16 - 1))
case arg_registers2:
x &= 1<<16 - 1
n := 0
for i := 0; i < 16; i++ {
if x>>uint(i)&1 != 0 {
n++
}
}
if n < 2 {
return nil
}
return RegList(x)
case arg_registers1:
Rt := (x >> 12) & (1<<4 - 1)
return RegList(1 << Rt)
case arg_satimm4:
return Imm((x >> 16) & (1<<4 - 1))
case arg_satimm5:
return Imm((x >> 16) & (1<<5 - 1))
case arg_satimm4m1:
return Imm((x>>16)&(1<<4-1) + 1)
case arg_satimm5m1:
return Imm((x>>16)&(1<<5-1) + 1)
case arg_widthm1:
return Imm((x>>16)&(1<<5-1) + 1)
}
}
// decodeShift decodes the shift-by-immediate encoded in x.
func decodeShift(x uint32) (Shift, uint8) {
count := (x >> 7) & (1<<5 - 1)
typ := Shift((x >> 5) & (1<<2 - 1))
switch typ {
case ShiftRight, ShiftRightSigned:
if count == 0 {
count = 32
}
case RotateRight:
if count == 0 {
typ = RotateRightExt
count = 1
}
}
return typ, uint8(count)
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"encoding/hex"
"io/ioutil"
"strconv"
"strings"
"testing"
)
func TestDecode(t *testing.T) {
data, err := ioutil.ReadFile("testdata/decode.txt")
if err != nil {
t.Fatal(err)
}
all := string(data)
for strings.Contains(all, "\t\t") {
all = strings.Replace(all, "\t\t", "\t", -1)
}
for _, line := range strings.Split(all, "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
f := strings.SplitN(line, "\t", 4)
i := strings.Index(f[0], "|")
if i < 0 {
t.Errorf("parsing %q: missing | separator", f[0])
continue
}
if i%2 != 0 {
t.Errorf("parsing %q: misaligned | separator", f[0])
}
size := i / 2
code, err := hex.DecodeString(f[0][:i] + f[0][i+1:])
if err != nil {
t.Errorf("parsing %q: %v", f[0], err)
continue
}
mode, err := strconv.Atoi(f[1])
if err != nil {
t.Errorf("invalid mode %q in: %s", f[1], line)
continue
}
syntax, asm := f[2], f[3]
inst, err := Decode(code, Mode(mode))
var out string
if err != nil {
out = "error: " + err.Error()
} else {
switch syntax {
case "gnu":
out = GNUSyntax(inst)
case "plan9":
out = Plan9Syntax(inst, 0, nil, nil)
default:
t.Errorf("unknown syntax %q", syntax)
continue
}
}
if out != asm || inst.Len != size {
t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size)
}
}
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Support for testing against external disassembler program.
// Copied and simplified from rsc.io/x86/x86asm/ext_test.go.
package armasm
import (
"bufio"
"bytes"
"encoding/hex"
"flag"
"fmt"
"io/ioutil"
"log"
"math/rand"
"os"
"os/exec"
"regexp"
"runtime"
"strings"
"testing"
"time"
)
var (
printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
dumpTest = flag.Bool("dump", false, "dump all encodings")
mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
longTest = flag.Bool("long", false, "long test")
keep = flag.Bool("keep", false, "keep object files around")
debug = false
)
// A ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint32
enc [4]byte
nenc int
text string
}
func (r ExtInst) String() string {
return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
}
// An ExtDis is a connection between an external disassembler and a test.
type ExtDis struct {
Arch Mode
Dec chan ExtInst
File *os.File
Size int
KeepFile bool
Cmd *exec.Cmd
}
// Run runs the given command - the external disassembler - and returns
// a buffered reader of its standard output.
func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
if *keep {
log.Printf("%s\n", strings.Join(cmd, " "))
}
ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
out, err := ext.Cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("stdoutpipe: %v", err)
}
if err := ext.Cmd.Start(); err != nil {
return nil, fmt.Errorf("exec: %v", err)
}
b := bufio.NewReaderSize(out, 1<<20)
return b, nil
}
// Wait waits for the command started with Run to exit.
func (ext *ExtDis) Wait() error {
return ext.Cmd.Wait()
}
// testExtDis tests a set of byte sequences against an external disassembler.
// The disassembler is expected to produce the given syntax and be run
// in the given architecture mode (16, 32, or 64-bit).
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time.
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error.
func testExtDis(
t *testing.T,
syntax string,
arch Mode,
extdis func(ext *ExtDis) error,
generate func(f func([]byte)),
allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
) {
start := time.Now()
ext := &ExtDis{
Dec: make(chan ExtInst),
Arch: arch,
}
errc := make(chan error)
// First pass: write instructions to input file for external disassembler.
file, f, size, err := writeInst(generate)
if err != nil {
t.Fatal(err)
}
ext.Size = size
ext.File = f
defer func() {
f.Close()
if !*keep {
os.Remove(file)
}
}()
// Second pass: compare disassembly against our decodings.
var (
totalTests = 0
totalSkips = 0
totalErrors = 0
errors = make([]string, 0, 100) // sampled errors, at most cap
)
go func() {
errc <- extdis(ext)
}()
generate(func(enc []byte) {
dec, ok := <-ext.Dec
if !ok {
t.Errorf("decoding stream ended early")
return
}
inst, text := disasm(syntax, arch, pad(enc))
totalTests++
if *dumpTest {
fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
}
if text != dec.text || inst.Len != dec.nenc {
suffix := ""
if allowedMismatch(text, size, &inst, dec) {
totalSkips++
if !*mismatch {
return
}
suffix += " (allowed mismatch)"
}
totalErrors++
if len(errors) >= cap(errors) {
j := rand.Intn(totalErrors)
if j >= cap(errors) {
return
}
errors = append(errors[:j], errors[j+1:]...)
}
errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
}
})
if *mismatch {
totalErrors -= totalSkips
}
for _, b := range errors {
t.Log(b)
}
if totalErrors > 0 {
t.Fail()
}
t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
if err := <-errc; err != nil {
t.Fatal("external disassembler: %v", err)
}
}
const start = 0x8000 // start address of text
// writeInst writes the generated byte sequences to a new file
// starting at offset start. That file is intended to be the input to
// the external disassembler.
func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
f, err = ioutil.TempFile("", "armasm")
if err != nil {
return
}
file = f.Name()
f.Seek(start, 0)
w := bufio.NewWriter(f)
defer w.Flush()
size = 0
generate(func(x []byte) {
if len(x) > 4 {
x = x[:4]
}
if debug {
fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
}
w.Write(x)
w.Write(zeros[len(x):])
size += len(zeros)
})
return file, f, size, nil
}
var zeros = []byte{0, 0, 0, 0}
// pad pads the code sequenc with pops.
func pad(enc []byte) []byte {
if len(enc) < 4 {
enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
}
return enc
}
// disasm returns the decoded instruction and text
// for the given source bytes, using the given syntax and mode.
func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) {
// If printTests is set, we record the coverage value
// before and after, and we write out the inputs for which
// coverage went up, in the format expected in testdata/decode.text.
// This produces a fairly small set of test cases that exercise nearly
// all the code.
var cover float64
if *printTests {
cover -= coverage()
}
inst, err := Decode(src, mode)
if err != nil {
text = "error: " + err.Error()
} else {
text = inst.String()
switch syntax {
//case "arm":
// text = ARMSyntax(inst)
case "gnu":
text = GNUSyntax(inst)
//case "plan9":
// text = Plan9Syntax(inst, 0, nil)
default:
text = "error: unknown syntax " + syntax
}
}
if *printTests {
cover += coverage()
if cover > 0 {
max := len(src)
if max > 4 && inst.Len <= 4 {
max = 4
}
fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text)
}
}
return
}
// coverage returns a floating point number denoting the
// test coverage until now. The number increases when new code paths are exercised,
// both in the Go program and in the decoder byte code.
func coverage() float64 {
/*
testing.Coverage is not in the main distribution.
The implementation, which must go in package testing, is:
// Coverage reports the current code coverage as a fraction in the range [0, 1].
func Coverage() float64 {
var n, d int64
for _, counters := range cover.Counters {
for _, c := range counters {
if c > 0 {
n++
}
d++
}
}
if d == 0 {
return 0
}
return float64(n) / float64(d)
}
*/
var f float64
f += testing.Coverage()
f += decodeCoverage()
return f
}
func decodeCoverage() float64 {
n := 0
for _, t := range decoderCover {
if t {
n++
}
}
return float64(1+n) / float64(1+len(decoderCover))
}
// Helpers for writing disassembler output parsers.
// hasPrefix reports whether any of the space-separated words in the text s
// begins with any of the given prefixes.
func hasPrefix(s string, prefixes ...string) bool {
for _, prefix := range prefixes {
for s := s; s != ""; {
if strings.HasPrefix(s, prefix) {
return true
}
i := strings.Index(s, " ")
if i < 0 {
break
}
s = s[i+1:]
}
}
return false
}
// contains reports whether the text s contains any of the given substrings.
func contains(s string, substrings ...string) bool {
for _, sub := range substrings {
if strings.Contains(s, sub) {
return true
}
}
return false
}
// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
func isHex(b byte) bool { return b == '0' || unhex[b] > 0 }
// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool signals whether any invalid hex was found.
// Spaces and tabs between bytes are okay but any other non-hex is not.
func parseHex(hex []byte, raw []byte) ([]byte, bool) {
hex = trimSpace(hex)
for j := 0; j < len(hex); {
for hex[j] == ' ' || hex[j] == '\t' {
j++
}
if j >= len(hex) {
break
}
if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
return nil, false
}
raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]])
j += 2
}
return raw, true
}
var unhex = [256]byte{
'0': 0,
'1': 1,
'2': 2,
'3': 3,
'4': 4,
'5': 5,
'6': 6,
'7': 7,
'8': 8,
'9': 9,
'A': 10,
'B': 11,
'C': 12,
'D': 13,
'E': 14,
'F': 15,
'a': 10,
'b': 11,
'c': 12,
'd': 13,
'e': 14,
'f': 15,
}
// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
func index(s []byte, t string) int {
i := 0
for {
j := bytes.IndexByte(s[i:], t[0])
if j < 0 {
return -1
}
i = i + j
if i+len(t) > len(s) {
return -1
}
for k := 1; k < len(t); k++ {
if s[i+k] != t[k] {
goto nomatch
}
}
return i
nomatch:
i++
}
}
// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
// If s must be rewritten, it is rewritten in place.
func fixSpace(s []byte) []byte {
s = trimSpace(s)
for i := 0; i < len(s); i++ {
if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
goto Fix
}
}
return s
Fix:
b := s
w := 0
for i := 0; i < len(s); i++ {
c := s[i]
if c == '\t' || c == '\n' {
c = ' '
}
if c == ' ' && w > 0 && b[w-1] == ' ' {
continue
}
b[w] = c
w++
}
if w > 0 && b[w-1] == ' ' {
w--
}
return b[:w]
}
// trimSpace trims leading and trailing space from s, returning a subslice of s.
func trimSpace(s []byte) []byte {
j := len(s)
for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') {
j--
}
i := 0
for i < j && (s[i] == ' ' || s[i] == '\t') {
i++
}
return s[i:j]
}
// pcrel matches instructions using relative addressing mode.
var (
pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`)
)
// Generators.
//
// The test cases are described as functions that invoke a callback repeatedly,
// with a new input sequence each time. These helpers make writing those
// a little easier.
// condCases generates conditional instructions.
func condCases(t *testing.T) func(func([]byte)) {
return func(try func([]byte)) {
// All the strides are relatively prime to 2 and therefore to 2²⁸,
// so we will not repeat any instructions until we have tried all 2²⁸.
// Using a stride other than 1 is meant to visit the instructions in a
// pseudorandom order, which gives better variety in the set of
// test cases chosen by -printtests.
stride := uint32(10007)
n := 1 << 28 / 7
if testing.Short() {
stride = 100003
n = 1 << 28 / 1001
} else if *longTest {
stride = 200000033
n = 1 << 28
}
x := uint32(0)
for i := 0; i < n; i++ {
enc := (x%15)<<28 | x&(1<<28-1)
try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
x += stride
}
}
}
// uncondCases generates unconditional instructions.
func uncondCases(t *testing.T) func(func([]byte)) {
return func(try func([]byte)) {
condCases(t)(func(enc []byte) {
enc[3] |= 0xF0
try(enc)
})
}
}
func countBits(x uint32) int {
n := 0
for ; x != 0; x >>= 1 {
n += int(x & 1)
}
return n
}
func expandBits(x, m uint32) uint32 {
var out uint32
for i := uint(0); i < 32; i++ {
out >>= 1
if m&1 != 0 {
out |= (x & 1) << 31
x >>= 1
}
m >>= 1
}
return out
}
func tryCondMask(mask, val uint32, try func([]byte)) {
n := countBits(^mask)
bits := uint32(0)
for i := 0; i < 1<<uint(n); i++ {
bits += 848251 // arbitrary prime
x := val | expandBits(bits, ^mask) | uint32(i)%15<<28
try([]byte{byte(x), byte(x >> 8), byte(x >> 16), byte(x >> 24)})
}
}
// vfpCases generates VFP instructions.
func vfpCases(t *testing.T) func(func([]byte)) {
const (
vfpmask uint32 = 0xFF00FE10
vfp uint32 = 0x0E009A00
)
return func(try func([]byte)) {
tryCondMask(0xff00fe10, 0x0e009a00, try) // standard VFP instruction space
tryCondMask(0xffc00f7f, 0x0e000b10, try) // VFP MOV core reg to/from float64 half
tryCondMask(0xffe00f7f, 0x0e000a10, try) // VFP MOV core reg to/from float32
tryCondMask(0xffef0fff, 0x0ee10a10, try) // VFP MOV core reg to/from cond codes
}
}
// hexCases generates the cases written in hexadecimal in the encoded string.
// Spaces in 'encoded' separate entire test cases, not individual bytes.
func hexCases(t *testing.T, encoded string) func(func([]byte)) {
return func(try func([]byte)) {
for _, x := range strings.Fields(encoded) {
src, err := hex.DecodeString(x)
if err != nil {
t.Errorf("parsing %q: %v", x, err)
}
try(src)
}
}
}
// testdataCases generates the test cases recorded in testdata/decode.txt.
// It only uses the inputs; it ignores the answers recorded in that file.
func testdataCases(t *testing.T) func(func([]byte)) {
var codes [][]byte
data, err := ioutil.ReadFile("testdata/decode.txt")
if err != nil {
t.Fatal(err)
}
for _, line := range strings.Split(string(data), "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
f := strings.Fields(line)[0]
i := strings.Index(f, "|")
if i < 0 {
t.Errorf("parsing %q: missing | separator", f)
continue
}
if i%2 != 0 {
t.Errorf("parsing %q: misaligned | separator", f)
}
code, err := hex.DecodeString(f[:i] + f[i+1:])
if err != nil {
t.Errorf("parsing %q: %v", f, err)
continue
}
codes = append(codes, code)
}
return func(try func([]byte)) {
for _, code := range codes {
try(code)
}
}
}
func caller(skip int) string {
pc, _, _, _ := runtime.Caller(skip)
f := runtime.FuncForPC(pc)
name := "?"
if f != nil {
name = f.Name()
if i := strings.LastIndex(name, "."); i >= 0 {
name = name[i+1:]
}
}
return name
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"bytes"
"fmt"
"strings"
)
var saveDot = strings.NewReplacer(
".F16", "_dot_F16",
".F32", "_dot_F32",
".F64", "_dot_F64",
".S32", "_dot_S32",
".U32", "_dot_U32",
".FXS", "_dot_S",
".FXU", "_dot_U",
".32", "_dot_32",
)
// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
// This form typically matches the syntax defined in the ARM Reference Manual.
func GNUSyntax(inst Inst) string {
var buf bytes.Buffer
op := inst.Op.String()
op = saveDot.Replace(op)
op = strings.Replace(op, ".", "", -1)
op = strings.Replace(op, "_dot_", ".", -1)
op = strings.ToLower(op)
buf.WriteString(op)
sep := " "
for i, arg := range inst.Args {
if arg == nil {
break
}
text := gnuArg(&inst, i, arg)
if text == "" {
continue
}
buf.WriteString(sep)
sep = ", "
buf.WriteString(text)
}
return buf.String()
}
func gnuArg(inst *Inst, argIndex int, arg Arg) string {
switch inst.Op &^ 15 {
case LDRD_EQ, LDREXD_EQ, STRD_EQ:
if argIndex == 1 {
// second argument in consecutive pair not printed
return ""
}
case STREXD_EQ:
if argIndex == 2 {
// second argument in consecutive pair not printed
return ""
}
}
switch arg := arg.(type) {
case Imm:
switch inst.Op &^ 15 {
case BKPT_EQ:
return fmt.Sprintf("%#04x", uint32(arg))
case SVC_EQ:
return fmt.Sprintf("%#08x", uint32(arg))
}
return fmt.Sprintf("#%d", int32(arg))
case ImmAlt:
return fmt.Sprintf("#%d, %d", arg.Val, arg.Rot)
case Mem:
R := gnuArg(inst, -1, arg.Base)
X := ""
if arg.Sign != 0 {
X = ""
if arg.Sign < 0 {
X = "-"
}
X += gnuArg(inst, -1, arg.Index)
if arg.Shift == ShiftLeft && arg.Count == 0 {
// nothing
} else if arg.Shift == RotateRightExt {
X += ", rrx"
} else {
X += fmt.Sprintf(", %s #%d", strings.ToLower(arg.Shift.String()), arg.Count)
}
} else {
X = fmt.Sprintf("#%d", arg.Offset)
}
switch arg.Mode {
case AddrOffset:
if X == "#0" {
return fmt.Sprintf("[%s]", R)
}
return fmt.Sprintf("[%s, %s]", R, X)
case AddrPreIndex:
return fmt.Sprintf("[%s, %s]!", R, X)
case AddrPostIndex:
return fmt.Sprintf("[%s], %s", R, X)
case AddrLDM:
if X == "#0" {
return R
}
case AddrLDM_WB:
if X == "#0" {
return R + "!"
}
}
return fmt.Sprintf("[%s Mode(%d) %s]", R, int(arg.Mode), X)
case PCRel:
return fmt.Sprintf(".%+#x", int32(arg)+4)
case Reg:
switch inst.Op &^ 15 {
case LDREX_EQ:
if argIndex == 0 {
return fmt.Sprintf("r%d", int32(arg))
}
}
switch arg {
case R10:
return "sl"
case R11:
return "fp"
case R12:
return "ip"
}
case RegList:
var buf bytes.Buffer
fmt.Fprintf(&buf, "{")
sep := ""
for i := 0; i < 16; i++ {
if arg&(1<<uint(i)) != 0 {
fmt.Fprintf(&buf, "%s%s", sep, gnuArg(inst, -1, Reg(i)))
sep = ", "
}
}
fmt.Fprintf(&buf, "}")
return buf.String()
case RegShift:
if arg.Shift == ShiftLeft && arg.Count == 0 {
return gnuArg(inst, -1, arg.Reg)
}
if arg.Shift == RotateRightExt {
return gnuArg(inst, -1, arg.Reg) + ", rrx"
}
return fmt.Sprintf("%s, %s #%d", gnuArg(inst, -1, arg.Reg), strings.ToLower(arg.Shift.String()), arg.Count)
case RegShiftReg:
return fmt.Sprintf("%s, %s %s", gnuArg(inst, -1, arg.Reg), strings.ToLower(arg.Shift.String()), gnuArg(inst, -1, arg.RegCount))
}
return strings.ToLower(arg.String())
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"bytes"
"fmt"
)
// A Mode is an instruction execution mode.
type Mode int
const (
_ Mode = iota
ModeARM
ModeThumb
)
func (m Mode) String() string {
switch m {
case ModeARM:
return "ARM"
case ModeThumb:
return "Thumb"
}
return fmt.Sprintf("Mode(%d)", int(m))
}
// An Op is an ARM opcode.
type Op uint16
// NOTE: The actual Op values are defined in tables.go.
// They are chosen to simplify instruction decoding and
// are not a dense packing from 0 to N, although the
// density is high, probably at least 90%.
func (op Op) String() string {
if op >= Op(len(opstr)) || opstr[op] == "" {
return fmt.Sprintf("Op(%d)", int(op))
}
return opstr[op]
}
// An Inst is a single instruction.
type Inst struct {
Op Op // Opcode mnemonic
Enc uint32 // Raw encoding bits.
Len int // Length of encoding in bytes.
Args Args // Instruction arguments, in ARM manual order.
}
func (i Inst) String() string {
var buf bytes.Buffer
buf.WriteString(i.Op.String())
for j, arg := range i.Args {
if arg == nil {
break
}
if j == 0 {
buf.WriteString(" ")
} else {
buf.WriteString(", ")
}
buf.WriteString(arg.String())
}
return buf.String()
}
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
type Args [4]Arg
// An Arg is a single instruction argument, one of these types:
// Endian, Imm, Mem, PCRel, Reg, RegList, RegShift, RegShiftReg.
type Arg interface {
IsArg()
String() string
}
type Float32Imm float32
func (Float32Imm) IsArg() {}
func (f Float32Imm) String() string {
return fmt.Sprintf("#%v", float32(f))
}
type Float64Imm float32
func (Float64Imm) IsArg() {}
func (f Float64Imm) String() string {
return fmt.Sprintf("#%v", float64(f))
}
// An Imm is an integer constant.
type Imm uint32
func (Imm) IsArg() {}
func (i Imm) String() string {
return fmt.Sprintf("#%#x", uint32(i))
}
// A ImmAlt is an alternate encoding of an integer constant.
type ImmAlt struct {
Val uint8
Rot uint8
}
func (ImmAlt) IsArg() {}
func (i ImmAlt) Imm() Imm {
v := uint32(i.Val)
r := uint(i.Rot)
return Imm(v>>r | v<<(32-r))
}
func (i ImmAlt) String() string {
return fmt.Sprintf("#%#x, %d", i.Val, i.Rot)
}
// A Label is a text (code) address.
type Label uint32
func (Label) IsArg() {}
func (i Label) String() string {
return fmt.Sprintf("%#x", uint32(i))
}
// A Reg is a single register.
// The zero value denotes R0, not the absence of a register.
type Reg uint8
const (
R0 Reg = iota
R1
R2
R3
R4
R5
R6
R7
R8
R9
R10
R11
R12
R13
R14
R15
S0
S1
S2
S3
S4
S5
S6
S7
S8
S9
S10
S11
S12
S13
S14
S15
S16
S17
S18
S19
S20
S21
S22
S23
S24
S25
S26
S27
S28
S29
S30
S31
D0
D1
D2
D3
D4
D5
D6
D7
D8
D9
D10
D11
D12
D13
D14
D15
D16
D17
D18
D19
D20
D21
D22
D23
D24
D25
D26
D27
D28
D29
D30
D31
APSR
APSR_nzcv
FPSCR
SP = R13
LR = R14
PC = R15
)
func (Reg) IsArg() {}
func (r Reg) String() string {
switch r {
case APSR:
return "APSR"
case APSR_nzcv:
return "APSR_nzcv"
case FPSCR:
return "FPSCR"
case SP:
return "SP"
case PC:
return "PC"
case LR:
return "LR"
}
if R0 <= r && r <= R15 {
return fmt.Sprintf("R%d", int(r-R0))
}
if S0 <= r && r <= S31 {
return fmt.Sprintf("S%d", int(r-S0))
}
if D0 <= r && r <= D31 {
return fmt.Sprintf("D%d", int(r-D0))
}
return fmt.Sprintf("Reg(%d)", int(r))
}
// A RegX represents a fraction of a multi-value register.
// The Index field specifies the index number,
// but the size of the fraction is not specified.
// It must be inferred from the instruction and the register type.
// For example, in a VMOV instruction, RegX{D5, 1} represents
// the top 32 bits of the 64-bit D5 register.
type RegX struct {
Reg Reg
Index int
}
func (RegX) IsArg() {}
func (r RegX) String() string {
return fmt.Sprintf("%s[%d]", r.Reg, r.Index)
}
// A RegList is a register list.
// Bits at indexes x = 0 through 15 indicate whether the corresponding Rx register is in the list.
type RegList uint16
func (RegList) IsArg() {}
func (r RegList) String() string {
var buf bytes.Buffer
fmt.Fprintf(&buf, "{")
sep := ""
for i := 0; i < 16; i++ {
if r&(1<<uint(i)) != 0 {
fmt.Fprintf(&buf, "%s%s", sep, Reg(i).String())
sep = ","
}
}
fmt.Fprintf(&buf, "}")
return buf.String()
}
// An Endian is the argument to the SETEND instruction.
type Endian uint8
const (
LittleEndian Endian = 0
BigEndian Endian = 1
)
func (Endian) IsArg() {}
func (e Endian) String() string {
if e != 0 {
return "BE"
}
return "LE"
}
// A Shift describes an ARM shift operation.
type Shift uint8
const (
ShiftLeft Shift = 0 // left shift
ShiftRight Shift = 1 // logical (unsigned) right shift
ShiftRightSigned Shift = 2 // arithmetic (signed) right shift
RotateRight Shift = 3 // right rotate
RotateRightExt Shift = 4 // right rotate through carry (Count will always be 1)
)
var shiftName = [...]string{
"LSL", "LSR", "ASR", "ROR", "RRX",
}
func (s Shift) String() string {
if s < 5 {
return shiftName[s]
}
return fmt.Sprintf("Shift(%d)", int(s))
}
// A RegShift is a register shifted by a constant.
type RegShift struct {
Reg Reg
Shift Shift
Count uint8
}
func (RegShift) IsArg() {}
func (r RegShift) String() string {
return fmt.Sprintf("%s %s #%d", r.Reg, r.Shift, r.Count)
}
// A RegShiftReg is a register shifted by a register.
type RegShiftReg struct {
Reg Reg
Shift Shift
RegCount Reg
}
func (RegShiftReg) IsArg() {}
func (r RegShiftReg) String() string {
return fmt.Sprintf("%s %s %s", r.Reg, r.Shift, r.RegCount)
}
// A PCRel describes a memory address (usually a code label)
// as a distance relative to the program counter.
// TODO(rsc): Define which program counter (PC+4? PC+8? PC?).
type PCRel int32
func (PCRel) IsArg() {}
func (r PCRel) String() string {
return fmt.Sprintf("PC%+#x", int32(r))
}
// An AddrMode is an ARM addressing mode.
type AddrMode uint8
const (
_ AddrMode = iota
AddrPostIndex // [R], X – use address R, set R = R + X
AddrPreIndex // [R, X]! – use address R + X, set R = R + X
AddrOffset // [R, X] – use address R + X
AddrLDM // R – [R] but formats as R, for LDM/STM only
AddrLDM_WB // R! - [R], X where X is instruction-specific amount, for LDM/STM only
)
// A Mem is a memory reference made up of a base R and index expression X.
// The effective memory address is R or R+X depending on AddrMode.
// The index expression is X = Sign*(Index Shift Count) + Offset,
// but in any instruction either Sign = 0 or Offset = 0.
type Mem struct {
Base Reg
Mode AddrMode
Sign int8
Index Reg
Shift Shift
Count uint8
Offset int16
}
func (Mem) IsArg() {}
func (m Mem) String() string {
R := m.Base.String()
X := ""
if m.Sign != 0 {
X = "+"
if m.Sign < 0 {
X = "-"
}
X += m.Index.String()
if m.Shift != ShiftLeft || m.Count != 0 {
X += fmt.Sprintf(", %s #%d", m.Shift, m.Count)
}
} else {
X = fmt.Sprintf("#%d", m.Offset)
}
switch m.Mode {
case AddrOffset:
if X == "#0" {
return fmt.Sprintf("[%s]", R)
}
return fmt.Sprintf("[%s, %s]", R, X)
case AddrPreIndex:
return fmt.Sprintf("[%s, %s]!", R, X)
case AddrPostIndex:
return fmt.Sprintf("[%s], %s", R, X)
case AddrLDM:
if X == "#0" {
return R
}
case AddrLDM_WB:
if X == "#0" {
return R + "!"
}
}
return fmt.Sprintf("[%s Mode(%d) %s]", R, int(m.Mode), X)
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"encoding/binary"
"strings"
"testing"
)
func TestObjdumpARMTestdata(t *testing.T) { testObjdumpARM(t, testdataCases(t)) }
func TestObjdumpARMManual(t *testing.T) { testObjdumpARM(t, hexCases(t, objdumpManualTests)) }
func TestObjdumpARMCond(t *testing.T) { testObjdumpARM(t, condCases(t)) }
func TestObjdumpARMUncond(t *testing.T) { testObjdumpARM(t, uncondCases(t)) }
func TestObjdumpARMVFP(t *testing.T) { testObjdumpARM(t, vfpCases(t)) }
// objdumpManualTests holds test cases that will be run by TestObjdumpARMManual.
// If you are debugging a few cases that turned up in a longer run, it can be useful
// to list them here and then use -run=Manual, particularly with tracing enabled.
// Note that these are byte sequences, so they must be reversed from the usual
// word presentation.
var objdumpManualTests = `
00000000
`
// allowedMismatchObjdump reports whether the mismatch between text and dec
// should be allowed by the test.
func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool {
if hasPrefix(text, "error:") {
if hasPrefix(dec.text, unsupported...) || strings.Contains(dec.text, "invalid:") || strings.HasSuffix(dec.text, "^") || strings.Contains(dec.text, "f16.f64") || strings.Contains(dec.text, "f64.f16") {
return true
}
// word 4320F02C: libopcodes says 'nopmi {44}'.
if hasPrefix(dec.text, "nop") && strings.Contains(dec.text, "{") {
return true
}
}
if hasPrefix(dec.text, "error:") && text == "undef" && inst.Enc == 0xf7fabcfd {
return true
}
// word 00f02053: libopcodes says 'noppl {0}'.
if hasPrefix(dec.text, "nop") && hasPrefix(text, "nop") && dec.text == text+" {0}" {
return true
}
// word F57FF04F. we say 'dsb #15', libopcodes says 'dsb sy'.
if hasPrefix(text, "dsb") && hasPrefix(dec.text, "dsb") {
return true
}
// word F57FF06F. we say 'isb #15', libopcodes says 'isb sy'.
if hasPrefix(text, "isb") && hasPrefix(dec.text, "isb") {
return true
}
// word F57FF053. we say 'dmb #3', libopcodes says 'dmb osh'.
if hasPrefix(text, "dmb") && hasPrefix(dec.text, "dmb") {
return true
}
// word 992D0000. push/stmdb with no registers (undefined).
// we say 'stmdbls sp!, {}', libopcodes says 'pushls {}'.
if hasPrefix(text, "stmdb") && hasPrefix(dec.text, "push") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") {
return true
}
// word 28BD0000. pop/ldm with no registers (undefined).
// we say 'ldmcs sp!, {}', libopcodes says 'popcs {}'.
if hasPrefix(text, "ldm") && hasPrefix(dec.text, "pop") && strings.Contains(text, "{}") && strings.Contains(dec.text, "{}") {
return true
}
// word 014640F0.
// libopcodes emits #-0 for negative zero; we don't.
if strings.Replace(dec.text, "#-0", "#0", -1) == text || strings.Replace(dec.text, ", #-0", "", -1) == text {
return true
}
// word 91EF90F0. we say 'strdls r9, [pc, #0]!' but libopcodes says 'strdls r9, [pc]'.
// word D16F60F0. we say 'strdle r6, [pc, #0]!' but libopcodes says 'strdle r6, [pc, #-0]'.
if strings.Replace(text, ", #0]!", "]", -1) == strings.Replace(dec.text, ", #-0]", "]", -1) {
return true
}
// word 510F4000. we say apsr, libopcodes says CPSR.
if strings.Replace(dec.text, "CPSR", "apsr", -1) == text {
return true
}
// word 06A4B059.
// for ssat and usat, libopcodes decodes asr #0 as asr #0 but the manual seems to say it should be asr #32.
// There is never an asr #0.
if strings.Replace(dec.text, ", asr #0", ", asr #32", -1) == text {
return true
}
if len(dec.enc) >= 4 {
raw := binary.LittleEndian.Uint32(dec.enc[:4])
// word 21FFF0B5.
// the manual is clear that this is pre-indexed mode (with !) but libopcodes generates post-index (without !).
if raw&0x01200000 == 0x01200000 && strings.Replace(text, "!", "", -1) == dec.text {
return true
}
// word C100543E: libopcodes says tst, but no evidence for that.
if strings.HasPrefix(dec.text, "tst") && raw&0x0ff00000 != 0x03100000 && raw&0x0ff00000 != 0x01100000 {
return true
}
// word C3203CE8: libopcodes says teq, but no evidence for that.
if strings.HasPrefix(dec.text, "teq") && raw&0x0ff00000 != 0x03300000 && raw&0x0ff00000 != 0x01300000 {
return true
}
// word D14C552E: libopcodes says cmp but no evidence for that.
if strings.HasPrefix(dec.text, "cmp") && raw&0x0ff00000 != 0x03500000 && raw&0x0ff00000 != 0x01500000 {
return true
}
// word 2166AA4A: libopcodes says cmn but no evidence for that.
if strings.HasPrefix(dec.text, "cmn") && raw&0x0ff00000 != 0x03700000 && raw&0x0ff00000 != 0x01700000 {
return true
}
// word E70AEEEF: libopcodes says str but no evidence for that.
if strings.HasPrefix(dec.text, "str") && len(dec.text) >= 5 && (dec.text[3] == ' ' || dec.text[5] == ' ') && raw&0x0e500018 != 0x06000000 && raw&0x0e500000 != 0x0400000 {
return true
}
// word B0AF48F4: libopcodes says strd but P=0,W=1 which is unpredictable.
if hasPrefix(dec.text, "ldr", "str") && raw&0x01200000 == 0x00200000 {
return true
}
// word B6CC1C76: libopcodes inexplicably says 'uxtab16lt r1, ip, r6, ROR #24' instead of 'uxtab16lt r1, ip, r6, ror #24'
if strings.ToLower(dec.text) == text {
return true
}
// word F410FDA1: libopcodes says PLDW but the manual is clear that PLDW is F5/F7, not F4.
// word F7D0FB17: libopcodes says PLDW but the manual is clear that PLDW has 0x10 clear
if hasPrefix(dec.text, "pld") && raw&0xfd000010 != 0xf5000000 {
return true
}
// word F650FE14: libopcodes says PLI but the manual is clear that PLI has 0x10 clear
if hasPrefix(dec.text, "pli") && raw&0xff000010 != 0xf6000000 {
return true
}
}
return false
}
// Instructions known to libopcodes (or xed) but not to us.
// Most of these are floating point coprocessor instructions.
var unsupported = strings.Fields(`
abs
acs
adf
aes
asn
atn
cdp
cf
cmf
cnf
cos
cps
crc32
dvf
eret
exp
fadd
fcmp
fcpy
fcvt
fdiv
fdv
fix
fld
flt
fmac
fmd
fml
fmr
fms
fmul
fmx
fneg
fnm
frd
fsit
fsq
fst
fsu
fto
fui
hlt
hvc
lda
ldc
ldf
lfm
lgn
log
mar
mcr
mcrr
mia
mnf
mra
mrc
mrrc
mrs
msr
msr
muf
mvf
nrm
pol
pow
rdf
rfc
rfe
rfs
rmf
rnd
rpw
rsf
sdiv
sev
sfm
sha1
sha256
sin
smc
sqt
srs
stc
stf
stl
suf
tan
udf
udiv
urd
vfma
vfms
vfnma
vfnms
vrint
wfc
wfs
`)
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Copied and simplified from rsc.io/x86/x86asm/objdumpext_test.go.
package armasm
import (
"bytes"
"debug/elf"
"encoding/binary"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"testing"
)
const objdumpPath = "/usr/local/bin/arm-linux-elf-objdump"
func testObjdumpARM(t *testing.T, generate func(func([]byte))) {
testObjdumpArch(t, generate, ModeARM)
}
func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch Mode) {
if testing.Short() {
t.Skip("skipping objdump test in short mode")
}
if _, err := os.Stat(objdumpPath); err != nil {
t.Fatal(err)
}
testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump)
}
func objdump(ext *ExtDis) error {
// File already written with instructions; add ELF header.
if ext.Arch == ModeARM {
if err := writeELF32(ext.File, ext.Size); err != nil {
return err
}
} else {
panic("unknown arch")
}
b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name())
if err != nil {
return err
}
var (
nmatch int
reading bool
next uint32 = start
addr uint32
encbuf [4]byte
enc []byte
text string
)
flush := func() {
if addr == next {
if m := pcrel.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
}
if strings.HasPrefix(text, "stmia") {
text = "stm" + text[5:]
}
if strings.HasPrefix(text, "stmfd") {
text = "stmdb" + text[5:]
}
if strings.HasPrefix(text, "ldmfd") {
text = "ldm" + text[5:]
}
text = strings.Replace(text, "#0.0", "#0", -1)
if text == "undefined" && len(enc) == 4 {
text = "error: unknown instruction"
enc = nil
}
if len(enc) == 4 {
// prints as word but we want to record bytes
enc[0], enc[3] = enc[3], enc[0]
enc[1], enc[2] = enc[2], enc[1]
}
ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
encbuf = [4]byte{}
enc = nil
next += 4
}
}
var textangle = []byte("<.text>:")
for {
line, err := b.ReadSlice('\n')
if err != nil {
if err == io.EOF {
break
}
return fmt.Errorf("reading objdump output: %v", err)
}
if bytes.Contains(line, textangle) {
reading = true
continue
}
if !reading {
continue
}
if debug {
os.Stdout.Write(line)
}
if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil {
enc = enc1
continue
}
flush()
nmatch++
addr, enc, text = parseLine(line, encbuf[:0])
if addr > next {
return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
}
}
flush()
if next != start+uint32(ext.Size) {
return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
}
if err := ext.Wait(); err != nil {
return fmt.Errorf("exec: %v", err)
}
return nil
}
var (
undefined = []byte("<UNDEFINED>")
unpredictable = []byte("<UNPREDICTABLE>")
illegalShifter = []byte("<illegal shifter operand>")
)
func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
oline := line
i := index(line, ":\t")
if i < 0 {
log.Fatalf("cannot parse disassembly: %q", oline)
}
x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32)
if err != nil {
log.Fatalf("cannot parse disassembly: %q", oline)
}
addr = uint32(x)
line = line[i+2:]
i = bytes.IndexByte(line, '\t')
if i < 0 {
log.Fatalf("cannot parse disassembly: %q", oline)
}
enc, ok := parseHex(line[:i], encstart)
if !ok {
log.Fatalf("cannot parse disassembly: %q", oline)
}
line = trimSpace(line[i:])
if bytes.Contains(line, undefined) {
text = "undefined"
return
}
if bytes.Contains(line, illegalShifter) {
text = "undefined"
return
}
if false && bytes.Contains(line, unpredictable) {
text = "unpredictable"
return
}
if i := bytes.IndexByte(line, ';'); i >= 0 {
line = trimSpace(line[:i])
}
text = string(fixSpace(line))
return
}
func parseContinuation(line []byte, enc []byte) []byte {
i := index(line, ":\t")
if i < 0 {
return nil
}
line = line[i+1:]
enc, _ = parseHex(line, enc)
return enc
}
// writeELF32 writes an ELF32 header to the file,
// describing a text segment that starts at start
// and extends for size bytes.
func writeELF32(f *os.File, size int) error {
f.Seek(0, 0)
var hdr elf.Header32
var prog elf.Prog32
var sect elf.Section32
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, &hdr)
off1 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &prog)
off2 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &sect)
off3 := buf.Len()
buf.Reset()
data := byte(elf.ELFDATA2LSB)
hdr = elf.Header32{
Ident: [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1},
Type: 2,
Machine: uint16(elf.EM_ARM),
Version: 1,
Entry: start,
Phoff: uint32(off1),
Shoff: uint32(off2),
Flags: 0x05000002,
Ehsize: uint16(off1),
Phentsize: uint16(off2 - off1),
Phnum: 1,
Shentsize: uint16(off3 - off2),
Shnum: 3,
Shstrndx: 2,
}
binary.Write(&buf, binary.LittleEndian, &hdr)
prog = elf.Prog32{
Type: 1,
Off: start,
Vaddr: start,
Paddr: start,
Filesz: uint32(size),
Memsz: uint32(size),
Flags: 5,
Align: start,
}
binary.Write(&buf, binary.LittleEndian, &prog)
binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
sect = elf.Section32{
Name: 1,
Type: uint32(elf.SHT_PROGBITS),
Addr: start,
Off: start,
Size: uint32(size),
Flags: uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
Addralign: 4,
}
binary.Write(&buf, binary.LittleEndian, &sect) // .text
sect = elf.Section32{
Name: uint32(len("\x00.text\x00")),
Type: uint32(elf.SHT_STRTAB),
Addr: 0,
Off: uint32(off2 + (off3-off2)*3),
Size: uint32(len("\x00.text\x00.shstrtab\x00")),
Addralign: 1,
}
binary.Write(&buf, binary.LittleEndian, &sect)
buf.WriteString("\x00.text\x00.shstrtab\x00")
f.Write(buf.Bytes())
return nil
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package armasm
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"strings"
)
// Plan9Syntax returns the Go assembler syntax for the instruction.
// The syntax was originally defined by Plan 9.
// The pc is the program counter of the instruction, used for expanding
// PC-relative addresses into absolute ones.
// The symname function queries the symbol table for the program
// being disassembled. Given a target address it returns the name and base
// address of the symbol containing the target, if any; otherwise it returns "", 0.
// The reader r should read from the text segment using text addresses
// as offsets; it is used to display pc-relative loads as constant loads.
func Plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string {
if symname == nil {
symname = func(uint64) (string, uint64) { return "", 0 }
}
var args []string
for _, a := range inst.Args {
if a == nil {
break
}
args = append(args, plan9Arg(&inst, pc, symname, a))
}
op := inst.Op.String()
switch inst.Op &^ 15 {
case LDR_EQ, LDRB_EQ, LDRH_EQ:
// Check for RET
reg, _ := inst.Args[0].(Reg)
mem, _ := inst.Args[1].(Mem)
if inst.Op&^15 == LDR_EQ && reg == R15 && mem.Base == SP && mem.Sign == 0 && mem.Mode == AddrPostIndex {
return fmt.Sprintf("RET%s #%d", op[3:], mem.Offset)
}
// Check for PC-relative load.
if mem.Base == PC && mem.Sign == 0 && mem.Mode == AddrOffset && text != nil {
addr := uint32(pc) + 8 + uint32(mem.Offset)
buf := make([]byte, 4)
switch inst.Op &^ 15 {
case LDRB_EQ:
if _, err := text.ReadAt(buf[:1], int64(addr)); err != nil {
break
}
args[1] = fmt.Sprintf("$%#x", buf[0])
case LDRH_EQ:
if _, err := text.ReadAt(buf[:2], int64(addr)); err != nil {
break
}
args[1] = fmt.Sprintf("$%#x", binary.LittleEndian.Uint16(buf))
case LDR_EQ:
if _, err := text.ReadAt(buf, int64(addr)); err != nil {
break
}
x := binary.LittleEndian.Uint32(buf)
if s, base := symname(uint64(x)); s != "" && uint64(x) == base {
args[1] = fmt.Sprintf("$%s(SB)", s)
} else {
args[1] = fmt.Sprintf("$%#x", x)
}
}
}
}
// Move addressing mode into opcode suffix.
suffix := ""
switch inst.Op &^ 15 {
case LDR_EQ, LDRB_EQ, LDRH_EQ, STR_EQ, STRB_EQ, STRH_EQ:
mem, _ := inst.Args[1].(Mem)
switch mem.Mode {
case AddrOffset, AddrLDM:
// no suffix
case AddrPreIndex, AddrLDM_WB:
suffix = ".W"
case AddrPostIndex:
suffix = ".P"
}
off := ""
if mem.Offset != 0 {
off = fmt.Sprintf("%#x", mem.Offset)
}
base := fmt.Sprintf("(R%d)", int(mem.Base))
index := ""
if mem.Sign != 0 {
sign := ""
if mem.Sign < 0 {
sign = ""
}
shift := ""
if mem.Count != 0 {
shift = fmt.Sprintf("%s%d", plan9Shift[mem.Shift], mem.Count)
}
index = fmt.Sprintf("(%sR%d%s)", sign, int(mem.Index), shift)
}
args[1] = off + base + index
}
// Reverse args, placing dest last.
for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
args[i], args[j] = args[j], args[i]
}
switch inst.Op &^ 15 {
case MOV_EQ:
op = "MOVW" + op[3:]
case LDR_EQ:
op = "MOVW" + op[3:] + suffix
case LDRB_EQ:
op = "MOVB" + op[4:] + suffix
case LDRH_EQ:
op = "MOVH" + op[4:] + suffix
case STR_EQ:
op = "MOVW" + op[3:] + suffix
args[0], args[1] = args[1], args[0]
case STRB_EQ:
op = "MOVB" + op[4:] + suffix
args[0], args[1] = args[1], args[0]
case STRH_EQ:
op = "MOVH" + op[4:] + suffix
args[0], args[1] = args[1], args[0]
}
if args != nil {
op += " " + strings.Join(args, ", ")
}
return op
}
// assembler syntax for the various shifts.
// @x> is a lie; the assembler uses @> 0
// instead of @x> 1, but i wanted to be clear that it
// was a different operation (rotate right extended, not rotate right).
var plan9Shift = []string{"<<", ">>", "->", "@>", "@x>"}
func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
switch a := arg.(type) {
case Endian:
case Imm:
return fmt.Sprintf("$%d", int(a))
case Mem:
case PCRel:
addr := uint32(pc) + 8 + uint32(a)
if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base {
return fmt.Sprintf("%s(SB)", s)
}
return fmt.Sprintf("%#x", addr)
case Reg:
if a < 16 {
return fmt.Sprintf("R%d", int(a))
}
case RegList:
var buf bytes.Buffer
start := -2
end := -2
fmt.Fprintf(&buf, "[")
flush := func() {
if start >= 0 {
if buf.Len() > 1 {
fmt.Fprintf(&buf, ",")
}
if start == end {
fmt.Fprintf(&buf, "R%d", start)
} else {
fmt.Fprintf(&buf, "R%d-R%d", start, end)
}
}
}
for i := 0; i < 16; i++ {
if a&(1<<uint(i)) != 0 {
if i == end+1 {
end++
continue
}
start = i
end = i
}
}
flush()
fmt.Fprintf(&buf, "]")
return buf.String()
case RegShift:
return fmt.Sprintf("R%d%s$%d", int(a.Reg), plan9Shift[a.Shift], int(a.Count))
case RegShiftReg:
return fmt.Sprintf("R%d%sR%d", int(a.Reg), plan9Shift[a.Shift], int(a.RegCount))
}
return strings.ToUpper(arg.String())
}
This source diff could not be displayed because it is too large. You can view the blob instead.
newdecode.txt:
cd ..; go test -cover -run 'ObjdumpARMCond' -v -timeout 10h -printtests -long 2>&1 | tee log
cd ..; go test -cover -run 'ObjdumpARMUncond' -v -timeout 10h -printtests -long 2>&1 | tee -a log
egrep ' (gnu|plan9) ' ../log |sort >newdecode.txt
000001f1| 1 gnu setend le
00100f61| 1 gnu mrsvs r1, apsr
00f02053| 1 gnu noppl
00f0d4f4| 1 gnu pli [r4]
01f020d3| 1 gnu yieldle
02002d59| 1 gnu stmdbpl sp!, {r1}
021da9d8| 1 gnu stmle r9!, {r1, r8, sl, fp, ip}
02c0b071| 1 gnu movsvc ip, r2
02f02073| 1 gnu wfevc
03f02013| 1 gnu wfine
03f05df7| 1 gnu pld [sp, -r3]
04009d34| 1 gnu popcc {r0}
043a52b1| 1 gnu cmplt r2, r4, lsl #20
04402de5| 1 gnu push {r4}
045b148d| 1 gnu vldrhi d5, [r4, #-16]
04f02093| 1 gnu sevls
0793eab0| 1 gnu rsclt r9, sl, r7, lsl #6
079bfb9e| 1 gnu vmovls.f64 d25, #183
0a4fc9d3| 1 gnu bicle r4, r9, #10, 30
0bac7ab6| 1 gnu ldrbtlt sl, [sl], -fp, lsl #24
0c2aee44| 1 gnu strbtmi r2, [lr], #2572
0c4bb000| 1 gnu adcseq r4, r0, ip, lsl #22
0e26d561| 1 gnu bicsvs r2, r5, lr, lsl #12
0f0fa011| 1 gnu lslne r0, pc, #30
0fa448e0| 1 gnu sub sl, r8, pc, lsl #8
101af1de| 1 gnu vmrsle r1, fpscr
108a0cee| 1 gnu vmov s24, r8
108a1dae| 1 gnu vmovge r8, s26
108ae14e| 1 gnu vmsrmi fpscr, r8
10faf1ae| 1 gnu vmrsge apsr_nzcv, fpscr
10fb052e| 1 gnu vmovcs.32 d5[0], pc
11c902b7| 1 gnu smladlt r2, r1, r9, ip
11ef5b16| 1 gnu uadd16ne lr, fp, r1
12fa87a7| 1 gnu usad8ge r7, r2, sl
135f2956| 1 gnu qadd16pl r5, r9, r3
13de9aa1| 1 gnu orrsge sp, sl, r3, lsl lr
145c0e40| 1 gnu andmi r5, lr, r4, lsl ip
150f7fd6| 1 gnu uhadd16le r0, pc, r5
15b9bf12| 1 gnu adcsne fp, pc, #344064
16373391| 1 gnu teqls r3, r6, lsl r7
19ef1966| 1 gnu sadd16vs lr, r9, r9
1ab0b091| 1 gnu lslsls fp, sl, r0
1b9f6fe6| 1 gnu uqadd16 r9, pc, fp
1bb58557| 1 gnu usada8pl r5, fp, r5, fp
1beff8e0| 1 gnu rscs lr, r8, fp, lsl pc
1caff0e6| 1 gnu usat sl, #16, ip, lsl #30
1d0f3d36| 1 gnu shadd16cc r0, sp, sp
1dca1d52| 1 gnu andspl ip, sp, #118784
1e4891d0| 1 gnu addsle r4, r1, lr, lsl r8
1f0889e6| 1 gnu pkhbt r0, r9, pc, lsl #16
1f1f6fe1| 1 gnu clz r1, pc
1f26d157| 1 gnu bfcpl r2, #12, #6
1ff07ff5| 1 gnu clrex
1fff2fd1| 1 gnu bxle pc
20f153f6| 1 gnu pli [r3, -r0, lsr #2]
21047013| 1 gnu cmnne r0, #553648128
21c2eb8b| 1 gnu blhi .-0x50f778
21c2ebfb| 1 gnu blx .-0x50f776
21fa62ee| 1 gnu vmul.f32 s31, s4, s3
23005720| 1 gnu subscs r0, r7, r3, lsr #32
236a303e| 1 gnu vaddcc.f32 s12, s0, s7
23f055f6| 1 gnu pli [r5, -r3, lsr #32]
2430a031| 1 gnu lsrcc r3, r4, #32
245d0803| 1 gnu movweq r5, #36132
251a86be| 1 gnu vdivlt.f32 s2, s12, s11
25db7b81| 1 gnu cmnhi fp, r5, lsr #22
26bc3553| 1 gnu teqpl r5, #9728
277c2d69| 1 gnu pushvs {r0, r1, r2, r5, sl, fp, ip, sp, lr}
29fc1cf5| 1 gnu pldw [ip, #-3113]
29ff2fc1| 1 gnu bxjgt r9
2decd9c0| 1 gnu sbcsgt lr, r9, sp, lsr #24
30fa5e47| 1 gnu smmulrmi lr, r0, sl
316f64d6| 1 gnu uqasxle r6, r4, r1
323f5da6| 1 gnu uasxge r3, sp, r2
327fe5e6| 1 gnu usat16 r7, #5, r2
330151e3| 1 gnu cmp r1, #-1073741812
34af2ae6| 1 gnu qasx sl, sl, r4
35fd3710| 1 gnu eorsne pc, r7, r5, lsr sp
36def1c1| 1 gnu mvnsgt sp, r6, lsr lr
3801b061| 1 gnu lsrsvs r0, r8, r1
38985477| 1 gnu smmlarvc r4, r8, r8, r9
3a2fbfa6| 1 gnu revge r2, sl
3a3f1b06| 1 gnu sasxeq r3, fp, sl
3a7fa346| 1 gnu ssat16mi r7, #4, sl
3a943b94| 1 gnu ldrtls r9, [fp], #-1082
3bf505e7| 1 gnu smuadx r5, fp, r5
3cef7086| 1 gnu uhasxhi lr, r0, ip
3e5f3ec6| 1 gnu shasxgt r5, lr, lr
3f4fff86| 1 gnu rbithi r4, pc
3faf4717| 1 gnu smlaldxne sl, r7, pc, pc
3fff2fc1| 1 gnu blxgt pc
402bbf7e| 1 gnu vcvtvc.u16.f64 d2, d2, #16
403ab5de| 1 gnu vcmple.f32 s6, #0
40eb363e| 1 gnu vsubcc.f64 d14, d6, d0
420f73d1| 1 gnu cmnle r3, r2, asr #30
424a648e| 1 gnu vnmulhi.f32 s9, s8, s4
4284d717| 1 gnu ldrbne r8, [r7, r2, asr #8]
42a599c3| 1 gnu orrsgt sl, r9, #276824064
42abf0be| 1 gnu vmovlt.f64 d26, d2
446ea031| 1 gnu asrcc r6, r4, #28
4a953557| 1 gnu ldrpl r9, [r5, -sl, asr #10]!
4ab6f712| 1 gnu rscsne fp, r7, #77594624
4af07ff5| 1 gnu dsb #10
4df6def4| 1 gnu pli [lr, #1613]
4efbf52e| 1 gnu vcmpcs.f64 d31, #0
50aaac79| 1 gnu stmibvc ip!, {r4, r6, r9, fp, sp, pc}
50caf011| 1 gnu mvnsne ip, r0, asr sl
50f04961| 1 gnu qdaddvs pc, r0, r9
51282008| 1 gnu stmdaeq r0!, {r0, r4, r6, fp, sp}
52bf6576| 1 gnu uqsaxvc fp, r5, r2
5345c9d0| 1 gnu sbcle r4, r9, r3, asr r5
538f5e46| 1 gnu usaxmi r8, lr, r3
54106d31| 1 gnu qdsubcc r1, r4, sp
56e0e557| 1 gnu ubfxpl lr, r6, #0, #6
57073d11| 1 gnu teqne sp, r7, asr r7
58bb0aa9| 1 gnu stmdbge sl, {r3, r4, r6, r8, r9, fp, ip, sp, pc}
58f007b1| 1 gnu qaddlt pc, r8, r7
59fd0e77| 1 gnu smusdvc lr, r9, sp
5ab7f1c5| 1 gnu ldrbgt fp, [r1, #1882]!
5abf23c6| 1 gnu qsaxgt fp, r3, sl
5b8f1c96| 1 gnu ssaxls r8, ip, fp
5b98ab97| 1 gnu sbfxls r9, fp, #16, #12
5bc9b041| 1 gnu asrsmi ip, fp, r9
5bf07ff5| 1 gnu dmb #11
5c102b81| 1 gnu qsubhi r1, ip, fp
5caa49e1| 1 gnu qdadd sl, ip, r9
5d3f7226| 1 gnu uhsaxcs r3, r2, sp
5db55470| 1 gnu subsvc fp, r4, sp, asr r5
5ef14387| 1 gnu smlsldhi pc, r3, lr, r1
5f540a11| 1 gnu qaddne r5, pc, sl
5f9079d1| 1 gnu cmnle r9, pc, asr r0
5faf3f66| 1 gnu shsaxvs sl, pc, pc
605071d7| 1 gnu ldrble r5, [r1, -r0, rrx]!
614adc76| 1 gnu ldrbvc r4, [ip], r1, ror #20
616b9e42| 1 gnu addsmi r6, lr, #99328
62c84f15| 1 gnu strbne ip, [pc, #-2146]
62f051f7| 1 gnu pld [r1, -r2, rrx]
6346c393| 1 gnu bicls r4, r3, #103809024
654abbae| 1 gnu vcvtge.f32.u16 s8, s8, #5
65a5f0e3| 1 gnu mvns sl, #423624704
65f796f7| 1 gnu pldw [r6, r5, ror #14]
670bb12e| 1 gnu vnegcs.f64 d0, d23
67903731| 1 gnu teqcc r7, r7, rrx
68ddc637| 1 gnu strbcc sp, [r6, r8, ror #26]
695b3ab6| 1 gnu ldrtlt r5, [sl], -r9, ror #22
697cfc71| 1 gnu mvnsvc r7, r9, ror #24
6a0ab3ee| 1 gnu vcvtb.f16.f32 s0, s21
6ad9ad54| 1 gnu strtpl sp, [sp], #2410
6af07ff5| 1 gnu isb #10
6afa6f10| 1 gnu rsbne pc, pc, sl, ror #20
6d5b19ee| 1 gnu vnmla.f64 d5, d9, d29
6d60b071| 1 gnu rrxsvc r6, sp
6df754f7| 1 gnu pld [r4, -sp, ror #14]
70065821| 1 gnu cmpcs r8, r0, ror r6
7050ed86| 1 gnu uxtabhi r5, sp, r0
715f1186| 1 gnu ssub16hi r5, r1, r1
716c9805| 1 gnu ldreq r6, [r8, #3185]
718d5ab1| 1 gnu cmplt sl, r1, ror sp
71c8cfb6| 1 gnu uxtb16lt ip, r1, ror #16
7294af06| 1 gnu sxtbeq r9, r2, ror #8
72c0bac6| 1 gnu sxtahgt ip, sl, r2
730f6716| 1 gnu uqsub16ne r0, r7, r3
73608f46| 1 gnu sxtb16mi r6, r3
73687f22| 1 gnu rsbscs r6, pc, #7536640
74308816| 1 gnu sxtab16ne r3, r8, r4
757f3456| 1 gnu shsub16pl r7, r4, r5
77788016| 1 gnu sxtab16ne r7, r0, r7, ror #16
78061671| 1 gnu tstvc r6, r8, ror r6
780a2fe1| 1 gnu bkpt 0xf0a8
7850abd6| 1 gnu sxtable r5, fp, r8
792cef26| 1 gnu uxtbcs r2, r9, ror #24
799eb8e0| 1 gnu adcs r9, r8, r9, ror lr
799f5726| 1 gnu usub16cs r9, r7, r9
79d0bf16| 1 gnu sxthne sp, r9
7a037ba1| 1 gnu cmnge fp, sl, ror r3
7b0f2566| 1 gnu qsub16vs r0, r5, fp
7b79dd51| 1 gnu bicspl r7, sp, fp, ror r9
7b9a9f1d| 1 gnu vldrne s18, [pc, #492]
7c70cea6| 1 gnu uxtab16ge r7, lr, ip
7d48f966| 1 gnu uxtahvs r4, r9, sp, ror #16
7d5c13a1| 1 gnu tstge r3, sp, ror ip
7e0001f1| 1 gnu setend le
7e1c0ba7| 1 gnu smlsdxge fp, lr, ip, r1
7e567e40| 1 gnu rsbsmi r5, lr, lr, ror r6
7e8f73b6| 1 gnu uhsub16lt r8, r3, lr
7ef0ffd6| 1 gnu uxthle pc, lr
7faaa011| 1 gnu rorne sl, pc, sl
81f19af7| 1 gnu pldw [sl, r1, lsl #3]
82033901| 1 gnu teqeq r9, r2, lsl #7
82f316f5| 1 gnu pldw [r6, #-898]
830201f1| 1 gnu setend be
838a3b91| 1 gnu teqls fp, r3, lsl #21
8408af2f| 1 gnu svccs 0x00af0884
884201d1| 1 gnu smlabble r1, r8, r2, r4
8aa12e31| 1 gnu smlawbcc lr, sl, r1, sl
8b9b99c0| 1 gnu addsgt r9, r9, fp, lsl #23
8c005c81| 1 gnu cmphi ip, ip, lsl #1
8fb429c6| 1 gnu strtgt fp, [r9], -pc, lsl #9
907b1f9e| 1 gnu vmovls.32 r7, d31[0]
91975f25| 1 gnu ldrbcs r9, [pc, #-1937]
91b010e3| 1 gnu tst r0, #145
927facb1| 1 gnu strexdlt r7, r2, [ip]
92904c91| 1 gnu swpbls r9, r2, [ip]
92af1226| 1 gnu sadd8cs sl, r2, r2
92b28c70| 1 gnu umullvc fp, ip, r2, r2
945f68a6| 1 gnu uqadd8ge r5, r8, r4
950b2560| 1 gnu mlavs r5, r5, fp, r0
969fcf71| 1 gnu strexbvc r9, r6, [pc]
96cf35e6| 1 gnu shadd8 ip, r5, r6
98060eb0| 1 gnu mullt lr, r8, r6
9843fb93| 1 gnu mvnsls r4, #152, 6
9a3fe2b0| 1 gnu smlallt r3, r2, sl, pc
9aef58b6| 1 gnu uadd8lt lr, r8, sl
9afcdff5| 1 gnu pld [pc, #3226]
9c221810| 1 gnu mulsne r8, ip, r2
9c3bc9dd| 1 gnu vstrle d19, [r9, #624]
9c5f2606| 1 gnu qadd8eq r5, r6, ip
9d87dac0| 1 gnu smullsgt r8, sl, sp, r7
9e0f7c86| 1 gnu uhadd8hi r0, ip, lr
9e814560| 1 gnu umaalvs r8, r5, lr, r1
9e9f8dc1| 1 gnu strexgt r9, lr, [sp]
9ec3c9d7| 1 gnu bfile ip, lr, #7, #3
9ed26d90| 1 gnu mlsls sp, lr, r2, sp
9f7fd9c1| 1 gnu ldrexbgt r7, [r9]
9f7fea91| 1 gnu strexhls r7, pc, [sl]
9f9f9921| 1 gnu ldrexcs r9, [r9]
9faffd21| 1 gnu ldrexhcs sl, [sp]
9fcfbd61| 1 gnu ldrexdvs ip, [sp]
9ff7a710| 1 gnu umlalne pc, r7, pc, r7
a05459d3| 1 gnu cmple r9, #160, 8
a3062be1| 1 gnu smulwb fp, r3, r6
a68a92b1| 1 gnu orrslt r8, r2, r6, lsr #21
abff55f6| 1 gnu pli [r5, -fp, lsr #31]
addbf8ea| 1 gnu b .-0x1c9148
ae79b021| 1 gnu lsrscs r7, lr, #19
b590a3b1| 1 gnu strhlt r9, [r3, r5]!
b5b2e390| 1 gnu strhtls fp, [r3], #37
b6ac4e30| 1 gnu strhcc sl, [lr], #-198
b73fff86| 1 gnu revshhi r3, r7
b75fbfc6| 1 gnu rev16gt r5, r7
b80b7c80| 1 gnu ldrhthi r0, [ip], #-184
b82035e0| 1 gnu ldrht r2, [r5], -r8
b8877391| 1 gnu ldrhls r8, [r3, #-120]!
b9703e41| 1 gnu ldrhmi r7, [lr, -r9]!
b9cf8c16| 1 gnu selne ip, ip, r9
bd81bd58| 1 gnu poppl {r0, r2, r3, r4, r5, r7, r8, pc}
bdfdb469| 1 gnu ldmibvs r4!, {r0, r2, r3, r4, r5, r7, r8, sl, fp, ip, sp, lr, pc}
beb02500| 1 gnu strhteq fp, [r5], -lr
bf1a5e42| 1 gnu subsmi r1, lr, #782336
c19a4d5e| 1 gnu vmlspl.f32 s19, s27, s2
c1aab15e| 1 gnu vsqrtpl.f32 s20, s2
c354b003| 1 gnu movseq r5, #-1023410176
c4091dc1| 1 gnu tstgt sp, r4, asr #19
c50e13a9| 1 gnu ldmdbge r3, {r0, r2, r6, r7, r9, sl, fp}
c68c8637| 1 gnu strcc r8, [r6, r6, asr #25]
c6ad48e3| 1 gnu movt sl, #36294
c6f65ff5| 1 gnu pld [pc, #-1734]
c8a92f10| 1 gnu eorne sl, pc, r8, asr #19
c9016b61| 1 gnu smulbtvs fp, r9, r1
cadbf49e| 1 gnu vcmpels.f64 d29, d10
ce9de476| 1 gnu strbtvc r9, [r4], lr, asr #27
cf3c1ab1| 1 gnu tstlt sl, pc, asr #25
d355aab6| 1 gnu ssatlt r5, #11, r3, asr #11
d4f4df10| 1 gnu ldrsbne pc, [pc], #68
d6530d61| 1 gnu ldrdvs r5, [sp, -r6]
d74d7800| 1 gnu ldrsbteq r4, [r8], #-215
d9703680| 1 gnu ldrsbthi r7, [r6], -r9
dbe003c0| 1 gnu ldrdgt lr, [r3], -fp
dc709561| 1 gnu ldrsbvs r7, [r5, ip]
dcc3b9c8| 1 gnu ldmgt r9!, {r2, r3, r4, r6, r7, r8, r9, lr, pc}
debfa0e5| 1 gnu str fp, [r0, #4062]!
dee062a1| 1 gnu ldrdge lr, [r2, #-14]!
dfa05ab7| 1 gnu smmlslt sl, pc, r0, sl
e02ef011| 1 gnu mvnsne r2, r0, ror #29
e4d41718| 1 gnu ldmdane r7, {r2, r5, r6, r7, sl, ip, lr, pc}
e6d0fe34| 1 gnu ldrbtcc sp, [lr], #230
e73bf7be| 1 gnu vcvtlt.f32.f64 s7, d23
e74e72b3| 1 gnu cmnlt r2, #3696
e80bf07e| 1 gnu vabsvc.f64 d16, d24
e9b5b001| 1 gnu rorseq fp, r9, #11
ea7bbdbe| 1 gnu vcvtlt.s32.f64 s14, d26
ec063813| 1 gnu teqne r8, #236, 12
ec0e49e1| 1 gnu smlaltt r0, r9, ip, lr
ee4ab85e| 1 gnu vcvtpl.f32.s32 s8, s29
ef461f25| 1 gnu ldrcs r4, [pc, #-1775]
ef5fd002| 1 gnu sbcseq r5, r0, #956
f4cf1d36| 1 gnu ssub8cc ip, sp, r4
f67f73b6| 1 gnu uhsub8lt r7, r3, r6
f6e09ca0| 1 gnu ldrshge lr, [ip], r6
f7702e32| 1 gnu eorcc r7, lr, #247
fa4dcf20| 1 gnu strdcs r4, [pc], #218
fac03720| 1 gnu ldrshtcs ip, [r7], -sl
fc0f64c6| 1 gnu uqsub8gt r0, r4, ip
fc28f481| 1 gnu ldrshhi r2, [r4, #140]!
fc300560| 1 gnu strdvs r3, [r5], -ip
fcacfc70| 1 gnu ldrshtvc sl, [ip], #204
fdbcfaf7| 1 gnu undef
fddf5c86| 1 gnu usub8hi sp, ip, sp
fdf02013| 1 gnu dbgne #13
fe0319e3| 1 gnu tst r9, #-134217725
fe7f3116| 1 gnu shsub8ne r7, r1, lr
ff4f2ac6| 1 gnu qsub8gt r4, sl, pc
ff818c71| 1 gnu strdvc r8, [ip, pc]
|6b5721d3 1 gnu error: unknown instruction
|76452001 1 gnu error: unknown instruction
|97acd647 1 gnu error: unknown instruction
tables.go: ../x86map/map.go ../x86.csv
go run ../x86map/map.go -fmt=decoder ../x86.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Table-driven decoding of x86 instructions.
package x86asm
import (
"encoding/binary"
"errors"
"fmt"
"runtime"
)
// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
const trace = false
// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16
const (
xFail decodeOp = iota // invalid instruction (return)
xMatch // completed match
xJump // jump to pc
xCondByte // switch on instruction byte value
xCondSlashR // read and switch on instruction /r value
xCondPrefix // switch on presence of instruction prefix
xCondIs64 // switch on 64-bit processor mode
xCondDataSize // switch on operand size
xCondAddrSize // switch on address size
xCondIsMem // switch on memory vs register argument
xSetOp // set instruction opcode
xReadSlashR // read /r
xReadIb // read ib
xReadIw // read iw
xReadId // read id
xReadIo // read io
xReadCb // read cb
xReadCw // read cw
xReadCd // read cd
xReadCp // read cp
xReadCm // read cm
xArg1 // arg 1
xArg3 // arg 3
xArgAL // arg AL
xArgAX // arg AX
xArgCL // arg CL
xArgCR0dashCR7 // arg CR0-CR7
xArgCS // arg CS
xArgDR0dashDR7 // arg DR0-DR7
xArgDS // arg DS
xArgDX // arg DX
xArgEAX // arg EAX
xArgEDX // arg EDX
xArgES // arg ES
xArgFS // arg FS
xArgGS // arg GS
xArgImm16 // arg imm16
xArgImm32 // arg imm32
xArgImm64 // arg imm64
xArgImm8 // arg imm8
xArgImm8u // arg imm8 but record as unsigned
xArgImm16u // arg imm8 but record as unsigned
xArgM // arg m
xArgM128 // arg m128
xArgM1428byte // arg m14/28byte
xArgM16 // arg m16
xArgM16and16 // arg m16&16
xArgM16and32 // arg m16&32
xArgM16and64 // arg m16&64
xArgM16colon16 // arg m16:16
xArgM16colon32 // arg m16:32
xArgM16colon64 // arg m16:64
xArgM16int // arg m16int
xArgM2byte // arg m2byte
xArgM32 // arg m32
xArgM32and32 // arg m32&32
xArgM32fp // arg m32fp
xArgM32int // arg m32int
xArgM512byte // arg m512byte
xArgM64 // arg m64
xArgM64fp // arg m64fp
xArgM64int // arg m64int
xArgM8 // arg m8
xArgM80bcd // arg m80bcd
xArgM80dec // arg m80dec
xArgM80fp // arg m80fp
xArgM94108byte // arg m94/108byte
xArgMm // arg mm
xArgMm1 // arg mm1
xArgMm2 // arg mm2
xArgMm2M64 // arg mm2/m64
xArgMmM32 // arg mm/m32
xArgMmM64 // arg mm/m64
xArgMem // arg mem
xArgMoffs16 // arg moffs16
xArgMoffs32 // arg moffs32
xArgMoffs64 // arg moffs64
xArgMoffs8 // arg moffs8
xArgPtr16colon16 // arg ptr16:16
xArgPtr16colon32 // arg ptr16:32
xArgR16 // arg r16
xArgR16op // arg r16 with +rw in opcode
xArgR32 // arg r32
xArgR32M16 // arg r32/m16
xArgR32M8 // arg r32/m8
xArgR32op // arg r32 with +rd in opcode
xArgR64 // arg r64
xArgR64M16 // arg r64/m16
xArgR64op // arg r64 with +rd in opcode
xArgR8 // arg r8
xArgR8op // arg r8 with +rb in opcode
xArgRAX // arg RAX
xArgRDX // arg RDX
xArgRM // arg r/m
xArgRM16 // arg r/m16
xArgRM32 // arg r/m32
xArgRM64 // arg r/m64
xArgRM8 // arg r/m8
xArgReg // arg reg
xArgRegM16 // arg reg/m16
xArgRegM32 // arg reg/m32
xArgRegM8 // arg reg/m8
xArgRel16 // arg rel16
xArgRel32 // arg rel32
xArgRel8 // arg rel8
xArgSS // arg SS
xArgST // arg ST, aka ST(0)
xArgSTi // arg ST(i) with +i in opcode
xArgSreg // arg Sreg
xArgTR0dashTR7 // arg TR0-TR7
xArgXmm // arg xmm
xArgXMM0 // arg <XMM0>
xArgXmm1 // arg xmm1
xArgXmm2 // arg xmm2
xArgXmm2M128 // arg xmm2/m128
xArgXmm2M16 // arg xmm2/m16
xArgXmm2M32 // arg xmm2/m32
xArgXmm2M64 // arg xmm2/m64
xArgXmmM128 // arg xmm/m128
xArgXmmM32 // arg xmm/m32
xArgXmmM64 // arg xmm/m64
xArgRmf16 // arg r/m16 but force mod=3
xArgRmf32 // arg r/m32 but force mod=3
xArgRmf64 // arg r/m64 but force mod=3
)
// instPrefix returns an Inst describing just one prefix byte.
// It is only used if there is a prefix followed by an unintelligible
// or invalid instruction byte sequence.
func instPrefix(b byte, mode int) (Inst, error) {
// When tracing it is useful to see what called instPrefix to report an error.
if trace {
_, file, line, _ := runtime.Caller(1)
fmt.Printf("%s:%d\n", file, line)
}
p := Prefix(b)
switch p {
case PrefixDataSize:
if mode == 16 {
p = PrefixData32
} else {
p = PrefixData16
}
case PrefixAddrSize:
if mode == 32 {
p = PrefixAddr16
} else {
p = PrefixAddr32
}
}
// Note: using composite literal with Prefix key confuses 'bundle' tool.
inst := Inst{Len: 1}
inst.Prefix = Prefixes{p}
return inst, nil
}
// truncated reports a truncated instruction.
// For now we use instPrefix but perhaps later we will return
// a specific error here.
func truncated(src []byte, mode int) (Inst, error) {
// return Inst{}, len(src), ErrTruncated
return instPrefix(src[0], mode) // too long
}
// These are the errors returned by Decode.
var (
ErrInvalidMode = errors.New("invalid x86 mode in Decode")
ErrTruncated = errors.New("truncated instruction")
ErrUnrecognized = errors.New("unrecognized instruction")
)
// decoderCover records coverage information for which parts
// of the byte code have been executed.
// TODO(rsc): This is for testing. Only use this if a flag is given.
var decoderCover []bool
// Decode decodes the leading bytes in src as a single instruction.
// The mode arguments specifies the assumed processor mode:
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
func Decode(src []byte, mode int) (inst Inst, err error) {
return decode1(src, mode, false)
}
// decode1 is the implementation of Decode but takes an extra
// gnuCompat flag to cause it to change its behavior to mimic
// bugs (or at least unique features) of GNU libopcodes as used
// by objdump. We don't believe that logic is the right thing to do
// in general, but when testing against libopcodes it simplifies the
// comparison if we adjust a few small pieces of logic.
// The affected logic is in the conditional branch for "mandatory" prefixes,
// case xCondPrefix.
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
switch mode {
case 16, 32, 64:
// ok
// TODO(rsc): 64-bit mode not tested, probably not working.
default:
return Inst{}, ErrInvalidMode
}
// Maximum instruction size is 15 bytes.
// If we need to read more, return 'truncated instruction.
if len(src) > 15 {
src = src[:15]
}
var (
// prefix decoding information
pos = 0 // position reading src
nprefix = 0 // number of prefixes
lockIndex = -1 // index of LOCK prefix in src and inst.Prefix
repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix
segIndex = -1 // index of Group 2 prefix in src and inst.Prefix
dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix
addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix
rex Prefix // rex byte if present (or 0)
rexUsed Prefix // bits used in rex byte
rexIndex = -1 // index of rex byte
addrMode = mode // address mode (width in bits)
dataMode = mode // operand mode (width in bits)
// decoded ModR/M fields
haveModrm bool
modrm int
mod int
regop int
rm int
// if ModR/M is memory reference, Mem form
mem Mem
haveMem bool
// decoded SIB fields
haveSIB bool
sib int
scale int
index int
base int
// decoded immediate values
imm int64
imm8 int8
immc int64
// output
opshift int
inst Inst
narg int // number of arguments written to inst
)
if mode == 64 {
dataMode = 32
}
// Prefixes are certainly the most complex and underspecified part of
// decoding x86 instructions. Although the manuals say things like
// up to four prefixes, one from each group, nearly everyone seems to
// agree that in practice as many prefixes as possible, including multiple
// from a particular group or repetitions of a given prefix, can be used on
// an instruction, provided the total instruction length including prefixes
// does not exceed the agreed-upon maximum of 15 bytes.
// Everyone also agrees that if one of these prefixes is the LOCK prefix
// and the instruction is not one of the instructions that can be used with
// the LOCK prefix or if the destination is not a memory operand,
// then the instruction is invalid and produces the #UD exception.
// However, that is the end of any semblance of agreement.
//
// What happens if prefixes are given that conflict with other prefixes?
// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
// conflict with each other: only one segment can be in effect.
// Disassemblers seem to agree that later prefixes take priority over
// earlier ones. I have not taken the time to write assembly programs
// to check to see if the hardware agrees.
//
// What happens if prefixes are given that have no meaning for the
// specific instruction to which they are attached? It depends.
// If they really have no meaning, they are ignored. However, a future
// processor may assign a different meaning. As a disassembler, we
// don't really know whether we're seeing a meaningless prefix or one
// whose meaning we simply haven't been told yet.
//
// Combining the two questions, what happens when conflicting
// extension prefixes are given? No one seems to know for sure.
// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
// Which prefix wins? See the xCondPrefix prefix for more.
//
// Writing assembly test cases to divine which interpretation the
// CPU uses might clarify the situation, but more likely it would
// make the situation even less clear.
// Read non-REX prefixes.
ReadPrefixes:
for ; pos < len(src); pos++ {
p := Prefix(src[pos])
switch p {
default:
nprefix = pos
break ReadPrefixes
// Group 1 - lock and repeat prefixes
// According to Intel, there should only be one from this set,
// but according to AMD both can be present.
case 0xF0:
if lockIndex >= 0 {
inst.Prefix[lockIndex] |= PrefixIgnored
}
lockIndex = pos
case 0xF2, 0xF3:
if repIndex >= 0 {
inst.Prefix[repIndex] |= PrefixIgnored
}
repIndex = pos
// Group 2 - segment override / branch hints
case 0x26, 0x2E, 0x36, 0x3E:
if mode == 64 {
p |= PrefixIgnored
break
}
fallthrough
case 0x64, 0x65:
if segIndex >= 0 {
inst.Prefix[segIndex] |= PrefixIgnored
}
segIndex = pos
// Group 3 - operand size override
case 0x66:
if mode == 16 {
dataMode = 32
p = PrefixData32
} else {
dataMode = 16
p = PrefixData16
}
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] |= PrefixIgnored
}
dataSizeIndex = pos
// Group 4 - address size override
case 0x67:
if mode == 32 {
addrMode = 16
p = PrefixAddr16
} else {
addrMode = 32
p = PrefixAddr32
}
if addrSizeIndex >= 0 {
inst.Prefix[addrSizeIndex] |= PrefixIgnored
}
addrSizeIndex = pos
}
if pos >= len(inst.Prefix) {
return instPrefix(src[0], mode) // too long
}
inst.Prefix[pos] = p
}
// Read REX prefix.
if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() {
rex = Prefix(src[pos])
rexIndex = pos
if pos >= len(inst.Prefix) {
return instPrefix(src[0], mode) // too long
}
inst.Prefix[pos] = rex
pos++
if rex&PrefixREXW != 0 {
dataMode = 64
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] |= PrefixIgnored
}
}
}
// Decode instruction stream, interpreting decoding instructions.
// opshift gives the shift to use when saving the next
// opcode byte into inst.Opcode.
opshift = 24
if decoderCover == nil {
decoderCover = make([]bool, len(decoder))
}
// Decode loop, executing decoder program.
var oldPC, prevPC int
Decode:
for pc := 1; ; { // TODO uint
oldPC = prevPC
prevPC = pc
if trace {
println("run", pc)
}
x := decoder[pc]
decoderCover[pc] = true
pc++
// Read and decode ModR/M if needed by opcode.
switch decodeOp(x) {
case xCondSlashR, xReadSlashR:
if haveModrm {
return Inst{Len: pos}, errInternal
}
haveModrm = true
if pos >= len(src) {
return truncated(src, mode)
}
modrm = int(src[pos])
pos++
if opshift >= 0 {
inst.Opcode |= uint32(modrm) << uint(opshift)
opshift -= 8
}
mod = modrm >> 6
regop = (modrm >> 3) & 07
rm = modrm & 07
if rex&PrefixREXR != 0 {
rexUsed |= PrefixREXR
regop |= 8
}
if addrMode == 16 {
// 16-bit modrm form
if mod != 3 {
haveMem = true
mem = addr16[rm]
if rm == 6 && mod == 0 {
mem.Base = 0
}
// Consume disp16 if present.
if mod == 0 && rm == 6 || mod == 2 {
if pos+2 > len(src) {
return truncated(src, mode)
}
mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
pos += 2
}
// Consume disp8 if present.
if mod == 1 {
if pos >= len(src) {
return truncated(src, mode)
}
mem.Disp = int64(int8(src[pos]))
pos++
}
}
} else {
haveMem = mod != 3
// 32-bit or 64-bit form
// Consume SIB encoding if present.
if rm == 4 && mod != 3 {
haveSIB = true
if pos >= len(src) {
return truncated(src, mode)
}
sib = int(src[pos])
pos++
if opshift >= 0 {
inst.Opcode |= uint32(sib) << uint(opshift)
opshift -= 8
}
scale = sib >> 6
index = (sib >> 3) & 07
base = sib & 07
if rex&PrefixREXB != 0 {
rexUsed |= PrefixREXB
base |= 8
}
if rex&PrefixREXX != 0 {
rexUsed |= PrefixREXX
index |= 8
}
mem.Scale = 1 << uint(scale)
if index == 4 {
// no mem.Index
} else {
mem.Index = baseRegForBits(addrMode) + Reg(index)
}
if base&7 == 5 && mod == 0 {
// no mem.Base
} else {
mem.Base = baseRegForBits(addrMode) + Reg(base)
}
} else {
if rex&PrefixREXB != 0 {
rexUsed |= PrefixREXB
rm |= 8
}
if mod == 0 && rm&7 == 5 || rm&7 == 4 {
// base omitted
} else if mod != 3 {
mem.Base = baseRegForBits(addrMode) + Reg(rm)
}
}
// Consume disp32 if present.
if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
if pos+4 > len(src) {
return truncated(src, mode)
}
mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
pos += 4
}
// Consume disp8 if present.
if mod == 1 {
if pos >= len(src) {
return truncated(src, mode)
}
mem.Disp = int64(int8(src[pos]))
pos++
}
// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
// See Vol 2A. Table 2-7.
if mode == 64 && mod == 0 && rm&7 == 5 {
if addrMode == 32 {
mem.Base = EIP
} else {
mem.Base = RIP
}
}
}
if segIndex >= 0 {
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
}
}
// Execute single opcode.
switch decodeOp(x) {
default:
println("bad op", x, "at", pc-1, "from", oldPC)
return Inst{Len: pos}, errInternal
case xFail:
inst.Op = 0
break Decode
case xMatch:
break Decode
case xJump:
pc = int(decoder[pc])
// Conditional branches.
case xCondByte:
if pos >= len(src) {
return truncated(src, mode)
}
b := src[pos]
n := int(decoder[pc])
pc++
for i := 0; i < n; i++ {
xb, xpc := decoder[pc], int(decoder[pc+1])
pc += 2
if b == byte(xb) {
pc = xpc
pos++
if opshift >= 0 {
inst.Opcode |= uint32(b) << uint(opshift)
opshift -= 8
}
continue Decode
}
}
// xCondByte is the only conditional with a fall through,
// so that it can be used to pick off special cases before
// an xCondSlash. If the fallthrough instruction is xFail,
// advance the position so that the decoded instruction
// size includes the byte we just compared against.
if decodeOp(decoder[pc]) == xJump {
pc = int(decoder[pc+1])
}
if decodeOp(decoder[pc]) == xFail {
pos++
}
case xCondIs64:
if mode == 64 {
pc = int(decoder[pc+1])
} else {
pc = int(decoder[pc])
}
case xCondIsMem:
mem := haveMem
if !haveModrm {
if pos >= len(src) {
return instPrefix(src[0], mode) // too long
}
mem = src[pos]>>6 != 3
}
if mem {
pc = int(decoder[pc+1])
} else {
pc = int(decoder[pc])
}
case xCondDataSize:
switch dataMode {
case 16:
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] |= PrefixImplicit
}
pc = int(decoder[pc])
case 32:
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] |= PrefixImplicit
}
pc = int(decoder[pc+1])
case 64:
rexUsed |= PrefixREXW
pc = int(decoder[pc+2])
}
case xCondAddrSize:
switch addrMode {
case 16:
if addrSizeIndex >= 0 {
inst.Prefix[addrSizeIndex] |= PrefixImplicit
}
pc = int(decoder[pc])
case 32:
if addrSizeIndex >= 0 {
inst.Prefix[addrSizeIndex] |= PrefixImplicit
}
pc = int(decoder[pc+1])
case 64:
pc = int(decoder[pc+2])
}
case xCondPrefix:
// Conditional branch based on presence or absence of prefixes.
// The conflict cases here are completely undocumented and
// differ significantly between GNU libopcodes and Intel xed.
// I have not written assembly code to divine what various CPUs
// do, but it wouldn't surprise me if they are not consistent either.
//
// The basic idea is to switch on the presence of a prefix, so that
// for example:
//
// xCondPrefix, 4
// 0xF3, 123,
// 0xF2, 234,
// 0x66, 345,
// 0, 456
//
// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
// is present, 66 if the 345 prefix is present, and 456 otherwise.
// The prefixes are given in descending order so that the 0 will be last.
//
// It is unclear what should happen if multiple conditions are
// satisfied: what if F2 and F3 are both present, or if 66 and F2
// are present, or if all three are present? The one chosen becomes
// part of the opcode and the others do not. Perhaps the answer
// depends on the specific opcodes in question.
//
// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
// it comes in 16-bit and 32-bit forms based on the 66 prefix,
// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
// with the 66 being only an operand size override, and probably
// F2 66 0F 38 F1 /r should be treated the same.
// Perhaps that rule is specific to the case of CRC32, since no
// 66 0F 38 F1 instruction is defined (today) (that we know of).
// However, both libopcodes and xed seem to generalize this
// example and choose F2/F3 in preference to 66, and we
// do the same.
//
// Next, what if both F2 and F3 are present? Which wins?
// The Intel xed rule, and ours, is that the one that occurs last wins.
// The GNU libopcodes rule, which we implement only in gnuCompat mode,
// is that F3 beats F2 unless F3 has no special meaning, in which
// case F3 can be a modified on an F2 special meaning.
//
// Concretely,
// 66 0F D6 /r is MOVQ
// F2 0F D6 /r is MOVDQ2Q
// F3 0F D6 /r is MOVQ2DQ.
//
// F2 66 0F D6 /r is 66 + MOVDQ2Q always.
// 66 F2 0F D6 /r is 66 + MOVDQ2Q always.
// F3 66 0F D6 /r is 66 + MOVQ2DQ always.
// 66 F3 0F D6 /r is 66 + MOVQ2DQ always.
// F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
// F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
// Adding 66 anywhere in the prefix section of the
// last two cases does not change the outcome.
//
// Finally, what if there is a variant in which 66 is a mandatory
// prefix rather than an operand size override, but we know of
// no corresponding F2/F3 form, and we see both F2/F3 and 66.
// Does F2/F3 still take priority, so that the result is an unknown
// instruction, or does the 66 take priority, so that the extended
// 66 instruction should be interpreted as having a REP/REPN prefix?
// Intel xed does the former and GNU libopcodes does the latter.
// We side with Intel xed, unless we are trying to match libopcodes
// more closely during the comparison-based test suite.
//
// In 64-bit mode REX.W is another valid prefix to test for, but
// there is less ambiguity about that. When present, REX.W is
// always the first entry in the table.
n := int(decoder[pc])
pc++
sawF3 := false
for j := 0; j < n; j++ {
prefix := Prefix(decoder[pc+2*j])
if prefix.IsREX() {
rexUsed |= prefix
if rex&prefix == prefix {
pc = int(decoder[pc+2*j+1])
continue Decode
}
continue
}
ok := false
if prefix == 0 {
ok = true
} else if prefix.IsREX() {
rexUsed |= prefix
if rex&prefix == prefix {
ok = true
}
} else {
if prefix == 0xF3 {
sawF3 = true
}
switch prefix {
case PrefixLOCK:
if lockIndex >= 0 {
inst.Prefix[lockIndex] |= PrefixImplicit
ok = true
}
case PrefixREP, PrefixREPN:
if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
inst.Prefix[repIndex] |= PrefixImplicit
ok = true
}
if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
// Check to see if earlier prefix F3 is present.
for i := repIndex - 1; i >= 0; i-- {
if inst.Prefix[i]&0xFF == prefix {
inst.Prefix[i] |= PrefixImplicit
ok = true
}
}
}
if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
// Check to see if earlier prefix F2 is present.
for i := repIndex - 1; i >= 0; i-- {
if inst.Prefix[i]&0xFF == prefix {
inst.Prefix[i] |= PrefixImplicit
ok = true
}
}
}
case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
inst.Prefix[segIndex] |= PrefixImplicit
ok = true
}
case PrefixDataSize:
// Looking for 66 mandatory prefix.
// The F2/F3 mandatory prefixes take priority when both are present.
// If we got this far in the xCondPrefix table and an F2/F3 is present,
// it means the table didn't have any entry for that prefix. But if 66 has
// special meaning, perhaps F2/F3 have special meaning that we don't know.
// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
// GNU libopcodes allows the 66 to match. We do what Intel xed does
// except in gnuCompat mode.
if repIndex >= 0 && !gnuCompat {
inst.Op = 0
break Decode
}
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] |= PrefixImplicit
ok = true
}
case PrefixAddrSize:
if addrSizeIndex >= 0 {
inst.Prefix[addrSizeIndex] |= PrefixImplicit
ok = true
}
}
}
if ok {
pc = int(decoder[pc+2*j+1])
continue Decode
}
}
inst.Op = 0
break Decode
case xCondSlashR:
pc = int(decoder[pc+regop&7])
// Input.
case xReadSlashR:
// done above
case xReadIb:
if pos >= len(src) {
return truncated(src, mode)
}
imm8 = int8(src[pos])
pos++
case xReadIw:
if pos+2 > len(src) {
return truncated(src, mode)
}
imm = int64(binary.LittleEndian.Uint16(src[pos:]))
pos += 2
case xReadId:
if pos+4 > len(src) {
return truncated(src, mode)
}
imm = int64(binary.LittleEndian.Uint32(src[pos:]))
pos += 4
case xReadIo:
if pos+8 > len(src) {
return truncated(src, mode)
}
imm = int64(binary.LittleEndian.Uint64(src[pos:]))
pos += 8
case xReadCb:
if pos >= len(src) {
return truncated(src, mode)
}
immc = int64(src[pos])
pos++
case xReadCw:
if pos+2 > len(src) {
return truncated(src, mode)
}
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
pos += 2
case xReadCm:
if addrMode == 16 {
if pos+2 > len(src) {
return truncated(src, mode)
}
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
pos += 2
} else if addrMode == 32 {
if pos+4 > len(src) {
return truncated(src, mode)
}
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
pos += 4
} else {
if pos+8 > len(src) {
return truncated(src, mode)
}
immc = int64(binary.LittleEndian.Uint64(src[pos:]))
pos += 8
}
case xReadCd:
if pos+4 > len(src) {
return truncated(src, mode)
}
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
pos += 4
case xReadCp:
if pos+6 > len(src) {
return truncated(src, mode)
}
w := binary.LittleEndian.Uint32(src[pos:])
w2 := binary.LittleEndian.Uint16(src[pos+4:])
immc = int64(w2)<<32 | int64(w)
pos += 6
// Output.
case xSetOp:
inst.Op = Op(decoder[pc])
pc++
case xArg1,
xArg3,
xArgAL,
xArgAX,
xArgCL,
xArgCS,
xArgDS,
xArgDX,
xArgEAX,
xArgEDX,
xArgES,
xArgFS,
xArgGS,
xArgRAX,
xArgRDX,
xArgSS,
xArgST,
xArgXMM0:
inst.Args[narg] = fixedArg[x]
narg++
case xArgImm8:
inst.Args[narg] = Imm(imm8)
narg++
case xArgImm8u:
inst.Args[narg] = Imm(uint8(imm8))
narg++
case xArgImm16:
inst.Args[narg] = Imm(int16(imm))
narg++
case xArgImm16u:
inst.Args[narg] = Imm(uint16(imm))
narg++
case xArgImm32:
inst.Args[narg] = Imm(int32(imm))
narg++
case xArgImm64:
inst.Args[narg] = Imm(imm)
narg++
case xArgM,
xArgM128,
xArgM1428byte,
xArgM16,
xArgM16and16,
xArgM16and32,
xArgM16and64,
xArgM16colon16,
xArgM16colon32,
xArgM16colon64,
xArgM16int,
xArgM2byte,
xArgM32,
xArgM32and32,
xArgM32fp,
xArgM32int,
xArgM512byte,
xArgM64,
xArgM64fp,
xArgM64int,
xArgM8,
xArgM80bcd,
xArgM80dec,
xArgM80fp,
xArgM94108byte,
xArgMem:
if !haveMem {
inst.Op = 0
break Decode
}
inst.Args[narg] = mem
inst.MemBytes = int(memBytes[decodeOp(x)])
narg++
case xArgPtr16colon16:
inst.Args[narg] = Imm(immc >> 16)
inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
narg += 2
case xArgPtr16colon32:
inst.Args[narg] = Imm(immc >> 32)
inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
narg += 2
case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
// TODO(rsc): Can address be 64 bits?
mem = Mem{Disp: int64(immc)}
if segIndex >= 0 {
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
inst.Prefix[segIndex] |= PrefixImplicit
}
inst.Args[narg] = mem
inst.MemBytes = int(memBytes[decodeOp(x)])
narg++
case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
base := baseReg[x]
index := Reg(regop)
if rex != 0 && base == AL && index >= 4 {
rexUsed |= PrefixREX
index -= 4
base = SPB
}
inst.Args[narg] = base + index
narg++
case xArgMm, xArgMm1, xArgTR0dashTR7:
inst.Args[narg] = baseReg[x] + Reg(regop&7)
narg++
case xArgCR0dashCR7:
// AMD documents an extension that the LOCK prefix
// can be used in place of a REX prefix in order to access
// CR8 from 32-bit mode. The LOCK prefix is allowed in
// all modes, provided the corresponding CPUID bit is set.
if lockIndex >= 0 {
inst.Prefix[lockIndex] |= PrefixImplicit
regop += 8
}
inst.Args[narg] = CR0 + Reg(regop)
narg++
case xArgSreg:
regop &= 7
if regop >= 6 {
inst.Op = 0
break Decode
}
inst.Args[narg] = ES + Reg(regop)
narg++
case xArgRmf16, xArgRmf32, xArgRmf64:
base := baseReg[x]
index := Reg(modrm & 07)
if rex&PrefixREXB != 0 {
rexUsed |= PrefixREXB
index += 8
}
inst.Args[narg] = base + index
narg++
case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
n := inst.Opcode >> uint(opshift+8) & 07
base := baseReg[x]
index := Reg(n)
if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
rexUsed |= PrefixREXB
index += 8
}
if rex != 0 && base == AL && index >= 4 {
rexUsed |= PrefixREX
index -= 4
base = SPB
}
inst.Args[narg] = base + index
narg++
case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
xArgMmM32, xArgMmM64, xArgMm2M64,
xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128:
if haveMem {
inst.Args[narg] = mem
inst.MemBytes = int(memBytes[decodeOp(x)])
} else {
base := baseReg[x]
index := Reg(rm)
switch decodeOp(x) {
case xArgMmM32, xArgMmM64, xArgMm2M64:
// There are only 8 MMX registers, so these ignore the REX.X bit.
index &= 7
case xArgRM8:
if rex != 0 && index >= 4 {
rexUsed |= PrefixREX
index -= 4
base = SPB
}
}
inst.Args[narg] = base + index
}
narg++
case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
if haveMem {
inst.Op = 0
break Decode
}
inst.Args[narg] = baseReg[x] + Reg(rm&7)
narg++
case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
if haveMem {
inst.Op = 0
break Decode
}
inst.Args[narg] = baseReg[x] + Reg(rm)
narg++
case xArgRel8:
inst.Args[narg] = Rel(int8(immc))
narg++
case xArgRel16:
inst.Args[narg] = Rel(int16(immc))
narg++
case xArgRel32:
inst.Args[narg] = Rel(int32(immc))
narg++
}
}
if inst.Op == 0 {
// Invalid instruction.
if nprefix > 0 {
return instPrefix(src[0], mode) // invalid instruction
}
return Inst{Len: pos}, ErrUnrecognized
}
// Matched! Hooray!
// 90 decodes as XCHG EAX, EAX but is NOP.
// 66 90 decodes as XCHG AX, AX and is NOP too.
// 48 90 decodes as XCHG RAX, RAX and is NOP too.
// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
// It's all too special to handle in the decoding tables, at least for now.
if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
inst.Op = NOP
if dataSizeIndex >= 0 {
inst.Prefix[dataSizeIndex] &^= PrefixImplicit
}
inst.Args[0] = nil
inst.Args[1] = nil
}
if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
inst.Prefix[repIndex] |= PrefixImplicit
inst.Op = PAUSE
inst.Args[0] = nil
inst.Args[1] = nil
} else if gnuCompat {
for i := nprefix - 1; i >= 0; i-- {
if inst.Prefix[i]&0xFF == 0xF3 {
inst.Prefix[i] |= PrefixImplicit
inst.Op = PAUSE
inst.Args[0] = nil
inst.Args[1] = nil
break
}
}
}
}
// defaultSeg returns the default segment for an implicit
// memory reference: the final override if present, or else DS.
defaultSeg := func() Reg {
if segIndex >= 0 {
inst.Prefix[segIndex] |= PrefixImplicit
return prefixToSegment(inst.Prefix[segIndex])
}
return DS
}
// Add implicit arguments not present in the tables.
// Normally we shy away from making implicit arguments explicit,
// following the Intel manuals, but adding the arguments seems
// the best way to express the effect of the segment override prefixes.
// TODO(rsc): Perhaps add these to the tables and
// create bytecode instructions for them.
usedAddrSize := false
switch inst.Op {
case INSB, INSW, INSD:
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
inst.Args[1] = DX
usedAddrSize = true
case OUTSB, OUTSW, OUTSD:
inst.Args[0] = DX
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
usedAddrSize = true
case MOVSB, MOVSW, MOVSD, MOVSQ:
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
usedAddrSize = true
case CMPSB, CMPSW, CMPSD, CMPSQ:
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
usedAddrSize = true
case LODSB, LODSW, LODSD, LODSQ:
switch inst.Op {
case LODSB:
inst.Args[0] = AL
case LODSW:
inst.Args[0] = AX
case LODSD:
inst.Args[0] = EAX
case LODSQ:
inst.Args[0] = RAX
}
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
usedAddrSize = true
case STOSB, STOSW, STOSD, STOSQ:
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
switch inst.Op {
case STOSB:
inst.Args[1] = AL
case STOSW:
inst.Args[1] = AX
case STOSD:
inst.Args[1] = EAX
case STOSQ:
inst.Args[1] = RAX
}
usedAddrSize = true
case SCASB, SCASW, SCASD, SCASQ:
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
switch inst.Op {
case SCASB:
inst.Args[0] = AL
case SCASW:
inst.Args[0] = AX
case SCASD:
inst.Args[0] = EAX
case SCASQ:
inst.Args[0] = RAX
}
usedAddrSize = true
case XLATB:
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
usedAddrSize = true
}
// If we used the address size annotation to construct the
// argument list, mark that prefix as implicit: it doesn't need
// to be shown when printing the instruction.
if haveMem || usedAddrSize {
if addrSizeIndex >= 0 {
inst.Prefix[addrSizeIndex] |= PrefixImplicit
}
}
// Similarly, if there's some memory operand, the segment
// will be shown there and doesn't need to be shown as an
// explicit prefix.
if haveMem {
if segIndex >= 0 {
inst.Prefix[segIndex] |= PrefixImplicit
}
}
// Branch predict prefixes are overloaded segment prefixes,
// since segment prefixes don't make sense on conditional jumps.
// Rewrite final instance to prediction prefix.
// The set of instructions to which the prefixes apply (other then the
// Jcc conditional jumps) is not 100% clear from the manuals, but
// the disassemblers seem to agree about the LOOP and JCXZ instructions,
// so we'll follow along.
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
PredictLoop:
for i := nprefix - 1; i >= 0; i-- {
p := inst.Prefix[i]
switch p & 0xFF {
case PrefixCS:
inst.Prefix[i] = PrefixPN
break PredictLoop
case PrefixDS:
inst.Prefix[i] = PrefixPT
break PredictLoop
}
}
}
// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
// A REPN applied to certain control transfers is a BND prefix to bound
// the range of possible destinations. There's surprisingly little documentation
// about this, so we just do what libopcodes and xed agree on.
// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
// does not turn into a BND.
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
for i := nprefix - 1; i >= 0; i-- {
p := inst.Prefix[i]
if p&^PrefixIgnored == PrefixREPN {
inst.Prefix[i] = PrefixBND
break
}
}
}
// The LOCK prefix only applies to certain instructions, and then only
// to instances of the instruction with a memory destination.
// Other uses of LOCK are invalid and cause a processor exception,
// in contrast to the "just ignore it" spirit applied to all other prefixes.
// Mark invalid lock prefixes.
hasLock := false
if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
switch inst.Op {
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
if isMem(inst.Args[0]) {
hasLock = true
break
}
fallthrough
default:
inst.Prefix[lockIndex] |= PrefixInvalid
}
}
// In certain cases, all of which require a memory destination,
// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
// from the Intel Transactional Synchroniation Extensions (TSX).
//
// The specific rules are:
// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
if isMem(inst.Args[0]) {
if inst.Op == XCHG {
hasLock = true
}
for i := len(inst.Prefix) - 1; i >= 0; i-- {
p := inst.Prefix[i] &^ PrefixIgnored
switch p {
case PrefixREPN:
if hasLock {
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
}
case PrefixREP:
if hasLock {
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
}
if inst.Op == MOV {
op := (inst.Opcode >> 24) &^ 1
if op == 0x88 || op == 0xC6 {
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
}
}
}
}
}
// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
if repIndex >= 0 {
switch inst.Prefix[repIndex] {
case PrefixREP, PrefixREPN:
switch inst.Op {
// According to the manuals, the REP/REPE prefix applies to all of these,
// while the REPN applies only to some of them. However, both libopcodes
// and xed show both prefixes explicitly for all instructions, so we do the same.
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
case INSB, INSW, INSD,
MOVSB, MOVSW, MOVSD, MOVSQ,
OUTSB, OUTSW, OUTSD,
LODSB, LODSW, LODSD, LODSQ,
CMPSB, CMPSW, CMPSD, CMPSQ,
SCASB, SCASW, SCASD, SCASQ,
STOSB, STOSW, STOSD, STOSQ:
// ok
default:
inst.Prefix[repIndex] |= PrefixIgnored
}
}
}
// If REX was present, mark implicit if all the 1 bits were consumed.
if rexIndex >= 0 {
if rexUsed != 0 {
rexUsed |= PrefixREX
}
if rex&^rexUsed == 0 {
inst.Prefix[rexIndex] |= PrefixImplicit
}
}
inst.DataSize = dataMode
inst.AddrSize = addrMode
inst.Mode = mode
inst.Len = pos
return inst, nil
}
var errInternal = errors.New("internal error")
// addr16 records the eight 16-bit addressing modes.
var addr16 = [8]Mem{
{Base: BX, Scale: 1, Index: SI},
{Base: BX, Scale: 1, Index: DI},
{Base: BP, Scale: 1, Index: SI},
{Base: BP, Scale: 1, Index: DI},
{Base: SI},
{Base: DI},
{Base: BP},
{Base: BX},
}
// baseReg returns the base register for a given register size in bits.
func baseRegForBits(bits int) Reg {
switch bits {
case 8:
return AL
case 16:
return AX
case 32:
return EAX
case 64:
return RAX
}
return 0
}
// baseReg records the base register for argument types that specify
// a range of registers indexed by op, regop, or rm.
var baseReg = [...]Reg{
xArgDR0dashDR7: DR0,
xArgMm1: M0,
xArgMm2: M0,
xArgMm2M64: M0,
xArgMm: M0,
xArgMmM32: M0,
xArgMmM64: M0,
xArgR16: AX,
xArgR16op: AX,
xArgR32: EAX,
xArgR32M16: EAX,
xArgR32M8: EAX,
xArgR32op: EAX,
xArgR64: RAX,
xArgR64M16: RAX,
xArgR64op: RAX,
xArgR8: AL,
xArgR8op: AL,
xArgRM16: AX,
xArgRM32: EAX,
xArgRM64: RAX,
xArgRM8: AL,
xArgRmf16: AX,
xArgRmf32: EAX,
xArgRmf64: RAX,
xArgSTi: F0,
xArgTR0dashTR7: TR0,
xArgXmm1: X0,
xArgXmm2: X0,
xArgXmm2M128: X0,
xArgXmm2M16: X0,
xArgXmm2M32: X0,
xArgXmm2M64: X0,
xArgXmm: X0,
xArgXmmM128: X0,
xArgXmmM32: X0,
xArgXmmM64: X0,
}
// prefixToSegment returns the segment register
// corresponding to a particular segment prefix.
func prefixToSegment(p Prefix) Reg {
switch p &^ PrefixImplicit {
case PrefixCS:
return CS
case PrefixDS:
return DS
case PrefixES:
return ES
case PrefixFS:
return FS
case PrefixGS:
return GS
case PrefixSS:
return SS
}
return 0
}
// fixedArg records the fixed arguments corresponding to the given bytecodes.
var fixedArg = [...]Arg{
xArg1: Imm(1),
xArg3: Imm(3),
xArgAL: AL,
xArgAX: AX,
xArgDX: DX,
xArgEAX: EAX,
xArgEDX: EDX,
xArgRAX: RAX,
xArgRDX: RDX,
xArgCL: CL,
xArgCS: CS,
xArgDS: DS,
xArgES: ES,
xArgFS: FS,
xArgGS: GS,
xArgSS: SS,
xArgST: F0,
xArgXMM0: X0,
}
// memBytes records the size of the memory pointed at
// by a memory argument of the given form.
var memBytes = [...]int8{
xArgM128: 128 / 8,
xArgM16: 16 / 8,
xArgM16and16: (16 + 16) / 8,
xArgM16colon16: (16 + 16) / 8,
xArgM16colon32: (16 + 32) / 8,
xArgM16int: 16 / 8,
xArgM2byte: 2,
xArgM32: 32 / 8,
xArgM32and32: (32 + 32) / 8,
xArgM32fp: 32 / 8,
xArgM32int: 32 / 8,
xArgM64: 64 / 8,
xArgM64fp: 64 / 8,
xArgM64int: 64 / 8,
xArgMm2M64: 64 / 8,
xArgMmM32: 32 / 8,
xArgMmM64: 64 / 8,
xArgMoffs16: 16 / 8,
xArgMoffs32: 32 / 8,
xArgMoffs64: 64 / 8,
xArgMoffs8: 8 / 8,
xArgR32M16: 16 / 8,
xArgR32M8: 8 / 8,
xArgR64M16: 16 / 8,
xArgRM16: 16 / 8,
xArgRM32: 32 / 8,
xArgRM64: 64 / 8,
xArgRM8: 8 / 8,
xArgXmm2M128: 128 / 8,
xArgXmm2M16: 16 / 8,
xArgXmm2M32: 32 / 8,
xArgXmm2M64: 64 / 8,
xArgXmm: 128 / 8,
xArgXmmM128: 128 / 8,
xArgXmmM32: 32 / 8,
xArgXmmM64: 64 / 8,
}
// isCondJmp records the conditional jumps.
var isCondJmp = [maxOp + 1]bool{
JA: true,
JAE: true,
JB: true,
JBE: true,
JE: true,
JG: true,
JGE: true,
JL: true,
JLE: true,
JNE: true,
JNO: true,
JNP: true,
JNS: true,
JO: true,
JP: true,
JS: true,
}
// isLoop records the loop operators.
var isLoop = [maxOp + 1]bool{
LOOP: true,
LOOPE: true,
LOOPNE: true,
JECXZ: true,
JRCXZ: true,
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"encoding/hex"
"io/ioutil"
"strconv"
"strings"
"testing"
)
func TestDecode(t *testing.T) {
data, err := ioutil.ReadFile("testdata/decode.txt")
if err != nil {
t.Fatal(err)
}
all := string(data)
for strings.Contains(all, "\t\t") {
all = strings.Replace(all, "\t\t", "\t", -1)
}
for _, line := range strings.Split(all, "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
f := strings.SplitN(line, "\t", 4)
i := strings.Index(f[0], "|")
if i < 0 {
t.Errorf("parsing %q: missing | separator", f[0])
continue
}
if i%2 != 0 {
t.Errorf("parsing %q: misaligned | separator", f[0])
}
size := i / 2
code, err := hex.DecodeString(f[0][:i] + f[0][i+1:])
if err != nil {
t.Errorf("parsing %q: %v", f[0], err)
continue
}
mode, err := strconv.Atoi(f[1])
if err != nil {
t.Errorf("invalid mode %q in: %s", f[1], line)
continue
}
syntax, asm := f[2], f[3]
inst, err := Decode(code, mode)
var out string
if err != nil {
out = "error: " + err.Error()
} else {
switch syntax {
case "gnu":
out = GNUSyntax(inst)
case "intel":
out = IntelSyntax(inst)
case "plan9":
out = Plan9Syntax(inst, 0, nil)
default:
t.Errorf("unknown syntax %q", syntax)
continue
}
}
if out != asm || inst.Len != size {
t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size)
}
}
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Support for testing against external disassembler program.
package x86asm
import (
"bufio"
"bytes"
"encoding/hex"
"flag"
"fmt"
"io/ioutil"
"log"
"math/rand"
"os"
"os/exec"
"regexp"
"runtime"
"strings"
"testing"
"time"
)
var (
printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths")
dumpTest = flag.Bool("dump", false, "dump all encodings")
mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
longTest = flag.Bool("long", false, "long test")
keep = flag.Bool("keep", false, "keep object files around")
debug = false
)
// A ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint32
enc [32]byte
nenc int
text string
}
func (r ExtInst) String() string {
return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
}
// An ExtDis is a connection between an external disassembler and a test.
type ExtDis struct {
Arch int
Dec chan ExtInst
File *os.File
Size int
KeepFile bool
Cmd *exec.Cmd
}
// Run runs the given command - the external disassembler - and returns
// a buffered reader of its standard output.
func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
if *keep {
log.Printf("%s\n", strings.Join(cmd, " "))
}
ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
out, err := ext.Cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("stdoutpipe: %v", err)
}
if err := ext.Cmd.Start(); err != nil {
return nil, fmt.Errorf("exec: %v", err)
}
b := bufio.NewReaderSize(out, 1<<20)
return b, nil
}
// Wait waits for the command started with Run to exit.
func (ext *ExtDis) Wait() error {
return ext.Cmd.Wait()
}
// testExtDis tests a set of byte sequences against an external disassembler.
// The disassembler is expected to produce the given syntax and be run
// in the given architecture mode (16, 32, or 64-bit).
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time.
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error.
func testExtDis(
t *testing.T,
syntax string,
arch int,
extdis func(ext *ExtDis) error,
generate func(f func([]byte)),
allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool,
) {
start := time.Now()
ext := &ExtDis{
Dec: make(chan ExtInst),
Arch: arch,
}
errc := make(chan error)
// First pass: write instructions to input file for external disassembler.
file, f, size, err := writeInst(generate)
if err != nil {
t.Fatal(err)
}
ext.Size = size
ext.File = f
defer func() {
f.Close()
if !*keep {
os.Remove(file)
}
}()
// Second pass: compare disassembly against our decodings.
var (
totalTests = 0
totalSkips = 0
totalErrors = 0
errors = make([]string, 0, 100) // sampled errors, at most cap
)
go func() {
errc <- extdis(ext)
}()
generate(func(enc []byte) {
dec, ok := <-ext.Dec
if !ok {
t.Errorf("decoding stream ended early")
return
}
inst, text := disasm(syntax, arch, pad(enc))
totalTests++
if *dumpTest {
fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
}
if text != dec.text || inst.Len != dec.nenc {
suffix := ""
if allowedMismatch(text, size, &inst, dec) {
totalSkips++
if !*mismatch {
return
}
suffix += " (allowed mismatch)"
}
totalErrors++
if len(errors) >= cap(errors) {
j := rand.Intn(totalErrors)
if j >= cap(errors) {
return
}
errors = append(errors[:j], errors[j+1:]...)
}
errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix))
}
})
if *mismatch {
totalErrors -= totalSkips
}
for _, b := range errors {
t.Log(b)
}
if totalErrors > 0 {
t.Fail()
}
t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
if err := <-errc; err != nil {
t.Fatal("external disassembler: %v", err)
}
}
const start = 0x8000 // start address of text
// writeInst writes the generated byte sequences to a new file
// starting at offset start. That file is intended to be the input to
// the external disassembler.
func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
f, err = ioutil.TempFile("", "x86map")
if err != nil {
return
}
file = f.Name()
f.Seek(start, 0)
w := bufio.NewWriter(f)
defer w.Flush()
size = 0
generate(func(x []byte) {
if len(x) > 16 {
x = x[:16]
}
if debug {
fmt.Printf("%#x: %x%x\n", start+size, x, pops[len(x):])
}
w.Write(x)
w.Write(pops[len(x):])
size += len(pops)
})
return file, f, size, nil
}
// 0x5F is a single-byte pop instruction.
// We pad the bytes we want decoded with enough 0x5Fs
// that no matter what state the instruction stream is in
// after reading our bytes, the pops will get us back to
// a forced instruction boundary.
var pops = []byte{
0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f,
}
// pad pads the code sequenc with pops.
func pad(enc []byte) []byte {
return append(enc[:len(enc):len(enc)], pops...)
}
// disasm returns the decoded instruction and text
// for the given source bytes, using the given syntax and mode.
func disasm(syntax string, mode int, src []byte) (inst Inst, text string) {
// If printTests is set, we record the coverage value
// before and after, and we write out the inputs for which
// coverage went up, in the format expected in testdata/decode.text.
// This produces a fairly small set of test cases that exercise nearly
// all the code.
var cover float64
if *printTests {
cover -= coverage()
}
inst, err := decode1(src, mode, syntax == "gnu")
if err != nil {
text = "error: " + err.Error()
} else {
switch syntax {
case "gnu":
text = GNUSyntax(inst)
case "intel":
text = IntelSyntax(inst)
case "plan9":
text = Plan9Syntax(inst, 0, nil)
default:
text = "error: unknown syntax " + syntax
}
}
if *printTests {
cover += coverage()
if cover > 0 {
max := len(src)
if max > 16 && inst.Len <= 16 {
max = 16
}
fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text)
}
}
return
}
// coverage returns a floating point number denoting the
// test coverage until now. The number increases when new code paths are exercised,
// both in the Go program and in the decoder byte code.
func coverage() float64 {
/*
testing.Coverage is not in the main distribution.
The implementation, which must go in package testing, is:
// Coverage reports the current code coverage as a fraction in the range [0, 1].
func Coverage() float64 {
var n, d int64
for _, counters := range cover.Counters {
for _, c := range counters {
if c > 0 {
n++
}
d++
}
}
if d == 0 {
return 0
}
return float64(n) / float64(d)
}
*/
var f float64
// f += testing.Coverage()
f += decodeCoverage()
return f
}
func decodeCoverage() float64 {
n := 0
for _, t := range decoderCover {
if t {
n++
}
}
return float64(1+n) / float64(1+len(decoderCover))
}
// Helpers for writing disassembler output parsers.
// isPrefix reports whether text is the name of an instruction prefix.
func isPrefix(text string) bool {
return prefixByte[text] > 0
}
// prefixByte maps instruction prefix text to actual prefix byte values.
var prefixByte = map[string]byte{
"es": 0x26,
"cs": 0x2e,
"ss": 0x36,
"ds": 0x3e,
"fs": 0x64,
"gs": 0x65,
"data16": 0x66,
"addr16": 0x67,
"lock": 0xf0,
"repn": 0xf2,
"repne": 0xf2,
"rep": 0xf3,
"repe": 0xf3,
"xacquire": 0xf2,
"xrelease": 0xf3,
"bnd": 0xf2,
"addr32": 0x66,
"data32": 0x67,
}
// hasPrefix reports whether any of the space-separated words in the text s
// begins with any of the given prefixes.
func hasPrefix(s string, prefixes ...string) bool {
for _, prefix := range prefixes {
for s := s; s != ""; {
if strings.HasPrefix(s, prefix) {
return true
}
i := strings.Index(s, " ")
if i < 0 {
break
}
s = s[i+1:]
}
}
return false
}
// contains reports whether the text s contains any of the given substrings.
func contains(s string, substrings ...string) bool {
for _, sub := range substrings {
if strings.Contains(s, sub) {
return true
}
}
return false
}
// isHex reports whether b is a hexadecimal character (0-9A-Fa-f).
func isHex(b byte) bool { return b == '0' || unhex[b] > 0 }
// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool signals whether any invalid hex was found.
// Spaces and tabs between bytes are okay but any other non-hex is not.
func parseHex(hex []byte, raw []byte) ([]byte, bool) {
hex = trimSpace(hex)
for j := 0; j < len(hex); {
for hex[j] == ' ' || hex[j] == '\t' {
j++
}
if j >= len(hex) {
break
}
if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
return nil, false
}
raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]])
j += 2
}
return raw, true
}
var unhex = [256]byte{
'0': 0,
'1': 1,
'2': 2,
'3': 3,
'4': 4,
'5': 5,
'6': 6,
'7': 7,
'8': 8,
'9': 9,
'A': 10,
'B': 11,
'C': 12,
'D': 13,
'E': 14,
'F': 15,
'a': 10,
'b': 11,
'c': 12,
'd': 13,
'e': 14,
'f': 15,
}
// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
func index(s []byte, t string) int {
i := 0
for {
j := bytes.IndexByte(s[i:], t[0])
if j < 0 {
return -1
}
i = i + j
if i+len(t) > len(s) {
return -1
}
for k := 1; k < len(t); k++ {
if s[i+k] != t[k] {
goto nomatch
}
}
return i
nomatch:
i++
}
}
// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
// If s must be rewritten, it is rewritten in place.
func fixSpace(s []byte) []byte {
s = trimSpace(s)
for i := 0; i < len(s); i++ {
if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
goto Fix
}
}
return s
Fix:
b := s
w := 0
for i := 0; i < len(s); i++ {
c := s[i]
if c == '\t' || c == '\n' {
c = ' '
}
if c == ' ' && w > 0 && b[w-1] == ' ' {
continue
}
b[w] = c
w++
}
if w > 0 && b[w-1] == ' ' {
w--
}
return b[:w]
}
// trimSpace trims leading and trailing space from s, returning a subslice of s.
func trimSpace(s []byte) []byte {
j := len(s)
for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') {
j--
}
i := 0
for i < j && (s[i] == ' ' || s[i] == '\t') {
i++
}
return s[i:j]
}
// pcrel and pcrelw match instructions using relative addressing mode.
var (
pcrel = regexp.MustCompile(`^((?:.* )?(?:j[a-z]+|call|ljmp|loopn?e?w?|xbegin)q?(?:,p[nt])?) 0x([0-9a-f]+)$`)
pcrelw = regexp.MustCompile(`^((?:.* )?(?:callw|jmpw|xbeginw|ljmpw)(?:,p[nt])?) 0x([0-9a-f]+)$`)
)
// Generators.
//
// The test cases are described as functions that invoke a callback repeatedly,
// with a new input sequence each time. These helpers make writing those
// a little easier.
// hexCases generates the cases written in hexadecimal in the encoded string.
// Spaces in 'encoded' separate entire test cases, not individual bytes.
func hexCases(t *testing.T, encoded string) func(func([]byte)) {
return func(try func([]byte)) {
for _, x := range strings.Fields(encoded) {
src, err := hex.DecodeString(x)
if err != nil {
t.Errorf("parsing %q: %v", x, err)
}
try(src)
}
}
}
// testdataCases generates the test cases recorded in testdata/decode.txt.
// It only uses the inputs; it ignores the answers recorded in that file.
func testdataCases(t *testing.T) func(func([]byte)) {
var codes [][]byte
data, err := ioutil.ReadFile("testdata/decode.txt")
if err != nil {
t.Fatal(err)
}
for _, line := range strings.Split(string(data), "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
f := strings.Fields(line)[0]
i := strings.Index(f, "|")
if i < 0 {
t.Errorf("parsing %q: missing | separator", f)
continue
}
if i%2 != 0 {
t.Errorf("parsing %q: misaligned | separator", f)
}
code, err := hex.DecodeString(f[:i] + f[i+1:])
if err != nil {
t.Errorf("parsing %q: %v", f, err)
continue
}
codes = append(codes, code)
}
return func(try func([]byte)) {
for _, code := range codes {
try(code)
}
}
}
// manyPrefixes generates all possible 2⁹ combinations of nine chosen prefixes.
// The relative ordering of the prefixes within the combinations varies deterministically.
func manyPrefixes(try func([]byte)) {
var prefixBytes = []byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36, 0x66, 0x67}
var enc []byte
for i := 0; i < 1<<uint(len(prefixBytes)); i++ {
enc = enc[:0]
for j, p := range prefixBytes {
if i&(1<<uint(j)) != 0 {
enc = append(enc, p)
}
}
if len(enc) > 0 {
k := i % len(enc)
enc[0], enc[k] = enc[k], enc[0]
}
try(enc)
}
}
// basicPrefixes geneartes 8 different possible prefix cases: no prefix
// and then one each of seven different prefix bytes.
func basicPrefixes(try func([]byte)) {
try(nil)
for _, b := range []byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36} {
try([]byte{b})
}
}
func rexPrefixes(try func([]byte)) {
try(nil)
for _, b := range []byte{0x40, 0x48, 0x43, 0x4C} {
try([]byte{b})
}
}
// concat takes two generators and returns a generator for the
// cross product of the two, concatenating the results from each.
func concat(gen1, gen2 func(func([]byte))) func(func([]byte)) {
return func(try func([]byte)) {
gen1(func(enc1 []byte) {
gen2(func(enc2 []byte) {
try(append(enc1[:len(enc1):len(enc1)], enc2...))
})
})
}
}
// concat3 takes three generators and returns a generator for the
// cross product of the three, concatenating the results from each.
func concat3(gen1, gen2, gen3 func(func([]byte))) func(func([]byte)) {
return func(try func([]byte)) {
gen1(func(enc1 []byte) {
gen2(func(enc2 []byte) {
gen3(func(enc3 []byte) {
try(append(append(enc1[:len(enc1):len(enc1)], enc2...), enc3...))
})
})
})
}
}
// concat4 takes four generators and returns a generator for the
// cross product of the four, concatenating the results from each.
func concat4(gen1, gen2, gen3, gen4 func(func([]byte))) func(func([]byte)) {
return func(try func([]byte)) {
gen1(func(enc1 []byte) {
gen2(func(enc2 []byte) {
gen3(func(enc3 []byte) {
gen4(func(enc4 []byte) {
try(append(append(append(enc1[:len(enc1):len(enc1)], enc2...), enc3...), enc4...))
})
})
})
})
}
}
// filter generates the sequences from gen that satisfy ok.
func filter(gen func(func([]byte)), ok func([]byte) bool) func(func([]byte)) {
return func(try func([]byte)) {
gen(func(enc []byte) {
if ok(enc) {
try(enc)
}
})
}
}
// enum8bit generates all possible 1-byte sequences, followed by distinctive padding.
func enum8bit(try func([]byte)) {
for i := 0; i < 1<<8; i++ {
try([]byte{byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88})
}
}
// enum8bit generates all possible 2-byte sequences, followed by distinctive padding.
func enum16bit(try func([]byte)) {
for i := 0; i < 1<<16; i++ {
try([]byte{byte(i), byte(i >> 8), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88})
}
}
// enum24bit generates all possible 3-byte sequences, followed by distinctive padding.
func enum24bit(try func([]byte)) {
for i := 0; i < 1<<24; i++ {
try([]byte{byte(i), byte(i >> 8), byte(i >> 16), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88})
}
}
// enumModRM generates all possible modrm bytes and, for modrm values that indicate
// a following sib byte, all possible modrm, sib combinations.
func enumModRM(try func([]byte)) {
for i := 0; i < 256; i++ {
if (i>>3)&07 == 04 && i>>6 != 3 { // has sib
for j := 0; j < 256; j++ {
try([]byte{0, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
try([]byte{1, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
}
} else {
try([]byte{0, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings
try([]byte{1, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings
}
}
}
// fixed generates the single case b.
// It's mainly useful to prepare an argument for concat or concat3.
func fixed(b ...byte) func(func([]byte)) {
return func(try func([]byte)) {
try(b)
}
}
// testBasic runs the given test function with cases all using opcode as the initial opcode bytes.
// It runs three phases:
//
// First, zero-or-one prefixes followed by opcode followed by all possible 1-byte values.
// If in -short mode, that's all.
//
// Second, zero-or-one prefixes followed by opcode followed by all possible 2-byte values.
// If not in -long mode, that's all. This phase and the next run in parallel with other tests
// (using t.Parallel).
//
// Finally, opcode followed by all possible 3-byte values. The test can take a very long time
// and prints progress messages to package log.
func testBasic(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
testfn(t, concat3(basicPrefixes, fixed(opcode...), enum8bit))
if testing.Short() {
return
}
t.Parallel()
testfn(t, concat3(basicPrefixes, fixed(opcode...), enum16bit))
if !*longTest {
return
}
name := caller(2)
op1 := make([]byte, len(opcode)+1)
copy(op1, opcode)
for i := 0; i < 256; i++ {
log.Printf("%s 24-bit: %d/256\n", name, i)
op1[len(opcode)] = byte(i)
testfn(t, concat(fixed(op1...), enum16bit))
}
}
func testBasicREX(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) {
testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum8bit), isValidREX))
if testing.Short() {
return
}
t.Parallel()
testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum16bit), isValidREX))
if !*longTest {
return
}
name := caller(2)
op1 := make([]byte, len(opcode)+1)
copy(op1, opcode)
for i := 0; i < 256; i++ {
log.Printf("%s 24-bit: %d/256\n", name, i)
op1[len(opcode)] = byte(i)
testfn(t, filter(concat3(rexPrefixes, fixed(op1...), enum16bit), isValidREX))
}
}
// testPrefix runs the given test function for all many prefix possibilities
// followed by all possible 1-byte sequences.
//
// If in -long mode, it then runs a test of all the prefix possibilities followed
// by all possible 2-byte sequences.
func testPrefix(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
t.Parallel()
testfn(t, concat(manyPrefixes, enum8bit))
if testing.Short() || !*longTest {
return
}
name := caller(2)
for i := 0; i < 256; i++ {
log.Printf("%s 16-bit: %d/256\n", name, i)
testfn(t, concat3(manyPrefixes, fixed(byte(i)), enum8bit))
}
}
func testPrefixREX(t *testing.T, testfn func(*testing.T, func(func([]byte)))) {
t.Parallel()
testfn(t, filter(concat3(manyPrefixes, rexPrefixes, enum8bit), isValidREX))
if testing.Short() || !*longTest {
return
}
name := caller(2)
for i := 0; i < 256; i++ {
log.Printf("%s 16-bit: %d/256\n", name, i)
testfn(t, filter(concat4(manyPrefixes, rexPrefixes, fixed(byte(i)), enum8bit), isValidREX))
}
}
func caller(skip int) string {
pc, _, _, _ := runtime.Caller(skip)
f := runtime.FuncForPC(pc)
name := "?"
if f != nil {
name = f.Name()
if i := strings.LastIndex(name, "."); i >= 0 {
name = name[i+1:]
}
}
return name
}
func isValidREX(x []byte) bool {
i := 0
for i < len(x) && isPrefixByte(x[i]) {
i++
}
if i < len(x) && Prefix(x[i]).IsREX() {
i++
if i < len(x) {
return !isPrefixByte(x[i]) && !Prefix(x[i]).IsREX()
}
}
return true
}
func isPrefixByte(b byte) bool {
switch b {
case 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0x66, 0x67, 0xF0, 0xF2, 0xF3:
return true
}
return false
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"fmt"
"strings"
)
// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix.
func GNUSyntax(inst Inst) string {
// Rewrite instruction to mimic GNU peculiarities.
// Note that inst has been passed by value and contains
// no pointers, so any changes we make here are local
// and will not propagate back out to the caller.
// Adjust opcode [sic].
switch inst.Op {
case FDIV, FDIVR, FSUB, FSUBR, FDIVP, FDIVRP, FSUBP, FSUBRP:
// DC E0, DC F0: libopcodes swaps FSUBR/FSUB and FDIVR/FDIV, at least
// if you believe the Intel manual is correct (the encoding is irregular as given;
// libopcodes uses the more regular expected encoding).
// TODO(rsc): Test to ensure Intel manuals are correct and report to libopcodes maintainers?
// NOTE: iant thinks this is deliberate, but we can't find the history.
_, reg1 := inst.Args[0].(Reg)
_, reg2 := inst.Args[1].(Reg)
if reg1 && reg2 && (inst.Opcode>>24 == 0xDC || inst.Opcode>>24 == 0xDE) {
switch inst.Op {
case FDIV:
inst.Op = FDIVR
case FDIVR:
inst.Op = FDIV
case FSUB:
inst.Op = FSUBR
case FSUBR:
inst.Op = FSUB
case FDIVP:
inst.Op = FDIVRP
case FDIVRP:
inst.Op = FDIVP
case FSUBP:
inst.Op = FSUBRP
case FSUBRP:
inst.Op = FSUBP
}
}
case MOVNTSD:
// MOVNTSD is F2 0F 2B /r.
// MOVNTSS is F3 0F 2B /r (supposedly; not in manuals).
// Usually inner prefixes win for display,
// so that F3 F2 0F 2B 11 is REP MOVNTSD
// and F2 F3 0F 2B 11 is REPN MOVNTSS.
// Libopcodes always prefers MOVNTSS regardless of prefix order.
if countPrefix(&inst, 0xF3) > 0 {
found := false
for i := len(inst.Prefix) - 1; i >= 0; i-- {
switch inst.Prefix[i] & 0xFF {
case 0xF3:
if !found {
found = true
inst.Prefix[i] |= PrefixImplicit
}
case 0xF2:
inst.Prefix[i] &^= PrefixImplicit
}
}
inst.Op = MOVNTSS
}
}
// Add implicit arguments.
switch inst.Op {
case MONITOR:
inst.Args[0] = EDX
inst.Args[1] = ECX
inst.Args[2] = EAX
if inst.AddrSize == 16 {
inst.Args[2] = AX
}
case MWAIT:
if inst.Mode == 64 {
inst.Args[0] = RCX
inst.Args[1] = RAX
} else {
inst.Args[0] = ECX
inst.Args[1] = EAX
}
}
// Adjust which prefixes will be displayed.
// The rule is to display all the prefixes not implied by
// the usual instruction display, that is, all the prefixes
// except the ones with PrefixImplicit set.
// However, of course, there are exceptions to the rule.
switch inst.Op {
case CRC32:
// CRC32 has a mandatory F2 prefix.
// If there are multiple F2s and no F3s, the extra F2s do not print.
// (And Decode has already marked them implicit.)
// However, if there is an F3 anywhere, then the extra F2s do print.
// If there are multiple F2 prefixes *and* an (ignored) F3,
// then libopcodes prints the extra F2s as REPNs.
if countPrefix(&inst, 0xF2) > 1 {
unmarkImplicit(&inst, 0xF2)
markLastImplicit(&inst, 0xF2)
}
// An unused data size override should probably be shown,
// to distinguish DATA16 CRC32B from plain CRC32B,
// but libopcodes always treats the final override as implicit
// and the others as explicit.
unmarkImplicit(&inst, PrefixDataSize)
markLastImplicit(&inst, PrefixDataSize)
case CVTSI2SD, CVTSI2SS:
if !isMem(inst.Args[1]) {
markLastImplicit(&inst, PrefixDataSize)
}
case CVTSD2SI, CVTSS2SI, CVTTSD2SI, CVTTSS2SI,
ENTER, FLDENV, FNSAVE, FNSTENV, FRSTOR, LGDT, LIDT, LRET,
POP, PUSH, RET, SGDT, SIDT, SYSRET, XBEGIN:
markLastImplicit(&inst, PrefixDataSize)
case LOOP, LOOPE, LOOPNE, MONITOR:
markLastImplicit(&inst, PrefixAddrSize)
case MOV:
// The 16-bit and 32-bit forms of MOV Sreg, dst and MOV src, Sreg
// cannot be distinguished when src or dst refers to memory, because
// Sreg is always a 16-bit value, even when we're doing a 32-bit
// instruction. Because the instruction tables distinguished these two,
// any operand size prefix has been marked as used (to decide which
// branch to take). Unmark it, so that it will show up in disassembly,
// so that the reader can tell the size of memory operand.
// up with the same arguments
dst, _ := inst.Args[0].(Reg)
src, _ := inst.Args[1].(Reg)
if ES <= src && src <= GS && isMem(inst.Args[0]) || ES <= dst && dst <= GS && isMem(inst.Args[1]) {
unmarkImplicit(&inst, PrefixDataSize)
}
case MOVDQU:
if countPrefix(&inst, 0xF3) > 1 {
unmarkImplicit(&inst, 0xF3)
markLastImplicit(&inst, 0xF3)
}
case MOVQ2DQ:
markLastImplicit(&inst, PrefixDataSize)
case SLDT, SMSW, STR, FXRSTOR, XRSTOR, XSAVE, XSAVEOPT, CMPXCHG8B:
if isMem(inst.Args[0]) {
unmarkImplicit(&inst, PrefixDataSize)
}
case SYSEXIT:
unmarkImplicit(&inst, PrefixDataSize)
}
if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
if countPrefix(&inst, PrefixCS) > 0 && countPrefix(&inst, PrefixDS) > 0 {
for i, p := range inst.Prefix {
switch p & 0xFFF {
case PrefixPN, PrefixPT:
inst.Prefix[i] &= 0xF0FF // cut interpretation bits, producing original segment prefix
}
}
}
}
// XACQUIRE/XRELEASE adjustment.
if inst.Op == MOV {
// MOV into memory is a candidate for turning REP into XRELEASE.
// However, if the REP is followed by a REPN, that REPN blocks the
// conversion.
haveREPN := false
for i := len(inst.Prefix) - 1; i >= 0; i-- {
switch inst.Prefix[i] &^ PrefixIgnored {
case PrefixREPN:
haveREPN = true
case PrefixXRELEASE:
if haveREPN {
inst.Prefix[i] = PrefixREP
}
}
}
}
// We only format the final F2/F3 as XRELEASE/XACQUIRE.
haveXA := false
haveXR := false
for i := len(inst.Prefix) - 1; i >= 0; i-- {
switch inst.Prefix[i] &^ PrefixIgnored {
case PrefixXRELEASE:
if !haveXR {
haveXR = true
} else {
inst.Prefix[i] = PrefixREP
}
case PrefixXACQUIRE:
if !haveXA {
haveXA = true
} else {
inst.Prefix[i] = PrefixREPN
}
}
}
// Determine opcode.
op := strings.ToLower(inst.Op.String())
if alt := gnuOp[inst.Op]; alt != "" {
op = alt
}
// Determine opcode suffix.
// Libopcodes omits the suffix if the width of the operation
// can be inferred from a register arguments. For example,
// add $1, %ebx has no suffix because you can tell from the
// 32-bit register destination that it is a 32-bit add,
// but in addl $1, (%ebx), the destination is memory, so the
// size is not evident without the l suffix.
needSuffix := true
SuffixLoop:
for i, a := range inst.Args {
if a == nil {
break
}
switch a := a.(type) {
case Reg:
switch inst.Op {
case MOVSX, MOVZX:
continue
case SHL, SHR, RCL, RCR, ROL, ROR, SAR:
if i == 1 {
// shift count does not tell us operand size
continue
}
case CRC32:
// The source argument does tell us operand size,
// but libopcodes still always puts a suffix on crc32.
continue
case PUSH, POP:
// Even though segment registers are 16-bit, push and pop
// can save/restore them from 32-bit slots, so they
// do not imply operand size.
if ES <= a && a <= GS {
continue
}
case CVTSI2SD, CVTSI2SS:
// The integer register argument takes priority.
if X0 <= a && a <= X15 {
continue
}
}
if AL <= a && a <= R15 || ES <= a && a <= GS || X0 <= a && a <= X15 || M0 <= a && a <= M7 {
needSuffix = false
break SuffixLoop
}
}
}
if needSuffix {
switch inst.Op {
case CMPXCHG8B, FLDCW, FNSTCW, FNSTSW, LDMXCSR, LLDT, LMSW, LTR, PCLMULQDQ,
SETA, SETAE, SETB, SETBE, SETE, SETG, SETGE, SETL, SETLE, SETNE, SETNO, SETNP, SETNS, SETO, SETP, SETS,
SLDT, SMSW, STMXCSR, STR, VERR, VERW:
// For various reasons, libopcodes emits no suffix for these instructions.
case CRC32:
op += byteSizeSuffix(argBytes(&inst, inst.Args[1]))
case LGDT, LIDT, SGDT, SIDT:
op += byteSizeSuffix(inst.DataSize / 8)
case MOVZX, MOVSX:
// Integer size conversions get two suffixes.
op = op[:4] + byteSizeSuffix(argBytes(&inst, inst.Args[1])) + byteSizeSuffix(argBytes(&inst, inst.Args[0]))
case LOOP, LOOPE, LOOPNE:
// Add w suffix to indicate use of CX register instead of ECX.
if inst.AddrSize == 16 {
op += "w"
}
case CALL, ENTER, JMP, LCALL, LEAVE, LJMP, LRET, RET, SYSRET, XBEGIN:
// Add w suffix to indicate use of 16-bit target.
// Exclude JMP rel8.
if inst.Opcode>>24 == 0xEB {
break
}
if inst.DataSize == 16 && inst.Mode != 16 {
markLastImplicit(&inst, PrefixDataSize)
op += "w"
} else if inst.Mode == 64 {
op += "q"
}
case FRSTOR, FNSAVE, FNSTENV, FLDENV:
// Add s suffix to indicate shortened FPU state (I guess).
if inst.DataSize == 16 {
op += "s"
}
case PUSH, POP:
if markLastImplicit(&inst, PrefixDataSize) {
op += byteSizeSuffix(inst.DataSize / 8)
} else if inst.Mode == 64 {
op += "q"
} else {
op += byteSizeSuffix(inst.MemBytes)
}
default:
if isFloat(inst.Op) {
// I can't explain any of this, but it's what libopcodes does.
switch inst.MemBytes {
default:
if (inst.Op == FLD || inst.Op == FSTP) && isMem(inst.Args[0]) {
op += "t"
}
case 4:
if isFloatInt(inst.Op) {
op += "l"
} else {
op += "s"
}
case 8:
if isFloatInt(inst.Op) {
op += "ll"
} else {
op += "l"
}
}
break
}
op += byteSizeSuffix(inst.MemBytes)
}
}
// Adjust special case opcodes.
switch inst.Op {
case 0:
if inst.Prefix[0] != 0 {
return strings.ToLower(inst.Prefix[0].String())
}
case INT:
if inst.Opcode>>24 == 0xCC {
inst.Args[0] = nil
op = "int3"
}
case CMPPS, CMPPD, CMPSD_XMM, CMPSS:
imm, ok := inst.Args[2].(Imm)
if ok && 0 <= imm && imm < 8 {
inst.Args[2] = nil
op = cmppsOps[imm] + op[3:]
}
case PCLMULQDQ:
imm, ok := inst.Args[2].(Imm)
if ok && imm&^0x11 == 0 {
inst.Args[2] = nil
op = pclmulqOps[(imm&0x10)>>3|(imm&1)]
}
case XLATB:
if markLastImplicit(&inst, PrefixAddrSize) {
op = "xlat" // not xlatb
}
}
// Build list of argument strings.
var (
usedPrefixes bool // segment prefixes consumed by Mem formatting
args []string // formatted arguments
)
for i, a := range inst.Args {
if a == nil {
break
}
switch inst.Op {
case MOVSB, MOVSW, MOVSD, MOVSQ, OUTSB, OUTSW, OUTSD:
if i == 0 {
usedPrefixes = true // disable use of prefixes for first argument
} else {
usedPrefixes = false
}
}
if a == Imm(1) && (inst.Opcode>>24)&^1 == 0xD0 {
continue
}
args = append(args, gnuArg(&inst, a, &usedPrefixes))
}
// The default is to print the arguments in reverse Intel order.
// A few instructions inhibit this behavior.
switch inst.Op {
case BOUND, LCALL, ENTER, LJMP:
// no reverse
default:
// reverse args
for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
args[i], args[j] = args[j], args[i]
}
}
// Build prefix string.
// Must be after argument formatting, which can turn off segment prefixes.
var (
prefix = "" // output string
numAddr = 0
numData = 0
implicitData = false
)
for _, p := range inst.Prefix {
if p&0xFF == PrefixDataSize && p&PrefixImplicit != 0 {
implicitData = true
}
}
for _, p := range inst.Prefix {
if p == 0 {
break
}
if p&PrefixImplicit != 0 {
continue
}
switch p &^ (PrefixIgnored | PrefixInvalid) {
default:
if p.IsREX() {
if p&0xFF == PrefixREX {
prefix += "rex "
} else {
prefix += "rex." + p.String()[4:] + " "
}
break
}
prefix += strings.ToLower(p.String()) + " "
case PrefixPN:
op += ",pn"
continue
case PrefixPT:
op += ",pt"
continue
case PrefixAddrSize, PrefixAddr16, PrefixAddr32:
// For unknown reasons, if the addr16 prefix is repeated,
// libopcodes displays all but the last as addr32, even though
// the addressing form used in a memory reference is clearly
// still 16-bit.
n := 32
if inst.Mode == 32 {
n = 16
}
numAddr++
if countPrefix(&inst, PrefixAddrSize) > numAddr {
n = inst.Mode
}
prefix += fmt.Sprintf("addr%d ", n)
continue
case PrefixData16, PrefixData32:
if implicitData && countPrefix(&inst, PrefixDataSize) > 1 {
// Similar to the addr32 logic above, but it only kicks in
// when something used the data size prefix (one is implicit).
n := 16
if inst.Mode == 16 {
n = 32
}
numData++
if countPrefix(&inst, PrefixDataSize) > numData {
if inst.Mode == 16 {
n = 16
} else {
n = 32
}
}
prefix += fmt.Sprintf("data%d ", n)
continue
}
prefix += strings.ToLower(p.String()) + " "
}
}
// Finally! Put it all together.
text := prefix + op
if args != nil {
text += " "
// Indirect call/jmp gets a star to distinguish from direct jump address.
if (inst.Op == CALL || inst.Op == JMP || inst.Op == LJMP || inst.Op == LCALL) && (isMem(inst.Args[0]) || isReg(inst.Args[0])) {
text += "*"
}
text += strings.Join(args, ",")
}
return text
}
// gnuArg returns the GNU syntax for the argument x from the instruction inst.
// If *usedPrefixes is false and x is a Mem, then the formatting
// includes any segment prefixes and sets *usedPrefixes to true.
func gnuArg(inst *Inst, x Arg, usedPrefixes *bool) string {
if x == nil {
return "<nil>"
}
switch x := x.(type) {
case Reg:
switch inst.Op {
case CVTSI2SS, CVTSI2SD, CVTSS2SI, CVTSD2SI, CVTTSD2SI, CVTTSS2SI:
if inst.DataSize == 16 && EAX <= x && x <= R15L {
x -= EAX - AX
}
case IN, INSB, INSW, INSD, OUT, OUTSB, OUTSW, OUTSD:
// DX is the port, but libopcodes prints it as if it were a memory reference.
if x == DX {
return "(%dx)"
}
}
return gccRegName[x]
case Mem:
seg := ""
var haveCS, haveDS, haveES, haveFS, haveGS, haveSS bool
switch x.Segment {
case CS:
haveCS = true
case DS:
haveDS = true
case ES:
haveES = true
case FS:
haveFS = true
case GS:
haveGS = true
case SS:
haveSS = true
}
switch inst.Op {
case INSB, INSW, INSD, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ:
// These do not accept segment prefixes, at least in the GNU rendering.
default:
if *usedPrefixes {
break
}
for i := len(inst.Prefix) - 1; i >= 0; i-- {
p := inst.Prefix[i] &^ PrefixIgnored
if p == 0 {
continue
}
switch p {
case PrefixCS:
if !haveCS {
haveCS = true
inst.Prefix[i] |= PrefixImplicit
}
case PrefixDS:
if !haveDS {
haveDS = true
inst.Prefix[i] |= PrefixImplicit
}
case PrefixES:
if !haveES {
haveES = true
inst.Prefix[i] |= PrefixImplicit
}
case PrefixFS:
if !haveFS {
haveFS = true
inst.Prefix[i] |= PrefixImplicit
}
case PrefixGS:
if !haveGS {
haveGS = true
inst.Prefix[i] |= PrefixImplicit
}
case PrefixSS:
if !haveSS {
haveSS = true
inst.Prefix[i] |= PrefixImplicit
}
}
}
*usedPrefixes = true
}
if haveCS {
seg += "%cs:"
}
if haveDS {
seg += "%ds:"
}
if haveSS {
seg += "%ss:"
}
if haveES {
seg += "%es:"
}
if haveFS {
seg += "%fs:"
}
if haveGS {
seg += "%gs:"
}
disp := ""
if x.Disp != 0 {
disp = fmt.Sprintf("%#x", x.Disp)
}
if x.Scale == 0 || x.Index == 0 && x.Scale == 1 && (x.Base == ESP || x.Base == RSP || x.Base == 0 && inst.Mode == 64) {
if x.Base == 0 {
return seg + disp
}
return fmt.Sprintf("%s%s(%s)", seg, disp, gccRegName[x.Base])
}
base := gccRegName[x.Base]
if x.Base == 0 {
base = ""
}
index := gccRegName[x.Index]
if x.Index == 0 {
if inst.AddrSize == 64 {
index = "%riz"
} else {
index = "%eiz"
}
}
if AX <= x.Base && x.Base <= DI {
// 16-bit addressing - no scale
return fmt.Sprintf("%s%s(%s,%s)", seg, disp, base, index)
}
return fmt.Sprintf("%s%s(%s,%s,%d)", seg, disp, base, index, x.Scale)
case Rel:
return fmt.Sprintf(".%+#x", int32(x))
case Imm:
if inst.Mode == 32 {
return fmt.Sprintf("$%#x", uint32(x))
}
return fmt.Sprintf("$%#x", int64(x))
}
return x.String()
}
var gccRegName = [...]string{
0: "REG0",
AL: "%al",
CL: "%cl",
BL: "%bl",
DL: "%dl",
AH: "%ah",
CH: "%ch",
BH: "%bh",
DH: "%dh",
SPB: "%spl",
BPB: "%bpl",
SIB: "%sil",
DIB: "%dil",
R8B: "%r8b",
R9B: "%r9b",
R10B: "%r10b",
R11B: "%r11b",
R12B: "%r12b",
R13B: "%r13b",
R14B: "%r14b",
R15B: "%r15b",
AX: "%ax",
CX: "%cx",
BX: "%bx",
DX: "%dx",
SP: "%sp",
BP: "%bp",
SI: "%si",
DI: "%di",
R8W: "%r8w",
R9W: "%r9w",
R10W: "%r10w",
R11W: "%r11w",
R12W: "%r12w",
R13W: "%r13w",
R14W: "%r14w",
R15W: "%r15w",
EAX: "%eax",
ECX: "%ecx",
EDX: "%edx",
EBX: "%ebx",
ESP: "%esp",
EBP: "%ebp",
ESI: "%esi",
EDI: "%edi",
R8L: "%r8d",
R9L: "%r9d",
R10L: "%r10d",
R11L: "%r11d",
R12L: "%r12d",
R13L: "%r13d",
R14L: "%r14d",
R15L: "%r15d",
RAX: "%rax",
RCX: "%rcx",
RDX: "%rdx",
RBX: "%rbx",
RSP: "%rsp",
RBP: "%rbp",
RSI: "%rsi",
RDI: "%rdi",
R8: "%r8",
R9: "%r9",
R10: "%r10",
R11: "%r11",
R12: "%r12",
R13: "%r13",
R14: "%r14",
R15: "%r15",
IP: "%ip",
EIP: "%eip",
RIP: "%rip",
F0: "%st",
F1: "%st(1)",
F2: "%st(2)",
F3: "%st(3)",
F4: "%st(4)",
F5: "%st(5)",
F6: "%st(6)",
F7: "%st(7)",
M0: "%mm0",
M1: "%mm1",
M2: "%mm2",
M3: "%mm3",
M4: "%mm4",
M5: "%mm5",
M6: "%mm6",
M7: "%mm7",
X0: "%xmm0",
X1: "%xmm1",
X2: "%xmm2",
X3: "%xmm3",
X4: "%xmm4",
X5: "%xmm5",
X6: "%xmm6",
X7: "%xmm7",
X8: "%xmm8",
X9: "%xmm9",
X10: "%xmm10",
X11: "%xmm11",
X12: "%xmm12",
X13: "%xmm13",
X14: "%xmm14",
X15: "%xmm15",
CS: "%cs",
SS: "%ss",
DS: "%ds",
ES: "%es",
FS: "%fs",
GS: "%gs",
GDTR: "%gdtr",
IDTR: "%idtr",
LDTR: "%ldtr",
MSW: "%msw",
TASK: "%task",
CR0: "%cr0",
CR1: "%cr1",
CR2: "%cr2",
CR3: "%cr3",
CR4: "%cr4",
CR5: "%cr5",
CR6: "%cr6",
CR7: "%cr7",
CR8: "%cr8",
CR9: "%cr9",
CR10: "%cr10",
CR11: "%cr11",
CR12: "%cr12",
CR13: "%cr13",
CR14: "%cr14",
CR15: "%cr15",
DR0: "%db0",
DR1: "%db1",
DR2: "%db2",
DR3: "%db3",
DR4: "%db4",
DR5: "%db5",
DR6: "%db6",
DR7: "%db7",
TR0: "%tr0",
TR1: "%tr1",
TR2: "%tr2",
TR3: "%tr3",
TR4: "%tr4",
TR5: "%tr5",
TR6: "%tr6",
TR7: "%tr7",
}
var gnuOp = map[Op]string{
CBW: "cbtw",
CDQ: "cltd",
CMPSD: "cmpsl",
CMPSD_XMM: "cmpsd",
CWD: "cwtd",
CWDE: "cwtl",
CQO: "cqto",
INSD: "insl",
IRET: "iretw",
IRETD: "iret",
IRETQ: "iretq",
LODSB: "lods",
LODSD: "lods",
LODSQ: "lods",
LODSW: "lods",
MOVSD: "movsl",
MOVSD_XMM: "movsd",
OUTSD: "outsl",
POPA: "popaw",
POPAD: "popa",
POPF: "popfw",
POPFD: "popf",
PUSHA: "pushaw",
PUSHAD: "pusha",
PUSHF: "pushfw",
PUSHFD: "pushf",
SCASB: "scas",
SCASD: "scas",
SCASQ: "scas",
SCASW: "scas",
STOSB: "stos",
STOSD: "stos",
STOSQ: "stos",
STOSW: "stos",
XLATB: "xlat",
}
var cmppsOps = []string{
"cmpeq",
"cmplt",
"cmple",
"cmpunord",
"cmpneq",
"cmpnlt",
"cmpnle",
"cmpord",
}
var pclmulqOps = []string{
"pclmullqlqdq",
"pclmulhqlqdq",
"pclmullqhqdq",
"pclmulhqhqdq",
}
func countPrefix(inst *Inst, target Prefix) int {
n := 0
for _, p := range inst.Prefix {
if p&0xFF == target&0xFF {
n++
}
}
return n
}
func markLastImplicit(inst *Inst, prefix Prefix) bool {
for i := len(inst.Prefix) - 1; i >= 0; i-- {
p := inst.Prefix[i]
if p&0xFF == prefix {
inst.Prefix[i] |= PrefixImplicit
return true
}
}
return false
}
func unmarkImplicit(inst *Inst, prefix Prefix) {
for i := len(inst.Prefix) - 1; i >= 0; i-- {
p := inst.Prefix[i]
if p&0xFF == prefix {
inst.Prefix[i] &^= PrefixImplicit
}
}
}
func byteSizeSuffix(b int) string {
switch b {
case 1:
return "b"
case 2:
return "w"
case 4:
return "l"
case 8:
return "q"
}
return ""
}
func argBytes(inst *Inst, arg Arg) int {
if isMem(arg) {
return inst.MemBytes
}
return regBytes(arg)
}
func isFloat(op Op) bool {
switch op {
case FADD, FCOM, FCOMP, FDIV, FDIVR, FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL, FIST, FISTP, FISTTP, FISUB, FISUBR, FLD, FMUL, FST, FSTP, FSUB, FSUBR:
return true
}
return false
}
func isFloatInt(op Op) bool {
switch op {
case FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL, FIST, FISTP, FISTTP, FISUB, FISUBR:
return true
}
return false
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package x86asm implements decoding of x86 machine code.
package x86asm
import (
"bytes"
"fmt"
)
// An Inst is a single instruction.
type Inst struct {
Prefix Prefixes // Prefixes applied to the instruction.
Op Op // Opcode mnemonic
Opcode uint32 // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
Args Args // Instruction arguments, in Intel order
Mode int // processor mode in bits: 16, 32, or 64
AddrSize int // address size in bits: 16, 32, or 64
DataSize int // operand size in bits: 16, 32, or 64
MemBytes int // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
Len int // length of encoded instruction in bytes
}
// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes.
type Prefixes [14]Prefix
// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
type Prefix uint16
const (
// Metadata about the role of a prefix in an instruction.
PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
PrefixIgnored Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
PrefixInvalid Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
// Memory segment overrides.
PrefixES Prefix = 0x26 // ES segment override
PrefixCS Prefix = 0x2E // CS segment override
PrefixSS Prefix = 0x36 // SS segment override
PrefixDS Prefix = 0x3E // DS segment override
PrefixFS Prefix = 0x64 // FS segment override
PrefixGS Prefix = 0x65 // GS segment override
// Branch prediction.
PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
// Size attributes.
PrefixDataSize Prefix = 0x66 // operand size override
PrefixData16 Prefix = 0x166
PrefixData32 Prefix = 0x266
PrefixAddrSize Prefix = 0x67 // address size override
PrefixAddr16 Prefix = 0x167
PrefixAddr32 Prefix = 0x267
// One of a kind.
PrefixLOCK Prefix = 0xF0 // lock
PrefixREPN Prefix = 0xF2 // repeat not zero
PrefixXACQUIRE Prefix = 0x1F2
PrefixBND Prefix = 0x2F2
PrefixREP Prefix = 0xF3 // repeat
PrefixXRELEASE Prefix = 0x1F3
// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
// the other bits are set or not according to the intended use.
PrefixREX Prefix = 0x40 // REX 64-bit extension prefix
PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
)
// IsREX reports whether p is a REX prefix byte.
func (p Prefix) IsREX() bool {
return p&0xF0 == PrefixREX
}
func (p Prefix) String() string {
p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
if s := prefixNames[p]; s != "" {
return s
}
if p.IsREX() {
s := "REX."
if p&PrefixREXW != 0 {
s += "W"
}
if p&PrefixREXR != 0 {
s += "R"
}
if p&PrefixREXX != 0 {
s += "X"
}
if p&PrefixREXB != 0 {
s += "B"
}
return s
}
return fmt.Sprintf("Prefix(%#x)", int(p))
}
// An Op is an x86 opcode.
type Op uint32
func (op Op) String() string {
i := int(op)
if i < 0 || i >= len(opNames) || opNames[i] == "" {
return fmt.Sprintf("Op(%d)", i)
}
return opNames[i]
}
// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
type Args [4]Arg
// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
String() string
isArg()
}
// Note that the implements of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
// A Reg is a single register.
// The zero Reg value has no name but indicates ``no register.''
type Reg uint8
const (
_ Reg = iota
// 8-bit
AL
CL
DL
BL
AH
CH
DH
BH
SPB
BPB
SIB
DIB
R8B
R9B
R10B
R11B
R12B
R13B
R14B
R15B
// 16-bit
AX
CX
DX
BX
SP
BP
SI
DI
R8W
R9W
R10W
R11W
R12W
R13W
R14W
R15W
// 32-bit
EAX
ECX
EDX
EBX
ESP
EBP
ESI
EDI
R8L
R9L
R10L
R11L
R12L
R13L
R14L
R15L
// 64-bit
RAX
RCX
RDX
RBX
RSP
RBP
RSI
RDI
R8
R9
R10
R11
R12
R13
R14
R15
// Instruction pointer.
IP // 16-bit
EIP // 32-bit
RIP // 64-bit
// 387 floating point registers.
F0
F1
F2
F3
F4
F5
F6
F7
// MMX registers.
M0
M1
M2
M3
M4
M5
M6
M7
// XMM registers.
X0
X1
X2
X3
X4
X5
X6
X7
X8
X9
X10
X11
X12
X13
X14
X15
// Segment registers.
ES
CS
SS
DS
FS
GS
// System registers.
GDTR
IDTR
LDTR
MSW
TASK
// Control registers.
CR0
CR1
CR2
CR3
CR4
CR5
CR6
CR7
CR8
CR9
CR10
CR11
CR12
CR13
CR14
CR15
// Debug registers.
DR0
DR1
DR2
DR3
DR4
DR5
DR6
DR7
DR8
DR9
DR10
DR11
DR12
DR13
DR14
DR15
// Task registers.
TR0
TR1
TR2
TR3
TR4
TR5
TR6
TR7
)
const regMax = TR7
func (Reg) isArg() {}
func (r Reg) String() string {
i := int(r)
if i < 0 || i >= len(regNames) || regNames[i] == "" {
return fmt.Sprintf("Reg(%d)", i)
}
return regNames[i]
}
// A Mem is a memory reference.
// The general form is Segment:[Base+Scale*Index+Disp].
type Mem struct {
Segment Reg
Base Reg
Scale uint8
Index Reg
Disp int64
}
func (Mem) isArg() {}
func (m Mem) String() string {
var base, plus, scale, index, disp string
if m.Base != 0 {
base = m.Base.String()
}
if m.Scale != 0 {
if m.Base != 0 {
plus = "+"
}
if m.Scale > 1 {
scale = fmt.Sprintf("%d*", m.Scale)
}
index = m.Index.String()
}
if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
disp = fmt.Sprintf("%+#x", m.Disp)
}
return "[" + base + plus + scale + index + disp + "]"
}
// A Rel is an offset relative to the current instruction pointer.
type Rel int32
func (Rel) isArg() {}
func (r Rel) String() string {
return fmt.Sprintf(".%+d", r)
}
// An Imm is an integer constant.
type Imm int64
func (Imm) isArg() {}
func (i Imm) String() string {
return fmt.Sprintf("%#x", int64(i))
}
func (i Inst) String() string {
var buf bytes.Buffer
for _, p := range i.Prefix {
if p == 0 {
break
}
if p&PrefixImplicit != 0 {
continue
}
fmt.Fprintf(&buf, "%v ", p)
}
fmt.Fprintf(&buf, "%v", i.Op)
sep := " "
for _, v := range i.Args {
if v == nil {
break
}
fmt.Fprintf(&buf, "%s%v", sep, v)
sep = ", "
}
return buf.String()
}
func isReg(a Arg) bool {
_, ok := a.(Reg)
return ok
}
func isSegReg(a Arg) bool {
r, ok := a.(Reg)
return ok && ES <= r && r <= GS
}
func isMem(a Arg) bool {
_, ok := a.(Mem)
return ok
}
func isImm(a Arg) bool {
_, ok := a.(Imm)
return ok
}
func regBytes(a Arg) int {
r, ok := a.(Reg)
if !ok {
return 0
}
if AL <= r && r <= R15B {
return 1
}
if AX <= r && r <= R15W {
return 2
}
if EAX <= r && r <= R15L {
return 4
}
if RAX <= r && r <= R15 {
return 8
}
return 0
}
func isSegment(p Prefix) bool {
switch p {
case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
return true
}
return false
}
// The Op definitions and string list are in tables.go.
var prefixNames = map[Prefix]string{
PrefixCS: "CS",
PrefixDS: "DS",
PrefixES: "ES",
PrefixFS: "FS",
PrefixGS: "GS",
PrefixSS: "SS",
PrefixLOCK: "LOCK",
PrefixREP: "REP",
PrefixREPN: "REPN",
PrefixAddrSize: "ADDRSIZE",
PrefixDataSize: "DATASIZE",
PrefixAddr16: "ADDR16",
PrefixData16: "DATA16",
PrefixAddr32: "ADDR32",
PrefixData32: "DATA32",
PrefixBND: "BND",
PrefixXACQUIRE: "XACQUIRE",
PrefixXRELEASE: "XRELEASE",
PrefixREX: "REX",
PrefixPT: "PT",
PrefixPN: "PN",
}
var regNames = [...]string{
AL: "AL",
CL: "CL",
BL: "BL",
DL: "DL",
AH: "AH",
CH: "CH",
BH: "BH",
DH: "DH",
SPB: "SPB",
BPB: "BPB",
SIB: "SIB",
DIB: "DIB",
R8B: "R8B",
R9B: "R9B",
R10B: "R10B",
R11B: "R11B",
R12B: "R12B",
R13B: "R13B",
R14B: "R14B",
R15B: "R15B",
AX: "AX",
CX: "CX",
BX: "BX",
DX: "DX",
SP: "SP",
BP: "BP",
SI: "SI",
DI: "DI",
R8W: "R8W",
R9W: "R9W",
R10W: "R10W",
R11W: "R11W",
R12W: "R12W",
R13W: "R13W",
R14W: "R14W",
R15W: "R15W",
EAX: "EAX",
ECX: "ECX",
EDX: "EDX",
EBX: "EBX",
ESP: "ESP",
EBP: "EBP",
ESI: "ESI",
EDI: "EDI",
R8L: "R8L",
R9L: "R9L",
R10L: "R10L",
R11L: "R11L",
R12L: "R12L",
R13L: "R13L",
R14L: "R14L",
R15L: "R15L",
RAX: "RAX",
RCX: "RCX",
RDX: "RDX",
RBX: "RBX",
RSP: "RSP",
RBP: "RBP",
RSI: "RSI",
RDI: "RDI",
R8: "R8",
R9: "R9",
R10: "R10",
R11: "R11",
R12: "R12",
R13: "R13",
R14: "R14",
R15: "R15",
IP: "IP",
EIP: "EIP",
RIP: "RIP",
F0: "F0",
F1: "F1",
F2: "F2",
F3: "F3",
F4: "F4",
F5: "F5",
F6: "F6",
F7: "F7",
M0: "M0",
M1: "M1",
M2: "M2",
M3: "M3",
M4: "M4",
M5: "M5",
M6: "M6",
M7: "M7",
X0: "X0",
X1: "X1",
X2: "X2",
X3: "X3",
X4: "X4",
X5: "X5",
X6: "X6",
X7: "X7",
X8: "X8",
X9: "X9",
X10: "X10",
X11: "X11",
X12: "X12",
X13: "X13",
X14: "X14",
X15: "X15",
CS: "CS",
SS: "SS",
DS: "DS",
ES: "ES",
FS: "FS",
GS: "GS",
GDTR: "GDTR",
IDTR: "IDTR",
LDTR: "LDTR",
MSW: "MSW",
TASK: "TASK",
CR0: "CR0",
CR1: "CR1",
CR2: "CR2",
CR3: "CR3",
CR4: "CR4",
CR5: "CR5",
CR6: "CR6",
CR7: "CR7",
CR8: "CR8",
CR9: "CR9",
CR10: "CR10",
CR11: "CR11",
CR12: "CR12",
CR13: "CR13",
CR14: "CR14",
CR15: "CR15",
DR0: "DR0",
DR1: "DR1",
DR2: "DR2",
DR3: "DR3",
DR4: "DR4",
DR5: "DR5",
DR6: "DR6",
DR7: "DR7",
DR8: "DR8",
DR9: "DR9",
DR10: "DR10",
DR11: "DR11",
DR12: "DR12",
DR13: "DR13",
DR14: "DR14",
DR15: "DR15",
TR0: "TR0",
TR1: "TR1",
TR2: "TR2",
TR3: "TR3",
TR4: "TR4",
TR5: "TR5",
TR6: "TR6",
TR7: "TR7",
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"strings"
"testing"
)
func TestRegString(t *testing.T) {
for r := Reg(1); r <= regMax; r++ {
if regNames[r] == "" {
t.Errorf("regNames[%d] is missing", int(r))
} else if s := r.String(); strings.Contains(s, "Reg(") {
t.Errorf("Reg(%d).String() = %s, want proper name", int(r), s)
}
}
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"fmt"
"strings"
)
// IntelSyntax returns the Intel assembler syntax for the instruction, as defined by Intel's XED tool.
func IntelSyntax(inst Inst) string {
var iargs []Arg
for _, a := range inst.Args {
if a == nil {
break
}
iargs = append(iargs, a)
}
switch inst.Op {
case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, LOOPNE, JCXZ, JECXZ, JRCXZ, LOOP, LOOPE, MOV, XLATB:
if inst.Op == MOV && (inst.Opcode>>16)&0xFFFC != 0x0F20 {
break
}
for i, p := range inst.Prefix {
if p&0xFF == PrefixAddrSize {
inst.Prefix[i] &^= PrefixImplicit
}
}
}
switch inst.Op {
case MOV:
dst, _ := inst.Args[0].(Reg)
src, _ := inst.Args[1].(Reg)
if ES <= dst && dst <= GS && EAX <= src && src <= R15L {
src -= EAX - AX
iargs[1] = src
}
if ES <= dst && dst <= GS && RAX <= src && src <= R15 {
src -= RAX - AX
iargs[1] = src
}
if inst.Opcode>>24&^3 == 0xA0 {
for i, p := range inst.Prefix {
if p&0xFF == PrefixAddrSize {
inst.Prefix[i] |= PrefixImplicit
}
}
}
}
switch inst.Op {
case AAM, AAD:
if imm, ok := iargs[0].(Imm); ok {
if inst.DataSize == 32 {
iargs[0] = Imm(uint32(int8(imm)))
} else if inst.DataSize == 16 {
iargs[0] = Imm(uint16(int8(imm)))
}
}
case PUSH:
if imm, ok := iargs[0].(Imm); ok {
iargs[0] = Imm(uint32(imm))
}
}
for _, p := range inst.Prefix {
if p&PrefixImplicit != 0 {
for j, pj := range inst.Prefix {
if pj&0xFF == p&0xFF {
inst.Prefix[j] |= PrefixImplicit
}
}
}
}
if inst.Op != 0 {
for i, p := range inst.Prefix {
switch p &^ PrefixIgnored {
case PrefixData16, PrefixData32, PrefixCS, PrefixDS, PrefixES, PrefixSS:
inst.Prefix[i] |= PrefixImplicit
}
if p.IsREX() {
inst.Prefix[i] |= PrefixImplicit
}
}
}
if isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
for i, p := range inst.Prefix {
if p == PrefixPT || p == PrefixPN {
inst.Prefix[i] |= PrefixImplicit
}
}
}
switch inst.Op {
case AAA, AAS, CBW, CDQE, CLC, CLD, CLI, CLTS, CMC, CPUID, CQO, CWD, DAA, DAS,
FDECSTP, FINCSTP, FNCLEX, FNINIT, FNOP, FWAIT, HLT,
ICEBP, INSB, INSD, INSW, INT, INTO, INVD, IRET, IRETQ,
LAHF, LEAVE, LRET, MONITOR, MWAIT, NOP, OUTSB, OUTSD, OUTSW,
PAUSE, POPA, POPF, POPFQ, PUSHA, PUSHF, PUSHFQ,
RDMSR, RDPMC, RDTSC, RDTSCP, RET, RSM,
SAHF, STC, STD, STI, SYSENTER, SYSEXIT, SYSRET,
UD2, WBINVD, WRMSR, XEND, XLATB, XTEST:
if inst.Op == NOP && inst.Opcode>>24 != 0x90 {
break
}
if inst.Op == RET && inst.Opcode>>24 != 0xC3 {
break
}
if inst.Op == INT && inst.Opcode>>24 != 0xCC {
break
}
if inst.Op == LRET && inst.Opcode>>24 != 0xcb {
break
}
for i, p := range inst.Prefix {
if p&0xFF == PrefixDataSize {
inst.Prefix[i] &^= PrefixImplicit | PrefixIgnored
}
}
case 0:
// ok
}
switch inst.Op {
case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, MONITOR, MWAIT, XLATB:
iargs = nil
case STOSB, STOSW, STOSD, STOSQ:
iargs = iargs[:1]
case LODSB, LODSW, LODSD, LODSQ, SCASB, SCASW, SCASD, SCASQ:
iargs = iargs[1:]
}
const (
haveData16 = 1 << iota
haveData32
haveAddr16
haveAddr32
haveXacquire
haveXrelease
haveLock
haveHintTaken
haveHintNotTaken
haveBnd
)
var prefixBits uint32
prefix := ""
for _, p := range inst.Prefix {
if p == 0 {
break
}
if p&0xFF == 0xF3 {
prefixBits &^= haveBnd
}
if p&(PrefixImplicit|PrefixIgnored) != 0 {
continue
}
switch p {
default:
prefix += strings.ToLower(p.String()) + " "
case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
if inst.Op == 0 {
prefix += strings.ToLower(p.String()) + " "
}
case PrefixREPN:
prefix += "repne "
case PrefixLOCK:
prefixBits |= haveLock
case PrefixData16, PrefixDataSize:
prefixBits |= haveData16
case PrefixData32:
prefixBits |= haveData32
case PrefixAddrSize, PrefixAddr16:
prefixBits |= haveAddr16
case PrefixAddr32:
prefixBits |= haveAddr32
case PrefixXACQUIRE:
prefixBits |= haveXacquire
case PrefixXRELEASE:
prefixBits |= haveXrelease
case PrefixPT:
prefixBits |= haveHintTaken
case PrefixPN:
prefixBits |= haveHintNotTaken
case PrefixBND:
prefixBits |= haveBnd
}
}
switch inst.Op {
case JMP:
if inst.Opcode>>24 == 0xEB {
prefixBits &^= haveBnd
}
case RET, LRET:
prefixBits &^= haveData16 | haveData32
}
if prefixBits&haveXacquire != 0 {
prefix += "xacquire "
}
if prefixBits&haveXrelease != 0 {
prefix += "xrelease "
}
if prefixBits&haveLock != 0 {
prefix += "lock "
}
if prefixBits&haveBnd != 0 {
prefix += "bnd "
}
if prefixBits&haveHintTaken != 0 {
prefix += "hint-taken "
}
if prefixBits&haveHintNotTaken != 0 {
prefix += "hint-not-taken "
}
if prefixBits&haveAddr16 != 0 {
prefix += "addr16 "
}
if prefixBits&haveAddr32 != 0 {
prefix += "addr32 "
}
if prefixBits&haveData16 != 0 {
prefix += "data16 "
}
if prefixBits&haveData32 != 0 {
prefix += "data32 "
}
if inst.Op == 0 {
if prefix == "" {
return "<no instruction>"
}
return prefix[:len(prefix)-1]
}
var args []string
for _, a := range iargs {
if a == nil {
break
}
args = append(args, intelArg(&inst, a))
}
var op string
switch inst.Op {
case NOP:
if inst.Opcode>>24 == 0x0F {
if inst.DataSize == 16 {
args = append(args, "ax")
} else {
args = append(args, "eax")
}
}
case BLENDVPD, BLENDVPS, PBLENDVB:
args = args[:2]
case INT:
if inst.Opcode>>24 == 0xCC {
args = nil
op = "int3"
}
case LCALL, LJMP:
if len(args) == 2 {
args[0], args[1] = args[1], args[0]
}
case FCHS, FABS, FTST, FLDPI, FLDL2E, FLDLG2, F2XM1, FXAM, FLD1, FLDL2T, FSQRT, FRNDINT, FCOS, FSIN:
if len(args) == 0 {
args = append(args, "st0")
}
case FPTAN, FSINCOS, FUCOMPP, FCOMPP, FYL2X, FPATAN, FXTRACT, FPREM1, FPREM, FYL2XP1, FSCALE:
if len(args) == 0 {
args = []string{"st0", "st1"}
}
case FST, FSTP, FISTTP, FIST, FISTP, FBSTP:
if len(args) == 1 {
args = append(args, "st0")
}
case FLD, FXCH, FCOM, FCOMP, FIADD, FIMUL, FICOM, FICOMP, FISUBR, FIDIV, FUCOM, FUCOMP, FILD, FBLD, FADD, FMUL, FSUB, FSUBR, FISUB, FDIV, FDIVR, FIDIVR:
if len(args) == 1 {
args = []string{"st0", args[0]}
}
case MASKMOVDQU, MASKMOVQ, XLATB, OUTSB, OUTSW, OUTSD:
FixSegment:
for i := len(inst.Prefix) - 1; i >= 0; i-- {
p := inst.Prefix[i] & 0xFF
switch p {
case PrefixCS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
if inst.Mode != 64 || p == PrefixFS || p == PrefixGS {
args = append(args, strings.ToLower((inst.Prefix[i] & 0xFF).String()))
break FixSegment
}
case PrefixDS:
if inst.Mode != 64 {
break FixSegment
}
}
}
}
if op == "" {
op = intelOp[inst.Op]
}
if op == "" {
op = strings.ToLower(inst.Op.String())
}
if args != nil {
op += " " + strings.Join(args, ", ")
}
return prefix + op
}
func intelArg(inst *Inst, arg Arg) string {
switch a := arg.(type) {
case Imm:
if inst.Mode == 32 {
return fmt.Sprintf("%#x", uint32(a))
}
if Imm(int32(a)) == a {
return fmt.Sprintf("%#x", int64(a))
}
return fmt.Sprintf("%#x", uint64(a))
case Mem:
if a.Base == EIP {
a.Base = RIP
}
prefix := ""
switch inst.MemBytes {
case 1:
prefix = "byte "
case 2:
prefix = "word "
case 4:
prefix = "dword "
case 8:
prefix = "qword "
case 16:
prefix = "xmmword "
}
switch inst.Op {
case INVLPG:
prefix = "byte "
case STOSB, MOVSB, CMPSB, LODSB, SCASB:
prefix = "byte "
case STOSW, MOVSW, CMPSW, LODSW, SCASW:
prefix = "word "
case STOSD, MOVSD, CMPSD, LODSD, SCASD:
prefix = "dword "
case STOSQ, MOVSQ, CMPSQ, LODSQ, SCASQ:
prefix = "qword "
case LAR:
prefix = "word "
case BOUND:
if inst.Mode == 32 {
prefix = "qword "
} else {
prefix = "dword "
}
case PREFETCHW, PREFETCHNTA, PREFETCHT0, PREFETCHT1, PREFETCHT2, CLFLUSH:
prefix = "zmmword "
}
switch inst.Op {
case MOVSB, MOVSW, MOVSD, MOVSQ, CMPSB, CMPSW, CMPSD, CMPSQ, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ, LODSB, LODSW, LODSD, LODSQ:
switch a.Base {
case DI, EDI, RDI:
if a.Segment == ES {
a.Segment = 0
}
case SI, ESI, RSI:
if a.Segment == DS {
a.Segment = 0
}
}
case LEA:
a.Segment = 0
default:
switch a.Base {
case SP, ESP, RSP, BP, EBP, RBP:
if a.Segment == SS {
a.Segment = 0
}
default:
if a.Segment == DS {
a.Segment = 0
}
}
}
if inst.Mode == 64 && a.Segment != FS && a.Segment != GS {
a.Segment = 0
}
prefix += "ptr "
if a.Segment != 0 {
prefix += strings.ToLower(a.Segment.String()) + ":"
}
prefix += "["
if a.Base != 0 {
prefix += intelArg(inst, a.Base)
}
if a.Scale != 0 && a.Index != 0 {
if a.Base != 0 {
prefix += "+"
}
prefix += fmt.Sprintf("%s*%d", intelArg(inst, a.Index), a.Scale)
}
if a.Disp != 0 {
if prefix[len(prefix)-1] == '[' && (a.Disp >= 0 || int64(int32(a.Disp)) != a.Disp) {
prefix += fmt.Sprintf("%#x", uint64(a.Disp))
} else {
prefix += fmt.Sprintf("%+#x", a.Disp)
}
}
prefix += "]"
return prefix
case Rel:
return fmt.Sprintf(".%+#x", int64(a))
case Reg:
if int(a) < len(intelReg) && intelReg[a] != "" {
return intelReg[a]
}
}
return strings.ToLower(arg.String())
}
var intelOp = map[Op]string{
JAE: "jnb",
JA: "jnbe",
JGE: "jnl",
JNE: "jnz",
JG: "jnle",
JE: "jz",
SETAE: "setnb",
SETA: "setnbe",
SETGE: "setnl",
SETNE: "setnz",
SETG: "setnle",
SETE: "setz",
CMOVAE: "cmovnb",
CMOVA: "cmovnbe",
CMOVGE: "cmovnl",
CMOVNE: "cmovnz",
CMOVG: "cmovnle",
CMOVE: "cmovz",
LCALL: "call far",
LJMP: "jmp far",
LRET: "ret far",
ICEBP: "int1",
MOVSD_XMM: "movsd",
XLATB: "xlat",
}
var intelReg = [...]string{
F0: "st0",
F1: "st1",
F2: "st2",
F3: "st3",
F4: "st4",
F5: "st5",
F6: "st6",
F7: "st7",
M0: "mmx0",
M1: "mmx1",
M2: "mmx2",
M3: "mmx3",
M4: "mmx4",
M5: "mmx5",
M6: "mmx6",
M7: "mmx7",
X0: "xmm0",
X1: "xmm1",
X2: "xmm2",
X3: "xmm3",
X4: "xmm4",
X5: "xmm5",
X6: "xmm6",
X7: "xmm7",
X8: "xmm8",
X9: "xmm9",
X10: "xmm10",
X11: "xmm11",
X12: "xmm12",
X13: "xmm13",
X14: "xmm14",
X15: "xmm15",
// TODO: Maybe the constants are named wrong.
SPB: "spl",
BPB: "bpl",
SIB: "sil",
DIB: "dil",
R8L: "r8d",
R9L: "r9d",
R10L: "r10d",
R11L: "r11d",
R12L: "r12d",
R13L: "r13d",
R14L: "r14d",
R15L: "r15d",
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"bytes"
"strings"
"testing"
)
func TestObjdump32Manual(t *testing.T) { testObjdump32(t, hexCases(t, objdumpManualTests)) }
func TestObjdump32Testdata(t *testing.T) { testObjdump32(t, concat(basicPrefixes, testdataCases(t))) }
func TestObjdump32ModRM(t *testing.T) { testObjdump32(t, concat(basicPrefixes, enumModRM)) }
func TestObjdump32OneByte(t *testing.T) { testBasic(t, testObjdump32) }
func TestObjdump320F(t *testing.T) { testBasic(t, testObjdump32, 0x0F) }
func TestObjdump320F38(t *testing.T) { testBasic(t, testObjdump32, 0x0F, 0x38) }
func TestObjdump320F3A(t *testing.T) { testBasic(t, testObjdump32, 0x0F, 0x3A) }
func TestObjdump32Prefix(t *testing.T) { testPrefix(t, testObjdump32) }
func TestObjdump64Manual(t *testing.T) { testObjdump64(t, hexCases(t, objdumpManualTests)) }
func TestObjdump64Testdata(t *testing.T) { testObjdump64(t, concat(basicPrefixes, testdataCases(t))) }
func TestObjdump64ModRM(t *testing.T) { testObjdump64(t, concat(basicPrefixes, enumModRM)) }
func TestObjdump64OneByte(t *testing.T) { testBasic(t, testObjdump64) }
func TestObjdump640F(t *testing.T) { testBasic(t, testObjdump64, 0x0F) }
func TestObjdump640F38(t *testing.T) { testBasic(t, testObjdump64, 0x0F, 0x38) }
func TestObjdump640F3A(t *testing.T) { testBasic(t, testObjdump64, 0x0F, 0x3A) }
func TestObjdump64Prefix(t *testing.T) { testPrefix(t, testObjdump64) }
func TestObjdump64REXTestdata(t *testing.T) {
testObjdump64(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
}
func TestObjdump64REXModRM(t *testing.T) {
testObjdump64(t, concat3(basicPrefixes, rexPrefixes, enumModRM))
}
func TestObjdump64REXOneByte(t *testing.T) { testBasicREX(t, testObjdump64) }
func TestObjdump64REX0F(t *testing.T) { testBasicREX(t, testObjdump64, 0x0F) }
func TestObjdump64REX0F38(t *testing.T) { testBasicREX(t, testObjdump64, 0x0F, 0x38) }
func TestObjdump64REX0F3A(t *testing.T) { testBasicREX(t, testObjdump64, 0x0F, 0x3A) }
func TestObjdump64REXPrefix(t *testing.T) { testPrefixREX(t, testObjdump64) }
// objdumpManualTests holds test cases that will be run by TestObjdumpManual.
// If you are debugging a few cases that turned up in a longer run, it can be useful
// to list them here and then use -run=ObjdumpManual, particularly with tracing enabled.
var objdumpManualTests = `
F390
`
// allowedMismatchObjdump reports whether the mismatch between text and dec
// should be allowed by the test.
func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool {
if size == 15 && dec.nenc == 15 && contains(text, "truncated") && contains(dec.text, "(bad)") {
return true
}
if i := strings.LastIndex(dec.text, " "); isPrefix(dec.text[i+1:]) && size == 1 && isPrefix(text) {
return true
}
if size == dec.nenc && contains(dec.text, "movupd") && contains(dec.text, "data32") {
s := strings.Replace(dec.text, "data32 ", "", -1)
if text == s {
return true
}
}
// Simplify our invalid instruction text.
if text == "error: unrecognized instruction" {
text = "BAD"
}
// Invalid instructions for which libopcodes prints %? register.
// FF E8 11 22 33 44:
// Invalid instructions for which libopcodes prints "internal disassembler error".
// Invalid instructions for which libopcodes prints 8087 only (e.g., DB E0)
// or prints 287 only (e.g., DB E4).
if contains(dec.text, "%?", "<internal disassembler error>", "(8087 only)", "(287 only)") {
dec.text = "(bad)"
}
// 0F 19 11, 0F 1C 11, 0F 1D 11, 0F 1E 11, 0F 1F 11: libopcodes says nop,
// but the Intel manuals say that the only NOP there is 0F 1F /0.
// Perhaps libopcodes is reporting an older encoding.
i := bytes.IndexByte(dec.enc[:], 0x0F)
if contains(dec.text, "nop") && i >= 0 && i+2 < len(dec.enc) && dec.enc[i+1]&^7 == 0x18 && (dec.enc[i+1] != 0x1F || (dec.enc[i+2]>>3)&7 != 0) {
dec.text = "(bad)"
}
// Any invalid instruction.
if text == "BAD" && contains(dec.text, "(bad)") {
return true
}
// Instructions libopcodes knows but we do not (e.g., 0F 19 11).
if (text == "BAD" || size == 1 && isPrefix(text)) && hasPrefix(dec.text, unsupported...) {
return true
}
// Instructions we know but libopcodes does not (e.g., 0F D0 11).
if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && hasPrefix(text, libopcodesUnsupported...) {
return true
}
// Libopcodes rejects F2 90 as NOP. Not sure why.
if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && inst.Opcode>>24 == 0x90 && countPrefix(inst, 0xF2) > 0 {
return true
}
// 0F 20 11, 0F 21 11, 0F 22 11, 0F 23 11, 0F 24 11:
// Moves into and out of some control registers seem to be unsupported by libopcodes.
// TODO(rsc): Are they invalid somehow?
if (contains(dec.text, "(bad)") || dec.nenc == 1 && isPrefix(dec.text)) && contains(text, "%cr", "%db", "%tr") {
return true
}
if contains(dec.text, "fwait") && dec.nenc == 1 && dec.enc[0] != 0x9B {
return true
}
// 9B D9 11: libopcodes reports FSTSW instead of FWAIT + FNSTSW.
// This is correct in that FSTSW is a pseudo-op for the pair, but it really
// is a pair of instructions: execution can stop between them.
// Our decoder chooses to separate them.
if (text == "fwait" || strings.HasSuffix(text, " fwait")) && dec.nenc >= len(strings.Fields(text)) && dec.enc[len(strings.Fields(text))-1] == 0x9B {
return true
}
// 0F 18 77 11:
// Invalid instructions for which libopcodes prints "nop/reserved".
// Perhaps libopcodes is reporting an older encoding.
if text == "BAD" && contains(dec.text, "nop/reserved") {
return true
}
// 0F C7 B0 11 22 33 44: libopcodes says vmptrld 0x44332211(%eax); we say rdrand %eax.
// TODO(rsc): Fix, since we are probably wrong, but we don't have vmptrld in the manual.
if contains(text, "rdrand") && contains(dec.text, "vmptrld", "vmxon", "vmclear") {
return true
}
// DD C8: libopcodes says FNOP but the Intel manual is clear FNOP is only D9 D0.
// Perhaps libopcodes is reporting an older encoding.
if text == "BAD" && contains(dec.text, "fnop") && (dec.enc[0] != 0xD9 || dec.enc[1] != 0xD0) {
return true
}
// 66 90: libopcodes says xchg %ax,%ax; we say 'data16 nop'.
// The 16-bit swap will preserve the high bits of the register,
// so they are the same.
if contains(text, "nop") && contains(dec.text, "xchg %ax,%ax") {
return true
}
// If there are multiple prefixes, allow libopcodes to use an alternate name.
if size == 1 && dec.nenc == 1 && prefixByte[text] > 0 && prefixByte[text] == prefixByte[dec.text] {
return true
}
// 26 9B: libopcodes reports "fwait"/1, ignoring segment prefix.
// https://sourceware.org/bugzilla/show_bug.cgi?id=16891
// F0 82: Decode="lock"/1 but libopcodes="lock (bad)"/2.
if size == 1 && dec.nenc >= 1 && prefixByte[text] == dec.enc[0] && contains(dec.text, "(bad)", "fwait", "fnop") {
return true
}
// libopcodes interprets 660f801122 as taking a rel16 but
// truncating the address at 16 bits. Not sure what is correct.
if contains(text, ".+0x2211", ".+0x11") && contains(dec.text, " .-") {
return true
}
// 66 F3 0F D6 C5, 66 F2 0F D6 C0: libopcodes reports use of XMM register instead of MMX register,
// but only when the instruction has a 66 prefix. Maybe they know something we don't.
if countPrefix(inst, 0x66) > 0 && contains(dec.text, "movdq2q", "movq2dq") && !contains(dec.text, "%mm") {
return true
}
// 0F 01 F8, 0F 05, 0F 07: these are 64-bit instructions but libopcodes accepts them.
if (text == "BAD" || size == 1 && isPrefix(text)) && contains(dec.text, "swapgs", "syscall", "sysret", "rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase") {
return true
}
return false
}
// Instructions known to libopcodes (or xed) but not to us.
// Most of these come from supplementary manuals of one form or another.
var unsupported = strings.Fields(`
bndc
bndl
bndm
bnds
clac
clgi
femms
fldln
fldz
getsec
invlpga
kmov
montmul
pavg
pf2i
pfacc
pfadd
pfcmp
pfmax
pfmin
pfmul
pfna
pfpnac
pfrc
pfrs
pfsub
phadd
phsub
pi2f
pmulhr
prefetch
pswap
ptest
rdseed
sha1
sha256
skinit
stac
stgi
vadd
vand
vcmp
vcomis
vcvt
vcvt
vdiv
vhadd
vhsub
vld
vmax
vmcall
vmfunc
vmin
vmlaunch
vmload
vmmcall
vmov
vmov
vmov
vmptrld
vmptrst
vmread
vmresume
vmrun
vmsave
vmul
vmwrite
vmxoff
vor
vpack
vpadd
vpand
vpavg
vpcmp
vpcmp
vpins
vpmadd
vpmax
vpmin
vpmul
vpmul
vpor
vpsad
vpshuf
vpsll
vpsra
vpsrad
vpsrl
vpsub
vpunp
vpxor
vrcp
vrsqrt
vshuf
vsqrt
vsub
vucomis
vunp
vxor
vzero
xcrypt
xsha1
xsha256
xstore-rng
insertq
extrq
vmclear
invvpid
adox
vmxon
invept
adcx
vmclear
prefetchwt1
enclu
encls
salc
fstpnce
fdisi8087_nop
fsetpm287_nop
feni8087_nop
syscall
sysret
`)
// Instructions known to us but not to libopcodes (at least in binutils 2.24).
var libopcodesUnsupported = strings.Fields(`
addsubps
aes
blend
cvttpd2dq
dpp
extract
haddps
hsubps
insert
invpcid
lddqu
movmsk
movnt
movq2dq
mps
pack
pblend
pclmul
pcmp
pext
phmin
pins
pmax
pmin
pmov
pmovmsk
pmul
popcnt
pslld
psllq
psllw
psrad
psraw
psrl
ptest
punpck
round
xrstor
xsavec
xsaves
comis
ucomis
movhps
movntps
rsqrt
rcpp
puncpck
bsf
movq2dq
cvttpd2dq
movq
hsubpd
movdqa
movhpd
addsubpd
movd
haddpd
cvtps2dq
bsr
cvtdq2ps
rdrand
maskmov
movq2dq
movlhps
movbe
movlpd
`)
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"bytes"
"debug/elf"
"encoding/binary"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"testing"
)
// Apologies for the proprietary path, but we need objdump 2.24 + some committed patches that will land in 2.25.
const objdumpPath = "/Users/rsc/bin/objdump2"
func testObjdump32(t *testing.T, generate func(func([]byte))) {
testObjdumpArch(t, generate, 32)
}
func testObjdump64(t *testing.T, generate func(func([]byte))) {
testObjdumpArch(t, generate, 64)
}
func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch int) {
if testing.Short() {
t.Skip("skipping objdump test in short mode")
}
if _, err := os.Stat(objdumpPath); err != nil {
t.Fatal(err)
}
testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump)
}
func objdump(ext *ExtDis) error {
// File already written with instructions; add ELF header.
if ext.Arch == 32 {
if err := writeELF32(ext.File, ext.Size); err != nil {
return err
}
} else {
if err := writeELF64(ext.File, ext.Size); err != nil {
return err
}
}
b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name())
if err != nil {
return err
}
var (
nmatch int
reading bool
next uint32 = start
addr uint32
encbuf [32]byte
enc []byte
text string
)
flush := func() {
if addr == next {
switch text {
case "repz":
text = "rep"
case "repnz":
text = "repn"
default:
text = strings.Replace(text, "repz ", "rep ", -1)
text = strings.Replace(text, "repnz ", "repn ", -1)
}
if m := pcrelw.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
}
if m := pcrel.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
}
text = strings.Replace(text, "0x0(", "(", -1)
text = strings.Replace(text, "%st(0)", "%st", -1)
ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
encbuf = [32]byte{}
enc = nil
next += 32
}
}
var textangle = []byte("<.text>:")
for {
line, err := b.ReadSlice('\n')
if err != nil {
if err == io.EOF {
break
}
return fmt.Errorf("reading objdump output: %v", err)
}
if bytes.Contains(line, textangle) {
reading = true
continue
}
if !reading {
continue
}
if debug {
os.Stdout.Write(line)
}
if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil {
enc = enc1
continue
}
flush()
nmatch++
addr, enc, text = parseLine(line, encbuf[:0])
if addr > next {
return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
}
}
flush()
if next != start+uint32(ext.Size) {
return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
}
if err := ext.Wait(); err != nil {
return fmt.Errorf("exec: %v", err)
}
return nil
}
func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
oline := line
i := index(line, ":\t")
if i < 0 {
log.Fatalf("cannot parse disassembly: %q", oline)
}
x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32)
if err != nil {
log.Fatalf("cannot parse disassembly: %q", oline)
}
addr = uint32(x)
line = line[i+2:]
i = bytes.IndexByte(line, '\t')
if i < 0 {
log.Fatalf("cannot parse disassembly: %q", oline)
}
enc, ok := parseHex(line[:i], encstart)
if !ok {
log.Fatalf("cannot parse disassembly: %q", oline)
}
line = trimSpace(line[i:])
if i := bytes.IndexByte(line, '#'); i >= 0 {
line = trimSpace(line[:i])
}
text = string(fixSpace(line))
return
}
func parseContinuation(line []byte, enc []byte) []byte {
i := index(line, ":\t")
if i < 0 {
return nil
}
line = line[i+1:]
enc, _ = parseHex(line, enc)
return enc
}
// writeELF32 writes an ELF32 header to the file,
// describing a text segment that starts at start
// and extends for size bytes.
func writeELF32(f *os.File, size int) error {
f.Seek(0, 0)
var hdr elf.Header32
var prog elf.Prog32
var sect elf.Section32
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, &hdr)
off1 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &prog)
off2 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &sect)
off3 := buf.Len()
buf.Reset()
data := byte(elf.ELFDATA2LSB)
hdr = elf.Header32{
Ident: [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1},
Type: 2,
Machine: uint16(elf.EM_386),
Version: 1,
Entry: start,
Phoff: uint32(off1),
Shoff: uint32(off2),
Flags: 0x05000002,
Ehsize: uint16(off1),
Phentsize: uint16(off2 - off1),
Phnum: 1,
Shentsize: uint16(off3 - off2),
Shnum: 3,
Shstrndx: 2,
}
binary.Write(&buf, binary.LittleEndian, &hdr)
prog = elf.Prog32{
Type: 1,
Off: start,
Vaddr: start,
Paddr: start,
Filesz: uint32(size),
Memsz: uint32(size),
Flags: 5,
Align: start,
}
binary.Write(&buf, binary.LittleEndian, &prog)
binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
sect = elf.Section32{
Name: 1,
Type: uint32(elf.SHT_PROGBITS),
Addr: start,
Off: start,
Size: uint32(size),
Flags: uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
Addralign: 4,
}
binary.Write(&buf, binary.LittleEndian, &sect) // .text
sect = elf.Section32{
Name: uint32(len("\x00.text\x00")),
Type: uint32(elf.SHT_STRTAB),
Addr: 0,
Off: uint32(off2 + (off3-off2)*3),
Size: uint32(len("\x00.text\x00.shstrtab\x00")),
Addralign: 1,
}
binary.Write(&buf, binary.LittleEndian, &sect)
buf.WriteString("\x00.text\x00.shstrtab\x00")
f.Write(buf.Bytes())
return nil
}
// writeELF64 writes an ELF64 header to the file,
// describing a text segment that starts at start
// and extends for size bytes.
func writeELF64(f *os.File, size int) error {
f.Seek(0, 0)
var hdr elf.Header64
var prog elf.Prog64
var sect elf.Section64
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, &hdr)
off1 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &prog)
off2 := buf.Len()
binary.Write(&buf, binary.LittleEndian, &sect)
off3 := buf.Len()
buf.Reset()
data := byte(elf.ELFDATA2LSB)
hdr = elf.Header64{
Ident: [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1},
Type: 2,
Machine: uint16(elf.EM_X86_64),
Version: 1,
Entry: start,
Phoff: uint64(off1),
Shoff: uint64(off2),
Flags: 0x05000002,
Ehsize: uint16(off1),
Phentsize: uint16(off2 - off1),
Phnum: 1,
Shentsize: uint16(off3 - off2),
Shnum: 3,
Shstrndx: 2,
}
binary.Write(&buf, binary.LittleEndian, &hdr)
prog = elf.Prog64{
Type: 1,
Off: start,
Vaddr: start,
Paddr: start,
Filesz: uint64(size),
Memsz: uint64(size),
Flags: 5,
Align: start,
}
binary.Write(&buf, binary.LittleEndian, &prog)
binary.Write(&buf, binary.LittleEndian, &sect) // NULL section
sect = elf.Section64{
Name: 1,
Type: uint32(elf.SHT_PROGBITS),
Addr: start,
Off: start,
Size: uint64(size),
Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR),
Addralign: 4,
}
binary.Write(&buf, binary.LittleEndian, &sect) // .text
sect = elf.Section64{
Name: uint32(len("\x00.text\x00")),
Type: uint32(elf.SHT_STRTAB),
Addr: 0,
Off: uint64(off2 + (off3-off2)*3),
Size: uint64(len("\x00.text\x00.shstrtab\x00")),
Addralign: 1,
}
binary.Write(&buf, binary.LittleEndian, &sect)
buf.WriteString("\x00.text\x00.shstrtab\x00")
f.Write(buf.Bytes())
return nil
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"bytes"
"fmt"
"io"
"log"
"os"
"strconv"
"testing"
)
const plan9Path = "testdata/libmach8db"
func testPlan9Arch(t *testing.T, arch int, generate func(func([]byte))) {
if testing.Short() {
t.Skip("skipping libmach test in short mode")
}
if _, err := os.Stat(plan9Path); err != nil {
t.Fatal(err)
}
testExtDis(t, "plan9", arch, plan9, generate, allowedMismatchPlan9)
}
func testPlan932(t *testing.T, generate func(func([]byte))) {
testPlan9Arch(t, 32, generate)
}
func testPlan964(t *testing.T, generate func(func([]byte))) {
testPlan9Arch(t, 64, generate)
}
func plan9(ext *ExtDis) error {
flag := "-8"
if ext.Arch == 64 {
flag = "-6"
}
b, err := ext.Run(plan9Path, flag, ext.File.Name())
if err != nil {
return err
}
nmatch := 0
next := uint32(start)
var (
addr uint32
encbuf [32]byte
enc []byte
text string
)
for {
line, err := b.ReadSlice('\n')
if err != nil {
if err == io.EOF {
break
}
return fmt.Errorf("reading libmach8db output: %v", err)
}
if debug {
os.Stdout.Write(line)
}
nmatch++
addr, enc, text = parseLinePlan9(line, encbuf[:0])
if addr > next {
return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
}
if addr < next {
continue
}
if m := pcrelw.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
}
if m := pcrel.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
}
ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
encbuf = [32]byte{}
enc = nil
next += 32
}
if next != start+uint32(ext.Size) {
return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
}
if err := ext.Wait(); err != nil {
return fmt.Errorf("exec: %v", err)
}
return nil
}
func parseLinePlan9(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
i := bytes.IndexByte(line, ' ')
if i < 0 || line[0] != '0' || line[1] != 'x' {
log.Fatalf("cannot parse disassembly: %q", line)
}
j := bytes.IndexByte(line[i+1:], ' ')
if j < 0 {
log.Fatalf("cannot parse disassembly: %q", line)
}
j += i + 1
x, err := strconv.ParseUint(string(trimSpace(line[2:i])), 16, 32)
if err != nil {
log.Fatalf("cannot parse disassembly: %q", line)
}
addr = uint32(x)
enc, ok := parseHex(line[i+1:j], encstart)
if !ok {
log.Fatalf("cannot parse disassembly: %q", line)
}
return addr, enc, string(fixSpace(line[j+1:]))
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"fmt"
"strings"
)
// Plan9Syntax returns the Go assembler syntax for the instruction.
// The syntax was originally defined by Plan 9.
// The pc is the program counter of the instruction, used for expanding
// PC-relative addresses into absolute ones.
// The symname function queries the symbol table for the program
// being disassembled. Given a target address it returns the name and base
// address of the symbol containing the target, if any; otherwise it returns "", 0.
func Plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string {
if symname == nil {
symname = func(uint64) (string, uint64) { return "", 0 }
}
var args []string
for i := len(inst.Args) - 1; i >= 0; i-- {
a := inst.Args[i]
if a == nil {
continue
}
args = append(args, plan9Arg(&inst, pc, symname, a))
}
var last Prefix
for _, p := range inst.Prefix {
if p == 0 || p.IsREX() {
break
}
last = p
}
prefix := ""
switch last & 0xFF {
case 0, 0x66, 0x67:
// ignore
case PrefixREPN:
prefix += "REPNE "
default:
prefix += last.String() + " "
}
op := inst.Op.String()
if plan9Suffix[inst.Op] {
switch inst.DataSize {
case 8:
op += "B"
case 16:
op += "W"
case 32:
op += "L"
case 64:
op += "Q"
}
}
if args != nil {
op += " " + strings.Join(args, ", ")
}
return prefix + op
}
func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
switch a := arg.(type) {
case Reg:
return plan9Reg[a]
case Rel:
if pc == 0 {
break
}
// If the absolute address is the start of a symbol, use the name.
// Otherwise use the raw address, so that things like relative
// jumps show up as JMP 0x123 instead of JMP f+10(SB).
// It is usually easier to search for 0x123 than to do the mental
// arithmetic to find f+10.
addr := pc + uint64(inst.Len) + uint64(a)
if s, base := symname(addr); s != "" && addr == base {
return fmt.Sprintf("%s(SB)", s)
}
return fmt.Sprintf("%#x", addr)
case Imm:
if s, base := symname(uint64(a)); s != "" {
suffix := ""
if uint64(a) != base {
suffix = fmt.Sprintf("%+d", uint64(a)-base)
}
return fmt.Sprintf("$%s%s(SB)", s, suffix)
}
if inst.Mode == 32 {
return fmt.Sprintf("$%#x", uint32(a))
}
if Imm(int32(a)) == a {
return fmt.Sprintf("$%#x", int64(a))
}
return fmt.Sprintf("$%#x", uint64(a))
case Mem:
if a.Segment == 0 && a.Disp != 0 && a.Base == 0 && (a.Index == 0 || a.Scale == 0) {
if s, base := symname(uint64(a.Disp)); s != "" {
suffix := ""
if uint64(a.Disp) != base {
suffix = fmt.Sprintf("%+d", uint64(a.Disp)-base)
}
return fmt.Sprintf("%s%s(SB)", s, suffix)
}
}
s := ""
if a.Segment != 0 {
s += fmt.Sprintf("%s:", plan9Reg[a.Segment])
}
if a.Disp != 0 {
s += fmt.Sprintf("%#x", a.Disp)
} else {
s += "0"
}
if a.Base != 0 {
s += fmt.Sprintf("(%s)", plan9Reg[a.Base])
}
if a.Index != 0 && a.Scale != 0 {
s += fmt.Sprintf("(%s*%d)", plan9Reg[a.Index], a.Scale)
}
return s
}
return arg.String()
}
var plan9Suffix = [maxOp + 1]bool{
ADC: true,
ADD: true,
AND: true,
BSF: true,
BSR: true,
BT: true,
BTC: true,
BTR: true,
BTS: true,
CMP: true,
CMPXCHG: true,
CVTSI2SD: true,
CVTSI2SS: true,
CVTSD2SI: true,
CVTSS2SI: true,
CVTTSD2SI: true,
CVTTSS2SI: true,
DEC: true,
DIV: true,
FLDENV: true,
FRSTOR: true,
IDIV: true,
IMUL: true,
IN: true,
INC: true,
LEA: true,
MOV: true,
MOVNTI: true,
MUL: true,
NEG: true,
NOP: true,
NOT: true,
OR: true,
OUT: true,
POP: true,
POPA: true,
PUSH: true,
PUSHA: true,
RCL: true,
RCR: true,
ROL: true,
ROR: true,
SAR: true,
SBB: true,
SHL: true,
SHLD: true,
SHR: true,
SHRD: true,
SUB: true,
TEST: true,
XADD: true,
XCHG: true,
XOR: true,
}
var plan9Reg = [...]string{
AL: "AL",
CL: "CL",
BL: "BL",
DL: "DL",
AH: "AH",
CH: "CH",
BH: "BH",
DH: "DH",
SPB: "SP",
BPB: "BP",
SIB: "SI",
DIB: "DI",
R8B: "R8",
R9B: "R9",
R10B: "R10",
R11B: "R11",
R12B: "R12",
R13B: "R13",
R14B: "R14",
R15B: "R15",
AX: "AX",
CX: "CX",
BX: "BX",
DX: "DX",
SP: "SP",
BP: "BP",
SI: "SI",
DI: "DI",
R8W: "R8",
R9W: "R9",
R10W: "R10",
R11W: "R11",
R12W: "R12",
R13W: "R13",
R14W: "R14",
R15W: "R15",
EAX: "AX",
ECX: "CX",
EDX: "DX",
EBX: "BX",
ESP: "SP",
EBP: "BP",
ESI: "SI",
EDI: "DI",
R8L: "R8",
R9L: "R9",
R10L: "R10",
R11L: "R11",
R12L: "R12",
R13L: "R13",
R14L: "R14",
R15L: "R15",
RAX: "AX",
RCX: "CX",
RDX: "DX",
RBX: "BX",
RSP: "SP",
RBP: "BP",
RSI: "SI",
RDI: "DI",
R8: "R8",
R9: "R9",
R10: "R10",
R11: "R11",
R12: "R12",
R13: "R13",
R14: "R14",
R15: "R15",
IP: "IP",
EIP: "IP",
RIP: "IP",
F0: "F0",
F1: "F1",
F2: "F2",
F3: "F3",
F4: "F4",
F5: "F5",
F6: "F6",
F7: "F7",
M0: "M0",
M1: "M1",
M2: "M2",
M3: "M3",
M4: "M4",
M5: "M5",
M6: "M6",
M7: "M7",
X0: "X0",
X1: "X1",
X2: "X2",
X3: "X3",
X4: "X4",
X5: "X5",
X6: "X6",
X7: "X7",
X8: "X8",
X9: "X9",
X10: "X10",
X11: "X11",
X12: "X12",
X13: "X13",
X14: "X14",
X15: "X15",
CS: "CS",
SS: "SS",
DS: "DS",
ES: "ES",
FS: "FS",
GS: "GS",
GDTR: "GDTR",
IDTR: "IDTR",
LDTR: "LDTR",
MSW: "MSW",
TASK: "TASK",
CR0: "CR0",
CR1: "CR1",
CR2: "CR2",
CR3: "CR3",
CR4: "CR4",
CR5: "CR5",
CR6: "CR6",
CR7: "CR7",
CR8: "CR8",
CR9: "CR9",
CR10: "CR10",
CR11: "CR11",
CR12: "CR12",
CR13: "CR13",
CR14: "CR14",
CR15: "CR15",
DR0: "DR0",
DR1: "DR1",
DR2: "DR2",
DR3: "DR3",
DR4: "DR4",
DR5: "DR5",
DR6: "DR6",
DR7: "DR7",
DR8: "DR8",
DR9: "DR9",
DR10: "DR10",
DR11: "DR11",
DR12: "DR12",
DR13: "DR13",
DR14: "DR14",
DR15: "DR15",
TR0: "TR0",
TR1: "TR1",
TR2: "TR2",
TR3: "TR3",
TR4: "TR4",
TR5: "TR5",
TR6: "TR6",
TR7: "TR7",
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"strings"
"testing"
)
func TestPlan932Manual(t *testing.T) { testPlan932(t, hexCases(t, plan9ManualTests)) }
func TestPlan932Testdata(t *testing.T) { testPlan932(t, concat(basicPrefixes, testdataCases(t))) }
func TestPlan932ModRM(t *testing.T) { testPlan932(t, concat(basicPrefixes, enumModRM)) }
func TestPlan932OneByte(t *testing.T) { testBasic(t, testPlan932) }
func TestPlan9320F(t *testing.T) { testBasic(t, testPlan932, 0x0F) }
func TestPlan9320F38(t *testing.T) { testBasic(t, testPlan932, 0x0F, 0x38) }
func TestPlan9320F3A(t *testing.T) { testBasic(t, testPlan932, 0x0F, 0x3A) }
func TestPlan932Prefix(t *testing.T) { testPrefix(t, testPlan932) }
func TestPlan964Manual(t *testing.T) { testPlan964(t, hexCases(t, plan9ManualTests)) }
func TestPlan964Testdata(t *testing.T) { testPlan964(t, concat(basicPrefixes, testdataCases(t))) }
func TestPlan964ModRM(t *testing.T) { testPlan964(t, concat(basicPrefixes, enumModRM)) }
func TestPlan964OneByte(t *testing.T) { testBasic(t, testPlan964) }
func TestPlan9640F(t *testing.T) { testBasic(t, testPlan964, 0x0F) }
func TestPlan9640F38(t *testing.T) { testBasic(t, testPlan964, 0x0F, 0x38) }
func TestPlan9640F3A(t *testing.T) { testBasic(t, testPlan964, 0x0F, 0x3A) }
func TestPlan964Prefix(t *testing.T) { testPrefix(t, testPlan964) }
func TestPlan964REXTestdata(t *testing.T) {
testPlan964(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
}
func TestPlan964REXModRM(t *testing.T) { testPlan964(t, concat3(basicPrefixes, rexPrefixes, enumModRM)) }
func TestPlan964REXOneByte(t *testing.T) { testBasicREX(t, testPlan964) }
func TestPlan964REX0F(t *testing.T) { testBasicREX(t, testPlan964, 0x0F) }
func TestPlan964REX0F38(t *testing.T) { testBasicREX(t, testPlan964, 0x0F, 0x38) }
func TestPlan964REX0F3A(t *testing.T) { testBasicREX(t, testPlan964, 0x0F, 0x3A) }
func TestPlan964REXPrefix(t *testing.T) { testPrefixREX(t, testPlan964) }
// plan9ManualTests holds test cases that will be run by TestPlan9Manual32 and TestPlan9Manual64.
// If you are debugging a few cases that turned up in a longer run, it can be useful
// to list them here and then use -run=Plan9Manual, particularly with tracing enabled.
var plan9ManualTests = `
`
// allowedMismatchPlan9 reports whether the mismatch between text and dec
// should be allowed by the test.
func allowedMismatchPlan9(text string, size int, inst *Inst, dec ExtInst) bool {
return false
}
// Instructions known to us but not to plan9.
var plan9Unsupported = strings.Fields(`
`)
This source diff could not be displayed because it is too large. You can view the blob instead.
libmach8db: libmach8db.c
9c libmach8db.c && 9l -o libmach8db libmach8db.o; rm libmach8db.o
newdecode.txt:
cd ..; go test -cover -run 'Objdump.*32' -v -timeout 10h -printtests 2>&1 | tee log
cd ..; go test -cover -run 'Objdump.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
cd ..; go test -cover -run 'Xed.*32' -v -timeout 10h -printtests 2>&1 | tee -a log
cd ..; go test -cover -run 'Xed.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
cd ..; go test -cover -run 'Plan9.*32' -v -timeout 10h -printtests 2>&1 | tee -a log
cd ..; go test -cover -run 'Plan9.*64' -v -timeout 10h -printtests 2>&1 | tee -a log
egrep ' (gnu|intel|plan9) ' ../log |sort >newdecode.txt
This source diff could not be displayed because it is too large. You can view the blob instead.
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package x86asm
import (
"bytes"
"strings"
"testing"
)
func TestXed32Manual(t *testing.T) { testXed32(t, hexCases(t, xedManualTests)) }
func TestXed32Testdata(t *testing.T) { testXed32(t, concat(basicPrefixes, testdataCases(t))) }
func TestXed32ModRM(t *testing.T) { testXed32(t, concat(basicPrefixes, enumModRM)) }
func TestXed32OneByte(t *testing.T) { testBasic(t, testXed32) }
func TestXed320F(t *testing.T) { testBasic(t, testXed32, 0x0F) }
func TestXed320F38(t *testing.T) { testBasic(t, testXed32, 0x0F, 0x38) }
func TestXed320F3A(t *testing.T) { testBasic(t, testXed32, 0x0F, 0x3A) }
func TestXed32Prefix(t *testing.T) { testPrefix(t, testXed32) }
func TestXed64Manual(t *testing.T) { testXed64(t, hexCases(t, xedManualTests)) }
func TestXed64Testdata(t *testing.T) { testXed64(t, concat(basicPrefixes, testdataCases(t))) }
func TestXed64ModRM(t *testing.T) { testXed64(t, concat(basicPrefixes, enumModRM)) }
func TestXed64OneByte(t *testing.T) { testBasic(t, testXed64) }
func TestXed640F(t *testing.T) { testBasic(t, testXed64, 0x0F) }
func TestXed640F38(t *testing.T) { testBasic(t, testXed64, 0x0F, 0x38) }
func TestXed640F3A(t *testing.T) { testBasic(t, testXed64, 0x0F, 0x3A) }
func TestXed64Prefix(t *testing.T) { testPrefix(t, testXed64) }
func TestXed64REXTestdata(t *testing.T) {
testXed64(t, filter(concat3(basicPrefixes, rexPrefixes, testdataCases(t)), isValidREX))
}
func TestXed64REXModRM(t *testing.T) { testXed64(t, concat3(basicPrefixes, rexPrefixes, enumModRM)) }
func TestXed64REXOneByte(t *testing.T) { testBasicREX(t, testXed64) }
func TestXed64REX0F(t *testing.T) { testBasicREX(t, testXed64, 0x0F) }
func TestXed64REX0F38(t *testing.T) { testBasicREX(t, testXed64, 0x0F, 0x38) }
func TestXed64REX0F3A(t *testing.T) { testBasicREX(t, testXed64, 0x0F, 0x3A) }
func TestXed64REXPrefix(t *testing.T) { testPrefixREX(t, testXed64) }
// xedManualTests holds test cases that will be run by TestXedManual32 and TestXedManual64.
// If you are debugging a few cases that turned up in a longer run, it can be useful
// to list them here and then use -run=XedManual, particularly with tracing enabled.
var xedManualTests = `
6690
`
// allowedMismatchXed reports whether the mismatch between text and dec
// should be allowed by the test.
func allowedMismatchXed(text string, size int, inst *Inst, dec ExtInst) bool {
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "GENERAL_ERROR", "INSTR_TOO_LONG", "BAD_LOCK_PREFIX") {
return true
}
if contains(dec.text, "BAD_LOCK_PREFIX") && countExactPrefix(inst, PrefixLOCK|PrefixInvalid) > 0 {
return true
}
if contains(dec.text, "BAD_LOCK_PREFIX", "GENERAL_ERROR") && countExactPrefix(inst, PrefixLOCK|PrefixImplicit) > 0 {
return true
}
if text == "lock" && size == 1 && contains(dec.text, "BAD_LOCK_PREFIX") {
return true
}
// Instructions not known to us.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, unsupported...) {
return true
}
// Instructions not known to xed.
if contains(text, xedUnsupported...) && contains(dec.text, "ERROR") {
return true
}
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "shl ") && (inst.Opcode>>16)&0xEC38 == 0xC030 {
return true
}
// 82 11 22: xed says 'adc byte ptr [ecx], 0x22' but there is no justification in the manuals for that.
// C0 30 11: xed says 'shl byte ptr [eax], 0x11' but there is no justification in the manuals for that.
// F6 08 11: xed says 'test byte ptr [eax], 0x11' but there is no justification in the manuals for that.
if (contains(text, "error:") || isPrefix(text) && size == 1) && hasByte(dec.enc[:dec.nenc], 0x82, 0xC0, 0xC1, 0xD0, 0xD1, 0xD2, 0xD3, 0xF6, 0xF7) {
return true
}
// F3 11 22 and many others: xed allows and drops misused rep/repn prefix.
if (text == "rep" && dec.enc[0] == 0xF3 || (text == "repn" || text == "repne") && dec.enc[0] == 0xF2) && (!contains(dec.text, "ins", "outs", "movs", "lods", "cmps", "scas") || contains(dec.text, "xmm")) {
return true
}
// 0F C7 30: xed says vmptrld qword ptr [eax]; we say rdrand eax.
// TODO(rsc): Fix, since we are probably wrong, but we don't have vmptrld in the manual.
if contains(text, "rdrand") && contains(dec.text, "vmptrld", "vmxon", "vmclear") {
return true
}
// F3 0F AE 00: we say 'rdfsbase dword ptr [eax]' but RDFSBASE needs a register.
// Also, this is a 64-bit only instruction.
// TODO(rsc): Fix to reject this encoding.
if contains(text, "rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase") && contains(dec.text, "ERROR") {
return true
}
// 0F 01 F8: we say swapgs but that's only valid in 64-bit mode.
// TODO(rsc): Fix.
if contains(text, "swapgs") {
return true
}
// 0F 24 11: 'mov ecx, tr2' except there is no TR2.
// Or maybe the MOV to TR registers doesn't use RMF.
if contains(text, "cr1", "cr5", "cr6", "cr7", "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7") && contains(dec.text, "ERROR") {
return true
}
// 0F 19 11, 0F 1C 11, 0F 1D 11, 0F 1E 11, 0F 1F 11: xed says nop,
// but the Intel manuals say that the only NOP there is 0F 1F /0.
// Perhaps xed is reporting an older encoding.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "nop ") && (inst.Opcode>>8)&0xFFFF38 != 0x0F1F00 {
return true
}
// 66 0F AE 38: clflushopt but we only know clflush
if contains(text, "clflush") && contains(dec.text, "clflushopt") {
return true
}
// 0F 20 04 11: MOV SP, CR0 but has mod!=3 despite register argument.
// (This encoding ignores the mod bits.) The decoder sees the non-register
// mod and reads farther ahead to decode the memory reference that
// isn't really there, causing the size to be too large.
// TODO(rsc): Fix.
if text == dec.text && size > dec.nenc && contains(text, " cr", " dr", " tr") {
return true
}
// 0F AE E9: xed says lfence, which is wrong (only 0F AE E8 is lfence). And so on.
if contains(dec.text, "fence") && hasByte(dec.enc[:dec.nenc], 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF) {
return true
}
// DD C9, DF C9: xed says 'fxch st0, st1' but that instruction is D9 C9.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "fxch ") && hasByte(dec.enc[:dec.nenc], 0xDD, 0xDF) {
return true
}
// DC D4: xed says 'fcom st0, st4' but that instruction is D8 D4.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "fcom ") && hasByte(dec.enc[:dec.nenc], 0xD8, 0xDC) {
return true
}
// DE D4: xed says 'fcomp st0, st4' but that instruction is D8 D4.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "fcomp ") && hasByte(dec.enc[:dec.nenc], 0xDC, 0xDE) {
return true
}
// DF D4: xed says 'fstp st4, st0' but that instruction is DD D4.
if (contains(text, "error:") || isPrefix(text) && size == 1) && contains(dec.text, "fstp ") && hasByte(dec.enc[:dec.nenc], 0xDF) {
return true
}
return false
}
func countExactPrefix(inst *Inst, target Prefix) int {
n := 0
for _, p := range inst.Prefix {
if p == target {
n++
}
}
return n
}
func hasByte(src []byte, target ...byte) bool {
for _, b := range target {
if bytes.IndexByte(src, b) >= 0 {
return true
}
}
return false
}
// Instructions known to us but not to xed.
var xedUnsupported = strings.Fields(`
xrstor
xsave
xsave
ud1
xgetbv
xsetbv
fxsave
fxrstor
clflush
lfence
mfence
sfence
rsqrtps
rcpps
emms
ldmxcsr
stmxcsr
movhpd
movnti
rdrand
movbe
movlpd
sysret
`)
package x86asm
import (
"bytes"
"fmt"
"io"
"log"
"os"
"strconv"
"strings"
"testing"
)
// xed binary from Intel sde-external-6.22.0-2014-03-06.
const xedPath = "/Users/rsc/bin/xed"
func testXedArch(t *testing.T, arch int, generate func(func([]byte))) {
if testing.Short() {
t.Skip("skipping libmach test in short mode")
}
if _, err := os.Stat(xedPath); err != nil {
t.Fatal(err)
}
testExtDis(t, "intel", arch, xed, generate, allowedMismatchXed)
}
func testXed32(t *testing.T, generate func(func([]byte))) {
testXedArch(t, 32, generate)
}
func testXed64(t *testing.T, generate func(func([]byte))) {
testXedArch(t, 64, generate)
}
func xed(ext *ExtDis) error {
b, err := ext.Run(xedPath, fmt.Sprintf("-%d", ext.Arch), "-n", "1G", "-ir", ext.File.Name())
if err != nil {
return err
}
nmatch := 0
next := uint32(start)
var (
addr uint32
encbuf [32]byte
enc []byte
text string
)
var xedEnd = []byte("# end of text section")
var xedEnd1 = []byte("# Errors")
eof := false
for {
line, err := b.ReadSlice('\n')
if err != nil {
if err == io.EOF {
break
}
return fmt.Errorf("reading objdump output: %v", err)
}
if debug {
os.Stdout.Write(line)
}
if bytes.HasPrefix(line, xedEnd) || bytes.HasPrefix(line, xedEnd1) {
eof = true
}
if eof {
continue
}
nmatch++
addr, enc, text = parseLineXed(line, encbuf[:0])
if addr > next {
return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line)
}
if addr < next {
continue
}
switch text {
case "repz":
text = "rep"
case "repnz":
text = "repn"
default:
text = strings.Replace(text, "repz ", "rep ", -1)
text = strings.Replace(text, "repnz ", "repn ", -1)
}
if m := pcrelw.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int16(uint32(targ)-uint32(uint16(addr))-uint32(len(enc))))
}
if m := pcrel.FindStringSubmatch(text); m != nil {
targ, _ := strconv.ParseUint(m[2], 16, 64)
text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc))))
}
ext.Dec <- ExtInst{addr, encbuf, len(enc), text}
encbuf = [32]byte{}
enc = nil
next += 32
}
if next != start+uint32(ext.Size) {
return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size)
}
if err := ext.Wait(); err != nil {
return fmt.Errorf("exec: %v", err)
}
return nil
}
var (
xedInRaw = []byte("In raw...")
xedDots = []byte("...")
xdis = []byte("XDIS ")
xedError = []byte("ERROR: ")
xedNoDecode = []byte("Could not decode at offset: 0x")
)
func parseLineXed(line []byte, encstart []byte) (addr uint32, enc []byte, text string) {
oline := line
if bytes.HasPrefix(line, xedInRaw) || bytes.HasPrefix(line, xedDots) {
return 0, nil, ""
}
if bytes.HasPrefix(line, xedError) {
i := bytes.IndexByte(line[len(xedError):], ' ')
if i < 0 {
log.Fatalf("cannot parse error: %q", oline)
}
errstr := string(line[len(xedError):])
i = bytes.Index(line, xedNoDecode)
if i < 0 {
log.Fatalf("cannot parse error: %q", oline)
}
i += len(xedNoDecode)
j := bytes.IndexByte(line[i:], ' ')
if j < 0 {
log.Fatalf("cannot parse error: %q", oline)
}
x, err := strconv.ParseUint(string(trimSpace(line[i:i+j])), 16, 32)
if err != nil {
log.Fatalf("cannot parse disassembly: %q", oline)
}
addr = uint32(x)
return addr, nil, errstr
}
if !bytes.HasPrefix(line, xdis) {
log.Fatalf("cannot parse disassembly: %q", oline)
}
i := bytes.IndexByte(line, ':')
if i < 0 {
log.Fatalf("cannot parse disassembly: %q", oline)
}
x, err := strconv.ParseUint(string(trimSpace(line[len(xdis):i])), 16, 32)
if err != nil {
log.Fatalf("cannot parse disassembly: %q", oline)
}
addr = uint32(x)
// spaces
i++
for i < len(line) && line[i] == ' ' {
i++
}
// instruction class, spaces
for i < len(line) && line[i] != ' ' {
i++
}
for i < len(line) && line[i] == ' ' {
i++
}
// instruction set, spaces
for i < len(line) && line[i] != ' ' {
i++
}
for i < len(line) && line[i] == ' ' {
i++
}
// hex
hexStart := i
for i < len(line) && line[i] != ' ' {
i++
}
hexEnd := i
for i < len(line) && line[i] == ' ' {
i++
}
// text
textStart := i
for i < len(line) && line[i] != '\n' {
i++
}
textEnd := i
enc, ok := parseHex(line[hexStart:hexEnd], encstart)
if !ok {
log.Fatalf("cannot parse disassembly: %q", oline)
}
return addr, enc, string(fixSpace(line[textStart:textEnd]))
}
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -50,6 +50,9 @@ import (
"strconv"
"strings"
"text/tabwriter"
"cmd/internal/rsc.io/arm/armasm"
"cmd/internal/rsc.io/x86/x86asm"
)
var symregexp = flag.String("s", "", "only dump symbols matching this regexp")
......@@ -199,14 +202,14 @@ func disasm_amd64(code []byte, pc uint64, lookup lookupFunc) (string, int) {
}
func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) {
inst, err := x86_Decode(code, 64)
inst, err := x86asm.Decode(code, 64)
var text string
size := inst.Len
if err != nil || size == 0 || inst.Op == 0 {
size = 1
text = "?"
} else {
text = x86_plan9Syntax(inst, pc, lookup)
text = x86asm.Plan9Syntax(inst, pc, lookup)
}
return text, size
}
......@@ -232,14 +235,14 @@ func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
}
func disasm_arm(code []byte, pc uint64, lookup lookupFunc) (string, int) {
inst, err := arm_Decode(code, arm_ModeARM)
inst, err := armasm.Decode(code, armasm.ModeARM)
var text string
size := inst.Len
if err != nil || size == 0 || inst.Op == 0 {
size = 4
text = "?"
} else {
text = arm_plan9Syntax(inst, pc, lookup, textReader{code, pc})
text = armasm.Plan9Syntax(inst, pc, lookup, textReader{code, pc})
}
return text, size
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment