whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,281 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
)
// Types for XED enum-like constants.
// Each type is a plain int; its valid values are the constants
// declared for it in this file.
type (
// OperandSizeMode describes operand size mode (66H prefix).
// Valid values: OpSize16, OpSize32, OpSize64.
OperandSizeMode int
// AddressSizeMode describes address size mode (67H prefix).
// Valid values: AddrSize16, AddrSize32, AddrSize64.
AddressSizeMode int
// CPUMode describes availability in certain CPU mode.
// Valid values: Mode16, Mode32, Mode64.
CPUMode int
)
// Possible operand size modes. XED calls it OSZ.
//
// The values are consecutive and start at 0, so they can be used
// directly as indexes (see sizeString and Database.WidthSize).
const (
OpSize16 OperandSizeMode = iota
OpSize32
OpSize64
)
// Possible address size modes. XED calls it ASZ.
//
// The values are consecutive and start at 0, so they can be used
// directly as indexes (see sizeString).
const (
AddrSize16 AddressSizeMode = iota
AddrSize32
AddrSize64
)
// Possible CPU modes. XED calls it MODE.
//
// The values are consecutive and start at 0 (Mode16 == 0).
const (
Mode16 CPUMode = iota
Mode32
Mode64
)
// sizeStrings holds the printed form of every size enumeration value.
// It is indexed by the OpSize*/AddrSize* constants.
var sizeStrings = [...]string{"16", "32", "64"}

// sizeString maps size enumeration value to its string representation.
//
// It panics with an "illegal size value" message when size is not a valid
// index into sizeStrings, i.e. when it is negative or too large.
func sizeString(size int) string {
	// Panic more gracefully than with "index out of range".
	// If client code specified invalid size enumeration,
	// this is programming error that should be fixed, not "handled".
	// Negative values are checked too: they are just as invalid and would
	// otherwise fall through to the raw runtime panic.
	if size < 0 || size >= len(sizeStrings) {
		panic(fmt.Sprintf("illegal size value: %d", size))
	}
	return sizeStrings[size]
}
// String returns osz bit size string. Panics on illegal enumerations.
// It delegates to sizeString, so OpSize16, OpSize32 and OpSize64
// are printed as "16", "32" and "64".
func (osz OperandSizeMode) String() string { return sizeString(int(osz)) }
// String returns asz bit size string. Panics on illegal enumerations.
// It delegates to sizeString, so AddrSize16, AddrSize32 and AddrSize64
// are printed as "16", "32" and "64".
func (asz AddressSizeMode) String() string { return sizeString(int(asz)) }
// Database holds information that is required to
// properly handle XED datafiles.
//
// Every field is nil in the zero value and is replaced as a whole
// by the corresponding Load method (LoadWidths, LoadStates, LoadXtypes).
type Database struct {
// widths maps width name to its record.
widths map[string]*width // all-widths.txt
// states maps state (macro) name to its replacement text.
states map[string]string // all-state.txt
// xtypes maps xtype name to its record.
xtypes map[string]*xtype // all-element-types.txt
}
// width is an "all-widths.txt" record.
type width struct {
// Default xtype name (examples: int, i8, f32).
xtype string
// 16, 32 and 64 bit sizes (all may have same value).
// Indexed by OperandSizeMode (see Database.WidthSize).
sizes [3]string
}
// xtype is an "all-element-types.txt" record.
// All fields are kept as the strings parsed from the file.
type xtype struct {
// Name is xtype identifier.
name string
// baseType specifies xtype base type.
// See "all-element-type-base.txt".
baseType string
// Size is an operand data size in bits.
size string
}
// NewDatabase returns Database that loads everything
// it can find in xedPath.
// Missing lookup file is not an error, but error during
// parsing of found file is.
//
// Lookup:
//
//	"$xedPath/all-state.txt" => db.LoadStates()
//	"$xedPath/all-widths.txt" => db.LoadWidths()
//	"$xedPath/all-element-types.txt" => db.LoadXtypes()
//
// $xedPath is the interpolated value of function argument.
//
// The call NewDatabase("") is valid and returns empty database.
// Load methods can be used to read lookup files one-by-one.
//
// A non-empty xedPath must name an existing directory, otherwise
// a nil Database and an error are returned. When a lookup file is found
// but fails to load, the partially filled Database is returned
// together with that error.
func NewDatabase(xedPath string) (*Database, error) {
	var db Database

	// Honor the documented contract: without this guard os.Stat("")
	// fails and the "empty database" case could never succeed.
	if xedPath == "" {
		return &db, nil
	}

	stat, err := os.Stat(xedPath)
	if err != nil {
		return nil, err
	}
	if !stat.IsDir() {
		return nil, errors.New("xedPath is not directory")
	}

	// Lookup files are processed in this fixed order.
	lookups := []struct {
		name string
		load func(io.Reader) error
	}{
		{"all-state.txt", db.LoadStates},
		{"all-widths.txt", db.LoadWidths},
		{"all-element-types.txt", db.LoadXtypes},
	}
	for _, l := range lookups {
		f, err := os.Open(filepath.Join(xedPath, l.name))
		if err != nil {
			// A lookup file that can not be opened is skipped.
			continue
		}
		err = l.load(f)
		// The file is fully consumed by the loader; release the descriptor
		// right away instead of leaking it. The handle is read-only, so
		// the Close error carries no useful information.
		f.Close()
		if err != nil {
			return &db, err
		}
	}

	return &db, nil
}
// LoadWidths parses XED width definitions from r and stores
// the result in db, replacing previously loaded widths.
// "widths" are 16/32/64 bit mode type sizes.
// See "$XED/obj/dgen/all-widths.txt".
//
// The parse error, if any, is returned as is.
func (db *Database) LoadWidths(r io.Reader) error {
	parsed, err := parseWidths(r)
	db.widths = parsed
	return err
}
// LoadStates parses XED state definitions from r and stores
// the result in db, replacing previously loaded states.
// "states" are simple macro substitutions without parameters.
// See "$XED/obj/dgen/all-state.txt".
//
// The parse error, if any, is returned as is.
func (db *Database) LoadStates(r io.Reader) error {
	parsed, err := parseStates(r)
	db.states = parsed
	return err
}
// LoadXtypes parses XED xtype definitions from r and stores
// the result in db, replacing previously loaded xtypes.
// "xtypes" are low-level XED type names.
// See "$XED/obj/dgen/all-element-types.txt".
// See "$XED/obj/dgen/all-element-type-base.txt".
//
// The parse error, if any, is returned as is.
func (db *Database) LoadXtypes(r io.Reader) error {
	parsed, err := parseXtypes(r)
	db.xtypes = parsed
	return err
}
// WidthSize translates width string to size string using desired
// operand size mode m. For some widths the output is the same
// for any valid value of m.
//
// An empty string is returned for a width that is not known to db.
func (db *Database) WidthSize(width string, m OperandSizeMode) string {
	if info, ok := db.widths[width]; ok && info != nil {
		return info.sizes[m]
	}
	return ""
}
// parseWidths reads all of r and collects width records keyed by width name.
// Lines that do not look like a width record are skipped.
// The only error reported is a failure to read r.
func parseWidths(r io.Reader) (map[string]*width, error) {
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("parse widths: %v", err)
	}

	// A record line starts with either 5 or 3 whitespace-separated words:
	//	name xtype size16 size32 size64
	//	name xtype size
	// Anything after those words (e.g. a "#" comment) is ignored.
	// The 5-word alternative comes first so that it wins when both apply.
	reLine := regexp.MustCompile(`(^\s*\w+\s+\w+\s+\w+\s+\w+\s+\w+)|(^\s*\w+\s+\w+\s+\w+)`)

	widths := make(map[string]*width, 128)
	for _, line := range bytes.Split(data, []byte("\n")) {
		m := reLine.FindSubmatch(line)
		if m == nil {
			continue
		}
		matched := m[1]
		if matched == nil {
			matched = m[2]
		}
		f := bytes.Fields(matched)

		rec := &width{xtype: string(f[1])}
		if len(f) > 3 {
			rec.sizes = [3]string{string(f[2]), string(f[3]), string(f[4])}
		} else {
			// Single size applies to all three modes.
			size := string(f[2])
			rec.sizes = [3]string{size, size, size}
		}
		widths[string(f[0])] = rec
	}

	return widths, nil
}
func parseStates(r io.Reader) (map[string]string, error) {
data, err := ioutil.ReadAll(r)
if err != nil {
return nil, fmt.Errorf("parse states: %v", err)
}
// Lines have form of "name ...replacements [# comment]".
// This regexp captures the name and everything until line end or comment.
lineRE := regexp.MustCompile(`^\s*(\w+)\s+([^#]+)`)
states := make(map[string]string, 128)
for _, l := range strings.Split(string(data), "\n") {
if m := lineRE.FindStringSubmatch(l); m != nil {
name, replacements := m[1], m[2]
states[name] = strings.TrimSpace(replacements)
}
}
return states, nil
}
// parseXtypes reads all of r and collects xtype records keyed by xtype name.
// Lines that do not look like a record are skipped.
// The only error reported is a failure to read r.
func parseXtypes(r io.Reader) (map[string]*xtype, error) {
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("parse xtypes: %v", err)
	}

	// Lines have form of "name baseType size [# comment]".
	lineRE := regexp.MustCompile(`^\s*(\w+)\s+(\w+)\s*(\d+)`)

	xtypes := make(map[string]*xtype)
	for _, line := range strings.Split(string(data), "\n") {
		m := lineRE.FindStringSubmatch(line)
		if m == nil {
			continue
		}
		rec := &xtype{name: m[1], baseType: m[2], size: m[3]}
		xtypes[rec.name] = rec
	}

	return xtypes, nil
}
@@ -0,0 +1,50 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package xeddata provides utilities to work with XED datafiles.
//
// Main features:
// - Fundamental XED enumerations (CPU modes, operand sizes, ...)
// - XED objects and their components
// - XED datafiles reader (see below)
// - Utility functions like ExpandStates
//
// The set of file formats that is understood is the minimal
// set required to generate x86.csv from XED tables:
// - states - simple macro substitutions used in patterns
// - widths - mappings from width names to their size
// - element-types - XED xtype information
// - objects - XED objects that constitute "the tables"
//
// Collectively, those files are called "datafiles".
//
// Terminology is borrowed from XED itself,
// where appropriate, x86csv names are provided
// as an alternative.
//
// "$XED/foo/bar.txt" notation is used to specify a path to "foo/bar.txt"
// file under local XED source repository folder.
//
// The default usage scheme:
// 1. Open "XED database" to load required metadata.
// 2. Read XED file with objects definitions.
// 3. Operate on XED objects.
//
// See example_test.go for complete examples.
//
// It is required to build Intel XED before attempting to use
// its datafiles, as this package expects "all" versions that
// are the concatenated final versions of datafiles.
// If "$XED/obj/dgen/" does not contain relevant files,
// then either this documentation is stale or your XED is not built.
//
// To see examples of "XED objects" see "testdata/xed_objects.txt".
//
// Intel XED https://github.com/intelxed/xed provides all documentation
// that can be required to understand datafiles.
// The "$XED/misc/engineering-notes.txt" is particularly useful.
// For convenience, the most important notes are spread across package comments.
//
// Tested with XED 088c48a2efa447872945168272bcd7005a7ddd91.
package xeddata
@@ -0,0 +1,180 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata_test
import (
"fmt"
"log"
"strings"
"golang.org/x/arch/x86/xeddata"
)
// The "testdata/xedpath" directory contains XED metadata files
// that are supposed to be used for Database initialization.
// Note that XED objects in this file are not real,
// instructions they describe are fictional.
// This example shows how to print raw XED objects using Reader.
// Objects are called "raw" because some of their fields may
// require additional transformations like macro (states) expansion.
func ExampleReader() {
// NOTE(review): xedPath is not referenced anywhere else in this example;
// no Database is opened here.
const xedPath = "testdata/xedpath"
// Two objects: the first one has a single PATTERN/OPERANDS pair,
// the second one has two of them.
input := strings.NewReader(`
{
ICLASS: VEXADD
EXCEPTIONS: avx-type-zero
CPL: 2000
CATEGORY: AVX-Q
EXTENSION: AVX-Q
ATTRIBUTES: A B C
PATTERN: VV1 0x07 VL128 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
}
{
ICLASS: COND_MOV_Z
CPL: 210
CATEGORY: MOV_IF_COND_MET
EXTENSION: BASE
ISA_SET: COND_MOV
FLAGS: READONLY [ zf-tst ]
PATTERN: 0x0F 0x4F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
OPERANDS: REG0=GPRv_R():cw MEM0:r:width_v
PATTERN: 0x0F 0x4F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
OPERANDS: REG0=GPRv_R():cw REG1=GPRv_B():r
}`)
// Read every object that input contains.
objects, err := xeddata.NewReader(input).ReadAll()
if err != nil {
log.Fatal(err)
}
// Print each object header followed by one line per instruction template.
for _, o := range objects {
fmt.Printf("%s (%s):\n", o.Opcode(), o.Extension)
for _, inst := range o.Insts {
fmt.Printf("\t[%d] %s\n", inst.Index, inst.Operands)
}
}
//Output:
// VEXADD (AVX-Q):
// [0] REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
// COND_MOV_Z (BASE):
// [0] REG0=GPRv_R():cw MEM0:r:width_v
// [1] REG0=GPRv_R():cw REG1=GPRv_B():r
}
// This example shows how to use ExpandStates and its effects.
// Every pattern is printed twice: as read ("old") and after
// state macro expansion ("new").
func ExampleExpandStates() {
// Directory that NewDatabase loads the lookup files from.
const xedPath = "testdata/xedpath"
// One object with four PATTERN/OPERANDS pairs; patterns use "_M_*" macros.
input := strings.NewReader(`
{
ICLASS: VEXADD
CPL: 3
CATEGORY: ?
EXTENSION: ?
ATTRIBUTES: AT_A AT_B
PATTERN: _M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
PATTERN: _M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 REG2=XMM_B():r:width_dq:fword64
PATTERN: _M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 MEM0:r:qq:fword64
PATTERN: _M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 REG2=YMM_B():r:qq:fword64
}`)
objects, err := xeddata.NewReader(input).ReadAll()
if err != nil {
log.Fatal(err)
}
// ExpandStates needs a database to look the macros up in.
db, err := xeddata.NewDatabase(xedPath)
if err != nil {
log.Fatal(err)
}
for _, o := range objects {
for _, inst := range o.Insts {
fmt.Printf("old: %q\n", inst.Pattern)
fmt.Printf("new: %q\n", xeddata.ExpandStates(db, inst.Pattern))
}
}
//Output:
// old: "_M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
// old: "_M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
// old: "_M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
// old: "_M_VV_TRUE 0x58 _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
}
// This example shows how to handle Inst "OPERANDS" field.
// The field is split on whitespace and every part is decoded
// with NewOperand.
func ExampleOperand() {
// Directory that NewDatabase loads the lookup files from.
const xedPath = "testdata/xedpath"
input := strings.NewReader(`
{
ICLASS: ADD_N_TIMES # Like IMUL
CPL: 3
CATEGORY: BINARY
EXTENSION: BASE
ISA_SET: I86
FLAGS: MUST [ of-mod sf-u zf-u af-u pf-u cf-mod ]
PATTERN: 0xAA MOD[mm] MOD!=3 REG[0b101] RM[nnn] MODRM()
OPERANDS: MEM0:r:width_v REG0=AX:rw:SUPP REG1=DX:w:SUPP
}`)
objects, err := xeddata.NewReader(input).ReadAll()
if err != nil {
log.Fatal(err)
}
// NewOperand needs a database to recognize width and xtype names.
db, err := xeddata.NewDatabase(xedPath)
if err != nil {
log.Fatal(err)
}
inst := objects[0].Insts[0] // Single instruction is enough for this example
for i, rawOperand := range strings.Fields(inst.Operands) {
operand, err := xeddata.NewOperand(db, rawOperand)
if err != nil {
log.Fatalf("parse operand #%d: %+v", i, err)
}
visibility := "implicit"
if operand.IsVisible() {
visibility = "explicit"
}
fmt.Printf("(%s) %s:\n", visibility, rawOperand)
fmt.Printf("\tname: %q\n", operand.Name)
// Width is printed only for operands that IsVisible reports as visible.
if operand.IsVisible() {
fmt.Printf("\t32/64bit width: %s/%s bytes\n",
db.WidthSize(operand.Width, xeddata.OpSize32),
db.WidthSize(operand.Width, xeddata.OpSize64))
}
}
//Output:
// (explicit) MEM0:r:width_v:
// name: "MEM0"
// 32/64bit width: 4/8 bytes
// (implicit) REG0=AX:rw:SUPP:
// name: "REG0=AX"
// (implicit) REG1=DX:w:SUPP:
// name: "REG1=DX"
}
@@ -0,0 +1,261 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"encoding/json"
"strings"
)
// An Object is a single "dec/enc-instruction" XED object from datafiles.
//
// Field names and their comments are borrowed from Intel XED
// engineering notes (see "$XED/misc/engineering-notes.txt").
//
// Field values are always trimmed (i.e. no leading/trailing whitespace).
//
// Missing optional members are expressed with an empty string.
//
// Object contains multiple Inst elements that represent concrete
// instruction with encoding pattern and operands description.
type Object struct {
// Iclass is instruction class name (opcode).
// Iclass alone is not enough to uniquely identify machine instructions.
// Example: "PSRLW".
Iclass string
// Disasm is substituted name when a simple conversion
// from iclass is inappropriate.
// Never combined with DisasmIntel or DisasmATTSV.
// Example: "syscall".
//
// Optional.
Disasm string
// DisasmIntel is like Disasm, but with Intel syntax.
// If present, usually comes with DisasmATTSV.
// Example: "jmp far".
//
// Optional.
DisasmIntel string
// DisasmATTSV is like Disasm, but with AT&T/SysV syntax.
// If present, usually comes with DisasmIntel.
// Example: "ljmp".
//
// Optional.
DisasmATTSV string
// Attributes describes name set for bits in the binary attributes field.
// Names are separated by space (see HasAttribute).
// Example: "NOP X87_CONTROL NOTSX".
//
// Optional. If not present, zero attribute set is implied.
Attributes string
// Uname is unique name used for deleting / replacing instructions.
//
// Optional. Provided for completeness, mostly useful for XED internal usage.
Uname string
// CPL is instruction current privilege level restriction.
// Can have value of "0" or "3".
CPL string
// Category is an ad-hoc categorization of instructions.
// Example: "SEMAPHORE".
Category string
// Extension is an ad-hoc grouping of instructions.
// If no ISASet is specified, this is used instead.
// Example: "3DNOW"
Extension string
// Exceptions is an exception set name.
// Example: "SSE_TYPE_7".
//
// Optional. Empty exception category generally means that
// instruction generates no exceptions.
Exceptions string
// ISASet is a name for the group of instructions that
// introduced this feature.
// Example: "I286PROTECTED".
//
// Older objects only defined Extension field.
// Newer objects may contain both Extension and ISASet fields.
// For some objects Extension==ISASet.
// Both fields are required to do precise CPUID-like decisions.
//
// Optional.
ISASet string
// Flags describes read/written flag bit values.
// Example: "MUST [ of-u sf-u af-u pf-u cf-mod ]".
//
// Optional. If not present, flags are neither read nor written.
Flags string
// A hopefully useful comment.
//
// Optional.
Comment string
// The object revision.
//
// Optional.
Version string
// RealOpcode marks unstable (not in SDM yet) instructions with "N".
// Normally, always "Y" or not present at all.
//
// Optional.
RealOpcode string
// Insts are concrete instruction templates that are derived from containing Object.
// Inst contains fields PATTERN, OPERANDS, IFORM in enc/dec instruction.
Insts []*Inst
}
// Inst represents a single instruction template.
//
// Some templates contain expandable (macro) pattern and operands,
// which means that more than one real instruction
// is expressed by the template.
type Inst struct {
// Object that contains properties that are shared with multiple
// Inst objects.
// It is embedded, so its fields are accessible directly on Inst.
*Object
// Index is the position inside XED object.
// Object.Insts[Index] returns this inst.
Index int
// Pattern is the sequence of bits and nonterminals used to
// decode/encode an instruction.
// Example: "0x0F 0x28 no_refining_prefix MOD[0b11] MOD=3 REG[rrr] RM[nnn]".
Pattern string
// Operands are instruction arguments, typically registers,
// memory operands and pseudo-resources. Separated by space.
// Example: "MEM0:rcw:b REG0=GPR8_R():r REG1=XED_REG_AL:rcw:SUPP".
Operands string
// Iform is a name for the pattern that starts with the
// iclass and bakes in the operands. If omitted, XED
// tries to generate one. We often add custom suffixes
// to these to disambiguate certain combinations.
// Example: "MOVAPS_XMMps_XMMps_0F28".
//
// Optional.
Iform string
}
// Opcode returns instruction name or empty string,
// if appropriate Object fields are not initialized.
//
// The name is the first non-empty value among Iclass, Disasm,
// DisasmIntel, DisasmATTSV and Uname, checked in that order.
func (o *Object) Opcode() string {
	candidates := [...]string{
		o.Iclass,
		o.Disasm,
		o.DisasmIntel,
		o.DisasmATTSV,
		o.Uname,
	}
	for _, name := range candidates {
		if name != "" {
			return name
		}
	}
	return ""
}
// HasAttribute checks that o has attribute with specified name.
// Note that check is done at "word" level, substring names will not match.
// The lookup is performed over o.Attributes by containsWord.
func (o *Object) HasAttribute(name string) bool {
return containsWord(o.Attributes, name)
}
// String returns pretty-printed inst representation.
//
// The output is a valid JSON string, but this property is
// not guaranteed to be preserved.
//
// It panics if JSON encoding fails.
func (inst *Inst) String() string {
	// inst is not marshalled directly: the printed form must be flat,
	// with Object and Inst fields side by side.
	// A struct (not a map) is used to keep properties order stable.
	type flatObject struct {
		Iclass      string
		Disasm      string `json:",omitempty"`
		DisasmIntel string `json:",omitempty"`
		DisasmATTSV string `json:",omitempty"`
		Attributes  string `json:",omitempty"`
		Uname       string `json:",omitempty"`
		CPL         string
		Category    string
		Extension   string
		Exceptions  string `json:",omitempty"`
		ISASet      string `json:",omitempty"`
		Flags       string `json:",omitempty"`
		Comment     string `json:",omitempty"`
		Version     string `json:",omitempty"`
		RealOpcode  string `json:",omitempty"`
		Pattern     string
		Operands    string
		Iform       string `json:",omitempty"`
	}

	var flat flatObject

	// Properties shared via the embedded Object.
	flat.Iclass = inst.Iclass
	flat.Disasm = inst.Disasm
	flat.DisasmIntel = inst.DisasmIntel
	flat.DisasmATTSV = inst.DisasmATTSV
	flat.Attributes = inst.Attributes
	flat.Uname = inst.Uname
	flat.CPL = inst.CPL
	flat.Category = inst.Category
	flat.Extension = inst.Extension
	flat.Exceptions = inst.Exceptions
	flat.ISASet = inst.ISASet
	flat.Flags = inst.Flags
	flat.Comment = inst.Comment
	flat.Version = inst.Version
	flat.RealOpcode = inst.RealOpcode

	// Properties of the instruction template itself.
	flat.Pattern = inst.Pattern
	flat.Operands = inst.Operands
	flat.Iform = inst.Iform

	encoded, err := json.MarshalIndent(flat, "", " ")
	if err != nil {
		panic(err)
	}
	return string(encoded)
}
// ExpandStates returns a copy of s where all state macros
// are expanded.
// This requires db "states" to be loaded.
//
// s is processed word by word: every whitespace-separated word that
// has a non-empty replacement in db states is substituted,
// and the words are joined back with a single space.
func ExpandStates(db *Database, s string) string {
	states := db.states
	words := strings.Fields(s)
	for i, w := range words {
		repl, ok := states[w]
		if !ok || repl == "" {
			continue
		}
		words[i] = repl
	}
	return strings.Join(words, " ")
}
// containsWord searches for whole word match in s.
//
// A match is an occurrence of word that is delimited by a space
// (or by the string boundary) on both sides.
// Every occurrence of word is examined, so an earlier partial match
// (like "XNOP" for the word "NOP") does not hide a later whole word.
func containsWord(s, word string) bool {
	for from := 0; from <= len(s); {
		i := strings.Index(s[from:], word)
		if i == -1 {
			return false
		}
		i += from // Index is relative to s[from:]
		end := i + len(word)

		leftOK := i == 0 || s[i-1] == ' '
		rightOK := end == len(s) || s[end] == ' '
		if leftOK && rightOK {
			return true
		}

		if word == "" {
			// Degenerate query: only the position 0 is considered.
			return false
		}
		// Continue right after the start of the rejected occurrence.
		from = i + 1
	}
	return false
}
@@ -0,0 +1,158 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"errors"
"strings"
)
// OperandVisibility describes operand visibility in XED terms.
// The zero value is VisExplicit.
type OperandVisibility int
const (
// VisExplicit is a default operand visibility.
// Explicit operand is "real" kind of operands that
// is shown in syntax and can be specified by the programmer.
VisExplicit OperandVisibility = iota
// VisImplicit is for fixed arg (like EAX); usually shown in syntax.
VisImplicit
// VisSuppressed is like VisImplicit, but not shown in syntax.
// In some very rare exceptions, they are also shown in syntax string.
VisSuppressed
// VisEcond is encoder-only conditions. Can be ignored.
VisEcond
)
// Operand holds data that is encoded inside
// instruction's "OPERANDS" field.
//
// Use NewOperand function to decode operand fields into Operand object.
type Operand struct {
// Name is an ID with optional nonterminal name part.
//
// Possible values: "REG0=GPRv_B", "REG1", "MEM0", ...
//
// If nonterminal part is present (see NonterminalName method), name
// can be split into LHS and RHS with NameLHS and NameRHS methods.
Name string
// Action describes argument types.
//
// Possible values: "r", "w", "rw", "cr", "cw", "crw".
// Optional "c" prefix represents conditional access.
Action string
// Width descriptor. It can express simple width like "w" (word, 16bit)
// or meta-width like "v", which corresponds to {16, 32, 64} bits.
//
// Possible values: "", "q", "ds", "dq", ...
// Optional.
Width string
// Xtype holds XED-specific type information.
//
// Possible values: "", "f64", "i32", ...
// Optional.
Xtype string
// Attributes serves as container for all other properties.
//
// Possible values:
// EVEX.b context {
// TXT=ZEROSTR - zeroing
// TXT=SAESTR - suppress all exceptions
// TXT=ROUNDC - rounding
// TXT=BCASTSTR - broadcasting
// }
// MULTISOURCE4 - 4FMA multi-register operand.
//
// Optional. For most operands, it's nil.
Attributes map[string]bool
// Visibility tells if operand is explicit, implicit, suppressed
// or an encoder-only condition (see OperandVisibility constants).
Visibility OperandVisibility
}
// xedVisibilities maps visibility names, as they are spelled in
// operand fields, to OperandVisibility values.
// Used by NewOperand to recognize visibility fields.
var xedVisibilities = map[string]OperandVisibility{
"EXPL": VisExplicit,
"IMPL": VisImplicit,
"SUPP": VisSuppressed,
"ECOND": VisEcond,
}
// NewOperand decodes operand string.
//
// See "$XED/pysrc/opnds.py" to learn about fields format
// and valid combinations.
//
// Requires database with xtypes and widths info.
//
// s is a colon-separated list of fields. The first field is the operand
// name and the second one (if any) is the action. Every following field
// is classified as width, visibility or xtype; a field that is none of
// those is recorded in Attributes.
//
// An error is returned if db widths are not loaded or if s is empty.
func NewOperand(db *Database, s string) (*Operand, error) {
	if db.widths == nil {
		return nil, errors.New("Database.widths is nil")
	}
	// strings.Split never returns an empty slice for a non-empty separator,
	// so the empty input has to be detected explicitly.
	if s == "" {
		return nil, errors.New("empty operand fields string")
	}

	fields := strings.Split(s, ":")
	if len(fields) == 1 {
		return &Operand{Name: fields[0]}, nil
	}
	var op Operand

	// First two fields are fixed.
	op.Name = fields[0]
	op.Action = fields[1]

	// Optional fields.
	for _, f := range fields[2:] {
		if db.widths[f] != nil && op.Width == "" {
			// Only the first width-like field is taken as a width.
			op.Width = f
		} else if vis, ok := xedVisibilities[f]; ok {
			op.Visibility = vis
		} else if xtype := db.xtypes[f]; xtype != nil {
			op.Xtype = f
		} else {
			// Attributes map is allocated lazily: most operands have none.
			if op.Attributes == nil {
				op.Attributes = make(map[string]bool)
			}
			op.Attributes[f] = true
		}
	}

	return &op, nil
}
// NonterminalName reports whether op.Name consists
// of LHS and RHS parts, i.e. whether it contains "=".
//
// RHS is non-terminal name lookup function expression.
// Example: "REG0=GPRv()" has "GPRv()" name lookup function.
func (op *Operand) NonterminalName() bool {
return strings.Contains(op.Name, "=")
}
// NameLHS returns left hand side part of the non-terminal name,
// which is everything that precedes the first "=".
// A name without "=" is returned unchanged.
// Example: NameLHS("REG0=GPRv()") => "REG0".
func (op *Operand) NameLHS() string {
	if i := strings.Index(op.Name, "="); i >= 0 {
		return op.Name[:i]
	}
	return op.Name
}
// NameRHS returns right hand side part of the non-terminal name.
// Example: NameRHS("REG0=GPRv()") => "GPRv()".
//
// An empty string is returned when op.Name has no "=" (there is no
// RHS part then, see NonterminalName).
// If the name contains several "=", the part between the first
// and the second one is returned.
func (op *Operand) NameRHS() string {
	i := strings.Index(op.Name, "=")
	if i == -1 {
		return ""
	}
	rhs := op.Name[i+1:]
	if j := strings.Index(rhs, "="); j != -1 {
		rhs = rhs[:j]
	}
	return rhs
}
// IsVisible reports whether op is of a kind that is usually
// shown in syntax strings: explicit or implicit.
func (op *Operand) IsVisible() bool {
	switch op.Visibility {
	case VisExplicit, VisImplicit:
		return true
	default:
		return false
	}
}
@@ -0,0 +1,95 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"sort"
"strings"
)
// PatternSet wraps instruction PATTERN properties providing set operations on them.
// Keys are pattern properties; present properties are mapped to true.
type PatternSet map[string]bool

// NewPatternSet decodes pattern string into PatternSet.
// Every whitespace-separated part of pattern becomes a set member.
func NewPatternSet(pattern string) PatternSet {
	set := PatternSet{}
	props := strings.Fields(pattern)
	for i := range props {
		set[props[i]] = true
	}
	return set
}
// PatternAliases is extendable map of pattern keys aliases.
// Maps human-readable key to XED property.
//
// Used in PatternSet.Is: a key that has an alias here
// is looked up by that alias instead.
var PatternAliases = map[string]string{
"VEX": "VEXVALID=1",
"EVEX": "VEXVALID=2",
"XOP": "VEXVALID=3",
"MemOnly": "MOD!=3",
"RegOnly": "MOD=3",
}
// String returns pattern printer representation:
// all set keys, sorted and separated by a single space.
func (pset PatternSet) String() string {
	props := make([]string, 0, len(pset))
	for prop := range pset {
		props = append(props, prop)
	}
	// Map iteration order is random; sorting makes the output stable.
	sort.Strings(props)
	return strings.Join(props, " ")
}
// Is reports whether set contains key k.
// Unlike the direct pattern set lookup, it first consults
// PatternAliases: if k has a non-empty alias there,
// the alias is looked up instead of k.
func (pset PatternSet) Is(k string) bool {
	key := k
	if alias := PatternAliases[k]; alias != "" {
		key = alias
	}
	return pset[key]
}
// Replace substitutes oldKey with newKey.
// If oldKey is not in the set, nothing happens;
// otherwise newKey is inserted and then oldKey is removed.
func (pset PatternSet) Replace(oldKey, newKey string) {
	if !pset[oldKey] {
		return
	}
	pset[newKey] = true
	delete(pset, oldKey)
}
// Index returns the position inside keys of the first key
// that the set contains.
// Returns -1 if the set contains none of the given keys.
func (pset PatternSet) Index(keys ...string) int {
	for pos := 0; pos < len(keys); pos++ {
		if pset[keys[pos]] {
			return pos
		}
	}
	return -1
}
// Match is like MatchOrDefault("", keyval...).
// An empty string is returned when none of the keys is in the set.
func (pset PatternSet) Match(keyval ...string) string {
return pset.MatchOrDefault("", keyval...)
}
// MatchOrDefault returns first matching key associated value.
// Returns defaultValue if no match is found.
//
// Keyval structure can be described as {"k1", "v1", ..., "kN", "vN"}.
// Pairs are examined in order. A trailing key without a value
// (odd number of arguments) is ignored.
func (pset PatternSet) MatchOrDefault(defaultValue string, keyval ...string) string {
	// The i+1 bound keeps the value lookup in range for odd-length keyval.
	for i := 0; i+1 < len(keyval); i += 2 {
		key, val := keyval[i], keyval[i+1]
		if pset[key] {
			return val
		}
	}
	return defaultValue
}
@@ -0,0 +1,211 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"bufio"
"errors"
"fmt"
"io"
"regexp"
"strings"
)
// Reader reads enc/dec-instruction objects from XED datafile.
//
// Use NewReader to create one: the scanner must have Reader.split
// installed as its split function.
type Reader struct {
// scanner yields input lines; see Reader.split.
scanner *bufio.Scanner
lines []string // Re-used between Read calls
// True if last line ends with newline escape (backslash).
// Set by Reader.split, consumed by Reader.scanLine.
joinLines bool
}
// NewReader returns a new Reader that reads from r.
// r is wrapped into a bufio.Scanner.
func NewReader(r io.Reader) *Reader {
return newReader(bufio.NewScanner(r))
}
// newReader returns a Reader that takes its lines from scanner.
// The scanner split function is replaced with Reader.split.
func newReader(scanner *bufio.Scanner) *Reader {
	r := new(Reader)
	r.scanner = scanner
	r.lines = make([]string, 0, 64)
	scanner.Split(r.split)
	return r
}
// split implements bufio.SplitFunc for Reader.
//
// It is bufio.ScanLines that additionally records, for every non-empty
// token, whether the token ends with a backslash (newline escape).
// The recorded joinLines flag affects Reader.scanLine behavior.
func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) {
	advance, tok, err := bufio.ScanLines(data, atEOF)
	if err != nil || len(tok) == 0 {
		// Nothing to inspect: the flag keeps its previous value.
		return advance, tok, err
	}
	last := tok[len(tok)-1]
	r.joinLines = last == '\\'
	return advance, tok, err
}
// Read reads single XED instruction object from
// the stream backed by reader.
//
// Lines that precede the opening "{" line are skipped.
// Lines between "{" and "}" are collected and parsed into Object.
//
// If there is no data left to be read,
// returned error is io.EOF.
// If the underlying scanner stopped because of an error,
// that error is returned instead.
// An object without the closing "}" line results in an error.
func (r *Reader) Read() (*Object, error) {
	for line := r.scanLine(); line != ""; line = r.scanLine() {
		if line[0] != '{' {
			continue
		}
		lines := r.lines[:0] // Object lines
		for line := r.scanLine(); line != ""; line = r.scanLine() {
			if line[0] == '}' {
				return r.parseLines(lines)
			}
			lines = append(lines, line)
		}
		// scanLine gave up: report the real cause if the scanner has one.
		if err := r.scanner.Err(); err != nil {
			return nil, err
		}
		return nil, errors.New("no matching '}' found")
	}
	// Do not disguise a scanner failure as a clean end of input.
	if err := r.scanner.Err(); err != nil {
		return nil, err
	}
	return nil, io.EOF
}
// ReadAll reads all the remaining objects from r.
// A successful call returns err == nil, not err == io.EOF,
// just like csv.Reader.ReadAll().
//
// On failure, objects that were read before the error
// are returned along with it.
func (r *Reader) ReadAll() ([]*Object, error) {
	objects := make([]*Object, 0)
	for {
		switch o, err := r.Read(); err {
		case nil:
			objects = append(objects, o)
		case io.EOF:
			return objects, nil
		default:
			return objects, err
		}
	}
}
// instLineRE matches valid XED object/inst line.
// It expects lines that are joined by '\' to be concatenated.
//
// The format can be described as:
//
// unquoted field name "[A-Z_]+" (captured)
// field value delimiter ":"
// field value string (captured)
// optional trailing comment that is ignored "[^#]*"
//
// The field name must start at the very beginning of the line.
// The captured value stops at the first "#" and may carry
// trailing whitespace.
var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`)
// parseLines turns collected object lines into Object.
//
// Every line is either a comment (starts with "#") or a "KEY: value" pair.
// PATTERN, OPERANDS and IFORM keys are repeatable and produce Object.Insts;
// every other known key sets the matching Object field.
//
// An error is returned for a malformed line, for an unknown key and
// for an object where PATTERN and OPERANDS counts differ.
func (r *Reader) parseLines(lines []string) (*Object, error) {
	obj := new(Object)

	// Keys that map to a single Object field.
	scalars := map[string]*string{
		"ICLASS":       &obj.Iclass,
		"DISASM":       &obj.Disasm,
		"DISASM_INTEL": &obj.DisasmIntel,
		"DISASM_ATTSV": &obj.DisasmATTSV,
		"ATTRIBUTES":   &obj.Attributes,
		"UNAME":        &obj.Uname,
		"CPL":          &obj.CPL,
		"CATEGORY":     &obj.Category,
		"EXTENSION":    &obj.Extension,
		"EXCEPTIONS":   &obj.Exceptions,
		"ISA_SET":      &obj.ISASet,
		"FLAGS":        &obj.Flags,
		"COMMENT":      &obj.Comment,
		"VERSION":      &obj.Version,
		"REAL_OPCODE":  &obj.RealOpcode,
	}

	// Repeatable keys are collected first and combined into insts
	// afterwards, as their relative order inside an object is not fixed.
	var patterns, operands, iforms []string

	for _, line := range lines {
		if line[0] == '#' { // Skip comment lines.
			continue
		}
		m := instLineRE.FindStringSubmatch(line)
		if len(m) == 0 {
			return nil, fmt.Errorf("malformed line: %s", line)
		}
		key, val := m[1], strings.TrimSpace(m[2])

		if dst, ok := scalars[key]; ok {
			*dst = val
			continue
		}
		switch key {
		case "PATTERN":
			patterns = append(patterns, val)
		case "OPERANDS":
			operands = append(operands, val)
		case "IFORM":
			iforms = append(iforms, val)
		default:
			// Strictness about unknown field names doubles as
			// XED file validation and catches typos in test files.
			return nil, fmt.Errorf("unknown key token: %s", key)
		}
	}

	if len(operands) != len(patterns) {
		return nil, fmt.Errorf("%s: OPERANDS and PATTERN lines mismatch", obj.Opcode())
	}

	obj.Insts = make([]*Inst, len(operands))
	for i, ops := range operands {
		inst := &Inst{
			Object:   obj,
			Index:    i,
			Pattern:  patterns[i],
			Operands: ops,
		}
		// There can be less IFORMs than insts.
		if i < len(iforms) {
			inst.Iform = iforms[i]
		}
		obj.Insts[i] = inst
	}

	return obj, nil
}
// scanLine tries to fetch non-empty line from scanner.
// Empty lines are skipped.
//
// A line that ends with the newline escape (see Reader.joinLines)
// is stripped of the trailing backslash and concatenated with
// the line that follows it; this is repeated while lines keep
// ending with the escape.
//
// Returns empty line when scanner.Scan() returns false
// before non-empty line is found.
func (r *Reader) scanLine() string {
	joined := ""
	for r.scanner.Scan() {
		line := r.scanner.Text()
		if line == "" {
			continue
		}
		if !r.joinLines {
			return joined + line
		}
		// Drop the escaping backslash and keep collecting.
		joined += line[:len(line)-len("\\")]
	}
	return joined
}
@@ -0,0 +1,289 @@
------ empty input
====
[]
------ only newlines
====
[]
------ only comments and newlines
# {
# ICLASS : ADD
# }
====
[]
------ join lines
{
ICLASS : i\
cla\
ss1
VERSION : 1.\
0
FLAGS:\
\
\
NOP
REAL_OPCODE : \Y
CPL : \3
PATTERN: A B
OPERANDS:
}
====
[{
"Iclass": "i cla ss1",
"Version": "1.0",
"Flags": "NOP",
"RealOpcode": "\\Y",
"CPL": "\\3",
"Pattern": "A B"
}]
------ 1 variant; no iform
{
ICLASS:iclass1 # comment
DISASM : disasm1
PATTERN :pat1 pat1
OPERANDS : ops1 ops1
}
# comment
{ # comment
# comment
ICLASS : iclass2
OPERANDS:ops2
PATTERN:pat2 # comment
}
====
[{
"Iclass": "iclass1",
"Disasm": "disasm1",
"Pattern": "pat1 pat1",
"Operands": "ops1 ops1"
}, {
"Iclass": "iclass2",
"Operands": "ops2",
"Pattern": "pat2"
}]
------ 2 variants; no iform
{
PATTERN : pat1_1
COMMENT : comment1
OPERANDS : ops1_1
OPERANDS : ops1_2
PATTERN : pat1_2
}
{
PATTERN : pat2_1
PATTERN : pat2_2
OPERANDS : ops2_1
OPERANDS : ops2_2
}
====
[{
"Comment": "comment1",
"Pattern": "pat1_1",
"Operands": "ops1_1"
}, {
"Comment": "comment1",
"Pattern": "pat1_2",
"Operands": "ops1_2"
}, {
"Pattern": "pat2_1",
"Operands": "ops2_1"
}, {
"Pattern": "pat2_2",
"Operands": "ops2_2"
}]
------ 3 variants
{
PATTERN : pat1_1
OPERANDS : ops1_1
IFORM : iform1_1
PATTERN : pat1_2# comment
OPERANDS : ops1_2# comment
IFORM : iform1_2# comment
# comment
PATTERN : pat1_3
OPERANDS : ops1_3
IFORM : iform1_3
}
{
PATTERN : pat2_1
OPERANDS : ops2_1
IFORM : iform2_1
PATTERN : pat2_2
OPERANDS : ops2_2
PATTERN : pat2_3
OPERANDS : ops2_3
}
====
[{
"Iform": "iform1_1",
"Pattern": "pat1_1",
"Operands": "ops1_1"
}, {
"Iform": "iform1_2",
"Pattern": "pat1_2",
"Operands": "ops1_2"
}, {
"Iform": "iform1_3",
"Pattern": "pat1_3",
"Operands": "ops1_3"
}, {
"Iform": "iform2_1",
"Pattern": "pat2_1",
"Operands": "ops2_1"
}, {
"Pattern": "pat2_2",
"Operands": "ops2_2"
}, {
"Pattern": "pat2_3",
"Operands": "ops2_3"
}]
------ stable and unstable instructions (REAL_OPCODE)
{
ICLASS: STABLE
REAL_OPCODE: Y
PATTERN : x y z
OPERANDS :
}
{
ICLASS: UNSTABLE
REAL_OPCODE: N
PATTERN : x y z
OPERANDS :
}
====
[{
"Iclass": "STABLE",
"RealOpcode": "Y",
"Pattern": "x y z",
"Operands": ""
}, {
"Iclass": "UNSTABLE",
"RealOpcode": "N",
"Pattern": "x y z",
"Operands": ""
}]
------ AVXAES objects
# Emitting VAESENCLAST
{
ICLASS : VAESENCLAST
EXCEPTIONS: avx-type-4
CPL : 3
CATEGORY : AES
EXTENSION : AVXAES
PATTERN : VV1 0xDD V66 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] VL128
OPERANDS : REG0=XMM_R():w:dq REG1=XMM_N():r:dq REG2=XMM_B():r:dq
PATTERN : VV1 0xDD V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
OPERANDS : REG0=XMM_R():w:dq REG1=XMM_N():r:dq MEM0:r:dq
}
# Emitting VAESDEC
{
ICLASS : VAESDEC
EXCEPTIONS: avx-type-4
CPL : 3
CATEGORY : AES
EXTENSION : AVXAES
PATTERN : VV1 0xDE V66 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] VL128
OPERANDS : REG0=XMM_R():w:dq REG1=XMM_N():r:dq REG2=XMM_B():r:dq
PATTERN : VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
OPERANDS : REG0=XMM_R():w:dq REG1=XMM_N():r:dq MEM0:r:dq
}
====
[{
"Iclass": "VAESENCLAST",
"Exceptions": "avx-type-4",
"CPL": "3",
"Category": "AES",
"Extension": "AVXAES",
"Pattern": "VV1 0xDD V66 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] VL128",
"Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq REG2=XMM_B():r:dq"
}, {
"Iclass": "VAESENCLAST",
"Exceptions": "avx-type-4",
"CPL": "3",
"Category": "AES",
"Extension": "AVXAES",
"Pattern": "VV1 0xDD V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
"Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq MEM0:r:dq"
}, {
"Iclass": "VAESDEC",
"Exceptions": "avx-type-4",
"CPL": "3",
"Category": "AES",
"Extension": "AVXAES",
"Pattern": "VV1 0xDE V66 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] VL128",
"Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq REG2=XMM_B():r:dq"
}, {
"Iclass": "VAESDEC",
"Exceptions": "avx-type-4",
"CPL": "3",
"Category": "AES",
"Extension": "AVXAES",
"Pattern": "VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
"Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq MEM0:r:dq"
}]
------ Two-word disasm
{
ICLASS : JMP_FAR
DISASM_INTEL: jmp far
DISASM_ATTSV: ljmp
CPL : 3
CATEGORY : UNCOND_BR
ATTRIBUTES : FAR_XFER NOTSX
EXTENSION : BASE
ISA_SET : I86
PATTERN : 0xEA not64 BRDISPz() UIMM16()
OPERANDS : PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP
}
====
[{
"Iclass": "JMP_FAR",
"DisasmIntel": "jmp far",
"DisasmATTSV": "ljmp",
"CPL": "3",
"Attributes": "FAR_XFER NOTSX",
"Extension": "BASE",
"ISASet": "I86",
"Pattern": "0xEA not64 BRDISPz() UIMM16()",
"Operands": "PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP"
}]
------ INVALID key token
{
FOO : 111
}
====
unknown key token: FOO
------ INVALID unterminated object
{
====
no matching '}' found
------ INVALID pat+ops
{
ICLASS: foobar
PATTERN : 1
PATTERN : 2
OPERANDS : 3
}
====
foobar: OPERANDS and PATTERN lines mismatch
@@ -0,0 +1,5 @@
# Copyright 2018 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
fword64 T_DOUBLE 64
@@ -0,0 +1,17 @@
# Copyright 2018 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
_M_VV_TRUE VEXVALID=1
_M_VV_FALSE VEXVALID=0
_M_VEX_P_66 VEX_PREFIX=1
_M_VEX_P_F2 VEX_PREFIX=2
_M_VEX_P_F3 VEX_PREFIX=3
_M_VLEN_128 VL=0
_M_VLEN_256 VL=1
_M_MAP_0F MAP=1
_M_MAP_0F38 MAP=2
_M_MAP_0F3A MAP=3
@@ -0,0 +1,8 @@
# Copyright 2018 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
width_dq i32 16
width_qq i32 32
width_v int 2 4 8
width_f64 f64 8
@@ -0,0 +1,32 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"io"
"os"
"path/filepath"
)
// WalkInsts calls visit function for each XED instruction found at $xedPath/all-dec-instructions.txt.
//
// Objects are read one at a time and visit is invoked for every
// instruction of an object, in the order they appear in o.Insts.
// It returns nil once the reader reports io.EOF, or the first error
// returned by os.Open or by the reader.
func WalkInsts(xedPath string, visit func(*Inst)) error {
	f, err := os.Open(filepath.Join(xedPath, "all-dec-instructions.txt"))
	if err != nil {
		return err
	}
	// The file is opened read-only, so a Close error carries nothing
	// actionable; closing here prevents leaking the descriptor.
	defer f.Close()

	r := NewReader(f)
	for {
		o, err := r.Read()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		for _, inst := range o.Insts {
			visit(inst)
		}
	}
}
@@ -0,0 +1,488 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xeddata
import (
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"path"
"reflect"
"strings"
"testing"
)
// Small database to generate state/xtype/width input files and validate parse results.
//
// Tests should use only those symbols that are defined inside test maps.
// For example, if {"foo"=>"bar"} element is not in statesMap, tests
// can't expect that "foo" gets replaced by "bar".
var (
// statesMap maps a state (macro) name to its expansion text.
// newStatesSource renders it as mock "all-state.txt" content.
statesMap = map[string]string{
"not64": "MODE!=2",
"mode64": "MODE=2",
"mode32": "MODE=1",
"mode16": "MODE=0",
"rexw_prefix": "REXW=1 SKIP_OSZ=1",
"norexw_prefix": "REXW=0 SKIP_OSZ=1",
"W1": "REXW=1 SKIP_OSZ=1",
"W0": "REXW=0 SKIP_OSZ=1",
"VV1": "VEXVALID=1",
"V66": "VEX_PREFIX=1",
"VF2": "VEX_PREFIX=2",
"VF3": "VEX_PREFIX=3",
"V0F": "MAP=1",
"V0F38": "MAP=2",
"V0F3A": "MAP=3",
"VL128": "VL=0",
"VL256": "VL=1",
}
// xtypesMap maps an xtype name to its definition.
// newXtypesSource renders each entry as a "name baseType size" line.
xtypesMap = map[string]*xtype{
"int": {name: "int", baseType: "INT", size: "0"},
"i8": {name: "i8", baseType: "INT", size: "8"},
"i64": {name: "i64", baseType: "INT", size: "64"},
"i32": {name: "i32", baseType: "INT", size: "32"},
"u8": {name: "u8", baseType: "UINT", size: "8"},
// NOTE(review): "SIGNLE" looks like a typo for "SINGLE" — confirm
// against the real all-element-types.txt before changing it.
"f32": {name: "f32", baseType: "SIGNLE", size: "32"},
"f64": {name: "f64", baseType: "DOUBLE", size: "64"},
"var": {name: "var", baseType: "VARIABLE", size: "0"},
}
// widthsMap maps a width name to its xtype and three size strings.
// newWidthsSource renders each entry with either one or three size columns.
// Some xtype names used here ("i1", "f80", "struct") have no entry in xtypesMap.
widthsMap = map[string]*width{
"q": {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
"z": {xtype: "int", sizes: [3]string{"2", "4", "4"}},
"b": {xtype: "u8", sizes: [3]string{"1", "1", "1"}},
"d": {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
"ps": {xtype: "f32", sizes: [3]string{"16", "16", "16"}},
"dq": {xtype: "i32", sizes: [3]string{"16", "16", "16"}},
"i32": {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
"i64": {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
"vv": {xtype: "var", sizes: [3]string{"0", "0", "0"}},
"mskw": {xtype: "i1", sizes: [3]string{"64bits", "64bits", "64bits"}},
"zf32": {xtype: "f32", sizes: [3]string{"512bits", "512bits", "512bits"}},
"zf64": {xtype: "f64", sizes: [3]string{"512bits", "512bits", "512bits"}},
"mem80real": {xtype: "f80", sizes: [3]string{"10", "10", "10"}},
"mfpxenv": {xtype: "struct", sizes: [3]string{"512", "512", "512"}},
}
)
// newStatesSource builds an in-memory stand-in for the "all-state.txt" file.
//
// Every statesMap entry becomes one "\t<name><expansion>" line (name padded
// to 20 columns) preceded by comment and blank lines; every third emitted
// entry also carries a trailing comment. Entry order follows map iteration.
func newStatesSource() io.Reader {
	out := new(bytes.Buffer)
	emitted := 0
	for name, expansion := range statesMap {
		withTrailer := emitted%3 == 0
		emitted++

		out.WriteString("# Line comment\n")
		out.WriteString("#\n\n\n")
		fmt.Fprintf(out, "\t%-20s%s", name, expansion)
		if withTrailer {
			out.WriteString("\t# Trailing comment")
		}
		out.WriteByte('\n')
	}
	return out
}
// newWidthsSource builds an in-memory stand-in for the "all-widths.txt" file.
//
// Every widthsMap entry becomes one line holding the name, the xtype and
// the sizes. The short single-size form is used only for even-numbered
// entries whose three sizes are all equal; all others list three sizes.
// Every third emitted entry also carries a trailing comment.
// Entry order follows map iteration.
func newWidthsSource() io.Reader {
	out := new(bytes.Buffer)
	emitted := 0
	for key, w := range widthsMap {
		out.WriteString("# Line comment\n")
		out.WriteString("#\n\n\n")

		first, second, third := w.sizes[0], w.sizes[1], w.sizes[2]
		switch {
		case emitted%2 == 0 && first == second && first == third:
			fmt.Fprintf(out, "\t%-16s%-12s%-8s", key, w.xtype, first)
		default:
			fmt.Fprintf(out, "\t%-16s%-12s%-8s%-8s%-8s", key, w.xtype, first, second, third)
		}

		if emitted%3 == 0 {
			out.WriteString("\t# Trailing comment")
		}
		out.WriteByte('\n')
		emitted++
	}
	return out
}
// newXtypesSource builds an in-memory stand-in for the "all-element-types.txt" file.
//
// Every xtypesMap value becomes one "\t<name> <baseType> <size>" line
// preceded by comment and blank lines; every third emitted entry also
// carries a trailing comment. Entry order follows map iteration.
func newXtypesSource() io.Reader {
	out := new(bytes.Buffer)
	emitted := 0
	for _, xt := range xtypesMap {
		withTrailer := emitted%3 == 0
		emitted++

		out.WriteString("# Line comment\n")
		out.WriteString("#\n\n\n")
		fmt.Fprintf(out, "\t%s %s %s", xt.name, xt.baseType, xt.size)
		if withTrailer {
			out.WriteString("\t# Trailing comment")
		}
		out.WriteByte('\n')
	}
	return out
}
// newTestDatabase returns a Database populated from the mock states,
// widths and xtypes sources, in that order.
// The calling test is aborted with t.Fatal if any of the loads fails.
func newTestDatabase(t *testing.T) *Database {
	db := new(Database)
	if err := db.LoadStates(newStatesSource()); err != nil {
		t.Fatal(err)
	}
	if err := db.LoadWidths(newWidthsSource()); err != nil {
		t.Fatal(err)
	}
	if err := db.LoadXtypes(newXtypesSource()); err != nil {
		t.Fatal(err)
	}
	return db
}
// TestContainsWord checks containsWord against a table of attribute
// strings. The cases expect a match only when attrName appears in attrs
// as a whole word delimited by spaces or the string boundaries; partial
// matches and '.'-adjacent matches must be rejected.
func TestContainsWord(t *testing.T) {
	tests := []struct {
		attrs    string
		attrName string
		output   bool
	}{
		{"ATT1", "ATT1", true},
		{" ATT1", "ATT1", true},
		{"ATT1 ", "ATT1", true},
		{" ATT1 ", "ATT1", true},
		{"ATT1 ATT2 ATT3", "ATT1", true},
		{"ATT1 ATT2 ATT3", "ATT2", true},
		{"ATT1 ATT2 ATT3", "ATT4", false},
		{"ATT1ATT1", "ATT1", false},
		{".ATT1", "ATT1", false},
		{".ATT1.", "ATT1", false},
		{"ATT1.", "ATT1", false},
		{"", "ATT1", false},
		{"AT", "ATT1", false},
		{"ATT 1", "ATT1", false},
		{" ATT1 ", "TT", false},
		{" ATT1 ", "T1", false},
		{" ATT1 ", "AT", false},
	}

	for _, test := range tests {
		output := containsWord(test.attrs, test.attrName)
		if output != test.output {
			// Errorf (not Fatalf) so that every failing row is reported.
			t.Errorf("containsWord(%q, %q):\nhave: %v\nwant: %v",
				test.attrs, test.attrName, output, test.output)
		}
	}
}
// TestParseWidths parses the mock widths source and compares the result
// with widthsMap: first entry by entry (for a focused failure message),
// then as a whole to catch unexpected extra keys.
func TestParseWidths(t *testing.T) {
	parsed, err := parseWidths(newWidthsSource())
	if err != nil {
		t.Fatal(err)
	}

	for name, want := range widthsMap {
		got := parsed[name]
		switch {
		case got == nil:
			t.Fatalf("missing key %s", name)
		case *got != *want:
			t.Fatalf("key %s:\nhave: %#v\nwant: %#v", name, got, want)
		}
	}

	if reflect.DeepEqual(parsed, widthsMap) {
		return
	}
	t.Errorf("widths output mismatch:\nhave: %#v\nwant: %#v", parsed, widthsMap)
}
// TestParseStates parses the mock states source and expects the result
// to be deeply equal to statesMap.
func TestParseStates(t *testing.T) {
	parsed, err := parseStates(newStatesSource())
	if err != nil {
		t.Fatal(err)
	}
	if reflect.DeepEqual(parsed, statesMap) {
		return
	}
	t.Errorf("states output mismatch:\nhave: %v\nwant: %v", parsed, statesMap)
}
// TestParseXtypes parses the mock xtypes source and compares the result
// with xtypesMap: first entry by entry (for a focused failure message),
// then as a whole to catch unexpected extra keys.
func TestParseXtypes(t *testing.T) {
	parsed, err := parseXtypes(newXtypesSource())
	if err != nil {
		t.Fatal(err)
	}

	for name, want := range xtypesMap {
		got := parsed[name]
		switch {
		case got == nil:
			t.Fatalf("missing key %s", name)
		case *got != *want:
			t.Fatalf("key %s:\nhave: %#v\nwant: %#v", name, got, want)
		}
	}

	if reflect.DeepEqual(parsed, xtypesMap) {
		return
	}
	t.Fatalf("xtype maps are not equal")
}
// TestNewOperand feeds operand strings to NewOperand and compares the
// parsed result with the expected Operand value field by field
// (via reflect.DeepEqual). A parse error aborts the whole test.
func TestNewOperand(t *testing.T) {
	cases := []struct {
		text string
		want Operand
	}{
		// Simple cases.
		{
			text: "REG0=XMM_R():r",
			want: Operand{Name: "REG0=XMM_R()", Action: "r"},
		},
		{
			text: "REG0=XMM_R:w",
			want: Operand{Name: "REG0=XMM_R", Action: "w"},
		},
		{
			text: "MEM0:rw:q",
			want: Operand{Name: "MEM0", Action: "rw", Width: "q"},
		},
		{
			text: "REG0=XMM_R():rcw:ps:f32",
			want: Operand{Name: "REG0=XMM_R()", Action: "rcw", Width: "ps", Xtype: "f32"},
		},
		{
			text: "IMM0:r:z",
			want: Operand{Name: "IMM0", Action: "r", Width: "z"},
		},
		{
			text: "IMM1:cw:b:i8",
			want: Operand{Name: "IMM1", Action: "cw", Width: "b", Xtype: "i8"},
		},

		// Optional fields and visibility.
		{
			text: "REG2:r:EXPL",
			want: Operand{Name: "REG2", Action: "r", Visibility: VisExplicit},
		},
		{
			text: "MEM1:w:d:IMPL",
			want: Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
		},
		{
			text: "MEM1:w:IMPL:d",
			want: Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
		},
		{
			text: "MEM1:w:d:SUPP:i32",
			want: Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
		},
		{
			text: "MEM1:w:SUPP:d:i32",
			want: Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
		},

		// Ambiguity: xtypes that look like widths.
		{
			text: "REG0=XMM_R():w:dq:i64",
			want: Operand{Name: "REG0=XMM_R()", Action: "w", Width: "dq", Xtype: "i64"},
		},

		// TXT=X field.
		{
			text: "REG1=MASK1():r:mskw:TXT=ZEROSTR",
			want: Operand{
				Name: "REG1=MASK1()", Action: "r", Width: "mskw",
				Attributes: map[string]bool{"TXT=ZEROSTR": true},
			},
		},
		{
			text: "MEM0:r:vv:f64:TXT=BCASTSTR",
			want: Operand{
				Name: "MEM0", Action: "r", Width: "vv", Xtype: "f64",
				Attributes: map[string]bool{"TXT=BCASTSTR": true},
			},
		},
		{
			text: "REG0=ZMM_R3():w:zf32:TXT=SAESTR",
			want: Operand{
				Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32",
				Attributes: map[string]bool{"TXT=SAESTR": true},
			},
		},
		{
			text: "REG0=ZMM_R3():w:zf64:TXT=ROUNDC",
			want: Operand{
				Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64",
				Attributes: map[string]bool{"TXT=ROUNDC": true},
			},
		},

		// Multi-source.
		{
			text: "REG2=ZMM_N3():r:zf32:MULTISOURCE4",
			want: Operand{
				Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
				Attributes: map[string]bool{"MULTISOURCE4": true},
			},
		},

		// Multi-source + EVEX.b context.
		{
			text: "REG2=ZMM_N3():r:zf32:MULTISOURCE4:TXT=SAESTR",
			want: Operand{
				Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
				Attributes: map[string]bool{"MULTISOURCE4": true, "TXT=SAESTR": true},
			},
		},
	}

	db := newTestDatabase(t)
	for _, tc := range cases {
		got, err := NewOperand(db, tc.text)
		if err != nil {
			t.Fatal(err)
		}
		if reflect.DeepEqual(*got, tc.want) {
			continue
		}
		t.Errorf("parse(`%s`): output mismatch\nhave: %#v\nwant: %#v",
			tc.text, got, tc.want,
		)
	}
}
// TestReader runs the cases stored in testdata/xed_objects.txt.
//
// The file is a sequence of cases, each introduced by a "------ <name>"
// line and split by a "====" line into reader input and expected output.
// For cases whose name contains "INVALID" the expected output is the exact
// error text; otherwise it is a JSON array with one object per expected
// instruction, and only the keys listed there are compared.
func TestReader(t *testing.T) {
	type test struct {
		name   string
		input  string
		output string
	}

	var tests []test
	{
		b, err := ioutil.ReadFile(path.Join("testdata", "xed_objects.txt"))
		if err != nil {
			t.Fatal(err)
		}
		// Everything before the first "------" marker is not a case.
		cases := strings.Split(string(b), "------")[1:]
		for _, c := range cases {
			eol := strings.Index(c, "\n")
			if eol < 0 {
				// Report broken testdata instead of panicking on c[:-1].
				t.Fatalf("testdata: case %q has no body", strings.TrimSpace(c))
			}
			name := c[:eol]
			parts := strings.Split(c[eol:], "====")
			if len(parts) < 2 {
				t.Fatalf("testdata: case %q has no ==== separator", strings.TrimSpace(name))
			}
			tests = append(tests, test{
				name:   strings.TrimSpace(name),
				input:  strings.TrimSpace(parts[0]),
				output: strings.TrimSpace(parts[1]),
			})
		}
	}

	for _, test := range tests {
		r := NewReader(strings.NewReader(test.input))
		objects, err := r.ReadAll()

		if strings.Contains(test.name, "INVALID") {
			switch {
			case err == nil:
				t.Errorf("%s: expected non-nil error", test.name)
			case err.Error() != test.output:
				t.Errorf("%s: error mismatch\nhave: `%s`\nwant: `%s`\n",
					test.name, err.Error(), test.output)
			default:
				// Only a matching error counts as a pass.
				t.Logf("PASS: %s", test.name)
			}
			continue
		}

		if err != nil {
			t.Fatalf("%s: %v", test.name, err)
		}

		var have []map[string]string
		for _, o := range objects {
			for _, inst := range o.Insts {
				var result map[string]string
				err := json.Unmarshal([]byte(inst.String()), &result)
				if err != nil {
					t.Fatalf("%s: %v", test.name, err)
				}
				have = append(have, result)
			}
		}
		var want []map[string]string
		err = json.Unmarshal([]byte(test.output), &want)
		if err != nil {
			t.Fatalf("%s: %v", test.name, err)
		}

		// have[i] is indexed below; a count mismatch must be reported
		// here, otherwise missing instructions panic and extra ones
		// go unnoticed.
		if len(have) != len(want) {
			t.Errorf("%s: insts count mismatch\nhave: %d\nwant: %d",
				test.name, len(have), len(want))
			continue
		}

		failed := false
		for i := range want {
			for k := range want[i] {
				if want[i][k] == have[i][k] {
					continue
				}
				failed = true
				// i - index inside array of JSON objects.
				// k - i'th object key (example: "Iclass").
				t.Errorf("%s: insts[%d].%s mismatch\nhave: `%s`\nwant: `%s`",
					test.name, i, k, have[i][k], want[i][k])
			}
		}
		// Track failure per case: t.Failed() is sticky and would hide
		// the PASS line of every case that follows a failing one.
		if !failed {
			t.Logf("PASS: %s", test.name)
		}
	}
}
// TestMacroExpand checks ExpandStates against the test database: names
// defined in statesMap are expected to be replaced by their expansion
// when they appear as whole words, and any other text is expected to
// come back unchanged. Failures are reported by zero-based case index.
func TestMacroExpand(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{in: "a not64 b c", want: "a MODE!=2 b c"},
		{in: "mode16 W0", want: "MODE=0 REXW=0 SKIP_OSZ=1"},
		{in: "W1 mode32", want: "REXW=1 SKIP_OSZ=1 MODE=1"},
		{in: "W1 W1", want: "REXW=1 SKIP_OSZ=1 REXW=1 SKIP_OSZ=1"},
		{in: "W1W1", want: "W1W1"},
		{in: "mode64 1 2 3 rexw_prefix", want: "MODE=2 1 2 3 REXW=1 SKIP_OSZ=1"},
		{in: "a b c", want: "a b c"},
		{in: "mode16 mode32 mode16 mode16", want: "MODE=0 MODE=1 MODE=0 MODE=0"},
		{in: "V0F38 V0FV0F V0FV0F38", want: "MAP=2 V0FV0F V0FV0F38"},
		{
			in:   "VV1 0x2E V66 V0F38 VL128 norexw_prefix MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
			want: "VEXVALID=1 0x2E VEX_PREFIX=1 MAP=2 VL=0 REXW=0 SKIP_OSZ=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
		},
	}

	db := newTestDatabase(t)
	for n, tc := range cases {
		if got := ExpandStates(db, tc.in); got != tc.want {
			t.Errorf("test %d: output mismatch:\nhave: `%s`\nwant: `%s`",
				n, got, tc.want)
		}
	}
}