whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,281 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// OperandSizeMode is an XED enum-like constant type that
+// describes operand size mode (66H prefix).
+type OperandSizeMode int
+
+// AddressSizeMode is an XED enum-like constant type that
+// describes address size mode (67H prefix).
+type AddressSizeMode int
+
+// CPUMode is an XED enum-like constant type that
+// describes availability in certain CPU mode.
+type CPUMode int
+
+// Operand size modes, called OSZ by XED.
+const (
+	OpSize16 = OperandSizeMode(iota)
+	OpSize32
+	OpSize64
+)
+
+// Address size modes, called ASZ by XED.
+const (
+	AddrSize16 = AddressSizeMode(iota)
+	AddrSize32
+	AddrSize64
+)
+
+// CPU modes, called MODE by XED.
+const (
+	Mode16 = CPUMode(iota)
+	Mode32
+	Mode64
+)
+
+// sizeStrings holds the textual form of every size enumeration value,
+// indexed by that value.
+var sizeStrings = [...]string{"16", "32", "64"}
+
+// sizeString maps size enumeration value to its string representation.
+//
+// Panics with "illegal size value" message if size is negative
+// or is not less than len(sizeStrings).
+func sizeString(size int) string {
+	// Panic more gracefully than with "index out of range".
+	// If client code specified invalid size enumeration,
+	// this is programming error that should be fixed, not "handled".
+	// Negative values are checked too: they would otherwise reach
+	// the indexing expression below.
+	if size < 0 || size >= len(sizeStrings) {
+		panic(fmt.Sprintf("illegal size value: %d", size))
+	}
+	return sizeStrings[size]
+}
+
+// String converts osz to its bit size text by delegating to sizeString.
+// Illegal enumeration values result in panic.
+func (osz OperandSizeMode) String() string {
+	return sizeString(int(osz))
+}
+
+// String converts asz to its bit size text by delegating to sizeString.
+// Illegal enumeration values result in panic.
+func (asz AddressSizeMode) String() string {
+	return sizeString(int(asz))
+}
+
+// Database holds the lookup tables (widths, states and xtypes)
+// that are required to properly handle XED datafiles.
+type Database struct {
+	widths map[string]*width // width name => record; set by LoadWidths (all-widths.txt)
+	states map[string]string // state name => replacement; set by LoadStates (all-state.txt)
+	xtypes map[string]*xtype // xtype name => record; set by LoadXtypes (all-element-types.txt)
+}
+
+// width is an "all-widths.txt" record.
+type width struct {
+	// xtype is the default xtype name (examples: int, i8, f32).
+	xtype string
+
+	// sizes holds 16, 32 and 64 bit mode sizes, in that order (all may have same value).
+	sizes [3]string
+}
+
+// xtype is an "all-element-types.txt" record.
+type xtype struct {
+	// name is the xtype identifier.
+	name string
+
+	// baseType specifies xtype base type.
+	// See "all-element-type-base.txt".
+	baseType string
+
+	// size is an operand data size in bits, kept as text.
+	size string
+}
+
+// NewDatabase returns Database that loads everything
+// it can find in xedPath.
+// Missing lookup file is not an error, but error during
+// parsing of found file is.
+//
+// Lookup:
+//
+//	"$xedPath/all-state.txt" => db.LoadStates()
+//	"$xedPath/all-widths.txt" => db.LoadWidths()
+//	"$xedPath/all-element-types.txt" => db.LoadXtypes()
+//
+// $xedPath is the interpolated value of function argument.
+//
+// The call NewDatabase("") is valid and returns empty database.
+// Load methods can be used to read lookup files one-by-one.
+//
+// A non-empty xedPath must name an existing directory, otherwise
+// nil Database and non-nil error are returned. If loading of a
+// found file fails, the error is returned together with the
+// database that holds everything loaded before the failure.
+func NewDatabase(xedPath string) (*Database, error) {
+	var db Database
+
+	// Honor the documented contract: empty path means "load nothing".
+	// os.Stat("") reports an error, so without this check
+	// the promised empty database could never be obtained.
+	if xedPath == "" {
+		return &db, nil
+	}
+
+	stat, err := os.Stat(xedPath)
+	if err != nil {
+		return nil, err
+	}
+	if !stat.IsDir() {
+		return nil, errors.New("xedPath is not directory")
+	}
+
+	// Files are loaded in a fixed order: states, widths, xtypes.
+	lookups := []struct {
+		filename string
+		load     func(io.Reader) error
+	}{
+		{"all-state.txt", db.LoadStates},
+		{"all-widths.txt", db.LoadWidths},
+		{"all-element-types.txt", db.LoadXtypes},
+	}
+	for _, l := range lookups {
+		if err := loadLookupFile(filepath.Join(xedPath, l.filename), l.load); err != nil {
+			return &db, err
+		}
+	}
+
+	return &db, nil
+}
+
+// loadLookupFile opens the file at path and passes it to load.
+// A file that can't be opened is skipped without an error.
+// The file is closed before return, so file descriptors are not leaked.
+func loadLookupFile(path string, load func(io.Reader) error) error {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil
+	}
+	defer f.Close()
+	return load(f)
+}
+
+// LoadWidths reads XED widths definitions from r and updates db.
+// "widths" are 16/32/64 bit mode type sizes.
+// See "$XED/obj/dgen/all-widths.txt".
+func (db *Database) LoadWidths(r io.Reader) error {
+	var err error
+	db.widths, err = parseWidths(r)
+	return err
+}
+
+// LoadStates reads XED states definitions from r and updates db.
+// "states" are simple macro substitutions without parameters.
+// See "$XED/obj/dgen/all-state.txt".
+func (db *Database) LoadStates(r io.Reader) error {
+	var err error
+	db.states, err = parseStates(r)
+	return err
+}
+
+// LoadXtypes reads XED xtypes definitions from r and updates db.
+// "xtypes" are low-level XED type names.
+// See "$XED/obj/dgen/all-element-types.txt".
+// See "$XED/obj/dgen/all-element-type-base.txt".
+func (db *Database) LoadXtypes(r io.Reader) error {
+	var err error
+	db.xtypes, err = parseXtypes(r)
+	return err
+}
+
+// WidthSize looks up the width record by its name and returns
+// the size string that corresponds to the mode m.
+// Empty string is returned for width names that are not loaded.
+// For some widths output is the same for any valid value of m.
+func (db *Database) WidthSize(width string, m OperandSizeMode) string {
+	if info := db.widths[width]; info != nil {
+		return info.sizes[m]
+	}
+	return ""
+}
+
+func parseWidths(r io.Reader) (map[string]*width, error) {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse widths: %v", err)
+	}
+
+	// Lines have two forms:
+	// 1. name xtype size [# comment]
+	// 2. name xtype size16, size32, size64 [# comment]
+	reLine := regexp.MustCompile(`(^\s*\w+\s+\w+\s+\w+\s+\w+\s+\w+)|(^\s*\w+\s+\w+\s+\w+)`)
+
+	widths := make(map[string]*width, 128)
+	for _, l := range bytes.Split(data, []byte("\n")) {
+		var name, xtype, size16, size32, size64 string
+
+		if m := reLine.FindSubmatch(l); m != nil {
+			var f [][]byte
+			if m[1] != nil {
+				f = bytes.Fields(m[1])
+			} else {
+				f = bytes.Fields(m[2])
+			}
+
+			name = string(f[0])
+			xtype = string(f[1])
+			if len(f) > 3 {
+				size16 = string(f[2])
+				size32 = string(f[3])
+				size64 = string(f[4])
+			} else {
+				size16 = string(f[2])
+				size32 = size16
+				size64 = size16
+			}
+		}
+		if name != "" {
+			widths[name] = &width{
+				xtype: xtype,
+				sizes: [3]string{size16, size32, size64},
+			}
+		}
+	}
+
+	return widths, nil
+}
+
+// parseStates reads all data from r and collects state macro
+// definitions from it. Lines that do not match the expected
+// form are skipped. Returned map is keyed by the state name;
+// values are replacements with surrounding whitespace removed.
+func parseStates(r io.Reader) (map[string]string, error) {
+	raw, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse states: %v", err)
+	}
+
+	// Lines have form of "name ...replacements [# comment]".
+	// This regexp captures the name and everything until line end or comment.
+	lineRE := regexp.MustCompile(`^\s*(\w+)\s+([^#]+)`)
+
+	result := make(map[string]string, 128)
+	for _, line := range strings.Split(string(raw), "\n") {
+		match := lineRE.FindStringSubmatch(line)
+		if match == nil {
+			continue
+		}
+		result[match[1]] = strings.TrimSpace(match[2])
+	}
+
+	return result, nil
+}
+
+// parseXtypes reads all data from r and collects xtype records from it.
+// Lines that do not have "name baseType size" form are skipped.
+// Returned map is keyed by the xtype name.
+func parseXtypes(r io.Reader) (map[string]*xtype, error) {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("parse xtypes: %v", err)
+	}
+
+	// Lines have form of "name baseType size [# comment]".
+	// Whitespace between baseType and size is mandatory, otherwise
+	// a two-column line like "foo INT8" would be accepted
+	// with "INT" as a base type and "8" as a size.
+	lineRE := regexp.MustCompile(`^\s*(\w+)\s+(\w+)\s+(\d+)`)
+
+	xtypes := make(map[string]*xtype)
+	for _, l := range strings.Split(string(data), "\n") {
+		if m := lineRE.FindStringSubmatch(l); m != nil {
+			name, baseType, size := m[1], m[2], m[3]
+			xtypes[name] = &xtype{
+				name:     name,
+				baseType: baseType,
+				size:     size,
+			}
+		}
+	}
+
+	return xtypes, nil
+}
@@ -0,0 +1,50 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package xeddata provides utilities to work with XED datafiles.
+//
+// Main features:
+//   - Fundamental XED enumerations (CPU modes, operand sizes, ...)
+//   - XED objects and their components
+//   - XED datafiles reader (see below)
+//   - Utility functions like ExpandStates
+//
+// The set of file formats that is understood is the minimal
+// set required to generate x86.csv from XED tables:
+//   - states - simple macro substitutions used in patterns
+//   - widths - mappings from width names to their size
+//   - element-types - XED xtype information
+//   - objects - XED objects that constitute "the tables"
+//
+// Collectively, those files are called "datafiles".
+//
+// Terminology is borrowed from XED itself;
+// where appropriate, x86csv names are provided
+// as an alternative.
+//
+// "$XED/foo/bar.txt" notation is used to specify a path to "foo/bar.txt"
+// file under local XED source repository folder.
+//
+// The default usage scheme:
+//  1. Open "XED database" to load required metadata.
+//  2. Read XED file with objects definitions.
+//  3. Operate on XED objects.
+//
+// See example_test.go for complete examples.
+//
+// It is required to build Intel XED before attempting to use
+// its datafiles, as this package expects "all" versions that
+// are concatenated final versions of datafiles.
+// If "$XED/obj/dgen/" does not contain relevant files,
+// then either this documentation is stale or your XED is not built.
+//
+// To see examples of "XED objects" see "testdata/xed_objects.txt".
+//
+// Intel XED https://github.com/intelxed/xed provides all documentation
+// that can be required to understand datafiles.
+// The "$XED/misc/engineering-notes.txt" is particularly useful.
+// For convenience, the most important notes are spread across package comments.
+//
+// Tested with XED 088c48a2efa447872945168272bcd7005a7ddd91.
+package xeddata
@@ -0,0 +1,180 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata_test
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"golang.org/x/arch/x86/xeddata"
+)
+
+// The "testdata/xedpath" directory contains XED metadata files
+// that are supposed to be used for Database initialization.
+
+// Note that XED objects in this file are not real,
+// instructions they describe are fictional.
+
+// This example shows how to print raw XED objects using Reader.
+// Objects are called "raw" because some of their fields may
+// require additional transformations like macro (states) expansion.
+func ExampleReader() {
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: VEXADD
+EXCEPTIONS: avx-type-zero
+CPL: 2000
+CATEGORY: AVX-Q
+EXTENSION: AVX-Q
+ATTRIBUTES: A B C
+PATTERN: VV1 0x07 VL128 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+}
+
+{
+ICLASS: COND_MOV_Z
+CPL: 210
+CATEGORY: MOV_IF_COND_MET
+EXTENSION: BASE
+ISA_SET: COND_MOV
+FLAGS: READONLY [ zf-tst ]
+
+PATTERN: 0x0F 0x4F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=GPRv_R():cw MEM0:r:width_v
+PATTERN: 0x0F 0x4F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=GPRv_R():cw REG1=GPRv_B():r
+}`)
+
+	objects, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	for _, o := range objects {
+		fmt.Printf("%s (%s):\n", o.Opcode(), o.Extension)
+		for _, inst := range o.Insts {
+			fmt.Printf("\t[%d] %s\n", inst.Index, inst.Operands)
+		}
+	}
+
+	//Output:
+	// VEXADD (AVX-Q):
+	// 	[0] REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+	// COND_MOV_Z (BASE):
+	// 	[0] REG0=GPRv_R():cw MEM0:r:width_v
+	// 	[1] REG0=GPRv_R():cw REG1=GPRv_B():r
+}
+
+// This example shows how to use ExpandStates and its effects:
+// every instruction pattern is printed before and after the expansion.
+func ExampleExpandStates() {
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: VEXADD
+CPL: 3
+CATEGORY: ?
+EXTENSION: ?
+ATTRIBUTES: AT_A AT_B
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 MEM0:r:width_dq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=XMM_R():w:width_dq:fword64 REG1=XMM_N():r:width_dq:fword64 REG2=XMM_B():r:width_dq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()
+OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 MEM0:r:qq:fword64
+
+PATTERN: _M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]
+OPERANDS: REG0=YMM_R():w:qq:fword64 REG1=YMM_N():r:qq:fword64 REG2=YMM_B():r:qq:fword64
+}`)
+
+	objs, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+	db, err := xeddata.NewDatabase(xedPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	for _, obj := range objs {
+		for i := range obj.Insts {
+			pattern := obj.Insts[i].Pattern
+			expanded := xeddata.ExpandStates(db, pattern)
+			fmt.Printf("old: %q\n", pattern)
+			fmt.Printf("new: %q\n", expanded)
+		}
+	}
+
+	//Output:
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_128 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=0 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()"
+	// old: "_M_VV_TRUE 0x58  _M_VEX_P_66 _M_VLEN_256 _M_MAP_0F MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+	// new: "VEXVALID=1 0x58 VEX_PREFIX=1 VL=1 MAP=1 MOD[0b11] MOD=3 REG[rrr] RM[nnn]"
+}
+
+// This example shows how to handle Inst "OPERANDS" field.
+// Every operand is decoded with NewOperand; for visible operands
+// their 32 and 64 bit mode sizes are printed as well.
+func ExampleOperand() {
+	const xedPath = "testdata/xedpath"
+
+	input := strings.NewReader(`
+{
+ICLASS: ADD_N_TIMES # Like IMUL
+CPL: 3
+CATEGORY: BINARY
+EXTENSION: BASE
+ISA_SET: I86
+FLAGS: MUST [ of-mod sf-u zf-u af-u pf-u cf-mod ]
+
+PATTERN: 0xAA MOD[mm] MOD!=3 REG[0b101] RM[nnn] MODRM()
+OPERANDS: MEM0:r:width_v REG0=AX:rw:SUPP REG1=DX:w:SUPP
+}`)
+
+	objs, err := xeddata.NewReader(input).ReadAll()
+	if err != nil {
+		log.Fatal(err)
+	}
+	db, err := xeddata.NewDatabase(xedPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Single instruction is enough for this example.
+	rawOperands := strings.Fields(objs[0].Insts[0].Operands)
+	for i, raw := range rawOperands {
+		op, err := xeddata.NewOperand(db, raw)
+		if err != nil {
+			log.Fatalf("parse operand #%d: %+v", i, err)
+		}
+
+		visible := op.IsVisible()
+		visibility := "explicit"
+		if !visible {
+			visibility = "implicit"
+		}
+		fmt.Printf("(%s) %s:\n", visibility, raw)
+
+		fmt.Printf("\tname: %q\n", op.Name)
+		if !visible {
+			continue
+		}
+		size32 := db.WidthSize(op.Width, xeddata.OpSize32)
+		size64 := db.WidthSize(op.Width, xeddata.OpSize64)
+		fmt.Printf("\t32/64bit width: %s/%s bytes\n", size32, size64)
+	}
+
+	//Output:
+	// (explicit) MEM0:r:width_v:
+	// 	name: "MEM0"
+	// 	32/64bit width: 4/8 bytes
+	// (implicit) REG0=AX:rw:SUPP:
+	// 	name: "REG0=AX"
+	// (implicit) REG1=DX:w:SUPP:
+	// 	name: "REG1=DX"
+}
@@ -0,0 +1,261 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"encoding/json"
+	"strings"
+)
+
+// An Object is a single "dec/enc-instruction" XED object from datafiles.
+//
+// Field names and their comments are borrowed from Intel XED
+// engineering notes (see "$XED/misc/engineering-notes.txt").
+//
+// Field values are always trimmed (i.e. no leading/trailing whitespace).
+//
+// Missing optional members are expressed with an empty string.
+//
+// Object contains multiple Inst elements that represent concrete
+// instruction with encoding pattern and operands description.
+type Object struct {
+	// Iclass is instruction class name (opcode).
+	// Iclass alone is not enough to uniquely identify machine instructions.
+	// Example: "PSRLW".
+	Iclass string
+
+	// Disasm is substituted name when a simple conversion
+	// from iclass is inappropriate.
+	// Never combined with DisasmIntel or DisasmATTSV.
+	// Example: "syscall".
+	//
+	// Optional.
+	Disasm string
+
+	// DisasmIntel is like Disasm, but with Intel syntax.
+	// If present, usually comes with DisasmATTSV.
+	// Example: "jmp far".
+	//
+	// Optional.
+	DisasmIntel string
+
+	// DisasmATTSV is like Disasm, but with AT&T/SysV syntax.
+	// If present, usually comes with DisasmIntel.
+	// Example: "ljmp".
+	//
+	// Optional.
+	DisasmATTSV string
+
+	// Attributes describes name set for bits in the binary attributes field.
+	// Example: "NOP X87_CONTROL NOTSX".
+	//
+	// Optional. If not present, zero attribute set is implied.
+	Attributes string
+
+	// Uname is unique name used for deleting / replacing instructions.
+	//
+	// Optional. Provided for completeness, mostly useful for XED internal usage.
+	Uname string
+
+	// CPL is instruction current privilege level restriction.
+	// Can have value of "0" or "3".
+	CPL string
+
+	// Category is an ad-hoc categorization of instructions.
+	// Example: "SEMAPHORE".
+	Category string
+
+	// Extension is an ad-hoc grouping of instructions.
+	// If no ISASet is specified, this is used instead.
+	// Example: "3DNOW".
+	Extension string
+
+	// Exceptions is an exception set name.
+	// Example: "SSE_TYPE_7".
+	//
+	// Optional. Empty exception category generally means that
+	// instruction generates no exceptions.
+	Exceptions string
+
+	// ISASet is a name for the group of instructions that
+	// introduced this feature.
+	// Example: "I286PROTECTED".
+	//
+	// Older objects only defined Extension field.
+	// Newer objects may contain both Extension and ISASet fields.
+	// For some objects Extension==ISASet.
+	// Both fields are required to do precise CPUID-like decisions.
+	//
+	// Optional.
+	ISASet string
+
+	// Flags describes read/written flag bit values.
+	// Example: "MUST [ of-u sf-u af-u pf-u cf-mod ]".
+	//
+	// Optional. If not present, flags are neither read nor written.
+	Flags string
+
+	// Comment is a hopefully useful comment.
+	//
+	// Optional.
+	Comment string
+
+	// Version is the object revision.
+	//
+	// Optional.
+	Version string
+
+	// RealOpcode marks unstable (not in SDM yet) instructions with "N".
+	// Normally, always "Y" or not present at all.
+	//
+	// Optional.
+	RealOpcode string
+
+	// Insts are concrete instruction templates that are derived from containing Object.
+	// Inst contains fields PATTERN, OPERANDS, IFORM in enc/dec instruction.
+	Insts []*Inst
+}
+
+// Inst represents a single instruction template.
+//
+// Some templates contain expandable (macro) pattern and operands
+// which means that more than one real instruction
+// is expressed by the template.
+type Inst struct {
+	// Object that contains properties that are shared between multiple
+	// Inst objects. It is embedded, so Object fields are accessible via Inst.
+	*Object
+
+	// Index is the position inside XED object.
+	// Object.Insts[Index] returns this inst.
+	Index int
+
+	// Pattern is the sequence of bits and nonterminals used to
+	// decode/encode an instruction.
+	// Example: "0x0F 0x28 no_refining_prefix MOD[0b11] MOD=3 REG[rrr] RM[nnn]".
+	Pattern string
+
+	// Operands are instruction arguments, typically registers,
+	// memory operands and pseudo-resources. Separated by space.
+	// Example: "MEM0:rcw:b REG0=GPR8_R():r REG1=XED_REG_AL:rcw:SUPP".
+	Operands string
+
+	// Iform is a name for the pattern that starts with the
+	// iclass and bakes in the operands. If omitted, XED
+	// tries to generate one. We often add custom suffixes
+	// to these to disambiguate certain combinations.
+	// Example: "MOVAPS_XMMps_XMMps_0F28".
+	//
+	// Optional.
+	Iform string
+}
+
+// Opcode returns the first non-empty name among Iclass, Disasm,
+// DisasmIntel, DisasmATTSV and Uname (checked in that order).
+// Empty string is returned if none of these fields is initialized.
+func (o *Object) Opcode() string {
+	candidates := [...]string{
+		o.Iclass,
+		o.Disasm,
+		o.DisasmIntel,
+		o.DisasmATTSV,
+		o.Uname,
+	}
+	for _, name := range candidates {
+		if name != "" {
+			return name
+		}
+	}
+	return ""
+}
+
+// HasAttribute reports whether o.Attributes contains the attribute
+// with specified name. The lookup is delegated to containsWord,
+// so it is done at "word" level: substring names will not match.
+func (o *Object) HasAttribute(name string) bool {
+	attrs := o.Attributes
+	return containsWord(attrs, name)
+}
+
+// String returns pretty-printed inst representation.
+//
+// The output is an indented JSON object that holds both shared
+// Object properties and Inst own properties. JSON validity of
+// the output is not guaranteed to be preserved.
+func (inst *Inst) String() string {
+	// An auxiliary struct is marshalled instead of inst itself
+	// to get flat printed representation.
+	// A struct (not a map) is used to ensure consistent props order.
+	var flat struct {
+		Iclass      string
+		Disasm      string `json:",omitempty"`
+		DisasmIntel string `json:",omitempty"`
+		DisasmATTSV string `json:",omitempty"`
+		Attributes  string `json:",omitempty"`
+		Uname       string `json:",omitempty"`
+		CPL         string
+		Category    string
+		Extension   string
+		Exceptions  string `json:",omitempty"`
+		ISASet      string `json:",omitempty"`
+		Flags       string `json:",omitempty"`
+		Comment     string `json:",omitempty"`
+		Version     string `json:",omitempty"`
+		RealOpcode  string `json:",omitempty"`
+		Pattern     string
+		Operands    string
+		Iform       string `json:",omitempty"`
+	}
+
+	// Properties that come from the embedded Object.
+	flat.Iclass = inst.Iclass
+	flat.Disasm = inst.Disasm
+	flat.DisasmIntel = inst.DisasmIntel
+	flat.DisasmATTSV = inst.DisasmATTSV
+	flat.Attributes = inst.Attributes
+	flat.Uname = inst.Uname
+	flat.CPL = inst.CPL
+	flat.Category = inst.Category
+	flat.Extension = inst.Extension
+	flat.Exceptions = inst.Exceptions
+	flat.ISASet = inst.ISASet
+	flat.Flags = inst.Flags
+	flat.Comment = inst.Comment
+	flat.Version = inst.Version
+	flat.RealOpcode = inst.RealOpcode
+
+	// Properties of the inst itself.
+	flat.Pattern = inst.Pattern
+	flat.Operands = inst.Operands
+	flat.Iform = inst.Iform
+
+	out, err := json.MarshalIndent(flat, "", "  ")
+	if err != nil {
+		panic(err)
+	}
+	return string(out)
+}
+
+// ExpandStates splits s into whitespace-separated words, replaces
+// every word that names a state macro with its non-empty replacement
+// and returns the words joined with a single space.
+// This requires db "states" to be loaded.
+func ExpandStates(db *Database, s string) string {
+	macros := db.states
+	words := strings.Fields(s)
+	for i, word := range words {
+		// Unknown names and empty replacements leave the word as is.
+		if repl, ok := macros[word]; ok && repl != "" {
+			words[i] = repl
+		}
+	}
+	return strings.Join(words, " ")
+}
+
+// containsWord searches for whole word match in s.
+//
+// An occurrence of word is a whole word match if it is delimited
+// by a space character or by the string boundary on both sides.
+// Every occurrence is examined, so a word is found even if it
+// also occurs earlier as a part of a longer word ("NOPX NOP").
+func containsWord(s, word string) bool {
+	// The from <= len(s) bound keeps s[from:] valid for any word,
+	// including the empty one that matches at every position.
+	for from := 0; from <= len(s); {
+		i := strings.Index(s[from:], word)
+		if i == -1 {
+			return false
+		}
+		i += from // Make the index relative to s
+		end := i + len(word)
+		leftOK := i == 0 || s[i-1] == ' '
+		rightOK := end == len(s) || s[end] == ' '
+		if leftOK && rightOK {
+			return true
+		}
+		// Not a whole word; continue right after the occurrence start.
+		from = i + 1
+	}
+	return false
+}
@@ -0,0 +1,158 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"errors"
+	"strings"
+)
+
+// OperandVisibility is an enumeration of operand visibility kinds
+// (in XED terms).
+type OperandVisibility int
+
+// Operand visibility kinds.
+const (
+	// VisExplicit is a default operand visibility.
+	// Explicit operand is "real" kind of operands that
+	// is shown in syntax and can be specified by the programmer.
+	VisExplicit = OperandVisibility(iota)
+
+	// VisImplicit is for fixed arg (like EAX); usually shown in syntax.
+	VisImplicit
+
+	// VisSuppressed is like VisImplicit, but not shown in syntax.
+	// In some very rare exceptions, they are also shown in syntax string.
+	VisSuppressed
+
+	// VisEcond is encoder-only conditions. Can be ignored.
+	VisEcond
+)
+
+// Operand holds data that is encoded inside
+// instruction's "OPERANDS" field.
+//
+// Use NewOperand function to decode operand fields into Operand object.
+type Operand struct {
+	// Name is an ID with optional nonterminal name part.
+	//
+	// Possible values: "REG0=GPRv_B", "REG1", "MEM0", ...
+	//
+	// If nonterminal part is present (see NonterminalName method), name
+	// can be split into LHS and RHS with NameLHS and NameRHS methods.
+	Name string
+
+	// Action describes argument types.
+	//
+	// Possible values: "r", "w", "rw", "cr", "cw", "crw".
+	// Optional "c" prefix represents conditional access.
+	Action string
+
+	// Width descriptor. It can express simple width like "w" (word, 16bit)
+	// or meta-width like "v", which corresponds to {16, 32, 64} bits.
+	//
+	// Possible values: "", "q", "ds", "dq", ...
+	// Optional.
+	Width string
+
+	// Xtype holds XED-specific type information.
+	//
+	// Possible values: "", "f64", "i32", ...
+	// Optional.
+	Xtype string
+
+	// Attributes serves as container for all other properties.
+	//
+	// Possible values:
+	//   EVEX.b context {
+	//     TXT=ZEROSTR  - zeroing
+	//     TXT=SAESTR   - suppress all exceptions
+	//     TXT=ROUNDC   - rounding
+	//     TXT=BCASTSTR - broadcasting
+	//   }
+	//   MULTISOURCE4 - 4FMA multi-register operand.
+	//
+	// Optional. For most operands, it's nil.
+	Attributes map[string]bool
+
+	// Visibility tells if operand is explicit, implicit, suppressed or ECOND.
+	Visibility OperandVisibility
+}
+
+// xedVisibilities maps operand field text to the
+// visibility kind it denotes. Used by NewOperand.
+var xedVisibilities = map[string]OperandVisibility{
+	"ECOND": VisEcond,
+	"EXPL":  VisExplicit,
+	"IMPL":  VisImplicit,
+	"SUPP":  VisSuppressed,
+}
+
+// NewOperand decodes operand string.
+//
+// The string is a ":" separated list of fields. First field is the
+// operand name, second is the action; every other field is classified
+// as width, visibility, xtype or (if nothing else fits) an attribute.
+//
+// See "$XED/pysrc/opnds.py" to learn about fields format
+// and valid combinations.
+//
+// Requires database with xtypes and widths info.
+// Error is returned if db widths are not loaded or if s is empty.
+func NewOperand(db *Database, s string) (*Operand, error) {
+	if db.widths == nil {
+		return nil, errors.New("Database.widths is nil")
+	}
+	// strings.Split with non-empty separator never returns an empty
+	// slice, so the empty operand string has to be detected explicitly.
+	if s == "" {
+		return nil, errors.New("empty operand fields string")
+	}
+
+	fields := strings.Split(s, ":")
+	if len(fields) == 1 {
+		// Name-only operand.
+		return &Operand{Name: fields[0]}, nil
+	}
+	var op Operand
+
+	// First two fields are fixed.
+	op.Name = fields[0]
+	op.Action = fields[1]
+
+	// Optional fields.
+	for _, f := range fields[2:] {
+		if db.widths[f] != nil && op.Width == "" {
+			// Only the first width-like field is taken as a width.
+			op.Width = f
+		} else if vis, ok := xedVisibilities[f]; ok {
+			op.Visibility = vis
+		} else if xtype := db.xtypes[f]; xtype != nil {
+			op.Xtype = f
+		} else {
+			// Attributes map is allocated lazily: most operands have none.
+			if op.Attributes == nil {
+				op.Attributes = make(map[string]bool)
+			}
+			op.Attributes[f] = true
+		}
+	}
+
+	return &op, nil
+}
+
+// NonterminalName reports whether op.Name consists
+// of LHS and RHS parts, i.e. whether it contains "=".
+//
+// RHS is non-terminal name lookup function expression.
+// Example: "REG0=GPRv()" has "GPRv()" name lookup function.
+func (op *Operand) NonterminalName() bool {
+	return strings.Index(op.Name, "=") != -1
+}
+
+// NameLHS returns left hand side part of the non-terminal name:
+// everything before the first "=". Names without "=" are returned as is.
+// Example: NameLHS("REG0=GPRv()") => "REG0".
+func (op *Operand) NameLHS() string {
+	if i := strings.Index(op.Name, "="); i >= 0 {
+		return op.Name[:i]
+	}
+	return op.Name
+}
+
+// NameRHS returns right hand side part of the non-terminal name.
+// Example: NameRHS("REG0=GPRv()") => "GPRv()".
+//
+// Empty string is returned for names that have no RHS part
+// (see NonterminalName).
+func (op *Operand) NameRHS() string {
+	parts := strings.Split(op.Name, "=")
+	if len(parts) < 2 {
+		// No "=" in the name; indexing parts[1] would panic.
+		return ""
+	}
+	return parts[1]
+}
+
+// IsVisible reports whether op is an operand that is usually
+// shown in syntax strings: explicit and implicit operands are.
+func (op *Operand) IsVisible() bool {
+	switch op.Visibility {
+	case VisExplicit, VisImplicit:
+		return true
+	default:
+		return false
+	}
+}
@@ -0,0 +1,95 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"sort"
+	"strings"
+)
+
+// PatternSet is a set of instruction PATTERN properties.
+// It provides set operations on them.
+type PatternSet map[string]bool
+
+// NewPatternSet builds PatternSet out of pattern string:
+// every whitespace-separated field of pattern becomes a set member.
+func NewPatternSet(pattern string) PatternSet {
+	fields := strings.Fields(pattern)
+	pset := PatternSet{}
+	for i := range fields {
+		pset[fields[i]] = true
+	}
+	return pset
+}
+
+// PatternAliases maps human-readable key to XED property.
+// It is an ordinary package-level map, so it can be extended.
+//
+// Used in PatternSet.Is.
+var PatternAliases = map[string]string{
+	"EVEX":    "VEXVALID=2",
+	"MemOnly": "MOD!=3",
+	"RegOnly": "MOD=3",
+	"VEX":     "VEXVALID=1",
+	"XOP":     "VEXVALID=3",
+}
+
+// String returns pattern printer representation.
+// All properties are sorted.
+func (pset PatternSet) String() string {
+	var keys []string
+	for k := range pset {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return strings.Join(keys, " ")
+}
+
+// Is reports whether set contains key k.
+// In contrast with direct pattern set lookup, k is first looked up
+// in PatternAliases; if a non-empty alias is found there,
+// the alias is used in set lookup instead of k.
+func (pset PatternSet) Is(k string) bool {
+	key := k
+	if alias, ok := PatternAliases[k]; ok && alias != "" {
+		key = alias
+	}
+	return pset[key]
+}
+
+// Replace inserts newKey if oldKey is defined.
+// oldKey is removed if insertion is performed.
+//
+// If oldKey is not in the set, the set is left unchanged.
+// If oldKey and newKey are the same, the key stays in the set.
+func (pset PatternSet) Replace(oldKey, newKey string) {
+	if !pset[oldKey] {
+		return
+	}
+	// Delete before insert: with the opposite order
+	// Replace(k, k) would remove the key it has just inserted.
+	delete(pset, oldKey)
+	pset[newKey] = true
+}
+
+// Index returns the position (inside keys) of the first key
+// that is contained in the set.
+// Returns -1 if the set does not contain any of given keys.
+func (pset PatternSet) Index(keys ...string) int {
+	for i := 0; i < len(keys); i++ {
+		if pset[keys[i]] {
+			return i
+		}
+	}
+	return -1
+}
+
+// Match is like MatchOrDefault("", keyval...).
+func (pset PatternSet) Match(keyval ...string) string {
+	return pset.MatchOrDefault("", keyval...)
+}
+
+// MatchOrDefault returns first matching key associated value.
+// Returns defaultValue if no match is found.
+//
+// Keyval structure can be described as {"k1", "v1", ..., "kN", "vN"}.
+// A trailing key that has no associated value is ignored.
+func (pset PatternSet) MatchOrDefault(defaultValue string, keyval ...string) string {
+	// The i+1 < len(keyval) condition guarantees that both the key and
+	// its value exist, so odd-length keyval can't cause a panic.
+	for i := 0; i+1 < len(keyval); i += 2 {
+		key, val := keyval[i], keyval[i+1]
+		if pset[key] {
+			return val
+		}
+	}
+	return defaultValue
+}
@@ -0,0 +1,211 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"regexp"
+	"strings"
+)
+
+// Reader reads enc/dec-instruction objects from XED datafile.
+type Reader struct {
+	// Line source. newReader installs Reader.split as its split function.
+	scanner *bufio.Scanner
+
+	// Scratch buffer that collects the lines of the object being read.
+	lines []string // Re-used between Read calls
+
+	// True if last line ends with newline escape (backslash).
+	// Written by split, consumed by scanLine.
+	joinLines bool
+}
+
+// NewReader returns a new Reader that reads from r.
+//
+// The input is consumed line by line through a bufio.Scanner
+// created with its default settings.
+func NewReader(r io.Reader) *Reader {
+	return newReader(bufio.NewScanner(r))
+}
+
+// newReader builds a Reader on top of the given scanner.
+// The scanner's split function is replaced by Reader.split,
+// which lets the reader observe '\'-terminated lines.
+func newReader(scanner *bufio.Scanner) *Reader {
+	reader := new(Reader)
+	reader.scanner = scanner
+	reader.lines = make([]string, 0, 64)
+	scanner.Split(reader.split)
+	return reader
+}
+
+// split implements bufio.SplitFunc for Reader.
+//
+// It delegates to bufio.ScanLines and additionally records in
+// r.joinLines whether the produced line ends with a backslash
+// (newline escape). Reader.scanLine consults that flag.
+// The flag is left untouched when ScanLines fails or yields no bytes.
+func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) {
+	advance, token, err := bufio.ScanLines(data, atEOF)
+	if err != nil || len(token) == 0 {
+		return advance, token, err
+	}
+	last := token[len(token)-1]
+	r.joinLines = last == '\\'
+	return advance, token, err
+}
+
+// Read reads single XED instruction object from
+// the stream backed by reader.
+//
+// Lines outside of "{" ... "}" are skipped. Lines found between the
+// opening "{" line and the closing "}" line are handed to parseLines.
+//
+// If there is no data left to be read,
+// returned error is io.EOF.
+// If the underlying scanner stopped because of an error
+// (I/O failure, over-long line), that error is returned instead of
+// being reported as io.EOF or as an unterminated object.
+func (r *Reader) Read() (*Object, error) {
+	for line := r.scanLine(); line != ""; line = r.scanLine() {
+		if line[0] != '{' {
+			continue
+		}
+		lines := r.lines[:0] // Object lines
+		for line := r.scanLine(); line != ""; line = r.scanLine() {
+			if line[0] == '}' {
+				// Keep the (possibly grown) buffer for the next call.
+				r.lines = lines
+				return r.parseLines(lines)
+			}
+			lines = append(lines, line)
+		}
+		r.lines = lines
+		// Input ended inside an object: prefer the real cause if there is one.
+		if err := r.scanner.Err(); err != nil {
+			return nil, err
+		}
+		return nil, errors.New("no matching '}' found")
+	}
+
+	// scanLine returns "" both on clean EOF and on scanner failure.
+	if err := r.scanner.Err(); err != nil {
+		return nil, err
+	}
+	return nil, io.EOF
+}
+
+// ReadAll drains r, collecting every remaining object.
+// Reaching the end of input is the successful outcome, so it is
+// reported as err == nil rather than err == io.EOF,
+// just like csv.Reader.ReadAll().
+// On any other error the objects read so far are returned with it.
+func (r *Reader) ReadAll() ([]*Object, error) {
+	result := make([]*Object, 0)
+	for {
+		obj, err := r.Read()
+		switch {
+		case err == io.EOF:
+			return result, nil
+		case err != nil:
+			return result, err
+		}
+		result = append(result, obj)
+	}
+}
+
+// instLineRE matches valid XED object/inst line.
+// It expects lines that are joined by '\' to be concatenated.
+//
+// The format can be described as:
+//
+//	unquoted field name "[A-Z_]+" (captured)
+//	field value delimiter ":", optionally surrounded by whitespace
+//	field value string "[^#]*" (captured): everything up to the first '#'
+//	optional trailing comment starting at '#'; it is left unmatched and so ignored
+var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`)
+
+// parseLines turns collected object lines into Object.
+//
+// Every line must be non-empty (l[0] is inspected unconditionally).
+// Lines starting with '#' are skipped; all other lines must match
+// instLineRE and carry a known key, otherwise an error is returned.
+//
+// OPERANDS, PATTERN and IFORM may repeat. The i'th OPERANDS and the
+// i'th PATTERN form the i'th Inst of the object; their counts must be equal.
+func (r *Reader) parseLines(lines []string) (*Object, error) {
+	o := &Object{}
+
+	// Repeatable tokens.
+	// We can not assign them eagerly, because these fields
+	// are not guaranteed to follow strict order.
+	var (
+		operands []string
+		iforms   []string
+		patterns []string
+	)
+
+	for _, l := range lines {
+		if l[0] == '#' { // Skip comment lines.
+			continue
+		}
+		m := instLineRE.FindStringSubmatch(l)
+		if len(m) == 0 {
+			return nil, fmt.Errorf("malformed line: %s", l)
+		}
+		// m[1] is the field name, m[2] is the value up to the first '#'.
+		key, val := m[1], m[2]
+		val = strings.TrimSpace(val)
+
+		switch key {
+		// Object-level fields: a repeated key overwrites the previous value.
+		case "ICLASS":
+			o.Iclass = val
+		case "DISASM":
+			o.Disasm = val
+		case "DISASM_INTEL":
+			o.DisasmIntel = val
+		case "DISASM_ATTSV":
+			o.DisasmATTSV = val
+		case "ATTRIBUTES":
+			o.Attributes = val
+		case "UNAME":
+			o.Uname = val
+		case "CPL":
+			o.CPL = val
+		case "CATEGORY":
+			o.Category = val
+		case "EXTENSION":
+			o.Extension = val
+		case "EXCEPTIONS":
+			o.Exceptions = val
+		case "ISA_SET":
+			o.ISASet = val
+		case "FLAGS":
+			o.Flags = val
+		case "COMMENT":
+			o.Comment = val
+		case "VERSION":
+			o.Version = val
+		case "REAL_OPCODE":
+			o.RealOpcode = val
+
+		// Per-instruction fields: collected in order of appearance.
+		case "OPERANDS":
+			operands = append(operands, val)
+		case "PATTERN":
+			patterns = append(patterns, val)
+		case "IFORM":
+			iforms = append(iforms, val)
+
+		default:
+			// Being strict about unknown field names gives a nice
+			// XED file validation diagnostics.
+			// Also defends against typos in test files.
+			return nil, fmt.Errorf("unknown key token: %s", key)
+		}
+	}
+
+	// OPERANDS and PATTERN lines are paired by position below.
+	if len(operands) != len(patterns) {
+		return nil, fmt.Errorf("%s: OPERANDS and PATTERN lines mismatch", o.Opcode())
+	}
+
+	insts := make([]*Inst, len(operands))
+	for i := range operands {
+		insts[i] = &Inst{
+			Object:   o,
+			Index:    i,
+			Pattern:  patterns[i],
+			Operands: operands[i],
+		}
+		// There can be less IFORMs than insts.
+		if i < len(iforms) {
+			insts[i].Iform = iforms[i]
+		}
+	}
+	o.Insts = insts
+
+	return o, nil
+}
+
+// scanLine fetches the next non-empty line from the scanner.
+//
+// When the fetched line ends with a newline escape (r.joinLines is set),
+// the trailing backslash is dropped and the following non-empty line
+// is appended to it; this repeats for as long as lines are escaped.
+//
+// Returns empty line when scanner.Scan() returns false
+// before non-empty line is found.
+func (r *Reader) scanLine() string {
+	for r.scanner.Scan() {
+		text := r.scanner.Text()
+		if text == "" {
+			continue
+		}
+		if !r.joinLines {
+			return text
+		}
+		// Strip the escaping backslash, then glue the continuation.
+		head := text[:len(text)-len("\\")]
+		return head + r.scanLine()
+	}
+	return ""
+}
@@ -0,0 +1,289 @@
+------ empty input
+====
+[]
+
+------ only newlines
+
+
+
+====
+[]
+
+------ only comments and newlines
+# {
+# ICLASS : ADD
+# }
+====
+[]
+
+------ join lines
+{
+ICLASS : i\
+ cla\
+  ss1
+VERSION : 1.\
+0
+FLAGS:\
+\
+\
+NOP
+
+REAL_OPCODE    : \Y
+CPL            : \3
+PATTERN: A B
+OPERANDS:
+}
+====
+[{
+  "Iclass": "i cla  ss1",
+  "Version": "1.0",
+  "Flags": "NOP",
+  "RealOpcode": "\\Y",
+  "CPL": "\\3",
+  "Pattern": "A B"
+}]
+
+------ 1 variant; no iform
+{
+ICLASS:iclass1 # comment
+DISASM : disasm1
+
+PATTERN :pat1 pat1
+OPERANDS :  ops1  ops1
+}
+# comment
+
+{ # comment
+# comment
+ICLASS  : iclass2
+OPERANDS:ops2
+PATTERN:pat2 # comment
+}
+====
+[{
+  "Iclass": "iclass1",
+  "Disasm": "disasm1",
+  "Pattern": "pat1 pat1",
+  "Operands": "ops1  ops1"
+}, {
+  "Iclass": "iclass2",
+  "Operands": "ops2",
+  "Pattern": "pat2"
+}]
+
+------ 2 variants; no iform
+{
+PATTERN    : pat1_1
+COMMENT    : comment1
+OPERANDS   : ops1_1
+OPERANDS   : ops1_2
+PATTERN    : pat1_2
+}
+{
+PATTERN    : pat2_1
+PATTERN    : pat2_2
+OPERANDS   : ops2_1
+OPERANDS   : ops2_2
+}
+====
+[{
+  "Comment": "comment1",
+  "Pattern": "pat1_1",
+  "Operands": "ops1_1"
+}, {
+  "Comment": "comment1",
+  "Pattern": "pat1_2",
+  "Operands": "ops1_2"
+}, {
+  "Pattern": "pat2_1",
+  "Operands": "ops2_1"
+}, {
+  "Pattern": "pat2_2",
+  "Operands": "ops2_2"
+}]
+
+------ 3 variants
+{
+
+PATTERN  : pat1_1
+OPERANDS : ops1_1
+IFORM    : iform1_1
+
+PATTERN  : pat1_2# comment
+OPERANDS : ops1_2# comment
+IFORM    : iform1_2# comment
+
+# comment
+PATTERN  : pat1_3
+OPERANDS : ops1_3
+IFORM    : iform1_3
+
+}
+
+{
+PATTERN  : pat2_1
+OPERANDS : ops2_1
+IFORM    : iform2_1
+PATTERN  : pat2_2
+OPERANDS : ops2_2
+PATTERN  : pat2_3
+OPERANDS : ops2_3
+}
+====
+[{
+  "Iform": "iform1_1",
+  "Pattern": "pat1_1",
+  "Operands": "ops1_1"
+}, {
+  "Iform": "iform1_2",
+  "Pattern": "pat1_2",
+  "Operands": "ops1_2"
+}, {
+  "Iform": "iform1_3",
+  "Pattern": "pat1_3",
+  "Operands": "ops1_3"
+}, {
+  "Iform": "iform2_1",
+  "Pattern": "pat2_1",
+  "Operands": "ops2_1"
+}, {
+  "Pattern": "pat2_2",
+  "Operands": "ops2_2"
+}, {
+  "Pattern": "pat2_3",
+  "Operands": "ops2_3"
+}]
+
+------ stable and unstable instructions (REAL_OPCODE)
+{
+ICLASS: STABLE
+REAL_OPCODE: Y
+PATTERN : x y z
+OPERANDS :
+}
+{
+ICLASS: UNSTABLE
+REAL_OPCODE:   N
+PATTERN : x y z
+OPERANDS :
+}
+====
+[{
+  "Iclass": "STABLE",
+  "RealOpcode": "Y",
+  "Pattern": "x y z",
+  "Operands": ""
+}, {
+  "Iclass": "UNSTABLE",
+  "RealOpcode": "N",
+  "Pattern": "x y z",
+  "Operands": ""
+}]
+
+------ AVXAES objects
+# Emitting VAESENCLAST
+{
+ICLASS    : VAESENCLAST
+EXCEPTIONS: avx-type-4
+CPL       : 3
+CATEGORY  : AES
+EXTENSION : AVXAES
+PATTERN : VV1 0xDD V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq
+PATTERN : VV1 0xDD  V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq
+}
+# Emitting VAESDEC
+{
+ICLASS    : VAESDEC
+EXCEPTIONS: avx-type-4
+CPL       : 3
+CATEGORY  : AES
+EXTENSION : AVXAES
+PATTERN : VV1 0xDE V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq
+PATTERN : VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128
+OPERANDS  : REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq
+}
+====
+[{
+  "Iclass": "VAESENCLAST",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDD V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq"
+}, {
+  "Iclass": "VAESENCLAST",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDD  V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq"
+}, {
+  "Iclass": "VAESDEC",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDE V66 V0F38 MOD[0b11] MOD=3  REG[rrr] RM[nnn] VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  REG2=XMM_B():r:dq"
+}, {
+  "Iclass": "VAESDEC",
+  "Exceptions": "avx-type-4",
+  "CPL": "3",
+  "Category": "AES",
+  "Extension": "AVXAES",
+  "Pattern": "VV1 0xDE V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128",
+  "Operands": "REG0=XMM_R():w:dq REG1=XMM_N():r:dq  MEM0:r:dq"
+}]
+
+------ Two-word disasm
+{
+ICLASS    : JMP_FAR
+DISASM_INTEL: jmp far
+DISASM_ATTSV: ljmp
+CPL       : 3
+CATEGORY  : UNCOND_BR
+ATTRIBUTES : FAR_XFER NOTSX
+EXTENSION : BASE
+ISA_SET   : I86
+PATTERN   : 0xEA not64 BRDISPz() UIMM16()
+OPERANDS  : PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP
+}
+====
+[{
+  "Iclass": "JMP_FAR",
+  "DisasmIntel": "jmp far",
+  "DisasmATTSV": "ljmp",
+  "CPL": "3",
+  "Attributes": "FAR_XFER NOTSX",
+  "Extension": "BASE",
+  "ISASet": "I86",
+  "Pattern": "0xEA not64 BRDISPz() UIMM16()",
+  "Operands": "PTR:r:p IMM0:r:w REG0=XED_REG_EIP:w:SUPP"
+}]
+
+------ INVALID key token
+{
+FOO : 111
+}
+====
+unknown key token: FOO
+
+------ INVALID unterminated object
+{
+====
+no matching '}' found
+
+------ INVALID pat+ops
+{
+ICLASS: foobar
+PATTERN : 1
+PATTERN : 2
+OPERANDS : 3
+}
+====
+foobar: OPERANDS and PATTERN lines mismatch
@@ -0,0 +1,5 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+fword64 T_DOUBLE 64
@@ -0,0 +1,17 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+_M_VV_TRUE VEXVALID=1
+_M_VV_FALSE VEXVALID=0
+
+_M_VEX_P_66 VEX_PREFIX=1
+_M_VEX_P_F2 VEX_PREFIX=2
+_M_VEX_P_F3 VEX_PREFIX=3
+
+_M_VLEN_128 VL=0
+_M_VLEN_256 VL=1
+
+_M_MAP_0F    MAP=1
+_M_MAP_0F38  MAP=2
+_M_MAP_0F3A  MAP=3
@@ -0,0 +1,8 @@
+# Copyright 2018 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+width_dq i32 16
+width_qq i32 32
+width_v int 2 4 8
+width_f64 f64 8
@@ -0,0 +1,32 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"io"
+	"os"
+	"path/filepath"
+)
+
+// WalkInsts calls visit function for each XED instruction found at $xedPath/all-dec-instructions.txt.
+//
+// Instructions are visited in file order, object by object.
+// The walk stops at the first read/parse error, which is returned;
+// instructions of the objects read before it have already been visited.
+// Returns nil once the whole file has been processed.
+func WalkInsts(xedPath string, visit func(*Inst)) error {
+	f, err := os.Open(filepath.Join(xedPath, "all-dec-instructions.txt"))
+	if err != nil {
+		return err
+	}
+	// The file is opened read-only, so a Close error carries nothing
+	// worth reporting; closing only releases the descriptor.
+	defer f.Close()
+
+	r := NewReader(f)
+	for {
+		o, err := r.Read()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		for _, inst := range o.Insts {
+			visit(inst)
+		}
+	}
+}
@@ -0,0 +1,488 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xeddata
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"path"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// Small database to generate state/xtype/width input files and validate parse results.
+//
+// Tests should use only those symbols that are defined inside test maps.
+// For example, if {"foo"=>"bar"} element is not in statesMap, tests
+// can't expect that "foo" gets replaced by "bar".
+var (
+	// statesMap maps a state macro name to its expansion.
+	// It is the source for newStatesSource.
+	statesMap = map[string]string{
+		"not64":         "MODE!=2",
+		"mode64":        "MODE=2",
+		"mode32":        "MODE=1",
+		"mode16":        "MODE=0",
+		"rexw_prefix":   "REXW=1 SKIP_OSZ=1",
+		"norexw_prefix": "REXW=0 SKIP_OSZ=1",
+		"W1":            "REXW=1 SKIP_OSZ=1",
+		"W0":            "REXW=0 SKIP_OSZ=1",
+		"VV1":           "VEXVALID=1",
+		"V66":           "VEX_PREFIX=1",
+		"VF2":           "VEX_PREFIX=2",
+		"VF3":           "VEX_PREFIX=3",
+		"V0F":           "MAP=1",
+		"V0F38":         "MAP=2",
+		"V0F3A":         "MAP=3",
+		"VL128":         "VL=0",
+		"VL256":         "VL=1",
+	}
+
+	// xtypesMap is the source for newXtypesSource.
+	//
+	// NOTE(review): "SIGNLE" looks like a typo of "SINGLE". It appears harmless
+	// here because the same map produces both the parser input and the
+	// expected result - confirm before changing it.
+	xtypesMap = map[string]*xtype{
+		"int": {name: "int", baseType: "INT", size: "0"},
+		"i8":  {name: "i8", baseType: "INT", size: "8"},
+		"i64": {name: "i64", baseType: "INT", size: "64"},
+		"i32": {name: "i32", baseType: "INT", size: "32"},
+		"u8":  {name: "u8", baseType: "UINT", size: "8"},
+		"f32": {name: "f32", baseType: "SIGNLE", size: "32"},
+		"f64": {name: "f64", baseType: "DOUBLE", size: "64"},
+		"var": {name: "var", baseType: "VARIABLE", size: "0"},
+	}
+
+	// widthsMap is the source for newWidthsSource.
+	// Every entry carries three sizes; newWidthsSource may emit a single
+	// size column when all three are equal.
+	widthsMap = map[string]*width{
+		"q":         {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
+		"z":         {xtype: "int", sizes: [3]string{"2", "4", "4"}},
+		"b":         {xtype: "u8", sizes: [3]string{"1", "1", "1"}},
+		"d":         {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
+		"ps":        {xtype: "f32", sizes: [3]string{"16", "16", "16"}},
+		"dq":        {xtype: "i32", sizes: [3]string{"16", "16", "16"}},
+		"i32":       {xtype: "i32", sizes: [3]string{"4", "4", "4"}},
+		"i64":       {xtype: "i64", sizes: [3]string{"8", "8", "8"}},
+		"vv":        {xtype: "var", sizes: [3]string{"0", "0", "0"}},
+		"mskw":      {xtype: "i1", sizes: [3]string{"64bits", "64bits", "64bits"}},
+		"zf32":      {xtype: "f32", sizes: [3]string{"512bits", "512bits", "512bits"}},
+		"zf64":      {xtype: "f64", sizes: [3]string{"512bits", "512bits", "512bits"}},
+		"mem80real": {xtype: "f80", sizes: [3]string{"10", "10", "10"}},
+		"mfpxenv":   {xtype: "struct", sizes: [3]string{"512", "512", "512"}},
+	}
+)
+
+// newStatesSource returns a reader that mocks "all-state.txt" file.
+// Input content is generated based on statesMap.
+func newStatesSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for k, v := range statesMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+		fmt.Fprintf(&buf, "\t%-20s%s", k, v)
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+// newWidthsSource returns a reader that mocks "all-widths.txt" file.
+// Input content is generated based on widthsMap.
+func newWidthsSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for name, width := range widthsMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+		eqSizes := width.sizes[0] == width.sizes[1] &&
+			width.sizes[0] == width.sizes[2]
+		if i%2 == 0 && eqSizes {
+			fmt.Fprintf(&buf, "\t%-16s%-12s%-8s",
+				name, width.xtype, width.sizes[0])
+		} else {
+			fmt.Fprintf(&buf, "\t%-16s%-12s%-8s%-8s%-8s",
+				name, width.xtype,
+				width.sizes[0], width.sizes[1], width.sizes[2])
+		}
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+// newXtypesSource returns a reader that mocks "all-element-types.txt" file.
+// Input content is generated based on xtypesMap.
+func newXtypesSource() io.Reader {
+	var buf bytes.Buffer
+	i := 0
+	for _, v := range xtypesMap {
+		buf.WriteString("# Line comment\n")
+		buf.WriteString("#\n\n\n")
+
+		fmt.Fprintf(&buf, "\t%s %s %s",
+			v.name, v.baseType, v.size)
+
+		if i%3 == 0 {
+			buf.WriteString("\t# Trailing comment")
+		}
+		buf.WriteByte('\n')
+		i++
+	}
+
+	return &buf
+}
+
+// newTestDatabase returns a Database loaded from the mocked
+// states, widths and xtypes sources (in that order).
+// Any load error aborts the calling test.
+func newTestDatabase(t *testing.T) *Database {
+	// Attribute failures to the calling test, not to this helper.
+	t.Helper()
+
+	var db Database
+	if err := db.LoadStates(newStatesSource()); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.LoadWidths(newWidthsSource()); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.LoadXtypes(newXtypesSource()); err != nil {
+		t.Fatal(err)
+	}
+	return &db
+}
+
+// TestContainsWord checks that containsWord reports only whole,
+// space-delimited words: substrings of a word and words glued to
+// other characters must not match.
+func TestContainsWord(t *testing.T) {
+	tests := []struct {
+		attrs    string
+		attrName string
+		output   bool
+	}{
+		{"ATT1", "ATT1", true},
+		{" ATT1", "ATT1", true},
+		{"ATT1 ", "ATT1", true},
+		{" ATT1 ", "ATT1", true},
+		// First, middle and last word of a list.
+		{"ATT1 ATT2 ATT3", "ATT1", true},
+		{"ATT1 ATT2 ATT3", "ATT2", true},
+		{"ATT1 ATT2 ATT3", "ATT3", true},
+		{"ATT1 ATT2 ATT3", "ATT4", false},
+		{"ATT1ATT1", "ATT1", false},
+		{".ATT1", "ATT1", false},
+		{".ATT1.", "ATT1", false},
+		{"ATT1.", "ATT1", false},
+		{"", "ATT1", false},
+		{"AT", "ATT1", false},
+		{"ATT 1", "ATT1", false},
+		{" ATT1 ", "TT", false},
+		{" ATT1 ", "T1", false},
+		{" ATT1 ", "AT", false},
+	}
+
+	for _, test := range tests {
+		output := containsWord(test.attrs, test.attrName)
+		if output != test.output {
+			t.Errorf("containsWord(%q, %q):\nhave: %v\nwant: %v",
+				test.attrs, test.attrName, output, test.output)
+		}
+	}
+}
+
+// TestParseWidths parses the mocked widths file and compares the
+// result with widthsMap, first entry by entry (for a precise failure
+// message), then as a whole.
+func TestParseWidths(t *testing.T) {
+	have, err := parseWidths(newWidthsSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	for name, want := range widthsMap {
+		got := have[name]
+		if got == nil {
+			t.Fatalf("missing key %s", name)
+		}
+		if *got != *want {
+			t.Fatalf("key %s:\nhave: %#v\nwant: %#v",
+				name, got, want)
+		}
+	}
+	if reflect.DeepEqual(have, widthsMap) {
+		return
+	}
+	t.Errorf("widths output mismatch:\nhave: %#v\nwant: %#v",
+		have, widthsMap)
+}
+
+// TestParseStates parses the mocked states file and expects
+// the result to be deeply equal to statesMap.
+func TestParseStates(t *testing.T) {
+	got, err := parseStates(newStatesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if reflect.DeepEqual(got, statesMap) {
+		return
+	}
+	t.Errorf("states output mismatch:\nhave: %v\nwant: %v", got, statesMap)
+}
+
+// TestParseXtypes parses the mocked element types file and compares
+// the result with xtypesMap, first entry by entry (for a precise
+// failure message), then as a whole.
+func TestParseXtypes(t *testing.T) {
+	have, err := parseXtypes(newXtypesSource())
+	if err != nil {
+		t.Fatal(err)
+	}
+	for name, want := range xtypesMap {
+		got := have[name]
+		if got == nil {
+			t.Fatalf("missing key %s", name)
+		}
+		if *got != *want {
+			t.Fatalf("key %s:\nhave: %#v\nwant: %#v",
+				name, got, want)
+		}
+	}
+	if equal := reflect.DeepEqual(have, xtypesMap); !equal {
+		t.Fatalf("xtype maps are not equal")
+	}
+}
+
+// TestNewOperand feeds colon-separated operand descriptions to NewOperand
+// and compares the parsed Operand with the expected value.
+// The database comes from newTestDatabase, so only widths and xtypes
+// defined in the test maps are known to the parser.
+func TestNewOperand(t *testing.T) {
+	tests := []struct {
+		input string
+		op    Operand
+	}{
+		// Simple cases.
+		{
+			"REG0=XMM_R():r",
+			Operand{Name: "REG0=XMM_R()", Action: "r"},
+		},
+		{
+			"REG0=XMM_R:w",
+			Operand{Name: "REG0=XMM_R", Action: "w"},
+		},
+		{
+			"MEM0:rw:q",
+			Operand{Name: "MEM0", Action: "rw", Width: "q"},
+		},
+		{
+			"REG0=XMM_R():rcw:ps:f32",
+			Operand{Name: "REG0=XMM_R()", Action: "rcw", Width: "ps", Xtype: "f32"},
+		},
+		{
+			"IMM0:r:z",
+			Operand{Name: "IMM0", Action: "r", Width: "z"},
+		},
+		{
+			"IMM1:cw:b:i8",
+			Operand{Name: "IMM1", Action: "cw", Width: "b", Xtype: "i8"},
+		},
+
+		// Optional fields and visibility.
+		// The visibility field is expected to parse the same whether it
+		// comes before or after the width.
+		{
+			"REG2:r:EXPL",
+			Operand{Name: "REG2", Action: "r", Visibility: VisExplicit},
+		},
+		{
+			"MEM1:w:d:IMPL",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
+		},
+		{
+			"MEM1:w:IMPL:d",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit},
+		},
+		{
+			"MEM1:w:d:SUPP:i32",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
+		},
+		{
+			"MEM1:w:SUPP:d:i32",
+			Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"},
+		},
+
+		// Ambiguity: xtypes that look like widths.
+		// ("i64" is a key of both widthsMap and xtypesMap.)
+		{
+			"REG0=XMM_R():w:dq:i64",
+			Operand{Name: "REG0=XMM_R()", Action: "w", Width: "dq", Xtype: "i64"},
+		},
+
+		// TXT=X field.
+		{
+			"REG1=MASK1():r:mskw:TXT=ZEROSTR",
+			Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw",
+				Attributes: map[string]bool{"TXT=ZEROSTR": true}},
+		},
+		{
+			"MEM0:r:vv:f64:TXT=BCASTSTR",
+			Operand{Name: "MEM0", Action: "r", Width: "vv", Xtype: "f64",
+				Attributes: map[string]bool{"TXT=BCASTSTR": true}},
+		},
+		{
+			"REG0=ZMM_R3():w:zf32:TXT=SAESTR",
+			Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32",
+				Attributes: map[string]bool{"TXT=SAESTR": true}},
+		},
+		{
+			"REG0=ZMM_R3():w:zf64:TXT=ROUNDC",
+			Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64",
+				Attributes: map[string]bool{"TXT=ROUNDC": true}},
+		},
+
+		// Multi-source.
+		{
+			"REG2=ZMM_N3():r:zf32:MULTISOURCE4",
+			Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
+				Attributes: map[string]bool{"MULTISOURCE4": true}},
+		},
+
+		// Multi-source + EVEX.b context.
+		{
+			"REG2=ZMM_N3():r:zf32:MULTISOURCE4:TXT=SAESTR",
+			Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32",
+				Attributes: map[string]bool{"MULTISOURCE4": true, "TXT=SAESTR": true}},
+		},
+	}
+
+	db := newTestDatabase(t)
+	for _, test := range tests {
+		op, err := NewOperand(db, test.input)
+		if err != nil {
+			t.Fatal(err)
+		}
+		// NewOperand returns a pointer; compare the pointed-to value.
+		if !reflect.DeepEqual(*op, test.op) {
+			t.Errorf("parse(`%s`): output mismatch\nhave: %#v\nwant: %#v",
+				test.input, op, test.op,
+			)
+		}
+	}
+}
+
+// TestReader runs Reader.ReadAll over the cases stored in
+// testdata/xed_objects.txt.
+//
+// The file is a sequence of cases, each laid out as:
+//
+//	------ case name
+//	input for the reader
+//	====
+//	expected output
+//
+// For cases whose name contains "INVALID" the expected output is the exact
+// error message. For all other cases it is a JSON array with one object
+// per expected Inst; only the keys listed there are compared.
+func TestReader(t *testing.T) {
+	type test struct {
+		name   string
+		input  string
+		output string
+	}
+
+	var tests []test
+	{
+		b, err := ioutil.ReadFile(path.Join("testdata", "xed_objects.txt"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Text before the first "------" marker (if any) is not a case.
+		cases := strings.Split(string(b), "------")[1:]
+		for _, c := range cases {
+			// Report broken test data instead of panicking on slicing.
+			nl := strings.Index(c, "\n")
+			if nl < 0 {
+				t.Fatalf("malformed test case, no newline after name: %q", c)
+			}
+			name := c[:nl]
+			parts := strings.Split(c[nl:], "====")
+			if len(parts) < 2 {
+				t.Fatalf("%s: malformed test case, no '====' separator",
+					strings.TrimSpace(name))
+			}
+
+			tests = append(tests, test{
+				name:   strings.TrimSpace(name),
+				input:  strings.TrimSpace(parts[0]),
+				output: strings.TrimSpace(parts[1]),
+			})
+		}
+	}
+
+	for _, test := range tests {
+		r := NewReader(strings.NewReader(test.input))
+		objects, err := r.ReadAll()
+		if strings.Contains(test.name, "INVALID") {
+			switch {
+			case err == nil:
+				t.Errorf("%s: expected non-nil error", test.name)
+			case err.Error() != test.output:
+				t.Errorf("%s: error mismatch\nhave: `%s`\nwant: `%s`\n",
+					test.name, err.Error(), test.output)
+			default:
+				t.Logf("PASS: %s", test.name)
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatalf("%s: %v", test.name, err)
+		}
+
+		var have []map[string]string
+		for _, o := range objects {
+			for _, inst := range o.Insts {
+				var result map[string]string
+				err := json.Unmarshal([]byte(inst.String()), &result)
+				if err != nil {
+					t.Fatalf("%s: %v", test.name, err)
+				}
+				have = append(have, result)
+			}
+		}
+		var want []map[string]string
+		err = json.Unmarshal([]byte(test.output), &want)
+		if err != nil {
+			t.Fatalf("%s: %v", test.name, err)
+		}
+
+		// Guards the have[i] lookups below and catches unexpected extra insts.
+		if len(have) != len(want) {
+			t.Errorf("%s: insts count mismatch\nhave: %d\nwant: %d",
+				test.name, len(have), len(want))
+			continue
+		}
+
+		passed := true
+		for i := range want {
+			for k := range want[i] {
+				if want[i][k] == have[i][k] {
+					continue
+				}
+				passed = false
+				// i - index inside array of JSON objects.
+				// k - i'th object key (example: "Iclass").
+				t.Errorf("%s: insts[%d].%s mismatch\nhave: `%s`\nwant: `%s`",
+					test.name, i, k, have[i][k], want[i][k])
+			}
+		}
+		// Per-case status: an earlier failing case must not hide this one.
+		if passed {
+			t.Logf("PASS: %s", test.name)
+		}
+	}
+}
+
+// TestMacroExpand checks ExpandStates against the states defined in
+// statesMap (loaded through newTestDatabase).
+// Cases are indexed explicitly so that a failure message can refer
+// to the case by its number.
+func TestMacroExpand(t *testing.T) {
+	tests := [...]struct {
+		input  string
+		output string
+	}{
+		0: {
+			"a not64 b c",
+			"a MODE!=2 b c",
+		},
+		1: {
+			"mode16 W0",
+			"MODE=0 REXW=0 SKIP_OSZ=1",
+		},
+		2: {
+			"W1 mode32",
+			"REXW=1 SKIP_OSZ=1 MODE=1",
+		},
+		3: {
+			"W1 W1",
+			"REXW=1 SKIP_OSZ=1 REXW=1 SKIP_OSZ=1",
+		},
+		// A state name glued to other text is not expanded.
+		4: {
+			"W1W1",
+			"W1W1",
+		},
+		5: {
+			"mode64 1 2 3 rexw_prefix",
+			"MODE=2 1 2 3 REXW=1 SKIP_OSZ=1",
+		},
+		// Runs of spaces are expected to collapse into a single space.
+		6: {
+			"a  b  c",
+			"a b c",
+		},
+		7: {
+			"mode16 mode32 mode16 mode16",
+			"MODE=0 MODE=1 MODE=0 MODE=0",
+		},
+		// Names that merely start with a known state stay untouched.
+		8: {
+			"V0F38 V0FV0F V0FV0F38",
+			"MAP=2 V0FV0F V0FV0F38",
+		},
+		9: {
+			"VV1 0x2E V66 V0F38 VL128  norexw_prefix MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
+			"VEXVALID=1 0x2E VEX_PREFIX=1 MAP=2 VL=0 REXW=0 SKIP_OSZ=1 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM()",
+		},
+	}
+
+	db := newTestDatabase(t)
+	for id, test := range tests {
+		have := ExpandStates(db, test.input)
+		if test.output != have {
+			t.Errorf("test %d: output mismatch:\nhave: `%s`\nwant: `%s`",
+				id, have, test.output)
+		}
+	}
+}