whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1 @@
x86manual.pdf
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,652 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Derive format specifications.
package main
import (
"sort"
"strings"
)
// format fills in the derived fields of every instruction in insts:
// multisize, datasize, gnuSyntax and goSyntax.
// As a side effect it sorts insts in place with bySyntax.
func format(insts []*instruction) {
// Determine opcodes that come in multiple sizes
// and could need disambiguating suffixes.
// Mark those with multisize="Y".
sort.Sort(bySyntax(insts))
needSize := make(map[string]bool)
// Pass 0 looks only at instructions with valid32 == "V",
// pass 1 only at instructions with valid64 == "V".
for i := 0; i < 2; i++ {
seen := make(map[string]bool)
for _, inst := range insts {
if hasTag(inst, "pseudo") || hasTag(inst, "pseudo64") {
continue
}
switch i {
case 0:
if inst.valid32 != "V" {
continue
}
case 1:
if inst.valid64 != "V" {
continue
}
}
// Two instructions whose syntax is identical once size markers
// are stripped mean the opcode needs a size suffix.
unsized := stripSize.Replace(inst.syntax)
if seen[unsized] {
op, _ := splitSyntax(inst.syntax)
needSize[op] = true
}
seen[unsized] = true
}
}
for _, inst := range insts {
op, _ := splitSyntax(inst.syntax)
if needSize[op] || forceNeedSize[op] {
inst.multisize = "Y"
}
}
// Assign data sizes.
// Only multisize instructions get one; the first argument
// (starting at startArg[op]) that names a sized operand decides.
for _, inst := range insts {
if inst.multisize != "Y" {
continue
}
op, args := splitSyntax(inst.syntax)
Args:
for i := startArg[op]; i < len(args); i++ {
switch args[i] {
case "AL", "r8", "r8op", "r/m8":
inst.datasize = 8
break Args
case "AX", "r16", "r16op", "r/m16":
inst.datasize = 16
break Args
case "EAX", "r32", "r32op", "r/m32", "rmr32", "m32fp", "m32int":
inst.datasize = 32
break Args
case "RAX", "r64", "r64op", "r/m64", "rmr64", "m64fp", "m64int":
inst.datasize = 64
break Args
case "m80fp":
inst.datasize = 80
break Args
case "xmm2/m128":
inst.datasize = 128
break Args
case "ymm2/m256":
inst.datasize = 256
break Args
}
}
}
// Determine GNU syntax for instructions.
// With a few exceptions, it's the Intel opcode plus an optional suffix,
// followed by the reversed argument list.
for _, inst := range insts {
op, args := splitSyntax(inst.syntax)
intelOp := op
op = strings.ToLower(op)
// A gnuOpcode entry (keyed by the full Intel syntax) replaces the
// opcode outright and suppresses the size suffix.
if custom, ok := gnuOpcode[inst.syntax]; ok {
op = custom
} else {
if inst.multisize == "Y" {
suffix := defaultSizeSuffix[inst.datasize]
// A per-opcode table overrides the default; a size missing
// from that table yields an empty suffix.
if custom, ok := gnuSizeSuffix[op]; ok {
suffix = custom[inst.datasize]
}
op += suffix
}
}
switch intelOp {
case "BOUND", "ENTER":
// no reversal
default:
for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
args[i], args[j] = args[j], args[i]
}
}
inst.gnuSyntax = joinSyntax(op, args)
}
// Determine Go syntax for instructions.
// Similar to GNU syntax (really they are both similar to "AT&T" syntax)
// but upper case and not reversing the argument list for a few instructions,
// like comparisons.
for _, inst := range insts {
intelOp, args := splitSyntax(inst.syntax)
// start with GNU op, because it has suffixes already
op, _ := splitSyntax(inst.gnuSyntax)
op = strings.ToUpper(op)
// Lookup order: full Intel syntax, Intel opcode, upper-cased GNU
// opcode, and only then the goSizeSuffix table.
if custom, ok := goOpcode[inst.syntax]; ok {
op = custom
} else if custom, ok := goOpcode[intelOp]; ok {
op = custom
} else if custom, ok := goOpcode[op]; ok {
op = custom
} else if suffix, ok := goSizeSuffix[op]; ok {
op += suffix[inst.datasize]
}
switch intelOp {
case "CMP":
// no reversal
case "CMPPD", "CMPPS", "CMPSD", "CMPSS":
// rotate destination to end but don't swap comparison operands
if len(args) == 3 {
args[0], args[1], args[2] = args[2], args[0], args[1]
break
}
// Forms without exactly three arguments are reversed like any
// other instruction.
fallthrough
default:
for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
args[i], args[j] = args[j], args[i]
}
}
inst.goSyntax = joinSyntax(op, args)
}
}
// forceNeedSize lists Intel opcodes that format marks as multisize
// even when its duplicate-syntax scan does not flag them.
var forceNeedSize = map[string]bool{
"SAL": true,
}
// stripSize rewrites an Intel syntax string so that operand-size markers
// are replaced by placeholders. format uses the result to detect
// instructions whose syntax differs only in operand size.
var stripSize = strings.NewReplacer(
"rel8", "rel8", // leave these alone
"rel16", "rel16",
"rel32", "rel32",
"8", "#",
"16", "#",
"32", "#",
"64", "#",
"xmm2/m128", "xy/#",
"ymm2/m256", "xy/#",
"EAX", "AX",
)
// defaultSizeSuffix maps a data size in bits to the GNU opcode suffix
// that format appends when gnuSizeSuffix has no entry for the opcode.
// Sizes not listed here yield an empty suffix.
var defaultSizeSuffix = map[int]string{
8: "b",
16: "w",
32: "l",
64: "q",
}
// gnuSizeSuffix overrides defaultSizeSuffix for specific opcodes.
// It is keyed by the lower-cased Intel opcode; the inner map goes from
// data size in bits to the GNU suffix. When an opcode is present, a size
// missing from its inner map yields no suffix at all.
var gnuSizeSuffix = map[string]map[int]string{
"cvtsd2si": {64: "q"},
"cvtss2si": {64: "q"},
"cvttsd2si": {64: "q"},
"cvttss2si": {64: "q"},
"vcvtsd2si": {64: "q"},
"vcvtss2si": {64: "q"},
"vcvttsd2si": {64: "q"},
"vcvttss2si": {64: "q"},
"vcvtpd2dq": {128: "x", 256: "y"},
"vcvtpd2ps": {128: "x", 256: "y"},
"vcvttpd2dq": {128: "x", 256: "y"},
"vcvttpd2ps": {128: "x", 256: "y"},
"fadd": {32: "s", 64: "l"},
"fcom": {32: "s", 64: "l"},
"fcomp": {32: "s", 64: "l"},
"fdiv": {32: "s", 64: "l"},
"fdivr": {32: "s", 64: "l"},
"fmul": {32: "s", 64: "l"},
"fsub": {32: "s", 64: "l"},
"fsubr": {32: "s", 64: "l"},
"fld": {32: "s", 64: "l", 80: "t"},
"fst": {32: "s", 64: "l", 80: "t"},
"fstp": {32: "s", 64: "l", 80: "t"},
"fiadd": {32: "l"},
"ficom": {32: "l"},
"ficomp": {32: "l"},
"fidiv": {32: "l"},
"fidivr": {32: "l"},
"fimul": {32: "l"},
"fist": {32: "l"},
"fisub": {32: "l"},
"fisubr": {32: "l"},
"fild": {32: "l", 64: "ll"},
"fistp": {32: "l", 64: "ll"},
"fisttp": {32: "l", 64: "ll"},
"fldenv": {64: "l"},
// These can be distinguished by register name (%rcx vs %ecx)
// and objdump refuses to put suffixes on them.
// The empty inner maps suppress the default suffix for every size.
"bswap": {},
"rdfsbase": {},
"rdgsbase": {},
"rdrand": {},
"rdseed": {},
"wrfsbase": {},
"wrgsbase": {},
}
// gnuOpcode gives the GNU opcode for instructions that do not follow the
// "lower-cased Intel opcode plus size suffix" rule. It is keyed by the
// complete Intel syntax string (opcode and arguments); when an entry
// matches, format uses the value as-is and adds no size suffix.
var gnuOpcode = map[string]string{
// Simple name changes.
"CBW": "cbtw",
"CDQ": "cltd",
"CDQE": "cltq",
"CMPSD": "cmpsl",
"CQO": "cqto",
"CWD": "cwtd",
"CWDE": "cwtl",
"INSD": "insl",
"LODSD": "lodsl",
"MOVSD": "movsl",
"OUTSD": "outsl",
"PUSHAD": "pushal",
"PUSHFD": "pushfl",
"POPAD": "popal",
"POPFD": "popfl",
"STOSD": "stosl",
"XLATB": "xlat",
"POPA": "popaw",
"POPF": "popfw",
"PUSHA": "pushaw",
"PUSHF": "pushfw",
"SCASD": "scasl",
// Two-operand FDIV and FDIVR are inverted, but only for the ST(i), ST(0) form.
// I think this is a bug in the GNU tools but perhaps one that must be historically maintained.
"FDIV ST(i), ST(0)": "fdivr",
"FDIVR ST(i), ST(0)": "fdiv",
"FDIVP ST(i), ST(0)": "fdivrp",
"FDIVRP ST(i), ST(0)": "fdivp",
"FSUB ST(i), ST(0)": "fsubr",
"FSUBR ST(i), ST(0)": "fsub",
"FSUBP ST(i), ST(0)": "fsubrp",
"FSUBRP ST(i), ST(0)": "fsubp",
"MOV r64op, imm64": "movabsq",
"MOV moffs64, RAX": "movabsq",
"MOV RAX, moffs64": "movabsq",
"MOV moffs8, AL": "movb/movb/movabsb",
"MOV AL, moffs8": "movb/movb/movabsb",
"LGDT m16&32": "lgdtw/lgdtl",
"LIDT m16&32": "lidtw/lidtl",
"SGDT m": "sgdtw/sgdtl/sgdt",
"SIDT m": "sidtw/sidtl/sidt",
"LEAVE": "leavew/leavel/leaveq",
"MOVBE r16, m16": "movbeww",
"MOVBE m16, r16": "movbeww",
"MOVBE m32, r32": "movbell",
"MOVBE r32, m32": "movbell",
"MOVBE m64, r64": "movbeqq",
"MOVBE r64, m64": "movbeqq",
"MOVSX r16, r/m16": "movsww",
"MOVSX r16, r/m8": "movsbw",
"MOVSX r32, r/m16": "movswl",
"MOVSX r32, r/m8": "movsbl",
"MOVSX r64, r/m16": "movswq",
"MOVSX r64, r/m8": "movsbq",
"MOVSXD r64, r/m32": "movslq",
"MOVZX r16, r/m16": "movzww",
"MOVZX r16, r/m8": "movzbw",
"MOVZX r32, r/m16": "movzwl",
"MOVZX r32, r/m8": "movzbl",
"MOVZX r64, r/m16": "movzwq",
"MOVZX r64, r/m8": "movzbq",
"CALL r/m16": "callw*",
"CALL r/m32": "calll*",
"CALL r/m64": "callq*",
"JMP r/m16": "jmpw*",
"JMP r/m32": "jmpl*",
"JMP r/m64": "jmpq*",
"CALL_FAR m16:16": "lcallw*",
"CALL_FAR m16:32": "lcalll*",
"CALL_FAR m16:64": "lcallq*",
"JMP_FAR m16:16": "ljmpw*",
"JMP_FAR m16:32": "ljmpl*",
"JMP_FAR m16:64": "ljmpq*",
"CALL_FAR ptr16:16": "lcallw",
"CALL_FAR ptr16:32": "lcalll",
"JMP_FAR ptr16:16": "ljmpw",
"JMP_FAR ptr16:32": "ljmpl",
"STR r32/m16": "str{l/w}",
"SMSW r32/m16": "smsw{l/w}",
"SLDT r32/m16": "sldt{l/w}",
"MOV Sreg, r32/m16": "mov{l/w}",
"MOV r32/m16, Sreg": "mov{l/w}",
"STR r64/m16": "str{q/w}",
"SMSW r64/m16": "smsw{q/w}",
"SLDT r64/m16": "sldt{q/w}",
"MOV Sreg, r64/m16": "mov{q/w}",
"MOV r64/m16, Sreg": "mov{q/w}",
"FLDENV m14/28byte": "fldenvs/fldenvl",
"FNSAVE m94/108byte": "fnsaves/fnsavel",
"FNSTENV m14/28byte": "fnstenvs/fnstenvl",
"FRSTOR m94/108byte": "frstors/frstorl",
"IRETD": "iretl",
"IRET": "iretw",
"RET_FAR imm16u": "lretw/lretl/lretl",
"RET_FAR": "lretw/lretl/lretl",
"ENTER imm16, imm8b": "enterw/enterl/enterq",
"RET": "retw/retl/retq",
"SYSRET": "sysretw/sysretl/sysretl",
"RET imm16u": "retw/retl/retq",
"PUSH CS": "pushw/pushl/pushq",
"PUSH DS": "pushw/pushl/pushq",
"PUSH ES": "pushw/pushl/pushq",
"PUSH FS": "pushw/pushl/pushq",
"PUSH GS": "pushw/pushl/pushq",
"PUSH SS": "pushw/pushl/pushq",
"PUSH imm16": "pushw",
"POP CS": "popw/popl/popq",
"POP DS": "popw/popl/popq",
"POP ES": "popw/popl/popq",
"POP FS": "popw/popl/popq",
"POP GS": "popw/popl/popq",
"POP SS": "popw/popl/popq",
"PUSH imm32": "-/pushl/pushq",
"PUSH imm8": "pushw/pushl/pushq",
}
// startArg gives, per Intel opcode, the index of the first argument that
// format examines when choosing an instruction's data size.
// Opcodes not listed start at argument 0.
var startArg = map[string]int{
"CRC32": 1,
}
// goSizeSuffix maps an upper-cased GNU opcode to the Go suffix for each
// data size in bits. format consults it only when goOpcode has no
// matching entry.
var goSizeSuffix = map[string]map[int]string{
"BSWAP": {16: "W", 32: "L", 64: "Q"},
}
// goOpcode gives the Go assembler opcode for instructions whose name is
// not simply the upper-cased GNU opcode. format looks a key up three ways,
// in this order: the complete Intel syntax string, the Intel opcode, and
// the upper-cased GNU opcode (which already carries any size suffix).
var goOpcode = map[string]string{
// Overriding the GNU rewrites.
"CBW": "CBW",
"CDQ": "CDQ",
"CDQE": "CDQE",
"CQO": "CQO",
"CWD": "CWD",
"CWDE": "CWDE",
"SYSRET": "SYSRET",
"MOVABSQ": "MOVQ",
// Our own rewrites, of either GNU or Intel syntax.
"CVTPD2DQ": "CVTPD2PL",
"CVTDQ2PD": "CVTPL2PD",
"CVTDQ2PS": "CVTPL2PS",
"CVTPS2DQ": "CVTPS2PL",
"CVTSD2SI": "CVTSD2SL",
"CVTSD2SIQ": "CVTSD2SQ",
"CVTSI2SDL": "CVTSL2SD",
"CVTSI2SDQ": "CVTSQ2SD",
"CVTSI2SSL": "CVTSL2SS",
"CVTSI2SSQ": "CVTSQ2SS",
"CVTSS2SI": "CVTSS2SL",
"CVTSS2SIQ": "CVTSS2SQ",
"CVTTPD2DQ": "CVTTPD2PL",
"CVTTPS2DQ": "CVTTPS2PL",
"CVTTSD2SI": "CVTTSD2SL",
"CVTTSD2SIQ": "CVTTSD2SQ",
"CVTTSS2SI": "CVTTSS2SL",
"CVTTSS2SIQ": "CVTTSS2SQ",
"LOOPE": "LOOPEQ",
"MASKMOVDQU": "MASKMOVOU",
"MOVDQA": "MOVO",
"MOVDQU": "MOVOU",
"MOVNTDQ": "MOVNTO",
"MOVQ2DQ": "MOVQOZX",
"MOVDQ2Q": "MOVQ",
"MOVSBL": "MOVBLSX",
"MOVSBQ": "MOVBQSX",
"MOVSBW": "MOVBWSX",
"MOVSLQ": "MOVLQSX",
"MOVSWL": "MOVWLSX",
"MOVSWQ": "MOVWQSX",
"MOVZBL": "MOVBLZX",
"MOVZBQ": "MOVBQZX",
"MOVZBW": "MOVBWZX",
"MOVZLQ": "MOVLQZX",
"MOVZWL": "MOVWLZX",
"MOVZWQ": "MOVWQZX",
"PACKSSDW": "PACKSSLW",
"PADDD": "PADDL",
"PCMPEQD": "PCMPEQL",
"PCMPGTD": "PCMPGTL",
"PMADDWD": "PMADDWL",
"PMULUDQ": "PMULULQ",
"PSLLD": "PSLLL",
"PSLLDQ": "PSLLO",
"PSRAD": "PSRAL",
"PSRLD": "PSRLL",
"PSRLDQ": "PSRLO",
"PSUBD": "PSUBL",
"PUNPCKLWD": "PUNPCKLWL",
"PUNPCKHDQ": "PUNPCKHLQ",
"PUNPCKHWD": "PUNPCKHWL",
"PUNPCKLDQ": "PUNPCKLLQ",
"PUSHA": "PUSHAW",
"PUSHAD": "PUSHAL",
"PUSHF": "PUSHFW",
"PUSHFD": "PUSHFL",
"RET_FAR": "RETFW/RETFL/RETFQ",
"CALLQ": "CALL",
"CALLL": "CALL",
"CALLW": "CALL",
"MOVSXDW": "MOVWQSX",
"MOVSXDL": "MOVLQSX",
"SHLDW": "SHLW",
"SHLDL": "SHLL",
"SHLDQ": "SHLQ",
"SHRDW": "SHRW",
"SHRDL": "SHRL",
"SHRDQ": "SHRQ",
// Conditional moves, 16-bit forms: the size letter moves in front of
// the condition code.
"CMOVAW": "CMOVWHI",
"CMOVAEW": "CMOVWCC",
"CMOVBW": "CMOVWCS",
"CMOVBEW": "CMOVWLS",
"CMOVCW": "CMOVWCS",
"CMOVCCW": "CMOVWCC",
"CMOVCSW": "CMOVWCS",
"CMOVEW": "CMOVWEQ",
"CMOVEQW": "CMOVWEQ",
"CMOVGW": "CMOVWGT",
"CMOVGEW": "CMOVWGE",
"CMOVGTW": "CMOVWGT",
"CMOVHIW": "CMOVWHI",
"CMOVHSW": "CMOVWCC",
"CMOVLW": "CMOVWLT",
"CMOVLEW": "CMOVWLE",
"CMOVLSW": "CMOVWLS",
"CMOVLTW": "CMOVWLT",
"CMOVLOW": "CMOVWCS",
"CMOVMIW": "CMOVWMI",
"CMOVNAW": "CMOVWLS",
"CMOVNAEW": "CMOVWCS",
"CMOVNBW": "CMOVWCC",
"CMOVNBEW": "CMOVWHI",
"CMOVNCW": "CMOVWCC",
"CMOVNEW": "CMOVWNE",
"CMOVNGW": "CMOVWLE",
"CMOVNGEW": "CMOVWLT",
"CMOVNLW": "CMOVWGE",
"CMOVNLEW": "CMOVWGT",
"CMOVNOW": "CMOVWOC",
"CMOVNPW": "CMOVWPC",
"CMOVNSW": "CMOVWPL",
"CMOVNZW": "CMOVWNE",
"CMOVOW": "CMOVWOS",
"CMOVOCW": "CMOVWOC",
"CMOVOSW": "CMOVWOS",
"CMOVPW": "CMOVWPS",
"CMOVPCW": "CMOVWPC",
"CMOVPEW": "CMOVWPS",
"CMOVPOW": "CMOVWPC",
"CMOVPSW": "CMOVWPS",
"CMOVSW": "CMOVWMI",
"CMOVZW": "CMOVWEQ",
// Conditional moves, 32-bit forms.
"CMOVAL": "CMOVLHI",
"CMOVAEL": "CMOVLCC",
"CMOVBL": "CMOVLCS",
"CMOVBEL": "CMOVLLS",
"CMOVCL": "CMOVLCS",
"CMOVCCL": "CMOVLCC",
"CMOVCSL": "CMOVLCS",
"CMOVEL": "CMOVLEQ",
"CMOVEQL": "CMOVLEQ",
"CMOVGL": "CMOVLGT",
"CMOVGEL": "CMOVLGE",
"CMOVGTL": "CMOVLGT",
"CMOVHIL": "CMOVLHI",
"CMOVHSL": "CMOVLCC",
"CMOVLL": "CMOVLLT",
"CMOVLEL": "CMOVLLE",
"CMOVLSL": "CMOVLLS",
"CMOVLTL": "CMOVLLT",
"CMOVLOL": "CMOVLCS",
"CMOVMIL": "CMOVLMI",
"CMOVNAL": "CMOVLLS",
"CMOVNAEL": "CMOVLCS",
"CMOVNBL": "CMOVLCC",
"CMOVNBEL": "CMOVLHI",
"CMOVNCL": "CMOVLCC",
"CMOVNEL": "CMOVLNE",
"CMOVNGL": "CMOVLLE",
"CMOVNGEL": "CMOVLLT",
"CMOVNLL": "CMOVLGE",
"CMOVNLEL": "CMOVLGT",
"CMOVNOL": "CMOVLOC",
"CMOVNPL": "CMOVLPC",
"CMOVNSL": "CMOVLPL",
"CMOVNZL": "CMOVLNE",
"CMOVOL": "CMOVLOS",
"CMOVOCL": "CMOVLOC",
"CMOVOSL": "CMOVLOS",
"CMOVPL": "CMOVLPS",
"CMOVPCL": "CMOVLPC",
"CMOVPEL": "CMOVLPS",
"CMOVPOL": "CMOVLPC",
"CMOVPSL": "CMOVLPS",
"CMOVSL": "CMOVLMI",
"CMOVZL": "CMOVLEQ",
// Conditional moves, 64-bit forms.
"CMOVAQ": "CMOVQHI",
"CMOVAEQ": "CMOVQCC",
"CMOVBQ": "CMOVQCS",
"CMOVBEQ": "CMOVQLS",
"CMOVCQ": "CMOVQCS",
"CMOVCCQ": "CMOVQCC",
"CMOVCSQ": "CMOVQCS",
"CMOVEQ": "CMOVQEQ",
"CMOVEQQ": "CMOVQEQ",
"CMOVGQ": "CMOVQGT",
"CMOVGEQ": "CMOVQGE",
"CMOVGTQ": "CMOVQGT",
"CMOVHIQ": "CMOVQHI",
"CMOVHSQ": "CMOVQCC",
"CMOVLQ": "CMOVQLT",
"CMOVLEQ": "CMOVQLE",
"CMOVLSQ": "CMOVQLS",
"CMOVLTQ": "CMOVQLT",
"CMOVLOQ": "CMOVQCS",
"CMOVMIQ": "CMOVQMI",
"CMOVNAQ": "CMOVQLS",
"CMOVNAEQ": "CMOVQCS",
"CMOVNBQ": "CMOVQCC",
"CMOVNBEQ": "CMOVQHI",
"CMOVNCQ": "CMOVQCC",
"CMOVNEQ": "CMOVQNE",
"CMOVNGQ": "CMOVQLE",
"CMOVNGEQ": "CMOVQLT",
"CMOVNLQ": "CMOVQGE",
"CMOVNLEQ": "CMOVQGT",
"CMOVNOQ": "CMOVQOC",
"CMOVNPQ": "CMOVQPC",
"CMOVNSQ": "CMOVQPL",
"CMOVNZQ": "CMOVQNE",
"CMOVOQ": "CMOVQOS",
"CMOVOCQ": "CMOVQOC",
"CMOVOSQ": "CMOVQOS",
"CMOVPQ": "CMOVQPS",
"CMOVPCQ": "CMOVQPC",
"CMOVPEQ": "CMOVQPS",
"CMOVPOQ": "CMOVQPC",
"CMOVPSQ": "CMOVQPS",
"CMOVSQ": "CMOVQMI",
"CMOVZQ": "CMOVQEQ",
// Byte set on condition.
"SETA": "SETHI",
"SETAE": "SETCC",
"SETB": "SETCS",
"SETBE": "SETLS",
"SETC": "SETCS",
"SETCC": "SETCC",
"SETCS": "SETCS",
"SETE": "SETEQ",
"SETEQ": "SETEQ",
"SETG": "SETGT",
"SETGE": "SETGE",
"SETGT": "SETGT",
"SETHI": "SETHI",
"SETHS": "SETCC",
"SETL": "SETLT",
"SETLE": "SETLE",
"SETLS": "SETLS",
"SETLT": "SETLT",
"SETLO": "SETCS",
"SETMI": "SETMI",
"SETNA": "SETLS",
"SETNAE": "SETCS",
"SETNB": "SETCC",
"SETNBE": "SETHI",
"SETNC": "SETCC",
"SETNE": "SETNE",
"SETNG": "SETLE",
"SETNGE": "SETLT",
"SETNL": "SETGE",
"SETNLE": "SETGT",
"SETNO": "SETOC",
"SETNP": "SETPC",
"SETNS": "SETPL",
"SETNZ": "SETNE",
"SETO": "SETOS",
"SETOC": "SETOC",
"SETOS": "SETOS",
"SETP": "SETPS",
"SETPC": "SETPC",
"SETPE": "SETPS",
"SETPO": "SETPC",
"SETPS": "SETPS",
"SETS": "SETMI",
"SETZ": "SETEQ",
// x87 floating-point arithmetic.
"FADD": "FADDD",
"FADDP": "FADDDP",
"FADDS": "FADDF",
"FCOM": "FCOMD",
"FCOMS": "FCOMF",
"FCOMPS": "FCOMFP",
"FDIV": "FDIVD",
"FDIVS": "FDIVF",
"FDIVRS": "FDIVFR",
}
@@ -0,0 +1,989 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"errors"
"fmt"
"io"
"log"
"math"
"os"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
"time"
"rsc.io/pdf"
)
// listing holds information about one or more parsed manual pages
// concerning a single instruction listing.
type listing struct {
pageNum int // page the listing was parsed from
name string // instruction heading
mtables [][][]string // mnemonic tables (at most one per page)
enctables [][][]string // encoding tables (at most one per page)
compat string // text collected from the page's compatibility section, if any
}
// logReaderAt wraps an io.ReaderAt and logs the size and offset of every
// read before forwarding it to the wrapped reader.
type logReaderAt struct {
	f io.ReaderAt
}

// ReadAt logs the request and then delegates to the underlying reader,
// returning its result unchanged.
func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) {
	size := len(x)
	log.Printf("read %d @ %d", size, off)
	n, err := l.f.ReadAt(x, off)
	return n, err
}
const (
	// cacheBlockSize is the size in bytes of one cache block.
	// It must be a power of two: offsets are split with cacheBlockSize-1 as a mask.
	cacheBlockSize = 64 * 1024
	// numCacheBlock is the number of blocks allocated by newCachedReaderAt.
	numCacheBlock = 16
)

// cachedReaderAt is an io.ReaderAt that serves small reads from a fixed
// set of cacheBlockSize-aligned blocks read from r.
type cachedReaderAt struct {
	r     io.ReaderAt
	cache *cacheBlock // singly linked list, most recently used block first
}

// cacheBlock is one entry in the cache list.
type cacheBlock struct {
	next   *cacheBlock
	buf    []byte // data read for this block; nil until first used, may be shorter than cacheBlockSize
	offset int64  // offset in the underlying reader of buf[0]
	err    error  // error returned by the read that filled buf
}

// newCachedReaderAt returns a cachedReaderAt reading from r with
// numCacheBlock empty blocks.
func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt {
	c := &cachedReaderAt{
		r: r,
	}
	for i := 0; i < numCacheBlock; i++ {
		c.cache = &cacheBlock{next: c.cache}
	}
	return c
}

// ReadAt implements io.ReaderAt. Reads of cacheBlockSize bytes or more
// bypass the cache; smaller reads are assembled from cached blocks.
// When fewer than len(p) bytes are available it returns the error recorded
// for the block where the data ran out.
func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) {
	// Assume large reads indicate a caller that doesn't need caching.
	if len(p) >= cacheBlockSize {
		return c.r.ReadAt(p, offset)
	}

	for n < len(p) {
		o := offset + int64(n)
		f := o & (cacheBlockSize - 1)
		b := c.readBlock(o - f)

		// A block may be short (end of input or a failed read), so the
		// position inside it can lie beyond its data. Slicing b.buf[f:]
		// in that case would panic.
		copied := 0
		if f < int64(len(b.buf)) {
			copied = copy(p[n:], b.buf[f:])
			n += copied
		}
		if n == len(p) {
			break
		}
		if b.err != nil {
			return n, b.err
		}
		if copied == 0 {
			// The block is short but carries no error, so trying again
			// could never make progress.
			return n, errShortRead
		}
	}
	return n, nil
}

// errShortRead is returned by ReadAt when the underlying reader delivered a
// short block without reporting an error.
var errShortRead = errors.New("short read")

// readBlock returns the cache block holding the data at offset, which must
// be a multiple of cacheBlockSize. On a miss it reuses the block at the
// tail of the list, fills it from the underlying reader and records the
// read's error in the block. Blocks that were hit, or filled with at least
// one byte, are moved to the front of the list.
func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock {
	if offset&(cacheBlockSize-1) != 0 {
		panic("misuse of cachedReaderAt.readBlock")
	}

	// Look in cache.
	var b, prev *cacheBlock
	for b = c.cache; ; prev, b = b, b.next {
		if b.buf != nil && b.offset == offset {
			// Move to front.
			if prev != nil {
				prev.next = b.next
				b.next = c.cache
				c.cache = b
			}
			return b
		}
		if b.next == nil {
			break
		}
	}

	// Otherwise b is LRU block in cache, prev points at b.
	if b.buf == nil {
		b.buf = make([]byte, cacheBlockSize)
	}
	b.offset = offset
	n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset)
	b.buf = b.buf[:n]
	b.err = err
	// prev is nil only when the list has a single block, which is then
	// already at the front.
	if n > 0 && prev != nil {
		// Move to front.
		prev.next = nil
		b.next = c.cache
		c.cache = b
	}
	return b
}
// pdfOpen opens the named file and returns a PDF reader that reads it
// through a cachedReaderAt. On any failure the file is closed before the
// error is returned. On success the file is left open, since the returned
// reader still reads from it.
func pdfOpen(name string) (*pdf.Reader, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}

	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, err
	}

	r, err := pdf.NewReader(newCachedReaderAt(f), fi.Size())
	if err != nil {
		// Nothing holds on to f once NewReader has failed; don't leak it.
		f.Close()
		return nil, err
	}
	return r, nil
}
// parse opens the PDF named by *flagFile, walks its pages and returns the
// instructions collected from them. It exits via log.Fatal if the file
// cannot be opened or the outline lists fewer than 200 instruction
// headings. Other problems (unexpected or missing headings, stray tables)
// are reported on standard error and parsing continues.
func parse() []*instruction {
	reader, err := pdfOpen(*flagFile)
	if err != nil {
		log.Fatal(err)
	}

	// Find instruction set reference in outline, to build instruction list.
	headings := instHeadings(reader.Outline())
	if len(headings) < 200 {
		log.Fatalf("only found %d instructions in table of contents", len(headings))
	}

	var (
		insts   []*instruction
		pending *listing // listing currently being accumulated, if any
	)

	// flush hands the pending listing to processListing and clears it.
	flush := func() {
		if pending == nil {
			return
		}
		if len(pending.mtables) == 0 || len(pending.mtables[0]) <= 1 {
			fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", pending.pageNum, pending.name)
		}
		processListing(pending, &insts)
		pending = nil
	}

	// claim reports whether name is an outstanding heading and, if so,
	// blanks it out so that leftovers can be reported at the end.
	claim := func(name string) bool {
		for i := range headings {
			if headings[i] == name {
				headings[i] = ""
				return true
			}
		}
		return false
	}

	// Scan document looking for instructions.
	// Must find exactly the ones in the outline.
	lastPage := reader.NumPage()
	for pageNum := 1; pageNum <= lastPage; pageNum++ {
		if onlySomePages && !isDebugPage(pageNum) {
			continue
		}
		parsed := parsePage(reader.Page(pageNum), pageNum)

		switch {
		case parsed.name != "":
			// A new heading ends whatever listing came before it.
			flush()
			if claim(parsed.name) {
				pending = parsed
			} else {
				fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name)
			}

		case pending != nil:
			// Continuation page of the listing in progress.
			merge(pending, parsed)

		default:
			// Content outside any listing.
			if parsed.mtables != nil {
				fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum)
			}
			if parsed.enctables != nil {
				fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum)
			}
			if parsed.compat != "" {
				fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum)
			}
		}
	}
	flush()

	if onlySomePages {
		return insts
	}
	for _, headline := range headings {
		if headline != "" {
			fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
		}
	}
	return insts
}
// isDebugPage reports whether the -debugpage flag mentions page n.
// The argument is a comma-separated list of pages.
// Characters other than digits and commas are ignored, and an empty
// list entry counts as page 0.
// Maybe some day it will support ranges.
func isDebugPage(n int) bool {
	list := *flagDebugPage
	page := 0
	for pos := 0; pos < len(list); pos++ {
		c := list[pos]
		switch {
		case c == ',':
			// End of one entry: compare, then start the next.
			if page == n {
				return true
			}
			page = 0
		case '0' <= c && c <= '9':
			page = page*10 + int(c) - '0'
		}
	}
	// The last entry has no trailing comma.
	return page == n
}
// merge appends the tables and compatibility text of y to the running
// collection in x. y must be a continuation page: if it carries a heading
// of its own, merge reports the problem on standard error and leaves x
// untouched.
func merge(x, y *listing) {
	if y.name == "" {
		x.mtables = append(x.mtables, y.mtables...)
		x.enctables = append(x.enctables, y.enctables...)
		x.compat = x.compat + y.compat
		return
	}
	fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum)
}
// instHeadings returns the list of instruction headings from the table of contents.
// When we parse the pages we expect to find every one of these.
func instHeadings(outline pdf.Outline) []string {
return appendInstHeadings(outline, nil)
}
// instRE matches the titles of the outline sections whose direct children
// are collected as instruction headings by appendInstHeadings.
var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`)
// The headings are inconsistent about dash and superscript usage. Normalize.
//
// fixDash is applied both to outline titles and to page headlines so that
// the two can be compared as plain strings. Every em dash (U+2014),
// en dash (U+2013) and hyphen, together with any single space on either
// side of it, becomes a bare "-". strings.Replacer prefers the pattern
// listed first when several match at the same position, so the longer
// " x " forms must stay ahead of the shorter ones.
//
// The en dash is written as \u2013 so that the patterns survive tools that
// drop or mangle the character. Without it the patterns collapse to " "
// and "", which turn every space into "-" and insert "-" at every position.
var fixDash = strings.NewReplacer(
	"Compute 2 1", "Compute 2^x-1",
	"Compute 2 \u20131", "Compute 2^x-1",
	"Compute 2x-1", "Compute 2^x-1",
	"Compute 2x1", "Compute 2^x-1",
	"Compute 2x\u20131", "Compute 2^x-1",
	"/ FUCOMI", "/FUCOMI",
	"Compute y log x", "Compute y * log₂x",
	"Compute y * log2x", "Compute y * log₂x",
	"Compute y * log2(x +1)", "Compute y * log₂(x+1)",
	"Compute y log (x +1)", "Compute y * log₂(x+1)",
	" — ", "-",
	"— ", "-",
	" —", "-",
	"—", "-",
	" \u2013 ", "-",
	"\u2013 ", "-",
	" \u2013", "-",
	"\u2013", "-",
	" - ", "-",
	"- ", "-",
	" -", "-",
)
// appendInstHeadings walks the outline tree depth-first and appends to list
// the dash-normalized titles of the direct children of every node whose
// own title matches instRE. It returns the extended list.
func appendInstHeadings(outline pdf.Outline, list []string) []string {
	children := outline.Child

	// Collect this node's children before descending, so that a section's
	// headings precede anything found deeper in the tree.
	if instRE.MatchString(outline.Title) {
		for i := range children {
			heading := fixDash.Replace(children[i].Title)
			list = append(list, heading)
		}
	}

	for i := range children {
		list = appendInstHeadings(children[i], list)
	}
	return list
}
// dateRE matches a "Month YYYY" date with a year in 1900-2099.
var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`)

// orderNumberRE extracts the manual's order number from the first page.
var orderNumberRE = regexp.MustCompile(`Order Number: ([\w-\-]+)`)

// parsePage parses a single PDF page and returns the content it found.
//
// The returned listing always has pageNum set. name is set only when the
// page starts a new instruction (an instruction headline follows the page
// header). mtables, enctables and compat hold whatever mnemonic table,
// operand encoding table and compatibility text were found on the page.
// Pages whose header is not an instruction-reference header yield a listing
// with only pageNum set.
//
// As a side effect, page 1 prints the "# ..." banner lines of the output
// to standard output.
func parsePage(p pdf.Page, pageNum int) *listing {
	if debugging {
		fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum)
	}

	parsed := new(listing)
	parsed.pageNum = pageNum

	content := p.Content()

	// Rewrite the font of a few stray glyphs (≠, footnote stars, small
	// commas, a superscript 1) to the body font before words are assembled.
	for i, t := range content.Text {
		if match(t, "Symbol", 11, "≠") {
			t.Font = "NeoSansIntel"
			t.FontSize = 9
			content.Text[i] = t
		}
		if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" {
			t.Font = "NeoSansIntel"
			t.FontSize = 9
			// Put the glyph on the same line as the text that follows it.
			if i+1 < len(content.Text) {
				t.Y = content.Text[i+1].Y
			}
			content.Text[i] = t
		}
	}

	text := findWords(content.Text)

	// Normalize font sizes and font names that vary between pages.
	for i, t := range text {
		if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") {
			t.FontSize = 9
			text[i] = t
		}
		if t.Font == "NeoSansIntel-Medium" {
			t.Font = "NeoSansIntelMedium"
			text[i] = t
		}
		if t.Font == "NeoSansIntel-Italic" {
			t.Font = "NeoSansIntel,Italic"
			text[i] = t
		}
	}

	if debugging {
		for _, t := range text {
			fmt.Println(t)
		}
	}

	// The first page supplies the order number and date for the banner.
	if pageNum == 1 {
		var buf bytes.Buffer
		for _, t := range text {
			buf.WriteString(t.S + "\n")
		}
		all := buf.String()
		m := orderNumberRE.FindStringSubmatch(all)
		num := "???"
		if m != nil {
			num = m[1]
		}
		date := dateRE.FindString(all)
		if date == "" {
			date = "???"
		}

		fmt.Printf("# x86 instruction set description version %s, %s\n",
			specFormatVersion, time.Now().Format("2006-01-02"))
		fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date)
		fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n")
	}

	// Remove text we should ignore.
	out := text[:0]
	for _, t := range text {
		if shouldIgnore(t) {
			continue
		}
		out = append(out, t)
	}
	text = out

	// Page header must say instruction set reference.
	if len(text) == 0 {
		return parsed
	}
	if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) &&
		!match(text[0], "NeoSansIntel", 9, "EXTENSIONS") {
		return parsed
	}
	text = text[1:]

	enctable := findEncodingTable(text)
	if enctable != nil {
		parsed.enctables = append(parsed.enctables, enctable)
	}

	parsed.compat = findCompat(text)

	// Narrow scope for finding mnemonic table.
	// Must be last, since it trims text.

	// Next line is headline. Can wrap to multiple lines.
	if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) {
		// text is empty when the page holds nothing but its header, so
		// text[0] may only be printed when there is something to print.
		if debugging && len(text) > 0 {
			fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0])
		}
	} else {
		parsed.name = text[0].S
		text = text[1:]
		for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") {
			parsed.name += " " + text[0].S
			text = text[1:]
		}
		parsed.name = fixDash.Replace(parsed.name)
	}

	// Table follows; heading is NeoSansIntelMedium and rows are NeoSansIntel.
	i := 0
	for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") {
		i++
	}
	for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" {
		i++
	}
	mtable := findMnemonicTable(text[:i])
	if mtable != nil {
		parsed.mtables = append(parsed.mtables, mtable)
	}

	return parsed
}
// match reports whether t is set in the given font, at a size within 0.1
// of size, and contains substr. An empty substr matches any text.
func match(t pdf.Text, font string, size float64, substr string) bool {
	if t.Font != font {
		return false
	}
	if delta := math.Abs(t.FontSize - size); !(delta < 0.1) {
		return false
	}
	return strings.Contains(t.S, substr)
}
// shouldIgnore reports whether t is page decoration that must be dropped
// before the page is interpreted: footnote markers and superscripts.
func shouldIgnore(t pdf.Text) bool {
	// Ignore footnote stars, which are in Arial.
	// Also, the page describing MOVS has a tiny 2pt Arial backslash.
	if strings.HasPrefix(t.Font, "Arial") && (t.S == "*" || t.S == "\\") {
		return true
	}

	// Ignore superscript numbers, superscript ST(0), and superscript x.
	isDigit := len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9'
	if !isDigit && t.S != "ST(0)" && t.S != "x" {
		return false
	}
	// Such text counts as a superscript only in one of these font/size
	// combinations.
	return match(t, "NeoSansIntel", 7.2, "") ||
		match(t, "NeoSansIntel", 5.6, "") ||
		match(t, "NeoSansIntelMedium", 8, "") ||
		match(t, "NeoSansIntelMedium", 9.6, "")
}
// isInstHeadline reports whether s looks like an instruction headline:
// it contains an em dash, a space-padded hyphen, or the one known
// irregular headline for PTEST.
func isInstHeadline(s string) bool {
	for _, marker := range [...]string{"—", " - ", "PTEST- Logical Compare"} {
		if strings.Contains(s, marker) {
			return true
		}
	}
	return false
}
// findWords groups the individual text fragments in chars into phrases.
// It sorts chars in place, snaps Y coordinates that differ by less than
// nudge onto a common value, and then joins neighbouring fragments on the
// same line that sameFont accepts and that have the same font size.
// Each returned pdf.Text carries the first fragment's font (without an
// Italic suffix), size and position, plus the joined string.
func findWords(chars []pdf.Text) (words []pdf.Text) {
// Sort by Y coordinate and normalize.
const nudge = 1
sort.Sort(pdf.TextVertical(chars))
old := -100000.0
for i, c := range chars {
// A Y within nudge of the previous distinct Y is treated as the same line.
if c.Y != old && math.Abs(old-c.Y) < nudge {
chars[i].Y = old
} else {
old = c.Y
}
}
// Sort by Y coordinate, breaking ties with X.
// This will bring letters in a single word together.
sort.Sort(pdf.TextVertical(chars))
// Loop over chars.
for i := 0; i < len(chars); {
// Find all chars on line.
j := i + 1
for j < len(chars) && chars[j].Y == chars[i].Y {
j++
}
var end float64
// Split line into words (really, phrases).
for k := i; k < j; {
ck := &chars[k]
s := ck.S
end = ck.X + ck.W
// Gap thresholds scale with the font size of the phrase's first fragment:
// up to charSpace the next fragment is glued on directly,
// up to wordSpace it is joined with a single space.
charSpace := ck.FontSize / 6
wordSpace := ck.FontSize * 2 / 3
l := k + 1
for l < j {
// Grow word.
cl := &chars[l]
if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace {
s += cl.S
end = cl.X + cl.W
l++
continue
}
// Add space to phrase before next word.
if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace {
s += " " + cl.S
end = cl.X + cl.W
l++
continue
}
break
}
f := ck.Font
f = strings.TrimSuffix(f, ",Italic")
f = strings.TrimSuffix(f, "-Italic")
// NOTE(review): the fifth field receives end, the right edge of the
// phrase, not end-ck.X. If that field is the width W, callers computing
// X+W/2 get a shifted value; confirm against pdf.Text before changing.
words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s})
k = l
}
i = j
}
return words
}
// sameFont reports whether text in fonts f1 and f2 may be joined into one
// phrase. Italic variants (",Italic" or "-Italic" suffix) count as the
// same font as their base, and the Symbol and TimesNewRoman fonts are
// compatible with every font.
//
// Earlier versions derived f2 from f1 by mistake, which made every pair of
// fonts compare equal.
func sameFont(f1, f2 string) bool {
	f1 = strings.TrimSuffix(f1, ",Italic")
	f1 = strings.TrimSuffix(f1, "-Italic")
	f2 = strings.TrimSuffix(f2, ",Italic")
	f2 = strings.TrimSuffix(f2, "-Italic")
	return f1 == f2 ||
		f1 == "Symbol" || f2 == "Symbol" ||
		f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
}
// findMnemonicTable arranges text into a table of strings.
// Column positions are the distinct X coordinates of the
// NeoSansIntelMedium (heading) text; rows are the distinct Y coordinates.
// Heading rows, and rows with at least half their cells empty, are folded
// into the preceding row with a newline between the pieces. The heading
// row is then normalized with fixHeading. It returns nil when text holds
// no heading text. text is sorted in place.
func findMnemonicTable(text []pdf.Text) [][]string {
sort.Sort(pdf.TextHorizontal(text))
const nudge = 1
old := -100000.0
var col []float64
for i, t := range text {
if t.Font != "NeoSansIntelMedium" { // only headings count
continue
}
// Heading X positions within nudge of the previous column are snapped
// onto it; anything else starts a new column.
if t.X != old && math.Abs(old-t.X) < nudge {
text[i].X = old
} else if t.X != old {
old = t.X
col = append(col, old)
}
}
sort.Sort(pdf.TextVertical(text))
if len(col) == 0 {
return nil
}
y := -100000.0
var table [][]string
var line []string
// bold is the index of the last row that began with heading text, or -1.
bold := -1
for _, t := range text {
if t.Y != y {
table = append(table, make([]string, len(col)))
line = table[len(table)-1]
y = t.Y
if t.Font == "NeoSansIntelMedium" {
bold = len(table) - 1
}
}
// Pick the rightmost column that starts at or before t.X (with nudge slack).
i := 0
for i+1 < len(col) && col[i+1] <= t.X+nudge {
i++
}
if line[i] != "" {
line[i] += " "
}
line[i] += t.S
}
var mtable [][]string
for i, t := range table {
// NOTE(review): for i == 0 with bold < 0 and halfMissing(t) true, mtable
// is still empty and the index below would be -1; confirm that the
// first row always begins with heading text.
if 0 < i && i <= bold || bold < i && halfMissing(t) {
// merge with earlier line
last := mtable[len(mtable)-1]
for j, s := range t {
if s != "" {
last[j] += "\n" + s
}
}
} else {
mtable = append(mtable, t)
}
}
if bold >= 0 {
heading := mtable[0]
for i, x := range heading {
heading[i] = fixHeading.Replace(x)
}
}
return mtable
}
// fixHeading normalizes the column headings of a mnemonic table, which
// arrive with varying line breaks, spacing, capitalization and footnote
// stars, to one canonical spelling per column.
var fixHeading = strings.NewReplacer(
"64/32-\nbit\nMode", "64/32-Bit Mode",
"64/32-\nbit Mode", "64/32-Bit Mode",
"64/32-bit\nMode", "64/32-Bit Mode",
"64/3\n2-bit\nMode", "64/32-Bit Mode",
"64/32 bit\nMode\nSupport", "64/32-Bit Mode",
"64/32bit\nMode\nSupport", "64/32-Bit Mode",
"64/32\n-bit\nMode", "64/32-Bit Mode",
"64/32\nbit Mode\nSupport", "64/32-Bit Mode",
"64-Bit\nMode", "64-Bit Mode",
"64-bit\nMode", "64-Bit Mode",
"Op/ En", "Op/En",
"Op/\nEn", "Op/En",
"Op/\nEN", "Op/En",
"Op /\nEn", "Op/En",
"Opcode***", "Opcode",
"Opcode**", "Opcode",
"Opcode*", "Opcode",
"/\nInstruction", "/Instruction",
"CPUID Fea-\nture Flag", "CPUID Feature Flag",
"CPUID\nFeature\nFlag", "CPUID Feature Flag",
"CPUID\nFeature Flag", "CPUID Feature Flag",
"CPUIDFeature\nFlag", "CPUID Feature Flag",
"Compat/\nLeg Mode*", "Compat/Leg Mode",
"Compat/\nLeg Mode", "Compat/Leg Mode",
"Compat/ *\nLeg Mode", "Compat/Leg Mode",
)
// halfMissing reports whether at least len(x)/2 (integer division) of the
// strings in x are empty.
func halfMissing(x []string) bool {
	filled := 0
	for i := range x {
		if len(x[i]) > 0 {
			filled++
		}
	}
	missing := len(x) - filled
	return missing >= len(x)/2
}
// findEncodingTable extracts the "Instruction Operand Encoding" table from
// text. Columns are located by the horizontal centers of the "Op/En" and
// "Operand" heading cells that follow the table title; the table ends at
// the "Description" section heading. Rows whose first cell is empty are
// folded into the previous row, and page-footer rows are dropped.
// It returns nil when no column headings are found. text is sorted in place.
func findEncodingTable(text []pdf.Text) [][]string {
// Look for operand encoding table.
sort.Sort(pdf.TextVertical(text))
var col []float64
sawTitle := false
// center returns the horizontal midpoint of t as computed from X and W.
center := func(t pdf.Text) float64 {
return t.X + t.W/2
}
start := 0
end := len(text)
for i, t := range text {
if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") {
sawTitle = true
start = i + 1
continue
}
// Everything before the table title is irrelevant.
if !sawTitle {
continue
}
if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") {
if debugging {
fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t)
}
col = append(col, center(t))
}
if match(t, "NeoSansIntelMedium", 10, "Description") {
end = i
break
}
}
text = text[start:end]
if len(col) == 0 {
return nil
}
const nudge = 20
y := -100000.0
var table [][]string
var line []string
for _, t := range text {
// A change in Y starts a new table row.
if t.Y != y {
table = append(table, make([]string, len(col)))
line = table[len(table)-1]
y = t.Y
}
// Pick the rightmost column whose center is at most nudge to the
// right of this text's center.
i := 0
x := center(t)
for i+1 < len(col) && col[i+1] <= x+nudge {
i++
}
if debugging {
fmt.Printf("text at %.2f: %v => %d\n", x, t, i)
}
if line[i] != "" {
line[i] += " "
}
line[i] += t.S
}
// Filter in place, reusing table's backing array.
out := table[:0]
for _, line := range table {
if strings.HasPrefix(line[len(line)-1], "Vol. 2") { // page footer
continue
}
// A row with an empty first cell continues the previous row.
if line[0] == "" && len(out) > 0 {
last := out[len(out)-1]
for i, col := range line {
if col != "" {
last[i] += " " + col
}
}
continue
}
out = append(out, line)
}
table = out
return table
}
// findCompat returns the compatibility text found in text, one fragment
// per line. It collects each "Architecture Compatibility" section heading
// and the Verdana 9pt text that follows it until the next section heading,
// plus any fragment containing "were introduced" wherever it appears.
// text is sorted in place.
func findCompat(text []pdf.Text) string {
	sort.Sort(pdf.TextVertical(text))

	var result string
	emit := func(s string) {
		result += s + "\n"
	}

	collecting := false
	for i := range text {
		t := text[i]

		// Every section heading switches collection on or off.
		if match(t, "NeoSansIntelMedium", 10, "") {
			collecting = strings.Contains(t.S, "Architecture Compatibility")
			if collecting {
				emit(t.S)
			}
		}

		switch {
		case collecting && match(t, "Verdana", 9, ""):
			emit(t.S)
		case strings.Contains(t.S, "were introduced"):
			emit(t.S)
		}
	}
	return result
}
// processListing converts the tables collected for one instruction listing
// into instruction records and appends them to *insts.
//
// The encoding tables in p.enctables are first indexed by their first
// column, so that the "Op/En" cell of a mnemonic table row can be resolved
// to an argument list. Each data row of each table in p.mtables then becomes
// one instruction: the row is interpreted cell by cell according to the table
// heading, after which a fixed list of textual corrections is applied to the
// opcode, syntax and CPUID strings. Instructions whose final syntax is listed
// in instBlacklist are not appended.
//
// If a heading is not recognized or a cell is malformed, a diagnostic and a
// dump of all of p.mtables are written to standard error and the function
// returns. Instructions appended before the failure remain in *insts.
func processListing(p *listing, insts *[]*instruction) {
// When debugging, dump the raw tables and compatibility text as extracted.
if debugging {
for _, table := range p.mtables {
fmt.Printf("table:\n")
for _, row := range table {
fmt.Printf("%q\n", row)
}
}
fmt.Printf("enctable:\n")
for _, table := range p.enctables {
for _, row := range table {
fmt.Printf("%q\n", row)
}
}
fmt.Printf("compat:\n%s", p.compat)
}
// With -compat, print the compatibility text as '#'-prefixed lines.
if *flagCompat && p.compat != "" {
fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1))
}
// encs maps the first cell of each encoding-table row (skipping the
// first row of every table) to the remaining cells of that row.
encs := make(map[string][]string)
for _, table := range p.enctables {
for _, row := range table[1:] {
// Trim trailing cells that are "NA", empty, or " source";
// the first cell is always kept because it is the map key.
for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") {
row = row[:len(row)-1]
}
encs[row[0]] = row[1:]
}
}
// wrong holds the diagnostic reported at BadTable.
var wrong string
for _, table := range p.mtables {
heading := table[0]
for _, row := range table[1:] {
// Skip rows that merely repeat the heading; the first-cell comparison
// is a cheap filter before the full DeepEqual.
if row[0] == heading[0] && reflect.DeepEqual(row, heading) {
continue
}
// Special case: one CMOVG row has its two validity cells as
// "V/N.E." and "NA"; rewrite them as "V" and "N.E.".
if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" {
row[3] = "V"
row[4] = "N.E."
}
inst := new(instruction)
inst.page = p.pageNum
// Collapse all whitespace runs in the compatibility text to single spaces.
inst.compat = strings.Join(strings.Fields(p.compat), " ")
// Interpret each cell according to the heading of its column.
for i, hdr := range heading {
// x is the cell with newlines flattened to spaces; the
// "Opcode/Instruction" case re-reads the raw cell instead.
x := row[i]
x = strings.Replace(x, "\n", " ", -1)
switch strings.TrimSpace(hdr) {
default:
wrong = "unexpected header: " + strconv.Quote(hdr)
goto BadTable
case "Opcode/Instruction":
// Combined column: the text before the first newline is the opcode,
// the rest is the syntax. Newlines that do not mark that boundary
// are removed first, and the repaired cell is stored back in row.
x = row[i]
if strings.HasPrefix(x, "\nVEX") {
x = x[1:]
row[i] = x
}
if strings.Contains(x, "\n/r ") {
x = strings.Replace(x, "\n/r ", " /r ", -1)
row[i] = x
}
if strings.Contains(x, ",\nimm") {
x = strings.Replace(x, ",\nimm", ", imm", -1)
row[i] = x
}
if strings.Count(x, "\n") < 1 {
wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x)
goto BadTable
}
// Note: this i shadows the column index for the rest of the case.
i := strings.Index(x, "\n")
inst.opcode = x[:i]
inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1)
case "Opcode":
inst.opcode = x
case "Instruction":
inst.syntax = x
case "Op/En":
inst.args = encs[x]
// If the lookup failed but the only known encoding is "A", use that one.
if inst.args == nil && len(encs) == 1 && encs["A"] != nil {
inst.args = encs["A"]
}
// In the December 2015 manual, PREFETCHW says
// encoding A but the table gives encoding M.
if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil {
inst.args = encs["M"]
}
case "64-Bit Mode":
// On failure parseMode returns its input, so x still holds the cell text.
x, ok := parseMode(x)
if !ok {
wrong = "unexpected value for 64-Bit Mode column: " + x
goto BadTable
}
inst.valid64 = x
case "Compat/Leg Mode":
x, ok := parseMode(x)
if !ok {
wrong = "unexpected value for Compat/Leg Mode column: " + x
goto BadTable
}
inst.valid32 = x
case "64/32-Bit Mode":
// Combined column: "<64-bit mode>/<32-bit mode>", split at the first slash.
i := strings.Index(x, "/")
if i < 0 {
wrong = "unexpected value for 64/32-Bit Mode column: " + x
goto BadTable
}
x1, ok1 := parseMode(x[:i])
x2, ok2 := parseMode(x[i+1:])
if !ok1 || !ok2 {
wrong = "unexpected value for 64/32-Bit Mode column: " + x
goto BadTable
}
inst.valid64 = x1
inst.valid32 = x2
case "CPUID Feature Flag":
inst.cpuid = x
case "Description":
// Multiple Description columns are joined with a single space.
if inst.desc != "" {
inst.desc += " "
}
inst.desc += x
}
}
// Fixup various typos or bugs in opcode descriptions.
if inst.opcode == "VEX.128.66.0F.W0 6E /" {
inst.opcode += "r"
}
// fix replaces every occurrence of old with new in inst.opcode.
// The replacements are applied in order, and later ones see the
// result of earlier ones.
fix := func(old, new string) {
inst.opcode = strings.Replace(inst.opcode, old, new, -1)
}
fix(" imm8", " ib")
fix("REX.w", "REX.W")
fix("REX.W+", "REX.W +")
fix(" 0f ", " 0F ")
fix(". 0F38", ".0F38")
fix("0F .WIG", "0F.WIG")
fix("0F38 .WIG", "0F38.WIG")
fix("NDS .LZ", "NDS.LZ")
fix("58+ r", "58+r")
fix("B0+ ", "B0+")
fix("B8+ ", "B8+")
fix("40+ ", "40+")
fix("*", "")
fix(",", " ")
fix("/", " /")
fix("REX.W +", "REX.W")
fix("REX +", "REX")
fix("REX 0F BE", "REX.W 0F BE")
fix("REX 0F B2", "REX.W 0F B2")
fix("REX 0F B4", "REX.W 0F B4")
fix("REX 0F B5", "REX.W 0F B5")
fix("0F38.0", "0F38.W0")
fix(".660F.", ".66.0F.")
fix("VEX128", "VEX.128")
fix("0F3A.W0.1D", "0F3A.W0 1D")
// Collapse whitespace runs in the opcode to single spaces.
inst.opcode = strings.Join(strings.Fields(inst.opcode), " ")
// From here on, fix rewrites inst.syntax.
fix = func(old, new string) {
inst.syntax = strings.Replace(inst.syntax, old, new, -1)
}
fix("xmm1 xmm2", "xmm1, xmm2")
fix("r16/m16", "r/m16")
fix("r32/m161", "r32/m16") // really r32/m16¹ (footnote)
fix("r32/m32", "r/m32")
fix("r64/m64", "r/m64")
fix("\u2013", "-")
fix("mm3 /m", "mm3/m")
fix("mm3/.m", "mm3/m")
// Round-trip through split/join to normalize argument spacing and
// drop trailing '*' markers from arguments.
inst.syntax = joinSyntax(splitSyntax(inst.syntax))
// From here on, fix rewrites inst.cpuid.
fix = func(old, new string) {
inst.cpuid = strings.Replace(inst.cpuid, old, new, -1)
}
fix("PCLMUL- QDQ", "PCLMULQDQ")
fix("PCL- MULQDQ", "PCLMULQDQ")
fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX")
if !instBlacklist[inst.syntax] {
*insts = append(*insts, inst)
}
}
}
return
// BadTable reports the problem recorded in wrong, followed by a dump of
// every mnemonic table of this listing, on standard error.
BadTable:
fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong)
for _, table := range p.mtables {
for _, t := range table {
fmt.Fprintf(os.Stderr, "\t%q\n", t)
}
}
fmt.Fprintf(os.Stderr, "\n")
}
// parseMode maps the text of a validity cell to its canonical abbreviation
// ("V", "I", "N.E.", "N.P.", "N.S." or "N.I."), ignoring surrounding
// whitespace. The boolean reports whether the text was recognized; when it
// was not, the input is returned unchanged.
func parseMode(s string) (string, bool) {
	var canonical string
	switch trimmed := strings.TrimSpace(s); trimmed {
	case "V", "Valid", "Valid*":
		canonical = "V"
	case "I", "i", "Inv.", "Invalid", "Invalid*":
		canonical = "I"
	case "NE", "N.E.", "N. E.":
		canonical = "N.E."
	case "N.P.", "N. P.":
		canonical = "N.P."
	case "N.S.", "N. S.":
		canonical = "N.S."
	case "N.I.", "N. I.":
		canonical = "N.I."
	default:
		return s, false
	}
	return canonical, true
}
// splitSyntax separates an instruction syntax such as "ADD r/m8, imm8" into
// its opcode and its argument list. The opcode is everything before the
// first space; the remainder is split at commas, and each argument has
// surrounding whitespace and then any trailing '*' characters removed.
// A syntax without a space yields the whole string and a nil argument list.
func splitSyntax(syntax string) (op string, args []string) {
	sp := strings.Index(syntax, " ")
	if sp < 0 {
		return syntax, nil
	}
	fields := strings.Split(syntax[sp+1:], ",")
	for n := range fields {
		fields[n] = strings.TrimRight(strings.TrimSpace(fields[n]), "*")
	}
	return syntax[:sp], fields
}
// joinSyntax is the inverse of splitSyntax: it renders an opcode and its
// arguments as "OP arg1, arg2, ...". With no arguments it returns op alone.
func joinSyntax(op string, args []string) string {
	if len(args) > 0 {
		operands := strings.Join(args, ", ")
		return op + " " + operands
	}
	return op
}
@@ -0,0 +1,320 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// X86spec reads the “Intel® 64 and IA-32 Architectures Software Developer's Manual”
// to collect instruction encoding details and writes those details to standard output
// in CSV format.
//
// Usage:
//
// x86spec [-f file] [-u url] >x86.csv
//
// The -f flag specifies the input file (default x86manual.pdf), the Intel instruction
// set reference manual in PDF form.
// If the input file does not exist, it will be created by downloading the manual.
//
// The -u flag specifies the URL from which to download the manual
// (default https://golang.org/s/x86manual, which redirects to Intel's site).
// The URL is downloaded only when the file named by the -f flag is missing.
//
// There are additional debugging flags, not shown. Run x86spec -help for the list.
//
// # File Format
//
// TODO: Mention comments at top of file.
// TODO: Mention that this is version 0.2 of the file.
// TODO: Mention that file format will change incompatibly until version 1.0.
//
// Each CSV line contains these fields:
//
// 1. The Intel manual instruction mnemonic. For example, "SHR r/m32, imm8".
//
// 2. The Go assembler instruction mnemonic. For example, "SHRL imm8, r/m32".
//
// 3. The GNU binutils instruction mnemonic. For example, "shrl imm8, r/m32".
//
// 4. The instruction encoding. For example, "C1 /5 ib".
//
// 5. The validity of the instruction in 32-bit (aka compatibility, legacy) mode.
//
// 6. The validity of the instruction in 64-bit mode.
//
// 7. The CPUID feature flags that signal support for the instruction.
//
// 8. Additional comma-separated tags containing hints about the instruction.
//
// 9. The read/write actions of the instruction on the arguments used in
// the Intel mnemonic. For example, "rw,r" to denote that "SHR r/m32, imm8"
// reads and writes its first argument but only reads its second argument.
//
// 10. Whether the opcode used in the Intel mnemonic has encoding forms
// distinguished only by operand size, like most arithmetic instructions.
// The string "Y" indicates yes, the string "" indicates no.
//
// 11. The data size of the operation in bits. In general this is the size corresponding
// to the Go and GNU assembler opcode suffix.
//
// The complete line used for the above examples is:
//
// "SHR r/m32, imm8","SHRL imm8, r/m32","shrl imm8, r/m32","C1 /5 ib","V","V","","operand32","rw,r","Y","32"
//
// # Mnemonics
//
// The instruction mnemonics are as used in the Intel manual, with a few exceptions.
//
// Mnemonics claiming general memory forms but that really require fixed addressing modes
// are omitted in favor of their equivalents with implicit arguments.
// For example, "CMPS m16, m16" (really CMPS [SI], [DI]) is omitted in favor of "CMPSW".
//
// Instruction forms with an explicit REP, REPE, or REPNE prefix are also omitted.
// Encoders and decoders are expected to handle those prefixes separately.
//
// Perhaps most significantly, the argument syntaxes used in the mnemonic indicate
// exactly how to derive the argument from the instruction encoding, or vice versa.
//
// Immediate values: imm8, imm8u, imm16, imm16u, imm32, imm64.
// Immediates are signed by default; the u suffix indicates an unsigned value.
//
// Memory operands. The forms m, m128, m14/28byte, m16, m16&16, m16&32, m16&64, m16:16, m16:32,
// m16:64, m16int, m256, m2byte, m32, m32&32, m32fp, m32int, m512byte, m64, m64fp, m64int,
// m8, m80bcd, m80dec, m80fp, m94/108byte. These operands always correspond to the
// memory address specified by the r/m half of the modrm encoding.
//
// Integer registers.
// The forms r8, r16, r32, r64 indicate a register selected by the modrm reg encoding.
// The forms rmr16, rmr32, rmr64 indicate a register (never memory) selected by the modrm r/m encoding.
// The forms r/m8, r/m16, r/m32, and r/m64 indicate a register or memory selected by the modrm r/m encoding.
// Forms with two sizes, like r32/m16, also indicate a register or memory selected by the modrm r/m encoding,
// but the size for a register argument differs from the size of a memory argument.
// The forms r8V, r16V, r32V, r64V indicate a register selected by the VEX.vvvv bits.
//
// Multimedia registers.
// The forms mm1, xmm1, and ymm1 indicate a multimedia register selected by the
// modrm reg encoding.
// The forms mm2, xmm2, and ymm2 indicate a register (never memory) selected by
// the modrm r/m encoding.
// The forms mm2/m64, xmm2/m128, and so on indicate a register or memory
// selected by the modrm r/m encoding.
// The forms xmmV and ymmV indicate a register selected by the VEX.vvvv bits.
// The forms xmmI and ymmI indicate a register selected by the top four bits of an /is4 immediate byte.
//
// Bound registers.
// The form bnd1 indicates a bound register selected by the modrm reg encoding.
// The form bnd2 indicates a bound register (never memory) selected by the modrm r/m encoding.
// The forms bnd2/m64 and bnd2/m128 indicate a register or memory selected by the modrm r/m encoding.
// TODO: Describe mib.
//
// One-of-a-kind operands: rel8, rel16, rel32, ptr16:16, ptr16:32,
// moffs8, moffs16, moffs32, moffs64, vm32x, vm32y, vm64x, and vm64y
// are all as in the Intel manual.
//
// # Encodings
//
// The encodings are also as used in the Intel manual, with automated corrections.
// For example, the Intel manual sometimes omits the modrm /r indicator or other trailing bytes,
// and it also contains typographical errors.
// These problems are corrected so that the CSV data may be used to generate
// tools for processing x86 machine code.
// See https://golang.org/x/arch/x86/x86map for one such generator.
//
// # Valid32 and Valid64
//
// These columns hold validity abbreviations as defined in the Intel manual:
// V, I, N.E., N.P., N.S., or N.I.
// Tools processing the data are typically only concerned with whether the
// column is "V" (valid) or not.
// This data is also corrected compared to the manual.
// For example, the manual lists many instruction forms using REX bytes
// with an incorrect "V" in the Valid32 column.
//
// # CPUID Feature Flags
//
// This column specifies CPUID feature flags that must be present in order
// to use the instruction. If multiple flags are required,
// they are listed separated by plus signs, as in PCLMULQDQ+AVX.
// The column can also list one of the values 486, Pentium, PentiumII, and P6,
// indicating that the instruction was introduced on that architecture version.
//
// # Tags
//
// The tag column does not correspond to a traditional column in the Intel manual tables.
// Instead, it is itself a comma-separated list of tags or hints derived by analysis
// of the instruction set or the instruction encodings.
//
// The tags address16, address32, and address64 indicate that the instruction form
// applies when using the specified addressing size. It may therefore be necessary to use an
// address size prefix byte to access the instruction.
// If two address tags are listed, the instruction can be used with either of those
// address sizes. An instruction will never list all three address sizes.
// (In fact, today, no instruction lists two address sizes, but that may change.)
//
// The tags operand16, operand32, and operand64 indicate that the instruction form
// applies when using the specified operand size. It may therefore be necessary to use an
// operand size prefix byte to access the instruction.
// If two operand tags are listed, the instruction can be used with either of those
// operand sizes. An instruction will never list all three operand sizes.
//
// The tags modrm_regonly or modrm_memonly indicate that the modrm byte's
// r/m encoding must specify a register or memory, respectively.
// Especially in newer instructions, the modrm constraint may be the only way
// to distinguish two instruction forms. For example the MOVHLPS and MOVLPS
// instructions share the same encoding, except that the former requires the
// modrm byte's r/m to indicate a register, while the latter requires it to indicate memory.
//
// The tags pseudo and pseudo64 indicate that this instruction form is redundant
// with others listed in the table and should be ignored when generating disassembly
// or instruction scanning programs. The pseudo64 tag is reserved for the case where
// the manual lists an instruction twice, once with the optional 64-bit mode REX byte.
// Since most decoders will handle the REX byte separately, the form with the
// unnecessary REX is tagged pseudo64.
//
// # Corrections and Additions
//
// The x86spec program makes various corrections to the Intel manual data
// as part of extracting the information. Those corrections are described above.
//
// The x86spec program also adds a few well-known undocumented instructions,
// such as UD1 and FFREEP.
//
// # Examples
//
// The latest version of the CSV file is available in this Git repository and also
// online at https://golang.org/s/x86.csv. It is meant to be human-readable for
// quick reference and also to be input for generating tools that operate on
// x86 machine code.
//
// To print instruction syntaxes introduced by the Pentium II and P6,
// using https://rsc.io/csv2tsv to prepare the table for processing by awk:
//
// csv2tsv x86.csv | awk -F'\t' '$7 == "PentiumII" || $7 == "P6" { print $1 }'
//
// The x86map program (https://golang.org/x/arch/x86/x86map)
// reads the CSV file and generates an x86 instruction decoder in the form
// of a simple byte-code program. This decoder is the core of the disassembler
// in the x86asm package (https://golang.org/x/arch/x86/x86asm).
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"sort"
"strings"
)
// specFormatVersion is the version number of the CSV file format.
const specFormatVersion = "0.2"
// Command-line flags, and globals derived from them by flags.
var (
// flagDebugPage selects manual pages to debug; a non-empty value
// turns on both debugging and onlySomePages.
flagDebugPage = flag.String("debugpage", "", "debug page `n` of the manual (can be comma-separated list)")
// flagURL is the download location used when the file named by flagFile is missing.
flagURL = flag.String("u", "https://golang.org/s/x86manual", "use `url` for download if needed")
// flagFile is the path of the manual PDF.
flagFile = flag.String("f", "x86manual.pdf", "read manual from `file`, downloading if necessary")
// flagCompat enables printing of the compatibility text of each listing.
flagCompat = flag.Bool("compat", false, "print compatibility statements")
// debugging and onlySomePages are both set by flags to whether
// -debugpage was given a non-empty value.
debugging bool
onlySomePages bool
)
// instruction describes one instruction form extracted from the manual.
// The syntax, goSyntax, gnuSyntax, opcode, valid32, valid64, cpuid, tags,
// action, multisize and datasize fields are what write emits, in that
// order, as one CSV line.
type instruction struct {
page int // page number of the listing the form was read from
opcode string // instruction encoding, e.g. "C1 /5 ib"
syntax string // Intel syntax: opcode followed by comma-separated arguments
valid64 string // validity in 64-bit mode, as canonicalized by parseMode
valid32 string // validity in compatibility/legacy (32-bit) mode, as canonicalized by parseMode
cpuid string // text of the CPUID feature flag column
desc string // text of the description column(s), space-joined
tags []string // tags; written comma-joined
args []string // encoding-table row selected by the Op/En column
seq int // for use by cleanup
compat string // compatibility text of the listing with whitespace collapsed
action string // written as the ninth CSV field
multisize string // "Y" if format found the opcode to come in multiple sizes
datasize int // data size in bits; 0 is written as an empty field
gnuSyntax string // GNU syntax; written as the third CSV field
goSyntax string // Go assembler syntax; written as the second CSV field
}
// main runs the pipeline: parse the flags, make sure the manual is present,
// parse it, clean up and format the instructions, and write them sorted by
// syntax to standard output as CSV.
func main() {
	log.SetPrefix("x86spec: ")
	log.SetFlags(0)

	flags()
	download()

	insts := cleanup(parse())
	format(insts)
	sort.Sort(bySyntax(insts))
	write(os.Stdout, insts)
}
func flags() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "usage: x86spec [options]\n")
flag.PrintDefaults()
os.Exit(2)
}
flag.Parse()
if flag.NArg() != 0 {
flag.Usage()
}
debugging = *flagDebugPage != ""
onlySomePages = *flagDebugPage != ""
}
// download fetches the manual from *flagURL into *flagFile, but only when
// os.Stat reports that the file does not exist. Any failure is fatal.
//
// A download that fails part-way does not leave a truncated file behind:
// such a file would pass the existence check on the next run and the
// manual would never be fetched again.
func download() {
	if _, err := os.Stat(*flagFile); !os.IsNotExist(err) {
		// The file exists, or Stat failed for some other reason;
		// either way there is nothing to download.
		return
	}

	// Try downloading.
	log.Printf("downloading manual to %s", *flagFile)
	resp, err := http.Get(*flagURL)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		log.Fatal(resp.Status)
	}

	f, err := os.Create(*flagFile)
	if err != nil {
		log.Fatal(err)
	}
	_, err = io.Copy(f, resp.Body)
	// Always close the file; report the copy error in preference to the
	// close error.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best effort: the copy/close error is the one worth reporting,
		// so a failure to remove the partial file is ignored.
		_ = os.Remove(*flagFile)
		log.Fatal(err)
	}
}
// write emits one CSV line per instruction to w through a buffered writer
// that is flushed on return. The fields are, in order: Intel syntax, Go
// syntax, GNU syntax, opcode, 32-bit validity, 64-bit validity, CPUID flags,
// comma-joined tags, action, multisize, and data size. A data size of zero
// is written as an empty field.
func write(w io.Writer, insts []*instruction) {
	out := bufio.NewWriter(w)
	defer out.Flush()

	for _, in := range insts {
		var size string
		if in.datasize != 0 {
			size = fmt.Sprint(in.datasize)
		}
		fields := []string{
			in.syntax,
			in.goSyntax,
			in.gnuSyntax,
			in.opcode,
			in.valid32,
			in.valid64,
			in.cpuid,
			strings.Join(in.tags, ","),
			in.action,
			in.multisize,
			size,
		}
		writeCSV(out, fields...)
	}
}
// writeCSV writes args to w as a single CSV line: every field is wrapped in
// double quotes, embedded double quotes are doubled, fields are separated
// by commas, and the line ends with a newline.
//
// The encoding/csv package is deliberately not used: it has no "always
// quote" mode, and always-quoted output is a little easier to process with
// non-CSV tools such as grep.
func writeCSV(w io.Writer, args ...string) {
	first := true
	for _, field := range args {
		if !first {
			fmt.Fprintf(w, ",")
		}
		first = false
		escaped := strings.Replace(field, `"`, `""`, -1)
		fmt.Fprintf(w, `"%s"`, escaped)
	}
	fmt.Fprintf(w, "\n")
}
@@ -0,0 +1,939 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"os"
"sort"
"strings"
"testing"
)
var tests = []struct {
pages string
output string
}{
// TODO: If we get page information out of the table of contents,
// we could avoid hard-coding page numbers that need updating with each manual.
// Trivial.
{"82", `
"AAA","37","V","I","",""
`},
// Pseudo detection.
{"84", `
"AAD","D5 0A","V","I","","pseudo"
"AAD imm8","D5 ib","V","I","",""
`},
// Operand-size and pseudo64 detection.
{"95", `
"ADD AL, imm8","04 ib","V","V","",""
"ADD AX, imm16","05 iw","V","V","","operand16"
"ADD EAX, imm32","05 id","V","V","","operand32"
"ADD RAX, imm32","REX.W 05 id","N.E.","V","",""
"ADD r/m8, imm8","80 /0 ib","V","V","",""
"ADD r/m8, imm8","REX 80 /0 ib","N.E.","V","","pseudo64"
"ADD r/m16, imm16","81 /0 iw","V","V","","operand16"
"ADD r/m32, imm32","81 /0 id","V","V","","operand32"
"ADD r/m64, imm32","REX.W 81 /0 id","N.E.","V","",""
"ADD r/m16, imm8","83 /0 ib","V","V","","operand16"
"ADD r/m32, imm8","83 /0 ib","V","V","","operand32"
"ADD r/m64, imm8","REX.W 83 /0 ib","N.E.","V","",""
"ADD r/m8, r8","00 /r","V","V","",""
"ADD r/m8, r8","REX 00 /r","N.E.","V","","pseudo64"
"ADD r/m16, r16","01 /r","V","V","","operand16"
"ADD r/m32, r32","01 /r","V","V","","operand32"
"ADD r/m64, r64","REX.W 01 /r","N.E.","V","",""
"ADD r8, r/m8","02 /r","V","V","",""
"ADD r8, r/m8","REX 02 /r","N.E.","V","","pseudo64"
"ADD r16, r/m16","03 /r","V","V","","operand16"
"ADD r32, r/m32","03 /r","V","V","","operand32"
"ADD r64, r/m64","REX.W 03 /r","N.E.","V","",""
`},
{"961", `
"PUSH r/m16","FF /6","V","V","","operand16"
"PUSH r/m32","FF /6","V","N.E.","","operand32"
"PUSH r/m64","FF /6","N.E.","V","","operand32,operand64"
"PUSH r16op","50+rw","V","V","","operand16"
"PUSH r32op","50+rd","V","N.E.","","operand32"
"PUSH r64op","50+rd","N.E.","V","","operand32,operand64"
"PUSH imm8","6A ib","V","V","",""
"PUSH imm16","68 iw","V","V","","operand16"
"PUSH imm32","68 id","V","V","","operand32"
"PUSH CS","0E","V","I","",""
"PUSH SS","16","V","I","",""
"PUSH DS","1E","V","I","",""
"PUSH ES","06","V","I","",""
"PUSH FS","0F A0","V","V","",""
"PUSH GS","0F A8","V","V","",""
`},
{"964", `
"PUSHA","60","V","I","","operand16"
"PUSHAD","60","V","I","","operand32"
`},
{"966", `
"PUSHF","9C","V","V","","operand16"
"PUSHFD","9C","V","N.E.","","operand32"
"PUSHFQ","9C","N.E.","V","","operand32,operand64"
`},
{"872", `
"POP r/m16","8F /0","V","V","","operand16"
"POP r/m32","8F /0","V","N.E.","","operand32"
"POP r/m64","8F /0","N.E.","V","","operand32,operand64"
"POP r16op","58+rw","V","V","","operand16"
"POP r32op","58+rd","V","N.E.","","operand32"
"POP r64op","58+rd","N.E.","V","","operand32,operand64"
"POP DS","1F","V","I","",""
"POP ES","07","V","I","",""
"POP SS","17","V","I","",""
"POP FS","0F A1","V","V","","operand16"
"POP FS","0F A1","V","N.E.","","operand32"
"POP FS","0F A1","N.E.","V","","operand32,operand64"
"POP GS","0F A9","V","V","","operand16"
"POP GS","0F A9","V","N.E.","","operand32"
"POP GS","0F A9","N.E.","V","","operand32,operand64"
`},
{"224", `
"CMPSB","A6","V","V","",""
"CMPSW","A7","V","V","","operand16"
"CMPSD","A7","V","V","","operand32"
"CMPSQ","REX.W A7","N.E.","V","",""
`},
{"228,654", `
"CMPSD xmm1, xmm2/m64, imm8","F2 0F C2 /r ib","V","V","SSE2",""
"VCMPSD xmm1, xmmV, xmm2/m64, imm8","VEX.NDS.LIG.F2.0F.WIG C2 /r ib","V","V","AVX",""
"MOVSD xmm1, xmm2/m64","F2 0F 10 /r","V","V","SSE2",""
"VMOVSD xmm1, xmmV, xmm2","VEX.NDS.LIG.F2.0F.WIG 10 /r","V","V","AVX","modrm_regonly"
"VMOVSD xmm1, m64","VEX.LIG.F2.0F.WIG 10 /r","V","V","AVX","modrm_memonly"
"MOVSD xmm2/m64, xmm1","F2 0F 11 /r","V","V","SSE2",""
"VMOVSD xmm2, xmmV, xmm1","VEX.NDS.LIG.F2.0F.WIG 11 /r","V","V","AVX","modrm_regonly"
"VMOVSD m64, xmm1","VEX.LIG.F2.0F.WIG 11 /r","V","V","AVX","modrm_memonly"
`},
{"277", `
"CRC32 r32, r/m8","F2 0F 38 F0 /r","V","V","","operand16,operand32"
"CRC32 r32, r/m8","F2 REX 0F 38 F0 /r","N.E.","V","","pseudo64"
"CRC32 r32, r/m16","F2 0F 38 F1 /r","V","V","","operand16"
"CRC32 r32, r/m32","F2 0F 38 F1 /r","V","V","","operand32"
"CRC32 r64, r/m8","F2 REX.W 0F 38 F0 /r","N.E.","V","",""
"CRC32 r64, r/m64","F2 REX.W 0F 38 F1 /r","N.E.","V","",""
`},
{"540", `
"LDS r16, m16:16","C5 /r","V","I","","operand16"
"LDS r32, m16:32","C5 /r","V","I","","operand32"
"LSS r16, m16:16","0F B2 /r","V","V","","operand16"
"LSS r32, m16:32","0F B2 /r","V","V","","operand32"
"LSS r64, m16:64","REX.W 0F B2 /r","N.E.","V","",""
"LES r16, m16:16","C4 /r","V","I","","operand16"
"LES r32, m16:32","C4 /r","V","I","","operand32"
"LFS r16, m16:16","0F B4 /r","V","V","","operand16"
"LFS r32, m16:32","0F B4 /r","V","V","","operand32"
"LFS r64, m16:64","REX.W 0F B4 /r","N.E.","V","",""
"LGS r16, m16:16","0F B5 /r","V","V","","operand16"
"LGS r32, m16:32","0F B5 /r","V","V","","operand32"
"LGS r64, m16:64","REX.W 0F B5 /r","N.E.","V","",""
`},
// Condition code preferences.
{"205,206,207", `
"CMOVA r16, r/m16","0F 47 /r","V","V","","operand16"
"CMOVA r32, r/m32","0F 47 /r","V","V","","operand32"
"CMOVA r64, r/m64","REX.W 0F 47 /r","N.E.","V","",""
"CMOVAE r16, r/m16","0F 43 /r","V","V","","operand16"
"CMOVAE r32, r/m32","0F 43 /r","V","V","","operand32"
"CMOVAE r64, r/m64","REX.W 0F 43 /r","N.E.","V","",""
"CMOVB r16, r/m16","0F 42 /r","V","V","","operand16"
"CMOVB r32, r/m32","0F 42 /r","V","V","","operand32"
"CMOVB r64, r/m64","REX.W 0F 42 /r","N.E.","V","",""
"CMOVBE r16, r/m16","0F 46 /r","V","V","","operand16"
"CMOVBE r32, r/m32","0F 46 /r","V","V","","operand32"
"CMOVBE r64, r/m64","REX.W 0F 46 /r","N.E.","V","",""
"CMOVC r16, r/m16","0F 42 /r","V","V","","operand16,pseudo"
"CMOVC r32, r/m32","0F 42 /r","V","V","","operand32,pseudo"
"CMOVC r64, r/m64","REX.W 0F 42 /r","N.E.","V","","pseudo"
"CMOVE r16, r/m16","0F 44 /r","V","V","","operand16"
"CMOVE r32, r/m32","0F 44 /r","V","V","","operand32"
"CMOVE r64, r/m64","REX.W 0F 44 /r","N.E.","V","",""
"CMOVG r16, r/m16","0F 4F /r","V","V","","operand16"
"CMOVG r32, r/m32","0F 4F /r","V","V","","operand32"
"CMOVG r64, r/m64","REX.W 0F 4F /r","N.E.","V","",""
"CMOVGE r16, r/m16","0F 4D /r","V","V","","operand16"
"CMOVGE r32, r/m32","0F 4D /r","V","V","","operand32"
"CMOVGE r64, r/m64","REX.W 0F 4D /r","N.E.","V","",""
"CMOVL r16, r/m16","0F 4C /r","V","V","","operand16"
"CMOVL r32, r/m32","0F 4C /r","V","V","","operand32"
"CMOVL r64, r/m64","REX.W 0F 4C /r","N.E.","V","",""
"CMOVLE r16, r/m16","0F 4E /r","V","V","","operand16"
"CMOVLE r32, r/m32","0F 4E /r","V","V","","operand32"
"CMOVLE r64, r/m64","REX.W 0F 4E /r","N.E.","V","",""
"CMOVNA r16, r/m16","0F 46 /r","V","V","","operand16,pseudo"
"CMOVNA r32, r/m32","0F 46 /r","V","V","","operand32,pseudo"
"CMOVNA r64, r/m64","REX.W 0F 46 /r","N.E.","V","","pseudo"
"CMOVNAE r16, r/m16","0F 42 /r","V","V","","operand16,pseudo"
"CMOVNAE r32, r/m32","0F 42 /r","V","V","","operand32,pseudo"
"CMOVNAE r64, r/m64","REX.W 0F 42 /r","N.E.","V","","pseudo"
"CMOVNB r16, r/m16","0F 43 /r","V","V","","operand16,pseudo"
"CMOVNB r32, r/m32","0F 43 /r","V","V","","operand32,pseudo"
"CMOVNB r64, r/m64","REX.W 0F 43 /r","N.E.","V","","pseudo"
"CMOVNBE r16, r/m16","0F 47 /r","V","V","","operand16,pseudo"
"CMOVNBE r32, r/m32","0F 47 /r","V","V","","operand32,pseudo"
"CMOVNBE r64, r/m64","REX.W 0F 47 /r","N.E.","V","","pseudo"
"CMOVNC r16, r/m16","0F 43 /r","V","V","","operand16,pseudo"
"CMOVNC r32, r/m32","0F 43 /r","V","V","","operand32,pseudo"
"CMOVNC r64, r/m64","REX.W 0F 43 /r","N.E.","V","","pseudo"
"CMOVNE r16, r/m16","0F 45 /r","V","V","","operand16"
"CMOVNE r32, r/m32","0F 45 /r","V","V","","operand32"
"CMOVNE r64, r/m64","REX.W 0F 45 /r","N.E.","V","",""
"CMOVNG r16, r/m16","0F 4E /r","V","V","","operand16,pseudo"
"CMOVNG r32, r/m32","0F 4E /r","V","V","","operand32,pseudo"
"CMOVNG r64, r/m64","REX.W 0F 4E /r","N.E.","V","","pseudo"
"CMOVNGE r16, r/m16","0F 4C /r","V","V","","operand16,pseudo"
"CMOVNGE r32, r/m32","0F 4C /r","V","V","","operand32,pseudo"
"CMOVNGE r64, r/m64","REX.W 0F 4C /r","N.E.","V","","pseudo"
"CMOVNL r16, r/m16","0F 4D /r","V","V","","operand16,pseudo"
"CMOVNL r32, r/m32","0F 4D /r","V","V","","operand32,pseudo"
"CMOVNL r64, r/m64","REX.W 0F 4D /r","N.E.","V","","pseudo"
"CMOVNLE r16, r/m16","0F 4F /r","V","V","","operand16,pseudo"
"CMOVNLE r32, r/m32","0F 4F /r","V","V","","operand32,pseudo"
"CMOVNLE r64, r/m64","REX.W 0F 4F /r","N.E.","V","","pseudo"
"CMOVNO r16, r/m16","0F 41 /r","V","V","","operand16"
"CMOVNO r32, r/m32","0F 41 /r","V","V","","operand32"
"CMOVNO r64, r/m64","REX.W 0F 41 /r","N.E.","V","",""
"CMOVNP r16, r/m16","0F 4B /r","V","V","","operand16"
"CMOVNP r32, r/m32","0F 4B /r","V","V","","operand32"
"CMOVNP r64, r/m64","REX.W 0F 4B /r","N.E.","V","",""
"CMOVNS r16, r/m16","0F 49 /r","V","V","","operand16"
"CMOVNS r32, r/m32","0F 49 /r","V","V","","operand32"
"CMOVNS r64, r/m64","REX.W 0F 49 /r","N.E.","V","",""
"CMOVNZ r16, r/m16","0F 45 /r","V","V","","operand16,pseudo"
"CMOVNZ r32, r/m32","0F 45 /r","V","V","","operand32,pseudo"
"CMOVNZ r64, r/m64","REX.W 0F 45 /r","N.E.","V","","pseudo"
"CMOVO r16, r/m16","0F 40 /r","V","V","","operand16"
"CMOVO r32, r/m32","0F 40 /r","V","V","","operand32"
"CMOVO r64, r/m64","REX.W 0F 40 /r","N.E.","V","",""
"CMOVP r16, r/m16","0F 4A /r","V","V","","operand16"
"CMOVP r32, r/m32","0F 4A /r","V","V","","operand32"
"CMOVP r64, r/m64","REX.W 0F 4A /r","N.E.","V","",""
"CMOVPE r16, r/m16","0F 4A /r","V","V","","operand16,pseudo"
"CMOVPE r32, r/m32","0F 4A /r","V","V","","operand32,pseudo"
"CMOVPE r64, r/m64","REX.W 0F 4A /r","N.E.","V","","pseudo"
"CMOVPO r16, r/m16","0F 4B /r","V","V","","operand16,pseudo"
"CMOVPO r32, r/m32","0F 4B /r","V","V","","operand32,pseudo"
"CMOVPO r64, r/m64","REX.W 0F 4B /r","N.E.","V","","pseudo"
"CMOVS r16, r/m16","0F 48 /r","V","V","","operand16"
"CMOVS r32, r/m32","0F 48 /r","V","V","","operand32"
"CMOVS r64, r/m64","REX.W 0F 48 /r","N.E.","V","",""
"CMOVZ r16, r/m16","0F 44 /r","V","V","","operand16,pseudo"
"CMOVZ r32, r/m32","0F 44 /r","V","V","","operand32,pseudo"
"CMOVZ r64, r/m64","REX.W 0F 44 /r","N.E.","V","","pseudo"
`},
// Condition code preferences; the Intel manual is also missing /r in the syntax lines.
{"1043,1044", `
"SETA r/m8","0F 97 /r","V","V","",""
"SETA r/m8","REX 0F 97 /r","N.E.","V","","pseudo64"
"SETAE r/m8","0F 93 /r","V","V","",""
"SETAE r/m8","REX 0F 93 /r","N.E.","V","","pseudo64"
"SETB r/m8","0F 92 /r","V","V","",""
"SETB r/m8","REX 0F 92 /r","N.E.","V","","pseudo64"
"SETBE r/m8","0F 96 /r","V","V","",""
"SETBE r/m8","REX 0F 96 /r","N.E.","V","","pseudo64"
"SETC r/m8","0F 92 /r","V","V","","pseudo"
"SETC r/m8","REX 0F 92 /r","N.E.","V","","pseudo"
"SETE r/m8","0F 94 /r","V","V","",""
"SETE r/m8","REX 0F 94 /r","N.E.","V","","pseudo64"
"SETG r/m8","0F 9F /r","V","V","",""
"SETG r/m8","REX 0F 9F /r","N.E.","V","","pseudo64"
"SETGE r/m8","0F 9D /r","V","V","",""
"SETGE r/m8","REX 0F 9D /r","N.E.","V","","pseudo64"
"SETL r/m8","0F 9C /r","V","V","",""
"SETL r/m8","REX 0F 9C /r","N.E.","V","","pseudo64"
"SETLE r/m8","0F 9E /r","V","V","",""
"SETLE r/m8","REX 0F 9E /r","N.E.","V","","pseudo64"
"SETNA r/m8","0F 96 /r","V","V","","pseudo"
"SETNA r/m8","REX 0F 96 /r","N.E.","V","","pseudo"
"SETNAE r/m8","0F 92 /r","V","V","","pseudo"
"SETNAE r/m8","REX 0F 92 /r","N.E.","V","","pseudo"
"SETNB r/m8","0F 93 /r","V","V","","pseudo"
"SETNB r/m8","REX 0F 93 /r","N.E.","V","","pseudo"
"SETNBE r/m8","0F 97 /r","V","V","","pseudo"
"SETNBE r/m8","REX 0F 97 /r","N.E.","V","","pseudo"
"SETNC r/m8","0F 93 /r","V","V","","pseudo"
"SETNC r/m8","REX 0F 93 /r","N.E.","V","","pseudo"
"SETNE r/m8","0F 95 /r","V","V","",""
"SETNE r/m8","REX 0F 95 /r","N.E.","V","","pseudo64"
"SETNG r/m8","0F 9E /r","V","V","","pseudo"
"SETNG r/m8","REX 0F 9E /r","N.E.","V","","pseudo"
"SETNGE r/m8","0F 9C /r","V","V","","pseudo"
"SETNGE r/m8","REX 0F 9C /r","N.E.","V","","pseudo"
"SETNL r/m8","0F 9D /r","V","V","","pseudo"
"SETNL r/m8","REX 0F 9D /r","N.E.","V","","pseudo"
"SETNLE r/m8","0F 9F /r","V","V","","pseudo"
"SETNLE r/m8","REX 0F 9F /r","N.E.","V","","pseudo"
"SETNO r/m8","0F 91 /r","V","V","",""
"SETNO r/m8","REX 0F 91 /r","N.E.","V","","pseudo64"
"SETNP r/m8","0F 9B /r","V","V","",""
"SETNP r/m8","REX 0F 9B /r","N.E.","V","","pseudo64"
"SETNS r/m8","0F 99 /r","V","V","",""
"SETNS r/m8","REX 0F 99 /r","N.E.","V","","pseudo64"
"SETNZ r/m8","0F 95 /r","V","V","","pseudo"
"SETNZ r/m8","REX 0F 95 /r","N.E.","V","","pseudo"
"SETO r/m8","0F 90 /r","V","V","",""
"SETO r/m8","REX 0F 90 /r","N.E.","V","","pseudo64"
"SETP r/m8","0F 9A /r","V","V","",""
"SETP r/m8","REX 0F 9A /r","N.E.","V","","pseudo64"
"SETPE r/m8","0F 9A /r","V","V","","pseudo"
"SETPE r/m8","REX 0F 9A /r","N.E.","V","","pseudo"
"SETPO r/m8","0F 9B /r","V","V","","pseudo"
"SETPO r/m8","REX 0F 9B /r","N.E.","V","","pseudo"
"SETS r/m8","0F 98 /r","V","V","",""
"SETS r/m8","REX 0F 98 /r","N.E.","V","","pseudo64"
"SETZ r/m8","0F 94 /r","V","V","","pseudo"
"SETZ r/m8","REX 0F 94 /r","N.E.","V","","pseudo"
`},
{"520,521,522,523", `
"JA rel8","77 cb","V","V","",""
"JAE rel8","73 cb","V","V","",""
"JB rel8","72 cb","V","V","",""
"JBE rel8","76 cb","V","V","",""
"JC rel8","72 cb","V","V","","pseudo"
"JCXZ rel8","E3 cb","V","N.E.","","address16"
"JECXZ rel8","E3 cb","V","V","","address32"
"JRCXZ rel8","E3 cb","N.E.","V","","address64"
"JE rel8","74 cb","V","V","",""
"JG rel8","7F cb","V","V","",""
"JGE rel8","7D cb","V","V","",""
"JL rel8","7C cb","V","V","",""
"JLE rel8","7E cb","V","V","",""
"JNA rel8","76 cb","V","V","","pseudo"
"JNAE rel8","72 cb","V","V","","pseudo"
"JNB rel8","73 cb","V","V","","pseudo"
"JNBE rel8","77 cb","V","V","","pseudo"
"JNC rel8","73 cb","V","V","","pseudo"
"JNE rel8","75 cb","V","V","",""
"JNG rel8","7E cb","V","V","","pseudo"
"JNGE rel8","7C cb","V","V","","pseudo"
"JNL rel8","7D cb","V","V","","pseudo"
"JNLE rel8","7F cb","V","V","","pseudo"
"JNO rel8","71 cb","V","V","",""
"JNP rel8","7B cb","V","V","",""
"JNS rel8","79 cb","V","V","",""
"JNZ rel8","75 cb","V","V","","pseudo"
"JO rel8","70 cb","V","V","",""
"JP rel8","7A cb","V","V","",""
"JPE rel8","7A cb","V","V","","pseudo"
"JPO rel8","7B cb","V","V","","pseudo"
"JS rel8","78 cb","V","V","",""
"JZ rel8","74 cb","V","V","","pseudo"
"JA rel16","0F 87 cw","V","N.S.","","operand16"
"JA rel32","0F 87 cd","V","V","","operand32"
"JAE rel16","0F 83 cw","V","N.S.","","operand16"
"JAE rel32","0F 83 cd","V","V","","operand32"
"JB rel16","0F 82 cw","V","N.S.","","operand16"
"JB rel32","0F 82 cd","V","V","","operand32"
"JBE rel16","0F 86 cw","V","N.S.","","operand16"
"JBE rel32","0F 86 cd","V","V","","operand32"
"JC rel16","0F 82 cw","V","N.S.","","pseudo"
"JC rel32","0F 82 cd","V","V","","pseudo"
"JE rel16","0F 84 cw","V","N.S.","","operand16"
"JE rel32","0F 84 cd","V","V","","operand32"
"JZ rel16","0F 84 cw","V","N.S.","","operand16,pseudo"
"JZ rel32","0F 84 cd","V","V","","operand32,pseudo"
"JG rel16","0F 8F cw","V","N.S.","","operand16"
"JG rel32","0F 8F cd","V","V","","operand32"
"JGE rel16","0F 8D cw","V","N.S.","","operand16"
"JGE rel32","0F 8D cd","V","V","","operand32"
"JL rel16","0F 8C cw","V","N.S.","","operand16"
"JL rel32","0F 8C cd","V","V","","operand32"
"JLE rel16","0F 8E cw","V","N.S.","","operand16"
"JLE rel32","0F 8E cd","V","V","","operand32"
"JNA rel16","0F 86 cw","V","N.S.","","pseudo"
"JNA rel32","0F 86 cd","V","V","","pseudo"
"JNAE rel16","0F 82 cw","V","N.S.","","pseudo"
"JNAE rel32","0F 82 cd","V","V","","pseudo"
"JNB rel16","0F 83 cw","V","N.S.","","pseudo"
"JNB rel32","0F 83 cd","V","V","","pseudo"
"JNBE rel16","0F 87 cw","V","N.S.","","pseudo"
"JNBE rel32","0F 87 cd","V","V","","pseudo"
"JNC rel16","0F 83 cw","V","N.S.","","pseudo"
"JNC rel32","0F 83 cd","V","V","","pseudo"
"JNE rel16","0F 85 cw","V","N.S.","","operand16"
"JNE rel32","0F 85 cd","V","V","","operand32"
"JNG rel16","0F 8E cw","V","N.S.","","pseudo"
"JNG rel32","0F 8E cd","V","V","","pseudo"
"JNGE rel16","0F 8C cw","V","N.S.","","pseudo"
"JNGE rel32","0F 8C cd","V","V","","pseudo"
"JNL rel16","0F 8D cw","V","N.S.","","pseudo"
"JNL rel32","0F 8D cd","V","V","","pseudo"
"JNLE rel16","0F 8F cw","V","N.S.","","pseudo"
"JNLE rel32","0F 8F cd","V","V","","pseudo"
"JNO rel16","0F 81 cw","V","N.S.","","operand16"
"JNO rel32","0F 81 cd","V","V","","operand32"
"JNP rel16","0F 8B cw","V","N.S.","","operand16"
"JNP rel32","0F 8B cd","V","V","","operand32"
"JNS rel16","0F 89 cw","V","N.S.","","operand16"
"JNS rel32","0F 89 cd","V","V","","operand32"
"JNZ rel16","0F 85 cw","V","N.S.","","pseudo"
"JNZ rel32","0F 85 cd","V","V","","pseudo"
"JO rel16","0F 80 cw","V","N.S.","","operand16"
"JO rel32","0F 80 cd","V","V","","operand32"
"JP rel16","0F 8A cw","V","N.S.","","operand16"
"JP rel32","0F 8A cd","V","V","","operand32"
"JPE rel16","0F 8A cw","V","N.S.","","pseudo"
"JPE rel32","0F 8A cd","V","V","","pseudo"
"JPO rel16","0F 8B cw","V","N.S.","","pseudo"
"JPO rel32","0F 8B cd","V","V","","pseudo"
"JS rel16","0F 88 cw","V","N.S.","","operand16"
"JS rel32","0F 88 cd","V","V","","operand32"
"JA rel32","0F 87 cd","N.S.","V","","operand16,operand64"
"JAE rel32","0F 83 cd","N.S.","V","","operand16,operand64"
"JB rel32","0F 82 cd","N.S.","V","","operand16,operand64"
"JBE rel32","0F 86 cd","N.S.","V","","operand16,operand64"
"JE rel32","0F 84 cd","N.S.","V","","operand16,operand64"
"JG rel32","0F 8F cd","N.S.","V","","operand16,operand64"
"JGE rel32","0F 8D cd","N.S.","V","","operand16,operand64"
"JL rel32","0F 8C cd","N.S.","V","","operand16,operand64"
"JLE rel32","0F 8E cd","N.S.","V","","operand16,operand64"
"JNE rel32","0F 85 cd","N.S.","V","","operand16,operand64"
"JNO rel32","0F 81 cd","N.S.","V","","operand16,operand64"
"JNP rel32","0F 8B cd","N.S.","V","","operand16,operand64"
"JNS rel32","0F 89 cd","N.S.","V","","operand16,operand64"
"JO rel32","0F 80 cd","N.S.","V","","operand16,operand64"
"JP rel32","0F 8A cd","N.S.","V","","operand16,operand64"
"JS rel32","0F 88 cd","N.S.","V","","operand16,operand64"
`},
// Pseudo-ops in floating point.
{"362", `
"FCOM m32fp","D8 /2","V","V","",""
"FCOM m64fp","DC /2","V","V","",""
"FCOM ST(i)","D8 D0+i","V","V","",""
"FCOM","D8 D1","V","V","","pseudo"
"FCOMP m32fp","D8 /3","V","V","",""
"FCOMP m64fp","DC /3","V","V","",""
"FCOMP ST(i)","D8 D8+i","V","V","",""
"FCOMP","D8 D9","V","V","","pseudo"
"FCOMPP","DE D9","V","V","",""
`},
{"358", `
"FCLEX","9B DB E2","V","V","","pseudo"
"FNCLEX","DB E2","V","V","",""
`},
// Unsigned immediates.
{"340,", `
"ENTER imm16u, 0","C8 iw 00","V","V","","pseudo"
"ENTER imm16u, 1","C8 iw 01","V","V","","pseudo"
"ENTER imm16u, imm8","C8 iw ib","V","V","",""
`},
// Rewriting of arguments to match encoding (xmm1 vs xmm2).
{"785", `
"PEXTRB r32/m8, xmm1, imm8","66 0F 3A 14 /r ib","V","V","SSE4_1",""
"PEXTRD r/m32, xmm1, imm8","66 0F 3A 16 /r ib","V","V","SSE4_1","operand16,operand32"
"PEXTRQ r/m64, xmm1, imm8","66 REX.W 0F 3A 16 /r ib","N.E.","V","SSE4_1",""
"VPEXTRB r32/m8, xmm1, imm8","VEX.128.66.0F3A.W0 14 /r ib","V","V","AVX",""
"VPEXTRD r32/m32, xmm1, imm8","VEX.128.66.0F3A.W0 16 /r ib","V","V","AVX",""
"VPEXTRQ r64/m64, xmm1, imm8","VEX.128.66.0F3A.W1 16 /r ib","I","V","AVX",""
`},
{"843", `
"PMOVMSKB r32, mm2","0F D7 /r","V","V","SSE",""
"PMOVMSKB r32, xmm2","66 0F D7 /r","V","V","SSE2","modrm_regonly"
"VPMOVMSKB r32, xmm2","VEX.128.66.0F.WIG D7 /r","V","V","AVX","modrm_regonly"
"VPMOVMSKB r32, ymm2","VEX.256.66.0F.WIG D7 /r","V","V","AVX2","modrm_regonly"
`},
{"343", `
"EXTRACTPS r/m32, xmm1, imm8","66 0F 3A 17 /r ib","V","V","SSE4_1",""
"VEXTRACTPS r/m32, xmm1, imm8","VEX.128.66.0F3A.WIG 17 /r ib","V","V","AVX",""
`},
{"624", `
"MOVHPS xmm1, m64","0F 16 /r","V","V","SSE","modrm_memonly"
"MOVHPS m64, xmm1","0F 17 /r","V","V","SSE","modrm_memonly"
"VMOVHPS xmm1, xmmV, m64","VEX.NDS.128.0F.WIG 16 /r","V","V","AVX","modrm_memonly"
"VMOVHPS m64, xmm1","VEX.128.0F.WIG 17 /r","V","V","AVX","modrm_memonly"
`},
{"979", `
"RDFSBASE rmr32","F3 0F AE /0","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
"RDFSBASE rmr64","F3 REX.W 0F AE /0","I","V","FSGSBASE","modrm_regonly"
"RDGSBASE rmr32","F3 0F AE /1","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
"RDGSBASE rmr64","F3 REX.W 0F AE /1","I","V","FSGSBASE","modrm_regonly"
`},
{"988", `
"RDRAND rmr16","0F C7 /6","V","V","RDRAND","modrm_regonly,operand16"
"RDRAND rmr32","0F C7 /6","V","V","RDRAND","modrm_regonly,operand32"
"RDRAND rmr64","REX.W 0F C7 /6","I","V","RDRAND","modrm_regonly"
`},
{"1135", `
"VEXTRACTI128 xmm2/m128, ymm1, imm8","VEX.256.66.0F3A.W0 39 /r ib","V","V","AVX2",""
`},
{"1248", `
"WRFSBASE rmr32","F3 0F AE /2","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
"WRFSBASE rmr64","F3 REX.W 0F AE /2","I","V","FSGSBASE","modrm_regonly"
"WRGSBASE rmr32","F3 0F AE /3","I","V","FSGSBASE","modrm_regonly,operand16,operand32"
"WRGSBASE rmr64","F3 REX.W 0F AE /3","I","V","FSGSBASE","modrm_regonly"
`},
{"1229", `
"VPMASKMOVD xmm1, xmmV, m128","VEX.NDS.128.66.0F38.W0 8C /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVD ymm1, ymmV, m256","VEX.NDS.256.66.0F38.W0 8C /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVQ xmm1, xmmV, m128","VEX.NDS.128.66.0F38.W1 8C /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVQ ymm1, ymmV, m256","VEX.NDS.256.66.0F38.W1 8C /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVD m128, xmmV, xmm1","VEX.NDS.128.66.0F38.W0 8E /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVD m256, ymmV, ymm1","VEX.NDS.256.66.0F38.W0 8E /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVQ m128, xmmV, xmm1","VEX.NDS.128.66.0F38.W1 8E /r","V","V","AVX2","modrm_memonly"
"VPMASKMOVQ m256, ymmV, ymm1","VEX.NDS.256.66.0F38.W1 8E /r","V","V","AVX2","modrm_memonly"
`},
{"537", `
"LDDQU xmm1, m128","F2 0F F0 /r","V","V","SSE3","modrm_memonly"
"VLDDQU xmm1, m128","VEX.128.F2.0F.WIG F0 /r","V","V","AVX","modrm_memonly"
"VLDDQU ymm1, m256","VEX.256.F2.0F.WIG F0 /r","V","V","AVX","modrm_memonly"
`},
{"624,626", `
"MOVHPS xmm1, m64","0F 16 /r","V","V","SSE","modrm_memonly"
"MOVHPS m64, xmm1","0F 17 /r","V","V","SSE","modrm_memonly"
"VMOVHPS xmm1, xmmV, m64","VEX.NDS.128.0F.WIG 16 /r","V","V","AVX","modrm_memonly"
"VMOVHPS m64, xmm1","VEX.128.0F.WIG 17 /r","V","V","AVX","modrm_memonly"
"MOVLHPS xmm1, xmm2","0F 16 /r","V","V","SSE","modrm_regonly"
"VMOVLHPS xmm1, xmmV, xmm2","VEX.NDS.128.0F.WIG 16 /r","V","V","AVX","modrm_regonly"
`},
// CPU features
{"758", `
"PCLMULQDQ xmm1, xmm2/m128, imm8","66 0F 3A 44 /r ib","V","V","PCLMULQDQ",""
"VPCLMULQDQ xmm1, xmmV, xmm2/m128, imm8","VEX.NDS.128.66.0F3A.WIG 44 /r ib","V","V","PCLMULQDQ+AVX",""
`},
// Fonts
{"486", `
"INC r/m8","FE /0","V","V","",""
"INC r/m8","REX FE /0","N.E.","V","","pseudo64"
"INC r/m16","FF /0","V","V","","operand16"
"INC r/m32","FF /0","V","V","","operand32"
"INC r/m64","REX.W FF /0","N.E.","V","",""
"INC r16op","40+rw","V","N.E.","","operand16"
"INC r32op","40+rd","V","N.E.","","operand32"
`},
// Intel manual has spurious trailing "m64" and "m128" in the opcode.
{"238", `
"CMPXCHG8B m64","0F C7 /1","V","V","","modrm_memonly,operand16,operand32"
"CMPXCHG16B m128","REX.W 0F C7 /1","N.E.","V","","modrm_memonly"
`},
// Intel manual missing cw and cd in opcode.
{"1260", `
"XBEGIN rel16","C7 F8 cw","V","V","RTM","operand16"
"XBEGIN rel32","C7 F8 cd","V","V","RTM","operand32,operand64"
`},
// Special cases
{"180", `
"CALL rel16","E8 cw","V","N.S.","","operand16"
"CALL rel32","E8 cd","V","V","","operand32"
"CALL r/m16","FF /2","V","N.E.","","operand16"
"CALL r/m32","FF /2","V","N.E.","","operand32"
"CALL r/m64","FF /2","N.E.","V","",""
"CALL_FAR ptr16:16","9A cd","V","I","","operand16"
"CALL_FAR ptr16:32","9A cp","V","I","","operand32"
"CALL_FAR m16:16","FF /3","V","V","","operand16"
"CALL_FAR m16:32","FF /3","V","V","","operand32"
"CALL_FAR m16:64","REX.W FF /3","N.E.","V","",""
"CALL rel32","E8 cd","N.S.","V","","operand16,operand64"
`},
{"525", `
"JMP rel8","EB cb","V","V","",""
"JMP rel16","E9 cw","V","N.S.","","operand16"
"JMP rel32","E9 cd","V","V","","operand32"
"JMP r/m16","FF /4","V","N.S.","","operand16"
"JMP r/m32","FF /4","V","N.S.","","operand32"
"JMP r/m64","FF /4","N.E.","V","",""
"JMP_FAR ptr16:16","EA cd","V","I","","operand16"
"JMP_FAR ptr16:32","EA cp","V","I","","operand32"
"JMP_FAR m16:16","FF /5","V","V","","operand16"
"JMP_FAR m16:32","FF /5","V","V","","operand32"
"JMP_FAR m16:64","REX.W FF /5","N.E.","V","",""
"JMP rel32","E9 cd","N.S.","V","","operand16,operand64"
`},
{"698", `
"NOP","90","V","V","","pseudo"
"NOP r/m16","0F 1F /0","V","V","","operand16"
"NOP r/m32","0F 1F /0","V","V","","operand32"
`},
{"747", `
"PAUSE","F3 90","V","V","","pseudo"
`},
{"1029,1030", `
"SAL r/m8, 1","D0 /4","V","V","","pseudo"
"SAL r/m8, 1","REX D0 /4","N.E.","V","","pseudo"
"SAL r/m8, CL","D2 /4","V","V","","pseudo"
"SAL r/m8, CL","REX D2 /4","N.E.","V","","pseudo"
"SAL r/m8, imm8","C0 /4 ib","V","V","","pseudo"
"SAL r/m8, imm8","REX C0 /4 ib","N.E.","V","","pseudo"
"SAL r/m16, 1","D1 /4","V","V","","operand16,pseudo"
"SAL r/m16, CL","D3 /4","V","V","","operand16,pseudo"
"SAL r/m16, imm8","C1 /4 ib","V","V","","operand16,pseudo"
"SAL r/m32, 1","D1 /4","V","V","","operand32,pseudo"
"SAL r/m64, 1","REX.W D1 /4","N.E.","V","","pseudo"
"SAL r/m32, CL","D3 /4","V","V","","operand32,pseudo"
"SAL r/m64, CL","REX.W D3 /4","N.E.","V","","pseudo"
"SAL r/m32, imm8","C1 /4 ib","V","V","","operand32,pseudo"
"SAL r/m64, imm8","REX.W C1 /4 ib","N.E.","V","","pseudo"
"SAR r/m8, 1","D0 /7","V","V","",""
"SAR r/m8, 1","REX D0 /7","N.E.","V","","pseudo64"
"SAR r/m8, CL","D2 /7","V","V","",""
"SAR r/m8, CL","REX D2 /7","N.E.","V","","pseudo64"
"SAR r/m8, imm8","C0 /7 ib","V","V","",""
"SAR r/m8, imm8","REX C0 /7 ib","N.E.","V","","pseudo64"
"SAR r/m16, 1","D1 /7","V","V","","operand16"
"SAR r/m16, CL","D3 /7","V","V","","operand16"
"SAR r/m16, imm8","C1 /7 ib","V","V","","operand16"
"SAR r/m32, 1","D1 /7","V","V","","operand32"
"SAR r/m64, 1","REX.W D1 /7","N.E.","V","",""
"SAR r/m32, CL","D3 /7","V","V","","operand32"
"SAR r/m64, CL","REX.W D3 /7","N.E.","V","",""
"SAR r/m32, imm8","C1 /7 ib","V","V","","operand32"
"SAR r/m64, imm8","REX.W C1 /7 ib","N.E.","V","",""
"SHL r/m8, 1","D0 /4","V","V","",""
"SHL r/m8, 1","REX D0 /4","N.E.","V","","pseudo64"
"SHL r/m8, CL","D2 /4","V","V","",""
"SHL r/m8, CL","REX D2 /4","N.E.","V","","pseudo64"
"SHL r/m8, imm8","C0 /4 ib","V","V","",""
"SHL r/m8, imm8","REX C0 /4 ib","N.E.","V","","pseudo64"
"SHL r/m16, 1","D1 /4","V","V","","operand16"
"SHL r/m16, CL","D3 /4","V","V","","operand16"
"SHL r/m16, imm8","C1 /4 ib","V","V","","operand16"
"SHL r/m32, 1","D1 /4","V","V","","operand32"
"SHL r/m64, 1","REX.W D1 /4","N.E.","V","",""
"SHL r/m32, CL","D3 /4","V","V","","operand32"
"SHL r/m64, CL","REX.W D3 /4","N.E.","V","",""
"SHL r/m32, imm8","C1 /4 ib","V","V","","operand32"
"SHL r/m64, imm8","REX.W C1 /4 ib","N.E.","V","",""
"SHR r/m8, 1","D0 /5","V","V","",""
"SHR r/m8, 1","REX D0 /5","N.E.","V","","pseudo64"
"SHR r/m8, CL","D2 /5","V","V","",""
"SHR r/m8, CL","REX D2 /5","N.E.","V","","pseudo64"
"SHR r/m8, imm8","C0 /5 ib","V","V","",""
"SHR r/m8, imm8","REX C0 /5 ib","N.E.","V","","pseudo64"
"SHR r/m16, 1","D1 /5","V","V","","operand16"
"SHR r/m16, CL","D3 /5","V","V","","operand16"
"SHR r/m16, imm8","C1 /5 ib","V","V","","operand16"
"SHR r/m32, 1","D1 /5","V","V","","operand32"
"SHR r/m64, 1","REX.W D1 /5","N.E.","V","",""
"SHR r/m32, CL","D3 /5","V","V","","operand32"
"SHR r/m64, CL","REX.W D3 /5","N.E.","V","",""
"SHR r/m32, imm8","C1 /5 ib","V","V","","operand32"
"SHR r/m64, imm8","REX.W C1 /5 ib","N.E.","V","",""
`},
{"564", `
"LSL r16, r/m16","0F 03 /r","V","V","","operand16"
"LSL r32, r32/m16","0F 03 /r","V","V","","operand32"
"LSL r64, r32/m16","REX.W 0F 03 /r","N.E.","V","",""
`},
{"1000", `
"RET","C3","V","V","",""
"RET_FAR","CB","V","V","",""
"RET imm16u","C2 iw","V","V","",""
"RET_FAR imm16u","CA iw","V","V","",""
`},
{"1245", `
"WAIT","9B","V","V","","pseudo"
"FWAIT","9B","V","V","",""
`},
{"1263", `
"XCHG AX, r16op","90+rw","V","V","","operand16,pseudo"
"XCHG r16op, AX","90+rw","V","V","","operand16"
"XCHG EAX, r32op","90+rd","V","V","","operand32,pseudo"
"XCHG RAX, r64op","REX.W 90+rd","N.E.","V","","pseudo"
"XCHG r32op, EAX","90+rd","V","V","","operand32"
"XCHG r64op, RAX","REX.W 90+rd","N.E.","V","",""
"XCHG r/m8, r8","86 /r","V","V","",""
"XCHG r/m8, r8","REX 86 /r","N.E.","V","","pseudo64"
"XCHG r8, r/m8","86 /r","V","V","","pseudo"
"XCHG r8, r/m8","REX 86 /r","N.E.","V","","pseudo"
"XCHG r/m16, r16","87 /r","V","V","","operand16"
"XCHG r16, r/m16","87 /r","V","V","","operand16,pseudo"
"XCHG r/m32, r32","87 /r","V","V","","operand32"
"XCHG r/m64, r64","REX.W 87 /r","N.E.","V","",""
"XCHG r32, r/m32","87 /r","V","V","","operand32,pseudo"
"XCHG r64, r/m64","REX.W 87 /r","N.E.","V","","pseudo"
`},
{"1063", `
"SLDT r/m16","0F 00 /0","V","V","","operand16"
"SLDT r64/m16","REX.W 0F 00 /0","N.E.","V","",""
"SLDT r32/m16","0F 00 /0","V","V","","operand32"
`},
{"1065", `
"SMSW r/m16","0F 01 /4","V","V","","operand16"
"SMSW r32/m16","0F 01 /4","V","V","","operand32"
"SMSW r64/m16","REX.W 0F 01 /4","N.E.","V","",""
`},
{"1083", `
"STR r/m16","0F 00 /1","V","V","","operand16"
"STR r32/m16","0F 00 /1","V","V","","operand32"
"STR r64/m16","REX.W 0F 00 /1","N.E.","V","",""
`},
{"533,1027", `
"LAHF","9F","V","V","",""
"SAHF","9E","V","V","",""
`},
{"662", `
"MOVSX r16, r/m8","0F BE /r","V","V","","operand16"
"MOVSX r32, r/m8","0F BE /r","V","V","","operand32"
"MOVSX r64, r/m8","REX.W 0F BE /r","N.E.","V","",""
"MOVSX r32, r/m16","0F BF /r","V","V","","operand32"
"MOVSX r64, r/m16","REX.W 0F BF /r","N.E.","V","",""
"MOVSXD r64, r/m32","REX.W 63 /r","N.E.","V","",""
"MOVSX r16, r/m16","0F BF /r","V","V","","operand16"
"MOVSXD r16, r/m32","63 /r","N.E.","V","","operand16"
"MOVSXD r32, r/m32","63 /r","N.E.","V","","operand32"
`},
{"668", `
"MOVZX r16, r/m8","0F B6 /r","V","V","","operand16"
"MOVZX r32, r/m8","0F B6 /r","V","V","","operand32"
"MOVZX r64, r/m8","REX.W 0F B6 /r","N.E.","V","",""
"MOVZX r32, r/m16","0F B7 /r","V","V","","operand32"
"MOVZX r64, r/m16","REX.W 0F B7 /r","N.E.","V","",""
"MOVZX r16, r/m16","0F B7 /r","V","V","","operand16"
`},
{"1253,1260", `
"XACQUIRE","F2","V","V","HLE","pseudo"
"XRELEASE","F3","V","V","HLE","pseudo"
"XBEGIN rel16","C7 F8 cw","V","V","RTM","operand16"
"XBEGIN rel32","C7 F8 cd","V","V","RTM","operand32,operand64"
`},
{"547", `
"LEAVE","C9","V","V","","operand16"
"LEAVE","C9","V","N.E.","","operand32"
"LEAVE","C9","N.E.","V","","operand32,operand64"
`},
{"484", `
"IN AL, imm8u","E4 ib","V","V","",""
"IN AX, imm8u","E5 ib","V","V","","operand16"
"IN EAX, imm8u","E5 ib","V","V","","operand32,operand64"
"IN AL, DX","EC","V","V","",""
"IN AX, DX","ED","V","V","","operand16"
"IN EAX, DX","ED","V","V","","operand32,operand64"
`},
{"488", `
"INSB","6C","V","V","",""
"INSW","6D","V","V","","operand16"
"INSD","6D","V","V","","operand32,operand64"
`},
{"707", `
"OUT imm8u, AL","E6 ib","V","V","",""
"OUT imm8u, AX","E7 ib","V","V","","operand16"
"OUT imm8u, EAX","E7 ib","V","V","","operand32,operand64"
"OUT DX, AL","EE","V","V","",""
"OUT DX, AX","EF","V","V","","operand16"
"OUT DX, EAX","EF","V","V","","operand32,operand64"
`},
{"709", `
"OUTSB","6E","V","V","",""
"OUTSW","6F","V","V","","operand16"
"OUTSD","6F","V","V","","operand32,operand64"
`},
{"881,966", `
"POPF","9D","V","V","","operand16"
"POPFD","9D","V","N.E.","","operand32"
"POPFQ","9D","N.E.","V","","operand32,operand64"
"PUSHF","9C","V","V","","operand16"
"PUSHFD","9C","V","N.E.","","operand32"
"PUSHFQ","9C","N.E.","V","","operand32,operand64"
`},
{"610", `
"MOVD mm1, r/m32","0F 6E /r","V","V","MMX","operand16,operand32"
"MOVQ mm1, r/m64","REX.W 0F 6E /r","N.E.","V","MMX",""
"MOVD r/m32, mm1","0F 7E /r","V","V","MMX","operand16,operand32"
"MOVQ r/m64, mm1","REX.W 0F 7E /r","N.E.","V","MMX",""
"VMOVD xmm1, r32/m32","VEX.128.66.0F.W0 6E /r","V","V","AVX",""
"VMOVQ xmm1, r64/m64","VEX.128.66.0F.W1 6E /r","N.E.","V","AVX",""
"MOVD xmm1, r/m32","66 0F 6E /r","V","V","SSE2","operand16,operand32"
"MOVQ xmm1, r/m64","66 REX.W 0F 6E /r","N.E.","V","SSE2",""
"MOVD r/m32, xmm1","66 0F 7E /r","V","V","SSE2","operand16,operand32"
"MOVQ r/m64, xmm1","66 REX.W 0F 7E /r","N.E.","V","SSE2",""
"VMOVD r32/m32, xmm1","VEX.128.66.0F.W0 7E /r","V","V","AVX",""
"VMOVQ r64/m64, xmm1","VEX.128.66.0F.W1 7E /r","N.E.","V","AVX",""
`},
{"534", `
"LAR r16, r/m16","0F 02 /r","V","V","","operand16"
"LAR r32, r32/m16","0F 02 /r","V","V","","operand32"
"LAR r64, r64/m16","REX.W 0F 02 /r","N.E.","V","",""
`},
{"360", `
"FCMOVB ST(0), ST(i)","DA C0+i","V","V","",""
"FCMOVE ST(0), ST(i)","DA C8+i","V","V","",""
"FCMOVBE ST(0), ST(i)","DA D0+i","V","V","",""
"FCMOVU ST(0), ST(i)","DA D8+i","V","V","",""
"FCMOVNB ST(0), ST(i)","DB C0+i","V","V","",""
"FCMOVNE ST(0), ST(i)","DB C8+i","V","V","",""
"FCMOVNBE ST(0), ST(i)","DB D0+i","V","V","",""
"FCMOVNU ST(0), ST(i)","DB D8+i","V","V","",""
`},
{"413", `
"FSAVE m94/108byte","9B DD /6","V","V","","pseudo"
"FNSAVE m94/108byte","DD /6","V","V","",""
`},
{"446,449", `
"FXRSTOR m512byte","0F AE /1","V","V","","operand16,operand32"
"FXRSTOR64 m512byte","REX.W 0F AE /1","N.E.","V","",""
"FXSAVE m512byte","0F AE /0","V","V","","operand16,operand32"
"FXSAVE64 m512byte","REX.W 0F AE /0","N.E.","V","",""
`},
// The way extra instructions are inserted, the MOV TR and MOV Sreg extra instructions
// appear in every test of a page containing MOV instructions.
// So be it: we definitely won't lose them!
{"594,595", `
"MOV r/m8, r8","88 /r","V","V","",""
"MOV r/m8, r8","REX 88 /r","N.E.","V","","pseudo64"
"MOV r/m16, r16","89 /r","V","V","","operand16"
"MOV r/m32, r32","89 /r","V","V","","operand32"
"MOV r/m64, r64","REX.W 89 /r","N.E.","V","",""
"MOV r8, r/m8","8A /r","V","V","",""
"MOV r8, r/m8","REX 8A /r","N.E.","V","","pseudo64"
"MOV r16, r/m16","8B /r","V","V","","operand16"
"MOV r32, r/m32","8B /r","V","V","","operand32"
"MOV r64, r/m64","REX.W 8B /r","N.E.","V","",""
"MOV r/m16, Sreg","8C /r","V","V","","operand16"
"MOV r/m64, Sreg","REX.W 8C /r","N.E.","V","",""
"MOV Sreg, r/m16","8E /r","V","V","","operand16"
"MOV Sreg, r64/m16","REX.W 8E /r","N.E.","V","",""
"MOV AL, moffs8","A0 cm","V","V","","ignoreREXW"
"MOV AL, moffs8","REX.W A0 cm","N.E.","V","","pseudo"
"MOV AX, moffs16","A1 cm","V","V","","operand16"
"MOV EAX, moffs32","A1 cm","V","V","","operand32"
"MOV RAX, moffs64","REX.W A1 cm","N.E.","V","",""
"MOV moffs8, AL","A2 cm","V","V","","ignoreREXW"
"MOV moffs8, AL","REX.W A2 cm","N.E.","V","","pseudo"
"MOV moffs16, AX","A3 cm","V","V","","operand16"
"MOV moffs32, EAX","A3 cm","V","V","","operand32"
"MOV moffs64, RAX","REX.W A3 cm","N.E.","V","",""
"MOV r8op, imm8u","B0+rb ib","V","V","",""
"MOV r8op, imm8u","REX B0+rb ib","N.E.","V","","pseudo64"
"MOV r16op, imm16","B8+rw iw","V","V","","operand16"
"MOV r32op, imm32","B8+rd id","V","V","","operand32"
"MOV r64op, imm64","REX.W B8+rd io","N.E.","V","",""
"MOV r/m8, imm8u","C6 /0 ib","V","V","",""
"MOV r/m8, imm8u","REX C6 /0 ib","N.E.","V","","pseudo64"
"MOV r/m16, imm16","C7 /0 iw","V","V","","operand16"
"MOV r/m32, imm32","C7 /0 id","V","V","","operand32"
"MOV r/m64, imm32","REX.W C7 /0 id","N.E.","V","",""
"MOV TR0-TR7, rmr32","0F 26 /r","V","N.E.","","modrm_regonly"
"MOV TR0-TR7, rmr64","0F 26 /r","N.E.","V","","modrm_regonly"
"MOV rmr32, TR0-TR7","0F 24 /r","V","N.E.","","modrm_regonly"
"MOV rmr64, TR0-TR7","0F 24 /r","N.E.","V","","modrm_regonly"
"MOV Sreg, r32/m16","8E /r","V","V","","operand32"
"MOV r/m32, Sreg","8C /r","V","V","","operand32"
`},
{"599", `
"MOV rmr32, CR0-CR7","0F 20 /r","V","N.E.","","modrm_regonly"
"MOV rmr64, CR0-CR7","0F 20 /r","N.E.","V","","modrm_regonly"
"MOV rmr64, CR8","REX.R + 0F 20 /0","N.E.","V","","modrm_regonly,pseudo"
"MOV CR0-CR7, rmr32","0F 22 /r","V","N.E.","","modrm_regonly"
"MOV CR0-CR7, rmr64","0F 22 /r","N.E.","V","","modrm_regonly"
"MOV CR8, rmr64","REX.R + 0F 22 /0","N.E.","V","","modrm_regonly,pseudo"
"MOV TR0-TR7, rmr32","0F 26 /r","V","N.E.","","modrm_regonly"
"MOV TR0-TR7, rmr64","0F 26 /r","N.E.","V","","modrm_regonly"
"MOV rmr32, TR0-TR7","0F 24 /r","V","N.E.","","modrm_regonly"
"MOV rmr64, TR0-TR7","0F 24 /r","N.E.","V","","modrm_regonly"
"MOV Sreg, r32/m16","8E /r","V","V","","operand32"
"MOV r/m32, Sreg","8C /r","V","V","","operand32"
`},
{"602", `
"MOV rmr32, DR0-DR7","0F 21 /r","V","N.E.","","modrm_regonly"
"MOV rmr64, DR0-DR7","0F 21 /r","N.E.","V","","modrm_regonly"
"MOV DR0-DR7, rmr32","0F 23 /r","V","N.E.","","modrm_regonly"
"MOV DR0-DR7, rmr64","0F 23 /r","N.E.","V","","modrm_regonly"
"MOV TR0-TR7, rmr32","0F 26 /r","V","N.E.","","modrm_regonly"
"MOV TR0-TR7, rmr64","0F 26 /r","N.E.","V","","modrm_regonly"
"MOV rmr32, TR0-TR7","0F 24 /r","V","N.E.","","modrm_regonly"
"MOV rmr64, TR0-TR7","0F 24 /r","N.E.","V","","modrm_regonly"
"MOV Sreg, r32/m16","8E /r","V","V","","operand32"
"MOV r/m32, Sreg","8C /r","V","V","","operand32"
`},
{"148,150,155,157,160", `
"BNDCL bnd1, r/m32","F3 0F 1A /r","V","N.E.","MPX",""
"BNDCL bnd1, r/m64","F3 0F 1A /r","N.E.","V","MPX",""
"BNDCU bnd1, r/m32","F2 0F 1A /r","V","N.E.","MPX",""
"BNDCU bnd1, r/m64","F2 0F 1A /r","N.E.","V","MPX",""
"BNDCN bnd1, r/m32","F2 0F 1B /r","V","N.E.","MPX",""
"BNDCN bnd1, r/m64","F2 0F 1B /r","N.E.","V","MPX",""
"BNDMK bnd1, m32","F3 0F 1B /r","V","N.E.","MPX","modrm_memonly"
"BNDMK bnd1, m64","F3 0F 1B /r","N.E.","V","MPX","modrm_memonly"
"BNDMOV bnd1, bnd2/m64","66 0F 1A /r","V","N.E.","MPX",""
"BNDMOV bnd1, bnd2/m128","66 0F 1A /r","N.E.","V","MPX",""
"BNDMOV bnd2/m64, bnd1","66 0F 1B /r","V","N.E.","MPX",""
"BNDMOV bnd2/m128, bnd1","66 0F 1B /r","N.E.","V","MPX",""
"BNDSTX mib, bnd1","0F 1B /r","V","V","MPX",""
`},
{"169", `
"BSWAP r32op","0F C8+rd","V","V","","operand32"
"BSWAP r64op","REX.W 0F C8+rd","N.E.","V","",""
"BSWAP r16op","0F C8+rd","V","V","","operand16"
`},
{"296,300", `
"CVTSD2SI r32, xmm2/m64","F2 0F 2D /r","V","V","SSE2","operand16,operand32"
"CVTSD2SI r64, xmm2/m64","F2 REX.W 0F 2D /r","N.E.","V","SSE2",""
"VCVTSD2SI r32, xmm2/m64","VEX.LIG.F2.0F.W0 2D /r","V","V","AVX",""
"VCVTSD2SI r64, xmm2/m64","VEX.LIG.F2.0F.W1 2D /r","N.E.","V","AVX",""
"CVTSI2SD xmm1, r/m32","F2 0F 2A /r","V","V","SSE2","operand16,operand32"
"CVTSI2SD xmm1, r/m64","F2 REX.W 0F 2A /r","N.E.","V","SSE2",""
"VCVTSI2SD xmm1, xmmV, r/m32","VEX.NDS.LIG.F2.0F.W0 2A /r","V","V","AVX",""
"VCVTSI2SD xmm1, xmmV, r/m64","VEX.NDS.LIG.F2.0F.W1 2A /r","N.E.","V","AVX",""
`},
{"686", `
"MULX r32, r32V, r/m32","VEX.NDD.LZ.F2.0F38.W0 F6 /r","V","V","BMI2",""
"MULX r64, r64V, r/m64","VEX.NDD.LZ.F2.0F38.W1 F6 /r","N.E.","V","BMI2",""
`},
}
// TestOutput runs the parse/cleanup/write pipeline once per entry in tests,
// restricted to that entry's pages, and compares the text that write produces
// against the entry's expected output (normalized by reformat).
// The test is skipped when the file named by flagFile does not exist.
func TestOutput(t *testing.T) {
	_, statErr := os.Stat(*flagFile)
	if os.IsNotExist(statErr) {
		t.Skipf("no x86manual: %v", statErr)
	}

	for _, tc := range tests {
		// Limit processing to the pages listed for this table entry.
		*flagDebugPage = tc.pages
		onlySomePages = true

		var buf bytes.Buffer
		write(&buf, cleanup(parse()))

		have, want := buf.String(), reformat(tc.output)
		if have == want {
			continue
		}
		t.Errorf("p.%v: incorrect output\nhave:\n%s\nwant:\n%s\ndiffs:\n%s", tc.pages, strings.TrimRight(have, "\n"), strings.TrimRight(want, "\n"), strings.TrimRight(diffs(have, want), "\n"))
	}
}
// indent drops any trailing newlines from s and then inserts a tab after
// every remaining newline, so each line except the first starts with a tab.
func indent(s string) string {
	trimmed := strings.TrimRight(s, "\n")
	return strings.Replace(trimmed, "\n", "\n\t", -1)
}
// reformat normalizes a multi-line string: every line has its leading and
// trailing white space removed, lines that are empty after trimming are
// dropped, and each surviving line is terminated by a single newline.
// The result is empty when s contains no non-blank lines.
func reformat(s string) string {
	var out strings.Builder
	// The result can exceed len(s) by at most one byte (a newline added
	// after a final unterminated line), so a single allocation suffices.
	out.Grow(len(s) + 1)
	for _, line := range strings.Split(s, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		out.WriteString(line)
		out.WriteByte('\n')
	}
	return out.String()
}
// diffs returns an order-insensitive, line-based difference between have and
// want. Both inputs are split into lines and sorted; a line present only in
// want is reported as "- line", a line present only in have as "+ line".
// Lines common to both are omitted. The result is empty when the two inputs
// contain the same multiset of lines.
func diffs(have, want string) string {
	// sortedLines splits s into sorted lines, ignoring trailing newlines.
	// An input with no content yields no lines at all; without this check
	// strings.Split would return [""] and the diff would report a phantom
	// empty line.
	sortedLines := func(s string) []string {
		s = strings.TrimRight(s, "\n")
		if s == "" {
			return nil
		}
		lines := strings.Split(s, "\n")
		sort.Strings(lines)
		return lines
	}
	wantLines := sortedLines(want)
	haveLines := sortedLines(have)

	// Merge the two sorted lists, emitting whichever head is smaller and
	// silently consuming heads that match.
	var buf bytes.Buffer
	for len(wantLines) > 0 || len(haveLines) > 0 {
		switch {
		case len(haveLines) == 0 || len(wantLines) > 0 && wantLines[0] < haveLines[0]:
			fmt.Fprintf(&buf, "- %s\n", wantLines[0])
			wantLines = wantLines[1:]
		case len(wantLines) == 0 || len(haveLines) > 0 && wantLines[0] > haveLines[0]:
			fmt.Fprintf(&buf, "+ %s\n", haveLines[0])
			haveLines = haveLines[1:]
		default:
			wantLines = wantLines[1:]
			haveLines = haveLines[1:]
		}
	}
	return buf.String()
}