whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,610 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Armspec reads the “ARM Architecture Reference Manual”
|
||||
// to collect instruction encoding details and writes those details to standard output
|
||||
// in JSON format.
|
||||
//
|
||||
// # Warning Warning Warning
|
||||
//
|
||||
// This program is unfinished. It is being published in this incomplete form
|
||||
// for interested readers, but do not expect it to be runnable or useful.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"rsc.io/pdf"
|
||||
)
|
||||
|
||||
type Inst struct {
|
||||
Name string
|
||||
ID string
|
||||
Bits string
|
||||
Arch string
|
||||
Syntax []string
|
||||
Code string
|
||||
}
|
||||
|
||||
const debugPage = 0
|
||||
|
||||
var stdout *bufio.Writer
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("armspec: ")
|
||||
|
||||
if len(os.Args) != 2 {
|
||||
fmt.Fprintf(os.Stderr, "usage: armspec file.pdf\n")
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
f, err := pdf.Open(os.Args[1])
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Find instruction set reference in outline, to build instruction list.
|
||||
instList := instHeadings(f.Outline())
|
||||
if len(instList) < 200 {
|
||||
log.Fatalf("only found %d instructions in table of contents", len(instList))
|
||||
}
|
||||
|
||||
stdout = bufio.NewWriter(os.Stdout)
|
||||
fmt.Fprintf(stdout, "[")
|
||||
numTable := 0
|
||||
defer stdout.Flush()
|
||||
|
||||
// Scan document looking for instructions.
|
||||
// Must find exactly the ones in the outline.
|
||||
n := f.NumPage()
|
||||
PageLoop:
|
||||
for pageNum := 1; pageNum <= n; pageNum++ {
|
||||
if debugPage > 0 && pageNum != debugPage {
|
||||
continue
|
||||
}
|
||||
if pageNum > 1127 {
|
||||
break
|
||||
}
|
||||
p := f.Page(pageNum)
|
||||
name, table := parsePage(pageNum, p)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
if len(table) < 1 {
|
||||
if false {
|
||||
fmt.Fprintf(os.Stderr, "no encodings for instruction %q (page %d)\n", name, pageNum)
|
||||
}
|
||||
continue
|
||||
}
|
||||
for _, inst := range table {
|
||||
if numTable > 0 {
|
||||
fmt.Fprintf(stdout, ",")
|
||||
}
|
||||
numTable++
|
||||
js, _ := json.Marshal(inst)
|
||||
fmt.Fprintf(stdout, "\n%s", jsFix.Replace(string(js)))
|
||||
}
|
||||
for j, headline := range instList {
|
||||
if name == headline {
|
||||
instList[j] = ""
|
||||
continue PageLoop
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "unexpected instruction %q (page %d)\n", name, pageNum)
|
||||
}
|
||||
|
||||
fmt.Fprintf(stdout, "\n]\n")
|
||||
stdout.Flush()
|
||||
|
||||
if debugPage == 0 {
|
||||
for _, headline := range instList {
|
||||
if headline != "" {
|
||||
switch headline {
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
|
||||
case "CHKA": // ThumbEE
|
||||
case "CPS": // system instruction
|
||||
case "CPY": // synonym for MOV
|
||||
case "ENTERX": // ThumbEE
|
||||
case "F* (former VFP instruction mnemonics)": // synonyms
|
||||
case "HB, HBL, HBLP, HBP": // ThumbEE
|
||||
case "LEAVEX": // ThumbEE
|
||||
case "MOV (shifted register)": // pseudo instruction for ASR, LSL, LSR, ROR, and RRX
|
||||
case "NEG": // synonym for RSB
|
||||
case "RFE": // system instruction
|
||||
case "SMC (previously SMI)": // system instruction
|
||||
case "SRS": // system instruction
|
||||
case "SUBS PC, LR and related instructions": // system instruction
|
||||
case "VAND (immediate)": // pseudo instruction
|
||||
case "VCLE (register)": // pseudo instruction
|
||||
case "VCLT (register)": // pseudo instruction
|
||||
case "VORN (immediate)": // pseudo instruction
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func instHeadings(outline pdf.Outline) []string {
|
||||
return appendInstHeadings(outline, nil)
|
||||
}
|
||||
|
||||
var instRE = regexp.MustCompile(`A[\d.]+ Alphabetical list of instructions`)
|
||||
var childRE = regexp.MustCompile(`A[\d.]+ (.+)`)
|
||||
var sectionRE = regexp.MustCompile(`^A[\d.]+$`)
|
||||
var bitRE = regexp.MustCompile(`^( |[01]|\([01]\))*$`)
|
||||
|
||||
func appendInstHeadings(outline pdf.Outline, list []string) []string {
|
||||
if instRE.MatchString(outline.Title) {
|
||||
for _, child := range outline.Child {
|
||||
m := childRE.FindStringSubmatch(child.Title)
|
||||
if m == nil {
|
||||
fmt.Fprintf(os.Stderr, "cannot parse section title: %s\n", child.Title)
|
||||
continue
|
||||
}
|
||||
list = append(list, m[1])
|
||||
}
|
||||
}
|
||||
for _, child := range outline.Child {
|
||||
list = appendInstHeadings(child, list)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
const inch = 72.0
|
||||
|
||||
func parsePage(num int, p pdf.Page) (name string, table []Inst) {
|
||||
content := p.Content()
|
||||
|
||||
var text []pdf.Text
|
||||
for _, t := range content.Text {
|
||||
if match(t, "Times-Roman", 7.2, "") {
|
||||
t.FontSize = 9
|
||||
}
|
||||
if match(t, "Times-Roman", 6.72, "") && '0' <= t.S[0] && t.S[0] <= '9' {
|
||||
t.S = string([]rune("⁰¹²³⁴⁵⁶⁷⁸⁹")[t.S[0]-'0'])
|
||||
t.FontSize = 9
|
||||
t.Y -= 2.28
|
||||
}
|
||||
if t.Font == "Gen_Arial" {
|
||||
continue
|
||||
}
|
||||
text = append(text, t)
|
||||
}
|
||||
|
||||
text = findWords(text)
|
||||
|
||||
for i, t := range text {
|
||||
if t.Font == "Times" {
|
||||
t.Font = "Times-Roman"
|
||||
text[i] = t
|
||||
}
|
||||
}
|
||||
|
||||
if debugPage > 0 {
|
||||
for _, t := range text {
|
||||
fmt.Println(t)
|
||||
}
|
||||
for _, r := range content.Rect {
|
||||
fmt.Println(r)
|
||||
}
|
||||
}
|
||||
|
||||
// Remove text we should ignore.
|
||||
out := text[:0]
|
||||
skip := false
|
||||
for _, t := range text {
|
||||
// skip page footer
|
||||
if match(t, "Helvetica", 8, "A") || match(t, "Helvetica", 8, "ARM DDI") || match(t, "Helvetica-Oblique", 8, "Copyright") {
|
||||
continue
|
||||
}
|
||||
// skip section header and body text
|
||||
if match(t, "Helvetica-Bold", 12, "") && (sectionRE.MatchString(t.S) || t.S == "Alphabetical list of instructions") {
|
||||
skip = true
|
||||
continue
|
||||
}
|
||||
if skip && match(t, "Times-Roman", 9, "") {
|
||||
continue
|
||||
}
|
||||
skip = false
|
||||
out = append(out, t)
|
||||
}
|
||||
text = out
|
||||
|
||||
// Page header must say Instruction Details.
|
||||
if len(text) == 0 || !match(text[0], "Helvetica-Oblique", 8, "Instruction Details") && !match(text[0], "Times-Roman", 9, "Instruction Details") {
|
||||
return "", nil
|
||||
}
|
||||
text = text[1:]
|
||||
|
||||
isSection := func(text []pdf.Text, i int) int {
|
||||
if i+2 <= len(text) && match(text[i], "Helvetica-Bold", 10, "") && sectionRE.MatchString(text[i].S) && match(text[i+1], "Helvetica-Bold", 10, "") {
|
||||
return 2
|
||||
}
|
||||
if i+1 <= len(text) && match(text[i], "Helvetica-Bold", 10, "") && childRE.MatchString(text[i].S) {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Skip dummy headlines and sections.
|
||||
for d := isSection(text, 0); d != 0; d = isSection(text, 0) {
|
||||
i := d
|
||||
for i < len(text) && !match(text[i], "Helvetica-Bold", 9, "Encoding") && !match(text[i], "Helvetica-Bold", 10, "") {
|
||||
i++
|
||||
}
|
||||
if isSection(text, i) == 0 {
|
||||
break
|
||||
}
|
||||
text = text[i:]
|
||||
}
|
||||
|
||||
// Next line is headline. Can wrap to multiple lines.
|
||||
d := isSection(text, 0)
|
||||
if d == 0 {
|
||||
if debugPage > 0 {
|
||||
fmt.Printf("non-inst-headline: %v\n", text[0])
|
||||
}
|
||||
checkNoEncodings(num, text)
|
||||
return "", nil
|
||||
}
|
||||
if d == 2 {
|
||||
name = text[1].S
|
||||
text = text[2:]
|
||||
} else if d == 1 {
|
||||
m := childRE.FindStringSubmatch(text[0].S)
|
||||
name = m[1]
|
||||
text = text[1:]
|
||||
}
|
||||
for len(text) > 0 && match(text[0], "Helvetica-Bold", 10, "") {
|
||||
name += " " + text[0].S
|
||||
text = text[1:]
|
||||
}
|
||||
|
||||
// Skip description.
|
||||
for len(text) > 0 && (match(text[0], "Times-Roman", 9, "") || match(text[0], "LucidaSansTypewriteX", 6.48, "") || match(text[0], "Times-Bold", 10, "Note")) {
|
||||
text = text[1:]
|
||||
}
|
||||
|
||||
// Encodings follow.
|
||||
warned := false
|
||||
for i := 0; i < len(text); {
|
||||
if match(text[i], "Helvetica-Bold", 10, "Assembler syntax") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "Modified operation in ThumbEE") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "Unallocated memory hints") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "Related encodings") ||
|
||||
match(text[i], "Times-Roman", 9, "Figure A") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "Table A") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "VFP Instructions") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "VFP instructions") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "VFP vectors") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "FLDMX") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "FSTMX") ||
|
||||
match(text[i], "Helvetica-Bold", 9, "Advanced SIMD and VFP") {
|
||||
checkNoEncodings(num, text[i:])
|
||||
break
|
||||
}
|
||||
if match(text[i], "Helvetica-Bold", 9, "Figure A") {
|
||||
y := text[i].Y
|
||||
i++
|
||||
for i < len(text) && math.Abs(text[i].Y-y) < 2 {
|
||||
i++
|
||||
}
|
||||
continue
|
||||
}
|
||||
if !match(text[i], "Helvetica-Bold", 9, "Encoding") {
|
||||
if !warned {
|
||||
warned = true
|
||||
fmt.Fprintln(os.Stderr, "page", num, ": unexpected:", text[i])
|
||||
}
|
||||
i++
|
||||
continue
|
||||
}
|
||||
inst := Inst{
|
||||
Name: name,
|
||||
}
|
||||
enc := text[i].S
|
||||
x := text[i].X
|
||||
i++
|
||||
// Possible subarchitecture notes.
|
||||
for i < len(text) && text[i].X > x+36 {
|
||||
if inst.Arch != "" {
|
||||
inst.Arch += " "
|
||||
}
|
||||
inst.Arch += text[i].S
|
||||
i++
|
||||
}
|
||||
// Encoding syntaxes.
|
||||
for i < len(text) && (match(text[i], "LucidaSansTypewriteX", 6.48, "") || text[i].X > x+36) {
|
||||
if text[i].X < x+0.25*inch {
|
||||
inst.Syntax = append(inst.Syntax, text[i].S)
|
||||
} else {
|
||||
s := inst.Syntax[len(inst.Syntax)-1]
|
||||
if !strings.Contains(s, "\t") {
|
||||
s += "\t"
|
||||
} else {
|
||||
s += " "
|
||||
}
|
||||
s += text[i].S
|
||||
inst.Syntax[len(inst.Syntax)-1] = s
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
var bits, abits, aenc string
|
||||
bits, i = readBitBox(inst.Name, inst.Syntax, content, text, i)
|
||||
if strings.Contains(enc, " / ") {
|
||||
if i < len(text) && match(text[i], "Times-Roman", 8, "") {
|
||||
abits, i = readBitBox(inst.Name, inst.Syntax, content, text, i)
|
||||
} else {
|
||||
abits = bits
|
||||
}
|
||||
slash := strings.Index(enc, " / ")
|
||||
aenc = "Encoding " + enc[slash+len(" / "):]
|
||||
enc = enc[:slash]
|
||||
}
|
||||
|
||||
// pseudocode
|
||||
y0 := -1 * inch
|
||||
tab := 0.0
|
||||
for i < len(text) && match(text[i], "LucidaSansTypewriteX", 6.48, "") {
|
||||
t := text[i]
|
||||
i++
|
||||
if math.Abs(t.Y-y0) < 3 {
|
||||
// same line as last fragment, probably just two spaces
|
||||
inst.Code += " " + t.S
|
||||
continue
|
||||
}
|
||||
if inst.Code != "" {
|
||||
inst.Code += "\n"
|
||||
}
|
||||
if t.X > x+0.1*inch {
|
||||
if tab == 0 {
|
||||
tab = t.X - x
|
||||
}
|
||||
inst.Code += strings.Repeat("\t", int((t.X-x)/tab+0.5))
|
||||
} else {
|
||||
tab = 0
|
||||
}
|
||||
inst.Code += t.S
|
||||
y0 = t.Y
|
||||
}
|
||||
|
||||
inst.ID = strings.TrimPrefix(enc, "Encoding ")
|
||||
inst.Bits = bits
|
||||
table = append(table, inst)
|
||||
if abits != "" {
|
||||
inst.ID = strings.TrimPrefix(aenc, "Encoding ")
|
||||
inst.Bits = abits
|
||||
table = append(table, inst)
|
||||
}
|
||||
|
||||
}
|
||||
return name, table
|
||||
}
|
||||
|
||||
func readBitBox(name string, syntax []string, content pdf.Content, text []pdf.Text, i int) (string, int) {
|
||||
// bit headings
|
||||
y2 := 0.0
|
||||
x1 := 0.0
|
||||
x2 := 0.0
|
||||
for i < len(text) && match(text[i], "Times-Roman", 8, "") {
|
||||
if y2 == 0 {
|
||||
y2 = text[i].Y
|
||||
}
|
||||
if x1 == 0 {
|
||||
x1 = text[i].X
|
||||
}
|
||||
i++
|
||||
}
|
||||
// bit fields in box
|
||||
y1 := 0.0
|
||||
dy1 := 0.0
|
||||
for i < len(text) && match(text[i], "Times-Roman", 9, "") {
|
||||
if x2 < text[i].X+text[i].W {
|
||||
x2 = text[i].X + text[i].W
|
||||
}
|
||||
y1 = text[i].Y
|
||||
dy1 = text[i].FontSize
|
||||
i++
|
||||
}
|
||||
|
||||
if debugPage > 0 {
|
||||
fmt.Println("encoding box", x1, y1, x2, y2)
|
||||
}
|
||||
|
||||
// Find lines (thin rectangles) separating bit fields.
|
||||
var bottom, top pdf.Rect
|
||||
const (
|
||||
yMargin = 0.25 * 72
|
||||
xMargin = 2 * 72
|
||||
)
|
||||
for _, r := range content.Rect {
|
||||
if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
|
||||
if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
|
||||
bottom = r
|
||||
}
|
||||
if y1+dy1 < r.Min.Y && r.Min.Y < y2 {
|
||||
top = r
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if debugPage > 0 {
|
||||
fmt.Println("top", top, "bottom", bottom)
|
||||
}
|
||||
|
||||
const ε = 0.1 * 72
|
||||
var bars []pdf.Rect
|
||||
for _, r := range content.Rect {
|
||||
if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε {
|
||||
bars = append(bars, r)
|
||||
}
|
||||
}
|
||||
sort.Sort(RectHorizontal(bars))
|
||||
|
||||
// There are 16-bit and 32-bit encodings.
|
||||
// In practice, they are about 2.65 and 5.3 inches wide, respectively.
|
||||
// Use 4 inches as a cutoff.
|
||||
nbit := 32
|
||||
dx := top.Max.X - top.Min.X
|
||||
if top.Max.X-top.Min.X < 4*72 {
|
||||
nbit = 16
|
||||
}
|
||||
|
||||
total := 0
|
||||
var buf bytes.Buffer
|
||||
for i := 0; i < len(bars)-1; i++ {
|
||||
if i > 0 {
|
||||
fmt.Fprintf(&buf, "|")
|
||||
}
|
||||
var sub []pdf.Text
|
||||
x1, x2 := bars[i].Min.X, bars[i+1].Min.X
|
||||
for _, t := range content.Text {
|
||||
tx := t.X + t.W/2
|
||||
ty := t.Y + t.FontSize/2
|
||||
if x1 < tx && tx < x2 && y1 < ty && ty < y2 {
|
||||
sub = append(sub, t)
|
||||
}
|
||||
}
|
||||
var str []string
|
||||
for _, t := range findWords(sub) {
|
||||
str = append(str, t.S)
|
||||
}
|
||||
s := strings.Join(str, " ")
|
||||
s = strings.Replace(s, ")(", ") (", -1)
|
||||
n := len(strings.Fields(s))
|
||||
b := int(float64(nbit)*(x2-x1)/dx + 0.5)
|
||||
if n == b {
|
||||
for j, f := range strings.Fields(s) {
|
||||
if j > 0 {
|
||||
fmt.Fprintf(&buf, "|")
|
||||
}
|
||||
fmt.Fprintf(&buf, "%s", f)
|
||||
}
|
||||
} else {
|
||||
if n != 1 {
|
||||
fmt.Fprintf(os.Stderr, "%s - %s - multi-field %d-bit encoding: %s\n", name, syntax, n, s)
|
||||
}
|
||||
fmt.Fprintf(&buf, "%s:%d", s, b)
|
||||
}
|
||||
total += b
|
||||
}
|
||||
|
||||
if total != nbit || total == 0 {
|
||||
fmt.Fprintf(os.Stderr, "%s - %s - %d-bit encoding\n", name, syntax, total)
|
||||
}
|
||||
return buf.String(), i
|
||||
}
|
||||
|
||||
type RectHorizontal []pdf.Rect
|
||||
|
||||
func (x RectHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X }
|
||||
func (x RectHorizontal) Len() int { return len(x) }
|
||||
|
||||
func checkNoEncodings(num int, text []pdf.Text) {
|
||||
for _, t := range text {
|
||||
if match(t, "Helvetica-Bold", 9, "Encoding") {
|
||||
fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func match(t pdf.Text, font string, size float64, substr string) bool {
|
||||
return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr)
|
||||
}
|
||||
|
||||
func findWords(chars []pdf.Text) (words []pdf.Text) {
|
||||
// Sort by Y coordinate and normalize.
|
||||
const nudge = 1
|
||||
sort.Sort(pdf.TextVertical(chars))
|
||||
old := -100000.0
|
||||
for i, c := range chars {
|
||||
if c.Y != old && math.Abs(old-c.Y) < nudge {
|
||||
chars[i].Y = old
|
||||
} else {
|
||||
old = c.Y
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by Y coordinate, breaking ties with X.
|
||||
// This will bring letters in a single word together.
|
||||
sort.Sort(pdf.TextVertical(chars))
|
||||
|
||||
// Loop over chars.
|
||||
for i := 0; i < len(chars); {
|
||||
// Find all chars on line.
|
||||
j := i + 1
|
||||
for j < len(chars) && chars[j].Y == chars[i].Y {
|
||||
j++
|
||||
}
|
||||
var end float64
|
||||
// Split line into words (really, phrases).
|
||||
for k := i; k < j; {
|
||||
ck := &chars[k]
|
||||
s := ck.S
|
||||
end = ck.X + ck.W
|
||||
charSpace := ck.FontSize / 6
|
||||
wordSpace := ck.FontSize * 2 / 3
|
||||
l := k + 1
|
||||
for l < j {
|
||||
// Grow word.
|
||||
cl := &chars[l]
|
||||
if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
|
||||
s += cl.S
|
||||
end = cl.X + cl.W
|
||||
l++
|
||||
continue
|
||||
}
|
||||
// Add space to phrase before next word.
|
||||
if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
|
||||
s += " " + cl.S
|
||||
end = cl.X + cl.W
|
||||
l++
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
f := ck.Font
|
||||
f = strings.TrimSuffix(f, ",Italic")
|
||||
f = strings.TrimSuffix(f, "-Italic")
|
||||
words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
|
||||
k = l
|
||||
}
|
||||
i = j
|
||||
}
|
||||
|
||||
return words
|
||||
}
|
||||
|
||||
func sameFont(f1, f2 string) bool {
|
||||
f1 = strings.TrimSuffix(f1, ",Italic")
|
||||
f1 = strings.TrimSuffix(f1, "-Italic")
|
||||
f2 = strings.TrimSuffix(f1, ",Italic")
|
||||
f2 = strings.TrimSuffix(f1, "-Italic")
|
||||
return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
|
||||
}
|
||||
|
||||
var jsFix = strings.NewReplacer(
|
||||
// `\u003c`, `<`,
|
||||
// `\u003e`, `>`,
|
||||
// `\u0026`, `&`,
|
||||
// `\u0009`, `\t`,
|
||||
)
|
||||
|
||||
func printTable(name string, table []Inst) {
|
||||
_ = strconv.Atoi
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var _ = os.Stdout
|
||||
var _ = fmt.Sprintf
|
||||
|
||||
type Inst struct {
|
||||
Name string
|
||||
ID string
|
||||
Bits string
|
||||
Arch string
|
||||
Syntax []string
|
||||
Code string
|
||||
Base uint32
|
||||
Mask uint32
|
||||
Prog []*Stmt
|
||||
}
|
||||
|
||||
func main() {
|
||||
data, err := ioutil.ReadFile("spec.json")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
var insts []Inst
|
||||
if err := json.Unmarshal(data, &insts); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
var out []Inst
|
||||
for _, inst := range insts {
|
||||
inst.Prog = parse(inst.Name+" "+inst.ID, inst.Code)
|
||||
if inst.ID[0] == 'A' && !strings.HasPrefix(inst.Syntax[0], "MSR<c>") && !strings.Contains(inst.Syntax[0], "<coproc>") && !strings.Contains(inst.Syntax[0], "VLDM") && !strings.Contains(inst.Syntax[0], "VSTM") {
|
||||
out = append(out, inst)
|
||||
}
|
||||
}
|
||||
insts = out
|
||||
|
||||
for i := range insts {
|
||||
dosize(&insts[i])
|
||||
}
|
||||
|
||||
var cond, special []Inst
|
||||
for _, inst := range insts {
|
||||
if inst.Base>>28 == 0xF {
|
||||
special = append(special, inst)
|
||||
} else {
|
||||
cond = append(cond, inst)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("special:\n")
|
||||
split(special, 0xF0000000, 1)
|
||||
fmt.Printf("cond:\n")
|
||||
split(cond, 0xF0000000, 1)
|
||||
}
|
||||
|
||||
func dosize(inst *Inst) {
|
||||
var base, mask uint32
|
||||
off := 0
|
||||
for _, f := range strings.Split(inst.Bits, "|") {
|
||||
if i := strings.Index(f, ":"); i >= 0 {
|
||||
n, _ := strconv.Atoi(f[i+1:])
|
||||
off += n
|
||||
continue
|
||||
}
|
||||
for _, bit := range strings.Fields(f) {
|
||||
switch bit {
|
||||
case "0", "(0)":
|
||||
mask |= 1 << uint(31-off)
|
||||
case "1", "(1)":
|
||||
base |= 1 << uint(31-off)
|
||||
}
|
||||
off++
|
||||
}
|
||||
}
|
||||
if off != 16 && off != 32 {
|
||||
log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off)
|
||||
}
|
||||
if off == 16 {
|
||||
mask >>= 16
|
||||
base >>= 16
|
||||
}
|
||||
mask |= base
|
||||
inst.Mask = mask
|
||||
inst.Base = base
|
||||
}
|
||||
|
||||
func split(insts []Inst, used uint32, depth int) {
|
||||
Again:
|
||||
if len(insts) <= 1 {
|
||||
for _, inst := range insts {
|
||||
fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
m := ^used
|
||||
for _, inst := range insts {
|
||||
m &= inst.Mask
|
||||
}
|
||||
if m == 0 {
|
||||
fmt.Printf("«%*s%#08x masked out (%d)\n", depth*2, "", used, len(insts))
|
||||
for _, inst := range insts {
|
||||
fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1))
|
||||
}
|
||||
updated := false
|
||||
for i := range insts {
|
||||
if updateMask(&insts[i]) {
|
||||
updated = true
|
||||
}
|
||||
}
|
||||
fmt.Printf("»\n")
|
||||
if updated {
|
||||
goto Again
|
||||
}
|
||||
fmt.Printf("%*s%#08x masked out (%d)\n", depth*2, "", used, len(insts))
|
||||
for _, inst := range insts {
|
||||
fmt.Printf("%*s%#08x %#08x %s %s %v\n", depth*2+2, "", inst.Mask, inst.Base, inst.Syntax[0], inst.Bits, seeRE.FindAllString(inst.Code, -1))
|
||||
}
|
||||
//checkOverlap(used, insts)
|
||||
return
|
||||
}
|
||||
for i := 31; i >= 0; i-- {
|
||||
if m&(1<<uint(i)) != 0 {
|
||||
m = 1 << uint(i)
|
||||
break
|
||||
}
|
||||
}
|
||||
var bit [2][]Inst
|
||||
for _, inst := range insts {
|
||||
b := (inst.Base / m) & 1
|
||||
bit[b] = append(bit[b], inst)
|
||||
}
|
||||
|
||||
for b, list := range bit {
|
||||
if len(list) > 0 {
|
||||
suffix := ""
|
||||
if len(bit[1-b]) == 0 {
|
||||
suffix = " (only)"
|
||||
}
|
||||
fmt.Printf("%*sbit %#08x = %d%s\n", depth*2, "", m, b, suffix)
|
||||
split(list, used|m, depth+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var seeRE = regexp.MustCompile(`SEE ([^;\n]+)`)
|
||||
|
||||
func updateMask(inst *Inst) bool {
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
fmt.Println("PANIC:", err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
print(".")
|
||||
println(inst.Name, inst.ID, inst.Bits)
|
||||
println(inst.Code)
|
||||
wiggle := ^inst.Mask &^ 0xF0000000
|
||||
n := countbits(wiggle)
|
||||
m1 := ^uint32(0)
|
||||
m2 := ^uint32(0)
|
||||
for i := uint32(0); i < 1<<uint(n); i++ {
|
||||
w := inst.Base | expand(i, wiggle)
|
||||
if !isValid(inst, w) {
|
||||
continue
|
||||
}
|
||||
m1 &= w
|
||||
m2 &= ^w
|
||||
}
|
||||
m := m1 | m2
|
||||
m &^= 0xF0000000
|
||||
m |= 0xF0000000 & inst.Mask
|
||||
if m&^inst.Mask != 0 {
|
||||
fmt.Printf("%s %s: mask=%#x but decided %#x\n", inst.Name, inst.ID, inst.Mask, m)
|
||||
inst.Mask = m
|
||||
inst.Base = m1
|
||||
return true
|
||||
}
|
||||
if inst.Mask&^m != 0 {
|
||||
fmt.Printf("%s %s: mask=%#x but got %#x\n", inst.Name, inst.ID, inst.Mask, m)
|
||||
panic("bad updateMask")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func countbits(x uint32) int {
|
||||
n := 0
|
||||
for ; x != 0; x >>= 1 {
|
||||
n += int(x & 1)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func expand(x, m uint32) uint32 {
|
||||
var out uint32
|
||||
for i := uint(0); i < 32; i++ {
|
||||
out >>= 1
|
||||
if m&1 != 0 {
|
||||
out |= (x & 1) << 31
|
||||
x >>= 1
|
||||
}
|
||||
m >>= 1
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user