whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,503 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Power64spec reads the “Power ISA V2.07” Manual
|
||||
// to collect instruction encoding details and writes those details to standard output
|
||||
// in CSV format.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// ppc64spec PowerISA_V2.07_PUBLIC.pdf >ppc64.csv
|
||||
//
|
||||
// Each CSV line contains four fields:
|
||||
//
|
||||
// instruction
|
||||
// The instruction heading, such as "AAD imm8".
|
||||
// mnemonic
|
||||
// The instruction mnemonics, separated by | symbols.
|
||||
// encoding
|
||||
// The instruction encoding, a sequence of name@startbit| describing each bit field in turn.
|
||||
// tags
|
||||
// For now, empty.
|
||||
//
|
||||
// For more on the exact meaning of these fields, see the Power manual.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"rsc.io/pdf"
|
||||
)
|
||||
|
||||
type Inst struct {
|
||||
Name string
|
||||
Text string
|
||||
Enc string
|
||||
}
|
||||
|
||||
const debugPage = 0
|
||||
|
||||
var stdout *bufio.Writer
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("ppc64spec: ")
|
||||
|
||||
if len(os.Args) != 2 {
|
||||
fmt.Fprintf(os.Stderr, "usage: ppc64spec file.pdf\n")
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
f, err := pdf.Open(os.Args[1])
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Find instruction set reference in outline, to build instruction list.
|
||||
instList := instHeadings(f.Outline())
|
||||
if len(instList) < 200 {
|
||||
log.Fatalf("only found %d instructions in table of contents", len(instList))
|
||||
}
|
||||
|
||||
var all = []Inst{
|
||||
// Split across multiple columns and pages!
|
||||
{"Count Leading Zeros Word X-form", "cntlzw RA, RS (Rc=0)\ncntlzw. RA, RS (Rc=1)", "31@0|RS@6|RA@11|///@16|26@21|Rc@31|"},
|
||||
}
|
||||
|
||||
for j, headline := range instList {
|
||||
for _, inst := range all {
|
||||
if headline == inst.Name {
|
||||
instList[j] = ""
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan document looking for instructions.
|
||||
// Must find exactly the ones in the outline.
|
||||
n := f.NumPage()
|
||||
for pageNum := 1; pageNum <= n; pageNum++ {
|
||||
if debugPage > 0 && pageNum != debugPage {
|
||||
continue
|
||||
}
|
||||
p := f.Page(pageNum)
|
||||
table := parsePage(pageNum, p)
|
||||
if len(table) == 0 {
|
||||
continue
|
||||
}
|
||||
InstLoop:
|
||||
for _, inst := range table {
|
||||
for j, headline := range instList {
|
||||
if inst.Name == headline {
|
||||
instList[j] = ""
|
||||
continue InstLoop
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "page %d: unexpected instruction %q\n", pageNum, inst.Name)
|
||||
}
|
||||
all = append(all, table...)
|
||||
}
|
||||
|
||||
if debugPage == 0 {
|
||||
for _, headline := range instList {
|
||||
if headline != "" {
|
||||
switch headline {
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
|
||||
case "CHKA": // ThumbEE
|
||||
case "CPS": // system instruction
|
||||
case "CPY": // synonym for MOV
|
||||
case "ENTERX": // ThumbEE
|
||||
case "F* (former VFP instruction mnemonics)": // synonyms
|
||||
case "HB, HBL, HBLP, HBP": // ThumbEE
|
||||
case "LEAVEX": // ThumbEE
|
||||
case "MOV (shifted register)": // pseudo instruction for ASR, LSL, LSR, ROR, and RRX
|
||||
case "NEG": // synonym for RSB
|
||||
case "RFE": // system instruction
|
||||
case "SMC (previously SMI)": // system instruction
|
||||
case "SRS": // system instruction
|
||||
case "SUBS PC, LR and related instructions": // system instruction
|
||||
case "VAND (immediate)": // pseudo instruction
|
||||
case "VCLE (register)": // pseudo instruction
|
||||
case "VCLT (register)": // pseudo instruction
|
||||
case "VORN (immediate)": // pseudo instruction
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stdout = bufio.NewWriter(os.Stdout)
|
||||
for _, inst := range all {
|
||||
fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, strings.Replace(inst.Text, "\n", "|", -1), inst.Enc, "")
|
||||
}
|
||||
stdout.Flush()
|
||||
|
||||
}
|
||||
|
||||
func instHeadings(outline pdf.Outline) []string {
|
||||
return appendInstHeadings(outline, nil)
|
||||
}
|
||||
|
||||
var instRE = regexp.MustCompile(` ([A-Z0-9]+-form|Byte|Word|Doubleword|Halfword)($| \[)`)
|
||||
var sectionRE = regexp.MustCompile(`^[0-9A-Z]+\.[0-9]`)
|
||||
|
||||
func appendInstHeadings(outline pdf.Outline, list []string) []string {
|
||||
if strings.Contains(outline.Title, "Variable Length Encoding (VLE) Encoding") {
|
||||
for _, child := range outline.Child {
|
||||
vle = appendInstHeadings(child, vle)
|
||||
}
|
||||
return list
|
||||
}
|
||||
if instRE.MatchString(outline.Title) && !sectionRE.MatchString(outline.Title) {
|
||||
list = append(list, outline.Title)
|
||||
}
|
||||
if outline.Title == "Transaction Abort Word Conditional" {
|
||||
list = append(list, outline.Title+" X-form")
|
||||
}
|
||||
for _, child := range outline.Child {
|
||||
list = appendInstHeadings(child, list)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
const inch = 72.0
|
||||
|
||||
func parsePage(num int, p pdf.Page) []Inst {
|
||||
content := p.Content()
|
||||
|
||||
var text []pdf.Text
|
||||
for _, t := range content.Text {
|
||||
text = append(text, t)
|
||||
}
|
||||
|
||||
text = findWords(text)
|
||||
|
||||
if debugPage > 0 {
|
||||
for _, t := range text {
|
||||
fmt.Println(t)
|
||||
}
|
||||
for _, r := range content.Rect {
|
||||
fmt.Println(r)
|
||||
}
|
||||
}
|
||||
|
||||
// Look for instruction encodings.
|
||||
// Some begin with a Helvetica-BoldOblique size 11 headline like "AND X-Form",
|
||||
// is followed by Helvetica 9 mnemonic, and then a bit box with
|
||||
// Helvetica 9 fields and Helvetica 7 bit offsets.
|
||||
// Others use Arial,BoldItalic 11 for the headline,
|
||||
// Arial 8 for the mnemonic, and Arial 4.2 for the bit offsets.
|
||||
|
||||
var insts []Inst
|
||||
for {
|
||||
// Heading
|
||||
for len(text) > 0 && !match(text[0], "Helvetica-BoldOblique", 11, "") && !match(text[0], "Arial,BoldItalic", 11, "") && !match(text[0], "Arial,BoldItalic", 10, "") {
|
||||
text = text[1:]
|
||||
}
|
||||
if len(text) == 0 {
|
||||
break
|
||||
}
|
||||
heading := text[0].S
|
||||
text = text[1:]
|
||||
for len(text) > 0 && (match(text[0], "Helvetica-BoldOblique", 11, "") || match(text[0], "Arial,BoldItalic", 11, "") || match(text[0], "Arial,BoldItalic", 10, "")) {
|
||||
heading += " " + text[0].S
|
||||
text = text[1:]
|
||||
}
|
||||
heading = strings.Replace(heading, "]", "] ", -1)
|
||||
heading = strings.Replace(heading, " ", " ", -1)
|
||||
heading = strings.Replace(heading, "rEVX-form", "r EVX-form", -1)
|
||||
heading = strings.Replace(heading, "eX-form", "e X-form", -1)
|
||||
heading = strings.Replace(heading, "mSD4-form", "m SD4-form", -1)
|
||||
heading = strings.Replace(heading, "eSCI8-form", "e SCI8-form", -1)
|
||||
heading = strings.TrimSpace(heading)
|
||||
if isVLE(heading) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Mnemonic
|
||||
if len(text) == 0 || (!match(text[0], "Helvetica", 9, "") && !match(text[0], "Helvetica-BoldOblique", 9, "") && !match(text[0], "Arial", 9, "") && !match(text[0], "Arial", 10, "")) {
|
||||
continue
|
||||
}
|
||||
mnemonic := ""
|
||||
y := text[0].Y
|
||||
x0 := text[0].X
|
||||
for len(text) > 0 && (match(text[0], "Helvetica", 9, "") || match(text[0], "Helvetica-BoldOblique", 9, "") || match(text[0], "Arial", 9, "") || match(text[0], "Courier", 8, "") || match(text[0], "LucidaConsole", 7.17, "") || text[0].Y == y) {
|
||||
if text[0].Y != y {
|
||||
if math.Abs(text[0].X-x0) > 4 {
|
||||
break
|
||||
}
|
||||
mnemonic += "\n"
|
||||
y = text[0].Y
|
||||
} else if mnemonic != "" {
|
||||
mnemonic += " "
|
||||
}
|
||||
mnemonic += text[0].S
|
||||
text = text[1:]
|
||||
}
|
||||
|
||||
// Encoding
|
||||
bits, i := readBitBox(heading, content, text, num)
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
insts = append(insts, Inst{heading, mnemonic, bits})
|
||||
}
|
||||
return insts
|
||||
}
|
||||
|
||||
var vle = []string{
|
||||
"System Call C-form,ESC-form",
|
||||
}
|
||||
|
||||
func isVLE(s string) bool {
|
||||
for _, v := range vle {
|
||||
if s == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) {
|
||||
// fields
|
||||
i := 0
|
||||
if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) {
|
||||
fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline)
|
||||
if len(text) > 0 {
|
||||
fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0])
|
||||
}
|
||||
return "", 0
|
||||
}
|
||||
sz := text[i].FontSize
|
||||
y2 := text[i].Y
|
||||
x2 := 0.0
|
||||
for i < len(text) && text[i].Y == y2 {
|
||||
if x2 < text[i].X+text[i].W {
|
||||
x2 = text[i].X + text[i].W
|
||||
}
|
||||
i++
|
||||
}
|
||||
y2 += sz / 2
|
||||
|
||||
// bit numbers
|
||||
if i >= len(text) || text[i].S != "0" {
|
||||
if headline == "Transaction Abort Doubleword Conditional X-form" {
|
||||
// Split across the next page.
|
||||
return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i
|
||||
}
|
||||
if headline == "Add Scaled Immediate SCI8-form" {
|
||||
// Very strange fonts.
|
||||
return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline)
|
||||
if i < len(text) {
|
||||
fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i])
|
||||
}
|
||||
return "", 0
|
||||
}
|
||||
sz = text[i].FontSize
|
||||
y1 := text[i].Y
|
||||
x1 := text[i].X
|
||||
for i < len(text) && text[i].Y == y1 {
|
||||
if x2 < text[i].X+text[i].W {
|
||||
x2 = text[i].X + text[i].W
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
if debugPage > 0 {
|
||||
fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i])
|
||||
}
|
||||
|
||||
// Find lines (thin rectangles) separating bit fields.
|
||||
var bottom, top pdf.Rect
|
||||
const (
|
||||
yMargin = 0.25 * 72
|
||||
xMargin = 1 * 72
|
||||
)
|
||||
for _, r := range content.Rect {
|
||||
// Only consider lines in the same column.
|
||||
if (x1 < 306) != (r.Max.X < 306) {
|
||||
continue
|
||||
}
|
||||
if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
|
||||
if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
|
||||
bottom = r
|
||||
}
|
||||
if y2 < r.Min.Y && r.Min.Y < y2+8 {
|
||||
top = r
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if bottom.Min.X == 0 {
|
||||
// maybe bit numbers are outside box; see doze, nap, sleep, rvwinkle.
|
||||
for _, r := range content.Rect {
|
||||
// Only consider lines in the same column.
|
||||
if (x1 < 306) != (r.Max.X < 306) {
|
||||
continue
|
||||
}
|
||||
if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
|
||||
if y1+sz/2 < r.Min.Y && r.Min.Y < y2 {
|
||||
bottom = r
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if debugPage > 0 {
|
||||
fmt.Println("top", top, "bottom", bottom)
|
||||
}
|
||||
|
||||
const ε = 0.1 * 72
|
||||
var bars []pdf.Rect
|
||||
for _, r := range content.Rect {
|
||||
if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) {
|
||||
bars = append(bars, r)
|
||||
}
|
||||
}
|
||||
sort.Sort(RectHorizontal(bars))
|
||||
|
||||
out := ""
|
||||
for i := 0; i < len(bars)-1; i++ {
|
||||
var sub []pdf.Text
|
||||
x1, x2 := bars[i].Min.X, bars[i+1].Min.X
|
||||
for _, t := range content.Text {
|
||||
tx := t.X + t.W/2
|
||||
ty := t.Y + t.FontSize/4
|
||||
if x1 < tx && tx < x2 && y1 < ty && ty < y2 {
|
||||
sub = append(sub, t)
|
||||
}
|
||||
}
|
||||
var str []string
|
||||
for _, t := range findWords(sub) {
|
||||
str = append(str, t.S)
|
||||
}
|
||||
s := strings.Join(str, "@")
|
||||
out += s + "|"
|
||||
}
|
||||
|
||||
if out == "" {
|
||||
fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline)
|
||||
}
|
||||
return out, i
|
||||
}
|
||||
|
||||
type RectHorizontal []pdf.Rect
|
||||
|
||||
func (x RectHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X }
|
||||
func (x RectHorizontal) Len() int { return len(x) }
|
||||
|
||||
func checkNoEncodings(num int, text []pdf.Text) {
|
||||
for _, t := range text {
|
||||
if match(t, "Helvetica-Bold", 9, "Encoding") {
|
||||
fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func match(t pdf.Text, font string, size float64, substr string) bool {
|
||||
return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.1) && strings.Contains(t.S, substr)
|
||||
}
|
||||
|
||||
func findWords(chars []pdf.Text) (words []pdf.Text) {
|
||||
// Sort by Y coordinate and normalize.
|
||||
const nudge = 1.5
|
||||
sort.Sort(pdf.TextVertical(chars))
|
||||
old := -100000.0
|
||||
for i, c := range chars {
|
||||
if c.Y != old && math.Abs(old-c.Y) < nudge {
|
||||
chars[i].Y = old
|
||||
} else {
|
||||
old = c.Y
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by Y coordinate, breaking ties with X.
|
||||
// This will bring letters in a single word together.
|
||||
sort.Sort(pdf.TextVertical(chars))
|
||||
|
||||
// Loop over chars.
|
||||
for i := 0; i < len(chars); {
|
||||
// Find all chars on line.
|
||||
j := i + 1
|
||||
for j < len(chars) && chars[j].Y == chars[i].Y {
|
||||
j++
|
||||
}
|
||||
var end float64
|
||||
// Split line into words (really, phrases).
|
||||
for k := i; k < j; {
|
||||
ck := &chars[k]
|
||||
s := ck.S
|
||||
end = ck.X + ck.W
|
||||
charSpace := ck.FontSize / 6
|
||||
wordSpace := ck.FontSize * 2 / 3
|
||||
l := k + 1
|
||||
for l < j {
|
||||
// Grow word.
|
||||
cl := &chars[l]
|
||||
if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
|
||||
s += cl.S
|
||||
end = cl.X + cl.W
|
||||
l++
|
||||
continue
|
||||
}
|
||||
// Add space to phrase before next word.
|
||||
if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
|
||||
s += " " + cl.S
|
||||
end = cl.X + cl.W
|
||||
l++
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
f := ck.Font
|
||||
f = strings.TrimSuffix(f, ",Italic")
|
||||
f = strings.TrimSuffix(f, "-Italic")
|
||||
words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
|
||||
k = l
|
||||
}
|
||||
i = j
|
||||
}
|
||||
|
||||
// Split into two columns.
|
||||
var col1, col2 []pdf.Text
|
||||
for _, w := range words {
|
||||
if w.X > 306 {
|
||||
col2 = append(col2, w)
|
||||
} else {
|
||||
col1 = append(col1, w)
|
||||
}
|
||||
}
|
||||
return append(col1, col2...)
|
||||
}
|
||||
|
||||
func sameFont(f1, f2 string) bool {
|
||||
f1 = strings.TrimSuffix(f1, ",Italic")
|
||||
f1 = strings.TrimSuffix(f1, "-Italic")
|
||||
f2 = strings.TrimSuffix(f1, ",Italic")
|
||||
f2 = strings.TrimSuffix(f1, "-Italic")
|
||||
return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
|
||||
}
|
||||
|
||||
var jsFix = strings.NewReplacer(
|
||||
// `\u003c`, `<`,
|
||||
// `\u003e`, `>`,
|
||||
// `\u0026`, `&`,
|
||||
// `\u0009`, `\t`,
|
||||
)
|
||||
|
||||
func printTable(name string, table []Inst) {
|
||||
_ = strconv.Atoi
|
||||
}
|
||||
Reference in New Issue
Block a user