whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,131 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package chunkedfile provides utilities for testing that source code
// errors are reported in the appropriate places.
//
// A chunked file consists of several chunks of input text separated by
// "---" lines. Each chunk is an input to the program under test, such
// as an evaluator. Lines containing "###" are interpreted as
// expectations of failure: the following text is a Go string literal
// denoting a regular expression that should match the failure message.
//
// Example:
//
// x = 1 / 0 ### "division by zero"
// ---
// x = 1
// print(x + "") ### "int + string not supported"
//
// A client test feeds each chunk of text into the program under test,
// then calls chunk.GotError for each error that actually occurred. Any
// discrepancy between the actual and expected errors is reported using
// the client's reporter, which is typically a testing.T.
package chunkedfile // import "go.starlark.net/internal/chunkedfile"
import (
"fmt"
"os"
"regexp"
"runtime"
"strconv"
"strings"
)
// debug enables tracing of chunk boundaries and parsed expectations
// to standard output while Read runs.
const debug = false
// A Chunk is a portion of a source file.
// It contains a set of expected errors.
type Chunk struct {
// Source is the chunk's text, preceded by enough newlines that its
// line numbers match those of the original file.
Source string
// filename is the name of the chunked file; it prefixes every reported message.
filename string
// report receives each discrepancy between expected and actual errors.
report Reporter
// wantErrs maps a line number to the pattern that an error on that
// line must match. GotError deletes entries as errors arrive, so
// whatever remains when Done is called is reported as missing.
wantErrs map[int]*regexp.Regexp
}
// Reporter is implemented by *testing.T.
// Read and the Chunk methods report every failure through it.
type Reporter interface {
// Errorf records a failure described by a Printf-style format and its arguments.
Errorf(format string, args ...interface{})
}
// Read parses a chunked file and returns its chunks.
// It reports failures using the reporter.
//
// Chunks are separated by a line consisting of "---". Within a chunk,
// each line containing "###" declares an expected error on that line:
// the text following the marker must be a quoted Go string holding a
// regular expression. Malformed expectations are reported and skipped.
//
// On Windows the separator is matched with CRLF line endings only when
// the file actually contains them, so that a file checked out with LF
// endings is still split into chunks.
//
// Error messages of the form "file.star:line:col: ..." are prefixed
// by a newline so that the Go source position added by (*testing.T).Errorf
// appears on a separate line so as not to confuse editors.
func Read(filename string, report Reporter) (chunks []Chunk) {
	data, err := os.ReadFile(filename)
	if err != nil {
		report.Errorf("%s", err)
		return nil
	}
	text := string(data)
	linenum := 1

	// Choose the line terminator used to recognize the "---" separator.
	// GOOS alone is not a reliable indicator: a Windows checkout may
	// still contain LF-terminated files.
	eol := "\n"
	if runtime.GOOS == "windows" && strings.Contains(text, "\r\n") {
		eol = "\r\n"
	}

	for i, chunk := range strings.Split(text, eol+"---"+eol) {
		if debug {
			fmt.Printf("chunk %d at line %d: %s\n", i, linenum, chunk)
		}
		// Pad with newlines so the line numbers match the original file.
		src := strings.Repeat("\n", linenum-1) + chunk

		wantErrs := make(map[int]*regexp.Regexp)

		// Parse comments of the form:
		// ### "expected error".
		lines := strings.Split(chunk, "\n")
		for j := 0; j < len(lines); j, linenum = j+1, linenum+1 {
			line := lines[j]
			hashes := strings.Index(line, "###")
			if hashes < 0 {
				continue
			}
			rest := strings.TrimSpace(line[hashes+len("###"):])
			pattern, err := strconv.Unquote(rest)
			if err != nil {
				report.Errorf("\n%s:%d: not a quoted regexp: %s", filename, linenum, rest)
				continue
			}
			rx, err := regexp.Compile(pattern)
			if err != nil {
				report.Errorf("\n%s:%d: %v", filename, linenum, err)
				continue
			}
			wantErrs[linenum] = rx
			if debug {
				fmt.Printf("\t%d\t%s\n", linenum, rx)
			}
		}
		linenum++ // account for the "---" separator line

		chunks = append(chunks, Chunk{src, filename, report, wantErrs})
	}
	return chunks
}
// GotError should be called by the client for each error that occurred,
// giving the line on which it occurred and its message.
//
// An error on a line with no pending expectation is reported as
// unexpected. Otherwise the expectation is consumed, and a message that
// does not match its pattern is reported as a mismatch.
func (chunk *Chunk) GotError(linenum int, msg string) {
	rx, expected := chunk.wantErrs[linenum]
	if !expected {
		chunk.report.Errorf("\n%s:%d: unexpected error: %v", chunk.filename, linenum, msg)
		return
	}

	// Each expectation may be satisfied at most once.
	delete(chunk.wantErrs, linenum)
	if rx.MatchString(msg) {
		return
	}
	chunk.report.Errorf("\n%s:%d: error %q does not match pattern %q", chunk.filename, linenum, msg, rx)
}
// Done should be called by the client once the chunk has produced all of
// its errors. Every expectation that GotError did not consume is
// reported to the chunk's reporter as an error that failed to occur.
func (chunk *Chunk) Done() {
	name, report := chunk.filename, chunk.report
	for line, pattern := range chunk.wantErrs {
		report.Errorf("\n%s:%d: expected error matching %q", name, line, pattern)
	}
}
@@ -0,0 +1,118 @@
package compile
import (
"bytes"
"fmt"
"testing"
"go.starlark.net/resolve"
"go.starlark.net/syntax"
)
// TestPlusFolding checks the code the compiler emits for n-ary addition
// of strings, lists, and tuples: each source expression is parsed,
// resolved with "x" as the only predeclared name, compiled, and its
// disassembly compared against the expected instruction sequence.
func TestPlusFolding(t *testing.T) {
	isPredeclared := func(name string) bool { return name == "x" }
	isUniversal := func(name string) bool { return false }

	cases := []struct {
		src  string // source expression
		want string // disassembled code
	}{
		{
			// string folding
			src:  `"a" + "b" + "c" + "d"`,
			want: `constant "abcd"; return`,
		},
		{
			// string folding with variable:
			src:  `"a" + "b" + x + "c" + "d"`,
			want: `constant "ab"; predeclared x; plus; constant "cd"; plus; return`,
		},
		{
			// list folding
			src:  `[1] + [2] + [3]`,
			want: `constant 1; constant 2; constant 3; makelist<3>; return`,
		},
		{
			// list folding with variable
			src: `[1] + [2] + x + [3]`,
			want: `constant 1; constant 2; makelist<2>; ` +
				`predeclared x; plus; ` +
				`constant 3; makelist<1>; plus; ` +
				`return`,
		},
		{
			// tuple folding
			src:  `() + (1,) + (2, 3)`,
			want: `constant 1; constant 2; constant 3; maketuple<3>; return`,
		},
		{
			// tuple folding with variable
			src: `() + (1,) + x + (2, 3)`,
			want: `constant 1; maketuple<1>; predeclared x; plus; ` +
				`constant 2; constant 3; maketuple<2>; plus; ` +
				`return`,
		},
	}

	for i, tc := range cases {
		expr, err := syntax.ParseExpr("in.star", tc.src, 0)
		if err != nil {
			t.Errorf("#%d: %v", i, err)
			continue
		}
		locals, err := resolve.Expr(expr, isPredeclared, isUniversal)
		if err != nil {
			t.Errorf("#%d: %v", i, err)
			continue
		}
		prog := Expr(syntax.LegacyFileOptions(), expr, "<expr>", locals)
		if got := disassemble(prog.Toplevel); got != tc.want {
			t.Errorf("expression <<%s>> generated <<%s>>, want <<%s>>",
				tc.src, got, tc.want)
		}
	}
}
// disassemble is a trivial disassembler tailored to the accumulator test.
// It renders the code of f as instructions separated by "; ". An opcode
// at or above OpcodeArgMin is followed by its operand: the constant
// itself (strings quoted), the name of a local or predeclared variable,
// or "<n>" for any other opcode.
func disassemble(f *Funcode) string {
	var out bytes.Buffer
	code := f.Code
	pc := 0
	for pc < len(code) {
		op := Opcode(code[pc])
		pc++
		hasArg := op >= OpcodeArgMin

		// TODO(adonovan): factor in common with interpreter.
		// The operand is a little-endian base-128 varint: seven payload
		// bits per byte, with the high bit set on all but the last byte.
		var arg uint32
		if hasArg {
			shift := uint(0)
			for {
				b := code[pc]
				pc++
				arg |= uint32(b&0x7f) << shift
				if b < 0x80 {
					break
				}
				shift += 7
			}
		}

		if out.Len() > 0 {
			out.WriteString("; ")
		}
		fmt.Fprintf(&out, "%s", op)
		if !hasArg {
			continue
		}

		switch op {
		case CONSTANT:
			if s, isString := f.Prog.Constants[arg].(string); isString {
				fmt.Fprintf(&out, " %q", s)
			} else {
				fmt.Fprintf(&out, " %v", f.Prog.Constants[arg])
			}
		case LOCAL:
			fmt.Fprintf(&out, " %s", f.Locals[arg].Name)
		case PREDECLARED:
			fmt.Fprintf(&out, " %s", f.Prog.Names[arg])
		default:
			fmt.Fprintf(&out, "<%d>", arg)
		}
	}
	return out.String()
}
@@ -0,0 +1,74 @@
package compile_test
import (
"bytes"
"strings"
"testing"
"go.starlark.net/starlark"
)
// TestSerialization verifies that a serialized program can be loaded,
// deserialized, and executed, and that an evaluation error raised by
// the deserialized program carries the expected backtrace.
func TestSerialization(t *testing.T) {
	predeclared := starlark.StringDict{
		"x": starlark.String("mur"),
		"n": starlark.MakeInt(2),
	}
	// NOTE: the traceback expected at the end of this test contains
	// line:column positions within src, so the two literals must be
	// kept in sync.
	const src = `
def mul(a, b):
return a * b
y = mul(x, n)
`
	_, oldProg, err := starlark.SourceProgram("mul.star", src, predeclared.Has)
	if err != nil {
		t.Fatal(err)
	}

	// Round-trip the compiled program through its serialized form.
	buf := new(bytes.Buffer)
	if err := oldProg.Write(buf); err != nil {
		t.Fatalf("oldProg.Write: %v", err)
	}
	newProg, err := starlark.CompiledProgram(buf)
	if err != nil {
		t.Fatalf("CompiledProgram: %v", err)
	}

	thread := new(starlark.Thread)
	globals, err := newProg.Init(thread, predeclared)
	if err != nil {
		t.Fatalf("newProg.Init: %v", err)
	}
	if got, want := globals["y"], starlark.String("murmur"); got != want {
		t.Errorf("Value of global was %s, want %s", got, want)
		t.Logf("globals: %v", globals)
	}

	// Verify stack frame.
	// Rebinding n to None makes the multiplication fail during Init.
	predeclared["n"] = starlark.None
	_, err = newProg.Init(thread, predeclared)
	evalErr, ok := err.(*starlark.EvalError)
	if !ok {
		t.Fatalf("newProg.Init call returned err %v, want *EvalError", err)
	}
	const want = `Traceback (most recent call last):
mul.star:5:8: in <toplevel>
mul.star:3:14: in mul
Error: unknown binary op: string * NoneType`
	if got := evalErr.Backtrace(); got != want {
		t.Fatalf("got <<%s>>, want <<%s>>", got, want)
	}
}
// TestGarbage verifies that decoding data that is not a compiled program
// fails with an error mentioning "not a compiled module".
func TestGarbage(t *testing.T) {
	const garbage = "This is not a compiled Starlark program."
	_, err := starlark.CompiledProgram(strings.NewReader(garbage))
	switch {
	case err == nil:
		t.Fatalf("CompiledProgram did not report an error when decoding garbage")
	case !strings.Contains(err.Error(), "not a compiled module"):
		t.Fatalf("CompiledProgram reported the wrong error when decoding garbage: %v", err)
	}
}
@@ -0,0 +1,399 @@
package compile
// This file defines functions to read and write a compile.Program to a file.
//
// It is the client's responsibility to avoid version skew between the
// compiler used to produce a file and the interpreter that consumes it.
// The version number is provided as a constant.
// Incompatible protocol changes should also increment the version number.
//
// Encoding
//
// Program:
// "!sky" [4]byte # magic number
// str uint32le # offset of <strings> section
// version varint # must match Version
// filename string
// numloads varint
// loads []Ident
// numnames varint
// names []string
// numconsts varint
// consts []Constant
// numglobals varint
// globals []Ident
// toplevel Funcode
// numfuncs varint
// funcs []Funcode
// recursion varint (0 or 1)
// <strings> []byte # concatenation of all referenced strings
// EOF
//
// Funcode:
// id Ident
// doc string
// code []byte
// pclinetablen varint
// pclinetab []varint
// numlocals varint
// locals []Ident
// numcells varint
// cells []int
// numfreevars varint
// freevar []Ident
// maxstack varint
// numparams varint
// numkwonlyparams varint
// hasvarargs varint (0 or 1)
// haskwargs varint (0 or 1)
//
// Ident:
// name string
// line, col varint
//
// Constant: # type data
// type varint # 0=string string
// data ... # 1=bytes string
// # 2=int varint
// # 3=float varint (bits as uint64)
// # 4=bigint string (decimal ASCII text)
//
// The encoding starts with a four-byte magic number.
// The next four bytes are a little-endian uint32
// that provides the offset of the string section
// at the end of the file, which contains the ordered
// concatenation of all strings referenced by the
// program. This design permits the decoder to read
// the first and second parts of the file into different
// memory allocations: the first (the encoded program)
// is transient, but the second (the strings) persists
// for the life of the Program.
//
// Within the encoded program, all strings are referred
// to by their length. As the encoder and decoder process
// the entire file sequentially, they are in lock step,
// so the start offset of each string is implicit.
//
// Program.Code is represented as a []byte slice to permit
// modification when breakpoints are set. All other strings
// are represented as strings. They all (unsafely) share the
// same backing byte slice.
//
// Aside from the str field, all integers are encoded as varints.
import (
"encoding/binary"
"fmt"
"math"
"math/big"
debugpkg "runtime/debug"
"unsafe"
"go.starlark.net/syntax"
)
// magic is the four-byte header that Encode writes at the start of an
// encoded program; DecodeProgram rejects data that does not begin with it.
const magic = "!sky"
// Encode encodes a compiled Starlark program.
//
// The result consists of the magic number, a four-byte little-endian
// offset of the string section, the varint-encoded program, and finally
// the concatenation of all strings referenced by the program.
//
// Encode panics if prog.Constants holds a value of a type that has no
// encoding, since silently omitting it would leave the constant count
// out of step with the encoded constants.
func (prog *Program) Encode() []byte {
	var e encoder
	e.p = append(e.p, magic...)
	e.p = append(e.p, "????"...) // string data offset; filled in later
	e.int(Version)
	e.string(prog.Toplevel.Pos.Filename())
	e.bindings(prog.Loads)
	e.int(len(prog.Names))
	for _, name := range prog.Names {
		e.string(name)
	}
	e.int(len(prog.Constants))
	for _, c := range prog.Constants {
		// Each constant is a type tag followed by its payload.
		switch c := c.(type) {
		case string:
			e.int(0)
			e.string(c)
		case Bytes:
			e.int(1)
			e.string(string(c))
		case int64:
			e.int(2)
			e.int64(c)
		case float64:
			e.int(3)
			e.uint64(math.Float64bits(c))
		case *big.Int:
			e.int(4)
			e.string(c.Text(10))
		default:
			// The decoder expects exactly len(prog.Constants) entries,
			// so an unencodable constant must not be skipped.
			panic(fmt.Sprintf("compile: cannot encode constant of type %T", c))
		}
	}
	e.bindings(prog.Globals)
	e.function(prog.Toplevel)
	e.int(len(prog.Functions))
	for _, fn := range prog.Functions {
		e.function(fn)
	}
	e.int(b2i(prog.Recursion))

	// Patch in the offset of the string data section.
	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))

	return append(e.p, e.s...)
}
// An encoder accumulates the two sections of an encoded program:
// the varint-encoded structure and the string data it refers to.
type encoder struct {
p []byte // encoded program
s []byte // strings
tmp [binary.MaxVarintLen64]byte // scratch buffer for encoding one varint
}
// int appends x to the program section as a signed varint.
func (e *encoder) int(x int) {
	widened := int64(x)
	e.int64(widened)
}
// int64 appends x to the program section as a signed varint.
func (e *encoder) int64(x int64) {
	scratch := e.tmp[:]
	size := binary.PutVarint(scratch, x)
	e.p = append(e.p, scratch[:size]...)
}
// uint64 appends x to the program section as an unsigned varint.
func (e *encoder) uint64(x uint64) {
	scratch := e.tmp[:]
	size := binary.PutUvarint(scratch, x)
	e.p = append(e.p, scratch[:size]...)
}
// string records s: its length goes into the program section and its
// bytes are appended to the string section.
func (e *encoder) string(s string) {
	size := len(s)
	e.int(size)
	e.s = append(e.s, s...)
}
// bytes records b in the same way as a string: its length goes into the
// program section and its contents are appended to the string section.
func (e *encoder) bytes(b []byte) {
	size := len(b)
	e.int(size)
	e.s = append(e.s, b...)
}
// binding encodes the name of bind followed by the line and column of
// its position.
func (e *encoder) binding(bind Binding) {
	pos := bind.Pos
	e.string(bind.Name)
	e.int(int(pos.Line))
	e.int(int(pos.Col))
}
// bindings encodes the number of elements of binds followed by each
// binding in order.
func (e *encoder) bindings(binds []Binding) {
	e.int(len(binds))
	for i := range binds {
		e.binding(binds[i])
	}
}
// function encodes fn: its name and position, doc string, code, line
// table, locals, cells, free variables, and finally its stack and
// parameter metadata. decoder.function reads the fields back in the
// same order.
func (e *encoder) function(fn *Funcode) {
	e.binding(Binding{Name: fn.Name, Pos: fn.Pos})
	e.string(fn.Doc)
	e.bytes(fn.Code)

	e.int(len(fn.pclinetab))
	for i := range fn.pclinetab {
		e.int64(int64(fn.pclinetab[i]))
	}

	e.bindings(fn.Locals)

	e.int(len(fn.Cells))
	for i := range fn.Cells {
		e.int(fn.Cells[i])
	}

	e.bindings(fn.Freevars)

	e.int(fn.MaxStack)
	e.int(fn.NumParams)
	e.int(fn.NumKwonlyParams)
	e.int(b2i(fn.HasVarargs))
	e.int(b2i(fn.HasKwargs))
}
// b2i converts a boolean to an integer: 1 for true, 0 for false.
func b2i(b bool) int {
	if b {
		return 1
	}
	return 0
}
// DecodeProgram decodes a compiled Starlark program from data.
//
// It returns an error if data does not start with the magic number, if
// the header is truncated or its string-section offset lies outside
// data, if the encoded version differs from Version, if a constant is
// malformed, or if any input remains unconsumed after decoding. A panic
// raised while decoding (for example by a read beyond the end of a
// section) is recovered and returned as an internal error.
func DecodeProgram(data []byte) (_ *Program, err error) {
	if len(data) < len(magic) {
		return nil, fmt.Errorf("not a compiled module: no magic number")
	}
	if got := string(data[:4]); got != magic {
		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
			got, magic)
	}

	// The header is the magic number plus a four-byte string offset.
	const headerLen = 8
	if len(data) < headerLen {
		return nil, fmt.Errorf("not a compiled module: truncated header")
	}

	defer func() {
		if x := recover(); x != nil {
			debugpkg.PrintStack()
			err = fmt.Errorf("internal error while decoding program: %v", x)
		}
	}()

	// Reject an offset that would make either section slice invalid,
	// rather than relying on the recovered slice-bounds panic.
	offset := binary.LittleEndian.Uint32(data[4:8])
	if offset < headerLen || uint64(offset) > uint64(len(data)) {
		return nil, fmt.Errorf("invalid string section offset %d (file size %d)", offset, len(data))
	}

	d := decoder{
		p: data[8:offset],
		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
	}

	if v := d.int(); v != Version {
		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
	}

	filename := d.string()
	d.filename = &filename

	loads := d.bindings()

	names := make([]string, d.int())
	for i := range names {
		names[i] = d.string()
	}

	// constants
	constants := make([]interface{}, d.int())
	for i := range constants {
		var c interface{}
		switch kind := d.int(); kind {
		case 0:
			c = d.string()
		case 1:
			c = Bytes(d.string())
		case 2:
			c = d.int64()
		case 3:
			c = math.Float64frombits(d.uint64())
		case 4:
			text := d.string()
			n, ok := new(big.Int).SetString(text, 10)
			if !ok {
				return nil, fmt.Errorf("invalid bigint constant %q", text)
			}
			c = n
		default:
			return nil, fmt.Errorf("invalid constant type code %d", kind)
		}
		constants[i] = c
	}

	globals := d.bindings()
	toplevel := d.function()
	funcs := make([]*Funcode, d.int())
	for i := range funcs {
		funcs[i] = d.function()
	}
	recursion := d.int() != 0

	prog := &Program{
		Loads:     loads,
		Names:     names,
		Constants: constants,
		Globals:   globals,
		Functions: funcs,
		Toplevel:  toplevel,
		Recursion: recursion,
	}
	// Link every function back to the program that owns it.
	toplevel.Prog = prog
	for _, f := range funcs {
		f.Prog = prog
	}

	if len(d.p)+len(d.s) > 0 {
		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
	}

	return prog, nil
}
// A decoder consumes the two sections of an encoded program in step:
// each read removes the decoded bytes from the front of p or s.
type decoder struct {
p []byte // encoded program
s []byte // strings
filename *string // (indirect to avoid keeping decoder live)
}
// int reads the next signed varint from the program section and returns
// it converted to int.
func (d *decoder) int() int {
	v := d.int64()
	return int(v)
}
// int64 reads and consumes the next signed varint of the program section.
//
// It panics if the section is exhausted or the varint overflows 64 bits
// (binary.Varint reports these by returning a byte count <= 0).
// DecodeProgram recovers the panic and reports it as a decoding error;
// without the check a truncated program would silently decode as zeros.
func (d *decoder) int64() int64 {
	x, n := binary.Varint(d.p)
	if n <= 0 {
		panic("truncated or malformed varint")
	}
	d.p = d.p[n:]
	return x
}
// uint64 reads and consumes the next unsigned varint of the program section.
//
// It panics if the section is exhausted or the varint overflows 64 bits
// (binary.Uvarint reports these by returning a byte count <= 0).
// DecodeProgram recovers the panic and reports it as a decoding error;
// without the check a truncated program would silently decode as zeros.
func (d *decoder) uint64() uint64 {
	x, n := binary.Uvarint(d.p)
	if n <= 0 {
		panic("truncated or malformed varint")
	}
	d.p = d.p[n:]
	return x
}
// string reads the next string. The result is empty when the encoded
// length is zero; otherwise it shares memory with the decoder's string
// section rather than being copied.
func (d *decoder) string() (s string) {
	raw := d.bytes()
	if len(raw) == 0 {
		return s
	}

	// Avoid a memory allocation for each string
	// by unsafely aliasing raw: overwrite the data pointer and
	// length words of the result in place.
	type header struct {
		data *byte
		len  int
	}
	h := (*header)(unsafe.Pointer(&s))
	h.data = &raw[0]
	h.len = len(raw)
	return s
}
// bytes reads a length from the program section and returns that many
// bytes from the front of the string section, consuming them. The
// result's capacity is clipped to its length so that appending to it
// cannot overwrite the data that follows.
func (d *decoder) bytes() []byte {
	n := d.int()
	chunk := d.s[:n:n]
	d.s = d.s[n:]
	return chunk
}
// binding reads a name followed by a line and a column, and returns the
// Binding whose position combines them with the decoder's filename.
func (d *decoder) binding() Binding {
	var b Binding
	b.Name = d.string()
	line := int32(d.int())
	col := int32(d.int())
	b.Pos = syntax.MakePosition(d.filename, line, col)
	return b
}
// bindings reads a count followed by that many bindings.
func (d *decoder) bindings() []Binding {
	n := d.int()
	out := make([]Binding, n)
	for i := 0; i < n; i++ {
		out[i] = d.binding()
	}
	return out
}
// ints reads a count followed by that many integers.
func (d *decoder) ints() []int {
	n := d.int()
	out := make([]int, n)
	for i := 0; i < n; i++ {
		out[i] = d.int()
	}
	return out
}
// bool reads an integer and reports whether it is nonzero.
func (d *decoder) bool() bool {
	flag := d.int()
	return flag != 0
}
// function reads one Funcode. Fields are read in exactly the order in
// which encoder.function writes them. The Prog field is left unset;
// DecodeProgram fills it in once the Program has been assembled.
func (d *decoder) function() *Funcode {
	id := d.binding()
	fn := &Funcode{
		// Prog is filled in later.
		Pos:  id.Pos,
		Name: id.Name,
	}
	fn.Doc = d.string()
	fn.Code = d.bytes()

	table := make([]uint16, d.int())
	for i := range table {
		table[i] = uint16(d.int())
	}
	fn.pclinetab = table

	fn.Locals = d.bindings()
	fn.Cells = d.ints()
	fn.Freevars = d.bindings()
	fn.MaxStack = d.int()
	fn.NumParams = d.int()
	fn.NumKwonlyParams = d.int()
	fn.HasVarargs = d.int() != 0
	fn.HasKwargs = d.int() != 0
	return fn
}
@@ -0,0 +1,115 @@
// Package spell defines a simple spelling checker for use in attribute errors
// such as "no such field .foo; did you mean .food?".
package spell
import (
"strings"
"unicode"
)
// Nearest returns the element of candidates that is closest to x under
// the Levenshtein metric, or "" if no candidate is close enough.
//
// Underscores and letter case are ignored when comparing. A candidate
// is accepted only if its distance is strictly less than half the
// length of the folded x (rounded up); among accepted candidates the
// first one with the smallest distance wins.
func Nearest(x string, candidates []string) string {
	// normalize drops underscores and lower-cases every other rune.
	normalize := func(s string) string {
		dropOrLower := func(r rune) rune {
			if r != '_' {
				return unicode.ToLower(r)
			}
			return -1 // a negative rune is removed by strings.Map
		}
		return strings.Map(dropOrLower, s)
	}

	target := normalize(x)
	limit := (len(target) + 1) / 2 // allow up to 50% typos
	nearest := ""
	for i := range candidates {
		cand := candidates[i]
		if d := levenshtein(target, normalize(cand), limit); d < limit {
			limit, nearest = d, cand
		}
	}
	return nearest
}
// levenshtein returns the non-negative Levenshtein edit distance
// between the byte strings x and y.
//
// If the computed distance exceeds max,
// the function may return early with an approximate value > max.
func levenshtein(x, y string, max int) int {
	// This implementation is derived from one by Laurent Le Brun in
	// Bazel that uses the single-row space efficiency trick
	// described at bitbucket.org/clearer/iosifovich.

	// Ensure x is the shorter of the two strings.
	if len(y) < len(x) {
		x, y = y, x
	}

	// Strip the common prefix, up to the first differing byte.
	for k := 0; k < len(x); k++ {
		if x[k] != y[k] {
			x, y = x[k:], y[k:]
			break
		}
	}
	if len(x) == 0 {
		return len(y)
	}
	if gap := abs(len(x) - len(y)); gap > max {
		return gap // excessive length divergence
	}

	// row[j] holds the distance between the prefix of x processed so
	// far and the first j bytes of y.
	row := make([]int, len(y)+1)
	for j := range row {
		row[j] = j
	}

	for i := 1; i <= len(x); i++ {
		row[0] = i
		rowMin := i
		diag := i - 1 // value of row[j-1] from the previous iteration of i
		for j := 1; j <= len(y); j++ {
			sub := diag + b2i(x[i-1] != y[j-1]) // substitution
			del := row[j-1] + 1                 // deletion
			ins := row[j] + 1                   // insertion
			cost := min(sub, min(del, ins))
			diag = row[j]
			row[j] = cost
			rowMin = min(rowMin, cost)
		}
		// Every entry of this row exceeds max, so give up early.
		if rowMin > max {
			return rowMin
		}
	}
	return row[len(y)]
}
// b2i converts a boolean to an integer: 1 for true, 0 for false.
func b2i(b bool) int {
	if b {
		return 1
	}
	return 0
}
// min returns the smaller of x and y.
func min(x, y int) int {
	if x < y {
		return x
	}
	return y
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
@@ -0,0 +1,19 @@
#!/bin/sh
# Copyright 2021 The Bazel Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
set -eu

# Remove the backup copies on every exit path, including a failed
# tidiness check or a failing "go mod tidy", so that they are never
# left behind in the working tree.
trap 'rm -f go.mod.orig go.sum.orig' EXIT

# Confirm that go.mod and go.sum are tidy.
cp go.mod go.mod.orig
cp go.sum go.sum.orig
go mod tidy
# Use -w to ignore differences in OS newlines.
diff -w go.mod.orig go.mod || { echo "go.mod is not tidy"; exit 1; }
diff -w go.sum.orig go.sum || { echo "go.sum is not tidy"; exit 1; }

# Run tests
go test ./...