whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,129 @@
Grammar of Starlark
==================
File = {Statement | newline} eof .
Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
Parameters = Parameter {',' Parameter}.
Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
WhileStmt = 'while' Test ':' Suite .
Suite = [newline indent {Statement} outdent] | SimpleStmt .
SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
# NOTE: '\n' optional at EOF
SmallStmt = ReturnStmt
| BreakStmt | ContinueStmt | PassStmt
| AssignStmt
| ExprStmt
| LoadStmt
.
ReturnStmt = 'return' [Expression] .
BreakStmt = 'break' .
ContinueStmt = 'continue' .
PassStmt = 'pass' .
AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
ExprStmt = Expression .
LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
Test = LambdaExpr
| IfExpr
| PrimaryExpr
| UnaryExpr
| BinaryExpr
.
LambdaExpr = 'lambda' [Parameters] ':' Test .
IfExpr = Test 'if' Test 'else' Test .
PrimaryExpr = Operand
| PrimaryExpr DotSuffix
| PrimaryExpr CallSuffix
| PrimaryExpr SliceSuffix
.
Operand = identifier
| int | float | string
| ListExpr | ListComp
| DictExpr | DictComp
| '(' [Expression [',']] ')'
| ('-' | '+') PrimaryExpr
.
DotSuffix = '.' identifier .
CallSuffix = '(' [Arguments [',']] ')' .
SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
Arguments = Argument {',' Argument} .
Argument = Test | identifier '=' Test | '*' Test | '**' Test .
ListExpr = '[' [Expression [',']] ']' .
ListComp = '[' Test {CompClause} ']'.
DictExpr = '{' [Entries [',']] '}' .
DictComp = '{' Entry {CompClause} '}' .
Entries = Entry {',' Entry} .
Entry = Test ':' Test .
CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
UnaryExpr = 'not' Test .
BinaryExpr = Test {Binop Test} .
Binop = 'or'
| 'and'
| '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
| '|'
| '^'
| '&'
| '-' | '+'
| '*' | '%' | '/' | '//'
.
Expression = Test {',' Test} .
# NOTE: trailing comma permitted only when within [...] or (...).
LoopVariables = PrimaryExpr {',' PrimaryExpr} .
# Notation (similar to Go spec):
- lowercase and 'quoted' items are lexical tokens.
- Capitalized names denote grammar productions.
- (...) implies grouping
- x | y means either x or y.
- [x] means x is optional
- {x} means x is repeated zero or more times
- The end of each declaration is marked with a period.
# Tokens
- spaces: newline, eof, indent, outdent.
- identifier.
- literals: string, int, float.
- plus all quoted tokens such as '+=', 'return'.
# Notes:
- Ambiguity is resolved using operator precedence.
- The grammar does not enforce the legal order of params and args,
nor that the first compclause must be a 'for'.
TODO:
- explain how the lexer generates indent, outdent, and newline tokens.
- why is unary NOT separated from unary - and +?
- the grammar is (mostly) in LL(1) style so, for example,
dot expressions are formed as suffixes, not as complete expressions,
which makes the spec harder to read. Reorganize into non-LL(1) form?
@@ -0,0 +1,63 @@
// Copyright 2023 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import _ "unsafe" // for linkname
// FileOptions specifies various per-file options that affect static
// aspects of an individual file such as parsing, name resolution, and
// code generation. (Options that affect global dynamics are typically
// controlled through [starlark.Thread].)
//
// The zero value of FileOptions is the default behavior.
//
// Many functions in this package come in two versions: the legacy
// standalone function (such as [Parse]) uses [LegacyFileOptions],
// whereas the more recent method (such as [Options.Parse]) honors the
// provided options. The second form is preferred. In other packages,
// the modern version is a standalone function with a leading
// FileOptions parameter and the name suffix "Options", such as
// [starlark.ExecFileOptions].
type FileOptions struct {
// resolver
// Each true flag in this group permits the construct named in its comment.
Set bool // allow references to the 'set' built-in function
While bool // allow 'while' statements
TopLevelControl bool // allow if/for/while statements at top-level
GlobalReassign bool // allow reassignment to top-level names
LoadBindsGlobally bool // load creates global not file-local bindings (deprecated)
// compiler
// Note the sense of this flag: true turns the recursion check off.
Recursion bool // disable recursion check for functions in this file
}
// TODO(adonovan): provide a canonical flag parser for FileOptions.
// (And use it in the testdata "options:" strings.)
// LegacyFileOptions allocates a FileOptions and fills it in from the
// current values of the resolver package's legacy global variables
// (such as [resolve.AllowRecursion]), which may themselves be bound
// to command-line flags. Each call returns a new value reflecting the
// variables at the time of the call.
func LegacyFileOptions() *FileOptions {
	opts := new(FileOptions)

	// Resolver options.
	opts.Set = resolverAllowSet
	// NOTE(review): While and TopLevelControl are both driven by the
	// global-reassign flag; presumably the legacy resolver had no
	// dedicated flags for them. Confirm before changing.
	opts.While = resolverAllowGlobalReassign
	opts.TopLevelControl = resolverAllowGlobalReassign
	opts.GlobalReassign = resolverAllowGlobalReassign
	opts.LoadBindsGlobally = resolverLoadBindsGlobally

	// Compiler options.
	opts.Recursion = resolverAllowRecursion

	return opts
}
// Access resolver (legacy) flags, if they are linked in; false otherwise.
//
// Each variable is tied by a go:linkname directive to the like-named
// variable of package go.starlark.net/resolve, and is read by
// LegacyFileOptions. The directives must stay directly attached to
// their declarations.
var (
//go:linkname resolverAllowSet go.starlark.net/resolve.AllowSet
resolverAllowSet bool
//go:linkname resolverAllowGlobalReassign go.starlark.net/resolve.AllowGlobalReassign
resolverAllowGlobalReassign bool
//go:linkname resolverAllowRecursion go.starlark.net/resolve.AllowRecursion
resolverAllowRecursion bool
//go:linkname resolverLoadBindsGlobally go.starlark.net/resolve.LoadBindsGlobally
resolverLoadBindsGlobally bool
)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,487 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax_test
import (
"bufio"
"bytes"
"fmt"
"go/build"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"go.starlark.net/internal/chunkedfile"
"go.starlark.net/starlarktest"
"go.starlark.net/syntax"
)
// TestExprParseTrees parses each input as a single expression with
// syntax.ParseExpr and compares the result against want.
// On success the result is the tree rendering produced by treeString;
// on failure it is the error message with its leading position
// stripped by stripPos, so a want string may also be an error message.
func TestExprParseTrees(t *testing.T) {
for _, test := range []struct {
input, want string
}{
{`print(1)`,
`(CallExpr Fn=print Args=(1))`},
{"print(1)\n",
`(CallExpr Fn=print Args=(1))`},
{`x + 1`,
`(BinaryExpr X=x Op=+ Y=1)`},
{`[x for x in y]`,
`(Comprehension Body=x Clauses=((ForClause Vars=x X=y)))`},
{`[x for x in (a if b else c)]`,
`(Comprehension Body=x Clauses=((ForClause Vars=x X=(ParenExpr X=(CondExpr Cond=b True=a False=c)))))`},
{`x[i].f(42)`,
`(CallExpr Fn=(DotExpr X=(IndexExpr X=x Y=i) Name=f) Args=(42))`},
{`x.f()`,
`(CallExpr Fn=(DotExpr X=x Name=f))`},
{`x+y*z`,
`(BinaryExpr X=x Op=+ Y=(BinaryExpr X=y Op=* Y=z))`},
{`x%y-z`,
`(BinaryExpr X=(BinaryExpr X=x Op=% Y=y) Op=- Y=z)`},
{`a + b not in c`,
`(BinaryExpr X=(BinaryExpr X=a Op=+ Y=b) Op=not in Y=c)`},
{`lambda x, *args, **kwargs: None`,
`(LambdaExpr Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=None)`},
{`{"one": 1}`,
`(DictExpr List=((DictEntry Key="one" Value=1)))`},
{`a[i]`,
`(IndexExpr X=a Y=i)`},
{`a[i:]`,
`(SliceExpr X=a Lo=i)`},
{`a[:j]`,
`(SliceExpr X=a Hi=j)`},
{`a[::]`,
`(SliceExpr X=a)`},
{`a[::k]`,
`(SliceExpr X=a Step=k)`},
{`[]`,
`(ListExpr)`},
{`[1]`,
`(ListExpr List=(1))`},
{`[1,]`,
`(ListExpr List=(1))`},
{`[1, 2]`,
`(ListExpr List=(1 2))`},
{`()`,
`(TupleExpr)`},
{`(4,)`,
`(ParenExpr X=(TupleExpr List=(4)))`},
{`(4)`,
`(ParenExpr X=4)`},
{`(4, 5)`,
`(ParenExpr X=(TupleExpr List=(4 5)))`},
{`1, 2, 3`,
`(TupleExpr List=(1 2 3))`},
{`1, 2,`,
`unparenthesized tuple with trailing comma`},
{`{}`,
`(DictExpr)`},
{`{"a": 1}`,
`(DictExpr List=((DictEntry Key="a" Value=1)))`},
{`{"a": 1,}`,
`(DictExpr List=((DictEntry Key="a" Value=1)))`},
{`{"a": 1, "b": 2}`,
`(DictExpr List=((DictEntry Key="a" Value=1) (DictEntry Key="b" Value=2)))`},
{`{x: y for (x, y) in z}`,
`(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=(ParenExpr X=(TupleExpr List=(x y))) X=z)))`},
{`{x: y for a in b if c}`,
`(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=a X=b) (IfClause Cond=c)))`},
{`-1 + +2`,
`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`},
{`"foo" + "bar"`,
`(BinaryExpr X="foo" Op=+ Y="bar")`},
{`-1 * 2`, // prec(unary -) > prec(binary *)
`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`},
{`-x[i]`, // prec(unary -) < prec(x[i])
`(UnaryExpr Op=- X=(IndexExpr X=x Y=i))`},
{`a | b & c | d`, // prec(|) < prec(&)
`(BinaryExpr X=(BinaryExpr X=a Op=| Y=(BinaryExpr X=b Op=& Y=c)) Op=| Y=d)`},
{`a or b and c or d`,
`(BinaryExpr X=(BinaryExpr X=a Op=or Y=(BinaryExpr X=b Op=and Y=c)) Op=or Y=d)`},
{`a and b or c and d`,
`(BinaryExpr X=(BinaryExpr X=a Op=and Y=b) Op=or Y=(BinaryExpr X=c Op=and Y=d))`},
{`f(1, x=y)`,
`(CallExpr Fn=f Args=(1 (BinaryExpr X=x Op== Y=y)))`},
{`f(*args, **kwargs)`,
`(CallExpr Fn=f Args=((UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)))`},
{`lambda *args, *, x=1, **kwargs: 0`,
`(LambdaExpr Params=((UnaryExpr Op=* X=args) (UnaryExpr Op=*) (BinaryExpr X=x Op== Y=1) (UnaryExpr Op=** X=kwargs)) Body=0)`},
{`lambda *, a, *b: 0`,
`(LambdaExpr Params=((UnaryExpr Op=*) a (UnaryExpr Op=* X=b)) Body=0)`},
{`a if b else c`,
`(CondExpr Cond=b True=a False=c)`},
{`a and not b`,
`(BinaryExpr X=a Op=and Y=(UnaryExpr Op=not X=b))`},
{`[e for x in y if cond1 if cond2]`,
`(Comprehension Body=e Clauses=((ForClause Vars=x X=y) (IfClause Cond=cond1) (IfClause Cond=cond2)))`}, // github.com/google/skylark/issues/53
} {
e, err := syntax.ParseExpr("foo.star", test.input, 0)
// got is either the rendered tree or the position-free error text.
var got string
if err != nil {
got = stripPos(err)
} else {
got = treeString(e)
}
if test.want != got {
t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
}
}
}
// TestStmtParseTrees parses each input as a file with syntax.Parse and
// compares the treeString rendering of the FIRST statement only
// (f.Stmts[0]) against want; any later statements in the input are
// parsed but not compared. A parse error fails the case.
func TestStmtParseTrees(t *testing.T) {
for _, test := range []struct {
input, want string
}{
{`print(1)`,
`(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
{`return 1, 2`,
`(ReturnStmt Result=(TupleExpr List=(1 2)))`},
{`return`,
`(ReturnStmt)`},
{`for i in "abc": break`,
`(ForStmt Vars=i X="abc" Body=((BranchStmt Token=break)))`},
{`for i in "abc": continue`,
`(ForStmt Vars=i X="abc" Body=((BranchStmt Token=continue)))`},
{`for x, y in z: pass`,
`(ForStmt Vars=(TupleExpr List=(x y)) X=z Body=((BranchStmt Token=pass)))`},
{`if True: pass`,
`(IfStmt Cond=True True=((BranchStmt Token=pass)))`},
{`if True: break`,
`(IfStmt Cond=True True=((BranchStmt Token=break)))`},
{`if True: continue`,
`(IfStmt Cond=True True=((BranchStmt Token=continue)))`},
{`if True: pass
else:
pass`,
`(IfStmt Cond=True True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
{"if a: pass\nelif b: pass\nelse: pass",
`(IfStmt Cond=a True=((BranchStmt Token=pass)) False=((IfStmt Cond=b True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))))`},
{`x, y = 1, 2`,
`(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=(TupleExpr List=(1 2)))`},
{`x[i] = 1`,
`(AssignStmt Op== LHS=(IndexExpr X=x Y=i) RHS=1)`},
{`x.f = 1`,
`(AssignStmt Op== LHS=(DotExpr X=x Name=f) RHS=1)`},
{`(x, y) = 1`,
`(AssignStmt Op== LHS=(ParenExpr X=(TupleExpr List=(x y))) RHS=1)`},
{`load("", "a", b="c")`,
`(LoadStmt Module="" From=(a c) To=(a b))`},
{`if True: load("", "a", b="c")`, // load needn't be at toplevel
`(IfStmt Cond=True True=((LoadStmt Module="" From=(a c) To=(a b))))`},
{`def f(x, *args, **kwargs):
pass`,
`(DefStmt Name=f Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((BranchStmt Token=pass)))`},
{`def f(**kwargs, *args): pass`,
`(DefStmt Name=f Params=((UnaryExpr Op=** X=kwargs) (UnaryExpr Op=* X=args)) Body=((BranchStmt Token=pass)))`},
{`def f(a, b, c=d): pass`,
`(DefStmt Name=f Params=(a b (BinaryExpr X=c Op== Y=d)) Body=((BranchStmt Token=pass)))`},
{`def f(a, b=c, d): pass`,
`(DefStmt Name=f Params=(a (BinaryExpr X=b Op== Y=c) d) Body=((BranchStmt Token=pass)))`}, // TODO(adonovan): fix this
{`def f():
def g():
pass
pass
def h():
pass`,
`(DefStmt Name=f Body=((DefStmt Name=g Body=((BranchStmt Token=pass))) (BranchStmt Token=pass)))`},
{"f();g()",
`(ExprStmt X=(CallExpr Fn=f))`},
{"f();",
`(ExprStmt X=(CallExpr Fn=f))`},
{"f();g()\n",
`(ExprStmt X=(CallExpr Fn=f))`},
{"f();\n",
`(ExprStmt X=(CallExpr Fn=f))`},
} {
f, err := syntax.Parse("foo.star", test.input, 0)
if err != nil {
t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
continue
}
// Only the first statement is rendered and compared.
if got := treeString(f.Stmts[0]); test.want != got {
t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
}
}
}
// TestFileParseTrees tests sequences of statements, and particularly
// handling of indentation, newlines, line continuations, and blank lines.
//
// Each input is parsed with syntax.Parse; every resulting statement is
// rendered with writeTree and the renderings are joined with a single
// newline before being compared against want.
func TestFileParseTrees(t *testing.T) {
for _, test := range []struct {
input, want string
}{
{`x = 1
print(x)`,
`(AssignStmt Op== LHS=x RHS=1)
(ExprStmt X=(CallExpr Fn=print Args=(x)))`},
{"if cond:\n\tpass",
`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
{"if cond:\n\tpass\nelse:\n\tpass",
`(IfStmt Cond=cond True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
{`def f():
pass
pass
pass`,
`(DefStmt Name=f Body=((BranchStmt Token=pass)))
(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
{`pass; pass`,
`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
{"pass\npass",
`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
{"pass\n\npass",
`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
{`x = (1 +
2)`,
`(AssignStmt Op== LHS=x RHS=(ParenExpr X=(BinaryExpr X=1 Op=+ Y=2)))`},
{`x = 1 \
+ 2`,
`(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`},
} {
f, err := syntax.Parse("foo.star", test.input, 0)
if err != nil {
t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
continue
}
// Render one statement per line, with no trailing newline.
var buf bytes.Buffer
for i, stmt := range f.Stmts {
if i > 0 {
buf.WriteByte('\n')
}
writeTree(&buf, reflect.ValueOf(stmt))
}
if got := buf.String(); test.want != got {
t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
}
}
}
// TestCompoundStmt tests handling of REPL-style compound statements.
//
// Each input is fed line by line to syntax.ParseCompoundStmt through a
// readline callback. On success the treeString renderings of all
// returned statements are concatenated with no separator; on failure
// the position-free error text (see stripPos) is compared instead.
func TestCompoundStmt(t *testing.T) {
for _, test := range []struct {
input, want string
}{
// blank lines
{"\n",
``},
{" \n",
``},
{"# comment\n",
``},
// simple statement
{"1\n",
`(ExprStmt X=1)`},
{"print(1)\n",
`(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
{"1;2;3;\n",
`(ExprStmt X=1)(ExprStmt X=2)(ExprStmt X=3)`},
{"f();g()\n",
`(ExprStmt X=(CallExpr Fn=f))(ExprStmt X=(CallExpr Fn=g))`},
{"f();\n",
`(ExprStmt X=(CallExpr Fn=f))`},
{"f(\n\n\n\n\n\n\n)\n",
`(ExprStmt X=(CallExpr Fn=f))`},
// complex statements
{"def f():\n pass\n\n",
`(DefStmt Name=f Body=((BranchStmt Token=pass)))`},
{"if cond:\n pass\n\n",
`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
// Even as a 1-liner, the following blank line is required.
{"if cond: pass\n\n",
`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
// github.com/google/starlark-go/issues/121
{"a; b; c\n",
`(ExprStmt X=a)(ExprStmt X=b)(ExprStmt X=c)`},
{"a; b c\n",
`invalid syntax`},
} {
// Fake readline input from string.
// The ! suffix, which would cause a parse error,
// tests that the parser doesn't read more than necessary.
sc := bufio.NewScanner(strings.NewReader(test.input + "!"))
// readline yields one line per call, re-appending the "\n" that
// bufio.Scanner strips; once input is exhausted it returns nil
// bytes together with the scanner's error, if any.
readline := func() ([]byte, error) {
if sc.Scan() {
return []byte(sc.Text() + "\n"), nil
}
return nil, sc.Err()
}
var got string
f, err := syntax.ParseCompoundStmt("foo.star", readline)
if err != nil {
got = stripPos(err)
} else {
for _, stmt := range f.Stmts {
got += treeString(stmt)
}
}
if test.want != got {
t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
}
}
}
// stripPos returns err's message with everything up to and including
// the first ": " removed, which drops a leading "file:line:col: "
// prefix. A message containing no ": " is returned unchanged.
func stripPos(err error) string {
	msg := err.Error()
	if _, rest, found := strings.Cut(msg, ": "); found {
		return rest // text after file:line:col
	}
	return msg
}
// treeString renders the syntax node n as a parenthesized tree and
// returns the text. Identifiers appear as foo, literals as "foo" or 42,
// and structs as (type name=value ...) with empty fields omitted.
// It is a string-returning convenience wrapper around writeTree.
func treeString(n syntax.Node) string {
	out := new(bytes.Buffer)
	writeTree(out, reflect.ValueOf(n))
	return out.String()
}
// writeTree appends to out a parenthesized rendering of the value x,
// recursing through pointers, interfaces, slices and struct fields
// by reflection.
//
// Strings, ints and bools are printed with %v. A pointer or interface
// whose element is invalid prints "nil". syntax.Literal and
// syntax.Ident structs get a compact form; every other struct prints as
// "(TypeName field=value ...)" with the "syntax." prefix trimmed.
// Any other kind prints its Go type via %T.
func writeTree(out *bytes.Buffer, x reflect.Value) {
switch x.Kind() {
case reflect.String, reflect.Int, reflect.Bool:
fmt.Fprintf(out, "%v", x.Interface())
case reflect.Ptr, reflect.Interface:
// Kind 0 is reflect.Invalid: Elem of a nil pointer/interface.
if elem := x.Elem(); elem.Kind() == 0 {
out.WriteString("nil")
} else {
writeTree(out, elem)
}
case reflect.Struct:
switch v := x.Interface().(type) {
case syntax.Literal:
// Only STRING, BYTES and INT literals produce output;
// any other literal token prints nothing.
switch v.Token {
case syntax.STRING:
fmt.Fprintf(out, "%q", v.Value)
case syntax.BYTES:
fmt.Fprintf(out, "b%q", v.Value)
case syntax.INT:
fmt.Fprintf(out, "%d", v.Value)
}
return
case syntax.Ident:
out.WriteString(v.Name)
return
}
fmt.Fprintf(out, "(%s", strings.TrimPrefix(x.Type().String(), "syntax."))
for i, n := 0, x.NumField(); i < n; i++ {
f := x.Field(i)
if f.Type() == reflect.TypeOf(syntax.Position{}) {
continue // skip positions
}
name := x.Type().Field(i).Name
if name == "commentsRef" {
continue // skip comments fields
}
// Token-typed fields are printed with %s (checked before the
// generic Kind switch below).
if f.Type() == reflect.TypeOf(syntax.Token(0)) {
fmt.Fprintf(out, " %s=%s", name, f.Interface())
continue
}
switch f.Kind() {
case reflect.Slice:
// Empty slices are omitted; elements are space-separated.
if n := f.Len(); n > 0 {
fmt.Fprintf(out, " %s=(", name)
for i := 0; i < n; i++ {
if i > 0 {
out.WriteByte(' ')
}
writeTree(out, f.Index(i))
}
out.WriteByte(')')
}
continue
case reflect.Ptr, reflect.Interface:
// Nil fields are omitted; non-nil ones fall out of the switch
// to the generic "name=value" printing below.
if f.IsNil() {
continue
}
case reflect.Int:
// Zero ints are omitted.
if f.Int() != 0 {
fmt.Fprintf(out, " %s=%d", name, f.Int())
}
continue
case reflect.Bool:
// A true bool prints as the bare field name; false is omitted.
if f.Bool() {
fmt.Fprintf(out, " %s", name)
}
continue
}
fmt.Fprintf(out, " %s=", name)
writeTree(out, f)
}
fmt.Fprintf(out, ")")
default:
fmt.Fprintf(out, "%T", x.Interface())
}
}
// TestParseErrors parses every chunk of testdata/errors.star and
// reports each syntax.Error (line and message) to the chunk via
// GotError. A chunk that parses cleanly reports nothing, and any other
// kind of error fails the test. Done is called on every chunk.
func TestParseErrors(t *testing.T) {
	path := starlarktest.DataFile("syntax", "testdata/errors.star")
	for _, chunk := range chunkedfile.Read(path, t) {
		if _, err := syntax.Parse(path, chunk.Source, 0); err != nil {
			if serr, ok := err.(syntax.Error); ok {
				chunk.GotError(int(serr.Pos.Line), serr.Msg)
			} else {
				t.Error(err)
			}
		}
		chunk.Done()
	}
}
// TestFilePortion checks that positions reported for source supplied
// as a syntax.FilePortion are offset by its FirstLine and FirstCol.
func TestFilePortion(t *testing.T) {
	// Imagine that the Starlark file or expression print(x.f) is extracted
	// from the middle of a file in some hypothetical template language;
	// see https://github.com/google/starlark-go/issues/346. For example:
	// --
	// {{loop x seq}}
	// {{print(x.f)}}
	// {{end}}
	// --
	const want = "foo.template:2:4 foo.template:2:14"

	portion := syntax.FilePortion{
		Content:   []byte("print(x.f)"),
		FirstLine: 2,
		FirstCol:  4,
	}
	file, err := syntax.Parse("foo.template", portion, 0)
	if err != nil {
		t.Fatal(err)
	}
	if got := fmt.Sprint(file.Stmts[0].Span()); got != want {
		t.Errorf("wrong span: got %q, want %q", got, want)
	}
}
// dataFile returns the path of filename inside package directory pkgdir
// of the go.starlark.net source tree under the default GOPATH.
// It duplicates starlarktest.DataFile; the copy exists to avoid a
// dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	elems := []string{build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename}
	return filepath.Join(elems...)
}
// BenchmarkParse measures syntax.Parse on testdata/scan.star.
// The file is read while the timer is stopped, so only parsing is
// timed; any read or parse error aborts the benchmark.
func BenchmarkParse(b *testing.B) {
	path := dataFile("syntax", "testdata/scan.star")

	b.StopTimer()
	src, err := os.ReadFile(path)
	if err != nil {
		b.Fatal(err)
	}
	b.StartTimer()

	for iter := 0; iter < b.N; iter++ {
		if _, err := syntax.Parse(path, src, 0); err != nil {
			b.Fatal(err)
		}
	}
}
@@ -0,0 +1,309 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// Starlark quoted string utilities.
import (
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// unesc maps single-letter chars following \ to their actual values.
// It is indexed by the byte that follows the backslash; unquote
// consults it only for the one-character escapes listed here.
// Entries not listed are zero.
var unesc = [256]byte{
'a': '\a',
'b': '\b',
'f': '\f',
'n': '\n',
'r': '\r',
't': '\t',
'v': '\v',
'\\': '\\',
'\'': '\'',
'"': '"',
}
// esc maps escape-worthy bytes to the char that should follow \.
// It is the inverse of unesc; entries not listed are zero.
// NOTE(review): no use of esc is visible in this part of the file
// (Quote spells its escapes out in a switch) — confirm whether it is
// still needed.
var esc = [256]byte{
'\a': 'a',
'\b': 'b',
'\f': 'f',
'\n': 'n',
'\r': 'r',
'\t': 't',
'\v': 'v',
'\\': '\\',
'\'': '\'',
'"': '"',
}
// unquote decodes the Starlark string or bytes literal in quoted.
//
// The literal may start with the prefix "r" (raw: backslashes are not
// interpreted), then "b" (bytes literal), in that order, followed by
// the text enclosed in matching single, double, or triple quotes.
//
// It returns:
//   - s: the decoded value;
//   - triple: whether the literal was triple-quoted;
//   - isByte: whether the literal had the "b" prefix;
//   - err: non-nil if the literal is malformed (too short, mismatched
//     quotes, or a truncated, invalid, or out-of-range escape).
//
// A carriage return, or a carriage return followed by a newline, in
// the literal's text is decoded as a single newline, even in raw mode.
// In string (non-bytes) literals, octal and hex escapes above 127 are
// rejected.
func unquote(quoted string) (s string, triple, isByte bool, err error) {
	// Check for raw prefix: means don't interpret the inner \.
	raw := false
	if strings.HasPrefix(quoted, "r") {
		raw = true
		quoted = quoted[1:]
	}
	// Check for bytes prefix.
	if strings.HasPrefix(quoted, "b") {
		isByte = true
		quoted = quoted[1:]
	}

	if len(quoted) < 2 {
		err = fmt.Errorf("string literal too short")
		return
	}

	if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
		err = fmt.Errorf("string literal has invalid quotes")
		return
	}

	// Check for triple quoted string.
	quote := quoted[0]
	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
		triple = true
		quoted = quoted[3 : len(quoted)-3]
	} else {
		quoted = quoted[1 : len(quoted)-1]
	}

	// Now quoted is the quoted data, but no quotes.
	// If we're in raw mode or there are no escapes or
	// carriage returns, we're done.
	var unquoteChars string
	if raw {
		unquoteChars = "\r"
	} else {
		unquoteChars = "\\\r"
	}
	if !strings.ContainsAny(quoted, unquoteChars) {
		s = quoted
		return
	}

	// Otherwise process quoted string.
	// Each iteration processes one escape sequence along with the
	// plain text leading up to it.
	buf := new(strings.Builder)
	for {
		// Remove prefix before escape sequence.
		i := strings.IndexAny(quoted, unquoteChars)
		if i < 0 {
			i = len(quoted)
		}
		buf.WriteString(quoted[:i])
		quoted = quoted[i:]

		if len(quoted) == 0 {
			break
		}

		// Process carriage return.
		if quoted[0] == '\r' {
			buf.WriteByte('\n')
			if len(quoted) > 1 && quoted[1] == '\n' {
				quoted = quoted[2:]
			} else {
				quoted = quoted[1:]
			}
			continue
		}

		// Process escape sequence.
		if len(quoted) == 1 {
			err = fmt.Errorf(`truncated escape sequence \`)
			return
		}

		switch quoted[1] {
		default:
			// In Starlark, like Go, a backslash must escape something.
			// (Python still treats unnecessary backslashes literally,
			// but since 3.6 has emitted a deprecation warning.)
			err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
			return

		case '\n':
			// Ignore the escape and the line break.
			quoted = quoted[2:]

		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
			// One-char escape.
			// Escapes are allowed for both kinds of quotation
			// mark, not just the kind in use.
			buf.WriteByte(unesc[quoted[1]])
			quoted = quoted[2:]

		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal escape, up to 3 digits, \OOO.
			n := int(quoted[1] - '0')
			quoted = quoted[2:]
			for i := 1; i < 3; i++ {
				if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
					break
				}
				n = n*8 + int(quoted[0]-'0')
				quoted = quoted[1:]
			}
			if !isByte && n > 127 {
				err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
				return
			}
			if n >= 256 {
				// NOTE: Python silently discards the high bit,
				// so that '\541' == '\141' == 'a'.
				// Let's see if we can avoid doing that in BUILD files.
				err = fmt.Errorf(`invalid escape sequence \%03o`, n)
				return
			}
			buf.WriteByte(byte(n))

		case 'x':
			// Hexadecimal escape, exactly 2 digits, \xXX. [0-127]
			if len(quoted) < 4 {
				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
				return
			}
			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
			if err1 != nil {
				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
				return
			}
			if !isByte && n > 127 {
				err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
					quoted[:4], n, n)
				return
			}
			buf.WriteByte(byte(n))
			quoted = quoted[4:]

		case 'u', 'U':
			// Unicode code point, 4 (\uXXXX) or 8 (\UXXXXXXXX) hex digits.
			sz := 6
			if quoted[1] == 'U' {
				sz = 10
			}
			if len(quoted) < sz {
				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
				return
			}
			n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
			if err1 != nil {
				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
				return
			}
			if n > unicode.MaxRune {
				// Report the actual maximum code point, not the
				// offending value, after "max".
				err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
					quoted[:sz], unicode.MaxRune)
				return
			}
			// As in Go, surrogates are disallowed.
			if 0xD800 <= n && n < 0xE000 {
				err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
				return
			}
			buf.WriteRune(rune(n))
			quoted = quoted[sz:]
		}
	}

	s = buf.String()
	return
}
// indexByte returns the index of the first instance of b in s, or else -1.
// It delegates to strings.IndexByte, which has the same contract.
func indexByte(s string, b byte) int {
	return strings.IndexByte(s, b)
}
// Quote returns a double-quoted Starlark literal that denotes s.
// If b is true the literal carries the bytes prefix (b"...").
//
// Printable runes are emitted as themselves. A double quote or
// backslash is backslash-escaped. The controls \a \b \f \n \r \t \v use
// their letter escapes; other runes below space, and DEL, use \xXX;
// remaining non-printable runes use \uXXXX or \UXXXXXXXX. A byte that
// is not part of a valid UTF-8 sequence is written as \xXX even when b
// is false, in which case the result is not a legal string literal.
func Quote(s string, b bool) string {
	const (
		digits   = "0123456789abcdef"
		controls = "\a\b\f\n\r\t\v" // bytes with a one-letter escape...
		letters  = "abfnrtv"        // ...and the letter for each, by index
	)

	out := make([]byte, 0, 3*len(s)/2)

	// escape appends a backslash, the kind letter, and the low
	// ndigits hex digits of v, most significant first.
	escape := func(kind byte, v rune, ndigits int) {
		out = append(out, '\\', kind)
		for shift := 4 * (ndigits - 1); shift >= 0; shift -= 4 {
			out = append(out, digits[v>>uint(shift)&0xF])
		}
	}

	if b {
		out = append(out, 'b')
	}
	out = append(out, '"')

	for pos := 0; pos < len(s); {
		c := s[pos]
		r, size := rune(c), 1
		if c >= utf8.RuneSelf {
			r, size = utf8.DecodeRuneInString(s[pos:])
		}
		pos += size

		switch k := strings.IndexRune(controls, r); {
		case size == 1 && r == utf8.RuneError:
			// String (!b) literals accept \xXX escapes only for ASCII,
			// but they are needed here to represent invalid bytes.
			escape('x', rune(c), 2)
		case r == '"', r == '\\': // always backslashed
			out = append(out, '\\', byte(r))
		case strconv.IsPrint(r):
			out = append(out, string(r)...)
		case k >= 0:
			out = append(out, '\\', letters[k])
		case r < ' ' || r == 0x7f:
			escape('x', r, 2)
		case r > utf8.MaxRune:
			escape('u', 0xFFFD, 4)
		case r < 0x10000:
			escape('u', r, 4)
		default:
			escape('U', r, 8)
		}
	}

	out = append(out, '"')
	return string(out)
}
@@ -0,0 +1,65 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"strings"
"testing"
)
// quoteTests is the table shared by TestQuote and TestUnquote.
// TestUnquote checks that unquote(q) yields s for every row;
// TestQuote checks that Quote(s, false) yields q, but only for rows
// whose std field is true.
var quoteTests = []struct {
q string // quoted
s string // unquoted (actual string)
std bool // q is standard form for s
}{
{`""`, "", true},
{`''`, "", false},
{`"hello"`, `hello`, true},
{`'hello'`, `hello`, false},
{`"quote\"here"`, `quote"here`, true},
{`'quote"here'`, `quote"here`, false},
{`"quote'here"`, `quote'here`, true},
{`'quote\'here'`, `quote'here`, false},
{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", true},
{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", true},
{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
{`"\a\b\f\n\r\t\v\x00\x7f\"\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"\\\x03", false},
{
`"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ \x27\\1\x27,/g' >> $@; "`,
"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ",
false,
},
{
`"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; "`,
"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ",
true,
},
}
// TestQuote checks, for every quoteTests row marked as the standard
// form, that quoting the unquoted string as a non-bytes literal
// reproduces the quoted text exactly. Non-standard rows are skipped.
func TestQuote(t *testing.T) {
	for _, tc := range quoteTests {
		if tc.std {
			if got := Quote(tc.s, false); got != tc.q {
				t.Errorf("quote(%#q) = %s, want %s", tc.s, got, tc.q)
			}
		}
	}
}
// TestUnquote checks, for every quoteTests row, that unquote succeeds,
// returns the expected string, and reports triple-quoting exactly when
// the quoted text begins with three identical quote characters.
func TestUnquote(t *testing.T) {
	for _, tc := range quoteTests {
		wantTriple := false
		for _, delim := range []string{`"""`, `'''`} {
			if strings.HasPrefix(tc.q, delim) {
				wantTriple = true
			}
		}

		got, gotTriple, _, err := unquote(tc.q)
		if err == nil && got == tc.s && gotTriple == wantTriple {
			continue
		}
		t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tc.q, got, gotTriple, err, tc.s, wantTriple)
	}
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,310 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"bytes"
"fmt"
"go/build"
"os"
"path/filepath"
"strings"
"testing"
)
// scan tokenizes src (under the file name "foo.star") and returns the
// tokens rendered as text and joined by single spaces, ending with
// "EOF".
//
// Identifiers print as their raw text, ints with %d (using the big
// value when one is set), floats with %e, strings and bytes through
// Quote, and every other token through its String method.
//
// An error from newScanner is returned directly. The deferred
// sc.recover(&err) presumably turns a scanner panic into err; confirm
// against the scanner implementation.
func scan(src interface{}) (tokens string, err error) {
	sc, err := newScanner("foo.star", src, false)
	if err != nil {
		return "", err
	}
	defer sc.recover(&err)

	var (
		out bytes.Buffer
		val tokenValue
	)
	for atEOF := false; !atEOF; {
		tok := sc.nextToken(&val)
		if out.Len() > 0 {
			out.WriteByte(' ')
		}
		switch {
		case tok == EOF:
			out.WriteString("EOF")
			atEOF = true
		case tok == IDENT:
			out.WriteString(val.raw)
		case tok == INT && val.bigInt != nil:
			fmt.Fprintf(&out, "%d", val.bigInt)
		case tok == INT:
			fmt.Fprintf(&out, "%d", val.int)
		case tok == FLOAT:
			fmt.Fprintf(&out, "%e", val.float)
		case tok == STRING || tok == BYTES:
			out.WriteString(Quote(val.string, tok == BYTES))
		default:
			out.WriteString(tok.String())
		}
	}
	return out.String(), nil
}
func TestScanner(t *testing.T) {
for _, test := range []struct {
input, want string
}{
{``, "EOF"},
{`123`, "123 EOF"},
{`x.y`, "x . y EOF"},
{`chocolate.éclair`, `chocolate . éclair EOF`},
{`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
{`print(x)`, "print ( x ) EOF"},
{`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
{"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
{`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
{`# hello
print(x)`, "print ( x ) EOF"},
{`# hello
print(1)
cc_binary(name="foo")
def f(x):
return x+1
print(1)
`,
`print ( 1 ) newline ` +
`cc_binary ( name = "foo" ) newline ` +
`def f ( x ) : newline ` +
`indent return x + 1 newline ` +
`outdent print ( 1 ) newline ` +
`EOF`},
// EOF should act like an implicit newline.
{`def f(): pass`,
"def f ( ) : pass EOF"},
{`def f():
pass`,
"def f ( ) : newline indent pass newline outdent EOF"},
{`def f():
pass
# oops`,
"def f ( ) : newline indent pass newline outdent EOF"},
{`def f():
pass \
`,
"def f ( ) : newline indent pass newline outdent EOF"},
{`def f():
pass
`,
"def f ( ) : newline indent pass newline outdent EOF"},
{`pass
pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
{`def f():
pass
`, "def f ( ) : newline indent pass newline outdent EOF"},
{`def f():
pass
` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
{"pass", "pass EOF"},
{"pass\n", "pass newline EOF"},
{"pass\n ", "pass newline EOF"},
{"pass\n \n", "pass newline EOF"},
{"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"},
{`x = 1 + \
2`, `x = 1 + 2 EOF`},
{`x = 'a\nb'`, `x = "a\nb" EOF`},
{`x = r'a\nb'`, `x = "a\\nb" EOF`},
{"x = 'a\\\nb'", `x = "ab" EOF`},
{`x = '\''`, `x = "'" EOF`},
{`x = "\""`, `x = "\"" EOF`},
{`x = r'\''`, `x = "\\'" EOF`},
{`x = '''\''''`, `x = "'" EOF`},
{`x = r'''\''''`, `x = "\\'" EOF`},
{`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
{"x = '''a\nb'''", `x = "a\nb" EOF`},
{"x = '''a\rb'''", `x = "a\nb" EOF`},
{"x = '''a\r\nb'''", `x = "a\nb" EOF`},
{"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
{"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
{"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
{"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
{"a\rb", `a newline b EOF`},
{"a\nb", `a newline b EOF`},
{"a\r\nb", `a newline b EOF`},
{"a\n\nb", `a newline b EOF`},
// numbers
{"0", `0 EOF`},
{"00", `0 EOF`},
{"0.", `0.000000e+00 EOF`},
{"0.e1", `0.000000e+00 EOF`},
{".0", `0.000000e+00 EOF`},
{"0.0", `0.000000e+00 EOF`},
{".e1", `. e1 EOF`},
{"1", `1 EOF`},
{"1.", `1.000000e+00 EOF`},
{".1", `1.000000e-01 EOF`},
{".1e1", `1.000000e+00 EOF`},
{".1e+1", `1.000000e+00 EOF`},
{".1e-1", `1.000000e-02 EOF`},
{"1e1", `1.000000e+01 EOF`},
{"1e+1", `1.000000e+01 EOF`},
{"1e-1", `1.000000e-01 EOF`},
{"123", `123 EOF`},
{"123e45", `1.230000e+47 EOF`},
{"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
{"12345678901234567890", `12345678901234567890 EOF`},
// hex
{"0xA", `10 EOF`},
{"0xAAG", `170 G EOF`},
{"0xG", `foo.star:1:1: invalid hex literal`},
{"0XA", `10 EOF`},
{"0XG", `foo.star:1:1: invalid hex literal`},
{"0xA.", `10 . EOF`},
{"0xA.e1", `10 . e1 EOF`},
{"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
// binary
{"0b1010", `10 EOF`},
{"0B111101", `61 EOF`},
{"0b3", `foo.star:1:3: invalid binary literal`},
{"0b1010201", `10 201 EOF`},
{"0b1010.01", `10 1.000000e-02 EOF`},
{"0b0000", `0 EOF`},
// octal
{"0o123", `83 EOF`},
{"0o12834", `10 834 EOF`},
{"0o12934", `10 934 EOF`},
{"0o12934.", `10 9.340000e+02 EOF`},
{"0o12934.1", `10 9.341000e+02 EOF`},
{"0o12934e1", `10 9.340000e+03 EOF`},
{"0o123.", `83 . EOF`},
{"0o123.1", `83 1.000000e-01 EOF`},
{"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
{"012834", `foo.star:1:1: invalid int literal`},
{"012934", `foo.star:1:1: invalid int literal`},
{"i = 012934", `foo.star:1:5: invalid int literal`},
// octal escapes in string literals
{`"\037"`, `"\x1f" EOF`},
{`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
{`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8'
{`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
// hex escapes
{`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
{`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
{`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
{`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
{`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
{`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
{`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
// Unicode escapes
// \uXXXX
{`"\u0400"`, `"Ѐ" EOF`},
{`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
{`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
{`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
{`"\u4E16"`, `"世" EOF`},
{`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
// \UXXXXXXXX
{`"\U00000400"`, `"Ѐ" EOF`},
{`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
{`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
{`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
{`"\U0010FFFF"`, `"\U0010ffff" EOF`},
{`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
{`"\U0001F63F"`, `"😿" EOF`},
{`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
// backslash escapes
// As in Go, a backslash must escape something.
// (Python started issuing a deprecation warning in 3.6.)
{`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
{`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
{`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
{`"\""`, `"\"" EOF`},
{`"\'"`, `"'" EOF`},
{`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
{`'\''`, `"'" EOF`},
{`'\"'`, `"\"" EOF`},
{`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
{`"""\""""`, `"\"" EOF`},
{`"""\'"""`, `"'" EOF`},
{`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
{`'''\''''`, `"'" EOF`},
{`'''\"'''`, `"\"" EOF`},
{`r"\w"`, `"\\w" EOF`},
{`r"\""`, `"\\\"" EOF`},
{`r"\'"`, `"\\'" EOF`},
{`r'\w'`, `"\\w" EOF`},
{`r'\''`, `"\\'" EOF`},
{`r'\"'`, `"\\\"" EOF`},
{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
// bytes literals (where they differ from text strings)
{`b"AЀ世😿"`, `b"AЀ世😿`}, // 1-4 byte encodings, literal
{`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`}, // same, as escapes
{`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
{`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
{`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
// floats starting with octal digits
{"012934.", `1.293400e+04 EOF`},
{"012934.1", `1.293410e+04 EOF`},
{"012934e1", `1.293400e+05 EOF`},
{"0123.", `1.230000e+02 EOF`},
{"0123.1", `1.231000e+02 EOF`},
// github.com/google/skylark/issues/16
{"x ! 0", "foo.star:1:3: unexpected input character '!'"},
// github.com/google/starlark-go/issues/80
{"([{<>}])", "( [ { < > } ] ) EOF"},
{"f();", "f ( ) ; EOF"},
// github.com/google/starlark-go/issues/104
{"def f():\n if x:\n pass\n ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
{`while cond: pass`, "while cond : pass EOF"},
// github.com/google/starlark-go/issues/107
{"~= ~= 5", "~ = ~ = 5 EOF"},
{"0in", "0 in EOF"},
{"0or", "foo.star:1:3: invalid octal literal"},
{"6in", "6 in EOF"},
{"6or", "6 or EOF"},
} {
got, err := scan(test.input)
if err != nil {
got = err.(Error).Error()
}
// Prefix match allows us to truncate errors in expectations.
// Success cases all end in EOF.
if !strings.HasPrefix(got, test.want) {
t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
}
}
}
// dataFile returns the path of a test data file located under the
// go.starlark.net source tree of the default GOPATH.
// It duplicates starlarktest.DataFile; the copy exists so that this
// package does not take part in a dependency cycle.
var dataFile = func(pkgdir, filename string) string {
	gopath := build.Default.GOPATH
	return filepath.Join(gopath, "src/go.starlark.net", pkgdir, filename)
}
// BenchmarkScan measures scanning of testdata/scan.star from the first
// token through EOF. Reading the file from disk happens while the
// benchmark timer is stopped.
func BenchmarkScan(b *testing.B) {
	path := dataFile("syntax", "testdata/scan.star")

	// Load the input with the timer paused so that only scanning is timed.
	b.StopTimer()
	src, readErr := os.ReadFile(path)
	if readErr != nil {
		b.Fatal(readErr)
	}
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		sc, err := newScanner(path, src, false)
		if err != nil {
			b.Fatal(err)
		}
		// Drain the token stream; the token values are discarded.
		var val tokenValue
		tok := sc.nextToken(&val)
		for tok != EOF {
			tok = sc.nextToken(&val)
		}
	}
}
@@ -0,0 +1,529 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package syntax provides a Starlark parser and abstract syntax tree.
package syntax // import "go.starlark.net/syntax"
// A Node is a node in a Starlark syntax tree.
// The statement, expression and clause types declared in this file
// satisfy it by defining Span and embedding commentsRef.
type Node interface {
// Span returns the start and end position of the expression.
Span() (start, end Position)
// Comments returns the comments associated with this node.
// It returns nil if RetainComments was not specified during parsing,
// or if AllocComments was not called.
Comments() *Comments
// AllocComments allocates a new Comments node if there was none.
// This makes it possible to add new comments using the Comments method.
AllocComments()
}
// A Comment represents a single # comment.
type Comment struct {
// Start is the position of the comment
// (presumably of its leading '#'; confirm against the scanner).
Start Position
Text string // without trailing newline
}
// Comments collects the comments associated with an expression.
// Nodes hold a *Comments through commentsRef; the pointer is nil
// until AllocComments is called.
type Comments struct {
Before []Comment // whole-line comments before this expression
Suffix []Comment // end-of-line comments after this expression (up to 1)
// For top-level expressions only, After lists whole-line
// comments following the expression.
After []Comment
}
// A commentsRef holds an optional pointer to a node's Comments.
// Each syntax node type embeds a commentsRef and thereby obtains
// the Comments and AllocComments methods required by Node.
type commentsRef struct {
	ref *Comments
}

// Comments reports the comments attached to the node.
// The result is nil until AllocComments has been called.
func (cr commentsRef) Comments() *Comments {
	return cr.ref
}

// AllocComments ensures the node has a Comments value to which
// comments can be added. An existing Comments value is kept as is.
func (cr *commentsRef) AllocComments() {
	if cr.ref != nil {
		return
	}
	cr.ref = &Comments{}
}
// Start returns the first of the two positions reported by n.Span.
func Start(n Node) Position {
	pos, _ := n.Span()
	return pos
}
// End returns the second of the two positions reported by n.Span.
func End(n Node) Position {
	_, pos := n.Span()
	return pos
}
// A File is the root of the syntax tree of one Starlark file.
type File struct {
	commentsRef
	Path    string
	Stmts   []Stmt
	Module  interface{} // a *resolve.Module, set by resolver
	Options *FileOptions
}

// Span reports the start of the first statement and the end of the
// last one. A file without statements yields two zero Positions.
func (x *File) Span() (start, end Position) {
	if n := len(x.Stmts); n > 0 {
		start, _ = x.Stmts[0].Span()
		_, end = x.Stmts[n-1].Span()
	}
	return start, end
}
// A Stmt is a Starlark statement.
// Besides the Node methods it requires the unexported marker method
// stmt, which the statement types below implement as a no-op.
type Stmt interface {
Node
stmt()
}
// Marker methods: these declarations make each pointer type a Stmt.
func (*AssignStmt) stmt() {}
func (*BranchStmt) stmt() {}
func (*DefStmt) stmt() {}
func (*ExprStmt) stmt() {}
func (*ForStmt) stmt() {}
func (*WhileStmt) stmt() {}
func (*IfStmt) stmt() {}
func (*LoadStmt) stmt() {}
func (*ReturnStmt) stmt() {}
// An AssignStmt represents an assignment, plain or augmented:
//
//	x = 0
//	x, y = y, x
//	x += 1
type AssignStmt struct {
	commentsRef
	OpPos Position
	Op    Token // = EQ | {PLUS,MINUS,STAR,PERCENT}_EQ
	LHS   Expr
	RHS   Expr
}

// Span runs from the start of the left-hand side to the end of the
// right-hand side.
func (x *AssignStmt) Span() (start, end Position) {
	first, _ := x.LHS.Span()
	_, last := x.RHS.Span()
	return first, last
}
// A DefStmt represents a function definition.
type DefStmt struct {
	commentsRef
	Def      Position
	Name     *Ident
	Lparen   Position
	Params   []Expr // param = ident | ident=expr | * | *ident | **ident
	Rparen   Position
	Body     []Stmt
	Function interface{} // a *resolve.Function, set by resolver
}

// Span runs from the 'def' keyword to the end of the last body
// statement. Body must be non-empty; indexing it panics otherwise.
func (x *DefStmt) Span() (start, end Position) {
	last := x.Body[len(x.Body)-1]
	_, end = last.Span()
	start = x.Def
	return start, end
}
// An ExprStmt is a statement consisting of a single expression,
// evaluated for its side effects.
type ExprStmt struct {
	commentsRef
	X Expr
}

// Span is the span of the wrapped expression X.
func (x *ExprStmt) Span() (start, end Position) {
	start, end = x.X.Span()
	return start, end
}
// An IfStmt is a conditional: If Cond: True; else: False.
// 'elif' is desugared into a chain of IfStmts.
type IfStmt struct {
	commentsRef
	If      Position // IF or ELIF
	Cond    Expr
	True    []Stmt
	ElsePos Position // ELSE or ELIF
	False   []Stmt   // optional
}

// Span runs from the 'if' (or 'elif') keyword to the end of the last
// statement of the False branch, or of the True branch when False is nil.
// The branch that is consulted must be non-empty.
func (x *IfStmt) Span() (start, end Position) {
	tail := x.True
	if x.False != nil {
		tail = x.False
	}
	_, end = tail[len(tail)-1].Span()
	return x.If, end
}
// A LoadStmt loads another module and binds names from it:
// load(Module, "x", y="foo").
//
// The AST is slightly unfaithful to the concrete syntax here because
// Starlark's load statement, so that it can be implemented in Python,
// binds some names (like y above) with an identifier and some (like x)
// without. For consistency we create fake identifiers for all the
// strings.
type LoadStmt struct {
	commentsRef
	Load   Position
	Module *Literal // a string
	From   []*Ident // name defined in loading module
	To     []*Ident // name in loaded module
	Rparen Position
}

// Span runs from the 'load' keyword to just past the closing ")".
//
// Rparen is the position of the ")" token itself, so it is advanced
// past the token, as ParenExpr.Span and CallExpr.Span do, so that the
// end position is exclusive like that of every other node.
func (x *LoadStmt) Span() (start, end Position) {
	return x.Load, x.Rparen.add(")")
}

// ModuleName returns the name of the module loaded by this statement.
// It panics if Module.Value does not hold a string.
func (x *LoadStmt) ModuleName() string { return x.Module.Value.(string) }
// A BranchStmt is one of the single-keyword statements
// break, continue, or pass.
type BranchStmt struct {
	commentsRef
	Token    Token // = BREAK | CONTINUE | PASS
	TokenPos Position
}

// Span covers the keyword: it starts at TokenPos and ends after the
// text returned by Token.String.
func (x *BranchStmt) Span() (start, end Position) {
	start = x.TokenPos
	end = start.add(x.Token.String())
	return start, end
}
// A ReturnStmt returns from a function, optionally with a result.
type ReturnStmt struct {
	commentsRef
	Return Position
	Result Expr // may be nil
}

// Span starts at the 'return' keyword. It ends at the end of Result,
// or just after the keyword when there is no result.
func (x *ReturnStmt) Span() (start, end Position) {
	start = x.Return
	if x.Result != nil {
		_, end = x.Result.Span()
	} else {
		end = start.add("return")
	}
	return start, end
}
// An Expr is a Starlark expression.
// Besides the Node methods it requires the unexported marker method
// expr, which the expression types below implement as a no-op.
type Expr interface {
Node
expr()
}
// Marker methods: these declarations make each pointer type an Expr.
// ForClause and IfClause are not listed, so they are Nodes but not Exprs.
func (*BinaryExpr) expr() {}
func (*CallExpr) expr() {}
func (*Comprehension) expr() {}
func (*CondExpr) expr() {}
func (*DictEntry) expr() {}
func (*DictExpr) expr() {}
func (*DotExpr) expr() {}
func (*Ident) expr() {}
func (*IndexExpr) expr() {}
func (*LambdaExpr) expr() {}
func (*ListExpr) expr() {}
func (*Literal) expr() {}
func (*ParenExpr) expr() {}
func (*SliceExpr) expr() {}
func (*TupleExpr) expr() {}
func (*UnaryExpr) expr() {}
// An Ident is an identifier occurring in the source.
type Ident struct {
	commentsRef
	NamePos Position
	Name    string
	Binding interface{} // a *resolver.Binding, set by resolver
}

// Span starts at NamePos and ends after the text of Name.
func (x *Ident) Span() (start, end Position) {
	start = x.NamePos
	end = start.add(x.Name)
	return start, end
}
// A Literal is a literal string, bytes, or number.
type Literal struct {
	commentsRef
	Token    Token // = STRING | BYTES | INT | FLOAT
	TokenPos Position
	Raw      string      // uninterpreted text
	Value    interface{} // = string | int64 | *big.Int | float64
}

// Span starts at TokenPos and ends after the uninterpreted text Raw.
func (x *Literal) Span() (start, end Position) {
	start = x.TokenPos
	end = start.add(x.Raw)
	return start, end
}
// A ParenExpr is an expression enclosed in parentheses: (X).
type ParenExpr struct {
	commentsRef
	Lparen Position
	X      Expr
	Rparen Position
}

// Span runs from the "(" to just past the ")".
func (x *ParenExpr) Span() (start, end Position) {
	start = x.Lparen
	end = x.Rparen.add(")")
	return start, end
}
// A CallExpr is a function call: Fn(Args).
type CallExpr struct {
	commentsRef
	Fn     Expr
	Lparen Position
	Args   []Expr // arg = expr | ident=expr | *expr | **expr
	Rparen Position
}

// Span runs from the start of Fn to just past the closing ")".
func (x *CallExpr) Span() (start, end Position) {
	fnStart, _ := x.Fn.Span()
	return fnStart, x.Rparen.add(")")
}
// A DotExpr is a field or method selection: X.Name.
type DotExpr struct {
	commentsRef
	X       Expr
	Dot     Position
	NamePos Position
	Name    *Ident
}

// Span runs from the start of X to the end of the Name identifier.
func (x *DotExpr) Span() (start, end Position) {
	first, _ := x.X.Span()
	_, last := x.Name.Span()
	return first, last
}
// A Comprehension is a list or dict comprehension:
// [Body for ... if ...] or {Body for ... if ...}
type Comprehension struct {
	commentsRef
	Curly   bool // {x:y for ...} or {x for ...}, not [x for ...]
	Lbrack  Position
	Body    Expr
	Clauses []Node // = *ForClause | *IfClause
	Rbrack  Position
}

// Span runs from the opening bracket to just past the closing one.
// The end is computed with "]" whether or not Curly is set.
func (x *Comprehension) Span() (start, end Position) {
	start = x.Lbrack
	end = x.Rbrack.add("]")
	return start, end
}
// A ForStmt is a loop over the elements of X: for Vars in X: Body.
type ForStmt struct {
	commentsRef
	For  Position
	Vars Expr // name, or tuple of names
	X    Expr
	Body []Stmt
}

// Span runs from the 'for' keyword to the end of the last body
// statement. Body must be non-empty; indexing it panics otherwise.
func (x *ForStmt) Span() (start, end Position) {
	last := x.Body[len(x.Body)-1]
	_, end = last.Span()
	start = x.For
	return start, end
}
// A WhileStmt is a conditional loop: while Cond: Body.
type WhileStmt struct {
	commentsRef
	While Position
	Cond  Expr
	Body  []Stmt
}

// Span runs from the 'while' keyword to the end of the last body
// statement. Body must be non-empty; indexing it panics otherwise.
func (x *WhileStmt) Span() (start, end Position) {
	last := x.Body[len(x.Body)-1]
	_, end = last.Span()
	start = x.While
	return start, end
}
// A ForClause is a 'for' clause of a comprehension: for Vars in X.
type ForClause struct {
	commentsRef
	For  Position
	Vars Expr // name, or tuple of names
	In   Position
	X    Expr
}

// Span runs from the 'for' keyword to the end of X.
func (x *ForClause) Span() (start, end Position) {
	_, last := x.X.Span()
	return x.For, last
}
// An IfClause is an 'if' clause of a comprehension: if Cond.
type IfClause struct {
	commentsRef
	If   Position
	Cond Expr
}

// Span runs from the 'if' keyword to the end of Cond.
func (x *IfClause) Span() (start, end Position) {
	_, last := x.Cond.Span()
	return x.If, last
}
// A DictExpr is a dictionary literal: { List }.
type DictExpr struct {
	commentsRef
	Lbrace Position
	List   []Expr // all *DictEntrys
	Rbrace Position
}

// Span runs from the "{" to just past the "}".
func (x *DictExpr) Span() (start, end Position) {
	start = x.Lbrace
	end = x.Rbrace.add("}")
	return start, end
}
// A DictEntry is one Key: Value pair.
// Used only within a DictExpr.
type DictEntry struct {
	commentsRef
	Key   Expr
	Colon Position
	Value Expr
}

// Span runs from the start of Key to the end of Value.
func (x *DictEntry) Span() (start, end Position) {
	first, _ := x.Key.Span()
	_, last := x.Value.Span()
	return first, last
}
// A LambdaExpr is an anonymous function whose body is a single expression.
type LambdaExpr struct {
	commentsRef
	Lambda   Position
	Params   []Expr // param = ident | ident=expr | * | *ident | **ident
	Body     Expr
	Function interface{} // a *resolve.Function, set by resolver
}

// Span runs from the 'lambda' keyword to the end of Body.
func (x *LambdaExpr) Span() (start, end Position) {
	_, last := x.Body.Span()
	return x.Lambda, last
}
// A ListExpr is a list literal: [ List ].
type ListExpr struct {
	commentsRef
	Lbrack Position
	List   []Expr
	Rbrack Position
}

// Span runs from the "[" to just past the "]".
func (x *ListExpr) Span() (start, end Position) {
	start = x.Lbrack
	end = x.Rbrack.add("]")
	return start, end
}
// A CondExpr is a conditional expression: True if Cond else False.
type CondExpr struct {
	commentsRef
	If      Position
	Cond    Expr
	True    Expr
	ElsePos Position
	False   Expr
}

// Span runs from the start of the True operand, which comes first in
// the source, to the end of the False operand.
func (x *CondExpr) Span() (start, end Position) {
	first, _ := x.True.Span()
	_, last := x.False.Span()
	return first, last
}
// A TupleExpr represents a tuple literal: (List).
type TupleExpr struct {
	commentsRef
	Lparen Position // optional (e.g. in x, y = 0, 1), but required if List is empty
	List   []Expr
	Rparen Position
}

// Span reports the extent of the tuple.
//
// When Lparen is valid the span runs from the "(" to just past the ")".
// Rparen is the position of the ")" token itself, so it is advanced past
// the token, as ParenExpr.Span does, to keep the end position exclusive.
// Otherwise the span runs from the start of the first element to the end
// of the last; List must then be non-empty.
func (x *TupleExpr) Span() (start, end Position) {
	if x.Lparen.IsValid() {
		return x.Lparen, x.Rparen.add(")")
	}
	return Start(x.List[0]), End(x.List[len(x.List)-1])
}
// A UnaryExpr is a prefix operator applied to an operand: Op X.
//
// As a special case, UnaryExpr{Op:STAR} may also represent
// the star parameter in def f(*args) or def f(*, x).
type UnaryExpr struct {
	commentsRef
	OpPos Position
	Op    Token
	X     Expr // may be nil if Op==STAR
}

// Span starts at the operator. It ends at the end of X, or just after
// a "*" when X is nil.
func (x *UnaryExpr) Span() (start, end Position) {
	if x.X == nil {
		return x.OpPos, x.OpPos.add("*")
	}
	_, last := x.X.Span()
	return x.OpPos, last
}
// A BinaryExpr is an infix operation: X Op Y.
//
// As a special case, BinaryExpr{Op:EQ} may also
// represent a named argument in a call f(k=v)
// or a named parameter in a function declaration
// def f(param=default).
type BinaryExpr struct {
	commentsRef
	X     Expr
	OpPos Position
	Op    Token
	Y     Expr
}

// Span runs from the start of X to the end of Y.
func (x *BinaryExpr) Span() (start, end Position) {
	first, _ := x.X.Span()
	_, last := x.Y.Span()
	return first, last
}
// A SliceExpr represents a slice or substring expression: X[Lo:Hi:Step].
type SliceExpr struct {
	commentsRef
	X            Expr
	Lbrack       Position
	Lo, Hi, Step Expr // all optional
	Rbrack       Position
}

// Span runs from the start of X to just past the closing "]".
//
// Rbrack is the position of the "]" token itself, so it is advanced past
// the token, as ListExpr.Span does, to keep the end position exclusive.
func (x *SliceExpr) Span() (start, end Position) {
	start, _ = x.X.Span()
	return start, x.Rbrack.add("]")
}
// An IndexExpr represents an index expression: X[Y].
type IndexExpr struct {
	commentsRef
	X      Expr
	Lbrack Position
	Y      Expr
	Rbrack Position
}

// Span runs from the start of X to just past the closing "]".
//
// Rbrack is the position of the "]" token itself, so it is advanced past
// the token, as ListExpr.Span does, to keep the end position exclusive.
func (x *IndexExpr) Span() (start, end Position) {
	start, _ = x.X.Span()
	return start, x.Rbrack.add("]")
}
@@ -0,0 +1,212 @@
# Tests of parse errors.
# This is a "chunked" file; each "---" line demarcates a new parser input.
#
# TODO(adonovan): lots more tests.
x = 1 +
2 ### "got newline, want primary expression"
---
_ = *x ### `got '\*', want primary`
---
# trailing comma is ok
def f(a, ): pass
def f(*args, ): pass
def f(**kwargs, ): pass
---
# Parameters are validated later.
def f(**kwargs, *args, *, b=1, a, **kwargs, *args, *, b=1, a):
pass
---
def f(a, *-b, c): # ### `got '-', want ','`
pass
---
def f(**kwargs, *args, b=1, a, **kwargs, *args, b=1, a):
pass
---
def pass(): ### "not an identifier"
pass
---
def f : ### `got ':', want '\('`
---
# trailing comma is ok
f(a, )
f(*args, )
f(**kwargs, )
---
f(a=1, *, b=2) ### `got ',', want primary`
---
_ = {x:y for y in z} # ok
_ = {x for y in z} ### `got for, want ':'`
---
def f():
pass
pass ### `unindent does not match any outer indentation level`
---
def f(): pass
---
# Blank line after pass => outdent.
def f():
pass
---
# No blank line after pass; EOF acts like a newline.
def f():
pass
---
# This is a well known parsing ambiguity in Python.
# Python 2.7 accepts it but Python3 and Starlark reject it.
_ = [x for x in lambda: True, lambda: False if x()] ### "got lambda, want primary"
_ = [x for x in (lambda: True, lambda: False) if x()] # ok in all dialects
---
# Starlark, following Python 3, allows an unparenthesized
# tuple after 'in' only in a for statement but not in a comprehension.
# (Python 2.7 allows both.)
for x in 1, 2, 3:
print(x)
_ = [x for x in 1, 2, 3] ### `got ',', want ']', for, or if`
---
# Unparenthesized tuple is not allowed as operand of 'if' in comprehension.
_ = [a for b in c if 1, 2] ### `got ',', want ']', for, or if`
---
# Lambda is ok though.
_ = [a for b in c if lambda: d] # ok
# But the body of such a lambda may not be a conditional:
_ = [a for b in c if (lambda: d if e else f)] # ok
_ = [a for b in c if lambda: d if e else f] ### "got else, want ']'"
---
# A lambda is not allowed as the operand of a 'for' clause.
_ = [a for b in lambda: c] ### `got lambda, want primary`
---
# Comparison operations are not associative.
_ = (0 == 1) == 2 # ok
_ = 0 == (1 == 2) # ok
_ = 0 == 1 == 2 ### "== does not associate with =="
---
_ = (0 <= i) < n # ok
_ = 0 <= (i < n) # ok
_ = 0 <= i < n ### "<= does not associate with <"
---
_ = (a in b) not in c # ok
_ = a in (b not in c) # ok
_ = a in b not in c ### "in does not associate with not in"
---
# shift/reduce ambiguity is reduced
_ = [x for x in a if b else c] ### `got else, want ']', for, or if`
---
[a for b in c else d] ### `got else, want ']', for, or if`
---
_ = a + b not c ### "got identifier, want in"
---
f(1+2 = 3) ### "keyword argument must have form name=expr"
---
print(1, 2, 3
### `got end of file, want '\)'`
---
_ = a if b ### "conditional expression without else clause"
---
load("") ### "load statement must import at least 1 symbol"
---
load("", 1) ### `load operand must be "name" or localname="name" \(got int literal\)`
---
load("a", "x") # ok
---
load(1, 2) ### "first operand of load statement must be a string literal"
---
load("a", x) ### `load operand must be "x" or x="originalname"`
---
load("a", x2=x) ### `original name of loaded symbol must be quoted: x2="originalname"`
---
# All of these parse.
load("a", "x")
load("a", "x", y2="y")
load("a", x2="x", "y") # => positional-before-named arg check happens later (!)
---
# 'load' is not an identifier
load = 1 ### `got '=', want '\('`
---
# 'load' is not an identifier
f(load()) ### `got load, want primary`
---
# 'load' is not an identifier
def load(): ### `not an identifier`
pass
---
# 'load' is not an identifier
def f(load): ### `not an identifier`
pass
---
# A load statement allows a trailing comma.
load("module", "x",)
---
x = 1 +
2 ### "got newline, want primary expression"
---
def f():
pass
# this used to cause a spurious indentation error
---
print 1 2 ### `got int literal, want newline`
---
# newlines are not allowed in raw string literals
raw = r'a ### `unexpected newline in string`
b'
---
# The parser permits an unparenthesized tuple expression for the first index.
x[1, 2:] # ok
---
# But not if it has a trailing comma.
x[1, 2,:] ### `got ':', want primary`
---
# Trailing tuple commas are permitted only within parens; see b/28867036.
(a, b,) = 1, 2 # ok
c, d = 1, 2 # ok
---
a, b, = 1, 2 ### `unparenthesized tuple with trailing comma`
---
a, b = 1, 2, ### `unparenthesized tuple with trailing comma`
---
# See github.com/google/starlark-go/issues/48
a = max(range(10))) ### `unexpected '\)'`
---
# github.com/google/starlark-go/issues/85
s = "\x-0" ### `invalid escape sequence`
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,161 @@
// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// Walk traverses a syntax tree in depth-first order.
// It starts by calling f(n); n must not be nil, and Walk panics if it is.
// If f returns false, Walk returns at once without visiting n's children.
// If f returns true, Walk calls itself recursively for each child of n
// (optional children that are nil are skipped) and then calls f(nil).
//
// Walk panics with the node as the panic value if it meets a Node type
// that the switch below does not handle.
func Walk(n Node, f func(Node) bool) {
	if n == nil {
		panic("nil")
	}

	if !f(n) {
		return
	}

	// TODO(adonovan): opt: order cases using profile data.
	switch n := n.(type) {
	case *File:
		walkStmts(n.Stmts, f)

	case *ExprStmt:
		Walk(n.X, f)

	case *BranchStmt:
		// no-op

	case *IfStmt:
		Walk(n.Cond, f)
		walkStmts(n.True, f)
		walkStmts(n.False, f)

	case *AssignStmt:
		Walk(n.LHS, f)
		Walk(n.RHS, f)

	case *DefStmt:
		Walk(n.Name, f)
		for _, param := range n.Params {
			Walk(param, f)
		}
		walkStmts(n.Body, f)

	case *ForStmt:
		Walk(n.Vars, f)
		Walk(n.X, f)
		walkStmts(n.Body, f)

	case *WhileStmt:
		// WhileStmt is a Stmt like any other; without this case a tree
		// containing a while loop would reach the default panic below.
		Walk(n.Cond, f)
		walkStmts(n.Body, f)

	case *ReturnStmt:
		if n.Result != nil {
			Walk(n.Result, f)
		}

	case *LoadStmt:
		Walk(n.Module, f)
		for _, from := range n.From {
			Walk(from, f)
		}
		for _, to := range n.To {
			Walk(to, f)
		}

	case *Ident, *Literal:
		// no-op

	case *ListExpr:
		for _, x := range n.List {
			Walk(x, f)
		}

	case *ParenExpr:
		Walk(n.X, f)

	case *CondExpr:
		Walk(n.Cond, f)
		Walk(n.True, f)
		Walk(n.False, f)

	case *IndexExpr:
		Walk(n.X, f)
		Walk(n.Y, f)

	case *DictEntry:
		Walk(n.Key, f)
		Walk(n.Value, f)

	case *SliceExpr:
		Walk(n.X, f)
		if n.Lo != nil {
			Walk(n.Lo, f)
		}
		if n.Hi != nil {
			Walk(n.Hi, f)
		}
		if n.Step != nil {
			Walk(n.Step, f)
		}

	case *Comprehension:
		Walk(n.Body, f)
		for _, clause := range n.Clauses {
			Walk(clause, f)
		}

	case *IfClause:
		Walk(n.Cond, f)

	case *ForClause:
		Walk(n.Vars, f)
		Walk(n.X, f)

	case *TupleExpr:
		for _, x := range n.List {
			Walk(x, f)
		}

	case *DictExpr:
		for _, entry := range n.List {
			Walk(entry, f)
		}

	case *UnaryExpr:
		// X is nil for the bare "*" parameter.
		if n.X != nil {
			Walk(n.X, f)
		}

	case *BinaryExpr:
		Walk(n.X, f)
		Walk(n.Y, f)

	case *DotExpr:
		Walk(n.X, f)
		Walk(n.Name, f)

	case *CallExpr:
		Walk(n.Fn, f)
		for _, arg := range n.Args {
			Walk(arg, f)
		}

	case *LambdaExpr:
		for _, param := range n.Params {
			Walk(param, f)
		}
		Walk(n.Body, f)

	default:
		panic(n)
	}

	f(nil)
}
// walkStmts calls Walk on each statement of stmts, in order.
func walkStmts(stmts []Stmt, f func(Node) bool) {
	for i := 0; i < len(stmts); i++ {
		Walk(stmts[i], f)
	}
}
@@ -0,0 +1,103 @@
package syntax_test
import (
"bytes"
"fmt"
"log"
"reflect"
"strings"
"testing"
"go.starlark.net/syntax"
)
// TestWalk parses a small program, walks it, and prints the type name
// of each visited node indented by one space per level of depth.
// The listing is then compared with the expected tree.
func TestWalk(t *testing.T) {
	// The indentation inside this program is significant to the parser.
	const src = `
for x in y:
  if x:
    pass
  else:
    f([2*x for x in "abc"])
`
	// TODO(adonovan): test that it finds all syntax.Nodes
	// (compare against a reflect-based implementation).
	// TODO(adonovan): test that the result of f is used to prune
	// the descent.
	f, err := syntax.Parse("hello.go", src, 0)
	if err != nil {
		t.Fatal(err)
	}

	var buf bytes.Buffer
	var depth int
	syntax.Walk(f, func(n syntax.Node) bool {
		if n == nil {
			// Walk signals the end of a node's children with f(nil).
			depth--
			return true
		}
		fmt.Fprintf(&buf, "%s%s\n",
			strings.Repeat(" ", depth),
			strings.TrimPrefix(reflect.TypeOf(n).String(), "*syntax."))
		depth++
		return true
	})
	got := buf.String()
	// One space of indentation per level, matching strings.Repeat above.
	want := `
File
 ForStmt
  Ident
  Ident
  IfStmt
   Ident
   BranchStmt
   ExprStmt
    CallExpr
     Ident
     Comprehension
      BinaryExpr
       Literal
       Ident
      ForClause
       Ident
       Literal`
	got = strings.TrimSpace(got)
	want = strings.TrimSpace(want)
	if got != want {
		t.Errorf("got %s, want %s", got, want)
	}
}
// ExampleWalk demonstrates the use of Walk to
// enumerate the identifiers in a Starlark source file
// containing a nonsense program with varied grammar.
func ExampleWalk() {
	// The indentation inside this program is significant to the parser:
	// the bodies of the def and for statements must be indented.
	const src = `
load("library", "a")
def b(c, *, d=e):
    f += {g: h}
    i = -(j)
    return k.l[m + n]
for o in [p for q, r in s if t]:
    u(lambda: v, w[x:y:z])
`
	f, err := syntax.Parse("hello.star", src, 0)
	if err != nil {
		log.Fatal(err)
	}

	var idents []string
	syntax.Walk(f, func(n syntax.Node) bool {
		if id, ok := n.(*syntax.Ident); ok {
			idents = append(idents, id.Name)
		}
		return true
	})
	fmt.Println(strings.Join(idents, " "))

	// The identifier 'a' appears in both LoadStmt.From[0] and LoadStmt.To[0].

	// Output:
	// a a b c d e f g h i j k l m n o p q r s t u v w x y z
}