whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,129 @@
+
+Grammar of Starlark
+==================
+
+File = {Statement | newline} eof .
+
+Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
+
+DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
+
+Parameters = Parameter {',' Parameter}.
+
+Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
+
+IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
+
+ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
+
+WhileStmt = 'while' Test ':' Suite .
+
+Suite = [newline indent {Statement} outdent] | SimpleStmt .
+
+SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
+# NOTE: '\n' optional at EOF
+
+SmallStmt = ReturnStmt
+          | BreakStmt | ContinueStmt | PassStmt
+          | AssignStmt
+          | ExprStmt
+          | LoadStmt
+          .
+
+ReturnStmt   = 'return' [Expression] .
+BreakStmt    = 'break' .
+ContinueStmt = 'continue' .
+PassStmt     = 'pass' .
+AssignStmt   = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
+ExprStmt     = Expression .
+
+LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
+
+Test = LambdaExpr
+     | IfExpr
+     | PrimaryExpr
+     | UnaryExpr
+     | BinaryExpr
+     .
+
+LambdaExpr = 'lambda' [Parameters] ':' Test .
+
+IfExpr = Test 'if' Test 'else' Test .
+
+PrimaryExpr = Operand
+            | PrimaryExpr DotSuffix
+            | PrimaryExpr CallSuffix
+            | PrimaryExpr SliceSuffix
+            .
+
+Operand = identifier
+        | int | float | string
+        | ListExpr | ListComp
+        | DictExpr | DictComp
+        | '(' [Expression [',']] ')'
+        | ('-' | '+') PrimaryExpr
+        .
+
+DotSuffix   = '.' identifier .
+CallSuffix  = '(' [Arguments [',']] ')' .
+SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
+
+Arguments = Argument {',' Argument} .
+Argument  = Test | identifier '=' Test | '*' Test | '**' Test .
+
+ListExpr = '[' [Expression [',']] ']' .
+ListComp = '[' Test {CompClause} ']'.
+
+DictExpr = '{' [Entries [',']] '}' .
+DictComp = '{' Entry {CompClause} '}' .
+Entries  = Entry {',' Entry} .
+Entry    = Test ':' Test .
+
+CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
+
+UnaryExpr = 'not' Test .
+
+BinaryExpr = Test {Binop Test} .
+
+Binop = 'or'
+      | 'and'
+      | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
+      | '|'
+      | '^'
+      | '&'
+      | '<<' | '>>'
+      | '-' | '+'
+      | '*' | '%' | '/' | '//'
+      .
+
+Expression = Test {',' Test} .
+# NOTE: trailing comma permitted only when within [...] or (...).
+
+LoopVariables = PrimaryExpr {',' PrimaryExpr} .
+
+
+# Notation (similar to Go spec):
+- lowercase and 'quoted' items are lexical tokens.
+- Capitalized names denote grammar productions.
+- (...) implies grouping
+- x | y means either x or y.
+- [x] means x is optional
+- {x} means x is repeated zero or more times
+- The end of each declaration is marked with a period.
+
+# Tokens
+- spaces: newline, eof, indent, outdent.
+- identifier.
+- literals: string, int, float.
+- plus all quoted tokens such as '+=', 'return'.
+
+# Notes:
+- Ambiguity is resolved using operator precedence.
+- The grammar does not enforce the legal order of params and args,
+  nor that the first compclause must be a 'for'.
+
+TODO:
+- explain how the lexer generates indent, outdent, and newline tokens.
+- why is unary NOT separated from unary - and +?
+- the grammar is (mostly) in LL(1) style so, for example,
+  dot expressions are formed from suffixes rather than written as
+  complete expressions, which makes the spec harder to read.
+  Reorganize into non-LL(1) form?
@@ -0,0 +1,63 @@
+// Copyright 2023 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import _ "unsafe" // for linkname
+
+// FileOptions specifies various per-file options that affect static
+// aspects of an individual file such as parsing, name resolution, and
+// code generation. (Options that affect global dynamics are typically
+// controlled through [starlark.Thread].)
+//
+// The zero value of FileOptions is the default behavior: every
+// optional feature listed below is off.
+//
+// Many functions in this package come in two versions: the legacy
+// standalone function (such as [Parse]) uses [LegacyFileOptions],
+// whereas the more recent method (such as [FileOptions.Parse]) honors the
+// provided options. The second form is preferred. In other packages,
+// the modern version is a standalone function with a leading
+// FileOptions parameter and the name suffix "Options", such as
+// [starlark.ExecFileOptions].
+type FileOptions struct {
+	// Options consulted by the resolver.
+	Set               bool // allow references to the 'set' built-in function
+	While             bool // allow 'while' statements
+	TopLevelControl   bool // allow if/for/while statements at top-level
+	GlobalReassign    bool // allow reassignment to top-level names
+	LoadBindsGlobally bool // load creates global not file-local bindings (deprecated)
+
+	// Options consulted by the compiler.
+	Recursion bool // disable recursion check for functions in this file
+}
+
+// TODO(adonovan): provide a canonical flag parser for FileOptions.
+// (And use it in the testdata "options:" strings.)
+
+// LegacyFileOptions returns a freshly allocated FileOptions populated
+// from the current values of the resolver package's legacy global
+// variables such as [resolve.AllowRecursion].
+// These variables may be associated with command-line flags.
+//
+// While, TopLevelControl, and GlobalReassign are all taken from the
+// single legacy AllowGlobalReassign flag.
+func LegacyFileOptions() *FileOptions {
+	opts := new(FileOptions)
+
+	// One legacy flag gates three of the modern options.
+	reassign := resolverAllowGlobalReassign
+	opts.While = reassign
+	opts.TopLevelControl = reassign
+	opts.GlobalReassign = reassign
+
+	// The remaining options map one-to-one onto legacy flags.
+	opts.Set = resolverAllowSet
+	opts.LoadBindsGlobally = resolverLoadBindsGlobally
+	opts.Recursion = resolverAllowRecursion
+
+	return opts
+}
+
+// Access resolver (legacy) flags, if they are linked in; false otherwise.
+//
+// Each variable is bound by its go:linkname directive to the like-named
+// flag in package go.starlark.net/resolve, and is read by
+// LegacyFileOptions.
+// NOTE(review): "false otherwise" presumably relies on the variable
+// keeping its zero value when the resolve package is not part of the
+// build -- confirm.
+var (
+	//go:linkname resolverAllowSet go.starlark.net/resolve.AllowSet
+	resolverAllowSet bool
+	//go:linkname resolverAllowGlobalReassign go.starlark.net/resolve.AllowGlobalReassign
+	resolverAllowGlobalReassign bool
+	//go:linkname resolverAllowRecursion go.starlark.net/resolve.AllowRecursion
+	resolverAllowRecursion bool
+	//go:linkname resolverLoadBindsGlobally go.starlark.net/resolve.LoadBindsGlobally
+	resolverLoadBindsGlobally bool
+)
@@ -0,0 +1,487 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax_test
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"go/build"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"testing"
+
+	"go.starlark.net/internal/chunkedfile"
+	"go.starlark.net/starlarktest"
+	"go.starlark.net/syntax"
+)
+
+// TestExprParseTrees parses each input with syntax.ParseExpr and compares
+// the printed tree (see treeString) against the expected string. When
+// parsing fails, the error message with its position prefix removed
+// (see stripPos) is compared instead, so a table entry may also name an
+// expected error.
+func TestExprParseTrees(t *testing.T) {
+	for _, test := range []struct {
+		input, want string
+	}{
+		{`print(1)`,
+			`(CallExpr Fn=print Args=(1))`},
+		{"print(1)\n",
+			`(CallExpr Fn=print Args=(1))`},
+		{`x + 1`,
+			`(BinaryExpr X=x Op=+ Y=1)`},
+		{`[x for x in y]`,
+			`(Comprehension Body=x Clauses=((ForClause Vars=x X=y)))`},
+		{`[x for x in (a if b else c)]`,
+			`(Comprehension Body=x Clauses=((ForClause Vars=x X=(ParenExpr X=(CondExpr Cond=b True=a False=c)))))`},
+		{`x[i].f(42)`,
+			`(CallExpr Fn=(DotExpr X=(IndexExpr X=x Y=i) Name=f) Args=(42))`},
+		{`x.f()`,
+			`(CallExpr Fn=(DotExpr X=x Name=f))`},
+		{`x+y*z`,
+			`(BinaryExpr X=x Op=+ Y=(BinaryExpr X=y Op=* Y=z))`},
+		{`x%y-z`,
+			`(BinaryExpr X=(BinaryExpr X=x Op=% Y=y) Op=- Y=z)`},
+		{`a + b not in c`,
+			`(BinaryExpr X=(BinaryExpr X=a Op=+ Y=b) Op=not in Y=c)`},
+		{`lambda x, *args, **kwargs: None`,
+			`(LambdaExpr Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=None)`},
+		{`{"one": 1}`,
+			`(DictExpr List=((DictEntry Key="one" Value=1)))`},
+		{`a[i]`,
+			`(IndexExpr X=a Y=i)`},
+		{`a[i:]`,
+			`(SliceExpr X=a Lo=i)`},
+		{`a[:j]`,
+			`(SliceExpr X=a Hi=j)`},
+		{`a[::]`,
+			`(SliceExpr X=a)`},
+		{`a[::k]`,
+			`(SliceExpr X=a Step=k)`},
+		{`[]`,
+			`(ListExpr)`},
+		{`[1]`,
+			`(ListExpr List=(1))`},
+		{`[1,]`,
+			`(ListExpr List=(1))`},
+		{`[1, 2]`,
+			`(ListExpr List=(1 2))`},
+		{`()`,
+			`(TupleExpr)`},
+		{`(4,)`,
+			`(ParenExpr X=(TupleExpr List=(4)))`},
+		{`(4)`,
+			`(ParenExpr X=4)`},
+		{`(4, 5)`,
+			`(ParenExpr X=(TupleExpr List=(4 5)))`},
+		{`1, 2, 3`,
+			`(TupleExpr List=(1 2 3))`},
+		{`1, 2,`,
+			`unparenthesized tuple with trailing comma`},
+		{`{}`,
+			`(DictExpr)`},
+		{`{"a": 1}`,
+			`(DictExpr List=((DictEntry Key="a" Value=1)))`},
+		{`{"a": 1,}`,
+			`(DictExpr List=((DictEntry Key="a" Value=1)))`},
+		{`{"a": 1, "b": 2}`,
+			`(DictExpr List=((DictEntry Key="a" Value=1) (DictEntry Key="b" Value=2)))`},
+		{`{x: y for (x, y) in z}`,
+			`(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=(ParenExpr X=(TupleExpr List=(x y))) X=z)))`},
+		{`{x: y for a in b if c}`,
+			`(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=a X=b) (IfClause Cond=c)))`},
+		{`-1 + +2`,
+			`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`},
+		{`"foo" + "bar"`,
+			`(BinaryExpr X="foo" Op=+ Y="bar")`},
+		{`-1 * 2`, // prec(unary -) > prec(binary *)
+			`(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`},
+		{`-x[i]`, // prec(unary -) < prec(x[i])
+			`(UnaryExpr Op=- X=(IndexExpr X=x Y=i))`},
+		{`a | b & c | d`, // prec(|) < prec(&)
+			`(BinaryExpr X=(BinaryExpr X=a Op=| Y=(BinaryExpr X=b Op=& Y=c)) Op=| Y=d)`},
+		{`a or b and c or d`,
+			`(BinaryExpr X=(BinaryExpr X=a Op=or Y=(BinaryExpr X=b Op=and Y=c)) Op=or Y=d)`},
+		{`a and b or c and d`,
+			`(BinaryExpr X=(BinaryExpr X=a Op=and Y=b) Op=or Y=(BinaryExpr X=c Op=and Y=d))`},
+		{`f(1, x=y)`,
+			`(CallExpr Fn=f Args=(1 (BinaryExpr X=x Op== Y=y)))`},
+		{`f(*args, **kwargs)`,
+			`(CallExpr Fn=f Args=((UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)))`},
+		{`lambda *args, *, x=1, **kwargs: 0`,
+			`(LambdaExpr Params=((UnaryExpr Op=* X=args) (UnaryExpr Op=*) (BinaryExpr X=x Op== Y=1) (UnaryExpr Op=** X=kwargs)) Body=0)`},
+		{`lambda *, a, *b: 0`,
+			`(LambdaExpr Params=((UnaryExpr Op=*) a (UnaryExpr Op=* X=b)) Body=0)`},
+		{`a if b else c`,
+			`(CondExpr Cond=b True=a False=c)`},
+		{`a and not b`,
+			`(BinaryExpr X=a Op=and Y=(UnaryExpr Op=not X=b))`},
+		{`[e for x in y if cond1 if cond2]`,
+			`(Comprehension Body=e Clauses=((ForClause Vars=x X=y) (IfClause Cond=cond1) (IfClause Cond=cond2)))`}, // github.com/google/skylark/issues/53
+	} {
+		e, err := syntax.ParseExpr("foo.star", test.input, 0)
+		var got string
+		if err != nil {
+			got = stripPos(err) // compare against the bare error message
+		} else {
+			got = treeString(e)
+		}
+		if test.want != got {
+			t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+		}
+	}
+}
+
+// TestStmtParseTrees parses each input as a file with syntax.Parse and
+// compares the printed tree of the first statement only; statements
+// after the first (as in "f();g()") are not checked here.
+// A parse error fails the case.
+func TestStmtParseTrees(t *testing.T) {
+	for _, test := range []struct {
+		input, want string
+	}{
+		{`print(1)`,
+			`(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
+		{`return 1, 2`,
+			`(ReturnStmt Result=(TupleExpr List=(1 2)))`},
+		{`return`,
+			`(ReturnStmt)`},
+		{`for i in "abc": break`,
+			`(ForStmt Vars=i X="abc" Body=((BranchStmt Token=break)))`},
+		{`for i in "abc": continue`,
+			`(ForStmt Vars=i X="abc" Body=((BranchStmt Token=continue)))`},
+		{`for x, y in z: pass`,
+			`(ForStmt Vars=(TupleExpr List=(x y)) X=z Body=((BranchStmt Token=pass)))`},
+		{`if True: pass`,
+			`(IfStmt Cond=True True=((BranchStmt Token=pass)))`},
+		{`if True: break`,
+			`(IfStmt Cond=True True=((BranchStmt Token=break)))`},
+		{`if True: continue`,
+			`(IfStmt Cond=True True=((BranchStmt Token=continue)))`},
+		{`if True: pass
else:
	pass`,
+			`(IfStmt Cond=True True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
+		{"if a: pass\nelif b: pass\nelse: pass",
+			`(IfStmt Cond=a True=((BranchStmt Token=pass)) False=((IfStmt Cond=b True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))))`},
+		{`x, y = 1, 2`,
+			`(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=(TupleExpr List=(1 2)))`},
+		{`x[i] = 1`,
+			`(AssignStmt Op== LHS=(IndexExpr X=x Y=i) RHS=1)`},
+		{`x.f = 1`,
+			`(AssignStmt Op== LHS=(DotExpr X=x Name=f) RHS=1)`},
+		{`(x, y) = 1`,
+			`(AssignStmt Op== LHS=(ParenExpr X=(TupleExpr List=(x y))) RHS=1)`},
+		{`load("", "a", b="c")`,
+			`(LoadStmt Module="" From=(a c) To=(a b))`},
+		{`if True: load("", "a", b="c")`, // load needn't be at toplevel
+			`(IfStmt Cond=True True=((LoadStmt Module="" From=(a c) To=(a b))))`},
+		{`def f(x, *args, **kwargs):
	pass`,
+			`(DefStmt Name=f Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((BranchStmt Token=pass)))`},
+		{`def f(**kwargs, *args): pass`,
+			`(DefStmt Name=f Params=((UnaryExpr Op=** X=kwargs) (UnaryExpr Op=* X=args)) Body=((BranchStmt Token=pass)))`},
+		{`def f(a, b, c=d): pass`,
+			`(DefStmt Name=f Params=(a b (BinaryExpr X=c Op== Y=d)) Body=((BranchStmt Token=pass)))`},
+		{`def f(a, b=c, d): pass`,
+			`(DefStmt Name=f Params=(a (BinaryExpr X=b Op== Y=c) d) Body=((BranchStmt Token=pass)))`}, // TODO(adonovan): fix this
+		{`def f():
	def g():
		pass
	pass
def h():
	pass`,
+			`(DefStmt Name=f Body=((DefStmt Name=g Body=((BranchStmt Token=pass))) (BranchStmt Token=pass)))`},
+		{"f();g()",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+		{"f();",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+		{"f();g()\n",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+		{"f();\n",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+	} {
+		f, err := syntax.Parse("foo.star", test.input, 0)
+		if err != nil {
+			t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
+			continue
+		}
+		// Only the first statement is printed and compared.
+		if got := treeString(f.Stmts[0]); test.want != got {
+			t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+		}
+	}
+}
+
+// TestFileParseTrees tests sequences of statements, and particularly
+// handling of indentation, newlines, line continuations, and blank lines.
+// Every statement of the parsed file is printed, one tree per line, and
+// the joined text is compared with the expected string.
+func TestFileParseTrees(t *testing.T) {
+	for _, test := range []struct {
+		input, want string
+	}{
+		{`x = 1
print(x)`,
+			`(AssignStmt Op== LHS=x RHS=1)
(ExprStmt X=(CallExpr Fn=print Args=(x)))`},
+		{"if cond:\n\tpass",
+			`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+		{"if cond:\n\tpass\nelse:\n\tpass",
+			`(IfStmt Cond=cond True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`},
+		{`def f():
	pass
pass

pass`,
+			`(DefStmt Name=f Body=((BranchStmt Token=pass)))
(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
+		{`pass; pass`,
+			`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
+		{"pass\npass",
+			`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
+		{"pass\n\npass",
+			`(BranchStmt Token=pass)
(BranchStmt Token=pass)`},
+		{`x = (1 +
2)`,
+			`(AssignStmt Op== LHS=x RHS=(ParenExpr X=(BinaryExpr X=1 Op=+ Y=2)))`},
+		{`x = 1 \
+ 2`,
+			`(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`},
+	} {
+		f, err := syntax.Parse("foo.star", test.input, 0)
+		if err != nil {
+			t.Errorf("parse `%s` failed: %v", test.input, stripPos(err))
+			continue
+		}
+		// Print the statements separated by (not terminated by) newlines.
+		var buf bytes.Buffer
+		for i, stmt := range f.Stmts {
+			if i > 0 {
+				buf.WriteByte('\n')
+			}
+			writeTree(&buf, reflect.ValueOf(stmt))
+		}
+		if got := buf.String(); test.want != got {
+			t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+		}
+	}
+}
+
+// TestCompoundStmt tests handling of REPL-style compound statements.
+// Each input is fed line by line to syntax.ParseCompoundStmt through a
+// fake readline function. The expected string is either the concatenated
+// trees of the parsed statements (with no separator) or the
+// position-stripped error message.
+func TestCompoundStmt(t *testing.T) {
+	for _, test := range []struct {
+		input, want string
+	}{
+		// blank lines
+		{"\n",
+			``},
+		{"   \n",
+			``},
+		{"# comment\n",
+			``},
+		// simple statement
+		{"1\n",
+			`(ExprStmt X=1)`},
+		{"print(1)\n",
+			`(ExprStmt X=(CallExpr Fn=print Args=(1)))`},
+		{"1;2;3;\n",
+			`(ExprStmt X=1)(ExprStmt X=2)(ExprStmt X=3)`},
+		{"f();g()\n",
+			`(ExprStmt X=(CallExpr Fn=f))(ExprStmt X=(CallExpr Fn=g))`},
+		{"f();\n",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+		{"f(\n\n\n\n\n\n\n)\n",
+			`(ExprStmt X=(CallExpr Fn=f))`},
+		// complex statements
+		{"def f():\n  pass\n\n",
+			`(DefStmt Name=f Body=((BranchStmt Token=pass)))`},
+		{"if cond:\n  pass\n\n",
+			`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+		// Even as a 1-liner, the following blank line is required.
+		{"if cond: pass\n\n",
+			`(IfStmt Cond=cond True=((BranchStmt Token=pass)))`},
+		// github.com/google/starlark-go/issues/121
+		{"a; b; c\n",
+			`(ExprStmt X=a)(ExprStmt X=b)(ExprStmt X=c)`},
+		{"a; b c\n",
+			`invalid syntax`},
+	} {
+
+		// Fake readline input from string.
+		// The ! suffix, which would cause a parse error,
+		// tests that the parser doesn't read more than necessary.
+		sc := bufio.NewScanner(strings.NewReader(test.input + "!"))
+		readline := func() ([]byte, error) {
+			if sc.Scan() {
+				// bufio.Scanner strips the line terminator; put it back.
+				return []byte(sc.Text() + "\n"), nil
+			}
+			return nil, sc.Err()
+		}
+
+		var got string
+		f, err := syntax.ParseCompoundStmt("foo.star", readline)
+		if err != nil {
+			got = stripPos(err)
+		} else {
+			for _, stmt := range f.Stmts {
+				got += treeString(stmt)
+			}
+		}
+		if test.want != got {
+			t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want)
+		}
+	}
+}
+
+func stripPos(err error) string {
+	s := err.Error()
+	if i := strings.Index(s, ": "); i >= 0 {
+		s = s[i+len(": "):] // strip file:line:col
+	}
+	return s
+}
+
+// treeString prints a syntax node as a parenthesized tree.
+// Idents are printed as foo and Literals as "foo" or 42.
+// Structs are printed as (type name=value ...).
+// Only non-empty fields are shown.
+func treeString(n syntax.Node) string {
+	var buf bytes.Buffer
+	writeTree(&buf, reflect.ValueOf(n))
+	return buf.String()
+}
+
+// writeTree appends to out a textual rendering of the value x, walking
+// it by reflection.
+//
+//   - Strings, ints, and bools are printed with %v.
+//   - Pointers and interfaces are followed; a nil one prints as "nil".
+//   - A syntax.Literal prints its Value (quoted for STRING, b-prefixed
+//     and quoted for BYTES, decimal for INT; other tokens print nothing).
+//   - A syntax.Ident prints its Name.
+//   - Any other struct prints as (Type Field=value ...), with the
+//     "syntax." prefix trimmed from the type name and empty fields omitted.
+//   - Any other kind prints its Go type (%T).
+func writeTree(out *bytes.Buffer, x reflect.Value) {
+	switch x.Kind() {
+	case reflect.String, reflect.Int, reflect.Bool:
+		fmt.Fprintf(out, "%v", x.Interface())
+	case reflect.Ptr, reflect.Interface:
+		// Kind 0 is reflect.Invalid, which Elem yields for a nil
+		// pointer or interface.
+		if elem := x.Elem(); elem.Kind() == 0 {
+			out.WriteString("nil")
+		} else {
+			writeTree(out, elem)
+		}
+	case reflect.Struct:
+		// Leaves of the tree get a compact form and return early.
+		switch v := x.Interface().(type) {
+		case syntax.Literal:
+			switch v.Token {
+			case syntax.STRING:
+				fmt.Fprintf(out, "%q", v.Value)
+			case syntax.BYTES:
+				fmt.Fprintf(out, "b%q", v.Value)
+			case syntax.INT:
+				fmt.Fprintf(out, "%d", v.Value)
+			}
+			return
+		case syntax.Ident:
+			out.WriteString(v.Name)
+			return
+		}
+		fmt.Fprintf(out, "(%s", strings.TrimPrefix(x.Type().String(), "syntax."))
+		for i, n := 0, x.NumField(); i < n; i++ {
+			f := x.Field(i)
+			if f.Type() == reflect.TypeOf(syntax.Position{}) {
+				continue // skip positions
+			}
+			name := x.Type().Field(i).Name
+			if name == "commentsRef" {
+				continue // skip comments fields
+			}
+			// Token-typed fields are always printed, via %s.
+			if f.Type() == reflect.TypeOf(syntax.Token(0)) {
+				fmt.Fprintf(out, " %s=%s", name, f.Interface())
+				continue
+			}
+
+			// Fields holding their zero value are omitted below.
+			switch f.Kind() {
+			case reflect.Slice:
+				// Non-empty slices print as name=(e0 e1 ...).
+				if n := f.Len(); n > 0 {
+					fmt.Fprintf(out, " %s=(", name)
+					for i := 0; i < n; i++ {
+						if i > 0 {
+							out.WriteByte(' ')
+						}
+						writeTree(out, f.Index(i))
+					}
+					out.WriteByte(')')
+				}
+				continue
+			case reflect.Ptr, reflect.Interface:
+				if f.IsNil() {
+					continue
+				}
+				// Non-nil: fall out of the switch to the generic printing.
+			case reflect.Int:
+				if f.Int() != 0 {
+					fmt.Fprintf(out, " %s=%d", name, f.Int())
+				}
+				continue
+			case reflect.Bool:
+				// A true bool is shown as the bare field name.
+				if f.Bool() {
+					fmt.Fprintf(out, " %s", name)
+				}
+				continue
+			}
+			fmt.Fprintf(out, " %s=", name)
+			writeTree(out, f)
+		}
+		fmt.Fprintf(out, ")")
+	default:
+		fmt.Fprintf(out, "%T", x.Interface())
+	}
+}
+
+func TestParseErrors(t *testing.T) {
+	filename := starlarktest.DataFile("syntax", "testdata/errors.star")
+	for _, chunk := range chunkedfile.Read(filename, t) {
+		_, err := syntax.Parse(filename, chunk.Source, 0)
+		switch err := err.(type) {
+		case nil:
+			// ok
+		case syntax.Error:
+			chunk.GotError(int(err.Pos.Line), err.Msg)
+		default:
+			t.Error(err)
+		}
+		chunk.Done()
+	}
+}
+
+func TestFilePortion(t *testing.T) {
+	// Imagine that the Starlark file or expression print(x.f) is extracted
+	// from the middle of a file in some hypothetical template language;
+	// see https://github.com/google/starlark-go/issues/346. For example:
+	// --
+	// {{loop x seq}}
+	//   {{print(x.f)}}
+	// {{end}}
+	// --
+	fp := syntax.FilePortion{Content: []byte("print(x.f)"), FirstLine: 2, FirstCol: 4}
+	file, err := syntax.Parse("foo.template", fp, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	span := fmt.Sprint(file.Stmts[0].Span())
+	want := "foo.template:2:4 foo.template:2:14"
+	if span != want {
+		t.Errorf("wrong span: got %q, want %q", span, want)
+	}
+}
+
+// dataFile is the same as starlarktest.DataFile.
+// We make a copy to avoid a dependency cycle.
+// It joins GOPATH, "src/go.starlark.net", pkgdir, and filename.
+var dataFile = func(pkgdir, filename string) string {
+	parts := []string{build.Default.GOPATH, "src/go.starlark.net", pkgdir, filename}
+	return filepath.Join(parts...)
+}
+
+// BenchmarkParse measures syntax.Parse on testdata/scan.star.
+// Reading the file is excluded from the timed region.
+func BenchmarkParse(b *testing.B) {
+	path := dataFile("syntax", "testdata/scan.star")
+
+	// Pause the timer while the input is loaded.
+	b.StopTimer()
+	src, readErr := os.ReadFile(path)
+	if readErr != nil {
+		b.Fatal(readErr)
+	}
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		if _, err := syntax.Parse(path, src, 0); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
@@ -0,0 +1,309 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Starlark quoted string utilities.
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// unesc maps single-letter chars following \ to their actual values.
+// It is indexed by the byte that follows the backslash; bytes without an
+// entry map to zero, so callers must first check that the byte is one of
+// the listed escapes.
+var unesc = [256]byte{
+	'a':  '\a',
+	'b':  '\b',
+	'f':  '\f',
+	'n':  '\n',
+	'r':  '\r',
+	't':  '\t',
+	'v':  '\v',
+	'\\': '\\',
+	'\'': '\'',
+	'"':  '"',
+}
+
+// esc maps escape-worthy bytes to the char that should follow \.
+// It is the inverse of unesc; bytes without an entry map to zero.
+// Quote in this file spells out its escapes in a switch and does not
+// consult this table.
+var esc = [256]byte{
+	'\a': 'a',
+	'\b': 'b',
+	'\f': 'f',
+	'\n': 'n',
+	'\r': 'r',
+	'\t': 't',
+	'\v': 'v',
+	'\\': '\\',
+	'\'': '\'',
+	'"':  '"',
+}
+
+// unquote unquotes the quoted string, returning the actual
+// string value, whether the original was triple-quoted,
+// whether it was a byte string, and an error describing invalid input.
+//
+// The literal may carry an optional "r" (raw) prefix followed by an
+// optional "b" (bytes) prefix, and must be delimited by matching single
+// or double quotes, or by tripled quotes. In raw mode backslashes are
+// kept verbatim. In every mode a carriage return, optionally followed by
+// a newline, is normalized to a single newline.
+//
+// Octal and hex escapes above 127 are accepted only in byte strings.
+// On error, the returned string is empty.
+func unquote(quoted string) (s string, triple, isByte bool, err error) {
+	// Check for raw prefix: means don't interpret the inner \.
+	raw := false
+	if strings.HasPrefix(quoted, "r") {
+		raw = true
+		quoted = quoted[1:]
+	}
+	// Check for bytes prefix.
+	if strings.HasPrefix(quoted, "b") {
+		isByte = true
+		quoted = quoted[1:]
+	}
+
+	if len(quoted) < 2 {
+		err = fmt.Errorf("string literal too short")
+		return
+	}
+
+	if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
+		err = fmt.Errorf("string literal has invalid quotes")
+		return
+	}
+
+	// Check for triple quoted string.
+	quote := quoted[0]
+	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
+		triple = true
+		quoted = quoted[3 : len(quoted)-3]
+	} else {
+		quoted = quoted[1 : len(quoted)-1]
+	}
+
+	// Now quoted is the quoted data, but no quotes.
+	// If we're in raw mode or there are no escapes or
+	// carriage returns, we're done.
+	var unquoteChars string
+	if raw {
+		unquoteChars = "\r"
+	} else {
+		unquoteChars = "\\\r"
+	}
+	if !strings.ContainsAny(quoted, unquoteChars) {
+		s = quoted
+		return
+	}
+
+	// Otherwise process quoted string.
+	// Each iteration processes one escape sequence along with the
+	// plain text leading up to it.
+	buf := new(strings.Builder)
+	for {
+		// Remove prefix before escape sequence.
+		i := strings.IndexAny(quoted, unquoteChars)
+		if i < 0 {
+			i = len(quoted)
+		}
+		buf.WriteString(quoted[:i])
+		quoted = quoted[i:]
+
+		if len(quoted) == 0 {
+			break
+		}
+
+		// Process carriage return.
+		if quoted[0] == '\r' {
+			buf.WriteByte('\n')
+			if len(quoted) > 1 && quoted[1] == '\n' {
+				quoted = quoted[2:]
+			} else {
+				quoted = quoted[1:]
+			}
+			continue
+		}
+
+		// Process escape sequence.
+		if len(quoted) == 1 {
+			err = fmt.Errorf(`truncated escape sequence \`)
+			return
+		}
+
+		switch quoted[1] {
+		default:
+			// In Starlark, like Go, a backslash must escape something.
+			// (Python still treats unnecessary backslashes literally,
+			// but since 3.6 has emitted a deprecation warning.)
+			err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
+			return
+
+		case '\n':
+			// Ignore the escape and the line break.
+			quoted = quoted[2:]
+
+		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
+			// One-char escape.
+			// Escapes are allowed for both kinds of quotation
+			// mark, not just the kind in use.
+			buf.WriteByte(unesc[quoted[1]])
+			quoted = quoted[2:]
+
+		case '0', '1', '2', '3', '4', '5', '6', '7':
+			// Octal escape, up to 3 digits, \OOO.
+			n := int(quoted[1] - '0')
+			quoted = quoted[2:]
+			for i := 1; i < 3; i++ {
+				if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
+					break
+				}
+				n = n*8 + int(quoted[0]-'0')
+				quoted = quoted[1:]
+			}
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
+				return
+			}
+			if n >= 256 {
+				// NOTE: Python silently discards the high bit,
+				// so that '\541' == '\141' == 'a'.
+				// Let's see if we can avoid doing that in BUILD files.
+				err = fmt.Errorf(`invalid escape sequence \%03o`, n)
+				return
+			}
+			buf.WriteByte(byte(n))
+
+		case 'x':
+			// Hexadecimal escape, exactly 2 digits, \xXX. [0-127]
+			if len(quoted) < 4 {
+				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
+				return
+			}
+			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
+			if err1 != nil {
+				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
+				return
+			}
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
+					quoted[:4], n, n)
+				return
+			}
+			buf.WriteByte(byte(n))
+			quoted = quoted[4:]
+
+		case 'u', 'U':
+			// Unicode code point, 4 (\uXXXX) or 8 (\UXXXXXXXX) hex digits.
+			sz := 6
+			if quoted[1] == 'U' {
+				sz = 10
+			}
+			if len(quoted) < sz {
+				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
+				return
+			}
+			n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
+			if err1 != nil {
+				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
+				return
+			}
+			if n > unicode.MaxRune {
+				// Report the actual upper bound, not the offending
+				// value (which quoted[:sz] already shows).
+				err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
+					quoted[:sz], unicode.MaxRune)
+				return
+			}
+			// As in Go, surrogates are disallowed.
+			if 0xD800 <= n && n < 0xE000 {
+				err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
+				return
+			}
+			buf.WriteRune(rune(n))
+			quoted = quoted[sz:]
+		}
+	}
+
+	s = buf.String()
+	return
+}
+
+// indexByte returns the index of the first instance of b in s, or else -1.
+// It is a thin wrapper around strings.IndexByte.
+func indexByte(s string, b byte) int {
+	return strings.IndexByte(s, b)
+}
+
+// Quote returns a Starlark literal that denotes s.
+// If b, it returns a bytes literal.
+//
+// The result is always double-quoted. Printable runes are copied as is;
+// double quote and backslash are backslash-escaped; control characters
+// use the one-letter escapes where they exist and \xXX otherwise; other
+// non-printable runes use \uXXXX or \UXXXXXXXX. Bytes that are not valid
+// UTF-8 are written as \xXX.
+func Quote(s string, b bool) string {
+	const digits = "0123456789abcdef"
+	var enc [utf8.UTFMax]byte // scratch space for re-encoding printable runes
+
+	out := make([]byte, 0, 3*len(s)/2)
+	if b {
+		out = append(out, 'b')
+	}
+	out = append(out, '"')
+
+	for len(s) > 0 {
+		// Decode the next rune; ASCII bytes need no decoding.
+		first := s[0]
+		r, size := rune(first), 1
+		if r >= utf8.RuneSelf {
+			r, size = utf8.DecodeRuneInString(s)
+		}
+		s = s[size:]
+
+		switch {
+		case size == 1 && r == utf8.RuneError:
+			// String (!b) literals accept \xXX escapes only for ASCII,
+			// but we must use them here to represent invalid bytes.
+			// The result is not a legal literal.
+			out = append(out, '\\', 'x', digits[first>>4], digits[first&0xF])
+		case r == '"' || r == '\\': // always backslashed
+			out = append(out, '\\', byte(r))
+		case strconv.IsPrint(r):
+			n := utf8.EncodeRune(enc[:], r)
+			out = append(out, enc[:n]...)
+		case r == '\a':
+			out = append(out, '\\', 'a')
+		case r == '\b':
+			out = append(out, '\\', 'b')
+		case r == '\f':
+			out = append(out, '\\', 'f')
+		case r == '\n':
+			out = append(out, '\\', 'n')
+		case r == '\r':
+			out = append(out, '\\', 'r')
+		case r == '\t':
+			out = append(out, '\\', 't')
+		case r == '\v':
+			out = append(out, '\\', 'v')
+		case r < ' ' || r == 0x7f:
+			out = append(out, '\\', 'x', digits[byte(r)>>4], digits[byte(r)&0xF])
+		default:
+			if r > utf8.MaxRune {
+				r = 0xFFFD
+			}
+			// Four hex digits suffice below U+10000; otherwise use eight.
+			letter, top := byte('u'), 12
+			if r >= 0x10000 {
+				letter, top = 'U', 28
+			}
+			out = append(out, '\\', letter)
+			for shift := top; shift >= 0; shift -= 4 {
+				out = append(out, digits[r>>uint(shift)&0xF])
+			}
+		}
+	}
+
+	out = append(out, '"')
+	return string(out)
+}
@@ -0,0 +1,65 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+	"strings"
+	"testing"
+)
+
+// quoteTests is the table shared by TestQuote and TestUnquote.
+// Every entry is checked in the unquote direction (q must unquote to s);
+// only entries with std set are also checked in the Quote direction
+// (s must quote to exactly q).
+var quoteTests = []struct {
+	q   string // quoted
+	s   string // unquoted (actual string)
+	std bool   // q is standard form for s
+}{
+	{`""`, "", true},
+	{`''`, "", false},
+	{`"hello"`, `hello`, true},
+	{`'hello'`, `hello`, false},
+	{`"quote\"here"`, `quote"here`, true},
+	{`'quote"here'`, `quote"here`, false},
+	{`"quote'here"`, `quote'here`, true},
+	{`'quote\'here'`, `quote'here`, false},
+
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", true},
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", true},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"\\\x03", false},
+	{
+		`"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    \x27\\1\x27,/g' >> $@; "`,
+		"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    '\\1',/g' >> $@; ",
+		false,
+	},
+	{
+		`"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    '\\1',/g' >> $@; "`,
+		"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    '\\1',/g' >> $@; ",
+		true,
+	},
+}
+
+// TestQuote checks that Quote produces the standard form q for every
+// quoteTests entry marked std; other entries are skipped.
+func TestQuote(t *testing.T) {
+	for _, tc := range quoteTests {
+		if tc.std {
+			if got := Quote(tc.s, false); got != tc.q {
+				t.Errorf("quote(%#q) = %s, want %s", tc.s, got, tc.q)
+			}
+		}
+	}
+}
+
+// TestUnquote checks that unquote succeeds on every quoteTests entry,
+// yields the expected string, and reports triple-quoting exactly when
+// the literal starts with three quote characters.
+func TestUnquote(t *testing.T) {
+	for _, tc := range quoteTests {
+		wantTriple := strings.HasPrefix(tc.q, `"""`) || strings.HasPrefix(tc.q, `'''`)
+		got, gotTriple, _, err := unquote(tc.q)
+		if err == nil && got == tc.s && gotTriple == wantTriple {
+			continue
+		}
+		t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tc.q, got, gotTriple, err, tc.s, wantTriple)
+	}
+}
@@ -0,0 +1,310 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import (
+	"bytes"
+	"fmt"
+	"go/build"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// scan tokenizes src, which is passed to newScanner under the file name
+// "foo.star", and returns the tokens rendered as one space-separated
+// string that ends in "EOF".
+//
+// Identifiers are rendered as their raw text, ints in decimal, floats
+// with %e, and string and bytes literals via Quote; every other token
+// is rendered with its String method. An error from newScanner is
+// returned directly. sc.recover is deferred with the address of err,
+// presumably so that a scanner failure surfaces through err.
+func scan(src interface{}) (tokens string, err error) {
+	sc, err := newScanner("foo.star", src, false)
+	if err != nil {
+		return "", err
+	}
+
+	defer sc.recover(&err)
+
+	var (
+		out bytes.Buffer
+		val tokenValue
+	)
+	for {
+		tok := sc.nextToken(&val)
+
+		// Separate consecutive tokens with a single space.
+		if out.Len() != 0 {
+			out.WriteByte(' ')
+		}
+		switch tok {
+		case EOF:
+			// EOF is always the last token rendered.
+			out.WriteString("EOF")
+			return out.String(), nil
+		case IDENT:
+			out.WriteString(val.raw)
+		case INT:
+			if val.bigInt == nil {
+				fmt.Fprintf(&out, "%d", val.int)
+			} else {
+				fmt.Fprintf(&out, "%d", val.bigInt)
+			}
+		case FLOAT:
+			fmt.Fprintf(&out, "%e", val.float)
+		case STRING, BYTES:
+			out.WriteString(Quote(val.string, tok == BYTES))
+		default:
+			out.WriteString(tok.String())
+		}
+	}
+}
+
+func TestScanner(t *testing.T) {
+	for _, test := range []struct {
+		input, want string
+	}{
+		{``, "EOF"},
+		{`123`, "123 EOF"},
+		{`x.y`, "x . y EOF"},
+		{`chocolate.éclair`, `chocolate . éclair EOF`},
+		{`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`},
+		{`print(x)`, "print ( x ) EOF"},
+		{`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"},
+		{"\nprint(\n1\n)\n", "print ( 1 ) newline EOF"}, // final \n is at toplevel on non-blank line => token
+		{`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
+		{`# hello
+print(x)`, "print ( x ) EOF"},
+		{`# hello
+print(1)
+cc_binary(name="foo")
+def f(x):
+		return x+1
+print(1)
+`,
+			`print ( 1 ) newline ` +
+				`cc_binary ( name = "foo" ) newline ` +
+				`def f ( x ) : newline ` +
+				`indent return x + 1 newline ` +
+				`outdent print ( 1 ) newline ` +
+				`EOF`},
+		// EOF should act like an implicit newline.
+		{`def f(): pass`,
+			"def f ( ) : pass EOF"},
+		{`def f():
+	pass`,
+			"def f ( ) : newline indent pass newline outdent EOF"},
+		{`def f():
+	pass
+# oops`,
+			"def f ( ) : newline indent pass newline outdent EOF"},
+		{`def f():
+	pass \
+`,
+			"def f ( ) : newline indent pass newline outdent EOF"},
+		{`def f():
+	pass
+`,
+			"def f ( ) : newline indent pass newline outdent EOF"},
+		{`pass
+
+
+pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
+		{`def f():
+    pass
+    `, "def f ( ) : newline indent pass newline outdent EOF"},
+		{`def f():
+    pass
+    ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"},
+		{"pass", "pass EOF"},
+		{"pass\n", "pass newline EOF"},
+		{"pass\n ", "pass newline EOF"},
+		{"pass\n \n", "pass newline EOF"},
+		{"if x:\n  pass\n ", "if x : newline indent pass newline outdent EOF"},
+		{`x = 1 + \
+2`, `x = 1 + 2 EOF`},
+		{`x = 'a\nb'`, `x = "a\nb" EOF`},
+		{`x = r'a\nb'`, `x = "a\\nb" EOF`},
+		{"x = 'a\\\nb'", `x = "ab" EOF`},
+		{`x = '\''`, `x = "'" EOF`},
+		{`x = "\""`, `x = "\"" EOF`},
+		{`x = r'\''`, `x = "\\'" EOF`},
+		{`x = '''\''''`, `x = "'" EOF`},
+		{`x = r'''\''''`, `x = "\\'" EOF`},
+		{`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`},
+		{"x = '''a\nb'''", `x = "a\nb" EOF`},
+		{"x = '''a\rb'''", `x = "a\nb" EOF`},
+		{"x = '''a\r\nb'''", `x = "a\nb" EOF`},
+		{"x = '''a\n\rb'''", `x = "a\n\nb" EOF`},
+		{"x = r'a\\\nb'", `x = "a\\\nb" EOF`},
+		{"x = r'a\\\rb'", `x = "a\\\nb" EOF`},
+		{"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`},
+		{"a\rb", `a newline b EOF`},
+		{"a\nb", `a newline b EOF`},
+		{"a\r\nb", `a newline b EOF`},
+		{"a\n\nb", `a newline b EOF`},
+		// numbers
+		{"0", `0 EOF`},
+		{"00", `0 EOF`},
+		{"0.", `0.000000e+00 EOF`},
+		{"0.e1", `0.000000e+00 EOF`},
+		{".0", `0.000000e+00 EOF`},
+		{"0.0", `0.000000e+00 EOF`},
+		{".e1", `. e1 EOF`},
+		{"1", `1 EOF`},
+		{"1.", `1.000000e+00 EOF`},
+		{".1", `1.000000e-01 EOF`},
+		{".1e1", `1.000000e+00 EOF`},
+		{".1e+1", `1.000000e+00 EOF`},
+		{".1e-1", `1.000000e-02 EOF`},
+		{"1e1", `1.000000e+01 EOF`},
+		{"1e+1", `1.000000e+01 EOF`},
+		{"1e-1", `1.000000e-01 EOF`},
+		{"123", `123 EOF`},
+		{"123e45", `1.230000e+47 EOF`},
+		{"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`},
+		{"12345678901234567890", `12345678901234567890 EOF`},
+		// hex
+		{"0xA", `10 EOF`},
+		{"0xAAG", `170 G EOF`},
+		{"0xG", `foo.star:1:1: invalid hex literal`},
+		{"0XA", `10 EOF`},
+		{"0XG", `foo.star:1:1: invalid hex literal`},
+		{"0xA.", `10 . EOF`},
+		{"0xA.e1", `10 . e1 EOF`},
+		{"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`},
+		// binary
+		{"0b1010", `10 EOF`},
+		{"0B111101", `61 EOF`},
+		{"0b3", `foo.star:1:3: invalid binary literal`},
+		{"0b1010201", `10 201 EOF`},
+		{"0b1010.01", `10 1.000000e-02 EOF`},
+		{"0b0000", `0 EOF`},
+		// octal
+		{"0o123", `83 EOF`},
+		{"0o12834", `10 834 EOF`},
+		{"0o12934", `10 934 EOF`},
+		{"0o12934.", `10 9.340000e+02 EOF`},
+		{"0o12934.1", `10 9.341000e+02 EOF`},
+		{"0o12934e1", `10 9.340000e+03 EOF`},
+		{"0o123.", `83 . EOF`},
+		{"0o123.1", `83 1.000000e-01 EOF`},
+		{"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
+		{"012834", `foo.star:1:1: invalid int literal`},
+		{"012934", `foo.star:1:1: invalid int literal`},
+		{"i = 012934", `foo.star:1:5: invalid int literal`},
+		// octal escapes in string literals
+		{`"\037"`, `"\x1f" EOF`},
+		{`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
+		{`"\378"`, `"\x1f8" EOF`},                               // = '\37' + '8'
+		{`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
+		// hex escapes
+		{`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
+		{`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
+		{`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
+		{`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
+		// Unicode escapes
+		// \uXXXX
+		{`"\u0400"`, `"Ѐ" EOF`},
+		{`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
+		{`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+		{`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
+		{`"\u4E16"`, `"世" EOF`},
+		{`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+		// \UXXXXXXXX
+		{`"\U00000400"`, `"Ѐ" EOF`},
+		{`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
+		{`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+		{`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
+		{`"\U0010FFFF"`, `"\U0010ffff" EOF`},
+		{`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
+		{`"\U0001F63F"`, `"😿" EOF`},
+		{`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+
+		// backslash escapes
+		// As in Go, a backslash must escape something.
+		// (Python started issuing a deprecation warning in 3.6.)
+		{`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
+		{`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
+		{`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
+		{`"\""`, `"\"" EOF`},
+		{`"\'"`, `"'" EOF`},
+		{`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
+		{`'\''`, `"'" EOF`},
+		{`'\"'`, `"\"" EOF`},
+		{`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
+		{`"""\""""`, `"\"" EOF`},
+		{`"""\'"""`, `"'" EOF`},
+		{`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
+		{`'''\''''`, `"'" EOF`},
+		{`'''\"'''`, `"\"" EOF`},
+		{`r"\w"`, `"\\w" EOF`},
+		{`r"\""`, `"\\\"" EOF`},
+		{`r"\'"`, `"\\'" EOF`},
+		{`r'\w'`, `"\\w" EOF`},
+		{`r'\''`, `"\\'" EOF`},
+		{`r'\"'`, `"\\\"" EOF`},
+		{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
+		{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
+		// bytes literals (where they differ from text strings)
+		{`b"AЀ世😿"`, `b"AЀ世😿`},                                       // 1-4 byte encodings, literal
+		{`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`},                // same, as escapes
+		{`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
+		{`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
+		{`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
+		// floats starting with octal digits
+		{"012934.", `1.293400e+04 EOF`},
+		{"012934.1", `1.293410e+04 EOF`},
+		{"012934e1", `1.293400e+05 EOF`},
+		{"0123.", `1.230000e+02 EOF`},
+		{"0123.1", `1.231000e+02 EOF`},
+		// github.com/google/skylark/issues/16
+		{"x ! 0", "foo.star:1:3: unexpected input character '!'"},
+		// github.com/google/starlark-go/issues/80
+		{"([{<>}])", "( [ { < > } ] ) EOF"},
+		{"f();", "f ( ) ; EOF"},
+		// github.com/google/starlark-go/issues/104
+		{"def f():\n  if x:\n    pass\n  ", `def f ( ) : newline indent if x : newline indent pass newline outdent outdent EOF`},
+		{`while cond: pass`, "while cond : pass EOF"},
+		// github.com/google/starlark-go/issues/107
+		{"~= ~= 5", "~ = ~ = 5 EOF"},
+		{"0in", "0 in EOF"},
+		{"0or", "foo.star:1:3: invalid octal literal"},
+		{"6in", "6 in EOF"},
+		{"6or", "6 or EOF"},
+	} {
+		got, err := scan(test.input)
+		if err != nil {
+			got = err.(Error).Error()
+		}
+		// Prefix match allows us to truncate errors in expectations.
+		// Success cases all end in EOF.
+		if !strings.HasPrefix(got, test.want) {
+			t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
+		}
+	}
+}
+
+// dataFile returns the path of filename within package directory pkgdir
+// of the go.starlark.net source tree under build.Default.GOPATH.
+// It is the same as starlarktest.DataFile;
+// we make a copy to avoid a dependency cycle.
+var dataFile = func(pkgdir, filename string) string {
+	root := filepath.Join(build.Default.GOPATH, "src/go.starlark.net")
+	return filepath.Join(root, pkgdir, filename)
+}
+
+// BenchmarkScan measures scanning of testdata/scan.star from start to
+// EOF. The file is read with the timer stopped, so only the scanner
+// construction and token loop are timed.
+func BenchmarkScan(b *testing.B) {
+	filename := dataFile("syntax", "testdata/scan.star")
+	b.StopTimer()
+	data, readErr := os.ReadFile(filename)
+	if readErr != nil {
+		b.Fatal(readErr)
+	}
+	b.StartTimer()
+
+	for iter := 0; iter < b.N; iter++ {
+		sc, err := newScanner(filename, data, false)
+		if err != nil {
+			b.Fatal(err)
+		}
+		// Drain the scanner; the token values themselves are discarded.
+		var val tokenValue
+		for tok := sc.nextToken(&val); tok != EOF; tok = sc.nextToken(&val) {
+		}
+	}
+}
@@ -0,0 +1,529 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package syntax provides a Starlark parser and abstract syntax tree.
+package syntax // import "go.starlark.net/syntax"
+
+// A Node is a node in a Starlark syntax tree.
+type Node interface {
+	// Span returns the start and end position of the expression.
+	Span() (start, end Position)
+
+	// Comments returns the comments associated with this node.
+	// It returns nil if RetainComments was not specified during parsing,
+	// or if AllocComments was not called.
+	Comments() *Comments
+
+	// AllocComments allocates a new Comments node if there was none.
+	// This makes it possible to add new comments using the Comments() method.
+	AllocComments()
+}
+
+// A Comment represents a single # comment.
+type Comment struct {
+	Start Position
+	Text  string // without trailing newline
+}
+
+// Comments collects the comments associated with an expression.
+type Comments struct {
+	Before []Comment // whole-line comments before this expression
+	Suffix []Comment // end-of-line comments after this expression (up to 1)
+
+	// For top-level expressions only, After lists whole-line
+	// comments following the expression.
+	After []Comment
+}
+
+// A commentsRef is a possibly-nil reference to a set of comments.
+// A commentsRef is embedded in each type of syntax node,
+// and provides its Comments and AllocComments methods.
+type commentsRef struct{ ref *Comments }
+
+// Comments returns the comments associated with a syntax node.
+// The result is nil until AllocComments has been called.
+func (cr commentsRef) Comments() *Comments {
+	return cr.ref
+}
+
+// AllocComments enables comments to be associated with a syntax node
+// by allocating an empty Comments value on first use.
+// Later calls leave the existing value in place.
+func (cr *commentsRef) AllocComments() {
+	if cr.ref != nil {
+		return
+	}
+	cr.ref = new(Comments)
+}
+
+// Start returns the start position of the node,
+// that is, the first result of n.Span().
+func Start(n Node) Position {
+	first, _ := n.Span()
+	return first
+}
+
+// End returns the end position of the node,
+// that is, the second result of n.Span().
+func End(n Node) Position {
+	_, last := n.Span()
+	return last
+}
+
+// A File represents a Starlark file.
+type File struct {
+	commentsRef
+	Path  string
+	Stmts []Stmt
+
+	Module  interface{} // a *resolve.Module, set by resolver
+	Options *FileOptions
+}
+
+// Span returns the start of the file's first statement and the end of
+// its last statement. A file with no statements yields two zero
+// Positions.
+func (x *File) Span() (start, end Position) {
+	if n := len(x.Stmts); n > 0 {
+		start, _ = x.Stmts[0].Span()
+		_, end = x.Stmts[n-1].Span()
+	}
+	return start, end
+}
+
+// A Stmt is a Starlark statement.
+type Stmt interface {
+	Node
+	stmt()
+}
+
+// stmt is the unexported marker method of the Stmt interface;
+// each statement node type below declares it with an empty body.
+func (*AssignStmt) stmt() {}
+func (*BranchStmt) stmt() {}
+func (*DefStmt) stmt()    {}
+func (*ExprStmt) stmt()   {}
+func (*ForStmt) stmt()    {}
+func (*WhileStmt) stmt()  {}
+func (*IfStmt) stmt()     {}
+func (*LoadStmt) stmt()   {}
+func (*ReturnStmt) stmt() {}
+
+// An AssignStmt represents an assignment:
+//
+//	x = 0
+//	x, y = y, x
+//	x += 1
+type AssignStmt struct {
+	commentsRef
+	OpPos Position
+	Op    Token // = EQ | {PLUS,MINUS,STAR,PERCENT}_EQ
+	LHS   Expr
+	RHS   Expr
+}
+
+// Span returns the start of the left-hand side and the end of the
+// right-hand side.
+func (x *AssignStmt) Span() (start, end Position) {
+	lhsStart, _ := x.LHS.Span()
+	_, rhsEnd := x.RHS.Span()
+	return lhsStart, rhsEnd
+}
+
+// A DefStmt represents a function definition.
+type DefStmt struct {
+	commentsRef
+	Def    Position
+	Name   *Ident
+	Lparen Position
+	Params []Expr // param = ident | ident=expr | * | *ident | **ident
+	Rparen Position
+	Body   []Stmt
+
+	Function interface{} // a *resolve.Function, set by resolver
+}
+
+// Span returns the position of the def keyword and the end of the last
+// statement of the body. Body must be non-empty.
+func (x *DefStmt) Span() (start, end Position) {
+	last := x.Body[len(x.Body)-1]
+	_, bodyEnd := last.Span()
+	return x.Def, bodyEnd
+}
+
+// An ExprStmt is an expression evaluated for side effects.
+type ExprStmt struct {
+	commentsRef
+	X Expr
+}
+
+// Span returns the span of the wrapped expression X.
+func (x *ExprStmt) Span() (start, end Position) {
+	start, end = x.X.Span()
+	return start, end
+}
+
+// An IfStmt is a conditional: If Cond: True; else: False.
+// 'elif' is desugared into a chain of IfStmts.
+type IfStmt struct {
+	commentsRef
+	If      Position // IF or ELIF
+	Cond    Expr
+	True    []Stmt
+	ElsePos Position // ELSE or ELIF
+	False   []Stmt   // optional
+}
+
+// Span returns the position of the if (or elif) keyword and the end of
+// the statement's last branch: the False branch when it is non-nil,
+// otherwise the True branch. The chosen branch must be non-empty.
+func (x *IfStmt) Span() (start, end Position) {
+	last := x.True
+	if x.False != nil {
+		last = x.False
+	}
+	_, branchEnd := last[len(last)-1].Span()
+	return x.If, branchEnd
+}
+
+// A LoadStmt loads another module and binds names from it:
+// load(Module, "x", y="foo").
+//
+// The AST is slightly unfaithful to the concrete syntax here because
+// Starlark's load statement, so that it can be implemented in Python,
+// binds some names (like y above) with an identifier and some (like x)
+// without.  For consistency we create fake identifiers for all the
+// strings.
+type LoadStmt struct {
+	commentsRef
+	Load   Position
+	Module *Literal // a string
+	From   []*Ident // name defined in loading module
+	To     []*Ident // name in loaded module
+	Rparen Position
+}
+
+// Span returns the position of the load keyword and the position just
+// past the closing parenthesis.
+//
+// The end is computed with Rparen.add(")") so that, as in the spans of
+// ParenExpr and CallExpr, it lies after the final token rather than on
+// the token itself.
+func (x *LoadStmt) Span() (start, end Position) {
+	return x.Load, x.Rparen.add(")")
+}
+
+// ModuleName returns the name of the module loaded by this statement.
+// It panics if the Value of the Module literal is not a string.
+func (x *LoadStmt) ModuleName() string {
+	name := x.Module.Value.(string)
+	return name
+}
+
+// A BranchStmt changes the flow of control: break, continue, pass.
+type BranchStmt struct {
+	commentsRef
+	Token    Token // = BREAK | CONTINUE | PASS
+	TokenPos Position
+}
+
+// Span returns the position of the keyword and that position advanced
+// over the keyword's text, as given by Token.String.
+func (x *BranchStmt) Span() (start, end Position) {
+	keyword := x.Token.String()
+	return x.TokenPos, x.TokenPos.add(keyword)
+}
+
+// A ReturnStmt returns from a function.
+type ReturnStmt struct {
+	commentsRef
+	Return Position
+	Result Expr // may be nil
+}
+
+// Span returns the position of the return keyword and the end of the
+// result expression. When there is no result, the end is the keyword
+// position advanced over "return".
+func (x *ReturnStmt) Span() (start, end Position) {
+	if x.Result != nil {
+		_, resultEnd := x.Result.Span()
+		return x.Return, resultEnd
+	}
+	return x.Return, x.Return.add("return")
+}
+
+// An Expr is a Starlark expression.
+type Expr interface {
+	Node
+	expr()
+}
+
+// expr is the unexported marker method of the Expr interface;
+// each expression node type below declares it with an empty body.
+func (*BinaryExpr) expr()    {}
+func (*CallExpr) expr()      {}
+func (*Comprehension) expr() {}
+func (*CondExpr) expr()      {}
+func (*DictEntry) expr()     {}
+func (*DictExpr) expr()      {}
+func (*DotExpr) expr()       {}
+func (*Ident) expr()         {}
+func (*IndexExpr) expr()     {}
+func (*LambdaExpr) expr()    {}
+func (*ListExpr) expr()      {}
+func (*Literal) expr()       {}
+func (*ParenExpr) expr()     {}
+func (*SliceExpr) expr()     {}
+func (*TupleExpr) expr()     {}
+func (*UnaryExpr) expr()     {}
+
+// An Ident represents an identifier.
+type Ident struct {
+	commentsRef
+	NamePos Position // position of the first character of Name
+	Name    string
+
+	Binding interface{} // a *resolve.Binding, set by resolver
+}
+
+// Span returns the position of the identifier and that position
+// advanced over its name.
+func (x *Ident) Span() (start, end Position) {
+	start = x.NamePos
+	end = x.NamePos.add(x.Name)
+	return start, end
+}
+
+// A Literal represents a literal string or number.
+type Literal struct {
+	commentsRef
+	Token    Token // = STRING | BYTES | INT | FLOAT
+	TokenPos Position
+	Raw      string      // uninterpreted text
+	Value    interface{} // = string | int64 | *big.Int | float64
+}
+
+// Span returns the position of the literal and that position advanced
+// over its uninterpreted text, Raw.
+func (x *Literal) Span() (start, end Position) {
+	start = x.TokenPos
+	end = x.TokenPos.add(x.Raw)
+	return start, end
+}
+
+// A ParenExpr represents a parenthesized expression: (X).
+type ParenExpr struct {
+	commentsRef
+	Lparen Position
+	X      Expr
+	Rparen Position
+}
+
+// Span returns the position of the opening parenthesis and the
+// position just past the closing one.
+func (x *ParenExpr) Span() (start, end Position) {
+	start = x.Lparen
+	end = x.Rparen.add(")")
+	return start, end
+}
+
+// A CallExpr represents a function call expression: Fn(Args).
+type CallExpr struct {
+	commentsRef
+	Fn     Expr
+	Lparen Position
+	Args   []Expr // arg = expr | ident=expr | *expr | **expr
+	Rparen Position
+}
+
+// Span returns the start of the called expression Fn and the position
+// just past the closing parenthesis.
+func (x *CallExpr) Span() (start, end Position) {
+	fnStart, _ := x.Fn.Span()
+	end = x.Rparen.add(")")
+	return fnStart, end
+}
+
+// A DotExpr represents a field or method selector: X.Name.
+type DotExpr struct {
+	commentsRef
+	X       Expr
+	Dot     Position
+	NamePos Position
+	Name    *Ident
+}
+
+// Span returns the start of the operand X and the end of the selected
+// name.
+func (x *DotExpr) Span() (start, end Position) {
+	operandStart, _ := x.X.Span()
+	_, nameEnd := x.Name.Span()
+	return operandStart, nameEnd
+}
+
+// A Comprehension represents a list or dict comprehension:
+// [Body for ... if ...] or {Body for ... if ...}
+type Comprehension struct {
+	commentsRef
+	Curly   bool // {x:y for ...} or {x for ...}, not [x for ...]
+	Lbrack  Position
+	Body    Expr
+	Clauses []Node // = *ForClause | *IfClause
+	Rbrack  Position
+}
+
+// Span returns the position of the opening bracket and the position
+// one character past the closing bracket.
+func (x *Comprehension) Span() (start, end Position) {
+	start = x.Lbrack
+	end = x.Rbrack.add("]")
+	return start, end
+}
+
+// A ForStmt represents a loop: for Vars in X: Body.
+type ForStmt struct {
+	commentsRef
+	For  Position
+	Vars Expr // name, or tuple of names
+	X    Expr
+	Body []Stmt
+}
+
+// Span returns the position of the for keyword and the end of the last
+// statement of the body. Body must be non-empty.
+func (x *ForStmt) Span() (start, end Position) {
+	last := x.Body[len(x.Body)-1]
+	_, bodyEnd := last.Span()
+	return x.For, bodyEnd
+}
+
+// A WhileStmt represents a while loop: while X: Body.
+type WhileStmt struct {
+	commentsRef
+	While Position
+	Cond  Expr
+	Body  []Stmt
+}
+
+// Span returns the position of the while keyword and the end of the
+// last statement of the body. Body must be non-empty.
+func (x *WhileStmt) Span() (start, end Position) {
+	last := x.Body[len(x.Body)-1]
+	_, bodyEnd := last.Span()
+	return x.While, bodyEnd
+}
+
+// A ForClause represents a for clause in a list comprehension: for Vars in X.
+type ForClause struct {
+	commentsRef
+	For  Position
+	Vars Expr // name, or tuple of names
+	In   Position
+	X    Expr
+}
+
+// Span returns the position of the for keyword and the end of the
+// iterated expression X.
+func (x *ForClause) Span() (start, end Position) {
+	_, operandEnd := x.X.Span()
+	start = x.For
+	return start, operandEnd
+}
+
+// An IfClause represents an if clause in a list comprehension: if Cond.
+type IfClause struct {
+	commentsRef
+	If   Position
+	Cond Expr
+}
+
+// Span returns the position of the if keyword and the end of the
+// condition.
+func (x *IfClause) Span() (start, end Position) {
+	_, condEnd := x.Cond.Span()
+	start = x.If
+	return start, condEnd
+}
+
+// A DictExpr represents a dictionary literal: { List }.
+type DictExpr struct {
+	commentsRef
+	Lbrace Position
+	List   []Expr // all *DictEntrys
+	Rbrace Position
+}
+
+// Span returns the position of the opening brace and the position just
+// past the closing brace.
+func (x *DictExpr) Span() (start, end Position) {
+	start = x.Lbrace
+	end = x.Rbrace.add("}")
+	return start, end
+}
+
+// A DictEntry represents a dictionary entry: Key: Value.
+// Used only within a DictExpr.
+type DictEntry struct {
+	commentsRef
+	Key   Expr
+	Colon Position
+	Value Expr
+}
+
+// Span returns the start of the key and the end of the value.
+func (x *DictEntry) Span() (start, end Position) {
+	keyStart, _ := x.Key.Span()
+	_, valueEnd := x.Value.Span()
+	return keyStart, valueEnd
+}
+
+// A LambdaExpr represents an inline function abstraction.
+type LambdaExpr struct {
+	commentsRef
+	Lambda Position
+	Params []Expr // param = ident | ident=expr | * | *ident | **ident
+	Body   Expr
+
+	Function interface{} // a *resolve.Function, set by resolver
+}
+
+// Span returns the position of the lambda keyword and the end of the
+// body expression.
+func (x *LambdaExpr) Span() (start, end Position) {
+	_, bodyEnd := x.Body.Span()
+	start = x.Lambda
+	return start, bodyEnd
+}
+
+// A ListExpr represents a list literal: [ List ].
+type ListExpr struct {
+	commentsRef
+	Lbrack Position
+	List   []Expr
+	Rbrack Position
+}
+
+// Span returns the position of the opening bracket and the position
+// just past the closing bracket.
+func (x *ListExpr) Span() (start, end Position) {
+	start = x.Lbrack
+	end = x.Rbrack.add("]")
+	return start, end
+}
+
+// A CondExpr represents the conditional: X if COND else ELSE.
+type CondExpr struct {
+	commentsRef
+	If      Position
+	Cond    Expr
+	True    Expr
+	ElsePos Position
+	False   Expr
+}
+
+// Span returns the start of the True operand, which comes first in the
+// source, and the end of the False operand.
+func (x *CondExpr) Span() (start, end Position) {
+	trueStart, _ := x.True.Span()
+	_, falseEnd := x.False.Span()
+	return trueStart, falseEnd
+}
+
+// A TupleExpr represents a tuple literal: (List).
+type TupleExpr struct {
+	commentsRef
+	Lparen Position // optional (e.g. in x, y = 0, 1), but required if List is empty
+	List   []Expr
+	Rparen Position
+}
+
+// Span returns the extent of the tuple.
+//
+// For a parenthesized tuple it is the position of the opening
+// parenthesis and the position just past the closing one; the end is
+// computed with Rparen.add(")") so that, as in ParenExpr.Span, it lies
+// after the final token rather than on it. For an unparenthesized
+// tuple it is the start of the first element and the end of the last;
+// List must then be non-empty.
+func (x *TupleExpr) Span() (start, end Position) {
+	if x.Lparen.IsValid() {
+		return x.Lparen, x.Rparen.add(")")
+	}
+	return Start(x.List[0]), End(x.List[len(x.List)-1])
+}
+
+// A UnaryExpr represents a unary expression: Op X.
+//
+// As a special case, UnaryExpr{Op:STAR} may also represent
+// the star parameter in def f(*args) or def f(*, x).
+type UnaryExpr struct {
+	commentsRef
+	OpPos Position
+	Op    Token
+	X     Expr // may be nil if Op==STAR
+}
+
+// Span returns the position of the operator and the end of the
+// operand. When there is no operand (a bare star parameter), the end
+// is the operator position advanced over "*".
+func (x *UnaryExpr) Span() (start, end Position) {
+	if x.X == nil {
+		return x.OpPos, x.OpPos.add("*")
+	}
+	_, operandEnd := x.X.Span()
+	return x.OpPos, operandEnd
+}
+
+// A BinaryExpr represents a binary expression: X Op Y.
+//
+// As a special case, BinaryExpr{Op:EQ} may also
+// represent a named argument in a call f(k=v)
+// or a named parameter in a function declaration
+// def f(param=default).
+type BinaryExpr struct {
+	commentsRef
+	X     Expr
+	OpPos Position
+	Op    Token
+	Y     Expr
+}
+
+// Span returns the start of the left operand X and the end of the
+// right operand Y.
+func (x *BinaryExpr) Span() (start, end Position) {
+	leftStart, _ := x.X.Span()
+	_, rightEnd := x.Y.Span()
+	return leftStart, rightEnd
+}
+
+// A SliceExpr represents a slice or substring expression: X[Lo:Hi:Step].
+type SliceExpr struct {
+	commentsRef
+	X            Expr
+	Lbrack       Position
+	Lo, Hi, Step Expr // all optional
+	Rbrack       Position
+}
+
+// Span returns the start of the sliced operand X and the position just
+// past the closing bracket.
+//
+// The end is computed with Rbrack.add("]") so that, as in
+// ListExpr.Span, it lies after the final token rather than on it.
+func (x *SliceExpr) Span() (start, end Position) {
+	start, _ = x.X.Span()
+	return start, x.Rbrack.add("]")
+}
+
+// An IndexExpr represents an index expression: X[Y].
+type IndexExpr struct {
+	commentsRef
+	X      Expr
+	Lbrack Position
+	Y      Expr
+	Rbrack Position
+}
+
+// Span returns the start of the indexed operand X and the position
+// just past the closing bracket.
+//
+// The end is computed with Rbrack.add("]") so that, as in
+// ListExpr.Span, it lies after the final token rather than on it.
+func (x *IndexExpr) Span() (start, end Position) {
+	start, _ = x.X.Span()
+	return start, x.Rbrack.add("]")
+}
@@ -0,0 +1,212 @@
+# Tests of parse errors.
+# This is a "chunked" file; each "---" line demarcates a new parser input.
+#
+# TODO(adonovan): lots more tests.
+
+x = 1 +
+2 ### "got newline, want primary expression"
+
+---
+
+_ = *x ### `got '\*', want primary`
+
+---
+# trailing comma is ok
+
+def f(a, ): pass
+def f(*args, ): pass
+def f(**kwargs, ): pass
+
+---
+
+# Parameters are validated later.
+def f(**kwargs, *args, *, b=1, a, **kwargs, *args, *, b=1, a):
+  pass
+
+---
+
+def f(a, *-b, c): # ### `got '-', want ','`
+  pass
+
+---
+
+def f(**kwargs, *args, b=1, a, **kwargs, *args, b=1, a):
+  pass
+
+---
+
+def pass(): ### "not an identifier"
+  pass
+
+---
+
+def f : ### `got ':', want '\('`
+
+---
+# trailing comma is ok
+
+f(a, )
+f(*args, )
+f(**kwargs, )
+
+---
+
+f(a=1, *, b=2) ### `got ',', want primary`
+
+---
+
+_ = {x:y for y in z} # ok
+_ = {x for y in z}   ### `got for, want ':'`
+
+---
+
+def f():
+  pass
+ pass ### `unindent does not match any outer indentation level`
+
+---
+def f(): pass
+---
+# Blank line after pass => outdent.
+def f():
+	pass
+
+---
+# No blank line after pass; EOF acts like a newline.
+def f():
+	pass
+---
+# This is a well known parsing ambiguity in Python.
+# Python 2.7 accepts it but Python3 and Starlark reject it.
+_ = [x for x in lambda: True, lambda: False if x()] ### "got lambda, want primary"
+
+_ = [x for x in (lambda: True, lambda: False) if x()] # ok in all dialects
+
+---
+# Starlark, following Python 3, allows an unparenthesized
+# tuple after 'in' only in a for statement but not in a comprehension.
+# (Python 2.7 allows both.)
+for x in 1, 2, 3:
+      print(x)
+
+_ = [x for x in 1, 2, 3] ### `got ',', want ']', for, or if`
+---
+# Unparenthesized tuple is not allowed as operand of 'if' in comprehension.
+_ = [a for b in c if 1, 2] ### `got ',', want ']', for, or if`
+
+---
+# Lambda is ok though.
+_ = [a for b in c if lambda: d] # ok
+
+# But the body of such a lambda may not be a conditional:
+_ = [a for b in c if (lambda: d if e else f)] # ok
+_ = [a for b in c if lambda: d if e else f]   ### "got else, want ']'"
+
+---
+# A lambda is not allowed as the operand of a 'for' clause.
+_ = [a for b in lambda: c] ### `got lambda, want primary`
+
+---
+# Comparison operations are not associative.
+
+_ = (0 == 1) == 2 # ok
+_ = 0 == (1 == 2) # ok
+_ = 0 == 1 == 2 ### "== does not associate with =="
+
+---
+
+_ = (0 <= i) < n   # ok
+_ = 0 <= (i < n) # ok
+_ = 0 <= i < n ### "<= does not associate with <"
+
+---
+
+_ = (a in b) not in c  # ok
+_ = a in (b not in c)  # ok
+_ = a in b not in c    ### "in does not associate with not in"
+
+---
+# shift/reduce ambiguity is reduced
+_ = [x for x in a if b else c] ### `got else, want ']', for, or if`
+---
+[a for b in c else d] ### `got else, want ']', for, or if`
+---
+_ = a + b not c ### "got identifier, want in"
+---
+f(1+2 = 3) ### "keyword argument must have form name=expr"
+---
+print(1, 2, 3
+### `got end of file, want '\)'`
+---
+_ = a if b ### "conditional expression without else clause"
+---
+load("") ### "load statement must import at least 1 symbol"
+---
+load("", 1) ### `load operand must be "name" or localname="name" \(got int literal\)`
+---
+load("a", "x") # ok
+---
+load(1, 2) ### "first operand of load statement must be a string literal"
+---
+load("a", x) ### `load operand must be "x" or x="originalname"`
+---
+load("a", x2=x) ### `original name of loaded symbol must be quoted: x2="originalname"`
+---
+# All of these parse.
+load("a", "x")
+load("a", "x", y2="y")
+load("a", x2="x", "y") # => positional-before-named arg check happens later (!)
+---
+# 'load' is not an identifier
+load = 1 ### `got '=', want '\('`
+---
+# 'load' is not an identifier
+f(load()) ### `got load, want primary`
+---
+# 'load' is not an identifier
+def load(): ### `not an identifier`
+  pass
+---
+# 'load' is not an identifier
+def f(load): ### `not an identifier`
+  pass
+---
+# A load statement allows a trailing comma.
+load("module", "x",)
+---
+x = 1 +
+2 ### "got newline, want primary expression"
+---
+def f():
+    pass
+# this used to cause a spurious indentation error
+---
+print 1 2 ### `got int literal, want newline`
+
+---
+# newlines are not allowed in raw string literals
+raw = r'a ### `unexpected newline in string`
+b'
+
+---
+# The parser permits an unparenthesized tuple expression for the first index.
+x[1, 2:] # ok
+---
+# But not if it has a trailing comma.
+x[1, 2,:] ### `got ':', want primary`
+---
+# Trailing tuple commas are permitted only within parens; see b/28867036.
+(a, b,) = 1, 2 # ok
+c, d = 1, 2 # ok
+---
+a, b, = 1, 2 ### `unparenthesized tuple with trailing comma`
+---
+a, b = 1, 2, ### `unparenthesized tuple with trailing comma`
+
+---
+# See github.com/google/starlark-go/issues/48
+a = max(range(10))) ### `unexpected '\)'`
+
+---
+# github.com/google/starlark-go/issues/85
+s = "\x-0" ### `invalid escape sequence`
@@ -0,0 +1,161 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Walk traverses a syntax tree in depth-first order.
+// It starts by calling f(n); n must not be nil.
+// If f returns false, Walk returns immediately.
+// Otherwise Walk calls itself recursively for each
+// non-nil child of n and then calls f(nil).
+//
+// Walk panics if n is nil or if n's type is not one of
+// the node types handled below.
+func Walk(n Node, f func(Node) bool) {
+	if n == nil {
+		panic("nil")
+	}
+	if !f(n) {
+		return
+	}
+
+	// TODO(adonovan): opt: order cases using profile data.
+	switch n := n.(type) {
+	case *File:
+		walkStmts(n.Stmts, f)
+
+	case *ExprStmt:
+		Walk(n.X, f)
+
+	case *BranchStmt:
+		// no-op
+
+	case *IfStmt:
+		Walk(n.Cond, f)
+		walkStmts(n.True, f)
+		walkStmts(n.False, f)
+
+	case *AssignStmt:
+		Walk(n.LHS, f)
+		Walk(n.RHS, f)
+
+	case *DefStmt:
+		Walk(n.Name, f)
+		for _, param := range n.Params {
+			Walk(param, f)
+		}
+		walkStmts(n.Body, f)
+
+	case *ForStmt:
+		Walk(n.Vars, f)
+		Walk(n.X, f)
+		walkStmts(n.Body, f)
+
+	case *WhileStmt:
+		// WhileStmt is a Stmt like any other; without this case
+		// a tree containing a while loop reaches the default panic.
+		Walk(n.Cond, f)
+		walkStmts(n.Body, f)
+
+	case *ReturnStmt:
+		if n.Result != nil {
+			Walk(n.Result, f)
+		}
+
+	case *LoadStmt:
+		Walk(n.Module, f)
+		for _, from := range n.From {
+			Walk(from, f)
+		}
+		for _, to := range n.To {
+			Walk(to, f)
+		}
+
+	case *Ident, *Literal:
+		// no-op
+
+	case *ListExpr:
+		for _, x := range n.List {
+			Walk(x, f)
+		}
+
+	case *ParenExpr:
+		Walk(n.X, f)
+
+	case *CondExpr:
+		Walk(n.Cond, f)
+		Walk(n.True, f)
+		Walk(n.False, f)
+
+	case *IndexExpr:
+		Walk(n.X, f)
+		Walk(n.Y, f)
+
+	case *DictEntry:
+		Walk(n.Key, f)
+		Walk(n.Value, f)
+
+	case *SliceExpr:
+		// Lo, Hi and Step are all optional.
+		Walk(n.X, f)
+		if n.Lo != nil {
+			Walk(n.Lo, f)
+		}
+		if n.Hi != nil {
+			Walk(n.Hi, f)
+		}
+		if n.Step != nil {
+			Walk(n.Step, f)
+		}
+
+	case *Comprehension:
+		Walk(n.Body, f)
+		for _, clause := range n.Clauses {
+			Walk(clause, f)
+		}
+
+	case *IfClause:
+		Walk(n.Cond, f)
+
+	case *ForClause:
+		Walk(n.Vars, f)
+		Walk(n.X, f)
+
+	case *TupleExpr:
+		for _, x := range n.List {
+			Walk(x, f)
+		}
+
+	case *DictExpr:
+		for _, entry := range n.List {
+			Walk(entry, f)
+		}
+
+	case *UnaryExpr:
+		// X is nil for a bare star parameter.
+		if n.X != nil {
+			Walk(n.X, f)
+		}
+
+	case *BinaryExpr:
+		Walk(n.X, f)
+		Walk(n.Y, f)
+
+	case *DotExpr:
+		Walk(n.X, f)
+		Walk(n.Name, f)
+
+	case *CallExpr:
+		Walk(n.Fn, f)
+		for _, arg := range n.Args {
+			Walk(arg, f)
+		}
+
+	case *LambdaExpr:
+		for _, param := range n.Params {
+			Walk(param, f)
+		}
+		Walk(n.Body, f)
+
+	default:
+		panic(n)
+	}
+
+	f(nil)
+}
+
+// walkStmts calls Walk on each statement of stmts, in order.
+func walkStmts(stmts []Stmt, f func(Node) bool) {
+	for i := range stmts {
+		Walk(stmts[i], f)
+	}
+}
@@ -0,0 +1,103 @@
+package syntax_test
+
+import (
+	"bytes"
+	"fmt"
+	"log"
+	"reflect"
+	"strings"
+	"testing"
+
+	"go.starlark.net/syntax"
+)
+
+// TestWalk checks the order and nesting in which Walk visits nodes by
+// rendering the traversal of a small program as an indented outline.
+func TestWalk(t *testing.T) {
+	const src = `
+for x in y:
+  if x:
+    pass
+  else:
+    f([2*x for x in "abc"])
+`
+	// TODO(adonovan): test that it finds all syntax.Nodes
+	// (compare against a reflect-based implementation).
+	// TODO(adonovan): test that the result of f is used to prune
+	// the descent.
+	f, err := syntax.Parse("hello.star", src, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+	var depth int
+	syntax.Walk(f, func(n syntax.Node) bool {
+		if n == nil { // Walk reports the end of a node's subtree with nil.
+			depth--
+			return true
+		}
+		// One line per node: its type name, indented two spaces per level.
+		name := strings.TrimPrefix(reflect.TypeOf(n).String(), "*syntax.")
+		fmt.Fprintf(&buf, "%s%s\n", strings.Repeat("  ", depth), name)
+		depth++
+		return true
+	})
+	got := strings.TrimSpace(buf.String())
+	want := strings.TrimSpace(`
+File
+  ForStmt
+    Ident
+    Ident
+    IfStmt
+      Ident
+      BranchStmt
+      ExprStmt
+        CallExpr
+          Ident
+          Comprehension
+            BinaryExpr
+              Literal
+              Ident
+            ForClause
+              Ident
+              Literal`)
+	if got != want {
+		t.Errorf("Walk outline mismatch\ngot:\n%s\nwant:\n%s", got, want)
+	}
+}
+
+// ExampleWalk uses Walk to collect every identifier in a Starlark source
+// file; the nonsense program exists only to cover varied grammar.
+func ExampleWalk() {
+	const src = `
+load("library", "a")
+
+def b(c, *, d=e):
+    f += {g: h}
+    i = -(j)
+    return k.l[m + n]
+
+for o in [p for q, r in s if t]:
+    u(lambda: v, w[x:y:z])
+`
+	file, err := syntax.Parse("hello.star", src, 0)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var names []string
+	collect := func(n syntax.Node) bool {
+		if ident, isIdent := n.(*syntax.Ident); isIdent {
+			names = append(names, ident.Name)
+		}
+		return true // never prune: keep descending into every subtree
+	}
+	syntax.Walk(file, collect)
+	fmt.Println(strings.Join(names, " "))
+
+	// 'a' is printed twice: it is in both LoadStmt.From[0] and LoadStmt.To[0].
+
+	// Output:
+	// a a b c d e f g h i j k l m n o p q r s t u v w x y z
+}