whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,176 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package diff computes differences between text files or strings.
package diff
import (
"fmt"
"sort"
"strings"
)
// An Edit describes the replacement of a portion of a text file.
type Edit struct {
Start, End int // byte offsets of the region to replace
New string // the replacement
}
func (e Edit) String() string {
return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New)
}
// Apply applies a sequence of edits to the src buffer and returns the
// result. Edits are applied in order of start offset; edits with the
// same start offset are applied in they order they were provided.
//
// Apply returns an error if any edit is out of bounds,
// or if any pair of edits is overlapping.
func Apply(src string, edits []Edit) (string, error) {
edits, size, err := validate(src, edits)
if err != nil {
return "", err
}
// Apply edits.
out := make([]byte, 0, size)
lastEnd := 0
for _, edit := range edits {
if lastEnd < edit.Start {
out = append(out, src[lastEnd:edit.Start]...)
}
out = append(out, edit.New...)
lastEnd = edit.End
}
out = append(out, src[lastEnd:]...)
if len(out) != size {
panic("wrong size")
}
return string(out), nil
}
// ApplyBytes is like Apply, but it accepts a byte slice.
// The result is always a new array.
func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
res, err := Apply(string(src), edits)
return []byte(res), err
}
// validate checks that edits are consistent with src,
// and returns the size of the patched output.
// It may return a different slice.
func validate(src string, edits []Edit) ([]Edit, int, error) {
if !sort.IsSorted(editsSort(edits)) {
edits = append([]Edit(nil), edits...)
SortEdits(edits)
}
// Check validity of edits and compute final size.
size := len(src)
lastEnd := 0
for _, edit := range edits {
if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
}
if edit.Start < lastEnd {
return nil, 0, fmt.Errorf("diff has overlapping edits")
}
size += len(edit.New) + edit.Start - edit.End
lastEnd = edit.End
}
return edits, size, nil
}
// SortEdits orders a slice of Edits by (start, end) offset.
// This ordering puts insertions (end = start) before deletions
// (end > start) at the same point, but uses a stable sort to preserve
// the order of multiple insertions at the same point.
// (Apply detects multiple deletions at the same point as an error.)
func SortEdits(edits []Edit) {
sort.Stable(editsSort(edits))
}
type editsSort []Edit
func (a editsSort) Len() int { return len(a) }
func (a editsSort) Less(i, j int) bool {
if cmp := a[i].Start - a[j].Start; cmp != 0 {
return cmp < 0
}
return a[i].End < a[j].End
}
func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// lineEdits expands and merges a sequence of edits so that each
// resulting edit replaces one or more complete lines.
// See ApplyEdits for preconditions.
func lineEdits(src string, edits []Edit) ([]Edit, error) {
edits, _, err := validate(src, edits)
if err != nil {
return nil, err
}
// Do all deletions begin and end at the start of a line,
// and all insertions end with a newline?
// (This is merely a fast path.)
for _, edit := range edits {
if edit.Start >= len(src) || // insertion at EOF
edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
edit.End > 0 && src[edit.End-1] != '\n' || // not at line start
edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
goto expand // slow path
}
}
return edits, nil // aligned
expand:
if len(edits) == 0 {
return edits, nil // no edits (unreachable due to fast path)
}
expanded := make([]Edit, 0, len(edits)) // a guess
prev := edits[0]
// TODO(adonovan): opt: start from the first misaligned edit.
// TODO(adonovan): opt: avoid quadratic cost of string += string.
for _, edit := range edits[1:] {
between := src[prev.End:edit.Start]
if !strings.Contains(between, "\n") {
// overlapping lines: combine with previous edit.
prev.New += between + edit.New
prev.End = edit.End
} else {
// non-overlapping lines: flush previous edit.
expanded = append(expanded, expandEdit(prev, src))
prev = edit
}
}
return append(expanded, expandEdit(prev, src)), nil // flush final edit
}
// expandEdit returns edit expanded to complete whole lines.
func expandEdit(edit Edit, src string) Edit {
// Expand start left to start of line.
// (delta is the zero-based column number of start.)
start := edit.Start
if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
edit.Start -= delta
edit.New = src[start-delta:start] + edit.New
}
// Expand end right to end of line.
end := edit.End
if end > 0 && src[end-1] != '\n' ||
edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
edit.End = len(src) // extend to EOF
} else {
edit.End = end + nl + 1 // extend beyond \n
}
}
edit.New += src[end:edit.End]
return edit
}
@@ -0,0 +1,207 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff_test
import (
"bytes"
"math/rand"
"os"
"os/exec"
"path/filepath"
"reflect"
"strings"
"testing"
"unicode/utf8"
"golang.org/x/tools/internal/diff"
"golang.org/x/tools/internal/diff/difftest"
"golang.org/x/tools/internal/testenv"
)
func TestApply(t *testing.T) {
for _, tc := range difftest.TestCases {
t.Run(tc.Name, func(t *testing.T) {
got, err := diff.Apply(tc.In, tc.Edits)
if err != nil {
t.Fatalf("Apply(Edits) failed: %v", err)
}
if got != tc.Out {
t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out)
}
if tc.LineEdits != nil {
got, err := diff.Apply(tc.In, tc.LineEdits)
if err != nil {
t.Fatalf("Apply(LineEdits) failed: %v", err)
}
if got != tc.Out {
t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out)
}
}
})
}
}
func TestNEdits(t *testing.T) {
for _, tc := range difftest.TestCases {
edits := diff.Strings(tc.In, tc.Out)
got, err := diff.Apply(tc.In, edits)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
if got != tc.Out {
t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out)
}
if len(edits) < len(tc.Edits) { // should find subline edits
t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc)
}
}
}
func TestNRandom(t *testing.T) {
rand.Seed(1)
for i := 0; i < 1000; i++ {
a := randstr("abω", 16)
b := randstr("abωc", 16)
edits := diff.Strings(a, b)
got, err := diff.Apply(a, edits)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
if got != b {
t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a)
}
}
}
// $ go test -fuzz=FuzzRoundTrip ./internal/diff
func FuzzRoundTrip(f *testing.F) {
f.Fuzz(func(t *testing.T, a, b string) {
if !utf8.ValidString(a) || !utf8.ValidString(b) {
return // inputs must be text
}
edits := diff.Strings(a, b)
got, err := diff.Apply(a, edits)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
if got != b {
t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits)
}
})
}
func TestLineEdits(t *testing.T) {
for _, tc := range difftest.TestCases {
t.Run(tc.Name, func(t *testing.T) {
want := tc.LineEdits
if want == nil {
want = tc.Edits // already line-aligned
}
got, err := diff.LineEdits(tc.In, tc.Edits)
if err != nil {
t.Fatalf("LineEdits: %v", err)
}
if !reflect.DeepEqual(got, want) {
t.Errorf("in=<<%s>>\nout=<<%s>>\nraw edits=%s\nline edits=%s\nwant: %s",
tc.In, tc.Out, tc.Edits, got, want)
}
// make sure that applying the edits gives the expected result
fixed, err := diff.Apply(tc.In, got)
if err != nil {
t.Error(err)
}
if fixed != tc.Out {
t.Errorf("Apply(LineEdits): got %q, want %q", fixed, tc.Out)
}
})
}
}
func TestToUnified(t *testing.T) {
testenv.NeedsTool(t, "patch")
for _, tc := range difftest.TestCases {
t.Run(tc.Name, func(t *testing.T) {
unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines)
if err != nil {
t.Fatal(err)
}
if unified == "" {
return
}
orig := filepath.Join(t.TempDir(), "original")
err = os.WriteFile(orig, []byte(tc.In), 0644)
if err != nil {
t.Fatal(err)
}
temp := filepath.Join(t.TempDir(), "patched")
err = os.WriteFile(temp, []byte(tc.In), 0644)
if err != nil {
t.Fatal(err)
}
cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig)
cmd.Stdin = strings.NewReader(unified)
cmd.Stdout = new(bytes.Buffer)
cmd.Stderr = new(bytes.Buffer)
if err = cmd.Run(); err != nil {
t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(),
cmd.Stderr, cmd.Stdout)
}
got, err := os.ReadFile(temp)
if err != nil {
t.Fatal(err)
}
if string(got) != tc.Out {
t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q",
got, tc.Out, unified)
}
})
}
}
func TestRegressionOld001(t *testing.T) {
a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
diffs := diff.Strings(a, b)
got, err := diff.Apply(a, diffs)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
if got != b {
i := 0
for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
}
t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
t.Errorf("\n%q\n%q", got[i:], b[i:])
}
}
func TestRegressionOld002(t *testing.T) {
a := "n\"\n)\n"
b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
diffs := diff.Strings(a, b)
got, err := diff.Apply(a, diffs)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
if got != b {
i := 0
for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
}
t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
t.Errorf("\n%q\n%q", got[i:], b[i:])
}
}
// return a random string of length n made of characters from s
func randstr(s string, n int) string {
src := []rune(s)
x := make([]rune, n)
for i := 0; i < n; i++ {
x[i] = src[rand.Intn(len(src))]
}
return string(x)
}
@@ -0,0 +1,324 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package difftest supplies a set of tests that will operate on any
// implementation of a diff algorithm as exposed by
// "golang.org/x/tools/internal/diff"
package difftest
// There are two kinds of tests, semantic tests, and 'golden data' tests.
// The semantic tests check that the computed diffs transform the input to
// the output, and that 'patch' accepts the computed unified diffs.
// The other tests just check that Edits and LineEdits haven't changed
// unexpectedly. These fields may need to be changed when the diff algorithm
// changes.
import (
"testing"
"golang.org/x/tools/internal/diff"
)
const (
FileA = "from"
FileB = "to"
UnifiedPrefix = "--- " + FileA + "\n+++ " + FileB + "\n"
)
var TestCases = []struct {
Name, In, Out, Unified string
Edits, LineEdits []diff.Edit // expectation (LineEdits=nil => already line-aligned)
NoDiff bool
}{{
Name: "empty",
In: "",
Out: "",
}, {
Name: "no_diff",
In: "gargantuan\n",
Out: "gargantuan\n",
}, {
Name: "replace_all",
In: "fruit\n",
Out: "cheese\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-fruit
+cheese
`[1:],
Edits: []diff.Edit{{Start: 0, End: 5, New: "cheese"}},
LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}},
}, {
Name: "insert_rune",
In: "gord\n",
Out: "gourd\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-gord
+gourd
`[1:],
Edits: []diff.Edit{{Start: 2, End: 2, New: "u"}},
LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}},
}, {
Name: "delete_rune",
In: "groat\n",
Out: "goat\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-groat
+goat
`[1:],
Edits: []diff.Edit{{Start: 1, End: 2, New: ""}},
LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}},
}, {
Name: "replace_rune",
In: "loud\n",
Out: "lord\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-loud
+lord
`[1:],
Edits: []diff.Edit{{Start: 2, End: 3, New: "r"}},
LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}},
}, {
Name: "replace_partials",
In: "blanket\n",
Out: "bunker\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-blanket
+bunker
`[1:],
Edits: []diff.Edit{
{Start: 1, End: 3, New: "u"},
{Start: 6, End: 7, New: "r"},
},
LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}},
}, {
Name: "insert_line",
In: "1: one\n3: three\n",
Out: "1: one\n2: two\n3: three\n",
Unified: UnifiedPrefix + `
@@ -1,2 +1,3 @@
1: one
+2: two
3: three
`[1:],
Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}},
}, {
Name: "replace_no_newline",
In: "A",
Out: "B",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-A
\ No newline at end of file
+B
\ No newline at end of file
`[1:],
Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}},
}, {
Name: "delete_empty",
In: "meow",
Out: "", // GNU diff -u special case: +0,0
Unified: UnifiedPrefix + `
@@ -1 +0,0 @@
-meow
\ No newline at end of file
`[1:],
Edits: []diff.Edit{{Start: 0, End: 4, New: ""}},
LineEdits: []diff.Edit{{Start: 0, End: 4, New: ""}},
}, {
Name: "append_empty",
In: "", // GNU diff -u special case: -0,0
Out: "AB\nC",
Unified: UnifiedPrefix + `
@@ -0,0 +1,2 @@
+AB
+C
\ No newline at end of file
`[1:],
Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
},
// TODO(adonovan): fix this test: GNU diff -u prints "+1,2", Unifies prints "+1,3".
// {
// Name: "add_start",
// In: "A",
// Out: "B\nCA",
// Unified: UnifiedPrefix + `
// @@ -1 +1,2 @@
// -A
// \ No newline at end of file
// +B
// +CA
// \ No newline at end of file
// `[1:],
// Edits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
// LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
// },
{
Name: "add_end",
In: "A",
Out: "AB",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-A
\ No newline at end of file
+AB
\ No newline at end of file
`[1:],
Edits: []diff.Edit{{Start: 1, End: 1, New: "B"}},
LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}},
}, {
Name: "add_empty",
In: "",
Out: "AB\nC",
Unified: UnifiedPrefix + `
@@ -0,0 +1,2 @@
+AB
+C
\ No newline at end of file
`[1:],
Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
}, {
Name: "add_newline",
In: "A",
Out: "A\n",
Unified: UnifiedPrefix + `
@@ -1 +1 @@
-A
\ No newline at end of file
+A
`[1:],
Edits: []diff.Edit{{Start: 1, End: 1, New: "\n"}},
LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}},
}, {
Name: "delete_front",
In: "A\nB\nC\nA\nB\nB\nA\n",
Out: "C\nB\nA\nB\nA\nC\n",
Unified: UnifiedPrefix + `
@@ -1,7 +1,6 @@
-A
-B
C
+B
A
B
-B
A
+C
`[1:],
NoDiff: true, // unified diff is different but valid
Edits: []diff.Edit{
{Start: 0, End: 4, New: ""},
{Start: 6, End: 6, New: "B\n"},
{Start: 10, End: 12, New: ""},
{Start: 14, End: 14, New: "C\n"},
},
LineEdits: []diff.Edit{
{Start: 0, End: 4, New: ""},
{Start: 6, End: 6, New: "B\n"},
{Start: 10, End: 12, New: ""},
{Start: 14, End: 14, New: "C\n"},
},
}, {
Name: "replace_last_line",
In: "A\nB\n",
Out: "A\nC\n\n",
Unified: UnifiedPrefix + `
@@ -1,2 +1,3 @@
A
-B
+C
+
`[1:],
Edits: []diff.Edit{{Start: 2, End: 3, New: "C\n"}},
LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}},
},
{
Name: "multiple_replace",
In: "A\nB\nC\nD\nE\nF\nG\n",
Out: "A\nH\nI\nJ\nE\nF\nK\n",
Unified: UnifiedPrefix + `
@@ -1,7 +1,7 @@
A
-B
-C
-D
+H
+I
+J
E
F
-G
+K
`[1:],
Edits: []diff.Edit{
{Start: 2, End: 8, New: "H\nI\nJ\n"},
{Start: 12, End: 14, New: "K\n"},
},
NoDiff: true, // diff algorithm produces different delete/insert pattern
},
{
Name: "extra_newline",
In: "\nA\n",
Out: "A\n",
Edits: []diff.Edit{{Start: 0, End: 1, New: ""}},
Unified: UnifiedPrefix + `@@ -1,2 +1 @@
-
A
`,
}, {
Name: "unified_lines",
In: "aaa\nccc\n",
Out: "aaa\nbbb\nccc\n",
Edits: []diff.Edit{{Start: 3, End: 3, New: "\nbbb"}},
LineEdits: []diff.Edit{{Start: 0, End: 4, New: "aaa\nbbb\n"}},
Unified: UnifiedPrefix + "@@ -1,2 +1,3 @@\n aaa\n+bbb\n ccc\n",
}, {
Name: "60379",
In: `package a
type S struct {
s fmt.Stringer
}
`,
Out: `package a
type S struct {
s fmt.Stringer
}
`,
Edits: []diff.Edit{{Start: 27, End: 27, New: "\t"}},
LineEdits: []diff.Edit{{Start: 27, End: 42, New: "\ts fmt.Stringer\n"}},
Unified: UnifiedPrefix + "@@ -1,5 +1,5 @@\n package a\n \n type S struct {\n-s fmt.Stringer\n+\ts fmt.Stringer\n }\n",
},
}
func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) {
for _, test := range TestCases {
t.Run(test.Name, func(t *testing.T) {
edits := compute(test.In, test.Out)
got, err := diff.Apply(test.In, edits)
if err != nil {
t.Fatalf("Apply failed: %v", err)
}
unified, err := diff.ToUnified(FileA, FileB, test.In, edits, diff.DefaultContextLines)
if err != nil {
t.Fatalf("ToUnified: %v", err)
}
if got != test.Out {
t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v",
got, unified, test.Out)
}
if !test.NoDiff && unified != test.Unified {
t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v",
unified, test.Unified, edits)
}
})
}
}
@@ -0,0 +1,88 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package difftest supplies a set of tests that will operate on any
// implementation of a diff algorithm as exposed by
// "golang.org/x/tools/internal/diff"
package difftest_test
import (
"fmt"
"os"
"os/exec"
"strings"
"testing"
"golang.org/x/tools/internal/diff/difftest"
"golang.org/x/tools/internal/testenv"
)
func TestVerifyUnified(t *testing.T) {
testenv.NeedsTool(t, "diff")
for _, test := range difftest.TestCases {
t.Run(test.Name, func(t *testing.T) {
if test.NoDiff {
t.Skip("diff tool produces expected different results")
}
diff, err := getDiffOutput(test.In, test.Out)
if err != nil {
t.Fatal(err)
}
if len(diff) > 0 {
diff = difftest.UnifiedPrefix + diff
}
if diff != test.Unified {
t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff)
}
})
}
}
func getDiffOutput(a, b string) (string, error) {
fileA, err := os.CreateTemp("", "myers.in")
if err != nil {
return "", err
}
defer os.Remove(fileA.Name())
if _, err := fileA.Write([]byte(a)); err != nil {
return "", err
}
if err := fileA.Close(); err != nil {
return "", err
}
fileB, err := os.CreateTemp("", "myers.in")
if err != nil {
return "", err
}
defer os.Remove(fileB.Name())
if _, err := fileB.Write([]byte(b)); err != nil {
return "", err
}
if err := fileB.Close(); err != nil {
return "", err
}
cmd := exec.Command("diff", "-u", fileA.Name(), fileB.Name())
cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8")
out, err := cmd.Output()
if err != nil {
exit, ok := err.(*exec.ExitError)
if !ok {
return "", fmt.Errorf("can't exec %s: %v", cmd, err)
}
if len(out) == 0 {
// Nonzero exit with no output: terminated by signal?
return "", fmt.Errorf("%s failed: %v; stderr:\n%s", cmd, err, exit.Stderr)
}
// nonzero exit + output => files differ
}
diff := string(out)
if len(diff) <= 0 {
return diff, nil
}
bits := strings.SplitN(diff, "\n", 3)
if len(bits) != 3 {
return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff)
}
return bits[2], nil
}
@@ -0,0 +1,9 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff
// This file exports some private declarations to tests.
var LineEdits = lineEdits
@@ -0,0 +1,179 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
import (
"log"
"sort"
)
// lcs is a longest common sequence
type lcs []diag
// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i<Len.
// All computed diagonals are parts of a longest common subsequence.
type diag struct {
X, Y int
Len int
}
// sort sorts in place, by lowest X, and if tied, inversely by Len
func (l lcs) sort() lcs {
sort.Slice(l, func(i, j int) bool {
if l[i].X != l[j].X {
return l[i].X < l[j].X
}
return l[i].Len > l[j].Len
})
return l
}
// validate that the elements of the lcs do not overlap
// (can only happen when the two-sided algorithm ends early)
// expects the lcs to be sorted
func (l lcs) valid() bool {
for i := 1; i < len(l); i++ {
if l[i-1].X+l[i-1].Len > l[i].X {
return false
}
if l[i-1].Y+l[i-1].Len > l[i].Y {
return false
}
}
return true
}
// repair overlapping lcs
// only called if two-sided stops early
func (l lcs) fix() lcs {
// from the set of diagonals in l, find a maximal non-conflicting set
// this problem may be NP-complete, but we use a greedy heuristic,
// which is quadratic, but with a better data structure, could be D log D.
// indepedent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs
// which has to have monotone x and y
if len(l) == 0 {
return nil
}
sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len })
tmp := make(lcs, 0, len(l))
tmp = append(tmp, l[0])
for i := 1; i < len(l); i++ {
var dir direction
nxt := l[i]
for _, in := range tmp {
if dir, nxt = overlap(in, nxt); dir == empty || dir == bad {
break
}
}
if nxt.Len > 0 && dir != bad {
tmp = append(tmp, nxt)
}
}
tmp.sort()
if false && !tmp.valid() { // debug checking
log.Fatalf("here %d", len(tmp))
}
return tmp
}
type direction int
const (
empty direction = iota // diag is empty (so not in lcs)
leftdown // proposed acceptably to the left and below
rightup // proposed diag is acceptably to the right and above
bad // proposed diag is inconsistent with the lcs so far
)
// overlap trims the proposed diag prop so it doesn't overlap with
// the existing diag that has already been added to the lcs.
func overlap(exist, prop diag) (direction, diag) {
if prop.X <= exist.X && exist.X < prop.X+prop.Len {
// remove the end of prop where it overlaps with the X end of exist
delta := prop.X + prop.Len - exist.X
prop.Len -= delta
if prop.Len <= 0 {
return empty, prop
}
}
if exist.X <= prop.X && prop.X < exist.X+exist.Len {
// remove the beginning of prop where overlaps with exist
delta := exist.X + exist.Len - prop.X
prop.Len -= delta
if prop.Len <= 0 {
return empty, prop
}
prop.X += delta
prop.Y += delta
}
if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len {
// remove the end of prop that overlaps (in Y) with exist
delta := prop.Y + prop.Len - exist.Y
prop.Len -= delta
if prop.Len <= 0 {
return empty, prop
}
}
if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len {
// remove the beginning of peop that overlaps with exist
delta := exist.Y + exist.Len - prop.Y
prop.Len -= delta
if prop.Len <= 0 {
return empty, prop
}
prop.X += delta // no test reaches this code
prop.Y += delta
}
if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y {
return leftdown, prop
}
if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y {
return rightup, prop
}
// prop can't be in an lcs that contains exist
return bad, prop
}
// manipulating Diag and lcs
// prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs
// or to its first Diag. prepend is only called to extend diagonals
// the backward direction.
func (lcs lcs) prepend(x, y int) lcs {
if len(lcs) > 0 {
d := &lcs[0]
if int(d.X) == x+1 && int(d.Y) == y+1 {
// extend the diagonal down and to the left
d.X, d.Y = int(x), int(y)
d.Len++
return lcs
}
}
r := diag{X: int(x), Y: int(y), Len: 1}
lcs = append([]diag{r}, lcs...)
return lcs
}
// append appends a diagonal, or extends the existing one.
// by adding the edge (x,y)-(x+1.y+1). append is only called
// to extend diagonals in the forward direction.
func (lcs lcs) append(x, y int) lcs {
if len(lcs) > 0 {
last := &lcs[len(lcs)-1]
// Expand last element if adjoining.
if last.X+last.Len == x && last.Y+last.Len == y {
last.Len++
return lcs
}
}
return append(lcs, diag{X: x, Y: y, Len: 1})
}
// enforce constraint on d, k
func ok(d, k int) bool {
return d >= 0 && -d <= k && k <= d
}
@@ -0,0 +1,140 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
import (
"log"
"math/rand"
"strings"
"testing"
)
type Btest struct {
a, b string
lcs []string
}
var Btests = []Btest{
{"aaabab", "abaab", []string{"abab", "aaab"}},
{"aabbba", "baaba", []string{"aaba"}},
{"cabbx", "cbabx", []string{"cabx", "cbbx"}},
{"c", "cb", []string{"c"}},
{"aaba", "bbb", []string{"b"}},
{"bbaabb", "b", []string{"b"}},
{"baaabb", "bbaba", []string{"bbb", "baa", "bab"}},
{"baaabb", "abbab", []string{"abb", "bab", "aab"}},
{"baaba", "aaabba", []string{"aaba"}},
{"ca", "cba", []string{"ca"}},
{"ccbcbc", "abba", []string{"bb"}},
{"ccbcbc", "aabba", []string{"bb"}},
{"ccb", "cba", []string{"cb"}},
{"caef", "axe", []string{"ae"}},
{"bbaabb", "baabb", []string{"baabb"}},
// Example from Myers:
{"abcabba", "cbabac", []string{"caba", "baba", "cbba"}},
{"3456aaa", "aaa", []string{"aaa"}},
{"aaa", "aaa123", []string{"aaa"}},
{"aabaa", "aacaa", []string{"aaaa"}},
{"1a", "a", []string{"a"}},
{"abab", "bb", []string{"bb"}},
{"123", "ab", []string{""}},
{"a", "b", []string{""}},
{"abc", "123", []string{""}},
{"aa", "aa", []string{"aa"}},
{"abcde", "12345", []string{""}},
{"aaa3456", "aaa", []string{"aaa"}},
{"abcde", "12345a", []string{"a"}},
{"ab", "123", []string{""}},
{"1a2", "a", []string{"a"}},
// for two-sided
{"babaab", "cccaba", []string{"aba"}},
{"aabbab", "cbcabc", []string{"bab"}},
{"abaabb", "bcacab", []string{"baab"}},
{"abaabb", "abaaaa", []string{"abaa"}},
{"bababb", "baaabb", []string{"baabb"}},
{"abbbaa", "cabacc", []string{"aba"}},
{"aabbaa", "aacaba", []string{"aaaa", "aaba"}},
}
func init() {
log.SetFlags(log.Lshortfile)
}
func check(t *testing.T, str string, lcs lcs, want []string) {
t.Helper()
if !lcs.valid() {
t.Errorf("bad lcs %v", lcs)
}
var got strings.Builder
for _, dd := range lcs {
got.WriteString(str[dd.X : dd.X+dd.Len])
}
ans := got.String()
for _, w := range want {
if ans == w {
return
}
}
t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans)
}
func checkDiffs(t *testing.T, before string, diffs []Diff, after string) {
t.Helper()
var ans strings.Builder
sofar := 0 // index of position in before
for _, d := range diffs {
if sofar < d.Start {
ans.WriteString(before[sofar:d.Start])
}
ans.WriteString(after[d.ReplStart:d.ReplEnd])
sofar = d.End
}
ans.WriteString(before[sofar:])
if ans.String() != after {
t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after)
}
}
func lcslen(l lcs) int {
ans := 0
for _, d := range l {
ans += int(d.Len)
}
return ans
}
// return a random string of length n made of characters from s
func randstr(s string, n int) string {
src := []rune(s)
x := make([]rune, n)
for i := 0; i < n; i++ {
x[i] = src[rand.Intn(len(src))]
}
return string(x)
}
func TestLcsFix(t *testing.T) {
tests := []struct{ before, after lcs }{
{lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}},
{lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}},
{lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}},
{lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}},
{lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}},
{lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}},
{lcs{}, lcs{}},
{lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}},
}
for n, x := range tests {
got := x.before.fix()
if len(got) != len(x.after) {
t.Errorf("got %v, expected %v, for %v", got, x.after, x.before)
}
olen := lcslen(x.after)
glen := lcslen(got)
if olen != glen {
t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before)
}
}
}
@@ -0,0 +1,156 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// package lcs contains code to find longest-common-subsequences
// (and diffs)
package lcs
/*
Compute longest-common-subsequences of two slices A, B using
algorithms from Myers' paper. A longest-common-subsequence
(LCS from now on) of A and B is a maximal set of lexically increasing
pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but
they all have the same length. An LCS determines a sequence of edits
that changes A into B.
The key concept is the edit graph of A and B.
If A has length N and B has length M, then the edit graph has
vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a
horizontal edge from v[i][j] to v[i+1][j] whenever both are in
the graph, and a vertical edge from v[i][j] to f[i][j+1] similarly.
When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1].
A path between in the graph between (0,0) and (N,M) determines a sequence
of edits converting A into B: each horizontal edge corresponds to removing
an element of A, and each vertical edge corresponds to inserting an
element of B.
A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph
is of length D if it has D non-diagonal edges. The algorithms generate
forward paths (in which at least one of x,y increases at each edge),
or backward paths (in which at least one of x,y decreases at each edge),
or a combination. (Note that the orientation is the traditional mathematical one,
with the origin in the lower-left corner.)
Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.)
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
b | | | ___/‾‾‾ | ___/‾‾‾ | | |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
c | | | | | | |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a a b b a a
The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at
the end of a maximal path of length D. (Because x-y=k it suffices to remember
only the x coordinate of the vertex.)
The forward algorithm: Find the longest diagonal starting at (0,0) and
label its end with D=0,k=0. From that vertex take a vertical step and
then follow the longest diagonal (up and to the right), and label that vertex
with D=1,k=-1. From the D=0,k=0 point take a horizontal step and the follow
the longest diagonal (up and to the right) and label that vertex
D=1,k=1. In the same way, having labelled all the D vertices,
from a vertex labelled D,k find two vertices
tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same
diagonal, in which case take the one with the larger x.
Eventually the path gets to (N,M), and the diagonals on it are the LCS.
Here is the edit graph with the ends of D-paths labelled. (So, for instance,
0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first
step is to go up the longest diagonal from (0,0).)
A:"aabbaa", B:"aacaba"
⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6)
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙
b | | | ___/‾‾‾ | ___/‾‾‾ | | |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙
c | | | | | | |
⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2)
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
a a b b a a
The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical
to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected,
there are 4 non-diagonal steps, and the diagonals form an LCS.
There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon):
A:"aabbaa", B:"aacaba"
⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙
b | | | ____/‾‾‾ | ____/‾‾‾ | | |
⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
(:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙
c | | | | | | |
⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
(:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
(:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙
a a b b a a
Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the
front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short.
We want to control how big D can be, by stopping when it gets too large. The forward algorithm then
privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable
asymmetry.
Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in
the edit graph look like.
A:"aabbaa", B:"aacaba"
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙
b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | |
⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙
c | | | | | | |
⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
a a b b a a
The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion
is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same
diagonal. (Here the edgegraph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward
2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path.
Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the
computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed
from the edgegraph. The implementation looks for a number of special cases to try to avoid computing an extra forward path.
If the two-sided algorithm has stop early (because D has become too large) it will have found a forward LCS and a
backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two
computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS
is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two
to form a best-effort LCS. In the worst case the forward partial LCS may have to
be recomputed.
*/
/* Eugene Myers paper is titled
"An O(ND) Difference Algorithm and Its Variations"
and can be found at
http://www.xmailserver.org/diff2.pdf
(There is a generic implementation of the algorithm the repository with git hash
b9ad7e4ade3a686d608e44475390ad428e60e7fc)
*/
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Copyright 2022 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Creates a zip file containing all numbered versions
# of the commit history of a large source file, for use
# as input data for the tests of the diff algorithm.
#
# Run script from root of the x/tools repo.
set -eu
# WARNING: This script will install the latest version of $file
# The largest real source file in the x/tools repo.
# file=internal/golang/completion/completion.go
# file=internal/golang/diagnostics.go
file=internal/protocol/tsprotocol.go
tmp=$(mktemp -d)
git log $file |
awk '/^commit / {print $2}' |
nl -ba -nrz |
while read n hash; do
git checkout --quiet $hash $file
cp -f $file $tmp/$n
done
(cd $tmp && zip -q - *) > testdata.zip
rm -fr $tmp
git restore --staged $file
git restore $file
echo "Created testdata.zip"
@@ -0,0 +1,55 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
import (
"fmt"
)
// For each D, vec[D] has length D+1,
// and the label for (D, k) is stored in vec[D][(D+k)/2].
type label struct {
vec [][]int
}
// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE
const debug = false
// debugging. check that the (d,k) pair is valid
// (that is, -d<=k<=d and d+k even)
func checkDK(D, k int) {
if k >= -D && k <= D && (D+k)%2 == 0 {
return
}
panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k))
}
func (t *label) set(D, k, x int) {
if debug {
checkDK(D, k)
}
for len(t.vec) <= D {
t.vec = append(t.vec, nil)
}
if t.vec[D] == nil {
t.vec[D] = make([]int, D+1)
}
t.vec[D][(D+k)/2] = x // known that D+k is even
}
func (t *label) get(d, k int) int {
if debug {
checkDK(d, k)
}
return int(t.vec[d][(d+k)/2])
}
func newtriang(limit int) label {
if limit < 100 {
// Preallocate if limit is not large.
return label{vec: make([][]int, limit)}
}
return label{}
}
@@ -0,0 +1,480 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
// TODO(adonovan): remove unclear references to "old" in this package.
import (
"fmt"
)
// A Diff is a replacement of a portion of A by a portion of B.
type Diff struct {
Start, End int // offsets of portion to delete in A
ReplStart, ReplEnd int // offset of replacement text in B
}
// DiffStrings returns the differences between two strings.
// It does not respect rune boundaries.
func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
// DiffBytes returns the differences between two byte sequences.
// It does not respect rune boundaries.
func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
// DiffRunes returns the differences between two rune sequences.
func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
func diff(seqs sequences) []Diff {
// A limit on how deeply the LCS algorithm should search. The value is just a guess.
const maxDiffs = 100
diff, _ := compute(seqs, twosided, maxDiffs/2)
return diff
}
// compute computes the list of differences between two sequences,
// along with the LCS. It is exercised directly by tests.
// The algorithm is one of {forward, backward, twosided}.
func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
if limit <= 0 {
limit = 1 << 25 // effectively infinity
}
alen, blen := seqs.lengths()
g := &editGraph{
seqs: seqs,
vf: newtriang(limit),
vb: newtriang(limit),
limit: limit,
ux: alen,
uy: blen,
delta: alen - blen,
}
lcs := algo(g)
diffs := lcs.toDiffs(alen, blen)
return diffs, lcs
}
// editGraph carries the information for computing the lcs of two sequences.
type editGraph struct {
seqs sequences
vf, vb label // forward and backward labels
limit int // maximal value of D
// the bounding rectangle of the current edit graph
lx, ly, ux, uy int
delta int // common subexpression: (ux-lx)-(uy-ly)
}
// toDiffs converts an LCS to a list of edits.
func (lcs lcs) toDiffs(alen, blen int) []Diff {
var diffs []Diff
var pa, pb int // offsets in a, b
for _, l := range lcs {
if pa < l.X || pb < l.Y {
diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
}
pa = l.X + l.Len
pb = l.Y + l.Len
}
if pa < alen || pb < blen {
diffs = append(diffs, Diff{pa, alen, pb, blen})
}
return diffs
}
// --- FORWARD ---
// fdone decides if the forward path has reached the upper right
// corner of the rectangle. If so, it also returns the computed lcs.
func (e *editGraph) fdone(D, k int) (bool, lcs) {
// x, y, k are relative to the rectangle
x := e.vf.get(D, k)
y := x - k
if x == e.ux && y == e.uy {
return true, e.forwardlcs(D, k)
}
return false, nil
}
// run the forward algorithm, until success or up to the limit on D.
func forward(e *editGraph) lcs {
e.setForward(0, 0, e.lx)
if ok, ans := e.fdone(0, 0); ok {
return ans
}
// from D to D+1
for D := 0; D < e.limit; D++ {
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
if ok, ans := e.fdone(D+1, -(D + 1)); ok {
return ans
}
e.setForward(D+1, D+1, e.getForward(D, D)+1)
if ok, ans := e.fdone(D+1, D+1); ok {
return ans
}
for k := -D + 1; k <= D-1; k += 2 {
// these are tricky and easy to get backwards
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
lookh := e.lookForward(k, e.getForward(D, k+1))
if lookv > lookh {
e.setForward(D+1, k, lookv)
} else {
e.setForward(D+1, k, lookh)
}
if ok, ans := e.fdone(D+1, k); ok {
return ans
}
}
}
// D is too large
// find the D path with maximal x+y inside the rectangle and
// use that to compute the found part of the lcs
kmax := -e.limit - 1
diagmax := -1
for k := -e.limit; k <= e.limit; k += 2 {
x := e.getForward(e.limit, k)
y := x - k
if x+y > diagmax && x <= e.ux && y <= e.uy {
diagmax, kmax = x+y, k
}
}
return e.forwardlcs(e.limit, kmax)
}
// recover the lcs by backtracking from the farthest point reached
func (e *editGraph) forwardlcs(D, k int) lcs {
var ans lcs
for x := e.getForward(D, k); x != 0 || x-k != 0; {
if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
// if (x-1,y) is labelled D-1, x--,D--,k--,continue
D, k, x = D-1, k-1, x-1
continue
} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
// if (x,y-1) is labelled D-1, x, D--,k++, continue
D, k = D-1, k+1
continue
}
// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
y := x - k
ans = ans.prepend(x+e.lx-1, y+e.ly-1)
x--
}
return ans
}
// start at (x,y), go up the diagonal as far as possible,
// and label the result with d
func (e *editGraph) lookForward(k, relx int) int {
rely := relx - k
x, y := relx+e.lx, rely+e.ly
if x < e.ux && y < e.uy {
x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
}
return x
}
func (e *editGraph) setForward(d, k, relx int) {
x := e.lookForward(k, relx)
e.vf.set(d, k, x-e.lx)
}
func (e *editGraph) getForward(d, k int) int {
x := e.vf.get(d, k)
return x
}
// --- BACKWARD ---
// bdone decides if the backward path has reached the lower left corner
func (e *editGraph) bdone(D, k int) (bool, lcs) {
// x, y, k are relative to the rectangle
x := e.vb.get(D, k)
y := x - (k + e.delta)
if x == 0 && y == 0 {
return true, e.backwardlcs(D, k)
}
return false, nil
}
// run the backward algorithm, until success or up to the limit on D.
func backward(e *editGraph) lcs {
e.setBackward(0, 0, e.ux)
if ok, ans := e.bdone(0, 0); ok {
return ans
}
// from D to D+1
for D := 0; D < e.limit; D++ {
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
if ok, ans := e.bdone(D+1, -(D + 1)); ok {
return ans
}
e.setBackward(D+1, D+1, e.getBackward(D, D))
if ok, ans := e.bdone(D+1, D+1); ok {
return ans
}
for k := -D + 1; k <= D-1; k += 2 {
// these are tricky and easy to get wrong
lookv := e.lookBackward(k, e.getBackward(D, k-1))
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
if lookv < lookh {
e.setBackward(D+1, k, lookv)
} else {
e.setBackward(D+1, k, lookh)
}
if ok, ans := e.bdone(D+1, k); ok {
return ans
}
}
}
// D is too large
// find the D path with minimal x+y inside the rectangle and
// use that to compute the part of the lcs found
kmax := -e.limit - 1
diagmin := 1 << 25
for k := -e.limit; k <= e.limit; k += 2 {
x := e.getBackward(e.limit, k)
y := x - (k + e.delta)
if x+y < diagmin && x >= 0 && y >= 0 {
diagmin, kmax = x+y, k
}
}
if kmax < -e.limit {
panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
}
return e.backwardlcs(e.limit, kmax)
}
// recover the lcs by backtracking
func (e *editGraph) backwardlcs(D, k int) lcs {
var ans lcs
for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
// D--, k--, x unchanged
D, k = D-1, k-1
continue
} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
// D--, k++, x++
D, k, x = D-1, k+1, x+1
continue
}
y := x - (k + e.delta)
ans = ans.append(x+e.lx, y+e.ly)
x++
}
return ans
}
// start at (x,y), go down the diagonal as far as possible,
func (e *editGraph) lookBackward(k, relx int) int {
rely := relx - (k + e.delta) // forward k = k + e.delta
x, y := relx+e.lx, rely+e.ly
if x > 0 && y > 0 {
x -= e.seqs.commonSuffixLen(0, x, 0, y)
}
return x
}
// convert to rectangle, and label the result with d
func (e *editGraph) setBackward(d, k, relx int) {
x := e.lookBackward(k, relx)
e.vb.set(d, k, x-e.lx)
}
func (e *editGraph) getBackward(d, k int) int {
x := e.vb.get(d, k)
return x
}
// -- TWOSIDED ---
func twosided(e *editGraph) lcs {
// The termination condition could be improved, as either the forward
// or backward pass could succeed before Myers' Lemma applies.
// Aside from questions of efficiency (is the extra testing cost-effective)
// this is more likely to matter when e.limit is reached.
e.setForward(0, 0, e.lx)
e.setBackward(0, 0, e.ux)
// from D to D+1
for D := 0; D < e.limit; D++ {
// just finished a backwards pass, so check
if got, ok := e.twoDone(D, D); ok {
return e.twolcs(D, D, got)
}
// do a forwards pass (D to D+1)
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
e.setForward(D+1, D+1, e.getForward(D, D)+1)
for k := -D + 1; k <= D-1; k += 2 {
// these are tricky and easy to get backwards
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
lookh := e.lookForward(k, e.getForward(D, k+1))
if lookv > lookh {
e.setForward(D+1, k, lookv)
} else {
e.setForward(D+1, k, lookh)
}
}
// just did a forward pass, so check
if got, ok := e.twoDone(D+1, D); ok {
return e.twolcs(D+1, D, got)
}
// do a backward pass, D to D+1
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
e.setBackward(D+1, D+1, e.getBackward(D, D))
for k := -D + 1; k <= D-1; k += 2 {
// these are tricky and easy to get wrong
lookv := e.lookBackward(k, e.getBackward(D, k-1))
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
if lookv < lookh {
e.setBackward(D+1, k, lookv)
} else {
e.setBackward(D+1, k, lookh)
}
}
}
// D too large. combine a forward and backward partial lcs
// first, a forward one
kmax := -e.limit - 1
diagmax := -1
for k := -e.limit; k <= e.limit; k += 2 {
x := e.getForward(e.limit, k)
y := x - k
if x+y > diagmax && x <= e.ux && y <= e.uy {
diagmax, kmax = x+y, k
}
}
if kmax < -e.limit {
panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
}
lcs := e.forwardlcs(e.limit, kmax)
// now a backward one
// find the D path with minimal x+y inside the rectangle and
// use that to compute the lcs
diagmin := 1 << 25 // infinity
for k := -e.limit; k <= e.limit; k += 2 {
x := e.getBackward(e.limit, k)
y := x - (k + e.delta)
if x+y < diagmin && x >= 0 && y >= 0 {
diagmin, kmax = x+y, k
}
}
if kmax < -e.limit {
panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
}
lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
ans := lcs.fix()
return ans
}
// Does Myers' Lemma apply?
func (e *editGraph) twoDone(df, db int) (int, bool) {
if (df+db+e.delta)%2 != 0 {
return 0, false // diagonals cannot overlap
}
kmin := -db + e.delta
if -df > kmin {
kmin = -df
}
kmax := db + e.delta
if df < kmax {
kmax = df
}
for k := kmin; k <= kmax; k += 2 {
x := e.vf.get(df, k)
u := e.vb.get(db, k-e.delta)
if u <= x {
// is it worth looking at all the other k?
for l := k; l <= kmax; l += 2 {
x := e.vf.get(df, l)
y := x - l
u := e.vb.get(db, l-e.delta)
v := u - l
if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
return l, true
}
}
return k, true
}
}
return 0, false
}
func (e *editGraph) twolcs(df, db, kf int) lcs {
// db==df || db+1==df
x := e.vf.get(df, kf)
y := x - kf
kb := kf - e.delta
u := e.vb.get(db, kb)
v := u - kf
// Myers proved there is a df-path from (0,0) to (u,v)
// and a db-path from (x,y) to (N,M).
// In the first case the overall path is the forward path
// to (u,v) followed by the backward path to (N,M).
// In the second case the path is the backward path to (x,y)
// followed by the forward path to (x,y) from (0,0).
// Look for some special cases to avoid computing either of these paths.
if x == u {
// "babaab" "cccaba"
// already patched together
lcs := e.forwardlcs(df, kf)
lcs = append(lcs, e.backwardlcs(db, kb)...)
return lcs.sort()
}
// is (u-1,v) or (u,v-1) labelled df-1?
// if so, that forward df-1-path plus a horizontal or vertical edge
// is the df-path to (u,v), then plus the db-path to (N,M)
if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
// "aabbab" "cbcabc"
lcs := e.forwardlcs(df-1, u-1-v)
lcs = append(lcs, e.backwardlcs(db, kb)...)
return lcs.sort()
}
if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
// "abaabb" "bcacab"
lcs := e.forwardlcs(df-1, u-(v-1))
lcs = append(lcs, e.backwardlcs(db, kb)...)
return lcs.sort()
}
// The path can't possibly contribute to the lcs because it
// is all horizontal or vertical edges
if u == 0 || v == 0 || x == e.ux || y == e.uy {
// "abaabb" "abaaaa"
if u == 0 || v == 0 {
return e.backwardlcs(db, kb)
}
return e.forwardlcs(df, kf)
}
// is (x+1,y) or (x,y+1) labelled db-1?
if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
// "bababb" "baaabb"
lcs := e.backwardlcs(db-1, kb+1)
lcs = append(lcs, e.forwardlcs(df, kf)...)
return lcs.sort()
}
if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
// "abbbaa" "cabacc"
lcs := e.backwardlcs(db-1, kb-1)
lcs = append(lcs, e.forwardlcs(df, kf)...)
return lcs.sort()
}
// need to compute another path
// "aabbaa" "aacaba"
lcs := e.backwardlcs(db, kb)
oldx, oldy := e.ux, e.uy
e.ux = u
e.uy = v
lcs = append(lcs, forward(e)...)
e.ux, e.uy = oldx, oldy
return lcs.sort()
}
@@ -0,0 +1,251 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
import (
"fmt"
"log"
"math/rand"
"os"
"strings"
"testing"
)
func TestAlgosOld(t *testing.T) {
for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} {
t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) {
for _, tx := range Btests {
lim := len(tx.a) + len(tx.b)
diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim)
check(t, tx.a, lcs, tx.lcs)
checkDiffs(t, tx.a, diffs, tx.b)
diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim)
check(t, tx.b, lcs, tx.lcs)
checkDiffs(t, tx.b, diffs, tx.a)
}
})
}
}
func TestIntOld(t *testing.T) {
// need to avoid any characters in btests
lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB"
for _, tx := range Btests {
if len(tx.a) < 2 || len(tx.b) < 2 {
continue
}
left := tx.a + lfill
right := tx.b + rfill
lim := len(tx.a) + len(tx.b)
diffs, lcs := compute(stringSeqs{left, right}, twosided, lim)
check(t, left, lcs, tx.lcs)
checkDiffs(t, left, diffs, right)
diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
check(t, right, lcs, tx.lcs)
checkDiffs(t, right, diffs, left)
left = lfill + tx.a
right = rfill + tx.b
diffs, lcs = compute(stringSeqs{left, right}, twosided, lim)
check(t, left, lcs, tx.lcs)
checkDiffs(t, left, diffs, right)
diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
check(t, right, lcs, tx.lcs)
checkDiffs(t, right, diffs, left)
}
}
func TestSpecialOld(t *testing.T) { // exercises lcs.fix
a := "golang.org/x/tools/intern"
b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern"
diffs, lcs := compute(stringSeqs{a, b}, twosided, 4)
if !lcs.valid() {
t.Errorf("%d,%v", len(diffs), lcs)
}
}
func TestRegressionOld001(t *testing.T) {
a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
for i := 1; i < len(b); i++ {
diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls
if !lcs.valid() {
t.Errorf("%d,%v", len(diffs), lcs)
}
checkDiffs(t, a, diffs, b)
}
}
func TestRegressionOld002(t *testing.T) {
a := "n\"\n)\n"
b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
for i := 1; i <= len(b); i++ {
diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
if !lcs.valid() {
t.Errorf("%d,%v", len(diffs), lcs)
}
checkDiffs(t, a, diffs, b)
}
}
func TestRegressionOld003(t *testing.T) {
a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1"
b := "golang.org/x/hello v1"
for i := 1; i <= len(a); i++ {
diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
if !lcs.valid() {
t.Errorf("%d,%v", len(diffs), lcs)
}
checkDiffs(t, a, diffs, b)
}
}
func TestRandOld(t *testing.T) {
rand.Seed(1)
for i := 0; i < 1000; i++ {
// TODO(adonovan): use ASCII and bytesSeqs here? The use of
// non-ASCII isn't relevant to the property exercised by the test.
a := []rune(randstr("abω", 16))
b := []rune(randstr("abωc", 16))
seq := runesSeqs{a, b}
const lim = 24 // large enough to get true lcs
_, forw := compute(seq, forward, lim)
_, back := compute(seq, backward, lim)
_, two := compute(seq, twosided, lim)
if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) {
t.Logf("\n%v\n%v\n%v", forw, back, two)
t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two))
}
if !two.valid() || !forw.valid() || !back.valid() {
t.Errorf("check failure")
}
}
}
// TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes})
// to ensure at least minimal parity of the three representations.
func TestDiffAPI(t *testing.T) {
for _, test := range []struct {
a, b string
wantStrings, wantBytes, wantRunes string
}{
{"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII
{"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII
} {
gotStrings := fmt.Sprint(DiffStrings(test.a, test.b))
if gotStrings != test.wantStrings {
t.Errorf("DiffStrings(%q, %q) = %v, want %v",
test.a, test.b, gotStrings, test.wantStrings)
}
gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b)))
if gotBytes != test.wantBytes {
t.Errorf("DiffBytes(%q, %q) = %v, want %v",
test.a, test.b, gotBytes, test.wantBytes)
}
gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b)))
if gotRunes != test.wantRunes {
t.Errorf("DiffRunes(%q, %q) = %v, want %v",
test.a, test.b, gotRunes, test.wantRunes)
}
}
}
func BenchmarkTwoOld(b *testing.B) {
tests := genBench("abc", 96)
for i := 0; i < b.N; i++ {
for _, tt := range tests {
_, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100)
if !two.valid() {
b.Error("check failed")
}
}
}
}
func BenchmarkForwOld(b *testing.B) {
tests := genBench("abc", 96)
for i := 0; i < b.N; i++ {
for _, tt := range tests {
_, two := compute(stringSeqs{tt.before, tt.after}, forward, 100)
if !two.valid() {
b.Error("check failed")
}
}
}
}
func genBench(set string, n int) []struct{ before, after string } {
// before and after for benchmarks. 24 strings of length n with
// before and after differing at least once, and about 5%
rand.Seed(3)
var ans []struct{ before, after string }
for i := 0; i < 24; i++ {
// maybe b should have an approximately known number of diffs
a := randstr(set, n)
cnt := 0
bb := make([]rune, 0, n)
for _, r := range a {
if rand.Float64() < .05 {
cnt++
r = 'N'
}
bb = append(bb, r)
}
if cnt == 0 {
// avoid == shortcut
bb[n/2] = 'N'
}
ans = append(ans, struct{ before, after string }{a, string(bb)})
}
return ans
}
// This benchmark represents a common case for a diff command:
// large file with a single relatively small diff in the middle.
// (It's not clear whether this is representative of gopls workloads
// or whether it is important to gopls diff performance.)
//
// TODO(adonovan) opt: it could be much faster. For example,
// comparing a file against itself is about 10x faster than with the
// small deletion in the middle. Strangely, comparing a file against
// itself minus the last byte is faster still; I don't know why.
// There is much low-hanging fruit here for further improvement.
func BenchmarkLargeFileSmallDiff(b *testing.B) {
data, err := os.ReadFile("old.go") // large file
if err != nil {
log.Fatal(err)
}
n := len(data)
src := string(data)
dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle
b.Run("string", func(b *testing.B) {
for i := 0; i < b.N; i++ {
compute(stringSeqs{src, dst}, twosided, len(src)+len(dst))
}
})
srcBytes := []byte(src)
dstBytes := []byte(dst)
b.Run("bytes", func(b *testing.B) {
for i := 0; i < b.N; i++ {
compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes))
}
})
srcRunes := []rune(src)
dstRunes := []rune(dst)
b.Run("runes", func(b *testing.B) {
for i := 0; i < b.N; i++ {
compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes))
}
})
}
@@ -0,0 +1,113 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lcs
// This file defines the abstract sequence over which the LCS algorithm operates.
// sequences abstracts a pair of sequences, A and B.
type sequences interface {
lengths() (int, int) // len(A), len(B)
commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj]))
commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj]))
}
type stringSeqs struct{ a, b string }
func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
}
func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj])
}
// The explicit capacity in s[i:j:j] leads to more efficient code.
type bytesSeqs struct{ a, b []byte }
func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
type runesSeqs struct{ a, b []rune }
func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
}
// TODO(adonovan): optimize these functions using ideas from:
// - https://go.dev/cl/408116 common.go
// - https://go.dev/cl/421435 xor_generic.go
// TODO(adonovan): factor using generics when available,
// but measure performance impact.
// commonPrefixLen* returns the length of the common prefix of a[ai:aj] and b[bi:bj].
func commonPrefixLenBytes(a, b []byte) int {
n := min(len(a), len(b))
i := 0
for i < n && a[i] == b[i] {
i++
}
return i
}
func commonPrefixLenRunes(a, b []rune) int {
n := min(len(a), len(b))
i := 0
for i < n && a[i] == b[i] {
i++
}
return i
}
func commonPrefixLenString(a, b string) int {
n := min(len(a), len(b))
i := 0
for i < n && a[i] == b[i] {
i++
}
return i
}
// commonSuffixLen* returns the length of the common suffix of a[ai:aj] and b[bi:bj].
func commonSuffixLenBytes(a, b []byte) int {
n := min(len(a), len(b))
i := 0
for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
i++
}
return i
}
func commonSuffixLenRunes(a, b []rune) int {
n := min(len(a), len(b))
i := 0
for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
i++
}
return i
}
func commonSuffixLenString(a, b string) int {
n := min(len(a), len(b))
i := 0
for i < n && a[len(a)-1-i] == b[len(b)-1-i] {
i++
}
return i
}
func min(x, y int) int {
if x < y {
return x
} else {
return y
}
}
@@ -0,0 +1,246 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package myers implements the Myers diff algorithm.
package myers
import (
"strings"
"golang.org/x/tools/internal/diff"
)
// Sources:
// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
// ComputeEdits returns the diffs of two strings using a simple
// line-based implementation, like [diff.Strings].
//
// Deprecated: this implementation is moribund. However, when diffs
// appear in marker test expectations, they are the particular diffs
// produced by this implementation. The marker test framework
// asserts diff(orig, got)==wantDiff, but ideally it would compute
// got==apply(orig, wantDiff) so that the notation of the diff
// is immaterial.
func ComputeEdits(before, after string) []diff.Edit {
beforeLines := splitLines(before)
ops := operations(beforeLines, splitLines(after))
// Build a table mapping line number to offset.
lineOffsets := make([]int, 0, len(beforeLines)+1)
total := 0
for i := range beforeLines {
lineOffsets = append(lineOffsets, total)
total += len(beforeLines[i])
}
lineOffsets = append(lineOffsets, total) // EOF
edits := make([]diff.Edit, 0, len(ops))
for _, op := range ops {
start, end := lineOffsets[op.I1], lineOffsets[op.I2]
switch op.Kind {
case opDelete:
// Delete: before[I1:I2] is deleted.
edits = append(edits, diff.Edit{Start: start, End: end})
case opInsert:
// Insert: after[J1:J2] is inserted at before[I1:I1].
if content := strings.Join(op.Content, ""); content != "" {
edits = append(edits, diff.Edit{Start: start, End: end, New: content})
}
}
}
return edits
}
// opKind is used to denote the type of operation a line represents.
type opKind int
const (
opDelete opKind = iota // line deleted from input (-)
opInsert // line inserted into output (+)
opEqual // line present in input and output
)
func (kind opKind) String() string {
switch kind {
case opDelete:
return "delete"
case opInsert:
return "insert"
case opEqual:
return "equal"
default:
panic("unknown opKind")
}
}
type operation struct {
Kind opKind
Content []string // content from b
I1, I2 int // indices of the line in a
J1 int // indices of the line in b, J2 implied by len(Content)
}
// operations returns the list of operations to convert a into b, consolidating
// operations for multiple lines and not including equal lines.
func operations(a, b []string) []*operation {
if len(a) == 0 && len(b) == 0 {
return nil
}
trace, offset := shortestEditSequence(a, b)
snakes := backtrack(trace, len(a), len(b), offset)
M, N := len(a), len(b)
var i int
solution := make([]*operation, len(a)+len(b))
add := func(op *operation, i2, j2 int) {
if op == nil {
return
}
op.I2 = i2
if op.Kind == opInsert {
op.Content = b[op.J1:j2]
}
solution[i] = op
i++
}
x, y := 0, 0
for _, snake := range snakes {
if len(snake) < 2 {
continue
}
var op *operation
// delete (horizontal)
for snake[0]-snake[1] > x-y {
if op == nil {
op = &operation{
Kind: opDelete,
I1: x,
J1: y,
}
}
x++
if x == M {
break
}
}
add(op, x, y)
op = nil
// insert (vertical)
for snake[0]-snake[1] < x-y {
if op == nil {
op = &operation{
Kind: opInsert,
I1: x,
J1: y,
}
}
y++
}
add(op, x, y)
op = nil
// equal (diagonal)
for x < snake[0] {
x++
y++
}
if x >= M && y >= N {
break
}
}
return solution[:i]
}
// backtrack uses the trace for the edit sequence computation and returns the
// "snakes" that make up the solution. A "snake" is a single deletion or
// insertion followed by zero or diagonals.
func backtrack(trace [][]int, x, y, offset int) [][]int {
snakes := make([][]int, len(trace))
d := len(trace) - 1
for ; x > 0 && y > 0 && d > 0; d-- {
V := trace[d]
if len(V) == 0 {
continue
}
snakes[d] = []int{x, y}
k := x - y
var kPrev int
if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
kPrev = k + 1
} else {
kPrev = k - 1
}
x = V[kPrev+offset]
y = x - kPrev
}
if x < 0 || y < 0 {
return snakes
}
snakes[d] = []int{x, y}
return snakes
}
// shortestEditSequence returns the shortest edit sequence that converts a into b.
func shortestEditSequence(a, b []string) ([][]int, int) {
M, N := len(a), len(b)
V := make([]int, 2*(N+M)+1)
offset := N + M
trace := make([][]int, N+M+1)
// Iterate through the maximum possible length of the SES (N+M).
for d := 0; d <= N+M; d++ {
copyV := make([]int, len(V))
// k lines are represented by the equation y = x - k. We move in
// increments of 2 because end points for even d are on even k lines.
for k := -d; k <= d; k += 2 {
// At each point, we either go down or to the right. We go down if
// k == -d, and we go to the right if k == d. We also prioritize
// the maximum x value, because we prefer deletions to insertions.
var x int
if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
x = V[k+1+offset] // down
} else {
x = V[k-1+offset] + 1 // right
}
y := x - k
// Diagonal moves while we have equal contents.
for x < M && y < N && a[x] == b[y] {
x++
y++
}
V[k+offset] = x
// Return if we've exceeded the maximum values.
if x == M && y == N {
// Makes sure to save the state of the array before returning.
copy(copyV, V)
trace[d] = copyV
return trace, offset
}
}
// Save the state of the array.
copy(copyV, V)
trace[d] = copyV
}
return nil, 0
}
func splitLines(text string) []string {
lines := strings.SplitAfter(text, "\n")
if lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
return lines
}
@@ -0,0 +1,16 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package myers_test
import (
"testing"
"golang.org/x/tools/internal/diff/difftest"
"golang.org/x/tools/internal/diff/myers"
)
func TestDiff(t *testing.T) {
difftest.DiffTest(t, myers.ComputeEdits)
}
@@ -0,0 +1,99 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff
import (
"bytes"
"unicode/utf8"
"golang.org/x/tools/internal/diff/lcs"
)
// Strings computes the differences between two strings.
// The resulting edits respect rune boundaries.
func Strings(before, after string) []Edit {
if before == after {
return nil // common case
}
if isASCII(before) && isASCII(after) {
// TODO(adonovan): opt: specialize diffASCII for strings.
return diffASCII([]byte(before), []byte(after))
}
return diffRunes([]rune(before), []rune(after))
}
// Bytes computes the differences between two byte slices.
// The resulting edits respect rune boundaries.
func Bytes(before, after []byte) []Edit {
if bytes.Equal(before, after) {
return nil // common case
}
if isASCII(before) && isASCII(after) {
return diffASCII(before, after)
}
return diffRunes(runes(before), runes(after))
}
func diffASCII(before, after []byte) []Edit {
diffs := lcs.DiffBytes(before, after)
// Convert from LCS diffs.
res := make([]Edit, len(diffs))
for i, d := range diffs {
res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
}
return res
}
func diffRunes(before, after []rune) []Edit {
diffs := lcs.DiffRunes(before, after)
// The diffs returned by the lcs package use indexes
// into whatever slice was passed in.
// Convert rune offsets to byte offsets.
res := make([]Edit, len(diffs))
lastEnd := 0
utf8Len := 0
for i, d := range diffs {
utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
start := utf8Len
utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
lastEnd = d.End
}
return res
}
// runes is like []rune(string(bytes)) without the duplicate allocation.
func runes(bytes []byte) []rune {
n := utf8.RuneCount(bytes)
runes := make([]rune, n)
for i := 0; i < n; i++ {
r, sz := utf8.DecodeRune(bytes)
bytes = bytes[sz:]
runes[i] = r
}
return runes
}
// runesLen returns the length in bytes of the UTF-8 encoding of runes.
func runesLen(runes []rune) (len int) {
for _, r := range runes {
len += utf8.RuneLen(r)
}
return len
}
// isASCII reports whether s contains only ASCII.
func isASCII[S string | []byte](s S) bool {
for i := 0; i < len(s); i++ {
if s[i] >= utf8.RuneSelf {
return false
}
}
return true
}
@@ -0,0 +1,251 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff
import (
"fmt"
"log"
"strings"
)
// DefaultContextLines is the number of unchanged lines of surrounding
// context displayed by Unified. Use ToUnified to specify a different value.
const DefaultContextLines = 3
// Unified returns a unified diff of the old and new strings.
// The old and new labels are the names of the old and new files.
// If the strings are equal, it returns the empty string.
func Unified(oldLabel, newLabel, old, new string) string {
edits := Strings(old, new)
unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines)
if err != nil {
// Can't happen: edits are consistent.
log.Fatalf("internal error in diff.Unified: %v", err)
}
return unified
}
// ToUnified applies the edits to content and returns a unified diff,
// with contextLines lines of (unchanged) context around each diff hunk.
// The old and new labels are the names of the content and result files.
// It returns an error if the edits are inconsistent; see ApplyEdits.
func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) {
u, err := toUnified(oldLabel, newLabel, content, edits, contextLines)
if err != nil {
return "", err
}
return u.String(), nil
}
// unified represents a set of edits as a unified diff.
type unified struct {
// from is the name of the original file.
from string
// to is the name of the modified file.
to string
// hunks is the set of edit hunks needed to transform the file content.
hunks []*hunk
}
// Hunk represents a contiguous set of line edits to apply.
type hunk struct {
// The line in the original source where the hunk starts.
fromLine int
// The line in the original source where the hunk finishes.
toLine int
// The set of line based edits to apply.
lines []line
}
// Line represents a single line operation to apply as part of a Hunk.
type line struct {
// kind is the type of line this represents, deletion, insertion or copy.
kind opKind
// content is the content of this line.
// For deletion it is the line being removed, for all others it is the line
// to put in the output.
content string
}
// opKind is used to denote the type of operation a line represents.
type opKind int
const (
// opDelete is the operation kind for a line that is present in the input
// but not in the output.
opDelete opKind = iota
// opInsert is the operation kind for a line that is new in the output.
opInsert
// opEqual is the operation kind for a line that is the same in the input and
// output, often used to provide context around edited lines.
opEqual
)
// String returns a human readable representation of an OpKind. It is not
// intended for machine processing.
func (k opKind) String() string {
switch k {
case opDelete:
return "delete"
case opInsert:
return "insert"
case opEqual:
return "equal"
default:
panic("unknown operation kind")
}
}
// toUnified takes a file contents and a sequence of edits, and calculates
// a unified diff that represents those edits.
func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) {
gap := contextLines * 2
u := unified{
from: fromName,
to: toName,
}
if len(edits) == 0 {
return u, nil
}
var err error
edits, err = lineEdits(content, edits) // expand to whole lines
if err != nil {
return u, err
}
lines := splitLines(content)
var h *hunk
last := 0
toLine := 0
for _, edit := range edits {
// Compute the zero-based line numbers of the edit start and end.
// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
start := strings.Count(content[:edit.Start], "\n")
end := strings.Count(content[:edit.End], "\n")
if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
end++ // EOF counts as an implicit newline
}
switch {
case h != nil && start == last:
//direct extension
case h != nil && start <= last+gap:
//within range of previous lines, add the joiners
addEqualLines(h, lines, last, start)
default:
//need to start a new hunk
if h != nil {
// add the edge to the previous hunk
addEqualLines(h, lines, last, last+contextLines)
u.hunks = append(u.hunks, h)
}
toLine += start - last
h = &hunk{
fromLine: start + 1,
toLine: toLine + 1,
}
// add the edge to the new hunk
delta := addEqualLines(h, lines, start-contextLines, start)
h.fromLine -= delta
h.toLine -= delta
}
last = start
for i := start; i < end; i++ {
h.lines = append(h.lines, line{kind: opDelete, content: lines[i]})
last++
}
if edit.New != "" {
for _, content := range splitLines(edit.New) {
h.lines = append(h.lines, line{kind: opInsert, content: content})
toLine++
}
}
}
if h != nil {
// add the edge to the final hunk
addEqualLines(h, lines, last, last+contextLines)
u.hunks = append(u.hunks, h)
}
return u, nil
}
func splitLines(text string) []string {
lines := strings.SplitAfter(text, "\n")
if lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
return lines
}
func addEqualLines(h *hunk, lines []string, start, end int) int {
delta := 0
for i := start; i < end; i++ {
if i < 0 {
continue
}
if i >= len(lines) {
return delta
}
h.lines = append(h.lines, line{kind: opEqual, content: lines[i]})
delta++
}
return delta
}
// String converts a unified diff to the standard textual form for that diff.
// The output of this function can be passed to tools like patch.
func (u unified) String() string {
if len(u.hunks) == 0 {
return ""
}
b := new(strings.Builder)
fmt.Fprintf(b, "--- %s\n", u.from)
fmt.Fprintf(b, "+++ %s\n", u.to)
for _, hunk := range u.hunks {
fromCount, toCount := 0, 0
for _, l := range hunk.lines {
switch l.kind {
case opDelete:
fromCount++
case opInsert:
toCount++
default:
fromCount++
toCount++
}
}
fmt.Fprint(b, "@@")
if fromCount > 1 {
fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount)
} else if hunk.fromLine == 1 && fromCount == 0 {
// Match odd GNU diff -u behavior adding to empty file.
fmt.Fprintf(b, " -0,0")
} else {
fmt.Fprintf(b, " -%d", hunk.fromLine)
}
if toCount > 1 {
fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount)
} else if hunk.toLine == 1 && toCount == 0 {
// Match odd GNU diff -u behavior adding to empty file.
fmt.Fprintf(b, " +0,0")
} else {
fmt.Fprintf(b, " +%d", hunk.toLine)
}
fmt.Fprint(b, " @@\n")
for _, l := range hunk.lines {
switch l.kind {
case opDelete:
fmt.Fprintf(b, "-%s", l.content)
case opInsert:
fmt.Fprintf(b, "+%s", l.content)
default:
fmt.Fprintf(b, " %s", l.content)
}
if !strings.HasSuffix(l.content, "\n") {
fmt.Fprintf(b, "\n\\ No newline at end of file\n")
}
}
}
return b.String()
}