whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package diff computes differences between text files or strings.
|
||||
package diff
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// An Edit describes the replacement of a portion of a text file.
//
// The bytes of the original text in the half-open range [Start, End)
// are replaced by New. An edit with Start == End is a pure insertion.
type Edit struct {
	Start int    // byte offset of the first byte of the region to replace
	End   int    // byte offset just past the region to replace
	New   string // the replacement text
}

// String renders the edit in a compact, Go-like form. The replacement
// text is quoted so that newlines and empty strings remain visible.
func (e Edit) String() string {
	const layout = "{Start:%d,End:%d,New:%q}"
	return fmt.Sprintf(layout, e.Start, e.End, e.New)
}
|
||||
|
||||
// Apply applies a sequence of edits to the src buffer and returns the
|
||||
// result. Edits are applied in order of start offset; edits with the
|
||||
// same start offset are applied in they order they were provided.
|
||||
//
|
||||
// Apply returns an error if any edit is out of bounds,
|
||||
// or if any pair of edits is overlapping.
|
||||
func Apply(src string, edits []Edit) (string, error) {
|
||||
edits, size, err := validate(src, edits)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Apply edits.
|
||||
out := make([]byte, 0, size)
|
||||
lastEnd := 0
|
||||
for _, edit := range edits {
|
||||
if lastEnd < edit.Start {
|
||||
out = append(out, src[lastEnd:edit.Start]...)
|
||||
}
|
||||
out = append(out, edit.New...)
|
||||
lastEnd = edit.End
|
||||
}
|
||||
out = append(out, src[lastEnd:]...)
|
||||
|
||||
if len(out) != size {
|
||||
panic("wrong size")
|
||||
}
|
||||
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
// ApplyBytes is like Apply, but it accepts a byte slice.
|
||||
// The result is always a new array.
|
||||
func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
|
||||
res, err := Apply(string(src), edits)
|
||||
return []byte(res), err
|
||||
}
|
||||
|
||||
// validate checks that edits are consistent with src,
|
||||
// and returns the size of the patched output.
|
||||
// It may return a different slice.
|
||||
func validate(src string, edits []Edit) ([]Edit, int, error) {
|
||||
if !sort.IsSorted(editsSort(edits)) {
|
||||
edits = append([]Edit(nil), edits...)
|
||||
SortEdits(edits)
|
||||
}
|
||||
|
||||
// Check validity of edits and compute final size.
|
||||
size := len(src)
|
||||
lastEnd := 0
|
||||
for _, edit := range edits {
|
||||
if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
|
||||
return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
|
||||
}
|
||||
if edit.Start < lastEnd {
|
||||
return nil, 0, fmt.Errorf("diff has overlapping edits")
|
||||
}
|
||||
size += len(edit.New) + edit.Start - edit.End
|
||||
lastEnd = edit.End
|
||||
}
|
||||
|
||||
return edits, size, nil
|
||||
}
|
||||
|
||||
// SortEdits orders a slice of Edits by (start, end) offset.
|
||||
// This ordering puts insertions (end = start) before deletions
|
||||
// (end > start) at the same point, but uses a stable sort to preserve
|
||||
// the order of multiple insertions at the same point.
|
||||
// (Apply detects multiple deletions at the same point as an error.)
|
||||
func SortEdits(edits []Edit) {
|
||||
sort.Stable(editsSort(edits))
|
||||
}
|
||||
|
||||
type editsSort []Edit
|
||||
|
||||
func (a editsSort) Len() int { return len(a) }
|
||||
func (a editsSort) Less(i, j int) bool {
|
||||
if cmp := a[i].Start - a[j].Start; cmp != 0 {
|
||||
return cmp < 0
|
||||
}
|
||||
return a[i].End < a[j].End
|
||||
}
|
||||
func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
|
||||
// lineEdits expands and merges a sequence of edits so that each
|
||||
// resulting edit replaces one or more complete lines.
|
||||
// See ApplyEdits for preconditions.
|
||||
func lineEdits(src string, edits []Edit) ([]Edit, error) {
|
||||
edits, _, err := validate(src, edits)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Do all deletions begin and end at the start of a line,
|
||||
// and all insertions end with a newline?
|
||||
// (This is merely a fast path.)
|
||||
for _, edit := range edits {
|
||||
if edit.Start >= len(src) || // insertion at EOF
|
||||
edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
|
||||
edit.End > 0 && src[edit.End-1] != '\n' || // not at line start
|
||||
edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
|
||||
goto expand // slow path
|
||||
}
|
||||
}
|
||||
return edits, nil // aligned
|
||||
|
||||
expand:
|
||||
if len(edits) == 0 {
|
||||
return edits, nil // no edits (unreachable due to fast path)
|
||||
}
|
||||
expanded := make([]Edit, 0, len(edits)) // a guess
|
||||
prev := edits[0]
|
||||
// TODO(adonovan): opt: start from the first misaligned edit.
|
||||
// TODO(adonovan): opt: avoid quadratic cost of string += string.
|
||||
for _, edit := range edits[1:] {
|
||||
between := src[prev.End:edit.Start]
|
||||
if !strings.Contains(between, "\n") {
|
||||
// overlapping lines: combine with previous edit.
|
||||
prev.New += between + edit.New
|
||||
prev.End = edit.End
|
||||
} else {
|
||||
// non-overlapping lines: flush previous edit.
|
||||
expanded = append(expanded, expandEdit(prev, src))
|
||||
prev = edit
|
||||
}
|
||||
}
|
||||
return append(expanded, expandEdit(prev, src)), nil // flush final edit
|
||||
}
|
||||
|
||||
// expandEdit returns edit expanded to complete whole lines.
|
||||
func expandEdit(edit Edit, src string) Edit {
|
||||
// Expand start left to start of line.
|
||||
// (delta is the zero-based column number of start.)
|
||||
start := edit.Start
|
||||
if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
|
||||
edit.Start -= delta
|
||||
edit.New = src[start-delta:start] + edit.New
|
||||
}
|
||||
|
||||
// Expand end right to end of line.
|
||||
end := edit.End
|
||||
if end > 0 && src[end-1] != '\n' ||
|
||||
edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
|
||||
if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
|
||||
edit.End = len(src) // extend to EOF
|
||||
} else {
|
||||
edit.End = end + nl + 1 // extend beyond \n
|
||||
}
|
||||
}
|
||||
edit.New += src[end:edit.End]
|
||||
|
||||
return edit
|
||||
}
|
||||
@@ -0,0 +1,207 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/tools/internal/diff"
|
||||
"golang.org/x/tools/internal/diff/difftest"
|
||||
"golang.org/x/tools/internal/testenv"
|
||||
)
|
||||
|
||||
func TestApply(t *testing.T) {
|
||||
for _, tc := range difftest.TestCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
got, err := diff.Apply(tc.In, tc.Edits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply(Edits) failed: %v", err)
|
||||
}
|
||||
if got != tc.Out {
|
||||
t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out)
|
||||
}
|
||||
if tc.LineEdits != nil {
|
||||
got, err := diff.Apply(tc.In, tc.LineEdits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply(LineEdits) failed: %v", err)
|
||||
}
|
||||
if got != tc.Out {
|
||||
t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNEdits(t *testing.T) {
|
||||
for _, tc := range difftest.TestCases {
|
||||
edits := diff.Strings(tc.In, tc.Out)
|
||||
got, err := diff.Apply(tc.In, edits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
if got != tc.Out {
|
||||
t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out)
|
||||
}
|
||||
if len(edits) < len(tc.Edits) { // should find subline edits
|
||||
t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNRandom(t *testing.T) {
|
||||
rand.Seed(1)
|
||||
for i := 0; i < 1000; i++ {
|
||||
a := randstr("abω", 16)
|
||||
b := randstr("abωc", 16)
|
||||
edits := diff.Strings(a, b)
|
||||
got, err := diff.Apply(a, edits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
if got != b {
|
||||
t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// $ go test -fuzz=FuzzRoundTrip ./internal/diff
|
||||
func FuzzRoundTrip(f *testing.F) {
|
||||
f.Fuzz(func(t *testing.T, a, b string) {
|
||||
if !utf8.ValidString(a) || !utf8.ValidString(b) {
|
||||
return // inputs must be text
|
||||
}
|
||||
edits := diff.Strings(a, b)
|
||||
got, err := diff.Apply(a, edits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
if got != b {
|
||||
t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLineEdits(t *testing.T) {
|
||||
for _, tc := range difftest.TestCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
want := tc.LineEdits
|
||||
if want == nil {
|
||||
want = tc.Edits // already line-aligned
|
||||
}
|
||||
got, err := diff.LineEdits(tc.In, tc.Edits)
|
||||
if err != nil {
|
||||
t.Fatalf("LineEdits: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Errorf("in=<<%s>>\nout=<<%s>>\nraw edits=%s\nline edits=%s\nwant: %s",
|
||||
tc.In, tc.Out, tc.Edits, got, want)
|
||||
}
|
||||
// make sure that applying the edits gives the expected result
|
||||
fixed, err := diff.Apply(tc.In, got)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if fixed != tc.Out {
|
||||
t.Errorf("Apply(LineEdits): got %q, want %q", fixed, tc.Out)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestToUnified(t *testing.T) {
|
||||
testenv.NeedsTool(t, "patch")
|
||||
for _, tc := range difftest.TestCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if unified == "" {
|
||||
return
|
||||
}
|
||||
orig := filepath.Join(t.TempDir(), "original")
|
||||
err = os.WriteFile(orig, []byte(tc.In), 0644)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
temp := filepath.Join(t.TempDir(), "patched")
|
||||
err = os.WriteFile(temp, []byte(tc.In), 0644)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig)
|
||||
cmd.Stdin = strings.NewReader(unified)
|
||||
cmd.Stdout = new(bytes.Buffer)
|
||||
cmd.Stderr = new(bytes.Buffer)
|
||||
if err = cmd.Run(); err != nil {
|
||||
t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(),
|
||||
cmd.Stderr, cmd.Stdout)
|
||||
}
|
||||
got, err := os.ReadFile(temp)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(got) != tc.Out {
|
||||
t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q",
|
||||
got, tc.Out, unified)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegressionOld001(t *testing.T) {
|
||||
a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
|
||||
|
||||
b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
|
||||
diffs := diff.Strings(a, b)
|
||||
got, err := diff.Apply(a, diffs)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
if got != b {
|
||||
i := 0
|
||||
for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
|
||||
}
|
||||
t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
|
||||
t.Errorf("\n%q\n%q", got[i:], b[i:])
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegressionOld002(t *testing.T) {
|
||||
a := "n\"\n)\n"
|
||||
b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
|
||||
diffs := diff.Strings(a, b)
|
||||
got, err := diff.Apply(a, diffs)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
if got != b {
|
||||
i := 0
|
||||
for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ {
|
||||
}
|
||||
t.Errorf("oops %vd\n%q\n%q", diffs, got, b)
|
||||
t.Errorf("\n%q\n%q", got[i:], b[i:])
|
||||
}
|
||||
}
|
||||
|
||||
// randstr returns a string of n runes, each drawn from the runes of s
// using the shared math/rand generator.
func randstr(s string, n int) string {
	alphabet := []rune(s)
	out := make([]rune, 0, n)
	for len(out) < n {
		out = append(out, alphabet[rand.Intn(len(alphabet))])
	}
	return string(out)
}
|
||||
@@ -0,0 +1,324 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package difftest supplies a set of tests that will operate on any
|
||||
// implementation of a diff algorithm as exposed by
|
||||
// "golang.org/x/tools/internal/diff"
|
||||
package difftest
|
||||
|
||||
// There are two kinds of tests, semantic tests, and 'golden data' tests.
|
||||
// The semantic tests check that the computed diffs transform the input to
|
||||
// the output, and that 'patch' accepts the computed unified diffs.
|
||||
// The other tests just check that Edits and LineEdits haven't changed
|
||||
// unexpectedly. These fields may need to be changed when the diff algorithm
|
||||
// changes.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/internal/diff"
|
||||
)
|
||||
|
||||
const (
	// FileA is the name used for the "before" file in unified diffs.
	FileA = "from"
	// FileB is the name used for the "after" file in unified diffs.
	FileB = "to"
	// UnifiedPrefix is the two-line file header that begins every
	// non-empty unified diff of FileA against FileB.
	UnifiedPrefix = "--- " + FileA + "\n" +
		"+++ " + FileB + "\n"
)
|
||||
|
||||
var TestCases = []struct {
|
||||
Name, In, Out, Unified string
|
||||
Edits, LineEdits []diff.Edit // expectation (LineEdits=nil => already line-aligned)
|
||||
NoDiff bool
|
||||
}{{
|
||||
Name: "empty",
|
||||
In: "",
|
||||
Out: "",
|
||||
}, {
|
||||
Name: "no_diff",
|
||||
In: "gargantuan\n",
|
||||
Out: "gargantuan\n",
|
||||
}, {
|
||||
Name: "replace_all",
|
||||
In: "fruit\n",
|
||||
Out: "cheese\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-fruit
|
||||
+cheese
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 0, End: 5, New: "cheese"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}},
|
||||
}, {
|
||||
Name: "insert_rune",
|
||||
In: "gord\n",
|
||||
Out: "gourd\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-gord
|
||||
+gourd
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 2, End: 2, New: "u"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}},
|
||||
}, {
|
||||
Name: "delete_rune",
|
||||
In: "groat\n",
|
||||
Out: "goat\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-groat
|
||||
+goat
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 1, End: 2, New: ""}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}},
|
||||
}, {
|
||||
Name: "replace_rune",
|
||||
In: "loud\n",
|
||||
Out: "lord\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-loud
|
||||
+lord
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 2, End: 3, New: "r"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}},
|
||||
}, {
|
||||
Name: "replace_partials",
|
||||
In: "blanket\n",
|
||||
Out: "bunker\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-blanket
|
||||
+bunker
|
||||
`[1:],
|
||||
Edits: []diff.Edit{
|
||||
{Start: 1, End: 3, New: "u"},
|
||||
{Start: 6, End: 7, New: "r"},
|
||||
},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}},
|
||||
}, {
|
||||
Name: "insert_line",
|
||||
In: "1: one\n3: three\n",
|
||||
Out: "1: one\n2: two\n3: three\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1,2 +1,3 @@
|
||||
1: one
|
||||
+2: two
|
||||
3: three
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}},
|
||||
}, {
|
||||
Name: "replace_no_newline",
|
||||
In: "A",
|
||||
Out: "B",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-A
|
||||
\ No newline at end of file
|
||||
+B
|
||||
\ No newline at end of file
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}},
|
||||
}, {
|
||||
Name: "delete_empty",
|
||||
In: "meow",
|
||||
Out: "", // GNU diff -u special case: +0,0
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +0,0 @@
|
||||
-meow
|
||||
\ No newline at end of file
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 0, End: 4, New: ""}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 4, New: ""}},
|
||||
}, {
|
||||
Name: "append_empty",
|
||||
In: "", // GNU diff -u special case: -0,0
|
||||
Out: "AB\nC",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -0,0 +1,2 @@
|
||||
+AB
|
||||
+C
|
||||
\ No newline at end of file
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
|
||||
},
|
||||
// TODO(adonovan): fix this test: GNU diff -u prints "+1,2", ToUnified prints "+1,3".
|
||||
// {
|
||||
// Name: "add_start",
|
||||
// In: "A",
|
||||
// Out: "B\nCA",
|
||||
// Unified: UnifiedPrefix + `
|
||||
// @@ -1 +1,2 @@
|
||||
// -A
|
||||
// \ No newline at end of file
|
||||
// +B
|
||||
// +CA
|
||||
// \ No newline at end of file
|
||||
// `[1:],
|
||||
// Edits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
|
||||
// LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}},
|
||||
// },
|
||||
{
|
||||
Name: "add_end",
|
||||
In: "A",
|
||||
Out: "AB",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-A
|
||||
\ No newline at end of file
|
||||
+AB
|
||||
\ No newline at end of file
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 1, End: 1, New: "B"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}},
|
||||
}, {
|
||||
Name: "add_empty",
|
||||
In: "",
|
||||
Out: "AB\nC",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -0,0 +1,2 @@
|
||||
+AB
|
||||
+C
|
||||
\ No newline at end of file
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}},
|
||||
}, {
|
||||
Name: "add_newline",
|
||||
In: "A",
|
||||
Out: "A\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1 +1 @@
|
||||
-A
|
||||
\ No newline at end of file
|
||||
+A
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 1, End: 1, New: "\n"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}},
|
||||
}, {
|
||||
Name: "delete_front",
|
||||
In: "A\nB\nC\nA\nB\nB\nA\n",
|
||||
Out: "C\nB\nA\nB\nA\nC\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1,7 +1,6 @@
|
||||
-A
|
||||
-B
|
||||
C
|
||||
+B
|
||||
A
|
||||
B
|
||||
-B
|
||||
A
|
||||
+C
|
||||
`[1:],
|
||||
NoDiff: true, // unified diff is different but valid
|
||||
Edits: []diff.Edit{
|
||||
{Start: 0, End: 4, New: ""},
|
||||
{Start: 6, End: 6, New: "B\n"},
|
||||
{Start: 10, End: 12, New: ""},
|
||||
{Start: 14, End: 14, New: "C\n"},
|
||||
},
|
||||
LineEdits: []diff.Edit{
|
||||
{Start: 0, End: 4, New: ""},
|
||||
{Start: 6, End: 6, New: "B\n"},
|
||||
{Start: 10, End: 12, New: ""},
|
||||
{Start: 14, End: 14, New: "C\n"},
|
||||
},
|
||||
}, {
|
||||
Name: "replace_last_line",
|
||||
In: "A\nB\n",
|
||||
Out: "A\nC\n\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1,2 +1,3 @@
|
||||
A
|
||||
-B
|
||||
+C
|
||||
+
|
||||
`[1:],
|
||||
Edits: []diff.Edit{{Start: 2, End: 3, New: "C\n"}},
|
||||
LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}},
|
||||
},
|
||||
{
|
||||
Name: "multiple_replace",
|
||||
In: "A\nB\nC\nD\nE\nF\nG\n",
|
||||
Out: "A\nH\nI\nJ\nE\nF\nK\n",
|
||||
Unified: UnifiedPrefix + `
|
||||
@@ -1,7 +1,7 @@
|
||||
A
|
||||
-B
|
||||
-C
|
||||
-D
|
||||
+H
|
||||
+I
|
||||
+J
|
||||
E
|
||||
F
|
||||
-G
|
||||
+K
|
||||
`[1:],
|
||||
Edits: []diff.Edit{
|
||||
{Start: 2, End: 8, New: "H\nI\nJ\n"},
|
||||
{Start: 12, End: 14, New: "K\n"},
|
||||
},
|
||||
NoDiff: true, // diff algorithm produces different delete/insert pattern
|
||||
},
|
||||
{
|
||||
Name: "extra_newline",
|
||||
In: "\nA\n",
|
||||
Out: "A\n",
|
||||
Edits: []diff.Edit{{Start: 0, End: 1, New: ""}},
|
||||
Unified: UnifiedPrefix + `@@ -1,2 +1 @@
|
||||
-
|
||||
A
|
||||
`,
|
||||
}, {
|
||||
Name: "unified_lines",
|
||||
In: "aaa\nccc\n",
|
||||
Out: "aaa\nbbb\nccc\n",
|
||||
Edits: []diff.Edit{{Start: 3, End: 3, New: "\nbbb"}},
|
||||
LineEdits: []diff.Edit{{Start: 0, End: 4, New: "aaa\nbbb\n"}},
|
||||
Unified: UnifiedPrefix + "@@ -1,2 +1,3 @@\n aaa\n+bbb\n ccc\n",
|
||||
}, {
|
||||
Name: "60379",
|
||||
In: `package a
|
||||
|
||||
type S struct {
|
||||
s fmt.Stringer
|
||||
}
|
||||
`,
|
||||
Out: `package a
|
||||
|
||||
type S struct {
|
||||
s fmt.Stringer
|
||||
}
|
||||
`,
|
||||
Edits: []diff.Edit{{Start: 27, End: 27, New: "\t"}},
|
||||
LineEdits: []diff.Edit{{Start: 27, End: 42, New: "\ts fmt.Stringer\n"}},
|
||||
Unified: UnifiedPrefix + "@@ -1,5 +1,5 @@\n package a\n \n type S struct {\n-s fmt.Stringer\n+\ts fmt.Stringer\n }\n",
|
||||
},
|
||||
}
|
||||
|
||||
func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) {
|
||||
for _, test := range TestCases {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
edits := compute(test.In, test.Out)
|
||||
got, err := diff.Apply(test.In, edits)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
unified, err := diff.ToUnified(FileA, FileB, test.In, edits, diff.DefaultContextLines)
|
||||
if err != nil {
|
||||
t.Fatalf("ToUnified: %v", err)
|
||||
}
|
||||
if got != test.Out {
|
||||
t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v",
|
||||
got, unified, test.Out)
|
||||
}
|
||||
if !test.NoDiff && unified != test.Unified {
|
||||
t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v",
|
||||
unified, test.Unified, edits)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package difftest supplies a set of tests that will operate on any
|
||||
// implementation of a diff algorithm as exposed by
|
||||
// "golang.org/x/tools/internal/diff"
|
||||
package difftest_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/internal/diff/difftest"
|
||||
"golang.org/x/tools/internal/testenv"
|
||||
)
|
||||
|
||||
func TestVerifyUnified(t *testing.T) {
|
||||
testenv.NeedsTool(t, "diff")
|
||||
for _, test := range difftest.TestCases {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
if test.NoDiff {
|
||||
t.Skip("diff tool produces expected different results")
|
||||
}
|
||||
diff, err := getDiffOutput(test.In, test.Out)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(diff) > 0 {
|
||||
diff = difftest.UnifiedPrefix + diff
|
||||
}
|
||||
if diff != test.Unified {
|
||||
t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// getDiffOutput writes a and b to temporary files, runs "diff -u" on
// them, and returns the tool's output with the two file-header lines
// ("--- ..." and "+++ ...") removed. It returns the empty string when
// the inputs are identical. The temporary files are removed before
// returning.
func getDiffOutput(a, b string) (string, error) {
	nameA, err := writeDiffInput(a)
	if err != nil {
		return "", err
	}
	defer os.Remove(nameA)
	nameB, err := writeDiffInput(b)
	if err != nil {
		return "", err
	}
	defer os.Remove(nameB)

	cmd := exec.Command("diff", "-u", nameA, nameB)
	// NOTE(review): cmd.Env is nil at this point, so after the append
	// the child environment consists of LANG alone; confirm that this
	// is the intended (hermetic) behavior.
	cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8")
	out, err := cmd.Output()
	if err != nil {
		exit, ok := err.(*exec.ExitError)
		if !ok {
			return "", fmt.Errorf("can't exec %s: %v", cmd, err)
		}
		if len(out) == 0 {
			// Nonzero exit with no output: terminated by signal?
			return "", fmt.Errorf("%s failed: %v; stderr:\n%s", cmd, err, exit.Stderr)
		}
		// nonzero exit + output => files differ
	}
	diff := string(out)
	if diff == "" {
		return "", nil
	}
	// Drop the two header lines, which mention the temp file names.
	bits := strings.SplitN(diff, "\n", 3)
	if len(bits) != 3 {
		return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff)
	}
	return bits[2], nil
}

// writeDiffInput creates a temporary file holding content and returns
// its name. The file is closed on every path; on failure it is also
// removed, so the caller owns cleanup only when err is nil.
func writeDiffInput(content string) (string, error) {
	f, err := os.CreateTemp("", "myers.in")
	if err != nil {
		return "", err
	}
	if _, err := f.WriteString(content); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = f.Close()
		_ = os.Remove(f.Name())
		return "", err
	}
	if err := f.Close(); err != nil {
		_ = os.Remove(f.Name())
		return "", err
	}
	return f.Name(), nil
}
|
||||
@@ -0,0 +1,9 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff
|
||||
|
||||
// This file exports some private declarations to tests.
|
||||
|
||||
var LineEdits = lineEdits
|
||||
@@ -0,0 +1,179 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
import (
|
||||
"log"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// An lcs is a longest common subsequence, represented as a list of
// diagonals of the edit graph.
type lcs []diag

// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i<Len.
// All computed diagonals are parts of a longest common subsequence.
type diag struct {
	X   int // start offset in A
	Y   int // start offset in B
	Len int // number of matching elements
}
|
||||
|
||||
// sort sorts in place, by lowest X, and if tied, inversely by Len
|
||||
func (l lcs) sort() lcs {
|
||||
sort.Slice(l, func(i, j int) bool {
|
||||
if l[i].X != l[j].X {
|
||||
return l[i].X < l[j].X
|
||||
}
|
||||
return l[i].Len > l[j].Len
|
||||
})
|
||||
return l
|
||||
}
|
||||
|
||||
// validate that the elements of the lcs do not overlap
|
||||
// (can only happen when the two-sided algorithm ends early)
|
||||
// expects the lcs to be sorted
|
||||
func (l lcs) valid() bool {
|
||||
for i := 1; i < len(l); i++ {
|
||||
if l[i-1].X+l[i-1].Len > l[i].X {
|
||||
return false
|
||||
}
|
||||
if l[i-1].Y+l[i-1].Len > l[i].Y {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// repair overlapping lcs
|
||||
// only called if two-sided stops early
|
||||
func (l lcs) fix() lcs {
|
||||
// from the set of diagonals in l, find a maximal non-conflicting set
|
||||
// this problem may be NP-complete, but we use a greedy heuristic,
|
||||
// which is quadratic, but with a better data structure, could be D log D.
|
||||
// indepedent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs
|
||||
// which has to have monotone x and y
|
||||
if len(l) == 0 {
|
||||
return nil
|
||||
}
|
||||
sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len })
|
||||
tmp := make(lcs, 0, len(l))
|
||||
tmp = append(tmp, l[0])
|
||||
for i := 1; i < len(l); i++ {
|
||||
var dir direction
|
||||
nxt := l[i]
|
||||
for _, in := range tmp {
|
||||
if dir, nxt = overlap(in, nxt); dir == empty || dir == bad {
|
||||
break
|
||||
}
|
||||
}
|
||||
if nxt.Len > 0 && dir != bad {
|
||||
tmp = append(tmp, nxt)
|
||||
}
|
||||
}
|
||||
tmp.sort()
|
||||
if false && !tmp.valid() { // debug checking
|
||||
log.Fatalf("here %d", len(tmp))
|
||||
}
|
||||
return tmp
|
||||
}
|
||||
|
||||
// A direction classifies a proposed diag relative to a diag that is
// already part of the lcs.
type direction int

const (
	// empty: the proposed diag is empty (so not in the lcs).
	empty direction = iota
	// leftdown: the proposed diag is acceptably to the left and below.
	leftdown
	// rightup: the proposed diag is acceptably to the right and above.
	rightup
	// bad: the proposed diag is inconsistent with the lcs so far.
	bad
)
|
||||
|
||||
// overlap trims the proposed diag prop so it doesn't overlap with
|
||||
// the existing diag that has already been added to the lcs.
|
||||
func overlap(exist, prop diag) (direction, diag) {
|
||||
if prop.X <= exist.X && exist.X < prop.X+prop.Len {
|
||||
// remove the end of prop where it overlaps with the X end of exist
|
||||
delta := prop.X + prop.Len - exist.X
|
||||
prop.Len -= delta
|
||||
if prop.Len <= 0 {
|
||||
return empty, prop
|
||||
}
|
||||
}
|
||||
if exist.X <= prop.X && prop.X < exist.X+exist.Len {
|
||||
// remove the beginning of prop where overlaps with exist
|
||||
delta := exist.X + exist.Len - prop.X
|
||||
prop.Len -= delta
|
||||
if prop.Len <= 0 {
|
||||
return empty, prop
|
||||
}
|
||||
prop.X += delta
|
||||
prop.Y += delta
|
||||
}
|
||||
if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len {
|
||||
// remove the end of prop that overlaps (in Y) with exist
|
||||
delta := prop.Y + prop.Len - exist.Y
|
||||
prop.Len -= delta
|
||||
if prop.Len <= 0 {
|
||||
return empty, prop
|
||||
}
|
||||
}
|
||||
if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len {
|
||||
// remove the beginning of peop that overlaps with exist
|
||||
delta := exist.Y + exist.Len - prop.Y
|
||||
prop.Len -= delta
|
||||
if prop.Len <= 0 {
|
||||
return empty, prop
|
||||
}
|
||||
prop.X += delta // no test reaches this code
|
||||
prop.Y += delta
|
||||
}
|
||||
if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y {
|
||||
return leftdown, prop
|
||||
}
|
||||
if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y {
|
||||
return rightup, prop
|
||||
}
|
||||
// prop can't be in an lcs that contains exist
|
||||
return bad, prop
|
||||
}
|
||||
|
||||
// manipulating Diag and lcs
|
||||
|
||||
// prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs
|
||||
// or to its first Diag. prepend is only called to extend diagonals
|
||||
// the backward direction.
|
||||
func (lcs lcs) prepend(x, y int) lcs {
|
||||
if len(lcs) > 0 {
|
||||
d := &lcs[0]
|
||||
if int(d.X) == x+1 && int(d.Y) == y+1 {
|
||||
// extend the diagonal down and to the left
|
||||
d.X, d.Y = int(x), int(y)
|
||||
d.Len++
|
||||
return lcs
|
||||
}
|
||||
}
|
||||
|
||||
r := diag{X: int(x), Y: int(y), Len: 1}
|
||||
lcs = append([]diag{r}, lcs...)
|
||||
return lcs
|
||||
}
|
||||
|
||||
// append appends a diagonal, or extends the existing one.
|
||||
// by adding the edge (x,y)-(x+1.y+1). append is only called
|
||||
// to extend diagonals in the forward direction.
|
||||
func (lcs lcs) append(x, y int) lcs {
|
||||
if len(lcs) > 0 {
|
||||
last := &lcs[len(lcs)-1]
|
||||
// Expand last element if adjoining.
|
||||
if last.X+last.Len == x && last.Y+last.Len == y {
|
||||
last.Len++
|
||||
return lcs
|
||||
}
|
||||
}
|
||||
|
||||
return append(lcs, diag{X: x, Y: y, Len: 1})
|
||||
}
|
||||
|
||||
// ok reports whether the pair (d, k) satisfies the constraint
// d >= 0 and -d <= k <= d.
func ok(d, k int) bool {
	if d < 0 {
		return false
	}
	return -d <= k && k <= d
}
|
||||
@@ -0,0 +1,140 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// A Btest is a single test case: two input strings and a list of
// strings in the lcs field (presumably the acceptable longest common
// subsequences of a and b; confirm against the code that uses it).
type Btest struct {
	a   string
	b   string
	lcs []string
}
|
||||
|
||||
var Btests = []Btest{
|
||||
{"aaabab", "abaab", []string{"abab", "aaab"}},
|
||||
{"aabbba", "baaba", []string{"aaba"}},
|
||||
{"cabbx", "cbabx", []string{"cabx", "cbbx"}},
|
||||
{"c", "cb", []string{"c"}},
|
||||
{"aaba", "bbb", []string{"b"}},
|
||||
{"bbaabb", "b", []string{"b"}},
|
||||
{"baaabb", "bbaba", []string{"bbb", "baa", "bab"}},
|
||||
{"baaabb", "abbab", []string{"abb", "bab", "aab"}},
|
||||
{"baaba", "aaabba", []string{"aaba"}},
|
||||
{"ca", "cba", []string{"ca"}},
|
||||
{"ccbcbc", "abba", []string{"bb"}},
|
||||
{"ccbcbc", "aabba", []string{"bb"}},
|
||||
{"ccb", "cba", []string{"cb"}},
|
||||
{"caef", "axe", []string{"ae"}},
|
||||
{"bbaabb", "baabb", []string{"baabb"}},
|
||||
// Example from Myers:
|
||||
{"abcabba", "cbabac", []string{"caba", "baba", "cbba"}},
|
||||
{"3456aaa", "aaa", []string{"aaa"}},
|
||||
{"aaa", "aaa123", []string{"aaa"}},
|
||||
{"aabaa", "aacaa", []string{"aaaa"}},
|
||||
{"1a", "a", []string{"a"}},
|
||||
{"abab", "bb", []string{"bb"}},
|
||||
{"123", "ab", []string{""}},
|
||||
{"a", "b", []string{""}},
|
||||
{"abc", "123", []string{""}},
|
||||
{"aa", "aa", []string{"aa"}},
|
||||
{"abcde", "12345", []string{""}},
|
||||
{"aaa3456", "aaa", []string{"aaa"}},
|
||||
{"abcde", "12345a", []string{"a"}},
|
||||
{"ab", "123", []string{""}},
|
||||
{"1a2", "a", []string{"a"}},
|
||||
// for two-sided
|
||||
{"babaab", "cccaba", []string{"aba"}},
|
||||
{"aabbab", "cbcabc", []string{"bab"}},
|
||||
{"abaabb", "bcacab", []string{"baab"}},
|
||||
{"abaabb", "abaaaa", []string{"abaa"}},
|
||||
{"bababb", "baaabb", []string{"baabb"}},
|
||||
{"abbbaa", "cabacc", []string{"aba"}},
|
||||
{"aabbaa", "aacaba", []string{"aaaa", "aaba"}},
|
||||
}
|
||||
|
||||
// init sets the standard logger's flags to log.Lshortfile, so log output
// from the tests is prefixed with the file name and line number.
func init() {
|
||||
log.SetFlags(log.Lshortfile)
|
||||
}
|
||||
|
||||
func check(t *testing.T, str string, lcs lcs, want []string) {
|
||||
t.Helper()
|
||||
if !lcs.valid() {
|
||||
t.Errorf("bad lcs %v", lcs)
|
||||
}
|
||||
var got strings.Builder
|
||||
for _, dd := range lcs {
|
||||
got.WriteString(str[dd.X : dd.X+dd.Len])
|
||||
}
|
||||
ans := got.String()
|
||||
for _, w := range want {
|
||||
if ans == w {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans)
|
||||
}
|
||||
|
||||
func checkDiffs(t *testing.T, before string, diffs []Diff, after string) {
|
||||
t.Helper()
|
||||
var ans strings.Builder
|
||||
sofar := 0 // index of position in before
|
||||
for _, d := range diffs {
|
||||
if sofar < d.Start {
|
||||
ans.WriteString(before[sofar:d.Start])
|
||||
}
|
||||
ans.WriteString(after[d.ReplStart:d.ReplEnd])
|
||||
sofar = d.End
|
||||
}
|
||||
ans.WriteString(before[sofar:])
|
||||
if ans.String() != after {
|
||||
t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after)
|
||||
}
|
||||
}
|
||||
|
||||
func lcslen(l lcs) int {
|
||||
ans := 0
|
||||
for _, d := range l {
|
||||
ans += int(d.Len)
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// randstr returns a random string of n runes, each one chosen with
// rand.Intn from the runes of s.
func randstr(s string, n int) string {
	alphabet := []rune(s)
	out := make([]rune, n)
	for i := range out {
		out[i] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}
|
||||
|
||||
func TestLcsFix(t *testing.T) {
|
||||
tests := []struct{ before, after lcs }{
|
||||
{lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}},
|
||||
{lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}},
|
||||
{lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}},
|
||||
{lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}},
|
||||
{lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}},
|
||||
{lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}},
|
||||
{lcs{}, lcs{}},
|
||||
{lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}},
|
||||
}
|
||||
for n, x := range tests {
|
||||
got := x.before.fix()
|
||||
if len(got) != len(x.after) {
|
||||
t.Errorf("got %v, expected %v, for %v", got, x.after, x.before)
|
||||
}
|
||||
olen := lcslen(x.after)
|
||||
glen := lcslen(got)
|
||||
if olen != glen {
|
||||
t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package lcs contains code to find longest-common-subsequences
|
||||
// (and diffs)
|
||||
package lcs
|
||||
|
||||
/*
|
||||
Compute longest-common-subsequences of two slices A, B using
|
||||
algorithms from Myers' paper. A longest-common-subsequence
|
||||
(LCS from now on) of A and B is a maximal set of lexically increasing
|
||||
pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but
|
||||
they all have the same length. An LCS determines a sequence of edits
|
||||
that changes A into B.
|
||||
|
||||
The key concept is the edit graph of A and B.
|
||||
If A has length N and B has length M, then the edit graph has
|
||||
vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a
|
||||
horizontal edge from v[i][j] to v[i+1][j] whenever both are in
|
||||
the graph, and a vertical edge from v[i][j] to v[i][j+1] similarly.
|
||||
When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1].
|
||||
|
||||
A path in the graph between (0,0) and (N,M) determines a sequence
|
||||
of edits converting A into B: each horizontal edge corresponds to removing
|
||||
an element of A, and each vertical edge corresponds to inserting an
|
||||
element of B.
|
||||
|
||||
A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph
|
||||
is of length D if it has D non-diagonal edges. The algorithms generate
|
||||
forward paths (in which at least one of x,y increases at each edge),
|
||||
or backward paths (in which at least one of x,y decreases at each edge),
|
||||
or a combination. (Note that the orientation is the traditional mathematical one,
|
||||
with the origin in the lower-left corner.)
|
||||
|
||||
Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.)
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
b | | | ___/‾‾‾ | ___/‾‾‾ | | |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
c | | | | | | |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a a b b a a
|
||||
|
||||
|
||||
The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at
|
||||
the end of a maximal path of length D. (Because x-y=k it suffices to remember
|
||||
only the x coordinate of the vertex.)
|
||||
|
||||
The forward algorithm: Find the longest diagonal starting at (0,0) and
|
||||
label its end with D=0,k=0. From that vertex take a vertical step and
|
||||
then follow the longest diagonal (up and to the right), and label that vertex
|
||||
with D=1,k=-1. From the D=0,k=0 point take a horizontal step and then follow
|
||||
the longest diagonal (up and to the right) and label that vertex
|
||||
D=1,k=1. In the same way, having labelled all the D vertices,
|
||||
from a vertex labelled D,k find two vertices
|
||||
tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same
|
||||
diagonal, in which case take the one with the larger x.
|
||||
|
||||
Eventually the path gets to (N,M), and the diagonals on it are the LCS.
|
||||
|
||||
Here is the edit graph with the ends of D-paths labelled. (So, for instance,
|
||||
0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first
|
||||
step is to go up the longest diagonal from (0,0).)
|
||||
A:"aabbaa", B:"aacaba"
|
||||
⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6)
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙
|
||||
b | | | ___/‾‾‾ | ___/‾‾‾ | | |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙
|
||||
c | | | | | | |
|
||||
⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2)
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ |
|
||||
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙
|
||||
a a b b a a
|
||||
|
||||
The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical
|
||||
to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected,
|
||||
there are 4 non-diagonal steps, and the diagonals form an LCS.
|
||||
|
||||
There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon):
|
||||
A:"aabbaa", B:"aacaba"
|
||||
⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙
|
||||
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
|
||||
⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙
|
||||
b | | | ____/‾‾‾ | ____/‾‾‾ | | |
|
||||
⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙
|
||||
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
|
||||
(:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙
|
||||
c | | | | | | |
|
||||
⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙
|
||||
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
|
||||
(:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙
|
||||
a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ |
|
||||
(:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙
|
||||
a a b b a a
|
||||
|
||||
Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the
|
||||
front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short.
|
||||
We want to control how big D can be, by stopping when it gets too large. The forward algorithm then
|
||||
privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable
|
||||
asymmetry.
|
||||
|
||||
Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in
|
||||
the edit graph look like.
|
||||
A:"aabbaa", B:"aacaba"
|
||||
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
|
||||
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
|
||||
⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙
|
||||
b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | |
|
||||
⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙
|
||||
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
|
||||
⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙
|
||||
c | | | | | | |
|
||||
⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙
|
||||
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
|
||||
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
|
||||
a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ |
|
||||
⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙
|
||||
a a b b a a
|
||||
|
||||
The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion
|
||||
is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same
|
||||
diagonal. (Here the edit graph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward
|
||||
2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path.
|
||||
Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the
|
||||
computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed
|
||||
from the edit graph. The implementation looks for a number of special cases to try to avoid computing an extra forward path.
|
||||
|
||||
If the two-sided algorithm has to stop early (because D has become too large) it will have found a forward LCS and a
|
||||
backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two
|
||||
computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS
|
||||
is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two
|
||||
to form a best-effort LCS. In the worst case the forward partial LCS may have to
|
||||
be recomputed.
|
||||
*/
|
||||
|
||||
/* Eugene Myers' paper is titled
|
||||
"An O(ND) Difference Algorithm and Its Variations"
|
||||
and can be found at
|
||||
http://www.xmailserver.org/diff2.pdf
|
||||
|
||||
(There is a generic implementation of the algorithm in the repository with git hash
|
||||
b9ad7e4ade3a686d608e44475390ad428e60e7fc)
|
||||
*/
|
||||
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
#
# Copyright 2022 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Creates a zip file containing all numbered versions
# of the commit history of a large source file, for use
# as input data for the tests of the diff algorithm.
#
# Run script from root of the x/tools repo.

set -eu

# WARNING: This script checks out historical versions of $file over the
# working copy and finishes by restoring $file with "git restore", so any
# uncommitted changes to $file are lost.
#
# The largest real source file in the x/tools repo.
# file=internal/golang/completion/completion.go
# file=internal/golang/diagnostics.go
file=internal/protocol/tsprotocol.go

tmp=$(mktemp -d)

# List the commits that touched $file (newest first), number them with
# zero-padded sequence numbers, and save the file as of each commit under
# its number. All expansions are quoted so paths containing spaces or glob
# characters are not split, and "--" keeps the path from being mistaken
# for a revision.
git log -- "$file" |
  awk '/^commit / {print $2}' |
  nl -ba -nrz |
  while read -r n hash; do
    git checkout --quiet "$hash" -- "$file"
    cp -f "$file" "$tmp/$n"
  done

# Zip the numbered versions to stdout from inside the temp directory so the
# archive holds bare file names.
(cd "$tmp" && zip -q - *) > testdata.zip
rm -fr "$tmp"

# Put $file back to its committed state in both the index and the work tree.
git restore --staged "$file"
git restore "$file"
echo "Created testdata.zip"
|
||||
@@ -0,0 +1,55 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// A label holds the x values recorded by set, indexed by the pair (D, k).
// For each D, vec[D] has length D+1,
|
||||
// and the label for (D, k) is stored in vec[D][(D+k)/2].
|
||||
type label struct {
|
||||
vec [][]int // vec[D] is allocated by the first set call for that D
|
||||
}
|
||||
|
||||
// debug, when true, makes label.set and label.get validate their (D, k)
// arguments with checkDK.
// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE
|
||||
const debug = false
|
||||
|
||||
// checkDK is a debugging aid. It panics unless (D, k) is a valid pair,
// that is, -D <= k <= D and D+k is even.
func checkDK(D, k int) {
	if k < -D || k > D || (D+k)%2 != 0 {
		panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k))
	}
}
|
||||
|
||||
func (t *label) set(D, k, x int) {
|
||||
if debug {
|
||||
checkDK(D, k)
|
||||
}
|
||||
for len(t.vec) <= D {
|
||||
t.vec = append(t.vec, nil)
|
||||
}
|
||||
if t.vec[D] == nil {
|
||||
t.vec[D] = make([]int, D+1)
|
||||
}
|
||||
t.vec[D][(D+k)/2] = x // known that D+k is even
|
||||
}
|
||||
|
||||
func (t *label) get(d, k int) int {
|
||||
if debug {
|
||||
checkDK(d, k)
|
||||
}
|
||||
return int(t.vec[d][(d+k)/2])
|
||||
}
|
||||
|
||||
func newtriang(limit int) label {
|
||||
if limit < 100 {
|
||||
// Preallocate if limit is not large.
|
||||
return label{vec: make([][]int, limit)}
|
||||
}
|
||||
return label{}
|
||||
}
|
||||
@@ -0,0 +1,480 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
// TODO(adonovan): remove unclear references to "old" in this package.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// A Diff is a replacement of a portion of A by a portion of B.
// The portion A[Start:End] is replaced by B[ReplStart:ReplEnd].
|
||||
type Diff struct {
|
||||
Start, End int // offsets of portion to delete in A
|
||||
ReplStart, ReplEnd int // offset of replacement text in B
|
||||
}
|
||||
|
||||
// DiffStrings returns the differences between two strings.
|
||||
// It does not respect rune boundaries.
|
||||
func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
|
||||
|
||||
// DiffBytes returns the differences between two byte sequences.
|
||||
// It does not respect rune boundaries.
|
||||
func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
|
||||
|
||||
// DiffRunes returns the differences between two rune sequences.
|
||||
func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
|
||||
|
||||
func diff(seqs sequences) []Diff {
|
||||
// A limit on how deeply the LCS algorithm should search. The value is just a guess.
|
||||
const maxDiffs = 100
|
||||
diff, _ := compute(seqs, twosided, maxDiffs/2)
|
||||
return diff
|
||||
}
|
||||
|
||||
// compute computes the list of differences between two sequences,
|
||||
// along with the LCS. It is exercised directly by tests.
|
||||
// The algorithm is one of {forward, backward, twosided}.
|
||||
func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
|
||||
if limit <= 0 {
|
||||
limit = 1 << 25 // effectively infinity
|
||||
}
|
||||
alen, blen := seqs.lengths()
|
||||
g := &editGraph{
|
||||
seqs: seqs,
|
||||
vf: newtriang(limit),
|
||||
vb: newtriang(limit),
|
||||
limit: limit,
|
||||
ux: alen,
|
||||
uy: blen,
|
||||
delta: alen - blen,
|
||||
}
|
||||
lcs := algo(g)
|
||||
diffs := lcs.toDiffs(alen, blen)
|
||||
return diffs, lcs
|
||||
}
|
||||
|
||||
// editGraph carries the information for computing the lcs of two sequences.
|
||||
type editGraph struct {
|
||||
seqs sequences // the pair of sequences being compared
|
||||
vf, vb label // forward and backward labels
|
||||
|
||||
limit int // maximal value of D
|
||||
// the bounding rectangle of the current edit graph
// (compute sets ux, uy to the two sequence lengths and leaves lx, ly zero)
|
||||
lx, ly, ux, uy int
|
||||
delta int // common subexpression: (ux-lx)-(uy-ly)
|
||||
}
|
||||
|
||||
// toDiffs converts an LCS to a list of edits.
|
||||
func (lcs lcs) toDiffs(alen, blen int) []Diff {
|
||||
var diffs []Diff
|
||||
var pa, pb int // offsets in a, b
|
||||
for _, l := range lcs {
|
||||
if pa < l.X || pb < l.Y {
|
||||
diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
|
||||
}
|
||||
pa = l.X + l.Len
|
||||
pb = l.Y + l.Len
|
||||
}
|
||||
if pa < alen || pb < blen {
|
||||
diffs = append(diffs, Diff{pa, alen, pb, blen})
|
||||
}
|
||||
return diffs
|
||||
}
|
||||
|
||||
// --- FORWARD ---
|
||||
|
||||
// fdone decides if the forward path has reached the upper right
|
||||
// corner of the rectangle. If so, it also returns the computed lcs.
|
||||
func (e *editGraph) fdone(D, k int) (bool, lcs) {
|
||||
// x, y, k are relative to the rectangle
|
||||
x := e.vf.get(D, k)
|
||||
y := x - k
|
||||
if x == e.ux && y == e.uy {
|
||||
return true, e.forwardlcs(D, k)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// run the forward algorithm, until success or up to the limit on D.
|
||||
func forward(e *editGraph) lcs {
|
||||
e.setForward(0, 0, e.lx)
|
||||
if ok, ans := e.fdone(0, 0); ok {
|
||||
return ans
|
||||
}
|
||||
// from D to D+1
|
||||
for D := 0; D < e.limit; D++ {
|
||||
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
|
||||
if ok, ans := e.fdone(D+1, -(D + 1)); ok {
|
||||
return ans
|
||||
}
|
||||
e.setForward(D+1, D+1, e.getForward(D, D)+1)
|
||||
if ok, ans := e.fdone(D+1, D+1); ok {
|
||||
return ans
|
||||
}
|
||||
for k := -D + 1; k <= D-1; k += 2 {
|
||||
// these are tricky and easy to get backwards
|
||||
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
|
||||
lookh := e.lookForward(k, e.getForward(D, k+1))
|
||||
if lookv > lookh {
|
||||
e.setForward(D+1, k, lookv)
|
||||
} else {
|
||||
e.setForward(D+1, k, lookh)
|
||||
}
|
||||
if ok, ans := e.fdone(D+1, k); ok {
|
||||
return ans
|
||||
}
|
||||
}
|
||||
}
|
||||
// D is too large
|
||||
// find the D path with maximal x+y inside the rectangle and
|
||||
// use that to compute the found part of the lcs
|
||||
kmax := -e.limit - 1
|
||||
diagmax := -1
|
||||
for k := -e.limit; k <= e.limit; k += 2 {
|
||||
x := e.getForward(e.limit, k)
|
||||
y := x - k
|
||||
if x+y > diagmax && x <= e.ux && y <= e.uy {
|
||||
diagmax, kmax = x+y, k
|
||||
}
|
||||
}
|
||||
return e.forwardlcs(e.limit, kmax)
|
||||
}
|
||||
|
||||
// recover the lcs by backtracking from the farthest point reached
|
||||
func (e *editGraph) forwardlcs(D, k int) lcs {
|
||||
var ans lcs
|
||||
for x := e.getForward(D, k); x != 0 || x-k != 0; {
|
||||
if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
|
||||
// if (x-1,y) is labelled D-1, x--,D--,k--,continue
|
||||
D, k, x = D-1, k-1, x-1
|
||||
continue
|
||||
} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
|
||||
// if (x,y-1) is labelled D-1, x, D--,k++, continue
|
||||
D, k = D-1, k+1
|
||||
continue
|
||||
}
|
||||
// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
|
||||
y := x - k
|
||||
ans = ans.prepend(x+e.lx-1, y+e.ly-1)
|
||||
x--
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// start at (x,y), go up the diagonal as far as possible,
|
||||
// and label the result with d
|
||||
func (e *editGraph) lookForward(k, relx int) int {
|
||||
rely := relx - k
|
||||
x, y := relx+e.lx, rely+e.ly
|
||||
if x < e.ux && y < e.uy {
|
||||
x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func (e *editGraph) setForward(d, k, relx int) {
|
||||
x := e.lookForward(k, relx)
|
||||
e.vf.set(d, k, x-e.lx)
|
||||
}
|
||||
|
||||
func (e *editGraph) getForward(d, k int) int {
|
||||
x := e.vf.get(d, k)
|
||||
return x
|
||||
}
|
||||
|
||||
// --- BACKWARD ---
|
||||
|
||||
// bdone decides if the backward path has reached the lower left corner
|
||||
func (e *editGraph) bdone(D, k int) (bool, lcs) {
|
||||
// x, y, k are relative to the rectangle
|
||||
x := e.vb.get(D, k)
|
||||
y := x - (k + e.delta)
|
||||
if x == 0 && y == 0 {
|
||||
return true, e.backwardlcs(D, k)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// run the backward algorithm, until success or up to the limit on D.
|
||||
func backward(e *editGraph) lcs {
|
||||
e.setBackward(0, 0, e.ux)
|
||||
if ok, ans := e.bdone(0, 0); ok {
|
||||
return ans
|
||||
}
|
||||
// from D to D+1
|
||||
for D := 0; D < e.limit; D++ {
|
||||
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
|
||||
if ok, ans := e.bdone(D+1, -(D + 1)); ok {
|
||||
return ans
|
||||
}
|
||||
e.setBackward(D+1, D+1, e.getBackward(D, D))
|
||||
if ok, ans := e.bdone(D+1, D+1); ok {
|
||||
return ans
|
||||
}
|
||||
for k := -D + 1; k <= D-1; k += 2 {
|
||||
// these are tricky and easy to get wrong
|
||||
lookv := e.lookBackward(k, e.getBackward(D, k-1))
|
||||
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
|
||||
if lookv < lookh {
|
||||
e.setBackward(D+1, k, lookv)
|
||||
} else {
|
||||
e.setBackward(D+1, k, lookh)
|
||||
}
|
||||
if ok, ans := e.bdone(D+1, k); ok {
|
||||
return ans
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// D is too large
|
||||
// find the D path with minimal x+y inside the rectangle and
|
||||
// use that to compute the part of the lcs found
|
||||
kmax := -e.limit - 1
|
||||
diagmin := 1 << 25
|
||||
for k := -e.limit; k <= e.limit; k += 2 {
|
||||
x := e.getBackward(e.limit, k)
|
||||
y := x - (k + e.delta)
|
||||
if x+y < diagmin && x >= 0 && y >= 0 {
|
||||
diagmin, kmax = x+y, k
|
||||
}
|
||||
}
|
||||
if kmax < -e.limit {
|
||||
panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
|
||||
}
|
||||
return e.backwardlcs(e.limit, kmax)
|
||||
}
|
||||
|
||||
// recover the lcs by backtracking
|
||||
func (e *editGraph) backwardlcs(D, k int) lcs {
|
||||
var ans lcs
|
||||
for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
|
||||
if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
|
||||
// D--, k--, x unchanged
|
||||
D, k = D-1, k-1
|
||||
continue
|
||||
} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
|
||||
// D--, k++, x++
|
||||
D, k, x = D-1, k+1, x+1
|
||||
continue
|
||||
}
|
||||
y := x - (k + e.delta)
|
||||
ans = ans.append(x+e.lx, y+e.ly)
|
||||
x++
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// start at (x,y), go down the diagonal as far as possible,
|
||||
func (e *editGraph) lookBackward(k, relx int) int {
|
||||
rely := relx - (k + e.delta) // forward k = k + e.delta
|
||||
x, y := relx+e.lx, rely+e.ly
|
||||
if x > 0 && y > 0 {
|
||||
x -= e.seqs.commonSuffixLen(0, x, 0, y)
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// convert to rectangle, and label the result with d
|
||||
func (e *editGraph) setBackward(d, k, relx int) {
|
||||
x := e.lookBackward(k, relx)
|
||||
e.vb.set(d, k, x-e.lx)
|
||||
}
|
||||
|
||||
func (e *editGraph) getBackward(d, k int) int {
|
||||
x := e.vb.get(d, k)
|
||||
return x
|
||||
}
|
||||
|
||||
// -- TWOSIDED ---
|
||||
|
||||
func twosided(e *editGraph) lcs {
|
||||
// The termination condition could be improved, as either the forward
|
||||
// or backward pass could succeed before Myers' Lemma applies.
|
||||
// Aside from questions of efficiency (is the extra testing cost-effective)
|
||||
// this is more likely to matter when e.limit is reached.
|
||||
e.setForward(0, 0, e.lx)
|
||||
e.setBackward(0, 0, e.ux)
|
||||
|
||||
// from D to D+1
|
||||
for D := 0; D < e.limit; D++ {
|
||||
// just finished a backwards pass, so check
|
||||
if got, ok := e.twoDone(D, D); ok {
|
||||
return e.twolcs(D, D, got)
|
||||
}
|
||||
// do a forwards pass (D to D+1)
|
||||
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
|
||||
e.setForward(D+1, D+1, e.getForward(D, D)+1)
|
||||
for k := -D + 1; k <= D-1; k += 2 {
|
||||
// these are tricky and easy to get backwards
|
||||
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
|
||||
lookh := e.lookForward(k, e.getForward(D, k+1))
|
||||
if lookv > lookh {
|
||||
e.setForward(D+1, k, lookv)
|
||||
} else {
|
||||
e.setForward(D+1, k, lookh)
|
||||
}
|
||||
}
|
||||
// just did a forward pass, so check
|
||||
if got, ok := e.twoDone(D+1, D); ok {
|
||||
return e.twolcs(D+1, D, got)
|
||||
}
|
||||
// do a backward pass, D to D+1
|
||||
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
|
||||
e.setBackward(D+1, D+1, e.getBackward(D, D))
|
||||
for k := -D + 1; k <= D-1; k += 2 {
|
||||
// these are tricky and easy to get wrong
|
||||
lookv := e.lookBackward(k, e.getBackward(D, k-1))
|
||||
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
|
||||
if lookv < lookh {
|
||||
e.setBackward(D+1, k, lookv)
|
||||
} else {
|
||||
e.setBackward(D+1, k, lookh)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// D too large. combine a forward and backward partial lcs
|
||||
// first, a forward one
|
||||
kmax := -e.limit - 1
|
||||
diagmax := -1
|
||||
for k := -e.limit; k <= e.limit; k += 2 {
|
||||
x := e.getForward(e.limit, k)
|
||||
y := x - k
|
||||
if x+y > diagmax && x <= e.ux && y <= e.uy {
|
||||
diagmax, kmax = x+y, k
|
||||
}
|
||||
}
|
||||
if kmax < -e.limit {
|
||||
panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
|
||||
}
|
||||
lcs := e.forwardlcs(e.limit, kmax)
|
||||
// now a backward one
|
||||
// find the D path with minimal x+y inside the rectangle and
|
||||
// use that to compute the lcs
|
||||
diagmin := 1 << 25 // infinity
|
||||
for k := -e.limit; k <= e.limit; k += 2 {
|
||||
x := e.getBackward(e.limit, k)
|
||||
y := x - (k + e.delta)
|
||||
if x+y < diagmin && x >= 0 && y >= 0 {
|
||||
diagmin, kmax = x+y, k
|
||||
}
|
||||
}
|
||||
if kmax < -e.limit {
|
||||
panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
|
||||
}
|
||||
lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
|
||||
// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
|
||||
ans := lcs.fix()
|
||||
return ans
|
||||
}
|
||||
|
||||
// Does Myers' Lemma apply?
|
||||
func (e *editGraph) twoDone(df, db int) (int, bool) {
|
||||
if (df+db+e.delta)%2 != 0 {
|
||||
return 0, false // diagonals cannot overlap
|
||||
}
|
||||
kmin := -db + e.delta
|
||||
if -df > kmin {
|
||||
kmin = -df
|
||||
}
|
||||
kmax := db + e.delta
|
||||
if df < kmax {
|
||||
kmax = df
|
||||
}
|
||||
for k := kmin; k <= kmax; k += 2 {
|
||||
x := e.vf.get(df, k)
|
||||
u := e.vb.get(db, k-e.delta)
|
||||
if u <= x {
|
||||
// is it worth looking at all the other k?
|
||||
for l := k; l <= kmax; l += 2 {
|
||||
x := e.vf.get(df, l)
|
||||
y := x - l
|
||||
u := e.vb.get(db, l-e.delta)
|
||||
v := u - l
|
||||
if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
|
||||
return l, true
|
||||
}
|
||||
}
|
||||
return k, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (e *editGraph) twolcs(df, db, kf int) lcs {
|
||||
// db==df || db+1==df
|
||||
x := e.vf.get(df, kf)
|
||||
y := x - kf
|
||||
kb := kf - e.delta
|
||||
u := e.vb.get(db, kb)
|
||||
v := u - kf
|
||||
|
||||
// Myers proved there is a df-path from (0,0) to (u,v)
|
||||
// and a db-path from (x,y) to (N,M).
|
||||
// In the first case the overall path is the forward path
|
||||
// to (u,v) followed by the backward path to (N,M).
|
||||
// In the second case the path is the backward path to (x,y)
|
||||
// followed by the forward path to (x,y) from (0,0).
|
||||
|
||||
// Look for some special cases to avoid computing either of these paths.
|
||||
if x == u {
|
||||
// "babaab" "cccaba"
|
||||
// already patched together
|
||||
lcs := e.forwardlcs(df, kf)
|
||||
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
||||
return lcs.sort()
|
||||
}
|
||||
|
||||
// is (u-1,v) or (u,v-1) labelled df-1?
|
||||
// if so, that forward df-1-path plus a horizontal or vertical edge
|
||||
// is the df-path to (u,v), then plus the db-path to (N,M)
|
||||
if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
|
||||
// "aabbab" "cbcabc"
|
||||
lcs := e.forwardlcs(df-1, u-1-v)
|
||||
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
||||
return lcs.sort()
|
||||
}
|
||||
if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
|
||||
// "abaabb" "bcacab"
|
||||
lcs := e.forwardlcs(df-1, u-(v-1))
|
||||
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
||||
return lcs.sort()
|
||||
}
|
||||
|
||||
// The path can't possibly contribute to the lcs because it
|
||||
// is all horizontal or vertical edges
|
||||
if u == 0 || v == 0 || x == e.ux || y == e.uy {
|
||||
// "abaabb" "abaaaa"
|
||||
if u == 0 || v == 0 {
|
||||
return e.backwardlcs(db, kb)
|
||||
}
|
||||
return e.forwardlcs(df, kf)
|
||||
}
|
||||
|
||||
// is (x+1,y) or (x,y+1) labelled db-1?
|
||||
if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
|
||||
// "bababb" "baaabb"
|
||||
lcs := e.backwardlcs(db-1, kb+1)
|
||||
lcs = append(lcs, e.forwardlcs(df, kf)...)
|
||||
return lcs.sort()
|
||||
}
|
||||
if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
|
||||
// "abbbaa" "cabacc"
|
||||
lcs := e.backwardlcs(db-1, kb-1)
|
||||
lcs = append(lcs, e.forwardlcs(df, kf)...)
|
||||
return lcs.sort()
|
||||
}
|
||||
|
||||
// need to compute another path
|
||||
// "aabbaa" "aacaba"
|
||||
lcs := e.backwardlcs(db, kb)
|
||||
oldx, oldy := e.ux, e.uy
|
||||
e.ux = u
|
||||
e.uy = v
|
||||
lcs = append(lcs, forward(e)...)
|
||||
e.ux, e.uy = oldx, oldy
|
||||
return lcs.sort()
|
||||
}
|
||||
@@ -0,0 +1,251 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAlgosOld(t *testing.T) {
|
||||
for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} {
|
||||
t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) {
|
||||
for _, tx := range Btests {
|
||||
lim := len(tx.a) + len(tx.b)
|
||||
|
||||
diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim)
|
||||
check(t, tx.a, lcs, tx.lcs)
|
||||
checkDiffs(t, tx.a, diffs, tx.b)
|
||||
|
||||
diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim)
|
||||
check(t, tx.b, lcs, tx.lcs)
|
||||
checkDiffs(t, tx.b, diffs, tx.a)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntOld(t *testing.T) {
|
||||
// need to avoid any characters in btests
|
||||
lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB"
|
||||
for _, tx := range Btests {
|
||||
if len(tx.a) < 2 || len(tx.b) < 2 {
|
||||
continue
|
||||
}
|
||||
left := tx.a + lfill
|
||||
right := tx.b + rfill
|
||||
lim := len(tx.a) + len(tx.b)
|
||||
diffs, lcs := compute(stringSeqs{left, right}, twosided, lim)
|
||||
check(t, left, lcs, tx.lcs)
|
||||
checkDiffs(t, left, diffs, right)
|
||||
diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
|
||||
check(t, right, lcs, tx.lcs)
|
||||
checkDiffs(t, right, diffs, left)
|
||||
|
||||
left = lfill + tx.a
|
||||
right = rfill + tx.b
|
||||
diffs, lcs = compute(stringSeqs{left, right}, twosided, lim)
|
||||
check(t, left, lcs, tx.lcs)
|
||||
checkDiffs(t, left, diffs, right)
|
||||
diffs, lcs = compute(stringSeqs{right, left}, twosided, lim)
|
||||
check(t, right, lcs, tx.lcs)
|
||||
checkDiffs(t, right, diffs, left)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpecialOld(t *testing.T) { // exercises lcs.fix
|
||||
a := "golang.org/x/tools/intern"
|
||||
b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern"
|
||||
diffs, lcs := compute(stringSeqs{a, b}, twosided, 4)
|
||||
if !lcs.valid() {
|
||||
t.Errorf("%d,%v", len(diffs), lcs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegressionOld001(t *testing.T) {
|
||||
a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
|
||||
|
||||
b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/diff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n"
|
||||
for i := 1; i < len(b); i++ {
|
||||
diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls
|
||||
if !lcs.valid() {
|
||||
t.Errorf("%d,%v", len(diffs), lcs)
|
||||
}
|
||||
checkDiffs(t, a, diffs, b)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegressionOld002(t *testing.T) {
|
||||
a := "n\"\n)\n"
|
||||
b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
|
||||
for i := 1; i <= len(b); i++ {
|
||||
diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
|
||||
if !lcs.valid() {
|
||||
t.Errorf("%d,%v", len(diffs), lcs)
|
||||
}
|
||||
checkDiffs(t, a, diffs, b)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegressionOld003(t *testing.T) {
|
||||
a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1"
|
||||
b := "golang.org/x/hello v1"
|
||||
for i := 1; i <= len(a); i++ {
|
||||
diffs, lcs := compute(stringSeqs{a, b}, twosided, i)
|
||||
if !lcs.valid() {
|
||||
t.Errorf("%d,%v", len(diffs), lcs)
|
||||
}
|
||||
checkDiffs(t, a, diffs, b)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRandOld(t *testing.T) {
|
||||
rand.Seed(1)
|
||||
for i := 0; i < 1000; i++ {
|
||||
// TODO(adonovan): use ASCII and bytesSeqs here? The use of
|
||||
// non-ASCII isn't relevant to the property exercised by the test.
|
||||
a := []rune(randstr("abω", 16))
|
||||
b := []rune(randstr("abωc", 16))
|
||||
seq := runesSeqs{a, b}
|
||||
|
||||
const lim = 24 // large enough to get true lcs
|
||||
_, forw := compute(seq, forward, lim)
|
||||
_, back := compute(seq, backward, lim)
|
||||
_, two := compute(seq, twosided, lim)
|
||||
if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) {
|
||||
t.Logf("\n%v\n%v\n%v", forw, back, two)
|
||||
t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two))
|
||||
}
|
||||
if !two.valid() || !forw.valid() || !back.valid() {
|
||||
t.Errorf("check failure")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes})
|
||||
// to ensure at least minimal parity of the three representations.
|
||||
func TestDiffAPI(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
a, b string
|
||||
wantStrings, wantBytes, wantRunes string
|
||||
}{
|
||||
{"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII
|
||||
{"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII
|
||||
} {
|
||||
|
||||
gotStrings := fmt.Sprint(DiffStrings(test.a, test.b))
|
||||
if gotStrings != test.wantStrings {
|
||||
t.Errorf("DiffStrings(%q, %q) = %v, want %v",
|
||||
test.a, test.b, gotStrings, test.wantStrings)
|
||||
}
|
||||
gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b)))
|
||||
if gotBytes != test.wantBytes {
|
||||
t.Errorf("DiffBytes(%q, %q) = %v, want %v",
|
||||
test.a, test.b, gotBytes, test.wantBytes)
|
||||
}
|
||||
gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b)))
|
||||
if gotRunes != test.wantRunes {
|
||||
t.Errorf("DiffRunes(%q, %q) = %v, want %v",
|
||||
test.a, test.b, gotRunes, test.wantRunes)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTwoOld(b *testing.B) {
|
||||
tests := genBench("abc", 96)
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, tt := range tests {
|
||||
_, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100)
|
||||
if !two.valid() {
|
||||
b.Error("check failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkForwOld(b *testing.B) {
|
||||
tests := genBench("abc", 96)
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, tt := range tests {
|
||||
_, two := compute(stringSeqs{tt.before, tt.after}, forward, 100)
|
||||
if !two.valid() {
|
||||
b.Error("check failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genBench(set string, n int) []struct{ before, after string } {
|
||||
// before and after for benchmarks. 24 strings of length n with
|
||||
// before and after differing at least once, and about 5%
|
||||
rand.Seed(3)
|
||||
var ans []struct{ before, after string }
|
||||
for i := 0; i < 24; i++ {
|
||||
// maybe b should have an approximately known number of diffs
|
||||
a := randstr(set, n)
|
||||
cnt := 0
|
||||
bb := make([]rune, 0, n)
|
||||
for _, r := range a {
|
||||
if rand.Float64() < .05 {
|
||||
cnt++
|
||||
r = 'N'
|
||||
}
|
||||
bb = append(bb, r)
|
||||
}
|
||||
if cnt == 0 {
|
||||
// avoid == shortcut
|
||||
bb[n/2] = 'N'
|
||||
}
|
||||
ans = append(ans, struct{ before, after string }{a, string(bb)})
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// This benchmark represents a common case for a diff command:
|
||||
// large file with a single relatively small diff in the middle.
|
||||
// (It's not clear whether this is representative of gopls workloads
|
||||
// or whether it is important to gopls diff performance.)
|
||||
//
|
||||
// TODO(adonovan) opt: it could be much faster. For example,
|
||||
// comparing a file against itself is about 10x faster than with the
|
||||
// small deletion in the middle. Strangely, comparing a file against
|
||||
// itself minus the last byte is faster still; I don't know why.
|
||||
// There is much low-hanging fruit here for further improvement.
|
||||
func BenchmarkLargeFileSmallDiff(b *testing.B) {
|
||||
data, err := os.ReadFile("old.go") // large file
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
n := len(data)
|
||||
|
||||
src := string(data)
|
||||
dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle
|
||||
b.Run("string", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
compute(stringSeqs{src, dst}, twosided, len(src)+len(dst))
|
||||
}
|
||||
})
|
||||
|
||||
srcBytes := []byte(src)
|
||||
dstBytes := []byte(dst)
|
||||
b.Run("bytes", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes))
|
||||
}
|
||||
})
|
||||
|
||||
srcRunes := []rune(src)
|
||||
dstRunes := []rune(dst)
|
||||
b.Run("runes", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes))
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lcs
|
||||
|
||||
// This file defines the abstract sequence over which the LCS algorithm operates.
|
||||
|
||||
// sequences is the abstraction over a pair of sequences, A and B, on which
// the LCS algorithm operates.
type sequences interface {
	// lengths returns len(A), len(B).
	lengths() (int, int)
	// commonPrefixLen returns len(commonPrefix(A[ai:aj], B[bi:bj])).
	commonPrefixLen(ai, aj, bi, bj int) int
	// commonSuffixLen returns len(commonSuffix(A[ai:aj], B[bi:bj])).
	commonSuffixLen(ai, aj, bi, bj int) int
}
|
||||
|
||||
type stringSeqs struct{ a, b string }
|
||||
|
||||
func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
|
||||
func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
|
||||
return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj])
|
||||
}
|
||||
func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
|
||||
return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj])
|
||||
}
|
||||
|
||||
// The explicit capacity in s[i:j:j] leads to more efficient code.
|
||||
|
||||
type bytesSeqs struct{ a, b []byte }
|
||||
|
||||
func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
|
||||
func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
|
||||
return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
|
||||
}
|
||||
func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
|
||||
return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj])
|
||||
}
|
||||
|
||||
type runesSeqs struct{ a, b []rune }
|
||||
|
||||
func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) }
|
||||
func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int {
|
||||
return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
|
||||
}
|
||||
func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int {
|
||||
return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj])
|
||||
}
|
||||
|
||||
// TODO(adonovan): optimize these functions using ideas from:
|
||||
// - https://go.dev/cl/408116 common.go
|
||||
// - https://go.dev/cl/421435 xor_generic.go
|
||||
|
||||
// TODO(adonovan): factor using generics when available,
|
||||
// but measure performance impact.
|
||||
|
||||
// commonPrefixLenBytes returns the number of leading bytes that a and b
// have in common.
func commonPrefixLenBytes(a, b []byte) int {
	for i := 0; i < len(a); i++ {
		// Stop at the end of b or at the first mismatch.
		if i >= len(b) || a[i] != b[i] {
			return i
		}
	}
	return len(a)
}
|
||||
// commonPrefixLenRunes returns the number of leading runes that a and b
// have in common.
func commonPrefixLenRunes(a, b []rune) int {
	for i := 0; i < len(a); i++ {
		// Stop at the end of b or at the first mismatch.
		if i >= len(b) || a[i] != b[i] {
			return i
		}
	}
	return len(a)
}
|
||||
func commonPrefixLenString(a, b string) int {
|
||||
n := min(len(a), len(b))
|
||||
i := 0
|
||||
for i < n && a[i] == b[i] {
|
||||
i++
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// commonSuffixLenBytes returns the number of trailing bytes that a and b
// have in common.
func commonSuffixLenBytes(a, b []byte) int {
	count := 0
	// Walk both slices backwards from their last elements.
	for i, j := len(a)-1, len(b)-1; i >= 0 && j >= 0 && a[i] == b[j]; i, j = i-1, j-1 {
		count++
	}
	return count
}
|
||||
// commonSuffixLenRunes returns the number of trailing runes that a and b
// have in common.
func commonSuffixLenRunes(a, b []rune) int {
	count := 0
	// Walk both slices backwards from their last elements.
	for i, j := len(a)-1, len(b)-1; i >= 0 && j >= 0 && a[i] == b[j]; i, j = i-1, j-1 {
		count++
	}
	return count
}
|
||||
// commonSuffixLenString returns the number of trailing bytes that the
// strings a and b have in common.
func commonSuffixLenString(a, b string) int {
	count := 0
	// Walk both strings backwards, byte by byte, from their last bytes.
	for i, j := len(a)-1, len(b)-1; i >= 0 && j >= 0 && a[i] == b[j]; i, j = i-1, j-1 {
		count++
	}
	return count
}
|
||||
|
||||
// min returns the smaller of x and y.
func min(x, y int) int {
	if y <= x {
		return y
	}
	return x
}
|
||||
@@ -0,0 +1,246 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package myers implements the Myers diff algorithm.
|
||||
package myers
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/tools/internal/diff"
|
||||
)
|
||||
|
||||
// Sources:
|
||||
// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
|
||||
// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
|
||||
|
||||
// ComputeEdits returns the diffs of two strings using a simple
|
||||
// line-based implementation, like [diff.Strings].
|
||||
//
|
||||
// Deprecated: this implementation is moribund. However, when diffs
|
||||
// appear in marker test expectations, they are the particular diffs
|
||||
// produced by this implementation. The marker test framework
|
||||
// asserts diff(orig, got)==wantDiff, but ideally it would compute
|
||||
// got==apply(orig, wantDiff) so that the notation of the diff
|
||||
// is immaterial.
|
||||
func ComputeEdits(before, after string) []diff.Edit {
|
||||
beforeLines := splitLines(before)
|
||||
ops := operations(beforeLines, splitLines(after))
|
||||
|
||||
// Build a table mapping line number to offset.
|
||||
lineOffsets := make([]int, 0, len(beforeLines)+1)
|
||||
total := 0
|
||||
for i := range beforeLines {
|
||||
lineOffsets = append(lineOffsets, total)
|
||||
total += len(beforeLines[i])
|
||||
}
|
||||
lineOffsets = append(lineOffsets, total) // EOF
|
||||
|
||||
edits := make([]diff.Edit, 0, len(ops))
|
||||
for _, op := range ops {
|
||||
start, end := lineOffsets[op.I1], lineOffsets[op.I2]
|
||||
switch op.Kind {
|
||||
case opDelete:
|
||||
// Delete: before[I1:I2] is deleted.
|
||||
edits = append(edits, diff.Edit{Start: start, End: end})
|
||||
case opInsert:
|
||||
// Insert: after[J1:J2] is inserted at before[I1:I1].
|
||||
if content := strings.Join(op.Content, ""); content != "" {
|
||||
edits = append(edits, diff.Edit{Start: start, End: end, New: content})
|
||||
}
|
||||
}
|
||||
}
|
||||
return edits
|
||||
}
|
||||
|
||||
// opKind identifies the kind of operation that a line represents.
type opKind int

const (
	opDelete opKind = iota // line deleted from input (-)
	opInsert               // line inserted into output (+)
	opEqual                // line present in input and output
)

// String returns the lower-case name of the kind.
// It panics if kind is not one of the declared constants.
func (kind opKind) String() string {
	if kind == opDelete {
		return "delete"
	}
	if kind == opInsert {
		return "insert"
	}
	if kind == opEqual {
		return "equal"
	}
	panic("unknown opKind")
}
|
||||
|
||||
type operation struct {
|
||||
Kind opKind
|
||||
Content []string // content from b
|
||||
I1, I2 int // indices of the line in a
|
||||
J1 int // indices of the line in b, J2 implied by len(Content)
|
||||
}
|
||||
|
||||
// operations returns the list of operations to convert a into b, consolidating
|
||||
// operations for multiple lines and not including equal lines.
|
||||
func operations(a, b []string) []*operation {
|
||||
if len(a) == 0 && len(b) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
trace, offset := shortestEditSequence(a, b)
|
||||
snakes := backtrack(trace, len(a), len(b), offset)
|
||||
|
||||
M, N := len(a), len(b)
|
||||
|
||||
var i int
|
||||
solution := make([]*operation, len(a)+len(b))
|
||||
|
||||
add := func(op *operation, i2, j2 int) {
|
||||
if op == nil {
|
||||
return
|
||||
}
|
||||
op.I2 = i2
|
||||
if op.Kind == opInsert {
|
||||
op.Content = b[op.J1:j2]
|
||||
}
|
||||
solution[i] = op
|
||||
i++
|
||||
}
|
||||
x, y := 0, 0
|
||||
for _, snake := range snakes {
|
||||
if len(snake) < 2 {
|
||||
continue
|
||||
}
|
||||
var op *operation
|
||||
// delete (horizontal)
|
||||
for snake[0]-snake[1] > x-y {
|
||||
if op == nil {
|
||||
op = &operation{
|
||||
Kind: opDelete,
|
||||
I1: x,
|
||||
J1: y,
|
||||
}
|
||||
}
|
||||
x++
|
||||
if x == M {
|
||||
break
|
||||
}
|
||||
}
|
||||
add(op, x, y)
|
||||
op = nil
|
||||
// insert (vertical)
|
||||
for snake[0]-snake[1] < x-y {
|
||||
if op == nil {
|
||||
op = &operation{
|
||||
Kind: opInsert,
|
||||
I1: x,
|
||||
J1: y,
|
||||
}
|
||||
}
|
||||
y++
|
||||
}
|
||||
add(op, x, y)
|
||||
op = nil
|
||||
// equal (diagonal)
|
||||
for x < snake[0] {
|
||||
x++
|
||||
y++
|
||||
}
|
||||
if x >= M && y >= N {
|
||||
break
|
||||
}
|
||||
}
|
||||
return solution[:i]
|
||||
}
|
||||
|
||||
// backtrack walks the trace recorded by the edit sequence computation
// backwards from (x, y) and returns the "snakes" that make up the solution.
// A "snake" is a single deletion or insertion followed by zero or more
// diagonals. The result has one slot per trace entry; slots that were not
// visited are left nil, and visited slots hold an {x, y} pair.
func backtrack(trace [][]int, x, y, offset int) [][]int {
	snakes := make([][]int, len(trace))
	d := len(trace) - 1
	for x > 0 && y > 0 && d > 0 {
		V := trace[d]
		if len(V) != 0 {
			snakes[d] = []int{x, y}

			// Decide which neighbouring k-line the previous step came from,
			// using the same rule as the forward computation.
			k := x - y
			kPrev := k - 1
			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
				kPrev = k + 1
			}

			x = V[kPrev+offset]
			y = x - kPrev
		}
		d--
	}
	if x >= 0 && y >= 0 {
		snakes[d] = []int{x, y}
	}
	return snakes
}
|
||||
|
||||
// shortestEditSequence searches for the shortest edit sequence that converts
// a into b. It returns the trace (for each edit distance d that was explored,
// a snapshot of the furthest-reaching x value on every k-line) together with
// the offset that must be added to k to index into a snapshot.
func shortestEditSequence(a, b []string) ([][]int, int) {
	M, N := len(a), len(b)
	maxD := N + M
	offset := maxD
	V := make([]int, 2*maxD+1)
	trace := make([][]int, maxD+1)

	// snapshot returns a copy of the current state of V.
	snapshot := func() []int {
		saved := make([]int, len(V))
		copy(saved, V)
		return saved
	}

	// Iterate through the maximum possible length of the SES (N+M).
	for d := 0; d <= maxD; d++ {
		// k lines are represented by the equation y = x - k. We move in
		// increments of 2 because end points for even d are on even k lines.
		for k := -d; k <= d; k += 2 {
			// At each point, we either go down or to the right. We go down if
			// k == -d, and we go to the right if k == d. We also prioritize
			// the maximum x value, because we prefer deletions to insertions.
			var x int
			switch {
			case k == -d:
				x = V[k+1+offset] // down
			case k != d && V[k-1+offset] < V[k+1+offset]:
				x = V[k+1+offset] // down
			default:
				x = V[k-1+offset] + 1 // right
			}
			y := x - k

			// Diagonal moves while we have equal contents.
			for x < M && y < N && a[x] == b[y] {
				x++
				y++
			}
			V[k+offset] = x

			// Stop once both inputs are consumed, saving the state of the
			// array before returning.
			if x == M && y == N {
				trace[d] = snapshot()
				return trace, offset
			}
		}

		// Save the state of the array for this d.
		trace[d] = snapshot()
	}
	return nil, 0
}
|
||||
|
||||
// splitLines splits text after each newline, so every returned line keeps
// its terminating "\n". The empty final element produced when text is empty
// or ends in a newline is dropped.
func splitLines(text string) []string {
	parts := strings.SplitAfter(text, "\n")
	if n := len(parts); parts[n-1] == "" {
		return parts[:n-1]
	}
	return parts
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package myers_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/internal/diff/difftest"
|
||||
"golang.org/x/tools/internal/diff/myers"
|
||||
)
|
||||
|
||||
func TestDiff(t *testing.T) {
|
||||
difftest.DiffTest(t, myers.ComputeEdits)
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright 2022 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/tools/internal/diff/lcs"
|
||||
)
|
||||
|
||||
// Strings computes the differences between two strings.
|
||||
// The resulting edits respect rune boundaries.
|
||||
func Strings(before, after string) []Edit {
|
||||
if before == after {
|
||||
return nil // common case
|
||||
}
|
||||
|
||||
if isASCII(before) && isASCII(after) {
|
||||
// TODO(adonovan): opt: specialize diffASCII for strings.
|
||||
return diffASCII([]byte(before), []byte(after))
|
||||
}
|
||||
return diffRunes([]rune(before), []rune(after))
|
||||
}
|
||||
|
||||
// Bytes computes the differences between two byte slices.
|
||||
// The resulting edits respect rune boundaries.
|
||||
func Bytes(before, after []byte) []Edit {
|
||||
if bytes.Equal(before, after) {
|
||||
return nil // common case
|
||||
}
|
||||
|
||||
if isASCII(before) && isASCII(after) {
|
||||
return diffASCII(before, after)
|
||||
}
|
||||
return diffRunes(runes(before), runes(after))
|
||||
}
|
||||
|
||||
func diffASCII(before, after []byte) []Edit {
|
||||
diffs := lcs.DiffBytes(before, after)
|
||||
|
||||
// Convert from LCS diffs.
|
||||
res := make([]Edit, len(diffs))
|
||||
for i, d := range diffs {
|
||||
res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func diffRunes(before, after []rune) []Edit {
|
||||
diffs := lcs.DiffRunes(before, after)
|
||||
|
||||
// The diffs returned by the lcs package use indexes
|
||||
// into whatever slice was passed in.
|
||||
// Convert rune offsets to byte offsets.
|
||||
res := make([]Edit, len(diffs))
|
||||
lastEnd := 0
|
||||
utf8Len := 0
|
||||
for i, d := range diffs {
|
||||
utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits
|
||||
start := utf8Len
|
||||
utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit
|
||||
res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])}
|
||||
lastEnd = d.End
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// runes decodes a UTF-8 byte slice into its runes. It is like
// []rune(string(bytes)) without the duplicate allocation.
func runes(bytes []byte) []rune {
	decoded := make([]rune, utf8.RuneCount(bytes))
	for i := range decoded {
		r, size := utf8.DecodeRune(bytes)
		decoded[i] = r
		bytes = bytes[size:]
	}
	return decoded
}
|
||||
|
||||
// runesLen returns the length in bytes of the UTF-8 encoding of runes,
// computed as the sum of utf8.RuneLen over the elements.
func runesLen(runes []rune) (len int) {
	for i := range runes {
		len += utf8.RuneLen(runes[i])
	}
	return len
}
|
||||
|
||||
// isASCII reports whether s contains only ASCII, that is, whether every
// byte of s is below utf8.RuneSelf.
func isASCII[S string | []byte](s S) bool {
	for i := 0; i < len(s); i++ {
		if s[i] < utf8.RuneSelf {
			continue
		}
		return false
	}
	return true
}
|
||||
@@ -0,0 +1,251 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package diff
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DefaultContextLines is the number of unchanged lines of surrounding
|
||||
// context displayed by Unified. Use ToUnified to specify a different value.
|
||||
const DefaultContextLines = 3
|
||||
|
||||
// Unified returns a unified diff of the old and new strings.
|
||||
// The old and new labels are the names of the old and new files.
|
||||
// If the strings are equal, it returns the empty string.
|
||||
func Unified(oldLabel, newLabel, old, new string) string {
|
||||
edits := Strings(old, new)
|
||||
unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines)
|
||||
if err != nil {
|
||||
// Can't happen: edits are consistent.
|
||||
log.Fatalf("internal error in diff.Unified: %v", err)
|
||||
}
|
||||
return unified
|
||||
}
|
||||
|
||||
// ToUnified applies the edits to content and returns a unified diff,
|
||||
// with contextLines lines of (unchanged) context around each diff hunk.
|
||||
// The old and new labels are the names of the content and result files.
|
||||
// It returns an error if the edits are inconsistent; see ApplyEdits.
|
||||
func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) {
|
||||
u, err := toUnified(oldLabel, newLabel, content, edits, contextLines)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return u.String(), nil
|
||||
}
|
||||
|
||||
// unified represents a set of edits as a unified diff.
|
||||
type unified struct {
|
||||
// from is the name of the original file.
|
||||
from string
|
||||
// to is the name of the modified file.
|
||||
to string
|
||||
// hunks is the set of edit hunks needed to transform the file content.
|
||||
hunks []*hunk
|
||||
}
|
||||
|
||||
// Hunk represents a contiguous set of line edits to apply.
|
||||
type hunk struct {
|
||||
// The line in the original source where the hunk starts.
|
||||
fromLine int
|
||||
// The line in the original source where the hunk finishes.
|
||||
toLine int
|
||||
// The set of line based edits to apply.
|
||||
lines []line
|
||||
}
|
||||
|
||||
// Line represents a single line operation to apply as part of a Hunk.
|
||||
type line struct {
|
||||
// kind is the type of line this represents, deletion, insertion or copy.
|
||||
kind opKind
|
||||
// content is the content of this line.
|
||||
// For deletion it is the line being removed, for all others it is the line
|
||||
// to put in the output.
|
||||
content string
|
||||
}
|
||||
|
||||
// opKind identifies the kind of operation that a line represents.
type opKind int

const (
	// opDelete marks a line that is present in the input but not in the
	// output.
	opDelete opKind = iota
	// opInsert marks a line that is new in the output.
	opInsert
	// opEqual marks a line that is the same in the input and output; it is
	// often used to provide context around edited lines.
	opEqual
)

// String returns a human readable representation of an opKind. It is not
// intended for machine processing. It panics on an undeclared value.
func (k opKind) String() string {
	if k == opDelete {
		return "delete"
	}
	if k == opInsert {
		return "insert"
	}
	if k == opEqual {
		return "equal"
	}
	panic("unknown operation kind")
}
|
||||
|
||||
// toUnified takes a file contents and a sequence of edits, and calculates
|
||||
// a unified diff that represents those edits.
|
||||
func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) {
|
||||
gap := contextLines * 2
|
||||
u := unified{
|
||||
from: fromName,
|
||||
to: toName,
|
||||
}
|
||||
if len(edits) == 0 {
|
||||
return u, nil
|
||||
}
|
||||
var err error
|
||||
edits, err = lineEdits(content, edits) // expand to whole lines
|
||||
if err != nil {
|
||||
return u, err
|
||||
}
|
||||
lines := splitLines(content)
|
||||
var h *hunk
|
||||
last := 0
|
||||
toLine := 0
|
||||
for _, edit := range edits {
|
||||
// Compute the zero-based line numbers of the edit start and end.
|
||||
// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
|
||||
start := strings.Count(content[:edit.Start], "\n")
|
||||
end := strings.Count(content[:edit.End], "\n")
|
||||
if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
|
||||
end++ // EOF counts as an implicit newline
|
||||
}
|
||||
|
||||
switch {
|
||||
case h != nil && start == last:
|
||||
//direct extension
|
||||
case h != nil && start <= last+gap:
|
||||
//within range of previous lines, add the joiners
|
||||
addEqualLines(h, lines, last, start)
|
||||
default:
|
||||
//need to start a new hunk
|
||||
if h != nil {
|
||||
// add the edge to the previous hunk
|
||||
addEqualLines(h, lines, last, last+contextLines)
|
||||
u.hunks = append(u.hunks, h)
|
||||
}
|
||||
toLine += start - last
|
||||
h = &hunk{
|
||||
fromLine: start + 1,
|
||||
toLine: toLine + 1,
|
||||
}
|
||||
// add the edge to the new hunk
|
||||
delta := addEqualLines(h, lines, start-contextLines, start)
|
||||
h.fromLine -= delta
|
||||
h.toLine -= delta
|
||||
}
|
||||
last = start
|
||||
for i := start; i < end; i++ {
|
||||
h.lines = append(h.lines, line{kind: opDelete, content: lines[i]})
|
||||
last++
|
||||
}
|
||||
if edit.New != "" {
|
||||
for _, content := range splitLines(edit.New) {
|
||||
h.lines = append(h.lines, line{kind: opInsert, content: content})
|
||||
toLine++
|
||||
}
|
||||
}
|
||||
}
|
||||
if h != nil {
|
||||
// add the edge to the final hunk
|
||||
addEqualLines(h, lines, last, last+contextLines)
|
||||
u.hunks = append(u.hunks, h)
|
||||
}
|
||||
return u, nil
|
||||
}
|
||||
|
||||
// splitLines splits text after each newline, so every returned line keeps
// its terminating "\n". The empty final element produced when text is empty
// or ends in a newline is dropped.
func splitLines(text string) []string {
	parts := strings.SplitAfter(text, "\n")
	if n := len(parts); parts[n-1] == "" {
		return parts[:n-1]
	}
	return parts
}
|
||||
|
||||
func addEqualLines(h *hunk, lines []string, start, end int) int {
|
||||
delta := 0
|
||||
for i := start; i < end; i++ {
|
||||
if i < 0 {
|
||||
continue
|
||||
}
|
||||
if i >= len(lines) {
|
||||
return delta
|
||||
}
|
||||
h.lines = append(h.lines, line{kind: opEqual, content: lines[i]})
|
||||
delta++
|
||||
}
|
||||
return delta
|
||||
}
|
||||
|
||||
// String converts a unified diff to the standard textual form for that diff.
|
||||
// The output of this function can be passed to tools like patch.
|
||||
func (u unified) String() string {
|
||||
if len(u.hunks) == 0 {
|
||||
return ""
|
||||
}
|
||||
b := new(strings.Builder)
|
||||
fmt.Fprintf(b, "--- %s\n", u.from)
|
||||
fmt.Fprintf(b, "+++ %s\n", u.to)
|
||||
for _, hunk := range u.hunks {
|
||||
fromCount, toCount := 0, 0
|
||||
for _, l := range hunk.lines {
|
||||
switch l.kind {
|
||||
case opDelete:
|
||||
fromCount++
|
||||
case opInsert:
|
||||
toCount++
|
||||
default:
|
||||
fromCount++
|
||||
toCount++
|
||||
}
|
||||
}
|
||||
fmt.Fprint(b, "@@")
|
||||
if fromCount > 1 {
|
||||
fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount)
|
||||
} else if hunk.fromLine == 1 && fromCount == 0 {
|
||||
// Match odd GNU diff -u behavior adding to empty file.
|
||||
fmt.Fprintf(b, " -0,0")
|
||||
} else {
|
||||
fmt.Fprintf(b, " -%d", hunk.fromLine)
|
||||
}
|
||||
if toCount > 1 {
|
||||
fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount)
|
||||
} else if hunk.toLine == 1 && toCount == 0 {
|
||||
// Match odd GNU diff -u behavior adding to empty file.
|
||||
fmt.Fprintf(b, " +0,0")
|
||||
} else {
|
||||
fmt.Fprintf(b, " +%d", hunk.toLine)
|
||||
}
|
||||
fmt.Fprint(b, " @@\n")
|
||||
for _, l := range hunk.lines {
|
||||
switch l.kind {
|
||||
case opDelete:
|
||||
fmt.Fprintf(b, "-%s", l.content)
|
||||
case opInsert:
|
||||
fmt.Fprintf(b, "+%s", l.content)
|
||||
default:
|
||||
fmt.Fprintf(b, " %s", l.content)
|
||||
}
|
||||
if !strings.HasSuffix(l.content, "\n") {
|
||||
fmt.Fprintf(b, "\n\\ No newline at end of file\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
Reference in New Issue
Block a user