whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,115 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"reflect"
"strings"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// versionList holds the value of the --versions command-line flag: a
// comma-separated list of Unicode versions. It defaults to the empty string.
var versionList = flag.String("versions", "",
"list of versions for which to generate RangeTables")
// bootstrapMessage is the text passed to log.Fatal by getVersions when the
// --versions flag is empty. It shows an example invocation and where to look
// up the available versions.
const bootstrapMessage = `No versions specified.
To bootstrap the code generation, run:
go run gen.go --versions=4.1.0,5.0.0,6.0.0,6.1.0,6.2.0,6.3.0,7.0.0
and ensure that the latest versions are included by checking:
https://www.unicode.org/Public/`
// getVersions returns the Unicode versions for which tables are generated.
//
// The list is taken from the comma-separated --versions flag. Surrounding
// whitespace is trimmed, empty entries are dropped and duplicates are removed:
// main emits one map key and one variable per returned version, so a repeated
// entry would produce generated code that does not compile. The version
// reported by gen.UnicodeVersion is always included. The result is sorted
// with a numeric collator.
//
// If the flag is empty, getVersions logs bootstrapMessage and exits.
func getVersions() []string {
	if *versionList == "" {
		log.Fatal(bootstrapMessage)
	}

	// Appending the current version up front lets the de-duplication below
	// guarantee that it is present exactly once.
	requested := append(strings.Split(*versionList, ","), gen.UnicodeVersion())

	seen := make(map[string]bool, len(requested))
	versions := make([]string, 0, len(requested))
	for _, v := range requested {
		v = strings.TrimSpace(v)
		if v == "" || seen[v] {
			continue
		}
		seen[v] = true
		versions = append(versions, v)
	}

	c := collate.New(language.Und, collate.Numeric)
	c.SortStrings(versions)
	return versions
}
// main builds the source text for the version-indexed tables of assigned code
// points and hands it to gen.WriteVersionedGoFile under the file name
// "tables.go" and package name "rangetable".
//
// The generated text consists of a go:generate directive, a map from version
// string to table, and one table variable per version, each preceded by a
// comment with its estimated size in bytes.
func main() {
	gen.Init()

	versions := getVersions()

	var buf bytes.Buffer
	out := &buf

	fmt.Fprintf(out, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ","))
	fmt.Fprintf(out, "import \"unicode\"\n\n")

	// ident turns a version such as "7.0.0" into the identifier suffix "7_0_0".
	ident := func(version string) string {
		return strings.Replace(version, ".", "_", -1)
	}

	fmt.Fprintf(out, "var assigned = map[string]*unicode.RangeTable{\n")
	for i := range versions {
		fmt.Fprintf(out, "\t%q: assigned%s,\n", versions[i], ident(versions[i]))
	}
	fmt.Fprintf(out, "}\n\n")

	// The element sizes do not depend on the version, so compute them once.
	headerBytes := int(reflect.TypeOf(unicode.RangeTable{}).Size())
	range16Bytes := int(reflect.TypeOf(unicode.Range16{}).Size())
	range32Bytes := int(reflect.TypeOf(unicode.Range32{}).Size())

	total := 0
	for _, version := range versions {
		runes := []rune{}
		// NOTE(review): the value returned by gen.Open is never closed here;
		// if it implements io.Closer, confirm whether it should be.
		src := gen.Open("https://www.unicode.org/Public/", "", version+"/ucd/UnicodeData.txt")
		ucd.Parse(src, func(p *ucd.Parser) {
			runes = append(runes, p.Rune(0))
		})
		table := rangetable.New(runes...)

		tableBytes := headerBytes + range16Bytes*len(table.R16) + range32Bytes*len(table.R32)
		fmt.Fprintf(out, "// size %d bytes (%d KiB)\n", tableBytes, tableBytes/1024)
		fmt.Fprintf(out, "var assigned%s = ", ident(version))
		print(out, table)
		total += tableBytes
	}
	fmt.Fprintf(out, "// Total size %d bytes (%d KiB)\n", total, total/1024)

	gen.WriteVersionedGoFile("tables.go", "rangetable", buf.Bytes())
}
// print writes rt to w as a Go composite literal of the form
// &unicode.RangeTable{...}, followed by a blank line. R16 bounds are written
// as zero-padded 4-digit hex, R32 bounds as zero-padded 8-digit hex. Write
// errors are not checked.
func print(w io.Writer, rt *unicode.RangeTable) {
	fmt.Fprintln(w, "&unicode.RangeTable{")

	fmt.Fprintln(w, "\tR16: []unicode.Range16{")
	for i := range rt.R16 {
		entry := rt.R16[i]
		fmt.Fprintf(w, "\t\t{%#04x, %#04x, %d},\n", entry.Lo, entry.Hi, entry.Stride)
	}
	fmt.Fprintln(w, "\t},")

	fmt.Fprintln(w, "\tR32: []unicode.Range32{")
	for i := range rt.R32 {
		entry := rt.R32[i]
		fmt.Fprintf(w, "\t\t{%#08x, %#08x, %d},\n", entry.Lo, entry.Hi, entry.Stride)
	}
	fmt.Fprintln(w, "\t},")

	fmt.Fprintf(w, "\tLatinOffset: %d,\n", rt.LatinOffset)
	fmt.Fprintf(w, "}\n\n")
}
@@ -0,0 +1,260 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"unicode"
)
// atEnd is used to mark a completed iteration. It is one past unicode.MaxRune,
// so it compares greater than any valid rune.
const atEnd = unicode.MaxRune + 1
// Merge returns a new RangeTable that is the union of the given tables.
// It can also be used to compact user-created RangeTables. The entries in
// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
//
// A lookup in the resulting table can be several times faster than using In
// directly on the ranges. Merge is an expensive operation, however, and only
// makes sense if one intends to use the result for more than a couple of
// hundred lookups.
//
// Called without arguments, Merge returns an empty table. Every argument is
// dereferenced, so a nil *unicode.RangeTable argument causes a panic.
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
rt := &unicode.RangeTable{}
if len(ranges) == 0 {
return rt
}
// One cursor per input table. The same cursors are used for the R16 pass and
// are then reset for the R32 pass.
iter := tablesIter(make([]tableIndex, len(ranges)))
// Position every cursor on the first R16 entry of its table; tables without
// R16 entries stay at atEnd.
for i, t := range ranges {
iter[i] = tableIndex{t, 0, atEnd}
if len(t.R16) > 0 {
iter[i].next = rune(t.R16[0].Lo)
}
}
// r0 is the pending output range. A zero Stride is how next16 reports that
// no ranges are left.
if r0 := iter.next16(); r0.Stride != 0 {
for {
r1 := iter.next16()
if r1.Stride == 0 {
// Input exhausted: flush the pending range.
rt.R16 = append(rt.R16, r0)
break
}
// stride is the distance from the last rune of r0 to the first rune of r1.
stride := r1.Lo - r0.Hi
// r1 can be absorbed whole if that distance matches the stride of both
// ranges, where a single-rune range is compatible with any stride.
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
// Fully merge the next range into the previous one.
r0.Hi, r0.Stride = r1.Hi, stride
continue
} else if stride == r0.Stride {
// Move the first element of r1 to r0. This may eliminate an
// entry.
r0.Hi = r1.Lo
// stride already equals r0.Stride in this branch, so this assignment does
// not change its value.
r0.Stride = stride
r1.Lo = r1.Lo + r1.Stride
// r1 has no runes left after giving up its first one: drop it and keep
// extending r0.
if r1.Lo > r1.Hi {
continue
}
}
// r0 cannot be extended any further: emit it and continue with r1.
rt.R16 = append(rt.R16, r0)
r0 = r1
}
}
// Reset the cursors for the R32 pass.
for i, t := range ranges {
iter[i] = tableIndex{t, 0, atEnd}
if len(t.R32) > 0 {
iter[i].next = rune(t.R32[0].Lo)
}
}
// Same coalescing loop as above, operating on Range32 values.
if r0 := iter.next32(); r0.Stride != 0 {
for {
r1 := iter.next32()
if r1.Stride == 0 {
// Input exhausted: flush the pending range.
rt.R32 = append(rt.R32, r0)
break
}
// stride is the distance from the last rune of r0 to the first rune of r1.
stride := r1.Lo - r0.Hi
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
// Fully merge the next range into the previous one.
r0.Hi, r0.Stride = r1.Hi, stride
continue
} else if stride == r0.Stride {
// Move the first element of r1 to r0. This may eliminate an
// entry. r0.Stride is left as is: it already equals stride here.
r0.Hi = r1.Lo
r1.Lo = r1.Lo + r1.Stride
if r1.Lo > r1.Hi {
continue
}
}
// r0 cannot be extended any further: emit it and continue with r1.
rt.R32 = append(rt.R32, r0)
r0 = r1
}
}
// LatinOffset is set to the number of leading R16 entries whose Hi does not
// exceed unicode.MaxLatin1.
for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
rt.LatinOffset = i + 1
}
return rt
}
// tableIndex is a cursor into one of the tables being merged.
type tableIndex struct {
	t    *unicode.RangeTable // table being iterated
	p    uint32              // index of the current range within t.R16 or t.R32
	next rune                // next rune to be consumed from t, or atEnd
}

// tablesIter is the set of cursors, one per input table, used by Merge.
type tablesIter []tableIndex

// sortIter orders t by ascending next field using a stable insertion sort.
// The slice is short and, between calls, nearly sorted already, which is the
// case insertion sort handles well.
func sortIter(t []tableIndex) {
	for i := 1; i < len(t); i++ {
		cur := t[i]
		j := i
		// Shift every strictly larger element one slot to the right, then
		// drop cur into the gap. Equal elements keep their relative order.
		for ; j > 0 && t[j-1].next > cur.next; j-- {
			t[j] = t[j-1]
		}
		t[j] = cur
	}
}
// next16 finds the next range to be added to the table. If ranges overlap
// between multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
//
// ti must not be empty: ti[0] is read unconditionally. The cursors in ti are
// re-sorted in place, and every cursor whose next rune lies within the
// returned range is advanced beyond the range's Hi.
func (ti tablesIter) next16() unicode.Range16 {
// Bring the cursor with the smallest pending rune to the front.
sortIter(ti)
t0 := ti[0]
// If even the smallest cursor is at atEnd, all tables are exhausted.
if t0.next == atEnd {
return unicode.Range16{}
}
// Work on a copy of the head table's current range, starting at its first
// rune that has not been consumed yet.
r0 := t0.t.R16[t0.p]
r0.Lo = uint16(t0.next)
// We restrict the Hi of the current range if it overlaps with another range.
for i := range ti {
tn := ti[i]
// Since our tableIndices are sorted by next, we can break if there
// is no overlap. The first value of a next range can always be merged
// into the current one, so we can break in case of equality as well.
if rune(r0.Hi) <= tn.next {
break
}
// rn is the unconsumed remainder of the current range of table tn.
rn := tn.t.R16[tn.p]
rn.Lo = uint16(tn.next)
// Limit r0.Hi based on next ranges in list, but allow it to overlap
// with ranges as long as it subsumes it.
// m is the offset of rn.Lo from the nearest rune at or below it on r0's
// stride grid; m == 0 means rn.Lo itself is a member of r0.
m := (rn.Lo - r0.Lo) % r0.Stride
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
// Overlap, take the min of the two Hi values: for simplicity's sake
// we only process one range at a time.
if r0.Hi > rn.Hi {
r0.Hi = rn.Hi
}
} else {
// Not a compatible stride. Set to the last possible value before
// rn.Lo, but ensure there is at least one value.
if x := rn.Lo - m; r0.Lo <= x {
r0.Hi = x
}
break
}
}
// Update the next values for each table.
for i := range ti {
// Take a pointer so that the cursor itself is updated.
tn := &ti[i]
// Cursors are sorted by next, so all remaining ones start beyond r0.
if rune(r0.Hi) < tn.next {
break
}
rn := tn.t.R16[tn.p]
stride := rune(rn.Stride)
// Step tn.next along its own stride to the first value greater than r0.Hi.
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
// If that overshoots the current range, move on to the table's next range
// or mark the table as exhausted.
if rune(rn.Hi) < tn.next {
if tn.p++; int(tn.p) == len(tn.t.R16) {
tn.next = atEnd
} else {
tn.next = rune(tn.t.R16[tn.p].Lo)
}
}
}
// A range holding a single rune is always reported with stride 1.
if r0.Lo == r0.Hi {
r0.Stride = 1
}
return r0
}
// next32 finds the next range to be added to the table. If ranges overlap
// between multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
//
// It is the Range32 counterpart of next16. ti must not be empty: ti[0] is read
// unconditionally. The cursors in ti are re-sorted in place, and every cursor
// whose next rune lies within the returned range is advanced beyond the
// range's Hi.
func (ti tablesIter) next32() unicode.Range32 {
// Bring the cursor with the smallest pending rune to the front.
sortIter(ti)
t0 := ti[0]
// If even the smallest cursor is at atEnd, all tables are exhausted.
if t0.next == atEnd {
return unicode.Range32{}
}
// Work on a copy of the head table's current range, starting at its first
// rune that has not been consumed yet.
r0 := t0.t.R32[t0.p]
r0.Lo = uint32(t0.next)
// We restrict the Hi of the current range if it overlaps with another range.
for i := range ti {
tn := ti[i]
// Since our tableIndices are sorted by next, we can break if there
// is no overlap. The first value of a next range can always be merged
// into the current one, so we can break in case of equality as well.
if rune(r0.Hi) <= tn.next {
break
}
// rn is the unconsumed remainder of the current range of table tn.
rn := tn.t.R32[tn.p]
rn.Lo = uint32(tn.next)
// Limit r0.Hi based on next ranges in list, but allow it to overlap
// with ranges as long as it subsumes it.
// m is the offset of rn.Lo from the nearest rune at or below it on r0's
// stride grid; m == 0 means rn.Lo itself is a member of r0.
m := (rn.Lo - r0.Lo) % r0.Stride
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
// Overlap, take the min of the two Hi values: for simplicity's sake
// we only process one range at a time.
if r0.Hi > rn.Hi {
r0.Hi = rn.Hi
}
} else {
// Not a compatible stride. Set to the last possible value before
// rn.Lo, but ensure there is at least one value.
if x := rn.Lo - m; r0.Lo <= x {
r0.Hi = x
}
break
}
}
// Update the next values for each table.
for i := range ti {
// Take a pointer so that the cursor itself is updated.
tn := &ti[i]
// Cursors are sorted by next, so all remaining ones start beyond r0.
if rune(r0.Hi) < tn.next {
break
}
rn := tn.t.R32[tn.p]
stride := rune(rn.Stride)
// Step tn.next along its own stride to the first value greater than r0.Hi.
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
// If that overshoots the current range, move on to the table's next range
// or mark the table as exhausted.
if rune(rn.Hi) < tn.next {
if tn.p++; int(tn.p) == len(tn.t.R32) {
tn.next = atEnd
} else {
tn.next = rune(tn.t.R32[tn.p].Lo)
}
}
}
// A range holding a single rune is always reported with stride 1.
if r0.Lo == r0.Hi {
r0.Stride = 1
}
return r0
}
@@ -0,0 +1,184 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"testing"
"unicode"
)
// Hand-written tables used as inputs by the tests and benchmarks.
var (
	// maxRuneTable contains unicode.MaxRune as its only member.
	maxRuneTable = &unicode.RangeTable{
		R32: []unicode.Range32{
			{Lo: unicode.MaxRune, Hi: unicode.MaxRune, Stride: 1},
		},
	}

	// overlap1 and overlap2 span almost the same intervals, but overlap2
	// starts one rune later and uses a different stride in R32.
	overlap1 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x100, Hi: 0xfffc, Stride: 4},
		},
		R32: []unicode.Range32{
			{Lo: 0x100000, Hi: 0x10fffc, Stride: 4},
		},
	}
	overlap2 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x101, Hi: 0xfffd, Stride: 4},
		},
		R32: []unicode.Range32{
			{Lo: 0x100001, Hi: 0x10fffd, Stride: 3},
		},
	}

	// The following table should be compacted into two entries for R16 and R32.
	optimize = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x1, Hi: 0x1, Stride: 1},
			{Lo: 0x2, Hi: 0x2, Stride: 1},
			{Lo: 0x3, Hi: 0x3, Stride: 1},
			{Lo: 0x5, Hi: 0x5, Stride: 1},
			{Lo: 0x7, Hi: 0x7, Stride: 1},
			{Lo: 0x9, Hi: 0x9, Stride: 1},
			{Lo: 0xb, Hi: 0xf, Stride: 2},
		},
		R32: []unicode.Range32{
			{Lo: 0x10001, Hi: 0x10001, Stride: 1},
			{Lo: 0x10002, Hi: 0x10002, Stride: 1},
			{Lo: 0x10003, Hi: 0x10003, Stride: 1},
			{Lo: 0x10005, Hi: 0x10005, Stride: 1},
			{Lo: 0x10007, Hi: 0x10007, Stride: 1},
			{Lo: 0x10009, Hi: 0x10009, Stride: 1},
			{Lo: 0x1000b, Hi: 0x1000f, Stride: 2},
		},
	}
)
// TestMerge merges each group of tables and checks the result in two ways:
// membership must agree with unicode.In on the inputs for every rune up to
// unicode.MaxRune, and the resulting R16 and R32 slices must be well-formed
// (Lo <= Hi for every range, ranges strictly ascending) and fully compacted
// (no range directly continues the stride of its predecessor).
func TestMerge(t *testing.T) {
	for i, tt := range [][]*unicode.RangeTable{
		{unicode.Cc, unicode.Cf},
		{unicode.L, unicode.Ll},
		{unicode.L, unicode.Ll, unicode.Lu},
		{unicode.Ll, unicode.Lu},
		{unicode.M},
		unicode.GraphicRanges,
		cased,

		// Merge R16 only and R32 only and vice versa.
		{unicode.Khmer, unicode.Khudawadi},
		{unicode.Imperial_Aramaic, unicode.Radical},

		// Merge with empty.
		{&unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hiragana},
		{unicode.Inherited, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},

		// Hypothetical tables.
		{maxRuneTable},
		{overlap1, overlap2},

		// Optimization
		{optimize},
	} {
		rt := Merge(tt...)
		for r := rune(0); r <= unicode.MaxRune; r++ {
			if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
				t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
			}
		}

		// Test optimization and correctness for R16.
		for k, cur := range rt.R16 {
			// Checked for every range, including the last one.
			if cur.Lo > cur.Hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, cur.Lo, cur.Hi)
			}
			if k+1 == len(rt.R16) {
				break // the remaining checks need a successor
			}
			next := rt.R16[k+1]
			if cur.Hi >= next.Lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, cur.Hi, next.Lo)
			}
			if cur.Hi+cur.Stride == next.Lo {
				t.Errorf("%d: missed optimization for R16 at %d between %X and %X",
					i, k, cur, next)
			}
		}

		// Test optimization and correctness for R32.
		for k, cur := range rt.R32 {
			// Checked for every range, including the last one.
			if cur.Lo > cur.Hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, cur.Lo, cur.Hi)
			}
			if k+1 == len(rt.R32) {
				break // the remaining checks need a successor
			}
			next := rt.R32[k+1]
			if cur.Hi >= next.Lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, cur.Hi, next.Lo)
			}
			if cur.Hi+cur.Stride == next.Lo {
				t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
					i, k, cur, next)
			}
		}
	}
}
// runes is the sample text looked up by the benchmarks: ASCII letters, digits
// and punctuation followed by one supplementary-plane code point.
const runes = "Hello World in 2015!,\U0010fffd"

// BenchmarkNotMerged measures unicode.In over the individual tables of
// unicode.GraphicRanges, as a baseline for BenchmarkMerged.
func BenchmarkNotMerged(t *testing.B) {
	tables := unicode.GraphicRanges
	for iter := 0; iter < t.N; iter++ {
		for _, c := range runes {
			unicode.In(c, tables...)
		}
	}
}
// BenchmarkMerged measures unicode.Is on a single table produced by merging
// unicode.GraphicRanges. The merge itself happens before the timed loop body.
func BenchmarkMerged(t *testing.B) {
	merged := Merge(unicode.GraphicRanges...)
	for iter := 0; iter < t.N; iter++ {
		for _, c := range runes {
			unicode.Is(merged, c)
		}
	}
}
// cased lists the tables that are merged and looked up by the "Cased"
// benchmarks and by TestMerge.
var cased = []*unicode.RangeTable{
	unicode.Lower,
	unicode.Upper,
	unicode.Title,
	unicode.Other_Lowercase,
	unicode.Other_Uppercase,
}

// BenchmarkNotMergedCased measures unicode.In over the individual cased
// tables, as a baseline for BenchmarkMergedCased.
func BenchmarkNotMergedCased(t *testing.B) {
	tables := cased
	for iter := 0; iter < t.N; iter++ {
		for _, c := range runes {
			unicode.In(c, tables...)
		}
	}
}
// BenchmarkMergedCased measures unicode.Is on a single table produced by
// merging the cased tables.
func BenchmarkMergedCased(t *testing.B) {
	// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
	// Unicode 7.0.0.
	merged := Merge(cased...)
	for iter := 0; iter < t.N; iter++ {
		for _, c := range runes {
			unicode.Is(merged, c)
		}
	}
}
// BenchmarkInit measures the cost of Merge itself on two sets of tables from
// the unicode package: cased and unicode.GraphicRanges.
func BenchmarkInit(t *testing.B) {
	graphic := unicode.GraphicRanges
	for iter := 0; iter < t.N; iter++ {
		Merge(cased...)
		Merge(graphic...)
	}
}
func BenchmarkInit2(t *testing.B) {
// Hypothetical near-worst-case performance.
for i := 0; i < t.N; i++ {
Merge(overlap1, overlap2)
}
}
@@ -0,0 +1,70 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package rangetable provides utilities for creating and inspecting
// unicode.RangeTables.
package rangetable
import (
"sort"
"unicode"
)
// New creates a RangeTable from the given runes, which may contain duplicates
// and may be given in any order.
//
// The argument is not modified. When a caller passes an existing slice as
// New(s...), the slice is handed over without copying, so sorting it in place
// would reorder and partially overwrite the caller's data; New therefore
// works on a private copy.
//
// Runes up to 0xFFFF are stored in R16 and all others in R32. The result is
// compacted with Merge. With no arguments New returns an empty table.
func New(r ...rune) *unicode.RangeTable {
	if len(r) == 0 {
		return &unicode.RangeTable{}
	}

	sorted := make([]rune, len(r))
	copy(sorted, r)
	sort.Sort(byRune(sorted))

	// Remove duplicates: after sorting they are adjacent, so keep a rune only
	// if it differs from the last one kept.
	k := 1
	for i := 1; i < len(sorted); i++ {
		if sorted[k-1] != sorted[i] {
			sorted[k] = sorted[i]
			k++
		}
	}

	// Start with one single-rune range per code point and let Merge combine
	// them into strided ranges.
	var rt unicode.RangeTable
	for _, c := range sorted[:k] {
		if c <= 0xFFFF {
			rt.R16 = append(rt.R16, unicode.Range16{Lo: uint16(c), Hi: uint16(c), Stride: 1})
		} else {
			rt.R32 = append(rt.R32, unicode.Range32{Lo: uint32(c), Hi: uint32(c), Stride: 1})
		}
	}

	// Optimize RangeTable.
	return Merge(&rt)
}
// byRune is a slice of runes that can be sorted in ascending order through
// the Len, Swap and Less methods below.
type byRune []rune

// Len reports the number of runes in s.
func (s byRune) Len() int {
	return len(s)
}

// Swap exchanges the runes at positions i and j.
func (s byRune) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// Less reports whether the rune at position i is smaller than the one at j.
func (s byRune) Less(i, j int) bool {
	return s[j] > s[i]
}
// Visit calls fn once for every rune contained in rt: first the members of
// each R16 range in slice order, then the members of each R32 range. Within a
// range the runes are produced from Lo to Hi in steps of Stride.
func Visit(rt *unicode.RangeTable, fn func(rune)) {
	// walk reports every rune from lo up to and including hi, step apart.
	walk := func(lo, hi, step rune) {
		for cur := lo; cur <= hi; cur += step {
			fn(cur)
		}
	}

	for i := range rt.R16 {
		e := rt.R16[i]
		walk(rune(e.Lo), rune(e.Hi), rune(e.Stride))
	}
	for i := range rt.R32 {
		e := rt.R32[i]
		walk(rune(e.Lo), rune(e.Hi), rune(e.Stride))
	}
}
// Assigned looks up the table of assigned code points for the given Unicode
// version string. According to the package documentation this covers graphic,
// format, control, and private-use characters. The result is nil when no
// table is registered for version.
func Assigned(version string) *unicode.RangeTable {
	if rt, ok := assigned[version]; ok {
		return rt
	}
	return nil
}
@@ -0,0 +1,55 @@
package rangetable
import (
"reflect"
"testing"
"unicode"
)
// Tables used as inputs by TestVisit and TestNew.
var (
	// empty has no ranges at all.
	empty = &unicode.RangeTable{}

	// many holds every fifth code point from 0 through 0xffff in R16 and two
	// more, 0x10004 and 0x10009, in R32.
	many = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0, Hi: 0xffff, Stride: 5},
		},
		R32: []unicode.Range32{
			{Lo: 0x10004, Hi: 0x10009, Stride: 5},
		},
		LatinOffset: 0,
	}
)
// TestVisit checks that Visit never calls back for an empty table and that,
// for the table many, it produces the runes 0, 5, 10, ... in order, ending at
// U+10009.
func TestVisit(t *testing.T) {
	Visit(empty, func(rune) {
		t.Error("call from empty RangeTable")
	})

	// expected is the rune the next callback should deliver.
	var expected rune
	Visit(many, func(r rune) {
		if r != expected {
			t.Errorf("got %U; want %U", r, expected)
		}
		expected += 5
	})

	// Undo the final increment to recover the last rune that was expected.
	last := expected - 5
	if last != 0x10009 {
		t.Errorf("last run was %U; want U+10009", last)
	}
}
// TestNew round-trips several tables through New: the runes of each table are
// collected with Visit, passed to New, and the runes of the resulting table
// must match the collected ones exactly.
func TestNew(t *testing.T) {
	tables := []*unicode.RangeTable{
		empty,
		unicode.Co,
		unicode.Letter,
		unicode.ASCII_Hex_Digit,
		many,
		maxRuneTable,
	}

	// collect returns the runes of rt in visiting order, or nil if it has none.
	collect := func(rt *unicode.RangeTable) []rune {
		var out []rune
		Visit(rt, func(r rune) {
			out = append(out, r)
		})
		return out
	}

	for i, rt := range tables {
		want := collect(rt)
		got := collect(New(want...))
		if !reflect.DeepEqual(got, want) {
			t.Errorf("%d:\ngot %v;\nwant %v", i, got, want)
		}
	}
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff