whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runenames_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/unicode/runenames"
|
||||
)
|
||||
|
||||
func Example() {
|
||||
runes := []rune{
|
||||
-1,
|
||||
'\U00000000',
|
||||
'\U0000001f',
|
||||
'\U00000020',
|
||||
'\U00000021',
|
||||
'\U00000041',
|
||||
'\U0000007e',
|
||||
'\U0000007f',
|
||||
'\U00000080',
|
||||
'\U000000e0',
|
||||
|
||||
'\U0000037f',
|
||||
'\U00000380',
|
||||
'\U00000381',
|
||||
'\U00000382',
|
||||
'\U00000383',
|
||||
'\U00000384',
|
||||
'\U00000385',
|
||||
'\U00000386',
|
||||
'\U000007c0',
|
||||
|
||||
'\U00002603',
|
||||
'\U000033ff',
|
||||
'\U00003400',
|
||||
'\U00003401',
|
||||
'\U00003402',
|
||||
'\U00004dc0',
|
||||
|
||||
'\U00009fd5',
|
||||
'\U0000a000',
|
||||
0xdc00, // '\U0000dc00' (Low Surrogate) is an invalid Go literal.
|
||||
'\U0000f800',
|
||||
'\U0000fffc',
|
||||
'\U0000fffd',
|
||||
'\U0000fffe',
|
||||
'\U0000ffff',
|
||||
|
||||
'\U00010000',
|
||||
'\U0001f574',
|
||||
'\U0002fa1d',
|
||||
'\U0002fa1e',
|
||||
'\U000e0100',
|
||||
'\U000e01ef',
|
||||
'\U000e01f0',
|
||||
'\U00100000',
|
||||
'\U0010fffd',
|
||||
'\U0010fffe',
|
||||
'\U0010ffff',
|
||||
}
|
||||
|
||||
for _, r := range runes {
|
||||
fmt.Printf("%08x %q\n", r, runenames.Name(r))
|
||||
}
|
||||
|
||||
// Output:
|
||||
// -0000001 ""
|
||||
// 00000000 "<control>"
|
||||
// 0000001f "<control>"
|
||||
// 00000020 "SPACE"
|
||||
// 00000021 "EXCLAMATION MARK"
|
||||
// 00000041 "LATIN CAPITAL LETTER A"
|
||||
// 0000007e "TILDE"
|
||||
// 0000007f "<control>"
|
||||
// 00000080 "<control>"
|
||||
// 000000e0 "LATIN SMALL LETTER A WITH GRAVE"
|
||||
// 0000037f "GREEK CAPITAL LETTER YOT"
|
||||
// 00000380 ""
|
||||
// 00000381 ""
|
||||
// 00000382 ""
|
||||
// 00000383 ""
|
||||
// 00000384 "GREEK TONOS"
|
||||
// 00000385 "GREEK DIALYTIKA TONOS"
|
||||
// 00000386 "GREEK CAPITAL LETTER ALPHA WITH TONOS"
|
||||
// 000007c0 "NKO DIGIT ZERO"
|
||||
// 00002603 "SNOWMAN"
|
||||
// 000033ff "SQUARE GAL"
|
||||
// 00003400 "<CJK Ideograph Extension A>"
|
||||
// 00003401 "<CJK Ideograph Extension A>"
|
||||
// 00003402 "<CJK Ideograph Extension A>"
|
||||
// 00004dc0 "HEXAGRAM FOR THE CREATIVE HEAVEN"
|
||||
// 00009fd5 "<CJK Ideograph>"
|
||||
// 0000a000 "YI SYLLABLE IT"
|
||||
// 0000dc00 "<Low Surrogate>"
|
||||
// 0000f800 "<Private Use>"
|
||||
// 0000fffc "OBJECT REPLACEMENT CHARACTER"
|
||||
// 0000fffd "REPLACEMENT CHARACTER"
|
||||
// 0000fffe ""
|
||||
// 0000ffff ""
|
||||
// 00010000 "LINEAR B SYLLABLE B008 A"
|
||||
// 0001f574 "MAN IN BUSINESS SUIT LEVITATING"
|
||||
// 0002fa1d "CJK COMPATIBILITY IDEOGRAPH-2FA1D"
|
||||
// 0002fa1e ""
|
||||
// 000e0100 "VARIATION SELECTOR-17"
|
||||
// 000e01ef "VARIATION SELECTOR-256"
|
||||
// 000e01f0 ""
|
||||
// 00100000 "<Plane 16 Private Use>"
|
||||
// 0010fffd "<Plane 16 Private Use>"
|
||||
// 0010fffe ""
|
||||
// 0010ffff ""
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/gen/bitfield"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var (
|
||||
// computed by computeDirectOffsets
|
||||
directOffsets = map[string]int{}
|
||||
directData bytes.Buffer
|
||||
|
||||
// computed by computeEntries
|
||||
entries []entry
|
||||
singleData bytes.Buffer
|
||||
index []uint16
|
||||
)
|
||||
|
||||
// entry describes one run of consecutive runes. Fields carrying a bitfield
// tag are packed, in declaration order, into the integer written to
// tables.go; untagged fields are only used while generating.
type entry struct {
	// start is the first rune of the run.
	start rune `bitfield:"21,startRune"`
	// numRunes is the number of runes in the run; main derives it from
	// start and end just before packing.
	numRunes int `bitfield:"16"`
	// end is the last rune of the run (inclusive).
	end rune
	// index is an offset into directData for direct entries, and otherwise
	// the position in the index table of the run's first name.
	index int `bitfield:"16"`
	// base is the name length for direct entries, and otherwise the bits
	// above the low 16 of the singleData offset of the run's first name.
	base int `bitfield:"6"`
	// direct reports whether every rune of the run shares one name stored
	// in directData.
	direct bool `bitfield:""`
	// name is the name of the first rune in the run.
	name string
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteVersionedGoFile("tables.go", "runenames")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
computeDirectOffsets()
|
||||
computeEntries()
|
||||
|
||||
if err := bitfield.Gen(w, entry{}, nil); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
type entry uint64 // trick the generation code to use the entry type
|
||||
packed := []entry{}
|
||||
for _, e := range entries {
|
||||
e.numRunes = int(e.end - e.start + 1)
|
||||
v, err := bitfield.Pack(e, nil)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
packed = append(packed, entry(v))
|
||||
}
|
||||
|
||||
index = append(index, uint16(singleData.Len()))
|
||||
|
||||
w.WriteVar("entries", packed)
|
||||
w.WriteVar("index", index)
|
||||
w.WriteConst("directData", directData.String())
|
||||
w.WriteConst("singleData", singleData.String())
|
||||
}
|
||||
|
||||
func computeDirectOffsets() {
|
||||
counts := map[string]int{}
|
||||
|
||||
p := ucd.New(gen.OpenUCDFile("UnicodeData.txt"), ucd.KeepRanges)
|
||||
for p.Next() {
|
||||
start, end := p.Range(0)
|
||||
counts[getName(p)] += int(end-start) + 1
|
||||
}
|
||||
|
||||
direct := []string{}
|
||||
for k, v := range counts {
|
||||
if v > 1 {
|
||||
direct = append(direct, k)
|
||||
}
|
||||
}
|
||||
sort.Strings(direct)
|
||||
|
||||
for _, s := range direct {
|
||||
directOffsets[s] = directData.Len()
|
||||
directData.WriteString(s)
|
||||
}
|
||||
}
|
||||
|
||||
func computeEntries() {
|
||||
p := ucd.New(gen.OpenUCDFile("UnicodeData.txt"), ucd.KeepRanges)
|
||||
for p.Next() {
|
||||
start, end := p.Range(0)
|
||||
|
||||
last := entry{}
|
||||
if len(entries) > 0 {
|
||||
last = entries[len(entries)-1]
|
||||
}
|
||||
|
||||
name := getName(p)
|
||||
if index, ok := directOffsets[name]; ok {
|
||||
if last.name == name && last.end+1 == start {
|
||||
entries[len(entries)-1].end = end
|
||||
continue
|
||||
}
|
||||
entries = append(entries, entry{
|
||||
start: start,
|
||||
end: end,
|
||||
index: index,
|
||||
base: len(name),
|
||||
direct: true,
|
||||
name: name,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if start != end {
|
||||
log.Fatalf("Expected start == end, found %x != %x", start, end)
|
||||
}
|
||||
|
||||
offset := singleData.Len()
|
||||
base := offset >> 16
|
||||
index = append(index, uint16(offset))
|
||||
singleData.WriteString(name)
|
||||
|
||||
if last.base == base && last.end+1 == start {
|
||||
entries[len(entries)-1].end = start
|
||||
continue
|
||||
}
|
||||
|
||||
entries = append(entries, entry{
|
||||
start: start,
|
||||
end: end,
|
||||
index: len(index) - 1,
|
||||
base: base,
|
||||
name: name,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func getName(p *ucd.Parser) string {
|
||||
s := p.String(ucd.Name)
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
if s[0] == '<' {
|
||||
const first = ", First>"
|
||||
if i := strings.Index(s, first); i >= 0 {
|
||||
s = s[:i] + ">"
|
||||
}
|
||||
|
||||
}
|
||||
return s
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package runenames provides rune names from the Unicode Character Database.
|
||||
// For example, the name for '\u0100' is "LATIN CAPITAL LETTER A WITH MACRON".
|
||||
//
|
||||
// See https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
package runenames
|
||||
|
||||
import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Name returns the name for r.
|
||||
func Name(r rune) string {
|
||||
i := sort.Search(len(entries), func(j int) bool {
|
||||
return entries[j].startRune() > r
|
||||
})
|
||||
if i == 0 {
|
||||
return ""
|
||||
}
|
||||
e := entries[i-1]
|
||||
|
||||
offset := int(r - e.startRune())
|
||||
if offset >= e.numRunes() {
|
||||
return ""
|
||||
}
|
||||
|
||||
if e.direct() {
|
||||
o := e.index()
|
||||
n := e.len()
|
||||
return directData[o : o+n]
|
||||
}
|
||||
|
||||
start := int(index[e.index()+offset])
|
||||
end := int(index[e.index()+offset+1])
|
||||
base1 := e.base() << 16
|
||||
base2 := base1
|
||||
if start > end {
|
||||
base2 += 1 << 16
|
||||
}
|
||||
return singleData[start+base1 : end+base2]
|
||||
}
|
||||
|
||||
// len returns the value of the entry's base field. Name calls it for direct
// entries, where it is used as the byte length of the name in directData.
func (e entry) len() int {
	return e.base()
}
|
||||
@@ -0,0 +1,52 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runenames
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
func TestName(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
wants := make([]string, 1+unicode.MaxRune)
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
wants[p.Rune(0)] = getName(p)
|
||||
})
|
||||
|
||||
nErrors := 0
|
||||
for r, want := range wants {
|
||||
got := Name(rune(r))
|
||||
if got != want {
|
||||
t.Errorf("r=%#08x: got %q, want %q", r, got, want)
|
||||
nErrors++
|
||||
if nErrors == 100 {
|
||||
t.Fatal("too many errors")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copied from gen.go.
|
||||
func getName(p *ucd.Parser) string {
|
||||
s := p.String(ucd.Name)
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
if s[0] == '<' {
|
||||
const first = ", First>"
|
||||
if i := strings.Index(s, first); i >= 0 {
|
||||
s = s[:i] + ">"
|
||||
}
|
||||
|
||||
}
|
||||
return s
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user