whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType classifies an entry of AliasMap.
type AliasType int8

// The known alias kinds, numbered consecutively from zero.
const (
	Deprecated AliasType = 0
	Macro      AliasType = 1
	Legacy     AliasType = 2
)

// AliasTypeUnknown is a sentinel value lying outside the range of the known
// alias kinds.
const AliasTypeUnknown AliasType = -1
|
||||
@@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// CompactCoreInfo packs the language, script and region IDs of a Tag into a
// single 32-bit integer. GetCompactCore produces such a value and the Tag
// method unpacks it again.
type CompactCoreInfo uint32
|
||||
|
||||
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
|
||||
// different language, region, and script values.
|
||||
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
|
||||
if t.LangID > langNoIndexOffset {
|
||||
return 0, false
|
||||
}
|
||||
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
|
||||
cci |= CompactCoreInfo(t.ScriptID) << 12
|
||||
cci |= CompactCoreInfo(t.RegionID)
|
||||
return cci, true
|
||||
}
|
||||
|
||||
// Tag generates a tag from c.
|
||||
func (c CompactCoreInfo) Tag() Tag {
|
||||
return Tag{
|
||||
LangID: Language(c >> 20),
|
||||
RegionID: Region(c & 0x3ff),
|
||||
ScriptID: Script(c>>12) & 0xff,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package compact defines a compact representation of language tags.
|
||||
//
|
||||
// Common language tags (at least all for which locale information is defined
|
||||
// in CLDR) are assigned a unique index. Each Tag is associated with such an
|
||||
// ID for selecting language-related resources (such as translations) as well
|
||||
// as one for selecting regional defaults (currency, number formatting, etc.)
|
||||
//
|
||||
// We may want to export this functionality at some point, but at this point
|
||||
// this is only available for use within x/text.
|
||||
package compact // import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ID is an integer identifying a single tag. Values below len(coreTags) index
// coreTags; larger values index specialTags (see the Tag method).
type ID uint16
|
||||
|
||||
func getCoreIndex(t language.Tag) (id ID, ok bool) {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
i := sort.Search(len(coreTags), func(i int) bool {
|
||||
return cci <= coreTags[i]
|
||||
})
|
||||
if i == len(coreTags) || coreTags[i] != cci {
|
||||
return 0, false
|
||||
}
|
||||
return ID(i), true
|
||||
}
|
||||
|
||||
// Parent returns the ID of the parent or the root ID if id is already the root.
|
||||
func (id ID) Parent() ID {
|
||||
return parents[id]
|
||||
}
|
||||
|
||||
// Tag converts id to an internal language Tag.
|
||||
func (id ID) Tag() language.Tag {
|
||||
if int(id) >= len(coreTags) {
|
||||
return specialTags[int(id)-len(coreTags)]
|
||||
}
|
||||
return coreTags[id].Tag()
|
||||
}
|
||||
|
||||
// specialTags holds the tags for IDs at or above len(coreTags). It is filled
// by init from specialTagsStr.
var specialTags []language.Tag
|
||||
|
||||
func init() {
|
||||
tags := strings.Split(specialTagsStr, " ")
|
||||
specialTags = make([]language.Tag, len(tags))
|
||||
for i, t := range tags {
|
||||
specialTags[i] = language.MustParse(t)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
// Command-line flags of the table generator.
var (
	// test holds the value of the -test flag.
	test = flag.Bool("test", false, "test existing tables; can be used to compare web data with package data.")

	// outputFile holds the value of the -output flag.
	outputFile = flag.String("output", "tables.go", "output file for generated tables")
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "compact")
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeCompactIndex()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates derivative tables based on the language package itself.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Compact indices:
|
||||
// Note -va-X variants only apply to localization variants.
|
||||
// BCP variants only ever apply to language.
|
||||
// The only ambiguity between tags is with regions.
|
||||
|
||||
func (b *builder) writeCompactIndex() {
|
||||
// Collect all language tags for which we have any data in CLDR.
|
||||
m := map[language.Tag]bool{}
|
||||
for _, lang := range b.data.Locales() {
|
||||
// We include all locales unconditionally to be consistent with en_US.
|
||||
// We want en_US, even though it has no data associated with it.
|
||||
|
||||
// TODO: put any of the languages for which no data exists at the end
|
||||
// of the index. This allows all components based on ICU to use that
|
||||
// as the cutoff point.
|
||||
// if x := data.RawLDML(lang); false ||
|
||||
// x.LocaleDisplayNames != nil ||
|
||||
// x.Characters != nil ||
|
||||
// x.Delimiters != nil ||
|
||||
// x.Measurement != nil ||
|
||||
// x.Dates != nil ||
|
||||
// x.Numbers != nil ||
|
||||
// x.Units != nil ||
|
||||
// x.ListPatterns != nil ||
|
||||
// x.Collations != nil ||
|
||||
// x.Segmentations != nil ||
|
||||
// x.Rbnf != nil ||
|
||||
// x.Annotations != nil ||
|
||||
// x.Metadata != nil {
|
||||
|
||||
// TODO: support POSIX natively, albeit non-standard.
|
||||
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
|
||||
m[tag] = true
|
||||
// }
|
||||
}
|
||||
|
||||
// TODO: plural rules are also defined for the deprecated tags:
|
||||
// iw mo sh tl
|
||||
// Consider removing these as compact tags.
|
||||
|
||||
// Include locales for plural rules, which uses a different structure.
|
||||
for _, plurals := range b.supp.Plurals {
|
||||
for _, rules := range plurals.PluralRules {
|
||||
for _, lang := range strings.Split(rules.Locales, " ") {
|
||||
m[language.Make(lang)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var coreTags []language.CompactCoreInfo
|
||||
var special []string
|
||||
|
||||
for t := range m {
|
||||
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
|
||||
log.Fatalf("Unexpected extension %v in %v", x, t)
|
||||
}
|
||||
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
log.Fatalf("Locale for non-basic language %q", t)
|
||||
}
|
||||
coreTags = append(coreTags, cci)
|
||||
} else {
|
||||
special = append(special, t.String())
|
||||
}
|
||||
}
|
||||
|
||||
w := b.w
|
||||
|
||||
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
|
||||
sort.Strings(special)
|
||||
|
||||
w.WriteComment(`
|
||||
NumCompactTags is the number of common tags. The maximum tag is
|
||||
NumCompactTags-1.`)
|
||||
w.WriteConst("NumCompactTags", len(m))
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, t := range coreTags {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
|
||||
}
|
||||
for i, t := range special {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
w.WriteVar("coreTags", coreTags)
|
||||
|
||||
w.WriteConst("specialTagsStr", strings.Join(special, " "))
|
||||
}
|
||||
|
||||
// ident derives the name of the generated index constant for tag string s:
// all hyphens are removed and "Index" is appended, e.g. "en-US" becomes
// "enUSIndex".
func ident(s string) string {
	compacted := strings.ReplaceAll(s, "-", "")
	return compacted + "Index"
}
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("parents.go", "compact")
|
||||
|
||||
// Create parents table.
|
||||
type ID uint16
|
||||
parents := make([]ID, compact.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := compact.FromTag(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := compact.ID(0) // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := compact.FromTag(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = ID(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
parents maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("parents", parents)
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
func TestParents(t *testing.T) {
|
||||
testCases := []struct {
|
||||
tag, parent string
|
||||
}{
|
||||
{"af", "und"},
|
||||
{"en", "und"},
|
||||
{"en-001", "en"},
|
||||
{"en-AU", "en-001"},
|
||||
{"en-US", "en"},
|
||||
{"en-US-u-va-posix", "en-US"},
|
||||
{"ca-ES-valencia", "ca-ES"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
tag, ok := LanguageID(Make(language.MustParse(tc.tag)))
|
||||
if !ok {
|
||||
t.Fatalf("Could not get index of flag %s", tc.tag)
|
||||
}
|
||||
want, ok := LanguageID(Make(language.MustParse(tc.parent)))
|
||||
if !ok {
|
||||
t.Fatalf("Could not get index of parent %s of tag %s", tc.parent, tc.tag)
|
||||
}
|
||||
if got := parents[tag]; got != want {
|
||||
t.Errorf("Parent[%s] = %d; want %d (%s)", tc.tag, got, want, tc.parent)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,260 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_index.go -output tables.go
|
||||
//go:generate go run gen_parents.go
|
||||
|
||||
package compact
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag struct {
|
||||
// NOTE: exported tags will become part of the public API.
|
||||
language ID
|
||||
locale ID
|
||||
full fullTag // always a language.Tag for now.
|
||||
}
|
||||
|
||||
// _und is the compact ID that IsRoot compares against to recognize the root
// tag "und".
const _und = 0
|
||||
|
||||
type fullTag interface {
|
||||
IsRoot() bool
|
||||
Parent() language.Tag
|
||||
}
|
||||
|
||||
// Make a compact Tag from a fully specified internal language Tag.
|
||||
func Make(t language.Tag) (tag Tag) {
|
||||
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
|
||||
if r, err := language.ParseRegion(region[:2]); err == nil {
|
||||
tFull := t
|
||||
t, _ = t.SetTypeForKey("rg", "")
|
||||
// TODO: should we not consider "va" for the language tag?
|
||||
var exact1, exact2 bool
|
||||
tag.language, exact1 = FromTag(t)
|
||||
t.RegionID = r
|
||||
tag.locale, exact2 = FromTag(t)
|
||||
if !exact1 || !exact2 {
|
||||
tag.full = tFull
|
||||
}
|
||||
return tag
|
||||
}
|
||||
}
|
||||
lang, ok := FromTag(t)
|
||||
tag.language = lang
|
||||
tag.locale = lang
|
||||
if !ok {
|
||||
tag.full = t
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// Tag returns an internal language Tag version of this tag.
|
||||
func (t Tag) Tag() language.Tag {
|
||||
if t.full != nil {
|
||||
return t.full.(language.Tag)
|
||||
}
|
||||
tag := t.language.Tag()
|
||||
if t.language != t.locale {
|
||||
loc := t.locale.Tag()
|
||||
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// IsCompact reports whether this tag is fully defined in terms of ID.
|
||||
func (t *Tag) IsCompact() bool {
|
||||
return t.full == nil
|
||||
}
|
||||
|
||||
// MayHaveVariants reports whether a tag may have variants. If it returns false
|
||||
// it is guaranteed the tag does not have variants.
|
||||
func (t Tag) MayHaveVariants() bool {
|
||||
return t.full != nil || int(t.language) >= len(coreTags)
|
||||
}
|
||||
|
||||
// MayHaveExtensions reports whether a tag may have extensions. If it returns
|
||||
// false it is guaranteed the tag does not have them.
|
||||
func (t Tag) MayHaveExtensions() bool {
|
||||
return t.full != nil ||
|
||||
int(t.language) >= len(coreTags) ||
|
||||
t.language != t.locale
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if t.full != nil {
|
||||
return t.full.IsRoot()
|
||||
}
|
||||
return t.language == _und
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.full != nil {
|
||||
return Make(t.full.Parent())
|
||||
}
|
||||
if t.language != t.locale {
|
||||
// Simulate stripping -u-rg-xxxxxx
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
// TODO: use parent lookup table once cycle from internal package is
|
||||
// removed. Probably by internalizing the table and declaring this fast
|
||||
// enough.
|
||||
// lang := compactID(internal.Parent(uint16(t.language)))
|
||||
lang, _ := FromTag(t.language.Tag().Parent())
|
||||
return Tag{language: lang, locale: lang}
|
||||
}
|
||||
|
||||
// nextToken returns token t and the rest of the string.
//
// The first byte of s is skipped (s is expected to start with a separator);
// t is everything up to, but not including, the next "-" and tail is the
// remainder starting at that "-". If there is no further "-", tail is empty.
// An empty s yields two empty strings instead of panicking.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	p := strings.Index(s[1:], "-")
	if p == -1 {
		return s[1:], ""
	}
	// Convert the offset within s[1:] into an offset within s.
	p++
	return s[1:p], s[p:]
}
|
||||
|
||||
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func LanguageID(t Tag) (id ID, exact bool) {
|
||||
return t.language, t.full == nil
|
||||
}
|
||||
|
||||
// RegionalID returns the ID for the regional variant of this tag. This index is
|
||||
// used to indicate region-specific overrides, such as default currency, default
|
||||
// calendar and week data, default time cycle, and default measurement system
|
||||
// and unit preferences.
|
||||
//
|
||||
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
|
||||
// settings for currency, number formatting, etc. The CompactIndex for this tag
|
||||
// will be that for en-GB, while the RegionalID will be the one corresponding to
|
||||
// en-US.
|
||||
func RegionalID(t Tag) (id ID, exact bool) {
|
||||
return t.locale, t.full == nil
|
||||
}
|
||||
|
||||
// LanguageTag returns t stripped of regional variant indicators.
|
||||
//
|
||||
// At the moment this means it is stripped of a regional and variant subtag "rg"
|
||||
// and "va" in the "u" extension.
|
||||
func (t Tag) LanguageTag() Tag {
|
||||
if t.full == nil {
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
tt := t.Tag()
|
||||
tt.SetTypeForKey("rg", "")
|
||||
tt.SetTypeForKey("va", "")
|
||||
return Make(tt)
|
||||
}
|
||||
|
||||
// RegionalTag returns the regional variant of the tag.
|
||||
//
|
||||
// At the moment this means that the region is set from the regional subtag
|
||||
// "rg" in the "u" extension.
|
||||
func (t Tag) RegionalTag() Tag {
|
||||
rt := Tag{language: t.locale, locale: t.locale}
|
||||
if t.full == nil {
|
||||
return rt
|
||||
}
|
||||
b := language.Builder{}
|
||||
tag := t.Tag()
|
||||
// tag, _ = tag.SetTypeForKey("rg", "")
|
||||
b.SetTag(t.locale.Tag())
|
||||
if v := tag.Variants(); v != "" {
|
||||
for _, v := range strings.Split(v, "-") {
|
||||
b.AddVariant(v)
|
||||
}
|
||||
}
|
||||
for _, e := range tag.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// FromTag reports closest matching ID for an internal language Tag.
|
||||
func FromTag(t language.Tag) (id ID, exact bool) {
|
||||
// TODO: perhaps give more frequent tags a lower index.
|
||||
// TODO: we could make the indexes stable. This will excluded some
|
||||
// possibilities for optimization, so don't do this quite yet.
|
||||
exact = true
|
||||
|
||||
b, s, r := t.Raw()
|
||||
if t.HasString() {
|
||||
if t.IsPrivateUse() {
|
||||
// We have no entries for user-defined tags.
|
||||
return 0, false
|
||||
}
|
||||
hasExtra := false
|
||||
if t.HasVariants() {
|
||||
if t.HasExtensions() {
|
||||
build := language.Builder{}
|
||||
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
|
||||
build.AddVariant(t.Variants())
|
||||
exact = false
|
||||
t = build.Make()
|
||||
}
|
||||
hasExtra = true
|
||||
} else if _, ok := t.Extension('u'); ok {
|
||||
// TODO: va may mean something else. Consider not considering it.
|
||||
// Strip all but the 'va' entry.
|
||||
old := t
|
||||
variant := t.TypeForKey("va")
|
||||
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||
if variant != "" {
|
||||
t, _ = t.SetTypeForKey("va", variant)
|
||||
hasExtra = true
|
||||
}
|
||||
exact = old == t
|
||||
} else {
|
||||
exact = false
|
||||
}
|
||||
if hasExtra {
|
||||
// We have some variants.
|
||||
for i, s := range specialTags {
|
||||
if s == t {
|
||||
return ID(i + len(coreTags)), exact
|
||||
}
|
||||
}
|
||||
exact = false
|
||||
}
|
||||
}
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
exact = false
|
||||
if r != 0 && s == 0 {
|
||||
// Deal with cases where an extra script is inserted for the region.
|
||||
t, _ := t.Maximize()
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
}
|
||||
for t = t.Parent(); t != root; t = t.Parent() {
|
||||
// No variants specified: just compare core components.
|
||||
// The key has the form lllssrrr, where l, s, and r are nibbles for
|
||||
// respectively the langID, scriptID, and regionID.
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
}
|
||||
return 0, exact
|
||||
}
|
||||
|
||||
// root is the zero language.Tag; FromTag stops walking up the parent chain
// once it reaches this value.
var root language.Tag
|
||||
@@ -0,0 +1,236 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
func mustParse(s string) Tag {
|
||||
t, err := language.Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return Make(t)
|
||||
}
|
||||
|
||||
func TestTagSize(t *testing.T) {
|
||||
id := Tag{}
|
||||
typ := reflect.TypeOf(id)
|
||||
if typ.Size() > 24 {
|
||||
t.Errorf("size of Tag was %d; want 24", typ.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoPublic(t *testing.T) {
|
||||
noExportedField(t, reflect.TypeOf(Tag{}))
|
||||
}
|
||||
|
||||
// noExportedField reports a test error for every exported field of the struct
// type typ, descending recursively into embedded (anonymous) fields.
func noExportedField(t *testing.T, typ reflect.Type) {
	count := typ.NumField()
	for idx := 0; idx < count; idx++ {
		field := typ.Field(idx)
		// PkgPath is empty exactly for exported fields.
		if exported := field.PkgPath == ""; exported {
			t.Errorf("Tag may not have exported fields, but has field %q", field.Name)
		}
		if !field.Anonymous {
			continue
		}
		noExportedField(t, field.Type)
	}
}
|
||||
|
||||
func TestEquality(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
s := tt.in
|
||||
tag := mk(s)
|
||||
t1 := mustParse(tag.Tag().String())
|
||||
if tag != t1 {
|
||||
t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type compactTest struct {
|
||||
tag string
|
||||
index ID
|
||||
ok bool
|
||||
}
|
||||
|
||||
var compactTests = []compactTest{
|
||||
// TODO: these values will change with each CLDR update. This issue
|
||||
// will be solved if we decide to fix the indexes.
|
||||
{"und", undIndex, true},
|
||||
{"ca-ES-valencia", caESvalenciaIndex, true},
|
||||
{"ca-ES-valencia-u-va-posix", caESvalenciaIndex, false},
|
||||
{"ca-ES-valencia-u-co-phonebk", caESvalenciaIndex, false},
|
||||
{"ca-ES-valencia-u-co-phonebk-va-posix", caESvalenciaIndex, false},
|
||||
{"x-klingon", 0, false},
|
||||
{"en-US", enUSIndex, true},
|
||||
{"en-US-u-va-posix", enUSuvaposixIndex, true},
|
||||
{"en", enIndex, true},
|
||||
{"en-u-co-phonebk", enIndex, false},
|
||||
{"en-001", en001Index, true},
|
||||
{"zh-Hant-HK", zhHantHKIndex, true},
|
||||
{"zh-HK", zhHantHKIndex, false}, // maximized to zh-Hant-HK
|
||||
{"nl-Beng", 0, false}, // parent skips script
|
||||
{"nl-NO", nlIndex, false}, // region is ignored
|
||||
{"nl-Latn-NO", nlIndex, false},
|
||||
{"nl-Latn-NO-u-co-phonebk", nlIndex, false},
|
||||
{"nl-Latn-NO-valencia", nlIndex, false},
|
||||
{"nl-Latn-NO-oxendict", nlIndex, false},
|
||||
{"sh", shIndex, true}, // From plural rules.
|
||||
}
|
||||
|
||||
func TestLanguageID(t *testing.T) {
|
||||
tests := append(compactTests, []compactTest{
|
||||
{"en-GB", enGBIndex, true},
|
||||
{"en-GB-u-rg-uszzzz", enGBIndex, true},
|
||||
{"en-GB-u-rg-USZZZZ", enGBIndex, true},
|
||||
{"en-GB-u-rg-uszzzz-va-posix", enGBIndex, false},
|
||||
{"en-GB-u-co-phonebk-rg-uszzzz", enGBIndex, false},
|
||||
// Invalid region specifications are ignored.
|
||||
{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
|
||||
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
|
||||
}...)
|
||||
for _, tt := range tests {
|
||||
x, ok := LanguageID(mustParse(tt.tag))
|
||||
if ID(x) != tt.index || ok != tt.ok {
|
||||
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionalID(t *testing.T) {
|
||||
tests := append(compactTests, []compactTest{
|
||||
{"en-GB", enGBIndex, true},
|
||||
{"en-GB-u-rg-uszzzz", enUSIndex, true},
|
||||
{"en-GB-u-rg-USZZZZ", enUSIndex, true},
|
||||
// TODO: use different exact values for language and regional tag?
|
||||
{"en-GB-u-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
|
||||
{"en-GB-u-co-phonebk-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
|
||||
{"en-GB-u-co-phonebk-rg-uszzzz", enUSIndex, false},
|
||||
// Invalid region specifications are ignored.
|
||||
{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
|
||||
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
|
||||
}...)
|
||||
for _, tt := range tests {
|
||||
x, ok := RegionalID(mustParse(tt.tag))
|
||||
if ID(x) != tt.index || ok != tt.ok {
|
||||
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParent(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
// Strip variants and extensions first
|
||||
{"de-u-co-phonebk", "de"},
|
||||
{"de-1994", "de"},
|
||||
{"de-Latn-1994", "de"}, // remove superfluous script.
|
||||
|
||||
// Ensure the canonical Tag for an entry is in the chain for base-script
|
||||
// pairs.
|
||||
{"zh-Hans", "zh"},
|
||||
|
||||
// Skip the script if it is the maximized version. CLDR files for the
|
||||
// skipped tag are always empty.
|
||||
{"zh-Hans-TW", "zh"},
|
||||
{"zh-Hans-CN", "zh"},
|
||||
|
||||
// Insert the script if the maximized script is not the same as the
|
||||
// maximized script of the base language.
|
||||
{"zh-TW", "zh-Hant"},
|
||||
{"zh-HK", "zh-Hant"},
|
||||
{"zh-Hant-TW", "zh-Hant"},
|
||||
{"zh-Hant-HK", "zh-Hant"},
|
||||
|
||||
// Non-default script skips to und.
|
||||
// CLDR
|
||||
{"az-Cyrl", "und"},
|
||||
{"bs-Cyrl", "und"},
|
||||
{"en-Dsrt", "und"},
|
||||
{"ha-Arab", "und"},
|
||||
{"mn-Mong", "und"},
|
||||
{"pa-Arab", "und"},
|
||||
{"shi-Latn", "und"},
|
||||
{"sr-Latn", "und"},
|
||||
{"uz-Arab", "und"},
|
||||
{"uz-Cyrl", "und"},
|
||||
{"vai-Latn", "und"},
|
||||
{"zh-Hant", "und"},
|
||||
// extra
|
||||
{"nl-Cyrl", "und"},
|
||||
|
||||
// World english inherits from en-001.
|
||||
{"en-150", "en-001"},
|
||||
{"en-AU", "en-001"},
|
||||
{"en-BE", "en-001"},
|
||||
{"en-GG", "en-001"},
|
||||
{"en-GI", "en-001"},
|
||||
{"en-HK", "en-001"},
|
||||
{"en-IE", "en-001"},
|
||||
{"en-IM", "en-001"},
|
||||
{"en-IN", "en-001"},
|
||||
{"en-JE", "en-001"},
|
||||
{"en-MT", "en-001"},
|
||||
{"en-NZ", "en-001"},
|
||||
{"en-PK", "en-001"},
|
||||
{"en-SG", "en-001"},
|
||||
|
||||
// Spanish in Latin-American countries have es-419 as parent.
|
||||
{"es-AR", "es-419"},
|
||||
{"es-BO", "es-419"},
|
||||
{"es-CL", "es-419"},
|
||||
{"es-CO", "es-419"},
|
||||
{"es-CR", "es-419"},
|
||||
{"es-CU", "es-419"},
|
||||
{"es-DO", "es-419"},
|
||||
{"es-EC", "es-419"},
|
||||
{"es-GT", "es-419"},
|
||||
{"es-HN", "es-419"},
|
||||
{"es-MX", "es-419"},
|
||||
{"es-NI", "es-419"},
|
||||
{"es-PA", "es-419"},
|
||||
{"es-PE", "es-419"},
|
||||
{"es-PR", "es-419"},
|
||||
{"es-PY", "es-419"},
|
||||
{"es-SV", "es-419"},
|
||||
{"es-US", "es-419"},
|
||||
{"es-UY", "es-419"},
|
||||
{"es-VE", "es-419"},
|
||||
// exceptions (according to CLDR)
|
||||
{"es-CW", "es"},
|
||||
|
||||
// Inherit from pt-PT, instead of pt for these countries.
|
||||
{"pt-AO", "pt-PT"},
|
||||
{"pt-CV", "pt-PT"},
|
||||
{"pt-GW", "pt-PT"},
|
||||
{"pt-MO", "pt-PT"},
|
||||
{"pt-MZ", "pt-PT"},
|
||||
{"pt-ST", "pt-PT"},
|
||||
{"pt-TL", "pt-PT"},
|
||||
|
||||
{"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
|
||||
{"en-GB-u-rg-uszzzz", "en-GB"},
|
||||
{"en-US-u-va-posix", "en-US"},
|
||||
|
||||
// Difference between language and regional tag.
|
||||
{"ca-ES-valencia", "ca-ES"},
|
||||
{"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"}, // t.full != nil
|
||||
{"en-US-u-va-variant", "en-US"},
|
||||
{"en-u-va-variant", "en"}, // t.full != nil
|
||||
{"en-u-rg-gbzzzz", "en"},
|
||||
{"en-US-u-rg-gbzzzz", "en-US"},
|
||||
{"nl-US-u-rg-gbzzzz", "nl-US"}, // t.full != nil
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tag := mustParse(tt.in)
|
||||
if p := mustParse(tt.out); p != tag.Parent() {
|
||||
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,120 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package compact
|
||||
|
||||
// parents maps a compact index of a tag to the compact index of the parent of
|
||||
// this tag.
|
||||
var parents = []ID{ // 775 elements
|
||||
// Entry 0 - 3F
|
||||
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
|
||||
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
|
||||
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
|
||||
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
|
||||
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
|
||||
// Entry 40 - 7F
|
||||
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
|
||||
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
|
||||
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
|
||||
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
|
||||
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
|
||||
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
|
||||
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
|
||||
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
|
||||
// Entry 80 - BF
|
||||
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
|
||||
// Entry C0 - FF
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
|
||||
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
|
||||
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
|
||||
// Entry 100 - 13F
|
||||
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
|
||||
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
|
||||
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
|
||||
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
// Entry 140 - 17F
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
|
||||
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
|
||||
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
|
||||
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
|
||||
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
|
||||
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
|
||||
// Entry 180 - 1BF
|
||||
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
|
||||
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
|
||||
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
|
||||
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
|
||||
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
|
||||
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
|
||||
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
|
||||
// Entry 1C0 - 1FF
|
||||
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
|
||||
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
|
||||
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
|
||||
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
|
||||
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
|
||||
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
|
||||
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
|
||||
// Entry 200 - 23F
|
||||
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
|
||||
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
|
||||
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
|
||||
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
|
||||
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
|
||||
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
|
||||
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
|
||||
// Entry 240 - 27F
|
||||
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
|
||||
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
|
||||
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
|
||||
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
|
||||
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
|
||||
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
|
||||
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
|
||||
// Entry 280 - 2BF
|
||||
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
|
||||
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
|
||||
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
|
||||
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
|
||||
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
|
||||
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
|
||||
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
|
||||
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
|
||||
// Entry 2C0 - 2FF
|
||||
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
|
||||
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
|
||||
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
|
||||
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
|
||||
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
|
||||
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
|
||||
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
|
||||
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
|
||||
// Entry 300 - 33F
|
||||
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
|
||||
} // Size: 1574 bytes
|
||||
|
||||
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
|
||||
@@ -0,0 +1,201 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
var errSyntax = language.ErrSyntax
|
||||
|
||||
// parseTest describes one input string together with the parts the resulting
// tag is expected to have.
type parseTest struct {
	// i is the position of this case in the slice returned by parseTests.
	i int
	// in is the raw string handed to the parser.
	in string
	// lang, script and region are the expected core subtags; an empty string
	// means the subtag is expected to be absent.
	lang   string
	script string
	region string
	// variants is the expected variant list joined by '-', without a
	// leading '-'.
	variants string
	// ext is the expected extension list joined by '-'.
	ext string
	// extList lists the expected extensions one by one. It only needs to be
	// given when more than one extension is present.
	extList []string
	// invalid marks input for which an error is expected.
	invalid bool
	// rewrite marks a special rewrite not handled by parseTag.
	rewrite bool
	// changed marks input whose string needed to be reformatted.
	changed bool
}
|
||||
|
||||
func parseTests() []parseTest {
|
||||
tests := []parseTest{
|
||||
{in: "root", lang: "und"},
|
||||
{in: "und", lang: "und"},
|
||||
{in: "en", lang: "en"},
|
||||
|
||||
{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
|
||||
{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
|
||||
{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
|
||||
|
||||
{in: "xy", lang: "und", invalid: true},
|
||||
{in: "en-ZY", lang: "en", invalid: true},
|
||||
{in: "gsw", lang: "gsw"},
|
||||
{in: "sr_Latn", lang: "sr", script: "Latn"},
|
||||
{in: "af-Arab", lang: "af", script: "Arab"},
|
||||
{in: "nl-BE", lang: "nl", region: "BE"},
|
||||
{in: "es-419", lang: "es", region: "419"},
|
||||
{in: "und-001", lang: "und", region: "001"},
|
||||
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
|
||||
// Variants
|
||||
{in: "de-1901", lang: "de", variants: "1901"},
|
||||
// Accept with unsuppressed script.
|
||||
{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
|
||||
// Specialized.
|
||||
{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
|
||||
{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
|
||||
{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
|
||||
{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
|
||||
{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
|
||||
// Maximum number of variants while adhering to prefix rules.
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
|
||||
|
||||
// Sorting.
|
||||
{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
|
||||
// Duplicates variants are removed, but not an error.
|
||||
{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
|
||||
|
||||
// Variants that do not have correct prefixes. We still accept these.
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
|
||||
{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
|
||||
// Invalid variant.
|
||||
{in: "de-1902", lang: "de", variants: "", invalid: true},
|
||||
|
||||
{in: "EN_CYRL", lang: "en", script: "Cyrl"},
|
||||
// private use and extensions
|
||||
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
|
||||
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
|
||||
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
|
||||
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
|
||||
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
|
||||
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
|
||||
{in: "en-v-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
|
||||
{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
|
||||
{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
|
||||
{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
|
||||
{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
|
||||
{in: "en-u-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
|
||||
{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
|
||||
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
|
||||
{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
|
||||
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
|
||||
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
|
||||
// Invalid "u" extension. Drop invalid parts.
|
||||
{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
|
||||
{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
|
||||
// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
|
||||
// TODO: Consider eliminating duplicates and returning an error.
|
||||
{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
|
||||
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
|
||||
// Not necessary to have changed here.
|
||||
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
|
||||
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
|
||||
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
|
||||
{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
|
||||
{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
|
||||
{in: "fr-est", lang: "et", changed: true},
|
||||
{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
|
||||
{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
|
||||
// invalid
|
||||
{in: "", lang: "und", invalid: true},
|
||||
{in: "-", lang: "und", invalid: true},
|
||||
{in: "x", lang: "und", invalid: true},
|
||||
{in: "x-", lang: "und", invalid: true},
|
||||
{in: "x--", lang: "und", invalid: true},
|
||||
{in: "a-a-b-c-d", lang: "und", invalid: true},
|
||||
{in: "en-", lang: "en", invalid: true},
|
||||
{in: "enne-", lang: "und", invalid: true},
|
||||
{in: "en.", lang: "und", invalid: true},
|
||||
{in: "en.-latn", lang: "und", invalid: true},
|
||||
{in: "en.-en", lang: "en", invalid: true},
|
||||
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
|
||||
{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
|
||||
// TODO: check key-value validity
|
||||
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
|
||||
{in: "en-t-abcd", lang: "en", invalid: true},
|
||||
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
|
||||
// rewrites (more tests in TestGrandfathered)
|
||||
{in: "zh-min-nan", lang: "nan"},
|
||||
{in: "zh-yue", lang: "yue"},
|
||||
{in: "zh-xiang", lang: "hsn", rewrite: true},
|
||||
{in: "zh-guoyu", lang: "cmn", rewrite: true},
|
||||
{in: "iw", lang: "iw"},
|
||||
{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
|
||||
{in: "i-klingon", lang: "tlh", rewrite: true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tests[i].i = i
|
||||
if tt.extList != nil {
|
||||
tests[i].ext = strings.Join(tt.extList, "-")
|
||||
}
|
||||
if tt.ext != "" && tt.extList == nil {
|
||||
tests[i].extList = []string{tt.ext}
|
||||
}
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
// partChecks runs checks for each part by calling the function returned by f.
|
||||
func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
|
||||
for i, tt := range parseTests() {
|
||||
tag, skip := f(&tt)
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
if l, _ := language.ParseBase(tt.lang); l != tag.Tag().LangID {
|
||||
t.Errorf("%d: lang was %q; want %q", i, tag.Tag().LangID, l)
|
||||
}
|
||||
if sc, _ := language.ParseScript(tt.script); sc != tag.Tag().ScriptID {
|
||||
t.Errorf("%d: script was %q; want %q", i, tag.Tag().ScriptID, sc)
|
||||
}
|
||||
if r, _ := language.ParseRegion(tt.region); r != tag.Tag().RegionID {
|
||||
t.Errorf("%d: region was %q; want %q", i, tag.Tag().RegionID, r)
|
||||
}
|
||||
v := tag.Tag().Variants()
|
||||
if v != "" {
|
||||
v = v[1:]
|
||||
}
|
||||
if v != tt.variants {
|
||||
t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
|
||||
}
|
||||
if e := strings.Join(tag.Tag().Extensions(), "-"); e != tt.ext {
|
||||
t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mk(s string) Tag {
|
||||
tag, _ := language.Parse(s)
|
||||
return Make(tag)
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,91 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
|
||||
Amharic Tag = Tag{language: amIndex, locale: amIndex}
|
||||
Arabic Tag = Tag{language: arIndex, locale: arIndex}
|
||||
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
|
||||
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
|
||||
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
|
||||
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
|
||||
Catalan Tag = Tag{language: caIndex, locale: caIndex}
|
||||
Czech Tag = Tag{language: csIndex, locale: csIndex}
|
||||
Danish Tag = Tag{language: daIndex, locale: daIndex}
|
||||
German Tag = Tag{language: deIndex, locale: deIndex}
|
||||
Greek Tag = Tag{language: elIndex, locale: elIndex}
|
||||
English Tag = Tag{language: enIndex, locale: enIndex}
|
||||
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
|
||||
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
|
||||
Spanish Tag = Tag{language: esIndex, locale: esIndex}
|
||||
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
|
||||
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
|
||||
Estonian Tag = Tag{language: etIndex, locale: etIndex}
|
||||
Persian Tag = Tag{language: faIndex, locale: faIndex}
|
||||
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
|
||||
Filipino Tag = Tag{language: filIndex, locale: filIndex}
|
||||
French Tag = Tag{language: frIndex, locale: frIndex}
|
||||
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
|
||||
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
|
||||
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
|
||||
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
|
||||
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
|
||||
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
|
||||
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
|
||||
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
|
||||
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
|
||||
Italian Tag = Tag{language: itIndex, locale: itIndex}
|
||||
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
|
||||
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
|
||||
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
|
||||
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
|
||||
Kannada Tag = Tag{language: knIndex, locale: knIndex}
|
||||
Korean Tag = Tag{language: koIndex, locale: koIndex}
|
||||
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
|
||||
Lao Tag = Tag{language: loIndex, locale: loIndex}
|
||||
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
|
||||
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
|
||||
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
|
||||
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
|
||||
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
|
||||
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
|
||||
Malay Tag = Tag{language: msIndex, locale: msIndex}
|
||||
Burmese Tag = Tag{language: myIndex, locale: myIndex}
|
||||
Nepali Tag = Tag{language: neIndex, locale: neIndex}
|
||||
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
|
||||
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
|
||||
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
|
||||
Polish Tag = Tag{language: plIndex, locale: plIndex}
|
||||
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
|
||||
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
|
||||
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
|
||||
Romanian Tag = Tag{language: roIndex, locale: roIndex}
|
||||
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
|
||||
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
|
||||
Slovak Tag = Tag{language: skIndex, locale: skIndex}
|
||||
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
|
||||
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
|
||||
Serbian Tag = Tag{language: srIndex, locale: srIndex}
|
||||
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
|
||||
Swedish Tag = Tag{language: svIndex, locale: svIndex}
|
||||
Swahili Tag = Tag{language: swIndex, locale: swIndex}
|
||||
Tamil Tag = Tag{language: taIndex, locale: taIndex}
|
||||
Telugu Tag = Tag{language: teIndex, locale: teIndex}
|
||||
Thai Tag = Tag{language: thIndex, locale: thIndex}
|
||||
Turkish Tag = Tag{language: trIndex, locale: trIndex}
|
||||
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
|
||||
Urdu Tag = Tag{language: urIndex, locale: urIndex}
|
||||
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
|
||||
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
|
||||
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
|
||||
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
|
||||
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
|
||||
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
|
||||
)
|
||||
@@ -0,0 +1,167 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Builder allows constructing a Tag from individual components.
|
||||
// Its main user is Compose in the top-level language package.
|
||||
type Builder struct {
|
||||
Tag Tag
|
||||
|
||||
private string // the x extension
|
||||
variants []string
|
||||
extensions []string
|
||||
}
|
||||
|
||||
// Make returns a new Tag from the current settings.
|
||||
func (b *Builder) Make() Tag {
|
||||
t := b.Tag
|
||||
|
||||
if len(b.extensions) > 0 || len(b.variants) > 0 {
|
||||
sort.Sort(sortVariants(b.variants))
|
||||
sort.Strings(b.extensions)
|
||||
|
||||
if b.private != "" {
|
||||
b.extensions = append(b.extensions, b.private)
|
||||
}
|
||||
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
|
||||
buf := make([]byte, n)
|
||||
p := t.genCoreBytes(buf)
|
||||
t.pVariant = byte(p)
|
||||
p += appendTokens(buf[p:], b.variants...)
|
||||
t.pExt = uint16(p)
|
||||
p += appendTokens(buf[p:], b.extensions...)
|
||||
t.str = string(buf[:p])
|
||||
// We may not always need to remake the string, but when or when not
|
||||
// to do so is rather tricky.
|
||||
scan := makeScanner(buf[:p])
|
||||
t, _ = parse(&scan, "")
|
||||
return t
|
||||
|
||||
} else if b.private != "" {
|
||||
t.str = b.private
|
||||
t.RemakeString()
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// SetTag copies all the settings from a given Tag. Any previously set values
|
||||
// are discarded.
|
||||
func (b *Builder) SetTag(t Tag) {
|
||||
b.Tag.LangID = t.LangID
|
||||
b.Tag.RegionID = t.RegionID
|
||||
b.Tag.ScriptID = t.ScriptID
|
||||
// TODO: optimize
|
||||
b.variants = b.variants[:0]
|
||||
if variants := t.Variants(); variants != "" {
|
||||
for _, vr := range strings.Split(variants[1:], "-") {
|
||||
b.variants = append(b.variants, vr)
|
||||
}
|
||||
}
|
||||
b.extensions, b.private = b.extensions[:0], ""
|
||||
for _, e := range t.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
}
|
||||
|
||||
// AddExt adds extension e to the tag. e must be a valid extension as returned
|
||||
// by Tag.Extension. If the extension already exists, it will be discarded,
|
||||
// except for a -u extension, where non-existing key-type pairs will added.
|
||||
func (b *Builder) AddExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
if b.private == "" {
|
||||
b.private = e
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] += e[1:]
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// SetExt sets the extension e to the tag. e must be a valid extension as
|
||||
// returned by Tag.Extension. If the extension already exists, it will be
|
||||
// overwritten, except for a -u extension, where the individual key-type pairs
|
||||
// will be set.
|
||||
func (b *Builder) SetExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
b.private = e
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] = e + s[1:]
|
||||
} else {
|
||||
b.extensions[i] = e
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// AddVariant adds any number of variants.
|
||||
func (b *Builder) AddVariant(v ...string) {
|
||||
for _, v := range v {
|
||||
if v != "" {
|
||||
b.variants = append(b.variants, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ClearVariants removes any variants previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearVariants() {
|
||||
b.variants = b.variants[:0]
|
||||
}
|
||||
|
||||
// ClearExtensions removes any extensions previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearExtensions() {
|
||||
b.private = ""
|
||||
b.extensions = b.extensions[:0]
|
||||
}
|
||||
|
||||
// tokenLen returns the number of bytes needed to write all tokens when each
// one is preceded by a single separator byte, which is how appendTokens
// lays them out.
func tokenLen(token ...string) (n int) {
	n = len(token) // one separator per token
	for i := range token {
		n += len(token[i])
	}
	return n
}
|
||||
|
||||
// appendTokens writes every token into b, each preceded by a '-', starting
// at b[0], and returns the number of bytes this takes up. Despite the name it
// does not grow b: the caller must supply a buffer that is large enough (see
// tokenLen).
func appendTokens(b []byte, token ...string) int {
	pos := 0
	for _, tok := range token {
		b[pos] = '-'
		pos++
		copy(b[pos:], tok)
		pos += len(tok)
	}
	return pos
}
|
||||
|
||||
type sortVariants []string
|
||||
|
||||
func (s sortVariants) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortVariants) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sortVariants) Less(i, j int) bool {
|
||||
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func parseBase(s string) Language {
|
||||
if s == "" {
|
||||
return 0
|
||||
}
|
||||
return MustParseBase(s)
|
||||
}
|
||||
|
||||
func parseScript(s string) Script {
|
||||
if s == "" {
|
||||
return 0
|
||||
}
|
||||
return MustParseScript(s)
|
||||
}
|
||||
|
||||
func parseRegion(s string) Region {
|
||||
if s == "" {
|
||||
return 0
|
||||
}
|
||||
return MustParseRegion(s)
|
||||
}
|
||||
|
||||
func TestBuilder(t *testing.T) {
|
||||
partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
|
||||
tag := Make(tt.in)
|
||||
b := Builder{}
|
||||
b.SetTag(Tag{
|
||||
LangID: parseBase(tt.lang),
|
||||
ScriptID: parseScript(tt.script),
|
||||
RegionID: parseRegion(tt.region),
|
||||
})
|
||||
if tt.variants != "" {
|
||||
b.AddVariant(strings.Split(tt.variants, "-")...)
|
||||
}
|
||||
for _, e := range tag.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
got := b.Make()
|
||||
if got != tag {
|
||||
t.Errorf("%s: got %v; want %v", tt.in, got, tag)
|
||||
}
|
||||
return got, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestSetTag(t *testing.T) {
|
||||
partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
|
||||
tag := Make(tt.in)
|
||||
b := Builder{}
|
||||
b.SetTag(tag)
|
||||
got := b.Make()
|
||||
if got != tag {
|
||||
t.Errorf("%s: got %v; want %v", tt.in, got, tag)
|
||||
}
|
||||
return got, false
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func BaseLanguages() []Language {
|
||||
base := make([]Language, 0, NumLanguages)
|
||||
for i := 0; i < langNoIndexOffset; i++ {
|
||||
// We included "und" already for the value 0.
|
||||
if i != nonCanonicalUnd {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
}
|
||||
i := langNoIndexOffset
|
||||
for _, v := range langNoIndex {
|
||||
for k := 0; k < 8; k++ {
|
||||
if v&1 == 1 {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
v >>= 1
|
||||
i++
|
||||
}
|
||||
}
|
||||
return base
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,20 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
type AliasType int8

// The alias types. Deprecated, Macro and Legacy are numbered consecutively
// from zero; AliasTypeUnknown is set apart as -1.
const (
	Deprecated AliasType = iota // 0
	Macro                       // 1
	Legacy                      // 2

	AliasTypeUnknown AliasType = -1
)
|
||||
@@ -0,0 +1,627 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_common.go -output tables.go
|
||||
|
||||
package language // import "golang.org/x/text/internal/language"
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
	// maxCoreSize bounds the length of a BCP 47 tag that has neither
	// variants nor extensions: language (up to 3) + '-' + script (4) + '-'
	// + region (up to 3).
	maxCoreSize = 12

	// max99thPercentileSize is a somewhat arbitrary buffer size, presumed
	// to be large enough for at least 99% of all BCP 47 tags.
	max99thPercentileSize = 32

	// maxSimpleUExtensionSize bounds the length of a -u extension holding a
	// single key-type pair: len("-u-") + key (2) + '-' + value (up to 8).
	maxSimpleUExtensionSize = 14
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed. The zero value of Tag is Und.
|
||||
type Tag struct {
|
||||
// TODO: the following fields have the form TagTypeID. This name is chosen
|
||||
// to allow refactoring the public package without conflicting with its
|
||||
// Base, Script, and Region methods. Once the transition is fully completed
|
||||
// the ID can be stripped from the name.
|
||||
|
||||
LangID Language
|
||||
RegionID Region
|
||||
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
|
||||
// storing lang, region, and ScriptID codes, store only the compact index and
|
||||
// have a lookup table from this code to its expansion. This greatly speeds
|
||||
// up table lookup, speed up common variant cases.
|
||||
// This will also immediately free up 3 extra bytes. Also, the pVariant
|
||||
// field can now be moved to the lookup table, as the compact index uniquely
|
||||
// determines the offset of a possible variant.
|
||||
ScriptID Script
|
||||
pVariant byte // offset in str, includes preceding '-'
|
||||
pExt uint16 // offset of first extension, includes preceding '-'
|
||||
|
||||
// str is the string representation of the Tag. It will only be used if the
|
||||
// tag has variants or extensions.
|
||||
str string
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
t, _ := Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
// TODO: consider removing
|
||||
func (t Tag) Raw() (b Language, s Script, r Region) {
|
||||
return t.LangID, t.ScriptID, t.RegionID
|
||||
}
|
||||
|
||||
// equalTags compares language, script and region subtags only.
|
||||
func (t Tag) equalTags(a Tag) bool {
|
||||
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if int(t.pVariant) < len(t.str) {
|
||||
return false
|
||||
}
|
||||
return t.equalTags(Und)
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
|
||||
// tag.
|
||||
func (t Tag) IsPrivateUse() bool {
|
||||
return t.str != "" && t.pVariant == 0
|
||||
}
|
||||
|
||||
// RemakeString is used to update t.str in case lang, script or region changed.
|
||||
// It is assumed that pExt and pVariant still point to the start of the
|
||||
// respective parts.
|
||||
func (t *Tag) RemakeString() {
|
||||
if t.str == "" {
|
||||
return
|
||||
}
|
||||
extra := t.str[t.pVariant:]
|
||||
if t.pVariant > 0 {
|
||||
extra = extra[1:]
|
||||
}
|
||||
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
|
||||
t.str = extra
|
||||
t.pVariant = 0
|
||||
t.pExt = 0
|
||||
return
|
||||
}
|
||||
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
|
||||
b := buf[:t.genCoreBytes(buf[:])]
|
||||
if extra != "" {
|
||||
diff := len(b) - int(t.pVariant)
|
||||
b = append(b, '-')
|
||||
b = append(b, extra...)
|
||||
t.pVariant = uint8(int(t.pVariant) + diff)
|
||||
t.pExt = uint16(int(t.pExt) + diff)
|
||||
} else {
|
||||
t.pVariant = uint8(len(b))
|
||||
t.pExt = uint16(len(b))
|
||||
}
|
||||
t.str = string(b)
|
||||
}
|
||||
|
||||
// genCoreBytes writes a string for the base languages, script and region tags
|
||||
// to the given buffer and returns the number of bytes written. It will never
|
||||
// write more than maxCoreSize bytes.
|
||||
func (t *Tag) genCoreBytes(buf []byte) int {
|
||||
n := t.LangID.StringToBuf(buf[:])
|
||||
if t.ScriptID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.ScriptID.String())
|
||||
}
|
||||
if t.RegionID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.RegionID.String())
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
if t.str != "" {
|
||||
return t.str
|
||||
}
|
||||
if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
return t.LangID.String()
|
||||
}
|
||||
buf := [maxCoreSize]byte{}
|
||||
return string(buf[:t.genCoreBytes(buf[:])])
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
if t.str != "" {
|
||||
text = append(text, t.str...)
|
||||
} else if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
text = append(text, t.LangID.String()...)
|
||||
} else {
|
||||
buf := [maxCoreSize]byte{}
|
||||
text = buf[:t.genCoreBytes(buf[:])]
|
||||
}
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
tag, err := Parse(string(text))
|
||||
*t = tag
|
||||
return err
|
||||
}
|
||||
|
||||
// Variants returns the part of the tag holding all variants or the empty string
|
||||
// if there are no variants defined.
|
||||
func (t Tag) Variants() string {
|
||||
if t.pVariant == 0 {
|
||||
return ""
|
||||
}
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
|
||||
// VariantOrPrivateUseTags returns variants or private use tags.
|
||||
func (t Tag) VariantOrPrivateUseTags() string {
|
||||
if t.pExt > 0 {
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
return t.str[t.pVariant:]
|
||||
}
|
||||
|
||||
// HasString reports whether this tag defines more than just the raw
|
||||
// components.
|
||||
func (t Tag) HasString() bool {
|
||||
return t.str != ""
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.str != "" {
|
||||
// Strip the variants and extensions.
|
||||
b, s, r := t.Raw()
|
||||
t = Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID == t.ScriptID {
|
||||
return Tag{LangID: t.LangID}
|
||||
}
|
||||
}
|
||||
return t
|
||||
}
|
||||
if t.LangID != 0 {
|
||||
if t.RegionID != 0 {
|
||||
maxScript := t.ScriptID
|
||||
if maxScript == 0 {
|
||||
max, _ := addTags(t)
|
||||
maxScript = max.ScriptID
|
||||
}
|
||||
|
||||
for i := range parents {
|
||||
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
|
||||
for _, r := range parents[i].fromRegion {
|
||||
if Region(r) == t.RegionID {
|
||||
return Tag{
|
||||
LangID: t.LangID,
|
||||
ScriptID: Script(parents[i].script),
|
||||
RegionID: Region(parents[i].toRegion),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Strip the script if it is the default one.
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID != maxScript {
|
||||
return Tag{LangID: t.LangID, ScriptID: maxScript}
|
||||
}
|
||||
return Tag{LangID: t.LangID}
|
||||
} else if t.ScriptID != 0 {
|
||||
// The parent for an base-script pair with a non-default script is
|
||||
// "und" instead of the base language.
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID != t.ScriptID {
|
||||
return Und
|
||||
}
|
||||
return Tag{LangID: t.LangID}
|
||||
}
|
||||
}
|
||||
return Und
|
||||
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (ext string, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
ext = ""
|
||||
err = ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
scan := makeScannerString(s)
|
||||
var end int
|
||||
if n := len(scan.token); n != 1 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
scan.toLower(0, len(scan.b))
|
||||
end = parseExtension(&scan)
|
||||
if end != len(s) {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
return string(scan.b), nil
|
||||
}
|
||||
|
||||
// HasVariants reports whether t has variants.
|
||||
func (t Tag) HasVariants() bool {
|
||||
return uint16(t.pVariant) < t.pExt
|
||||
}
|
||||
|
||||
// HasExtensions reports whether t has extensions.
|
||||
func (t Tag) HasExtensions() bool {
|
||||
return int(t.pExt) < len(t.str)
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext string, ok bool) {
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
if ext[0] == x {
|
||||
return ext, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []string {
|
||||
e := []string{}
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
e = append(e, ext)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
//
|
||||
// If there are multiple types associated with a key, only the first will be
|
||||
// returned. If there is no type associated with a key, it returns the empty
|
||||
// string.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if _, start, end, _ := t.findTypeForKey(key); end != start {
|
||||
s := t.str[start:end]
|
||||
if p := strings.IndexByte(s, '-'); p >= 0 {
|
||||
s = s[:p]
|
||||
}
|
||||
return s
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var (
	// errPrivateUse is returned by SetTypeForKey when the receiver is a
	// private use tag.
	errPrivateUse = errors.New("cannot set a key on a private use tag")
	// errInvalidArguments is returned by SetTypeForKey when the key is not
	// two bytes long or a non-empty value is not 3 to 8 bytes long.
	errInvalidArguments = errors.New("invalid key or type")
)
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
//
// It returns errPrivateUse when t is a private use tag, errInvalidArguments
// when key is not two bytes long or a non-empty value is not 3 to 8 bytes
// long, and the scanner's error when the generated key-type pair does not
// parse as an extension. On error the returned Tag is the unmodified t.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
	if t.IsPrivateUse() {
		return t, errPrivateUse
	}
	if len(key) != 2 {
		return t, errInvalidArguments
	}

	// Remove the setting if value is "".
	if value == "" {
		start, sep, end, _ := t.findTypeForKey(key)
		// start == sep means the key was not found; nothing to remove.
		if start != sep {
			// Remove a possible empty extension.
			switch {
			case t.str[start-2] != '-': // has previous elements.
			case end == len(t.str), // end of string
				end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
				// The pair was the only element of its extension, so the
				// two bytes before start are removed along with it.
				start -= 2
			}
			if start == int(t.pVariant) && end == len(t.str) {
				// Nothing but the core subtags remains: drop the stored
				// string and reset both offsets.
				t.str = ""
				t.pVariant, t.pExt = 0, 0
			} else {
				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
			}
		}
		return t, nil
	}

	if len(value) < 3 || len(value) > 8 {
		return t, errInvalidArguments
	}

	var (
		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
		uStart int // start of the -u extension.
	)

	// Generate the tag string if needed.
	if t.str == "" {
		uStart = t.genCoreBytes(buf[:])
		buf[uStart] = '-'
		uStart++
	}

	// Create new key-type pair and parse it to verify.
	// b is laid out as "u-" + key + "-" + value.
	b := buf[uStart:]
	copy(b, "u-")
	copy(b[2:], key)
	b[4] = '-'
	b = b[:5+copy(b[5:], value)]
	scan := makeScanner(b)
	if parseExtensions(&scan); scan.err != nil {
		return t, scan.err
	}

	// Assemble the replacement string.
	if t.str == "" {
		// Both offsets point at the '-' written just before the "u".
		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
		t.str = string(buf[:uStart+len(b)])
	} else {
		s := t.str
		start, sep, end, hasExt := t.findTypeForKey(key)
		if start == sep {
			// Key not present: insert the new pair at the insertion point.
			if hasExt {
				// A -u extension already exists; skip the leading "u-".
				b = b[2:]
			}
			t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
		} else {
			// Key present: keep "-" plus the two-byte key (start+3) and
			// replace the old type with value.
			t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
		}
	}
	return t, nil
}
|
||||
|
||||
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
//
// On a match, start is the offset of the '-' preceding the key and
// t.str[sep:end] is the associated type (possibly several subtags). When the
// key is not found, start, sep and end are all equal to the insertion point.
func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
	p := int(t.pExt)
	// Nothing to search if the key is malformed or pExt is zero or already
	// at the end of the string.
	if len(key) != 2 || p == len(t.str) || p == 0 {
		return p, p, p, false
	}
	s := t.str

	// Find the correct extension.
	for p++; s[p] != 'u'; p++ {
		if s[p] > 'u' {
			// This extension's first byte is greater than 'u': report the
			// position just before it as the insertion point.
			p--
			return p, p, p, false
		}
		if p = nextExtension(s, p); p == len(s) {
			return len(s), len(s), len(s), false
		}
	}
	// Proceed to the hyphen following the extension name.
	p++

	// curKey is the key currently being processed.
	curKey := ""

	// Iterate over keys until we get the end of a section.
	for {
		end = p
		// Advance p to the next '-' or to the end of the string.
		for p++; p < len(s) && s[p] != '-'; p++ {
		}
		// n is the length of the subtag between end and p.
		n := p - end - 1
		if n <= 2 && curKey == key {
			// The type of the requested key stops here. Move sep past the
			// '-' unless the type is empty.
			if sep < end {
				sep++
			}
			return start, sep, end, true
		}
		switch n {
		case 0, // invalid string
			1: // next extension
			return end, end, end, true
		case 2:
			// next key
			curKey = s[end+1 : p]
			if curKey > key {
				// This key compares greater than the requested one, so the
				// new pair is inserted before it.
				return end, end, end, true
			}
			start = end
			sep = p
		}
	}
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (l Language, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
l = 0
|
||||
err = ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getLangID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (scr Script, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
scr = 0
|
||||
err = ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
if len(s) != 4 {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [4]byte
|
||||
return getScriptID(script, buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
return getRegionM49(r)
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (r Region, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
r = 0
|
||||
err = ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getRegionID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
if r == 0 {
|
||||
return false
|
||||
}
|
||||
return int(regionInclusion[r]) < len(regionContainment)
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
if r == c {
|
||||
return true
|
||||
}
|
||||
g := regionInclusion[r]
|
||||
if g >= nRegionGroups {
|
||||
return false
|
||||
}
|
||||
m := regionContainment[g]
|
||||
|
||||
d := regionInclusion[c]
|
||||
b := regionInclusionBits[d]
|
||||
|
||||
// A contained country may belong to multiple disjoint groups. Matching any
|
||||
// of these indicates containment. If the contained region is a group, it
|
||||
// must strictly be a subset.
|
||||
if d >= nRegionGroups {
|
||||
return b&m != 0
|
||||
}
|
||||
return b&^m == 0
|
||||
}
|
||||
|
||||
// errNoTLD is returned by Region.TLD when the region has no ccTLD.
var errNoTLD = errors.New("language: region is not a valid ccTLD")
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
|
||||
// difference between ISO 3166-1 and IANA ccTLD.
|
||||
if r == _GB {
|
||||
r = _UK
|
||||
}
|
||||
if (r.typ() & ccTLD) == 0 {
|
||||
return 0, errNoTLD
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
if cr := normRegion(r); cr != 0 {
|
||||
return cr
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	// ID is the value ParseVariant looks up in variantIndex for the variant.
	ID uint8
	// str is the lower-cased variant string; it is what String returns.
	str string
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (v Variant, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
v = Variant{}
|
||||
err = ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
s = strings.ToLower(s)
|
||||
if id, ok := variantIndex[s]; ok {
|
||||
return Variant{id, s}, nil
|
||||
}
|
||||
return Variant{}, NewValueError([]byte(s))
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.str
|
||||
}
|
||||
@@ -0,0 +1,746 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/testtext"
|
||||
)
|
||||
|
||||
func TestTagSize(t *testing.T) {
|
||||
id := Tag{}
|
||||
typ := reflect.TypeOf(id)
|
||||
if typ.Size() > 32 {
|
||||
t.Errorf("size of Tag was %d; want <= 32", typ.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRoot(t *testing.T) {
|
||||
loc := Tag{}
|
||||
if !loc.IsRoot() {
|
||||
t.Errorf("unspecified should be root.")
|
||||
}
|
||||
for i, tt := range parseTests() {
|
||||
loc, _ := Parse(tt.in)
|
||||
undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
|
||||
if loc.IsRoot() != undef {
|
||||
t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEquality(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
s := tt.in
|
||||
tag := Make(s)
|
||||
t1 := Make(tag.String())
|
||||
if tag != t1 {
|
||||
t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMakeString(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
{"und", "und"},
|
||||
{"und", "und-CW"},
|
||||
{"nl", "nl-NL"},
|
||||
{"de-1901", "nl-1901"},
|
||||
{"de-1901", "de-Arab-1901"},
|
||||
{"x-a-b", "de-Arab-x-a-b"},
|
||||
{"x-a-b", "x-a-b"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
id, _ := Parse(tt.in)
|
||||
mod, _ := Parse(tt.out)
|
||||
id.setTagsFrom(mod)
|
||||
for j := 0; j < 2; j++ {
|
||||
id.RemakeString()
|
||||
if str := id.String(); str != tt.out {
|
||||
t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
|
||||
}
|
||||
}
|
||||
// The bytes to string conversion as used in remakeString
|
||||
// occasionally measures as more than one alloc, breaking this test.
|
||||
// To alleviate this we set the number of runs to more than 1.
|
||||
if n := testtext.AllocsPerRun(8, id.RemakeString); n > 1 {
|
||||
t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshal(t *testing.T) {
|
||||
testCases := []string{
|
||||
// TODO: these values will change with each CLDR update. This issue
|
||||
// will be solved if we decide to fix the indexes.
|
||||
"und",
|
||||
"ca-ES-valencia",
|
||||
"ca-ES-valencia-u-va-posix",
|
||||
"ca-ES-valencia-u-co-phonebk",
|
||||
"ca-ES-valencia-u-co-phonebk-va-posix",
|
||||
"x-klingon",
|
||||
"en-US",
|
||||
"en-US-u-va-posix",
|
||||
"en",
|
||||
"en-u-co-phonebk",
|
||||
"en-001",
|
||||
"sh",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
var tag Tag
|
||||
err := tag.UnmarshalText([]byte(tc))
|
||||
if err != nil {
|
||||
t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
|
||||
}
|
||||
b, err := tag.MarshalText()
|
||||
if err != nil {
|
||||
t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
|
||||
}
|
||||
if got := string(b); got != tc {
|
||||
t.Errorf("%s: got %q; want %q", tc, got, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBase(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"en", "en", true},
|
||||
{"EN", "en", true},
|
||||
{"nld", "nl", true},
|
||||
{"dut", "dut", true}, // bibliographic
|
||||
{"aaj", "und", false}, // unknown
|
||||
{"qaa", "qaa", true},
|
||||
{"a", "und", false},
|
||||
{"", "und", false},
|
||||
{"aaaa", "und", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, err := ParseBase(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if y, _, _ := Make(tt.out).Raw(); x != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseScript(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"Latn", "Latn", true},
|
||||
{"zzzz", "Zzzz", true},
|
||||
{"zyyy", "Zyyy", true},
|
||||
{"Latm", "Zzzz", false},
|
||||
{"Zzz", "Zzzz", false},
|
||||
{"", "Zzzz", false},
|
||||
{"Zzzxx", "Zzzz", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, err := ParseScript(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if err == nil {
|
||||
if _, y, _ := Make("und-" + tt.out).Raw(); x != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeM49(t *testing.T) {
|
||||
tests := []struct {
|
||||
m49 int
|
||||
code string
|
||||
ok bool
|
||||
}{
|
||||
{1, "001", true},
|
||||
{840, "US", true},
|
||||
{899, "ZZ", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
|
||||
t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
|
||||
}
|
||||
}
|
||||
for i := 1; i <= 1000; i++ {
|
||||
if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
|
||||
t.Errorf("%d has no error, but maps to undefined region", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRegion(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"001", "001", true},
|
||||
{"840", "US", true},
|
||||
{"899", "ZZ", false},
|
||||
{"USA", "US", true},
|
||||
{"US", "US", true},
|
||||
{"BC", "ZZ", false},
|
||||
{"C", "ZZ", false},
|
||||
{"CCCC", "ZZ", false},
|
||||
{"01", "ZZ", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, err := ParseRegion(tt.in)
|
||||
if r.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if err == nil {
|
||||
if _, _, y := Make("und-" + tt.out).Raw(); r != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsCountry(t *testing.T) {
|
||||
tests := []struct {
|
||||
reg string
|
||||
country bool
|
||||
}{
|
||||
{"US", true},
|
||||
{"001", false},
|
||||
{"958", false},
|
||||
{"419", false},
|
||||
{"203", true},
|
||||
{"020", true},
|
||||
{"900", false},
|
||||
{"999", false},
|
||||
{"QO", false},
|
||||
{"EU", false},
|
||||
{"AA", false},
|
||||
{"XK", true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, _ := getRegionID([]byte(tt.reg))
|
||||
if r.IsCountry() != tt.country {
|
||||
t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsGroup(t *testing.T) {
|
||||
tests := []struct {
|
||||
reg string
|
||||
group bool
|
||||
}{
|
||||
{"US", false},
|
||||
{"001", true},
|
||||
{"958", false},
|
||||
{"419", true},
|
||||
{"203", false},
|
||||
{"020", false},
|
||||
{"900", false},
|
||||
{"999", false},
|
||||
{"QO", true},
|
||||
{"EU", true},
|
||||
{"AA", false},
|
||||
{"XK", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, _ := getRegionID([]byte(tt.reg))
|
||||
if r.IsGroup() != tt.group {
|
||||
t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestContains(t *testing.T) {
|
||||
tests := []struct {
|
||||
enclosing, contained string
|
||||
contains bool
|
||||
}{
|
||||
// A region contains itself.
|
||||
{"US", "US", true},
|
||||
{"001", "001", true},
|
||||
|
||||
// Direct containment.
|
||||
{"001", "002", true},
|
||||
{"039", "XK", true},
|
||||
{"150", "XK", true},
|
||||
{"EU", "AT", true},
|
||||
{"QO", "AQ", true},
|
||||
|
||||
// Indirect containemnt.
|
||||
{"001", "US", true},
|
||||
{"001", "419", true},
|
||||
{"001", "013", true},
|
||||
|
||||
// No containment.
|
||||
{"US", "001", false},
|
||||
{"155", "EU", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
enc, _ := getRegionID([]byte(tt.enclosing))
|
||||
con, _ := getRegionID([]byte(tt.contained))
|
||||
r := enc
|
||||
if got := r.Contains(con); got != tt.contains {
|
||||
t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionCanonicalize(t *testing.T) {
|
||||
for i, tt := range []struct{ in, out string }{
|
||||
{"UK", "GB"},
|
||||
{"TP", "TL"},
|
||||
{"QU", "EU"},
|
||||
{"SU", "SU"},
|
||||
{"VD", "VN"},
|
||||
{"DD", "DE"},
|
||||
} {
|
||||
r := MustParseRegion(tt.in)
|
||||
want := MustParseRegion(tt.out)
|
||||
if got := r.Canonicalize(); got != want {
|
||||
t.Errorf("%d: got %v; want %v", i, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionTLD(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
in, out string
|
||||
ok bool
|
||||
}{
|
||||
{"EH", "EH", true},
|
||||
{"FR", "FR", true},
|
||||
{"TL", "TL", true},
|
||||
|
||||
// In ccTLD before in ISO.
|
||||
{"GG", "GG", true},
|
||||
|
||||
// Non-standard assignment of ccTLD to ISO code.
|
||||
{"GB", "UK", true},
|
||||
|
||||
// Exceptionally reserved in ISO and valid ccTLD.
|
||||
{"UK", "UK", true},
|
||||
{"AC", "AC", true},
|
||||
{"EU", "EU", true},
|
||||
{"SU", "SU", true},
|
||||
|
||||
// Exceptionally reserved in ISO and invalid ccTLD.
|
||||
{"CP", "ZZ", false},
|
||||
{"DG", "ZZ", false},
|
||||
{"EA", "ZZ", false},
|
||||
{"FX", "ZZ", false},
|
||||
{"IC", "ZZ", false},
|
||||
{"TA", "ZZ", false},
|
||||
|
||||
// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
|
||||
// it is still being phased out.
|
||||
{"AN", "AN", true},
|
||||
{"TP", "TP", true},
|
||||
|
||||
// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
|
||||
// Defined in package language as it has a mapping in CLDR.
|
||||
{"BU", "ZZ", false},
|
||||
{"CS", "ZZ", false},
|
||||
{"NT", "ZZ", false},
|
||||
{"YU", "ZZ", false},
|
||||
{"ZR", "ZZ", false},
|
||||
// Not defined in package: SF.
|
||||
|
||||
// Indeterminately reserved in ISO.
|
||||
// Defined in package language as it has a legacy mapping in CLDR.
|
||||
{"DY", "ZZ", false},
|
||||
{"RH", "ZZ", false},
|
||||
{"VD", "ZZ", false},
|
||||
// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
|
||||
// RN, RP, WG, WL, WV, and YV.
|
||||
|
||||
// Not assigned in ISO, but legacy definitions in CLDR.
|
||||
{"DD", "ZZ", false},
|
||||
{"YD", "ZZ", false},
|
||||
|
||||
// Normal mappings but somewhat special status in ccTLD.
|
||||
{"BL", "BL", true},
|
||||
{"MF", "MF", true},
|
||||
{"BV", "BV", true},
|
||||
{"SJ", "SJ", true},
|
||||
|
||||
// Have values when normalized, but not as is.
|
||||
{"QU", "ZZ", false},
|
||||
|
||||
// ISO Private Use.
|
||||
{"AA", "ZZ", false},
|
||||
{"QM", "ZZ", false},
|
||||
{"QO", "ZZ", false},
|
||||
{"XA", "ZZ", false},
|
||||
{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
|
||||
} {
|
||||
if tt.in == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
r := MustParseRegion(tt.in)
|
||||
var want Region
|
||||
if tt.out != "ZZ" {
|
||||
want = MustParseRegion(tt.out)
|
||||
}
|
||||
tld, err := r.TLD()
|
||||
if got := err == nil; got != tt.ok {
|
||||
t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
|
||||
}
|
||||
if tld != want {
|
||||
t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTypeForKey(t *testing.T) {
|
||||
tests := []struct{ key, in, out string }{
|
||||
{"co", "en", ""},
|
||||
{"co", "en-u-abc", ""},
|
||||
{"co", "en-u-co-phonebk", "phonebk"},
|
||||
{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
|
||||
{"co", "x-foo-u-co-phonebk", ""},
|
||||
{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
|
||||
{"kc", "cmn-u-co-stroke", ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
|
||||
t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetTypeForKey(t *testing.T) {
|
||||
tests := []struct {
|
||||
key, value, in, out string
|
||||
err bool
|
||||
}{
|
||||
// replace existing value
|
||||
{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
|
||||
{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
|
||||
{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
|
||||
{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
|
||||
{"co", "pinyin", "en-u-co-x-x", "en-u-co-pinyin-x-x", false},
|
||||
{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
|
||||
{"nu", "arabic", "en-u-co-phonebk-nu", "en-u-co-phonebk-nu-arabic", false},
|
||||
// add to existing -u extension
|
||||
{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
|
||||
{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
|
||||
{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
|
||||
{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
|
||||
{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
|
||||
// remove pair
|
||||
{"co", "", "en-u-co-phonebk", "en", false},
|
||||
{"co", "", "en-u-co", "en", false},
|
||||
{"co", "", "en-u-co-v", "en", false},
|
||||
{"co", "", "en-u-co-v-", "en", false},
|
||||
{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
|
||||
{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
|
||||
{"co", "", "en-u-co-nu-arabic", "en-u-nu-arabic", false},
|
||||
{"co", "", "en", "en", false},
|
||||
// add -u extension
|
||||
{"co", "pinyin", "en", "en-u-co-pinyin", false},
|
||||
{"co", "pinyin", "und", "und-u-co-pinyin", false},
|
||||
{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
|
||||
{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
|
||||
{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
|
||||
{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
|
||||
{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
|
||||
// error on invalid values
|
||||
{"co", "pinyinxxx", "en", "en", true},
|
||||
{"co", "piny.n", "en", "en", true},
|
||||
{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
|
||||
{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
|
||||
{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
|
||||
{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
|
||||
{"col", "pinyin", "en", "en", true},
|
||||
{"co", "cu", "en", "en", true},
|
||||
// error when setting on a private use tag
|
||||
{"co", "phonebook", "x-foo", "x-foo", true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tag := Make(tt.in)
|
||||
if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
|
||||
t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
|
||||
} else if (err != nil) != tt.err {
|
||||
t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
|
||||
} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
|
||||
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
|
||||
}
|
||||
if len(tag.String()) <= 3 {
|
||||
// Simulate a tag for which the string has not been set.
|
||||
tag.str, tag.pExt, tag.pVariant = "", 0, 0
|
||||
if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
|
||||
if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
|
||||
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindKeyAndType(t *testing.T) {
|
||||
// out is either the matched type in case of a match or the original
|
||||
// string up till the insertion point.
|
||||
tests := []struct {
|
||||
key string
|
||||
hasExt bool
|
||||
in, out string
|
||||
}{
|
||||
// Don't search past a private use extension.
|
||||
{"co", false, "en-x-foo-u-co-pinyin", "en"},
|
||||
{"co", false, "x-foo-u-co-pinyin", ""},
|
||||
{"co", false, "en-s-fff-x-foo", "en-s-fff"},
|
||||
// Insertion points in absence of -u extension.
|
||||
{"cu", false, "en", ""}, // t.str is ""
|
||||
{"cu", false, "en-v-va", "en"},
|
||||
{"cu", false, "en-a-va", "en-a-va"},
|
||||
{"cu", false, "en-a-va-v-va", "en-a-va"},
|
||||
{"cu", false, "en-x-a", "en"},
|
||||
// Tags with the -u extension.
|
||||
{"nu", true, "en-u-cu-nu", "en-u-cu"},
|
||||
{"cu", true, "en-u-cu-nu", "en-u"},
|
||||
{"co", true, "en-u-co-standard", "standard"},
|
||||
{"co", true, "yue-u-co-pinyin", "pinyin"},
|
||||
{"co", true, "en-u-co-abc", "abc"},
|
||||
{"co", true, "en-u-co-abc-def", "abc-def"},
|
||||
{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
|
||||
{"co", true, "en-u-co-standard-nu-arab", "standard"},
|
||||
{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
|
||||
// Insertion points.
|
||||
{"cu", true, "en-u-co-standard", "en-u-co-standard"},
|
||||
{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
|
||||
{"cu", true, "en-u-co-abc", "en-u-co-abc"},
|
||||
{"cu", true, "en-u-nu-arabic", "en-u"},
|
||||
{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
start, sep, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
|
||||
if sep != end {
|
||||
res := tt.in[sep:end]
|
||||
if res != tt.out {
|
||||
t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
|
||||
}
|
||||
} else {
|
||||
if hasExt != tt.hasExt {
|
||||
t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
|
||||
continue
|
||||
}
|
||||
if tt.in[:start] != tt.out {
|
||||
t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParent(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
// Strip variants and extensions first
|
||||
{"de-u-co-phonebk", "de"},
|
||||
{"de-1994", "de"},
|
||||
{"de-Latn-1994", "de"}, // remove superfluous script.
|
||||
|
||||
// Ensure the canonical Tag for an entry is in the chain for base-script
|
||||
// pairs.
|
||||
{"zh-Hans", "zh"},
|
||||
|
||||
// Skip the script if it is the maximized version. CLDR files for the
|
||||
// skipped tag are always empty.
|
||||
{"zh-Hans-TW", "zh"},
|
||||
{"zh-Hans-CN", "zh"},
|
||||
|
||||
// Insert the script if the maximized script is not the same as the
|
||||
// maximized script of the base language.
|
||||
{"zh-TW", "zh-Hant"},
|
||||
{"zh-HK", "zh-Hant"},
|
||||
{"zh-Hant-TW", "zh-Hant"},
|
||||
{"zh-Hant-HK", "zh-Hant"},
|
||||
|
||||
// Non-default script skips to und.
|
||||
// CLDR
|
||||
{"az-Cyrl", "und"},
|
||||
{"bs-Cyrl", "und"},
|
||||
{"en-Dsrt", "und"},
|
||||
{"ha-Arab", "und"},
|
||||
{"mn-Mong", "und"},
|
||||
{"pa-Arab", "und"},
|
||||
{"shi-Latn", "und"},
|
||||
{"sr-Latn", "und"},
|
||||
{"uz-Arab", "und"},
|
||||
{"uz-Cyrl", "und"},
|
||||
{"vai-Latn", "und"},
|
||||
{"zh-Hant", "und"},
|
||||
// extra
|
||||
{"nl-Cyrl", "und"},
|
||||
|
||||
// World english inherits from en-001.
|
||||
{"en-150", "en-001"},
|
||||
{"en-AU", "en-001"},
|
||||
{"en-BE", "en-001"},
|
||||
{"en-GG", "en-001"},
|
||||
{"en-GI", "en-001"},
|
||||
{"en-HK", "en-001"},
|
||||
{"en-IE", "en-001"},
|
||||
{"en-IM", "en-001"},
|
||||
{"en-IN", "en-001"},
|
||||
{"en-JE", "en-001"},
|
||||
{"en-MT", "en-001"},
|
||||
{"en-NZ", "en-001"},
|
||||
{"en-PK", "en-001"},
|
||||
{"en-SG", "en-001"},
|
||||
|
||||
// Spanish in Latin-American countries have es-419 as parent.
|
||||
{"es-AR", "es-419"},
|
||||
{"es-BO", "es-419"},
|
||||
{"es-CL", "es-419"},
|
||||
{"es-CO", "es-419"},
|
||||
{"es-CR", "es-419"},
|
||||
{"es-CU", "es-419"},
|
||||
{"es-DO", "es-419"},
|
||||
{"es-EC", "es-419"},
|
||||
{"es-GT", "es-419"},
|
||||
{"es-HN", "es-419"},
|
||||
{"es-MX", "es-419"},
|
||||
{"es-NI", "es-419"},
|
||||
{"es-PA", "es-419"},
|
||||
{"es-PE", "es-419"},
|
||||
{"es-PR", "es-419"},
|
||||
{"es-PY", "es-419"},
|
||||
{"es-SV", "es-419"},
|
||||
{"es-US", "es-419"},
|
||||
{"es-UY", "es-419"},
|
||||
{"es-VE", "es-419"},
|
||||
// exceptions (according to CLDR)
|
||||
{"es-CW", "es"},
|
||||
|
||||
// Inherit from pt-PT, instead of pt for these countries.
|
||||
{"pt-AO", "pt-PT"},
|
||||
{"pt-CV", "pt-PT"},
|
||||
{"pt-GW", "pt-PT"},
|
||||
{"pt-MO", "pt-PT"},
|
||||
{"pt-MZ", "pt-PT"},
|
||||
{"pt-ST", "pt-PT"},
|
||||
{"pt-TL", "pt-PT"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tag := MustParse(tt.in)
|
||||
if p := MustParse(tt.out); p != tag.Parent() {
|
||||
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Input sets for the Parse benchmarks below, grouped by how much work the
// parser is expected to do for each tag.
var (
	// Tags without error that don't need to be changed.
	benchBasic = []string{
		"en",
		"en-Latn",
		"en-GB",
		"za",
		"zh-Hant",
		"zh",
		"zh-HK",
		"ar-MK",
		"en-CA",
		"fr-CA",
		"fr-CH",
		"fr",
		"lv",
		"he-IT",
		"tlh",
		"ja",
		"ja-Jpan",
		"ja-Jpan-JP",
		"de-1996",
		"de-CH",
		"sr",
		"sr-Latn",
	}
	// Tags with extensions, no changes required.
	benchExt = []string{
		"x-a-b-c-d",
		"x-aa-bbbb-cccccccc-d",
		"en-x_cc-b-bbb-a-aaa",
		"en-c_cc-b-bbb-a-aaa-x-x",
		"en-u-co-phonebk",
		"en-Cyrl-u-co-phonebk",
		"en-US-u-co-phonebk-cu-xau",
		"en-nedix-u-co-phonebk",
		"en-t-t0-abcd",
		"en-t-nl-latn",
		"en-t-t0-abcd-x-a",
		"en_t_pt_MLt",
		"en-t-fr-est",
	}
	// Change, but no memory allocation required.
	benchSimpleChange = []string{
		"EN",
		"i-klingon",
		"en-latn",
		"zh-cmn-Hans-CN",
		"iw-NL",
	}
	// Change and memory allocation required.
	benchChangeAlloc = []string{
		"en-c_cc-b-bbb-a-aaa",
		"en-u-cu-xua-co-phonebk",
		"en-u-cu-xua-co-phonebk-a-cd",
		"en-u-def-abc-cu-xua-co-phonebk",
		"en-t-en-Cyrl-NL-1994",
		"en-t-en-Cyrl-NL-1994-t0-abc-def",
	}
	// Tags that result in errors.
	benchErr = []string{
		// IllFormed
		"x_A.-B-C_D",
		"en-u-cu-co-phonebk",
		"en-u-cu-xau-co",
		"en-t-nl-abcd",
		// Invalid
		"xx",
		"nl-Uuuu",
		"nl-QB",
	}
	// benchChange is every tag that requires a change, with or without
	// allocation.
	benchChange = append(benchSimpleChange, benchChangeAlloc...)
	// benchAll is the concatenation of all of the sets above.
	benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
)
|
||||
|
||||
func doParse(b *testing.B, tag []string) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
// Use the modulo instead of looping over all tags so that we get a somewhat
|
||||
// meaningful ns/op.
|
||||
Parse(tag[i%len(tag)])
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkParse measures Parse over every benchmark tag set (benchAll).
func BenchmarkParse(b *testing.B) {
	doParse(b, benchAll)
}
|
||||
|
||||
// BenchmarkParseBasic measures Parse over the tags in benchBasic.
func BenchmarkParseBasic(b *testing.B) {
	doParse(b, benchBasic)
}
|
||||
|
||||
// BenchmarkParseError measures Parse over the tags in benchErr.
func BenchmarkParseError(b *testing.B) {
	doParse(b, benchErr)
}
|
||||
|
||||
// BenchmarkParseSimpleChange measures Parse over the tags in
// benchSimpleChange.
func BenchmarkParseSimpleChange(b *testing.B) {
	doParse(b, benchSimpleChange)
}
|
||||
|
||||
// BenchmarkParseChangeAlloc measures Parse over the tags in benchChangeAlloc.
func BenchmarkParseChangeAlloc(b *testing.B) {
	doParse(b, benchChangeAlloc)
}
|
||||
@@ -0,0 +1,412 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// findIndex tries to find the given tag in idx and returns a standardized error
|
||||
// if it could not be found.
|
||||
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
|
||||
if !tag.FixCase(form, key) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
i := idx.Index(key)
|
||||
if i == -1 {
|
||||
return 0, NewValueError(key)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// searchUint returns the smallest index i in imap for which imap[i] >= key,
// or len(imap) if there is no such index. imap must be sorted in ascending
// order for the result to be meaningful.
func searchUint(imap []uint16, key uint16) int {
	atOrAbove := func(i int) bool {
		return key <= imap[i]
	}
	return sort.Search(len(imap), atOrAbove)
}
|
||||
|
||||
// Language is a numeric identifier for a base language. The zero value is the
// unspecified language, which String renders as "und".
type Language uint16
|
||||
|
||||
// getLangID returns the langID of s if s is a canonical subtag
|
||||
// or langUnknown if s is not a canonical subtag.
|
||||
func getLangID(s []byte) (Language, error) {
|
||||
if len(s) == 2 {
|
||||
return getLangISO2(s)
|
||||
}
|
||||
return getLangISO3(s)
|
||||
}
|
||||
|
||||
// TODO language normalization as well as the AliasMaps could be moved to the
|
||||
// higher level package, but it is a bit tricky to separate the generation.
|
||||
|
||||
// Canonicalize returns the Language that id is mapped to by normLang together
// with the type of that alias. It delegates entirely to normLang.
func (id Language) Canonicalize() (Language, AliasType) {
	return normLang(id)
}
|
||||
|
||||
// normLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(id Language) (Language, AliasType) {
|
||||
k := sort.Search(len(AliasMap), func(i int) bool {
|
||||
return AliasMap[i].From >= uint16(id)
|
||||
})
|
||||
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
|
||||
return Language(AliasMap[k].To), AliasTypes[k]
|
||||
}
|
||||
return id, AliasTypeUnknown
|
||||
}
|
||||
|
||||
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO2(s []byte) (Language, error) {
|
||||
if !tag.FixCase("zz", s) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
|
||||
return Language(i), nil
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
|
||||
// base is the radix used by strToInt and intToStr: one digit per letter in
// the range 'a' through 'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a base-26 number in which each byte contributes
// the digit s[i]-'a', most significant digit first. An empty s yields 0.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}

// intToStr converts the given integer to the original ASCII string passed to
// strToInt, filling s from its last byte to its first.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	for pos := len(s); pos > 0; pos-- {
		s[pos-1] = 'a' + byte(v%base)
		v /= base
	}
}
|
||||
|
||||
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO3(s []byte) (Language, error) {
|
||||
if tag.FixCase("und", s) {
|
||||
// first try to match canonical 3-letter entries
|
||||
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
|
||||
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
|
||||
// We treat "und" as special and always translate it to "unspecified".
|
||||
// Note that ZZ and Zzzz are private use and are not treated as
|
||||
// unspecified by default.
|
||||
id := Language(i)
|
||||
if id == nonCanonicalUnd {
|
||||
return 0, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if i := altLangISO3.Index(s); i != -1 {
|
||||
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
|
||||
}
|
||||
n := strToInt(s)
|
||||
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||
return Language(n) + langNoIndexOffset, nil
|
||||
}
|
||||
// Check for non-canonical uses of ISO3.
|
||||
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
|
||||
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Language(i), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
// StringToBuf writes the string to b and returns the number of bytes
|
||||
// written. cap(b) must be >= 3.
|
||||
func (id Language) StringToBuf(b []byte) int {
|
||||
if id >= langNoIndexOffset {
|
||||
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||
return 3
|
||||
} else if id == 0 {
|
||||
return copy(b, "und")
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return copy(b, l[:3])
|
||||
}
|
||||
return copy(b, l[:2])
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
// Use b as variable name, instead of id, to ensure the variable
|
||||
// used is consistent with that of Base in which this type is embedded.
|
||||
func (b Language) String() string {
|
||||
if b == 0 {
|
||||
return "und"
|
||||
} else if b >= langNoIndexOffset {
|
||||
b -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(b), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
return l[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Language) ISO3() string {
|
||||
if b == 0 || b >= langNoIndexOffset {
|
||||
return b.String()
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
return altLangISO3.Elem(int(l[3]))[:3]
|
||||
}
|
||||
// This allocation will only happen for 3-letter ISO codes
|
||||
// that are non-canonical BCP 47 language identifiers.
|
||||
return l[0:1] + l[2:4]
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Language) IsPrivateUse() bool {
|
||||
return langPrivateStart <= b && b <= langPrivateEnd
|
||||
}
|
||||
|
||||
// SuppressScript returns the script marked as SuppressScript in the IANA
|
||||
// language tag repository, or 0 if there is no such script.
|
||||
func (b Language) SuppressScript() Script {
|
||||
if b < langNoIndexOffset {
|
||||
return Script(suppressScript[b])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Region is a numeric identifier for a region. The zero value is the
// unspecified region, which String renders as "ZZ".
type Region uint16
|
||||
|
||||
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||
// or unknownRegion.
|
||||
func getRegionID(s []byte) (Region, error) {
|
||||
if len(s) == 3 {
|
||||
if isAlpha(s[0]) {
|
||||
return getRegionISO3(s)
|
||||
}
|
||||
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||
return getRegionM49(int(i))
|
||||
}
|
||||
}
|
||||
return getRegionISO2(s)
|
||||
}
|
||||
|
||||
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO2(s []byte) (Region, error) {
|
||||
i, err := findIndex(regionISO, s, "ZZ")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
|
||||
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO3(s []byte) (Region, error) {
|
||||
if tag.FixCase("ZZZ", s) {
|
||||
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
|
||||
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
|
||||
return Region(altRegionIDs[i/3]), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
func getRegionM49(n int) (Region, error) {
|
||||
if 0 < n && n <= 999 {
|
||||
const (
|
||||
searchBits = 7
|
||||
regionBits = 9
|
||||
regionMask = 1<<regionBits - 1
|
||||
)
|
||||
idx := n >> searchBits
|
||||
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
|
||||
val := uint16(n) << regionBits // we rely on bits shifting out
|
||||
i := sort.Search(len(buf), func(i int) bool {
|
||||
return buf[i] >= val
|
||||
})
|
||||
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
|
||||
return Region(r & regionMask), nil
|
||||
}
|
||||
}
|
||||
var e ValueError
|
||||
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
|
||||
return 0, e
|
||||
}
|
||||
|
||||
// normRegion returns a region if r is deprecated or 0 otherwise.
|
||||
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
|
||||
// TODO: consider mapping split up regions to new most populous one (like CLDR).
|
||||
func normRegion(r Region) Region {
|
||||
m := regionOldMap
|
||||
k := sort.Search(len(m), func(i int) bool {
|
||||
return m[i].From >= uint16(r)
|
||||
})
|
||||
if k < len(m) && m[k].From == uint16(r) {
|
||||
return Region(m[k].To)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Bit flags stored per region in regionTypes and returned by Region.typ.
const (
	// iso3166UserAssigned is the flag tested by Region.IsPrivateUse.
	iso3166UserAssigned = 1 << iota
	ccTLD
	bcp47Region
)
|
||||
|
||||
// typ returns the regionTypes entry for r: a combination of the
// iso3166UserAssigned, ccTLD and bcp47Region flags.
func (r Region) typ() byte {
	return regionTypes[r]
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
if r < isoRegionOffset {
|
||||
if r == 0 {
|
||||
return "ZZ"
|
||||
}
|
||||
return fmt.Sprintf("%03d", r.M49())
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
return regionISO.Elem(int(r))[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
if r < isoRegionOffset {
|
||||
return "ZZZ"
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
reg := regionISO.Elem(int(r))
|
||||
switch reg[2] {
|
||||
case 0:
|
||||
return altRegionISO3[reg[3]:][:3]
|
||||
case ' ':
|
||||
return "ZZZ"
|
||||
}
|
||||
return reg[0:1] + reg[2:4]
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r. The value is read from the m49 table indexed by r.
func (r Region) M49() int {
	return int(m49[r])
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
// The status is read from the iso3166UserAssigned bit of r.typ().
func (r Region) IsPrivateUse() bool {
	return r.typ()&iso3166UserAssigned != 0
}
|
||||
|
||||
// Script is a numeric identifier for a script. The zero value is the
// unspecified script, which String renders as "Zzzz".
type Script uint16
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. The lookup is done by findIndex with the
// form "Zzzz"; its error is returned unchanged and the Script is 0 on
// failure.
func getScriptID(idx tag.Index, s []byte) (Script, error) {
	i, err := findIndex(idx, s, "Zzzz")
	return Script(i), err
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
if s == 0 {
|
||||
return "Zzzz"
|
||||
}
|
||||
return script.Elem(int(s))
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return _Qaaa <= s && s <= _Qabx
|
||||
}
|
||||
|
||||
const (
	// maxAltTaglen is the length of "en-US-POSIX" and the size of the key
	// array passed to grandfathered.
	maxAltTaglen = len("en-US-POSIX")
	// maxLen is the size of the key arrays in grandfatheredMap.
	maxLen = maxAltTaglen
)
|
||||
|
||||
var (
|
||||
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
|
||||
// their base language or index to more elaborate tag.
|
||||
grandfatheredMap = map[[maxLen]byte]int16{
|
||||
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
|
||||
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
|
||||
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
|
||||
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
|
||||
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
|
||||
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
|
||||
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
|
||||
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
|
||||
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
|
||||
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
|
||||
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
|
||||
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
|
||||
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
|
||||
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
|
||||
|
||||
// Grandfathered tags with no modern replacement will be converted as
|
||||
// follows:
|
||||
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
|
||||
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
|
||||
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
|
||||
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
|
||||
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
|
||||
|
||||
// CLDR-specific tag.
|
||||
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
|
||||
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"
|
||||
}
|
||||
|
||||
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
|
||||
|
||||
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
|
||||
)
|
||||
|
||||
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
|
||||
if v, ok := grandfatheredMap[s]; ok {
|
||||
if v < 0 {
|
||||
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
|
||||
}
|
||||
t.LangID = Language(v)
|
||||
return t, true
|
||||
}
|
||||
return t, false
|
||||
}
|
||||
@@ -0,0 +1,457 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// b converts s to a fresh byte slice. It is shorthand for the tests in this
// file, which call lookup functions that take []byte arguments.
func b(s string) []byte {
	return []byte(s)
}
|
||||
|
||||
func TestLangID(t *testing.T) {
|
||||
tests := []struct {
|
||||
id, bcp47, iso3, norm string
|
||||
err error
|
||||
}{
|
||||
{id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
|
||||
{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
|
||||
{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
|
||||
{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
|
||||
{id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))},
|
||||
{id: "und", bcp47: "und", iso3: "und"},
|
||||
{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
|
||||
{id: "jrb", bcp47: "jrb", iso3: "jrb"},
|
||||
{id: "es", bcp47: "es", iso3: "spa"},
|
||||
{id: "spa", bcp47: "es", iso3: "spa"},
|
||||
{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
|
||||
{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
|
||||
{id: "ar", bcp47: "ar", iso3: "ara"},
|
||||
{id: "kw", bcp47: "kw", iso3: "cor"},
|
||||
{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
|
||||
{id: "ar", bcp47: "ar", iso3: "ara"},
|
||||
{id: "kur", bcp47: "ku", iso3: "kur"},
|
||||
{id: "nl", bcp47: "nl", iso3: "nld"},
|
||||
{id: "NL", bcp47: "nl", iso3: "nld"},
|
||||
{id: "gsw", bcp47: "gsw", iso3: "gsw"},
|
||||
{id: "gSW", bcp47: "gsw", iso3: "gsw"},
|
||||
{id: "und", bcp47: "und", iso3: "und"},
|
||||
{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
|
||||
{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
|
||||
{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
|
||||
{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
|
||||
{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
want, err := getLangID(b(tt.id))
|
||||
if err != tt.err {
|
||||
t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
|
||||
t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
|
||||
}
|
||||
if len(tt.iso3) == 3 {
|
||||
if id, _ := getLangISO3(b(tt.iso3)); want != id {
|
||||
t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
|
||||
}
|
||||
if id, _ := getLangID(b(tt.iso3)); want != id {
|
||||
t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
|
||||
}
|
||||
}
|
||||
norm := want
|
||||
if tt.norm != "" {
|
||||
norm, _ = getLangID(b(tt.norm))
|
||||
}
|
||||
id, _ := normLang(want)
|
||||
if id != norm {
|
||||
t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
|
||||
}
|
||||
if id := want.String(); tt.bcp47 != id {
|
||||
t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
|
||||
}
|
||||
if id := want.ISO3(); tt.iso3[:3] != id {
|
||||
t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrandfathered(t *testing.T) {
|
||||
for _, tt := range []struct{ in, out string }{
|
||||
{"art-lojban", "jbo"},
|
||||
{"i-ami", "ami"},
|
||||
{"i-bnn", "bnn"},
|
||||
{"i-hak", "hak"},
|
||||
{"i-klingon", "tlh"},
|
||||
{"i-lux", "lb"},
|
||||
{"i-navajo", "nv"},
|
||||
{"i-pwn", "pwn"},
|
||||
{"i-tao", "tao"},
|
||||
{"i-tay", "tay"},
|
||||
{"i-tsu", "tsu"},
|
||||
{"no-bok", "nb"},
|
||||
{"no-nyn", "nn"},
|
||||
{"sgn-BE-FR", "sfb"},
|
||||
{"sgn-BE-NL", "vgt"},
|
||||
{"sgn-CH-DE", "sgg"},
|
||||
{"sgn-ch-de", "sgg"},
|
||||
{"zh-guoyu", "cmn"},
|
||||
{"zh-hakka", "hak"},
|
||||
{"zh-min-nan", "nan"},
|
||||
{"zh-xiang", "hsn"},
|
||||
|
||||
// Grandfathered tags with no modern replacement will be converted as follows:
|
||||
{"cel-gaulish", "xtg-x-cel-gaulish"},
|
||||
{"en-GB-oed", "en-GB-oxendict"},
|
||||
{"en-gb-oed", "en-GB-oxendict"},
|
||||
{"i-default", "en-x-i-default"},
|
||||
{"i-enochian", "und-x-i-enochian"},
|
||||
{"i-mingo", "see-x-i-mingo"},
|
||||
{"zh-min", "nan-x-zh-min"},
|
||||
|
||||
{"root", "und"},
|
||||
{"en_US_POSIX", "en-US-u-va-posix"},
|
||||
{"en_us_posix", "en-US-u-va-posix"},
|
||||
{"en-us-posix", "en-US-u-va-posix"},
|
||||
} {
|
||||
got := Make(tt.in)
|
||||
want := MustParse(tt.out)
|
||||
if got != want {
|
||||
t.Errorf("%s: got %q; want %q", tt.in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionID(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{"_ ", ""},
|
||||
{"_000", ""},
|
||||
{"419", "419"},
|
||||
{"AA", "AA"},
|
||||
{"ATF", "TF"},
|
||||
{"HV", "HV"},
|
||||
{"CT", "CT"},
|
||||
{"DY", "DY"},
|
||||
{"IC", "IC"},
|
||||
{"FQ", "FQ"},
|
||||
{"JT", "JT"},
|
||||
{"ZZ", "ZZ"},
|
||||
{"EU", "EU"},
|
||||
{"QO", "QO"},
|
||||
{"FX", "FX"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if tt.in[0] == '_' {
|
||||
id := tt.in[1:]
|
||||
if _, err := getRegionID(b(id)); err == nil {
|
||||
t.Errorf("%d:err(%s): found nil; want error", i, id)
|
||||
}
|
||||
continue
|
||||
}
|
||||
want, _ := getRegionID(b(tt.in))
|
||||
if s := want.String(); s != tt.out {
|
||||
t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out)
|
||||
}
|
||||
if len(tt.in) == 2 {
|
||||
want, _ := getRegionISO2(b(tt.in))
|
||||
if s := want.String(); s != tt.out {
|
||||
t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionType(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
r string
|
||||
t byte
|
||||
}{
|
||||
{"NL", bcp47Region | ccTLD},
|
||||
{"EU", bcp47Region | ccTLD}, // exceptionally reserved
|
||||
{"AN", bcp47Region | ccTLD}, // transitionally reserved
|
||||
|
||||
{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
|
||||
{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47
|
||||
|
||||
{"XA", iso3166UserAssigned | bcp47Region},
|
||||
{"ZZ", iso3166UserAssigned | bcp47Region},
|
||||
{"AA", iso3166UserAssigned | bcp47Region},
|
||||
{"QO", iso3166UserAssigned | bcp47Region},
|
||||
{"QM", iso3166UserAssigned | bcp47Region},
|
||||
{"XK", iso3166UserAssigned | bcp47Region},
|
||||
|
||||
{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
|
||||
} {
|
||||
r := MustParseRegion(tt.r)
|
||||
if tp := r.typ(); tp != tt.t {
|
||||
t.Errorf("Type(%s): got %x; want %x", tt.r, tp, tt.t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionISO3(t *testing.T) {
|
||||
tests := []struct {
|
||||
from, iso3, to string
|
||||
}{
|
||||
{" ", "ZZZ", "ZZ"},
|
||||
{"000", "ZZZ", "ZZ"},
|
||||
{"AA", "AAA", ""},
|
||||
{"CT", "CTE", ""},
|
||||
{"DY", "DHY", ""},
|
||||
{"EU", "QUU", ""},
|
||||
{"HV", "HVO", ""},
|
||||
{"IC", "ZZZ", "ZZ"},
|
||||
{"JT", "JTN", ""},
|
||||
{"PZ", "PCZ", ""},
|
||||
{"QU", "QUU", "EU"},
|
||||
{"QO", "QOO", ""},
|
||||
{"YD", "YMD", ""},
|
||||
{"FQ", "ATF", "TF"},
|
||||
{"TF", "ATF", ""},
|
||||
{"FX", "FXX", ""},
|
||||
{"ZZ", "ZZZ", ""},
|
||||
{"419", "ZZZ", "ZZ"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
r, _ := getRegionID(b(tt.from))
|
||||
if s := r.ISO3(); s != tt.iso3 {
|
||||
t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
|
||||
}
|
||||
if tt.iso3 == "" {
|
||||
continue
|
||||
}
|
||||
want := tt.to
|
||||
if tt.to == "" {
|
||||
want = tt.from
|
||||
}
|
||||
r, _ = getRegionID(b(want))
|
||||
if id, _ := getRegionISO3(b(tt.iso3)); id != r {
|
||||
t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionM49(t *testing.T) {
|
||||
fromTests := []struct {
|
||||
m49 int
|
||||
id string
|
||||
}{
|
||||
{0, ""},
|
||||
{-1, ""},
|
||||
{1000, ""},
|
||||
{10000, ""},
|
||||
|
||||
{001, "001"},
|
||||
{104, "MM"},
|
||||
{180, "CD"},
|
||||
{230, "ET"},
|
||||
{231, "ET"},
|
||||
{249, "FX"},
|
||||
{250, "FR"},
|
||||
{276, "DE"},
|
||||
{278, "DD"},
|
||||
{280, "DE"},
|
||||
{419, "419"},
|
||||
{626, "TL"},
|
||||
{736, "SD"},
|
||||
{840, "US"},
|
||||
{854, "BF"},
|
||||
{891, "CS"},
|
||||
{899, ""},
|
||||
{958, "AA"},
|
||||
{966, "QT"},
|
||||
{967, "EU"},
|
||||
{999, "ZZ"},
|
||||
}
|
||||
for _, tt := range fromTests {
|
||||
id, err := getRegionM49(tt.m49)
|
||||
if want, have := err != nil, tt.id == ""; want != have {
|
||||
t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
|
||||
continue
|
||||
}
|
||||
r, _ := getRegionID(b(tt.id))
|
||||
if r != id {
|
||||
t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
|
||||
}
|
||||
}
|
||||
|
||||
toTests := []struct {
|
||||
m49 int
|
||||
id string
|
||||
}{
|
||||
{0, "000"},
|
||||
{0, "IC"}, // Some codes don't have an ID
|
||||
|
||||
{001, "001"},
|
||||
{104, "MM"},
|
||||
{104, "BU"},
|
||||
{180, "CD"},
|
||||
{180, "ZR"},
|
||||
{231, "ET"},
|
||||
{250, "FR"},
|
||||
{249, "FX"},
|
||||
{276, "DE"},
|
||||
{278, "DD"},
|
||||
{419, "419"},
|
||||
{626, "TL"},
|
||||
{626, "TP"},
|
||||
{729, "SD"},
|
||||
{826, "GB"},
|
||||
{840, "US"},
|
||||
{854, "BF"},
|
||||
{891, "YU"},
|
||||
{891, "CS"},
|
||||
{958, "AA"},
|
||||
{966, "QT"},
|
||||
{967, "EU"},
|
||||
{967, "QU"},
|
||||
{999, "ZZ"},
|
||||
// For codes that don't have an M49 code use the replacement value,
|
||||
// if available.
|
||||
{854, "HV"}, // maps to Burkino Faso
|
||||
}
|
||||
for _, tt := range toTests {
|
||||
r, _ := getRegionID(b(tt.id))
|
||||
if r.M49() != tt.m49 {
|
||||
t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionDeprecation(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
{"BU", "MM"},
|
||||
{"BUR", "MM"},
|
||||
{"CT", "KI"},
|
||||
{"DD", "DE"},
|
||||
{"DDR", "DE"},
|
||||
{"DY", "BJ"},
|
||||
{"FX", "FR"},
|
||||
{"HV", "BF"},
|
||||
{"JT", "UM"},
|
||||
{"MI", "UM"},
|
||||
{"NH", "VU"},
|
||||
{"NQ", "AQ"},
|
||||
{"PU", "UM"},
|
||||
{"PZ", "PA"},
|
||||
{"QU", "EU"},
|
||||
{"RH", "ZW"},
|
||||
{"TP", "TL"},
|
||||
{"UK", "GB"},
|
||||
{"VD", "VN"},
|
||||
{"WK", "UM"},
|
||||
{"YD", "YE"},
|
||||
{"NL", "NL"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
rIn, _ := getRegionID([]byte(tt.in))
|
||||
rOut, _ := getRegionISO2([]byte(tt.out))
|
||||
r := normRegion(rIn)
|
||||
if rOut == rIn && r != 0 {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, r, tt.in)
|
||||
}
|
||||
if rOut != rIn && r != rOut {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, r, tt.out)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetScriptID(t *testing.T) {
|
||||
idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
|
||||
tests := []struct {
|
||||
in string
|
||||
out Script
|
||||
}{
|
||||
{" ", 0},
|
||||
{" ", 0},
|
||||
{" ", 0},
|
||||
{"", 0},
|
||||
{"Aaaa", 0},
|
||||
{"Bbbb", 1},
|
||||
{"Dddd", 2},
|
||||
{"dddd", 2},
|
||||
{"dDDD", 2},
|
||||
{"Eeee", 3},
|
||||
{"Zzzz", 4},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if id, err := getScriptID(idx, b(tt.in)); id != tt.out {
|
||||
t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
|
||||
} else if id == 0 && err == nil {
|
||||
t.Errorf("%d:%s: no error; expected one", i, tt.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivateUse(t *testing.T) {
|
||||
type test struct {
|
||||
s string
|
||||
private bool
|
||||
}
|
||||
tests := []test{
|
||||
{"en", false},
|
||||
{"und", false},
|
||||
{"pzn", false},
|
||||
{"qaa", true},
|
||||
{"qtz", true},
|
||||
{"qua", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := getLangID([]byte(tt.s))
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
tests = []test{
|
||||
{"001", false},
|
||||
{"419", false},
|
||||
{"899", false},
|
||||
{"900", false},
|
||||
{"957", false},
|
||||
{"958", true},
|
||||
{"AA", true},
|
||||
{"AC", false},
|
||||
{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
|
||||
{"QU", true}, // Canonicalizes to EU, User-assigned in ISO.
|
||||
{"QO", true}, // CLDR grouping, User-assigned in ISO.
|
||||
{"QA", false},
|
||||
{"QM", true},
|
||||
{"QZ", true},
|
||||
{"XA", true},
|
||||
{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
|
||||
{"XZ", true},
|
||||
{"ZW", false},
|
||||
{"ZZ", true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := getRegionID([]byte(tt.s))
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
tests = []test{
|
||||
{"Latn", false},
|
||||
{"Laaa", false}, // invalid
|
||||
{"Qaaa", true},
|
||||
{"Qabx", true},
|
||||
{"Qaby", false},
|
||||
{"Zyyy", false},
|
||||
{"Zzzz", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := getScriptID(script, []byte(tt.s))
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,226 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "errors"
|
||||
|
||||
// scriptRegionFlags is the integer type declared for the flag bits below.
// NOTE(review): the constants themselves are untyped; presumably the
// likely-subtag table entries store them in a field of this type — confirm
// against the generated tables.
type scriptRegionFlags uint8

// Flag bits tested on likely-subtag table entries.
const (
	// isList marks an entry that refers to a range of a secondary list
	// instead of holding a single match.
	isList = 1 << 0
	// scriptInFrom is checked when matching an entry against a tag's script.
	scriptInFrom = 1 << 1
	// regionInFrom is the third bit; it is not referenced in this part of
	// the file.
	regionInFrom = 1 << 2
)
|
||||
|
||||
func (t *Tag) setUndefinedLang(id Language) {
|
||||
if t.LangID == 0 {
|
||||
t.LangID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedScript(id Script) {
|
||||
if t.ScriptID == 0 {
|
||||
t.ScriptID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedRegion(id Region) {
|
||||
if t.RegionID == 0 || t.RegionID.Contains(id) {
|
||||
t.RegionID = id
|
||||
}
|
||||
}
|
||||
|
||||
var (
	// ErrMissingLikelyTagsData is returned when there is no data from which
	// the likely values of missing subtags could be computed.
	ErrMissingLikelyTagsData = errors.New("missing likely tags data")
)
|
||||
|
||||
// addLikelySubtags sets subtags to their most likely value, given the locale.
|
||||
// In most cases this means setting fields for unknown values, but in some
|
||||
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
|
||||
// if the given locale cannot be expanded.
|
||||
func (t Tag) addLikelySubtags() (Tag, error) {
|
||||
id, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
} else if id.equalTags(t) {
|
||||
return t, nil
|
||||
}
|
||||
id.RemakeString()
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// specializeRegion attempts to specialize a group region.
|
||||
func specializeRegion(t *Tag) bool {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Maximize returns a new tag with missing tags filled in.
|
||||
func (t Tag) Maximize() (Tag, error) {
|
||||
return addTags(t)
|
||||
}
|
||||
|
||||
func addTags(t Tag) (Tag, error) {
|
||||
// We leave private use identifiers alone.
|
||||
if t.IsPrivateUse() {
|
||||
return t, nil
|
||||
}
|
||||
if t.ScriptID != 0 && t.RegionID != 0 {
|
||||
if t.LangID != 0 {
|
||||
// already fully specified
|
||||
specializeRegion(&t)
|
||||
return t, nil
|
||||
}
|
||||
// Search matches for und-script-region. Note that for these cases
|
||||
// region will never be a group so there is no need to check for this.
|
||||
list := likelyRegion[t.RegionID : t.RegionID+1]
|
||||
if x := list[0]; x.flags&isList != 0 {
|
||||
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
|
||||
}
|
||||
for _, x := range list {
|
||||
// Deviating from the spec. See match_test.go for details.
|
||||
if Script(x.script) == t.ScriptID {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if t.LangID != 0 {
|
||||
// Search matches for lang-script and lang-region, where lang != und.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
list := likelyLangList[x.region : x.region+uint16(x.script)]
|
||||
if t.ScriptID != 0 {
|
||||
for _, x := range list {
|
||||
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
} else if t.RegionID != 0 {
|
||||
count := 0
|
||||
goodScript := true
|
||||
tt := t
|
||||
for _, x := range list {
|
||||
// We visit all entries for which the script was not
|
||||
// defined, including the ones where the region was not
|
||||
// defined. This allows for proper disambiguation within
|
||||
// regions.
|
||||
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
|
||||
tt.RegionID = Region(x.region)
|
||||
tt.setUndefinedScript(Script(x.script))
|
||||
goodScript = goodScript && tt.ScriptID == Script(x.script)
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count == 1 {
|
||||
return tt, nil
|
||||
}
|
||||
// Even if we fail to find a unique Region, we might have
|
||||
// an unambiguous script.
|
||||
if goodScript {
|
||||
t.ScriptID = tt.ScriptID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Search matches for und-script.
|
||||
if t.ScriptID != 0 {
|
||||
x := likelyScript[t.ScriptID]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// Search matches for und-region. If und-script-region exists, it would
|
||||
// have been found earlier.
|
||||
if t.RegionID != 0 {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
} else {
|
||||
x := likelyRegion[t.RegionID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyRegionList[x.lang]
|
||||
}
|
||||
if x.script != 0 && x.flags != scriptInFrom {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Search matches for lang.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyLangList[x.region]
|
||||
}
|
||||
if x.region != 0 {
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
}
|
||||
specializeRegion(&t)
|
||||
if t.LangID == 0 {
|
||||
t.LangID = _en // default language
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
return t, ErrMissingLikelyTagsData
|
||||
}
|
||||
|
||||
func (t *Tag) setTagsFrom(id Tag) {
|
||||
t.LangID = id.LangID
|
||||
t.ScriptID = id.ScriptID
|
||||
t.RegionID = id.RegionID
|
||||
}
|
||||
|
||||
// minimize removes the region or script subtags from t such that
|
||||
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
|
||||
func (t Tag) minimize() (Tag, error) {
|
||||
t, err := minimizeTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
t.RemakeString()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// minimizeTags mimics the behavior of the ICU 51 C implementation.
|
||||
func minimizeTags(t Tag) (Tag, error) {
|
||||
if t.equalTags(Und) {
|
||||
return t, nil
|
||||
}
|
||||
max, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
for _, id := range [...]Tag{
|
||||
{LangID: t.LangID},
|
||||
{LangID: t.LangID, RegionID: t.RegionID},
|
||||
{LangID: t.LangID, ScriptID: t.ScriptID},
|
||||
} {
|
||||
if x, err := addTags(id); err == nil && max.equalTags(x) {
|
||||
t.setTagsFrom(id)
|
||||
break
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// verbose is the -verbose test flag; when set, the internal tables of
// matchers are meant to be printed.
var verbose = flag.Bool(
	"verbose",
	false,
	"set to true to print the internal tables of matchers",
)
|
||||
|
||||
func TestAddLikelySubtags(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
{"aa", "aa-Latn-ET"},
|
||||
{"aa-Latn", "aa-Latn-ET"},
|
||||
{"aa-Arab", "aa-Arab-ET"},
|
||||
{"aa-Arab-ER", "aa-Arab-ER"},
|
||||
{"kk", "kk-Cyrl-KZ"},
|
||||
{"kk-CN", "kk-Arab-CN"},
|
||||
{"cmn", "cmn"},
|
||||
{"zh-AU", "zh-Hant-AU"},
|
||||
{"zh-VN", "zh-Hant-VN"},
|
||||
{"zh-SG", "zh-Hans-SG"},
|
||||
{"zh-Hant", "zh-Hant-TW"},
|
||||
{"zh-Hani", "zh-Hani-CN"},
|
||||
{"und-Hani", "zh-Hani-CN"},
|
||||
{"und", "en-Latn-US"},
|
||||
{"und-GB", "en-Latn-GB"},
|
||||
{"und-CW", "pap-Latn-CW"},
|
||||
{"und-YT", "fr-Latn-YT"},
|
||||
{"und-Arab", "ar-Arab-EG"},
|
||||
{"und-AM", "hy-Armn-AM"},
|
||||
{"und-TW", "zh-Hant-TW"},
|
||||
{"und-002", "en-Latn-NG"},
|
||||
{"und-Latn-002", "en-Latn-NG"},
|
||||
{"en-Latn-002", "en-Latn-NG"},
|
||||
{"en-002", "en-Latn-NG"},
|
||||
{"en-001", "en-Latn-US"},
|
||||
{"und-003", "en-Latn-US"},
|
||||
{"und-GB", "en-Latn-GB"},
|
||||
{"Latn-001", "en-Latn-US"},
|
||||
{"en-001", "en-Latn-US"},
|
||||
{"es-419", "es-Latn-419"},
|
||||
{"he-145", "he-Hebr-IL"},
|
||||
{"ky-145", "ky-Latn-TR"},
|
||||
{"kk", "kk-Cyrl-KZ"},
|
||||
// Don't specialize duplicate and ambiguous matches.
|
||||
{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
|
||||
{"ku-145", "ku-Latn-TR"}, // Matches IQ, TR, and LB, but kk -> TR.
|
||||
{"und-Arab-CC", "ms-Arab-CC"},
|
||||
{"und-Arab-GB", "ks-Arab-GB"},
|
||||
{"und-Hans-CC", "zh-Hans-CC"},
|
||||
{"und-CC", "en-Latn-CC"},
|
||||
{"sr", "sr-Cyrl-RS"},
|
||||
{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
|
||||
// We would like addLikelySubtags to generate the same results if the input
|
||||
// only changes by adding tags that would otherwise have been added
|
||||
// by the expansion.
|
||||
// In other words:
|
||||
// und-AA -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
|
||||
// und-AA -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
|
||||
// und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
|
||||
// und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
|
||||
// xx -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
|
||||
// xx -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
|
||||
//
|
||||
// The algorithm specified in
|
||||
// https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
|
||||
// Section C.10, does not handle the first case. For example,
|
||||
// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but not
|
||||
// there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ
|
||||
// would expand to en-Latn-BJ, violating the aforementioned principle.
|
||||
// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
|
||||
// if a rule of the form und-AA -> xx-Scrp-AA is defined.
|
||||
// Note that as of version 23, CLDR has some explicitly specified
|
||||
// entries that do not conform to these rules. The implementation
|
||||
// will not correct these explicit inconsistencies. A later versions of CLDR
|
||||
// is supposed to fix this.
|
||||
{"und-Latn-BJ", "fr-Latn-BJ"},
|
||||
{"und-Bugi-ID", "bug-Bugi-ID"},
|
||||
// regions, scripts and languages without definitions
|
||||
{"und-Arab-AA", "ar-Arab-AA"},
|
||||
{"und-Afak-RE", "fr-Afak-RE"},
|
||||
{"und-Arab-GB", "ks-Arab-GB"},
|
||||
{"abp-Arab-GB", "abp-Arab-GB"},
|
||||
// script has preference over region
|
||||
{"und-Arab-NL", "ar-Arab-NL"},
|
||||
{"zza", "zza-Latn-TR"},
|
||||
// preserve variants and extensions
|
||||
{"de-1901", "de-Latn-DE-1901"},
|
||||
{"de-x-abc", "de-Latn-DE-x-abc"},
|
||||
{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
|
||||
{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
|
||||
}
|
||||
for i, tt := range tests {
|
||||
in, _ := Parse(tt.in)
|
||||
out, _ := Parse(tt.out)
|
||||
in, _ = in.addLikelySubtags()
|
||||
if in.String() != out.String() {
|
||||
t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
func TestMinimize(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
{"aa", "aa"},
|
||||
{"aa-Latn", "aa"},
|
||||
{"aa-Latn-ET", "aa"},
|
||||
{"aa-ET", "aa"},
|
||||
{"aa-Arab", "aa-Arab"},
|
||||
{"aa-Arab-ER", "aa-Arab-ER"},
|
||||
{"aa-Arab-ET", "aa-Arab"},
|
||||
{"und", "und"},
|
||||
{"und-Latn", "und"},
|
||||
{"und-Latn-US", "und"},
|
||||
{"en-Latn-US", "en"},
|
||||
{"cmn", "cmn"},
|
||||
{"cmn-Hans", "cmn-Hans"},
|
||||
{"cmn-Hant", "cmn-Hant"},
|
||||
{"zh-AU", "zh-AU"},
|
||||
{"zh-VN", "zh-VN"},
|
||||
{"zh-SG", "zh-SG"},
|
||||
{"zh-Hant", "zh-Hant"},
|
||||
{"zh-Hant-TW", "zh-TW"},
|
||||
{"zh-Hans", "zh"},
|
||||
{"zh-Hani", "zh-Hani"},
|
||||
{"und-Hans", "und-Hans"},
|
||||
{"und-Hani", "und-Hani"},
|
||||
|
||||
{"und-CW", "und-CW"},
|
||||
{"und-YT", "und-YT"},
|
||||
{"und-Arab", "und-Arab"},
|
||||
{"und-AM", "und-AM"},
|
||||
{"und-Arab-CC", "und-Arab-CC"},
|
||||
{"und-CC", "und-CC"},
|
||||
{"und-Latn-BJ", "und-BJ"},
|
||||
{"und-Bugi-ID", "und-Bugi"},
|
||||
{"bug-Bugi-ID", "bug-Bugi"},
|
||||
// regions, scripts and languages without definitions
|
||||
{"und-Arab-AA", "und-Arab-AA"},
|
||||
// preserve variants and extensions
|
||||
{"de-Latn-1901", "de-1901"},
|
||||
{"de-Latn-x-abc", "de-x-abc"},
|
||||
{"de-DE-1901-x-abc", "de-1901-x-abc"},
|
||||
{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
|
||||
}
|
||||
for i, tt := range tests {
|
||||
in, _ := Parse(tt.in)
|
||||
out, _ := Parse(tt.out)
|
||||
min, _ := in.minimize()
|
||||
if min.String() != out.String() {
|
||||
t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
|
||||
}
|
||||
max, _ := min.addLikelySubtags()
|
||||
if x, _ := in.addLikelySubtags(); x.String() != max.String() {
|
||||
t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,608 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; the check only tests that b sorts
// after '9', which tells the two apart for such input.
func isAlpha(b byte) bool {
	return '9' < b
}
|
||||
|
||||
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// An empty slice yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
var (
	// ErrSyntax is returned by any of the parsing functions when the
	// input is not well-formed, according to BCP 47.
	// TODO: return the position at which the syntax error occurred?
	ErrSyntax = errors.New("language: tag is not well-formed")

	// ErrDuplicateKey is returned when a tag contains the same key twice
	// with different values in the -u section.
	ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
)
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	// v holds the offending subtag, zero-padded to 8 bytes.
	v [8]byte
}

// NewValueError creates a new ValueError for the given subtag. At most the
// first 8 bytes of tag are retained.
func NewValueError(tag []byte) ValueError {
	e := ValueError{}
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag: everything before the first zero byte, or
// all 8 bytes when there is none.
func (e ValueError) tag() []byte {
	if n := bytes.IndexByte(e.v[:], 0); n >= 0 {
		return e.v[:n]
	}
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
|
||||
|
||||
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
|
||||
type scanner struct {
|
||||
b []byte
|
||||
bytes [max99thPercentileSize]byte
|
||||
token []byte
|
||||
start int // start position of the current token
|
||||
end int // end position of the current token
|
||||
next int // next point for scan
|
||||
err error
|
||||
done bool
|
||||
}
|
||||
|
||||
func makeScannerString(s string) scanner {
|
||||
scan := scanner{}
|
||||
if len(s) <= len(scan.bytes) {
|
||||
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||
} else {
|
||||
scan.b = []byte(s)
|
||||
}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
// makeScanner returns a scanner using b as the input buffer.
|
||||
// b is not copied and may be modified by the scanner routines.
|
||||
func makeScanner(b []byte) scanner {
|
||||
scan := scanner{b: b}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
func (s *scanner) init() {
|
||||
for i, c := range s.b {
|
||||
if c == '_' {
|
||||
s.b[i] = '-'
|
||||
}
|
||||
}
|
||||
s.scan()
|
||||
}
|
||||
|
||||
// restToLower converts the string between start and end to lower case.
|
||||
func (s *scanner) toLower(start, end int) {
|
||||
for i := start; i < end; i++ {
|
||||
c := s.b[i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s.b[i] += 'a' - 'A'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setError(e error) {
|
||||
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
|
||||
s.err = e
|
||||
}
|
||||
}
|
||||
|
||||
// resizeRange shrinks or grows the array at position oldStart such that
|
||||
// a new string of size newSize can fit between oldStart and oldEnd.
|
||||
// Sets the scan point to after the resized range.
|
||||
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
||||
s.start = oldStart
|
||||
if end := oldStart + newSize; end != oldEnd {
|
||||
diff := end - oldEnd
|
||||
var b []byte
|
||||
if n := len(s.b) + diff; n > cap(s.b) {
|
||||
b = make([]byte, n)
|
||||
copy(b, s.b[:oldStart])
|
||||
} else {
|
||||
b = s.b[:n]
|
||||
}
|
||||
copy(b[end:], s.b[oldEnd:])
|
||||
s.b = b
|
||||
s.next = end + (s.next - s.end)
|
||||
s.end = end
|
||||
}
|
||||
}
|
||||
|
||||
// replace replaces the current token with repl.
|
||||
func (s *scanner) replace(repl string) {
|
||||
s.resizeRange(s.start, s.end, len(repl))
|
||||
copy(s.b[s.start:], repl)
|
||||
}
|
||||
|
||||
// gobble removes the current token from the input.
|
||||
// Caller must call scan after calling gobble.
|
||||
func (s *scanner) gobble(e error) {
|
||||
s.setError(e)
|
||||
if s.start == 0 {
|
||||
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
||||
s.end = 0
|
||||
} else {
|
||||
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
||||
s.end = s.start - 1
|
||||
}
|
||||
s.next = s.start
|
||||
}
|
||||
|
||||
// deleteRange removes the given range from s.b before the current token.
|
||||
func (s *scanner) deleteRange(start, end int) {
|
||||
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
||||
diff := end - start
|
||||
s.next -= diff
|
||||
s.start -= diff
|
||||
s.end -= diff
|
||||
}
|
||||
|
||||
// scan parses the next token of a BCP 47 string. Tokens that are larger
|
||||
// than 8 characters or include non-alphanumeric characters result in an error
|
||||
// and are gobbled and removed from the output.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) scan() (end int) {
|
||||
end = s.end
|
||||
s.token = nil
|
||||
for s.start = s.next; s.next < len(s.b); {
|
||||
i := bytes.IndexByte(s.b[s.next:], '-')
|
||||
if i == -1 {
|
||||
s.end = len(s.b)
|
||||
s.next = len(s.b)
|
||||
i = s.end - s.start
|
||||
} else {
|
||||
s.end = s.next + i
|
||||
s.next = s.end + 1
|
||||
}
|
||||
token := s.b[s.start:s.end]
|
||||
if i < 1 || i > 8 || !isAlphaNum(token) {
|
||||
s.gobble(ErrSyntax)
|
||||
continue
|
||||
}
|
||||
s.token = token
|
||||
return end
|
||||
}
|
||||
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
|
||||
s.setError(ErrSyntax)
|
||||
s.b = s.b[:len(s.b)-1]
|
||||
}
|
||||
s.done = true
|
||||
return end
|
||||
}
|
||||
|
||||
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||
end = s.end
|
||||
s.scan()
|
||||
for ; len(s.token) >= min; s.scan() {
|
||||
end = s.end
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
// TODO: consider supporting old-style locale key-value pairs.
|
||||
if s == "" {
|
||||
return Und, ErrSyntax
|
||||
}
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
t = Und
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
}()
|
||||
if len(s) <= maxAltTaglen {
|
||||
b := [maxAltTaglen]byte{}
|
||||
for i, c := range s {
|
||||
// Generating invalid UTF-8 is okay as it won't match.
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
} else if c == '_' {
|
||||
c = '-'
|
||||
}
|
||||
b[i] = byte(c)
|
||||
}
|
||||
if t, ok := grandfathered(b); ok {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
scan := makeScannerString(s)
|
||||
return parse(&scan, s)
|
||||
}
|
||||
|
||||
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||
t = Und
|
||||
var end int
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
if n == 0 || scan.token[0] != 'x' {
|
||||
return t, ErrSyntax
|
||||
}
|
||||
end = parseExtensions(scan)
|
||||
} else if n >= 4 {
|
||||
return Und, ErrSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan, true)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
} else if end < len(scan.b) {
|
||||
scan.setError(ErrSyntax)
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
}
|
||||
if int(t.pVariant) < len(scan.b) {
|
||||
if end < len(s) {
|
||||
s = s[:end]
|
||||
}
|
||||
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||
t.str = s
|
||||
} else {
|
||||
t.str = string(scan.b)
|
||||
}
|
||||
} else {
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
}
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
|
||||
// It returns a Tag and the end position in the input that was parsed.
|
||||
// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
|
||||
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
|
||||
var e error
|
||||
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||
t.LangID, e = getLangID(scan.token)
|
||||
scan.setError(e)
|
||||
scan.replace(t.LangID.String())
|
||||
langStart := scan.start
|
||||
end = scan.scan()
|
||||
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||
// to a tag of the form <extlang>.
|
||||
if doNorm {
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.LangID = lang
|
||||
langStr := lang.String()
|
||||
copy(scan.b[langStart:], langStr)
|
||||
scan.b[langStart+len(langStr)] = '-'
|
||||
scan.start = langStart + len(langStr) + 1
|
||||
}
|
||||
scan.gobble(e)
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||
t.ScriptID, e = getScriptID(script, scan.token)
|
||||
if t.ScriptID == 0 {
|
||||
scan.gobble(e)
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
if n := len(scan.token); n >= 2 && n <= 3 {
|
||||
t.RegionID, e = getRegionID(scan.token)
|
||||
if t.RegionID == 0 {
|
||||
scan.gobble(e)
|
||||
} else {
|
||||
scan.replace(t.RegionID.String())
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
scan.toLower(scan.start, len(scan.b))
|
||||
t.pVariant = byte(end)
|
||||
end = parseVariants(scan, end, t)
|
||||
t.pExt = uint16(end)
|
||||
return t, end
|
||||
}
|
||||
|
||||
// separator is the subtag separator, used to join variants and extensions.
var separator = []byte("-")
|
||||
|
||||
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||
// Duplicate variants are removed.
|
||||
func parseVariants(scan *scanner, end int, t Tag) int {
|
||||
start := scan.start
|
||||
varIDBuf := [4]uint8{}
|
||||
variantBuf := [4][]byte{}
|
||||
varID := varIDBuf[:0]
|
||||
variant := variantBuf[:0]
|
||||
last := -1
|
||||
needSort := false
|
||||
for ; len(scan.token) >= 4; scan.scan() {
|
||||
// TODO: measure the impact of needing this conversion and redesign
|
||||
// the data structure if there is an issue.
|
||||
v, ok := variantIndex[string(scan.token)]
|
||||
if !ok {
|
||||
// unknown variant
|
||||
// TODO: allow user-defined variants?
|
||||
scan.gobble(NewValueError(scan.token))
|
||||
continue
|
||||
}
|
||||
varID = append(varID, v)
|
||||
variant = append(variant, scan.token)
|
||||
if !needSort {
|
||||
if last < int(v) {
|
||||
last = int(v)
|
||||
} else {
|
||||
needSort = true
|
||||
// There is no legal combinations of more than 7 variants
|
||||
// (and this is by no means a useful sequence).
|
||||
const maxVariants = 8
|
||||
if len(varID) > maxVariants {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
end = scan.end
|
||||
}
|
||||
if needSort {
|
||||
sort.Sort(variantsSort{varID, variant})
|
||||
k, l := 0, -1
|
||||
for i, v := range varID {
|
||||
w := int(v)
|
||||
if l == w {
|
||||
// Remove duplicates.
|
||||
continue
|
||||
}
|
||||
varID[k] = varID[i]
|
||||
variant[k] = variant[i]
|
||||
k++
|
||||
l = w
|
||||
}
|
||||
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
|
||||
end = start - 1
|
||||
} else {
|
||||
scan.resizeRange(start, end, len(str))
|
||||
copy(scan.b[scan.start:], str)
|
||||
end = scan.end
|
||||
}
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// variantsSort orders variant tokens by their numeric index; its Len, Swap
// and Less methods match sort.Interface. The two slices are parallel: v[k]
// is the token whose index is i[k].
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of variants.
func (s variantsSort) Len() int { return len(s.i) }

// Swap exchanges both the indices and the tokens at positions i and j.
func (s variantsSort) Swap(i, j int) {
	s.v[i], s.v[j] = s.v[j], s.v[i]
	s.i[i], s.i[j] = s.i[j], s.i[i]
}

// Less reports whether the variant at i has a smaller index than the one
// at j.
func (s variantsSort) Less(i, j int) bool { return s.i[i] < s.i[j] }
|
||||
|
||||
// bytesSort orders byte slices by their first n bytes; its Len, Swap and
// Less methods match sort.Interface.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of byte slices.
func (b bytesSort) Len() int { return len(b.b) }

// Swap exchanges the slices at positions i and j.
func (b bytesSort) Swap(i, j int) {
	b.b[j], b.b[i] = b.b[i], b.b[j]
}

// Less compares the first n bytes of the slices at i and j and reports
// whether the one at i sorts first. Equal prefixes yield false.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if x, y := b.b[i][k], b.b[j][k]; x != y {
			return x < y
		}
	}
	return false
}
|
||||
|
||||
// parseExtensions parses and normalizes the extensions in the buffer.
|
||||
// It returns the last position of scan.b that is part of any extension.
|
||||
// It also trims scan.b to remove excess parts accordingly.
|
||||
func parseExtensions(scan *scanner) int {
|
||||
start := scan.start
|
||||
exts := [][]byte{}
|
||||
private := []byte{}
|
||||
end := scan.end
|
||||
for len(scan.token) == 1 {
|
||||
extStart := scan.start
|
||||
ext := scan.token[0]
|
||||
end = parseExtension(scan)
|
||||
extension := scan.b[extStart:end]
|
||||
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
|
||||
scan.setError(ErrSyntax)
|
||||
end = extStart
|
||||
continue
|
||||
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
|
||||
scan.b = scan.b[:end]
|
||||
return end
|
||||
} else if ext == 'x' {
|
||||
private = extension
|
||||
break
|
||||
}
|
||||
exts = append(exts, extension)
|
||||
}
|
||||
sort.Sort(bytesSort{exts, 1})
|
||||
if len(private) > 0 {
|
||||
exts = append(exts, private)
|
||||
}
|
||||
scan.b = scan.b[:start]
|
||||
if len(exts) > 0 {
|
||||
scan.b = append(scan.b, bytes.Join(exts, separator)...)
|
||||
} else if start > 0 {
|
||||
// Strip trailing '-'.
|
||||
scan.b = scan.b[:start-1]
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// parseExtension parses a single extension and returns the position of
|
||||
// the extension end.
|
||||
func parseExtension(scan *scanner) int {
|
||||
start, end := scan.start, scan.end
|
||||
switch scan.token[0] {
|
||||
case 'u': // https://www.ietf.org/rfc/rfc6067.txt
|
||||
attrStart := end
|
||||
scan.scan()
|
||||
for last := []byte{}; len(scan.token) > 2; scan.scan() {
|
||||
if bytes.Compare(scan.token, last) != -1 {
|
||||
// Attributes are unsorted. Start over from scratch.
|
||||
p := attrStart + 1
|
||||
scan.next = p
|
||||
attrs := [][]byte{}
|
||||
for scan.scan(); len(scan.token) > 2; scan.scan() {
|
||||
attrs = append(attrs, scan.token)
|
||||
end = scan.end
|
||||
}
|
||||
sort.Sort(bytesSort{attrs, 3})
|
||||
copy(scan.b[p:], bytes.Join(attrs, separator))
|
||||
break
|
||||
}
|
||||
last = scan.token
|
||||
end = scan.end
|
||||
}
|
||||
// Scan key-type sequences. A key is of length 2 and may be followed
|
||||
// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
|
||||
var last, key []byte
|
||||
for attrEnd := end; len(scan.token) == 2; last = key {
|
||||
key = scan.token
|
||||
end = scan.end
|
||||
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
|
||||
end = scan.end
|
||||
}
|
||||
// TODO: check key value validity
|
||||
if bytes.Compare(key, last) != 1 || scan.err != nil {
|
||||
// We have an invalid key or the keys are not sorted.
|
||||
// Start scanning keys from scratch and reorder.
|
||||
p := attrEnd + 1
|
||||
scan.next = p
|
||||
keys := [][]byte{}
|
||||
for scan.scan(); len(scan.token) == 2; {
|
||||
keyStart := scan.start
|
||||
end = scan.end
|
||||
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
|
||||
end = scan.end
|
||||
}
|
||||
keys = append(keys, scan.b[keyStart:end])
|
||||
}
|
||||
sort.Stable(bytesSort{keys, 2})
|
||||
if n := len(keys); n > 0 {
|
||||
k := 0
|
||||
for i := 1; i < n; i++ {
|
||||
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
|
||||
k++
|
||||
keys[k] = keys[i]
|
||||
} else if !bytes.Equal(keys[k], keys[i]) {
|
||||
scan.setError(ErrDuplicateKey)
|
||||
}
|
||||
}
|
||||
keys = keys[:k+1]
|
||||
}
|
||||
reordered := bytes.Join(keys, separator)
|
||||
if e := p + len(reordered); e < end {
|
||||
scan.deleteRange(e, end)
|
||||
end = e
|
||||
}
|
||||
copy(scan.b[p:], reordered)
|
||||
break
|
||||
}
|
||||
}
|
||||
case 't': // https://www.ietf.org/rfc/rfc6497.txt
|
||||
scan.scan()
|
||||
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||
_, end = parseTag(scan, false)
|
||||
scan.toLower(start, end)
|
||||
}
|
||||
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||
end = scan.acceptMinSize(3)
|
||||
}
|
||||
case 'x':
|
||||
end = scan.acceptMinSize(1)
|
||||
default:
|
||||
end = scan.acceptMinSize(2)
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// getExtension returns the name, body and end position of the extension.
|
||||
func getExtension(s string, p int) (end int, ext string) {
|
||||
if s[p] == '-' {
|
||||
p++
|
||||
}
|
||||
if s[p] == 'x' {
|
||||
return len(s), s[p:]
|
||||
}
|
||||
end = nextExtension(s, p)
|
||||
return end, s[p:end]
|
||||
}
|
||||
|
||||
// nextExtension returns the position of the '-' that starts the next
// "-<char>-" pattern in s at or after position p, or len(s) if there is none.
// The scan is a simple linear one: in the vast majority of cases a language
// tag has at most one extension, and extensions tend to be small.
func nextExtension(s string, p int) int {
	// A pattern starting at p needs s[p+2] plus at least one following byte.
	limit := len(s) - 3
	for p < limit {
		if s[p] != '-' {
			p++
			continue
		}
		if s[p+2] == '-' {
			return p
		}
		// s[p+2] is not a separator, so the next candidate is at p+3.
		p += 3
	}
	return len(s)
}
|
||||
@@ -0,0 +1,371 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// scanTest is one scanner test case: an input string, the tokens the scanner
// is expected to produce for it, and whether scanning should succeed.
type scanTest struct {
	// ok is true if scanning does not result in an error.
	ok bool
	// in is the input handed to the scanner.
	in string
	// tok lists the expected tokens, in order.
	tok []string
}
|
||||
|
||||
var tests = []scanTest{
|
||||
{true, "", []string{}},
|
||||
{true, "1", []string{"1"}},
|
||||
{true, "en", []string{"en"}},
|
||||
{true, "root", []string{"root"}},
|
||||
{true, "maxchars", []string{"maxchars"}},
|
||||
{false, "bad/", []string{}},
|
||||
{false, "morethan8", []string{}},
|
||||
{false, "-", []string{}},
|
||||
{false, "----", []string{}},
|
||||
{false, "_", []string{}},
|
||||
{true, "en-US", []string{"en", "US"}},
|
||||
{true, "en_US", []string{"en", "US"}},
|
||||
{false, "en-US-", []string{"en", "US"}},
|
||||
{false, "en-US--", []string{"en", "US"}},
|
||||
{false, "en-US---", []string{"en", "US"}},
|
||||
{false, "en--US", []string{"en", "US"}},
|
||||
{false, "-en-US", []string{"en", "US"}},
|
||||
{false, "-en--US-", []string{"en", "US"}},
|
||||
{false, "-en--US-", []string{"en", "US"}},
|
||||
{false, "en-.-US", []string{"en", "US"}},
|
||||
{false, ".-en--US-.", []string{"en", "US"}},
|
||||
{false, "en-u.-US", []string{"en", "US"}},
|
||||
{true, "en-u1-US", []string{"en", "u1", "US"}},
|
||||
{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
|
||||
{false, "moreThan8-moreThan8-e", []string{"e"}},
|
||||
}
|
||||
|
||||
func TestScan(t *testing.T) {
|
||||
for i, tt := range tests {
|
||||
scan := makeScannerString(tt.in)
|
||||
for j := 0; !scan.done; j++ {
|
||||
if j >= len(tt.tok) {
|
||||
t.Errorf("%d: extra token %q", i, scan.token)
|
||||
} else if tag.Compare(tt.tok[j], scan.token) != 0 {
|
||||
t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
|
||||
break
|
||||
}
|
||||
scan.scan()
|
||||
}
|
||||
if s := strings.Join(tt.tok, "-"); tag.Compare(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 {
|
||||
t.Errorf("%d: input: found %q; want %q", i, scan.b, s)
|
||||
}
|
||||
if (scan.err == nil) != tt.ok {
|
||||
t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcceptMinSize(t *testing.T) {
|
||||
for i, tt := range tests {
|
||||
// count number of successive tokens with a minimum size.
|
||||
for sz := 1; sz <= 8; sz++ {
|
||||
scan := makeScannerString(tt.in)
|
||||
scan.end, scan.next = 0, 0
|
||||
end := scan.acceptMinSize(sz)
|
||||
n := 0
|
||||
for i := 0; i < len(tt.tok) && len(tt.tok[i]) >= sz; i++ {
|
||||
n += len(tt.tok[i])
|
||||
if i > 0 {
|
||||
n++
|
||||
}
|
||||
}
|
||||
if end != n {
|
||||
t.Errorf("%d:%d: found len %d; want %d", i, sz, end, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseTest is one entry of the table returned by parseTests: an input tag
// and the parts and flags the parser is expected to produce for it.
type parseTest struct {
	// i is the index of this test within the table.
	i int
	// in is the tag to parse.
	in string

	// Expected parts of the parsed tag.
	lang     string
	script   string
	region   string
	variants string
	ext      string
	// extList is only used when more than one extension is present.
	extList []string

	// invalid is set when parsing is expected to report an error.
	invalid bool
	// rewrite marks a special rewrite that is not handled by parseTag.
	rewrite bool
	// changed is set when the string needed to be reformatted.
	changed bool
}
|
||||
|
||||
func parseTests() []parseTest {
|
||||
tests := []parseTest{
|
||||
{in: "root", lang: "und"},
|
||||
{in: "und", lang: "und"},
|
||||
{in: "en", lang: "en"},
|
||||
{in: "xy", lang: "und", invalid: true},
|
||||
{in: "en-ZY", lang: "en", invalid: true},
|
||||
{in: "gsw", lang: "gsw"},
|
||||
{in: "sr_Latn", lang: "sr", script: "Latn"},
|
||||
{in: "af-Arab", lang: "af", script: "Arab"},
|
||||
{in: "nl-BE", lang: "nl", region: "BE"},
|
||||
{in: "es-419", lang: "es", region: "419"},
|
||||
{in: "und-001", lang: "und", region: "001"},
|
||||
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
|
||||
// Variants
|
||||
{in: "de-1901", lang: "de", variants: "1901"},
|
||||
// Accept with unsuppressed script.
|
||||
{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
|
||||
// Specialized.
|
||||
{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
|
||||
{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
|
||||
{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
|
||||
{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
|
||||
{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
|
||||
// Maximum number of variants while adhering to prefix rules.
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
|
||||
|
||||
// Sorting.
|
||||
{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
|
||||
// Duplicates variants are removed, but not an error.
|
||||
{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
|
||||
|
||||
// Variants that do not have correct prefixes. We still accept these.
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
|
||||
{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
|
||||
// Invalid variant.
|
||||
{in: "de-1902", lang: "de", variants: "", invalid: true},
|
||||
|
||||
{in: "EN_CYRL", lang: "en", script: "Cyrl"},
|
||||
// private use and extensions
|
||||
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
|
||||
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
|
||||
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
|
||||
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
|
||||
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
|
||||
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
|
||||
{in: "en-v-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
|
||||
{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
|
||||
{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
|
||||
{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
|
||||
{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
|
||||
{in: "en-u-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
|
||||
{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
|
||||
{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
|
||||
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
|
||||
{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
|
||||
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
|
||||
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
|
||||
{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, changed: true},
|
||||
{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, changed: true},
|
||||
// LDML spec is not specific about it, but remove duplicates and return an error if the values differ.
|
||||
{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
|
||||
// No change as the result is a substring of the original!
|
||||
{in: "en-US-u-cu-xau-cu-eur", lang: "en", region: "US", ext: "u-cu-xau", invalid: true, changed: false},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
|
||||
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
|
||||
// Not necessary to have changed here.
|
||||
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
|
||||
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
|
||||
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
|
||||
{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
|
||||
{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
|
||||
{in: "fr-est", lang: "et", changed: false},
|
||||
{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false},
|
||||
// The same input here is used in both TestParse and TestParseExtensions.
|
||||
// changed should be true for this input in TestParse but changed should be false for this input in TestParseExtensions
|
||||
// because the entire input has been reformatted but the extension part hasn't.
|
||||
// {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
|
||||
// invalid
|
||||
{in: "", lang: "und", invalid: true},
|
||||
{in: "-", lang: "und", invalid: true},
|
||||
{in: "x", lang: "und", invalid: true},
|
||||
{in: "x-", lang: "und", invalid: true},
|
||||
{in: "x--", lang: "und", invalid: true},
|
||||
{in: "a-a-b-c-d", lang: "und", invalid: true},
|
||||
{in: "en-", lang: "en", invalid: true},
|
||||
{in: "enne-", lang: "und", invalid: true},
|
||||
{in: "en.", lang: "und", invalid: true},
|
||||
{in: "en.-latn", lang: "und", invalid: true},
|
||||
{in: "en.-en", lang: "en", invalid: true},
|
||||
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
|
||||
{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
|
||||
// TODO: check key-value validity
|
||||
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
|
||||
{in: "en-t-abcd", lang: "en", invalid: true},
|
||||
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
|
||||
// rewrites (more tests in TestGrandfathered)
|
||||
{in: "zh-min-nan", lang: "nan"},
|
||||
{in: "zh-yue", lang: "yue"},
|
||||
{in: "zh-xiang", lang: "hsn", rewrite: true},
|
||||
{in: "zh-guoyu", lang: "cmn", rewrite: true},
|
||||
{in: "iw", lang: "iw"},
|
||||
{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
|
||||
{in: "i-klingon", lang: "tlh", rewrite: true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tests[i].i = i
|
||||
if tt.extList != nil {
|
||||
tests[i].ext = strings.Join(tt.extList, "-")
|
||||
}
|
||||
if tt.ext != "" && tt.extList == nil {
|
||||
tests[i].extList = []string{tt.ext}
|
||||
}
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
func TestParseExtensions(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
if tt.ext == "" || tt.rewrite {
|
||||
continue
|
||||
}
|
||||
scan := makeScannerString(tt.in)
|
||||
if len(scan.b) > 1 && scan.b[1] != '-' {
|
||||
scan.end = nextExtension(string(scan.b), 0)
|
||||
scan.next = scan.end + 1
|
||||
scan.scan()
|
||||
}
|
||||
start := scan.start
|
||||
scan.toLower(start, len(scan.b))
|
||||
parseExtensions(&scan)
|
||||
ext := string(scan.b[start:])
|
||||
if ext != tt.ext {
|
||||
t.Errorf("%d(%s): ext was %v; want %v", i, tt.in, ext, tt.ext)
|
||||
}
|
||||
if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed {
|
||||
t.Errorf("%d(%s): changed was %v; want %v", i, tt.in, changed, tt.changed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// partChecks runs checks for each part by calling the function returned by f.
|
||||
func partChecks(t *testing.T, f func(*testing.T, *parseTest) (Tag, bool)) {
|
||||
for i, tt := range parseTests() {
|
||||
t.Run(tt.in, func(t *testing.T) {
|
||||
tag, skip := f(t, &tt)
|
||||
if skip {
|
||||
return
|
||||
}
|
||||
if l, _ := getLangID(b(tt.lang)); l != tag.LangID {
|
||||
t.Errorf("%d: lang was %q; want %q", i, tag.LangID, l)
|
||||
}
|
||||
if sc, _ := getScriptID(script, b(tt.script)); sc != tag.ScriptID {
|
||||
t.Errorf("%d: script was %q; want %q", i, tag.ScriptID, sc)
|
||||
}
|
||||
if r, _ := getRegionID(b(tt.region)); r != tag.RegionID {
|
||||
t.Errorf("%d: region was %q; want %q", i, tag.RegionID, r)
|
||||
}
|
||||
if tag.str == "" {
|
||||
return
|
||||
}
|
||||
p := int(tag.pVariant)
|
||||
if p < int(tag.pExt) {
|
||||
p++
|
||||
}
|
||||
if s, g := tag.str[p:tag.pExt], tt.variants; s != g {
|
||||
t.Errorf("%d: variants was %q; want %q", i, s, g)
|
||||
}
|
||||
p = int(tag.pExt)
|
||||
if p > 0 && p < len(tag.str) {
|
||||
p++
|
||||
}
|
||||
if s, g := (tag.str)[p:], tt.ext; s != g {
|
||||
t.Errorf("%d: extensions were %q; want %q", i, s, g)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTag(t *testing.T) {
|
||||
partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
|
||||
if strings.HasPrefix(tt.in, "x-") || tt.rewrite {
|
||||
return Tag{}, true
|
||||
}
|
||||
scan := makeScannerString(tt.in)
|
||||
id, end := parseTag(&scan, true)
|
||||
id.str = string(scan.b[:end])
|
||||
tt.ext = ""
|
||||
tt.extList = []string{}
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
|
||||
id, err := Parse(tt.in)
|
||||
ext := ""
|
||||
if id.str != "" {
|
||||
if strings.HasPrefix(id.str, "x-") {
|
||||
ext = id.str
|
||||
} else if int(id.pExt) < len(id.str) && id.pExt > 0 {
|
||||
ext = id.str[id.pExt+1:]
|
||||
}
|
||||
}
|
||||
if tag, _ := Parse(id.String()); tag.String() != id.String() {
|
||||
t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, id.String(), tag.String())
|
||||
}
|
||||
if ext != tt.ext {
|
||||
t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext)
|
||||
}
|
||||
changed := id.str != "" && !strings.HasPrefix(tt.in, id.str)
|
||||
if changed != tt.changed {
|
||||
t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed)
|
||||
}
|
||||
if (err != nil) != tt.invalid {
|
||||
t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err)
|
||||
}
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestErrors(t *testing.T) {
|
||||
mkInvalid := func(s string) error {
|
||||
return NewValueError([]byte(s))
|
||||
}
|
||||
tests := []struct {
|
||||
in string
|
||||
out error
|
||||
}{
|
||||
// invalid subtags.
|
||||
{"ac", mkInvalid("ac")},
|
||||
{"AC", mkInvalid("ac")},
|
||||
{"aa-Uuuu", mkInvalid("Uuuu")},
|
||||
{"aa-AB", mkInvalid("AB")},
|
||||
// ill-formed wins over invalid.
|
||||
{"ac-u", ErrSyntax},
|
||||
{"ac-u-ca", mkInvalid("ac")},
|
||||
{"ac-u-ca-co-pinyin", mkInvalid("ac")},
|
||||
{"noob", ErrSyntax},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
_, err := Parse(tt.in)
|
||||
if err != tt.out {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,48 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Language {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Und is the root language.
|
||||
var Und Tag
|
||||
Reference in New Issue
Block a user