whatcanGOwrong
This commit is contained in:
@@ -0,0 +1,187 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// The Coverage interface is used to define the level of coverage of an
|
||||
// internationalization service. Note that not all types are supported by all
|
||||
// services. As lists may be generated on the fly, it is recommended that users
|
||||
// of a Coverage cache the results.
|
||||
type Coverage interface {
|
||||
// Tags returns the list of supported tags.
|
||||
Tags() []Tag
|
||||
|
||||
// BaseLanguages returns the list of supported base languages.
|
||||
BaseLanguages() []Base
|
||||
|
||||
// Scripts returns the list of supported scripts.
|
||||
Scripts() []Script
|
||||
|
||||
// Regions returns the list of supported regions.
|
||||
Regions() []Region
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported defines a Coverage that lists all supported subtags. Tags
|
||||
// always returns nil.
|
||||
Supported Coverage = allSubtags{}
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - Support Variants, numbering systems.
|
||||
// - CLDR coverage levels.
|
||||
// - Set of common tags defined in this package.
|
||||
|
||||
type allSubtags struct{}
|
||||
|
||||
// Regions returns the list of supported regions. As all regions are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" region is not returned.
|
||||
func (s allSubtags) Regions() []Region {
|
||||
reg := make([]Region, language.NumRegions)
|
||||
for i := range reg {
|
||||
reg[i] = Region{language.Region(i + 1)}
|
||||
}
|
||||
return reg
|
||||
}
|
||||
|
||||
// Scripts returns the list of supported scripts. As all scripts are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" script is not returned.
|
||||
func (s allSubtags) Scripts() []Script {
|
||||
scr := make([]Script, language.NumScripts)
|
||||
for i := range scr {
|
||||
scr[i] = Script{language.Script(i + 1)}
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func (s allSubtags) BaseLanguages() []Base {
|
||||
bs := language.BaseLanguages()
|
||||
base := make([]Base, len(bs))
|
||||
for i, b := range bs {
|
||||
base[i] = Base{b}
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// Tags always returns nil.
|
||||
func (s allSubtags) Tags() []Tag {
|
||||
return nil
|
||||
}
|
||||
|
||||
// coverage is used by NewCoverage which is used as a convenient way for
|
||||
// creating Coverage implementations for partially defined data. Very often a
|
||||
// package will only need to define a subset of slices. coverage provides a
|
||||
// convenient way to do this. Moreover, packages using NewCoverage, instead of
|
||||
// their own implementation, will not break if later new slice types are added.
|
||||
type coverage struct {
|
||||
tags func() []Tag
|
||||
bases func() []Base
|
||||
scripts func() []Script
|
||||
regions func() []Region
|
||||
}
|
||||
|
||||
func (s *coverage) Tags() []Tag {
|
||||
if s.tags == nil {
|
||||
return nil
|
||||
}
|
||||
return s.tags()
|
||||
}
|
||||
|
||||
// bases implements sort.Interface and is used to sort base languages.
|
||||
type bases []Base
|
||||
|
||||
func (b bases) Len() int {
|
||||
return len(b)
|
||||
}
|
||||
|
||||
func (b bases) Swap(i, j int) {
|
||||
b[i], b[j] = b[j], b[i]
|
||||
}
|
||||
|
||||
func (b bases) Less(i, j int) bool {
|
||||
return b[i].langID < b[j].langID
|
||||
}
|
||||
|
||||
// BaseLanguages returns the result from calling s.bases if it is specified or
|
||||
// otherwise derives the set of supported base languages from tags.
|
||||
func (s *coverage) BaseLanguages() []Base {
|
||||
if s.bases == nil {
|
||||
tags := s.Tags()
|
||||
if len(tags) == 0 {
|
||||
return nil
|
||||
}
|
||||
a := make([]Base, len(tags))
|
||||
for i, t := range tags {
|
||||
a[i] = Base{language.Language(t.lang())}
|
||||
}
|
||||
sort.Sort(bases(a))
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
k++
|
||||
a[k] = a[i]
|
||||
}
|
||||
}
|
||||
return a[:k+1]
|
||||
}
|
||||
return s.bases()
|
||||
}
|
||||
|
||||
func (s *coverage) Scripts() []Script {
|
||||
if s.scripts == nil {
|
||||
return nil
|
||||
}
|
||||
return s.scripts()
|
||||
}
|
||||
|
||||
func (s *coverage) Regions() []Region {
|
||||
if s.regions == nil {
|
||||
return nil
|
||||
}
|
||||
return s.regions()
|
||||
}
|
||||
|
||||
// NewCoverage returns a Coverage for the given lists. It is typically used by
|
||||
// packages providing internationalization services to define their level of
|
||||
// coverage. A list may be of type []T or func() []T, where T is either Tag,
|
||||
// Base, Script or Region. The returned Coverage derives the value for Bases
|
||||
// from Tags if no func or slice for []Base is specified. For other unspecified
|
||||
// types the returned Coverage will return nil for the respective methods.
|
||||
func NewCoverage(list ...interface{}) Coverage {
|
||||
s := &coverage{}
|
||||
for _, x := range list {
|
||||
switch v := x.(type) {
|
||||
case func() []Base:
|
||||
s.bases = v
|
||||
case func() []Script:
|
||||
s.scripts = v
|
||||
case func() []Region:
|
||||
s.regions = v
|
||||
case func() []Tag:
|
||||
s.tags = v
|
||||
case []Base:
|
||||
s.bases = func() []Base { return v }
|
||||
case []Script:
|
||||
s.scripts = func() []Script { return v }
|
||||
case []Region:
|
||||
s.regions = func() []Region { return v }
|
||||
case []Tag:
|
||||
s.tags = func() []Tag { return v }
|
||||
default:
|
||||
panic(fmt.Sprintf("language: unsupported set type %T", v))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
func TestSupported(t *testing.T) {
|
||||
// To prove the results are correct for a type, we test that the number of
|
||||
// results is identical to the number of results on record, that all results
|
||||
// are distinct and that all results are valid.
|
||||
tests := map[string]int{
|
||||
"BaseLanguages": language.NumLanguages,
|
||||
"Scripts": language.NumScripts,
|
||||
"Regions": language.NumRegions,
|
||||
"Tags": 0,
|
||||
}
|
||||
sup := reflect.ValueOf(Supported)
|
||||
for name, num := range tests {
|
||||
v := sup.MethodByName(name).Call(nil)[0]
|
||||
if n := v.Len(); n != num {
|
||||
t.Errorf("len(%s()) was %d; want %d", name, n, num)
|
||||
}
|
||||
dup := make(map[string]bool)
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
x := v.Index(i).Interface()
|
||||
// An invalid value will either cause a crash or result in a
|
||||
// duplicate when passed to Sprint.
|
||||
s := fmt.Sprint(x)
|
||||
if dup[s] {
|
||||
t.Errorf("%s: duplicate entry %q", name, s)
|
||||
}
|
||||
dup[s] = true
|
||||
}
|
||||
if len(dup) != v.Len() {
|
||||
t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewCoverage(t *testing.T) {
|
||||
bases := []Base{Base{0}, Base{3}, Base{7}}
|
||||
scripts := []Script{Script{11}, Script{17}, Script{23}}
|
||||
regions := []Region{Region{101}, Region{103}, Region{107}}
|
||||
tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
|
||||
fbases := func() []Base { return bases }
|
||||
fscripts := func() []Script { return scripts }
|
||||
fregions := func() []Region { return regions }
|
||||
ftags := func() []Tag { return tags }
|
||||
|
||||
tests := []struct {
|
||||
desc string
|
||||
list []interface{}
|
||||
bases []Base
|
||||
scripts []Script
|
||||
regions []Region
|
||||
tags []Tag
|
||||
}{
|
||||
{
|
||||
desc: "empty",
|
||||
},
|
||||
{
|
||||
desc: "bases",
|
||||
list: []interface{}{bases},
|
||||
bases: bases,
|
||||
},
|
||||
{
|
||||
desc: "scripts",
|
||||
list: []interface{}{scripts},
|
||||
scripts: scripts,
|
||||
},
|
||||
{
|
||||
desc: "regions",
|
||||
list: []interface{}{regions},
|
||||
regions: regions,
|
||||
},
|
||||
{
|
||||
desc: "bases derives from tags",
|
||||
list: []interface{}{tags},
|
||||
bases: []Base{Base{_en}, Base{_pt}},
|
||||
tags: tags,
|
||||
},
|
||||
{
|
||||
desc: "tags and bases",
|
||||
list: []interface{}{tags, bases},
|
||||
bases: bases,
|
||||
tags: tags,
|
||||
},
|
||||
{
|
||||
desc: "fully specified",
|
||||
list: []interface{}{tags, bases, scripts, regions},
|
||||
bases: bases,
|
||||
scripts: scripts,
|
||||
regions: regions,
|
||||
tags: tags,
|
||||
},
|
||||
{
|
||||
desc: "bases func",
|
||||
list: []interface{}{fbases},
|
||||
bases: bases,
|
||||
},
|
||||
{
|
||||
desc: "scripts func",
|
||||
list: []interface{}{fscripts},
|
||||
scripts: scripts,
|
||||
},
|
||||
{
|
||||
desc: "regions func",
|
||||
list: []interface{}{fregions},
|
||||
regions: regions,
|
||||
},
|
||||
{
|
||||
desc: "tags func",
|
||||
list: []interface{}{ftags},
|
||||
bases: []Base{Base{_en}, Base{_pt}},
|
||||
tags: tags,
|
||||
},
|
||||
{
|
||||
desc: "tags and bases",
|
||||
list: []interface{}{ftags, fbases},
|
||||
bases: bases,
|
||||
tags: tags,
|
||||
},
|
||||
{
|
||||
desc: "fully specified",
|
||||
list: []interface{}{ftags, fbases, fscripts, fregions},
|
||||
bases: bases,
|
||||
scripts: scripts,
|
||||
regions: regions,
|
||||
tags: tags,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
l := NewCoverage(tt.list...)
|
||||
if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
|
||||
t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
|
||||
}
|
||||
if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
|
||||
t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
|
||||
}
|
||||
if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
|
||||
t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
|
||||
}
|
||||
if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
|
||||
t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package display
|
||||
|
||||
// This file contains sets of data for specific languages. Users can use these
|
||||
// to create smaller collections of supported languages and reduce total table
|
||||
// size.
|
||||
|
||||
// The variable names defined here correspond to those in package language.
|
||||
|
||||
var (
|
||||
Afrikaans *Dictionary = &af // af
|
||||
Amharic *Dictionary = &am // am
|
||||
Arabic *Dictionary = &ar // ar
|
||||
ModernStandardArabic *Dictionary = Arabic // ar-001
|
||||
Azerbaijani *Dictionary = &az // az
|
||||
Bulgarian *Dictionary = &bg // bg
|
||||
Bengali *Dictionary = &bn // bn
|
||||
Catalan *Dictionary = &ca // ca
|
||||
Czech *Dictionary = &cs // cs
|
||||
Danish *Dictionary = &da // da
|
||||
German *Dictionary = &de // de
|
||||
Greek *Dictionary = &el // el
|
||||
English *Dictionary = &en // en
|
||||
AmericanEnglish *Dictionary = English // en-US
|
||||
BritishEnglish *Dictionary = English // en-GB
|
||||
Spanish *Dictionary = &es // es
|
||||
EuropeanSpanish *Dictionary = Spanish // es-ES
|
||||
LatinAmericanSpanish *Dictionary = Spanish // es-419
|
||||
Estonian *Dictionary = &et // et
|
||||
Persian *Dictionary = &fa // fa
|
||||
Finnish *Dictionary = &fi // fi
|
||||
Filipino *Dictionary = &fil // fil
|
||||
French *Dictionary = &fr // fr
|
||||
Gujarati *Dictionary = &gu // gu
|
||||
Hebrew *Dictionary = &he // he
|
||||
Hindi *Dictionary = &hi // hi
|
||||
Croatian *Dictionary = &hr // hr
|
||||
Hungarian *Dictionary = &hu // hu
|
||||
Armenian *Dictionary = &hy // hy
|
||||
Indonesian *Dictionary = &id // id
|
||||
Icelandic *Dictionary = &is // is
|
||||
Italian *Dictionary = &it // it
|
||||
Japanese *Dictionary = &ja // ja
|
||||
Georgian *Dictionary = &ka // ka
|
||||
Kazakh *Dictionary = &kk // kk
|
||||
Khmer *Dictionary = &km // km
|
||||
Kannada *Dictionary = &kn // kn
|
||||
Korean *Dictionary = &ko // ko
|
||||
Kirghiz *Dictionary = &ky // ky
|
||||
Lao *Dictionary = &lo // lo
|
||||
Lithuanian *Dictionary = < // lt
|
||||
Latvian *Dictionary = &lv // lv
|
||||
Macedonian *Dictionary = &mk // mk
|
||||
Malayalam *Dictionary = &ml // ml
|
||||
Mongolian *Dictionary = &mn // mn
|
||||
Marathi *Dictionary = &mr // mr
|
||||
Malay *Dictionary = &ms // ms
|
||||
Burmese *Dictionary = &my // my
|
||||
Nepali *Dictionary = &ne // ne
|
||||
Dutch *Dictionary = &nl // nl
|
||||
Norwegian *Dictionary = &no // no
|
||||
Punjabi *Dictionary = &pa // pa
|
||||
Polish *Dictionary = &pl // pl
|
||||
Portuguese *Dictionary = &pt // pt
|
||||
BrazilianPortuguese *Dictionary = Portuguese // pt-BR
|
||||
EuropeanPortuguese *Dictionary = &ptPT // pt-PT
|
||||
Romanian *Dictionary = &ro // ro
|
||||
Russian *Dictionary = &ru // ru
|
||||
Sinhala *Dictionary = &si // si
|
||||
Slovak *Dictionary = &sk // sk
|
||||
Slovenian *Dictionary = &sl // sl
|
||||
Albanian *Dictionary = &sq // sq
|
||||
Serbian *Dictionary = &sr // sr
|
||||
SerbianLatin *Dictionary = &srLatn // sr
|
||||
Swedish *Dictionary = &sv // sv
|
||||
Swahili *Dictionary = &sw // sw
|
||||
Tamil *Dictionary = &ta // ta
|
||||
Telugu *Dictionary = &te // te
|
||||
Thai *Dictionary = &th // th
|
||||
Turkish *Dictionary = &tr // tr
|
||||
Ukrainian *Dictionary = &uk // uk
|
||||
Urdu *Dictionary = &ur // ur
|
||||
Uzbek *Dictionary = &uz // uz
|
||||
Vietnamese *Dictionary = &vi // vi
|
||||
Chinese *Dictionary = &zh // zh
|
||||
SimplifiedChinese *Dictionary = Chinese // zh-Hans
|
||||
TraditionalChinese *Dictionary = &zhHant // zh-Hant
|
||||
Zulu *Dictionary = &zu // zu
|
||||
)
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/testtext"
|
||||
)
|
||||
|
||||
func TestLinking(t *testing.T) {
|
||||
base := getSize(t, `display.Tags(language.English).Name(language.English)`)
|
||||
compact := getSize(t, `display.English.Languages().Name(language.English)`)
|
||||
|
||||
if d := base - compact; d < 1.5*1024*1024 {
|
||||
t.Errorf("size(base) - size(compact) = %d - %d = was %d; want > 1.5MB", base, compact, d)
|
||||
}
|
||||
}
|
||||
|
||||
func getSize(t *testing.T, main string) int {
|
||||
size, err := testtext.CodeSize(fmt.Sprintf(body, main))
|
||||
if err != nil {
|
||||
t.Skipf("skipping link size test; binary size could not be determined: %v", err)
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
const body = `package main
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/language/display"
|
||||
)
|
||||
func main() {
|
||||
%s
|
||||
}
|
||||
`
|
||||
@@ -0,0 +1,420 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run maketables.go -output tables.go
|
||||
|
||||
// Package display provides display names for languages, scripts and regions in
|
||||
// a requested language.
|
||||
//
|
||||
// The data is based on CLDR's localeDisplayNames. It includes the names of the
|
||||
// draft level "contributed" or "approved". The resulting tables are quite
|
||||
// large. The display package is designed so that users can reduce the linked-in
|
||||
// table sizes by cherry picking the languages one wishes to support. There is a
|
||||
// Dictionary defined for a selected set of common languages for this purpose.
|
||||
package display // import "golang.org/x/text/language/display"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/format"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
/*
|
||||
TODO:
|
||||
All fairly low priority at the moment:
|
||||
- Include alternative and variants as an option (using func options).
|
||||
- Option for returning the empty string for undefined values.
|
||||
- Support variants, currencies, time zones, option names and other data
|
||||
provided in CLDR.
|
||||
- Do various optimizations:
|
||||
- Reduce size of offset tables.
|
||||
- Consider compressing infrequently used languages and decompress on demand.
|
||||
*/
|
||||
|
||||
// A Formatter formats a tag in the current language. It is used in conjunction
|
||||
// with the message package.
|
||||
type Formatter struct {
|
||||
lookup func(tag int, x interface{}) string
|
||||
x interface{}
|
||||
}
|
||||
|
||||
// Format implements "golang.org/x/text/internal/format".Formatter.
|
||||
func (f Formatter) Format(state format.State, verb rune) {
|
||||
// TODO: there are a lot of inefficiencies in this code. Fix it when we
|
||||
// language.Tag has embedded compact tags.
|
||||
t := state.Language()
|
||||
_, index, _ := matcher.Match(t)
|
||||
str := f.lookup(index, f.x)
|
||||
if str == "" {
|
||||
// TODO: use language-specific punctuation.
|
||||
// TODO: use codePattern instead of language?
|
||||
if unknown := f.lookup(index, language.Und); unknown != "" {
|
||||
fmt.Fprintf(state, "%v (%v)", unknown, f.x)
|
||||
} else {
|
||||
fmt.Fprintf(state, "[language: %v]", f.x)
|
||||
}
|
||||
} else {
|
||||
state.Write([]byte(str))
|
||||
}
|
||||
}
|
||||
|
||||
// Language returns a Formatter that renders the name for lang in the
|
||||
// current language. x may be a language.Base or a language.Tag.
|
||||
// It renders lang in the default language if no translation for the current
|
||||
// language is supported.
|
||||
func Language(lang interface{}) Formatter {
|
||||
return Formatter{langFunc, lang}
|
||||
}
|
||||
|
||||
// Region returns a Formatter that renders the name for region in the current
|
||||
// language. region may be a language.Region or a language.Tag.
|
||||
// It renders region in the default language if no translation for the current
|
||||
// language is supported.
|
||||
func Region(region interface{}) Formatter {
|
||||
return Formatter{regionFunc, region}
|
||||
}
|
||||
|
||||
// Script returns a Formatter that renders the name for script in the current
|
||||
// language. script may be a language.Script or a language.Tag.
|
||||
// It renders script in the default language if no translation for the current
|
||||
// language is supported.
|
||||
func Script(script interface{}) Formatter {
|
||||
return Formatter{scriptFunc, script}
|
||||
}
|
||||
|
||||
// Tag returns a Formatter that renders the name for tag in the current
|
||||
// language. tag may be a language.Tag.
|
||||
// It renders tag in the default language if no translation for the current
|
||||
// language is supported.
|
||||
func Tag(tag interface{}) Formatter {
|
||||
return Formatter{tagFunc, tag}
|
||||
}
|
||||
|
||||
// A Namer is used to get the name for a given value, such as a Tag, Language,
|
||||
// Script or Region.
|
||||
type Namer interface {
|
||||
// Name returns a display string for the given value. A Namer returns an
|
||||
// empty string for values it does not support. A Namer may support naming
|
||||
// an unspecified value. For example, when getting the name for a region for
|
||||
// a tag that does not have a defined Region, it may return the name for an
|
||||
// unknown region. It is up to the user to filter calls to Name for values
|
||||
// for which one does not want to have a name string.
|
||||
Name(x interface{}) string
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported lists the languages for which names are defined.
|
||||
Supported language.Coverage
|
||||
|
||||
// The set of all possible values for which names are defined. Note that not
|
||||
// all Namer implementations will cover all the values of a given type.
|
||||
// A Namer will return the empty string for unsupported values.
|
||||
Values language.Coverage
|
||||
|
||||
matcher language.Matcher
|
||||
)
|
||||
|
||||
func init() {
|
||||
tags := make([]language.Tag, numSupported)
|
||||
s := supported
|
||||
for i := range tags {
|
||||
p := strings.IndexByte(s, '|')
|
||||
tags[i] = language.Raw.Make(s[:p])
|
||||
s = s[p+1:]
|
||||
}
|
||||
matcher = language.NewMatcher(tags)
|
||||
Supported = language.NewCoverage(tags)
|
||||
|
||||
Values = language.NewCoverage(langTagSet.Tags, supportedScripts, supportedRegions)
|
||||
}
|
||||
|
||||
// Languages returns a Namer for naming languages. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either language.Base
|
||||
// or language.Tag. Note that the result may differ between passing a tag or its
|
||||
// base language. For example, for English, passing "nl-BE" would return Flemish
|
||||
// whereas passing "nl" returns "Dutch".
|
||||
func Languages(t language.Tag) Namer {
|
||||
if _, index, conf := matcher.Match(t); conf != language.No {
|
||||
return languageNamer(index)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type languageNamer int
|
||||
|
||||
func langFunc(i int, x interface{}) string {
|
||||
return nameLanguage(languageNamer(i), x)
|
||||
}
|
||||
|
||||
func (n languageNamer) name(i int) string {
|
||||
return lookup(langHeaders[:], int(n), i)
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for language names.
|
||||
func (n languageNamer) Name(x interface{}) string {
|
||||
return nameLanguage(n, x)
|
||||
}
|
||||
|
||||
// nonEmptyIndex walks up the parent chain until a non-empty header is found.
|
||||
// It returns -1 if no index could be found.
|
||||
func nonEmptyIndex(h []header, index int) int {
|
||||
for ; index != -1 && h[index].data == ""; index = int(parents[index]) {
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// Scripts returns a Namer for naming scripts. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either a
|
||||
// language.Script or a language.Tag. It will not attempt to infer a script for
|
||||
// tags with an unspecified script.
|
||||
func Scripts(t language.Tag) Namer {
|
||||
if _, index, conf := matcher.Match(t); conf != language.No {
|
||||
if index = nonEmptyIndex(scriptHeaders[:], index); index != -1 {
|
||||
return scriptNamer(index)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type scriptNamer int
|
||||
|
||||
func scriptFunc(i int, x interface{}) string {
|
||||
return nameScript(scriptNamer(i), x)
|
||||
}
|
||||
|
||||
func (n scriptNamer) name(i int) string {
|
||||
return lookup(scriptHeaders[:], int(n), i)
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for script names.
|
||||
func (n scriptNamer) Name(x interface{}) string {
|
||||
return nameScript(n, x)
|
||||
}
|
||||
|
||||
// Regions returns a Namer for naming regions. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either a
|
||||
// language.Region or a language.Tag. It will not attempt to infer a region for
|
||||
// tags with an unspecified region.
|
||||
func Regions(t language.Tag) Namer {
|
||||
if _, index, conf := matcher.Match(t); conf != language.No {
|
||||
if index = nonEmptyIndex(regionHeaders[:], index); index != -1 {
|
||||
return regionNamer(index)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type regionNamer int
|
||||
|
||||
func regionFunc(i int, x interface{}) string {
|
||||
return nameRegion(regionNamer(i), x)
|
||||
}
|
||||
|
||||
func (n regionNamer) name(i int) string {
|
||||
return lookup(regionHeaders[:], int(n), i)
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for region names.
|
||||
func (n regionNamer) Name(x interface{}) string {
|
||||
return nameRegion(n, x)
|
||||
}
|
||||
|
||||
// Tags returns a Namer for giving a full description of a tag. The names of
|
||||
// scripts and regions that are not already implied by the language name will
|
||||
// in appended within parentheses. It returns nil if there is not data for the
|
||||
// given tag. The type passed to Name must be a tag.
|
||||
func Tags(t language.Tag) Namer {
|
||||
if _, index, conf := matcher.Match(t); conf != language.No {
|
||||
return tagNamer(index)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type tagNamer int
|
||||
|
||||
func tagFunc(i int, x interface{}) string {
|
||||
return nameTag(languageNamer(i), scriptNamer(i), regionNamer(i), x)
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for tag names.
|
||||
func (n tagNamer) Name(x interface{}) string {
|
||||
return nameTag(languageNamer(n), scriptNamer(n), regionNamer(n), x)
|
||||
}
|
||||
|
||||
// lookup finds the name for an entry in a global table, traversing the
|
||||
// inheritance hierarchy if needed.
|
||||
func lookup(table []header, dict, want int) string {
|
||||
for dict != -1 {
|
||||
if s := table[dict].name(want); s != "" {
|
||||
return s
|
||||
}
|
||||
dict = int(parents[dict])
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// A Dictionary holds a collection of Namers for a single language. One can
|
||||
// reduce the amount of data linked in to a binary by only referencing
|
||||
// Dictionaries for the languages one needs to support instead of using the
|
||||
// generic Namer factories.
|
||||
type Dictionary struct {
|
||||
parent *Dictionary
|
||||
lang header
|
||||
script header
|
||||
region header
|
||||
}
|
||||
|
||||
// Tags returns a Namer for giving a full description of a tag. The names of
|
||||
// scripts and regions that are not already implied by the language name will
|
||||
// in appended within parentheses. It returns nil if there is not data for the
|
||||
// given tag. The type passed to Name must be a tag.
|
||||
func (d *Dictionary) Tags() Namer {
|
||||
return dictTags{d}
|
||||
}
|
||||
|
||||
type dictTags struct {
|
||||
d *Dictionary
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for tag names.
|
||||
func (n dictTags) Name(x interface{}) string {
|
||||
return nameTag(dictLanguages{n.d}, dictScripts{n.d}, dictRegions{n.d}, x)
|
||||
}
|
||||
|
||||
// Languages returns a Namer for naming languages. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either language.Base
|
||||
// or language.Tag. Note that the result may differ between passing a tag or its
|
||||
// base language. For example, for English, passing "nl-BE" would return Flemish
|
||||
// whereas passing "nl" returns "Dutch".
|
||||
func (d *Dictionary) Languages() Namer {
|
||||
return dictLanguages{d}
|
||||
}
|
||||
|
||||
type dictLanguages struct {
|
||||
d *Dictionary
|
||||
}
|
||||
|
||||
func (n dictLanguages) name(i int) string {
|
||||
for d := n.d; d != nil; d = d.parent {
|
||||
if s := d.lang.name(i); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for language names.
|
||||
func (n dictLanguages) Name(x interface{}) string {
|
||||
return nameLanguage(n, x)
|
||||
}
|
||||
|
||||
// Scripts returns a Namer for naming scripts. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either a
|
||||
// language.Script or a language.Tag. It will not attempt to infer a script for
|
||||
// tags with an unspecified script.
|
||||
func (d *Dictionary) Scripts() Namer {
|
||||
return dictScripts{d}
|
||||
}
|
||||
|
||||
type dictScripts struct {
|
||||
d *Dictionary
|
||||
}
|
||||
|
||||
func (n dictScripts) name(i int) string {
|
||||
for d := n.d; d != nil; d = d.parent {
|
||||
if s := d.script.name(i); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for script names.
|
||||
func (n dictScripts) Name(x interface{}) string {
|
||||
return nameScript(n, x)
|
||||
}
|
||||
|
||||
// Regions returns a Namer for naming regions. It returns nil if there is no
|
||||
// data for the given tag. The type passed to Name must be either a
|
||||
// language.Region or a language.Tag. It will not attempt to infer a region for
|
||||
// tags with an unspecified region.
|
||||
func (d *Dictionary) Regions() Namer {
|
||||
return dictRegions{d}
|
||||
}
|
||||
|
||||
type dictRegions struct {
|
||||
d *Dictionary
|
||||
}
|
||||
|
||||
func (n dictRegions) name(i int) string {
|
||||
for d := n.d; d != nil; d = d.parent {
|
||||
if s := d.region.name(i); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Name implements the Namer interface for region names.
|
||||
func (n dictRegions) Name(x interface{}) string {
|
||||
return nameRegion(n, x)
|
||||
}
|
||||
|
||||
// A SelfNamer implements a Namer that returns the name of language in this same
|
||||
// language. It provides a very compact mechanism to provide a comprehensive
|
||||
// list of languages to users in their native language.
|
||||
type SelfNamer struct {
|
||||
// Supported defines the values supported by this Namer.
|
||||
Supported language.Coverage
|
||||
}
|
||||
|
||||
var (
|
||||
// Self is a shared instance of a SelfNamer.
|
||||
Self *SelfNamer = &self
|
||||
|
||||
self = SelfNamer{language.NewCoverage(selfTagSet.Tags)}
|
||||
)
|
||||
|
||||
// Name returns the name of a given language tag in the language identified by
|
||||
// this tag. It supports both the language.Base and language.Tag types.
|
||||
func (n SelfNamer) Name(x interface{}) string {
|
||||
t, _ := language.All.Compose(x)
|
||||
base, scr, reg := t.Raw()
|
||||
baseScript := language.Script{}
|
||||
if (scr == language.Script{} && reg != language.Region{}) {
|
||||
// For looking up in the self dictionary, we need to select the
|
||||
// maximized script. This is even the case if the script isn't
|
||||
// specified.
|
||||
s1, _ := t.Script()
|
||||
if baseScript = getScript(base); baseScript != s1 {
|
||||
scr = s1
|
||||
}
|
||||
}
|
||||
|
||||
i, scr, reg := selfTagSet.index(base, scr, reg)
|
||||
if i == -1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Only return the display name if the script matches the expected script.
|
||||
if (scr != language.Script{}) {
|
||||
if (baseScript == language.Script{}) {
|
||||
baseScript = getScript(base)
|
||||
}
|
||||
if baseScript != scr {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
return selfHeaders[0].name(i)
|
||||
}
|
||||
|
||||
// getScript returns the maximized script for a base language.
|
||||
func getScript(b language.Base) language.Script {
|
||||
tag, _ := language.Raw.Compose(b)
|
||||
scr, _ := tag.Script()
|
||||
return scr
|
||||
}
|
||||
@@ -0,0 +1,714 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/message"
|
||||
)
|
||||
|
||||
// TODO: test that tables are properly dropped by the linker for various use
|
||||
// cases.
|
||||
|
||||
var (
|
||||
firstLang2aa = language.MustParseBase("aa")
|
||||
lastLang2zu = language.MustParseBase("zu")
|
||||
firstLang3ace = language.MustParseBase("ace")
|
||||
lastLang3zza = language.MustParseBase("zza")
|
||||
firstTagAr001 = language.MustParse("ar-001")
|
||||
lastTagZhHant = language.MustParse("zh-Hant")
|
||||
)
|
||||
|
||||
// TestValues tests that for all languages, regions, and scripts in Values, at
|
||||
// least one language has a name defined for it by checking it exists in
|
||||
// English, which is assumed to be the most comprehensive. It is also tested
|
||||
// that a Namer returns "" for unsupported values.
|
||||
func TestValues(t *testing.T) {
|
||||
type testcase struct {
|
||||
kind string
|
||||
n Namer
|
||||
}
|
||||
// checkDefined checks that a value exists in a Namer.
|
||||
checkDefined := func(x interface{}, namers []testcase) {
|
||||
for _, n := range namers {
|
||||
testtext.Run(t, fmt.Sprintf("%s.Name(%s)", n.kind, x), func(t *testing.T) {
|
||||
if n.n.Name(x) == "" {
|
||||
// As of version 28 there is no data for az-Arab in English,
|
||||
// although there is useful data in other languages.
|
||||
if x.(fmt.Stringer).String() == "az-Arab" {
|
||||
return
|
||||
}
|
||||
t.Errorf("supported but no result")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
// checkUnsupported checks that a value does not exist in a Namer.
|
||||
checkUnsupported := func(x interface{}, namers []testcase) {
|
||||
for _, n := range namers {
|
||||
if got := n.n.Name(x); got != "" {
|
||||
t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", n.kind, x, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tags := map[language.Tag]bool{}
|
||||
namers := []testcase{
|
||||
{"Languages(en)", Languages(language.English)},
|
||||
{"Tags(en)", Tags(language.English)},
|
||||
{"English.Languages()", English.Languages()},
|
||||
{"English.Tags()", English.Tags()},
|
||||
}
|
||||
for _, tag := range Values.Tags() {
|
||||
checkDefined(tag, namers)
|
||||
tags[tag] = true
|
||||
}
|
||||
for _, base := range language.Supported.BaseLanguages() {
|
||||
tag, _ := language.All.Compose(base)
|
||||
if !tags[tag] {
|
||||
checkUnsupported(tag, namers)
|
||||
}
|
||||
}
|
||||
|
||||
regions := map[language.Region]bool{}
|
||||
namers = []testcase{
|
||||
{"Regions(en)", Regions(language.English)},
|
||||
{"English.Regions()", English.Regions()},
|
||||
}
|
||||
for _, r := range Values.Regions() {
|
||||
checkDefined(r, namers)
|
||||
regions[r] = true
|
||||
}
|
||||
for _, r := range language.Supported.Regions() {
|
||||
if r = r.Canonicalize(); !regions[r] {
|
||||
checkUnsupported(r, namers)
|
||||
}
|
||||
}
|
||||
|
||||
scripts := map[language.Script]bool{}
|
||||
namers = []testcase{
|
||||
{"Scripts(en)", Scripts(language.English)},
|
||||
{"English.Scripts()", English.Scripts()},
|
||||
}
|
||||
for _, s := range Values.Scripts() {
|
||||
checkDefined(s, namers)
|
||||
scripts[s] = true
|
||||
}
|
||||
for _, s := range language.Supported.Scripts() {
|
||||
// Canonicalize the script.
|
||||
tag, _ := language.DeprecatedScript.Compose(s)
|
||||
if _, s, _ = tag.Raw(); !scripts[s] {
|
||||
checkUnsupported(s, namers)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestSupported tests that we have at least some Namers for languages that we
|
||||
// claim to support. To test the claims in the documentation, it also verifies
|
||||
// that if a Namer is returned, it will have at least some data.
|
||||
func TestSupported(t *testing.T) {
|
||||
supportedTags := Supported.Tags()
|
||||
if len(supportedTags) != numSupported {
|
||||
t.Errorf("number of supported was %d; want %d", len(supportedTags), numSupported)
|
||||
}
|
||||
|
||||
namerFuncs := []struct {
|
||||
kind string
|
||||
fn func(language.Tag) Namer
|
||||
}{
|
||||
{"Tags", Tags},
|
||||
{"Languages", Languages},
|
||||
{"Regions", Regions},
|
||||
{"Scripts", Scripts},
|
||||
}
|
||||
|
||||
// Verify that we have at least one Namer for all tags we claim to support.
|
||||
tags := make(map[language.Tag]bool)
|
||||
for _, tag := range supportedTags {
|
||||
// Test we have at least one Namer for this supported Tag.
|
||||
found := false
|
||||
for _, kind := range namerFuncs {
|
||||
if defined(t, kind.kind, kind.fn(tag), tag) {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("%s: supported, but no data available", tag)
|
||||
}
|
||||
if tags[tag] {
|
||||
t.Errorf("%s: included in Supported.Tags more than once", tag)
|
||||
}
|
||||
tags[tag] = true
|
||||
}
|
||||
|
||||
// Verify that we have no Namers for tags we don't claim to support.
|
||||
for _, base := range language.Supported.BaseLanguages() {
|
||||
tag, _ := language.All.Compose(base)
|
||||
// Skip tags that are supported after matching.
|
||||
if _, _, conf := matcher.Match(tag); conf != language.No {
|
||||
continue
|
||||
}
|
||||
// Test there are no Namers for this tag.
|
||||
for _, kind := range namerFuncs {
|
||||
if defined(t, kind.kind, kind.fn(tag), tag) {
|
||||
t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", kind.kind, tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// defined reports whether n is a proper Namer, which means it is non-nil and
|
||||
// must have at least one non-empty value.
|
||||
func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
|
||||
if n == nil {
|
||||
return false
|
||||
}
|
||||
switch kind {
|
||||
case "Tags":
|
||||
for _, t := range Values.Tags() {
|
||||
if n.Name(t) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case "Languages":
|
||||
for _, t := range Values.BaseLanguages() {
|
||||
if n.Name(t) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case "Regions":
|
||||
for _, t := range Values.Regions() {
|
||||
if n.Name(t) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case "Scripts":
|
||||
for _, t := range Values.Scripts() {
|
||||
if n.Name(t) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
|
||||
return false
|
||||
}
|
||||
|
||||
func TestCoverage(t *testing.T) {
|
||||
en := language.English
|
||||
tests := []struct {
|
||||
n Namer
|
||||
x interface{}
|
||||
}{
|
||||
{Languages(en), Values.Tags()},
|
||||
{Scripts(en), Values.Scripts()},
|
||||
{Regions(en), Values.Regions()},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
uniq := make(map[string]interface{})
|
||||
|
||||
v := reflect.ValueOf(tt.x)
|
||||
for j := 0; j < v.Len(); j++ {
|
||||
x := v.Index(j).Interface()
|
||||
// As of version 28 there is no data for az-Arab in English,
|
||||
// although there is useful data in other languages.
|
||||
if x.(fmt.Stringer).String() == "az-Arab" {
|
||||
continue
|
||||
}
|
||||
s := tt.n.Name(x)
|
||||
if s == "" {
|
||||
t.Errorf("%d:%d:%s: missing content", i, j, x)
|
||||
} else if uniq[s] != nil {
|
||||
t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, s, x, uniq[s])
|
||||
}
|
||||
uniq[s] = x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpdate tests whether dictionary entries for certain languages need to be
|
||||
// updated. For some languages, some of the headers may be empty or they may be
|
||||
// identical to the parent. This code detects if such entries need to be updated
|
||||
// after a table update.
|
||||
func TestUpdate(t *testing.T) {
|
||||
tests := []struct {
|
||||
d *Dictionary
|
||||
tag string
|
||||
}{
|
||||
{ModernStandardArabic, "ar-001"},
|
||||
{AmericanEnglish, "en-US"},
|
||||
{EuropeanSpanish, "es-ES"},
|
||||
{BrazilianPortuguese, "pt-BR"},
|
||||
{SimplifiedChinese, "zh-Hans"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
_, i, _ := matcher.Match(language.MustParse(tt.tag))
|
||||
if !reflect.DeepEqual(tt.d.lang, langHeaders[i]) {
|
||||
t.Errorf("%s: lang table update needed", tt.tag)
|
||||
}
|
||||
if !reflect.DeepEqual(tt.d.script, scriptHeaders[i]) {
|
||||
t.Errorf("%s: script table update needed", tt.tag)
|
||||
}
|
||||
if !reflect.DeepEqual(tt.d.region, regionHeaders[i]) {
|
||||
t.Errorf("%s: region table update needed", tt.tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndex(t *testing.T) {
|
||||
notIn := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}
|
||||
tests := []tagIndex{
|
||||
{
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"bb",
|
||||
"",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"",
|
||||
"bbb",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"",
|
||||
"",
|
||||
"Bbbb",
|
||||
},
|
||||
{
|
||||
"bb",
|
||||
"bbb",
|
||||
"Bbbb",
|
||||
},
|
||||
{
|
||||
"bbccddyy",
|
||||
"bbbcccdddyyy",
|
||||
"BbbbCcccDdddYyyy",
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
// Create the test set from the tagIndex.
|
||||
cnt := 0
|
||||
for sz := 2; sz <= 4; sz++ {
|
||||
a := tt[sz-2]
|
||||
for j := 0; j < len(a); j += sz {
|
||||
s := a[j : j+sz]
|
||||
if idx := tt.index(s); idx != cnt {
|
||||
t.Errorf("%d:%s: index was %d; want %d", i, s, idx, cnt)
|
||||
}
|
||||
cnt++
|
||||
}
|
||||
}
|
||||
if n := tt.len(); n != cnt {
|
||||
t.Errorf("%d: len was %d; want %d", i, n, cnt)
|
||||
}
|
||||
for _, x := range notIn {
|
||||
if idx := tt.index(x); idx != -1 {
|
||||
t.Errorf("%d:%s: index was %d; want -1", i, x, idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTag(t *testing.T) {
|
||||
tests := []struct {
|
||||
dict string
|
||||
tag string
|
||||
name string
|
||||
}{
|
||||
// sr is in Value.Languages(), but is not supported by agq.
|
||||
{"agq", "sr", "|[language: sr]"},
|
||||
{"nl", "nl", "Nederlands"},
|
||||
// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
|
||||
// Flemish in English, though. TODO: check if this is a CLDR bug.
|
||||
// {"nl", "nl-BE", "Vlaams"},
|
||||
{"nl", "nl-BE", "Nederlands (België)"},
|
||||
{"nl", "vls", "West-Vlaams"},
|
||||
{"en", "nl-BE", "Flemish"},
|
||||
{"en", "en", "English"},
|
||||
{"en", "en-GB", "British English"},
|
||||
{"en", "en-US", "American English"}, // American English in CLDR 24+
|
||||
{"ru", "ru", "русский"},
|
||||
{"ru", "ru-RU", "русский (Россия)"},
|
||||
{"ru", "ru-Cyrl", "русский (кириллица)"},
|
||||
{"en", lastLang2zu.String(), "Zulu"},
|
||||
{"en", firstLang2aa.String(), "Afar"},
|
||||
{"en", lastLang3zza.String(), "Zaza"},
|
||||
{"en", firstLang3ace.String(), "Achinese"},
|
||||
{"en", firstTagAr001.String(), "Modern Standard Arabic"},
|
||||
{"en", lastTagZhHant.String(), "Traditional Chinese"},
|
||||
{"en", "aaa", "|Unknown language (aaa)"},
|
||||
{"en", "zzj", "|Unknown language (zzj)"},
|
||||
// If full tag doesn't match, try without script or region.
|
||||
{"en", "aa-Hans", "Afar (Simplified Han)"},
|
||||
{"en", "af-Arab", "Afrikaans (Arabic)"},
|
||||
{"en", "zu-Cyrl", "Zulu (Cyrillic)"},
|
||||
{"en", "aa-GB", "Afar (United Kingdom)"},
|
||||
{"en", "af-NA", "Afrikaans (Namibia)"},
|
||||
{"en", "zu-BR", "Zulu (Brazil)"},
|
||||
// Correct inheritance and language selection.
|
||||
{"zh", "zh-TW", "中文 (台湾)"},
|
||||
{"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
|
||||
{"zh-Hant", "zh-TW", "中文 (台灣)"},
|
||||
{"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
|
||||
// Some rather arbitrary interpretations for Serbian. This is arguably
|
||||
// correct and consistent with the way zh-[Hant-]TW is handled. It will
|
||||
// also give results more in line with the expectations if users
|
||||
// explicitly use "sh".
|
||||
{"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
|
||||
{"sr-Latn", "sr-Latn-ME", "srpskohrvatski (Crna Gora)"},
|
||||
// Double script and region
|
||||
{"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.dict+"/"+tt.tag, func(t *testing.T) {
|
||||
name, fmtName := splitName(tt.name)
|
||||
dict := language.MustParse(tt.dict)
|
||||
tag := language.Raw.MustParse(tt.tag)
|
||||
d := Tags(dict)
|
||||
if n := d.Name(tag); n != name {
|
||||
// There are inconsistencies w.r.t. capitalization in the tests
|
||||
// due to CLDR's update procedure which treats modern and other
|
||||
// languages differently.
|
||||
// See https://unicode.org/cldr/trac/ticket/8051.
|
||||
// TODO: use language capitalization to sanitize the strings.
|
||||
t.Errorf("Name(%s) = %q; want %q", tag, n, name)
|
||||
}
|
||||
|
||||
p := message.NewPrinter(dict)
|
||||
if n := p.Sprint(Tag(tag)); n != fmtName {
|
||||
t.Errorf("Tag(%s) = %q; want %q", tag, n, fmtName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func splitName(names string) (name, formatName string) {
|
||||
split := strings.Split(names, "|")
|
||||
name, formatName = split[0], split[0]
|
||||
if len(split) > 1 {
|
||||
formatName = split[1]
|
||||
}
|
||||
return name, formatName
|
||||
}
|
||||
|
||||
func TestLanguage(t *testing.T) {
|
||||
tests := []struct {
|
||||
dict string
|
||||
tag string
|
||||
name string
|
||||
}{
|
||||
// sr is in Value.Languages(), but is not supported by agq.
|
||||
{"agq", "sr", "|[language: sr]"},
|
||||
// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
|
||||
// Flemish in English, though. TODO: this is probably incorrect.
|
||||
// West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
|
||||
// own language, whereas Vlaams is generally Dutch. So expect to have
|
||||
// to change these tests back.
|
||||
{"nl", "nl", "Nederlands"},
|
||||
{"nl", "vls", "West-Vlaams"},
|
||||
{"nl", "nl-BE", "Nederlands"},
|
||||
{"en", "pt", "Portuguese"},
|
||||
{"en", "pt-PT", "European Portuguese"},
|
||||
{"en", "pt-BR", "Brazilian Portuguese"},
|
||||
{"en", "en", "English"},
|
||||
{"en", "en-GB", "British English"},
|
||||
{"en", "en-US", "American English"}, // American English in CLDR 24+
|
||||
{"en", lastLang2zu.String(), "Zulu"},
|
||||
{"en", firstLang2aa.String(), "Afar"},
|
||||
{"en", lastLang3zza.String(), "Zaza"},
|
||||
{"en", firstLang3ace.String(), "Achinese"},
|
||||
{"en", firstTagAr001.String(), "Modern Standard Arabic"},
|
||||
{"en", lastTagZhHant.String(), "Traditional Chinese"},
|
||||
{"en", "aaa", "|Unknown language (aaa)"},
|
||||
{"en", "zzj", "|Unknown language (zzj)"},
|
||||
// If full tag doesn't match, try without script or region.
|
||||
{"en", "aa-Hans", "Afar"},
|
||||
{"en", "af-Arab", "Afrikaans"},
|
||||
{"en", "zu-Cyrl", "Zulu"},
|
||||
{"en", "aa-GB", "Afar"},
|
||||
{"en", "af-NA", "Afrikaans"},
|
||||
{"en", "zu-BR", "Zulu"},
|
||||
{"agq", "zh-Hant", "|[language: zh-Hant]"},
|
||||
{"en", "sh", "Serbo-Croatian"},
|
||||
{"en", "sr-Latn", "Serbo-Croatian"},
|
||||
{"en", "sr", "Serbian"},
|
||||
{"en", "sr-ME", "Serbian"},
|
||||
{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
|
||||
}
|
||||
for _, tt := range tests {
|
||||
testtext.Run(t, tt.dict+"/"+tt.tag, func(t *testing.T) {
|
||||
name, fmtName := splitName(tt.name)
|
||||
dict := language.MustParse(tt.dict)
|
||||
tag := language.Raw.MustParse(tt.tag)
|
||||
p := message.NewPrinter(dict)
|
||||
d := Languages(dict)
|
||||
if n := d.Name(tag); n != name {
|
||||
t.Errorf("Name(%v) = %q; want %q", tag, n, name)
|
||||
}
|
||||
if n := p.Sprint(Language(tag)); n != fmtName {
|
||||
t.Errorf("Language(%v) = %q; want %q", tag, n, fmtName)
|
||||
}
|
||||
if len(tt.tag) <= 3 {
|
||||
base := language.MustParseBase(tt.tag)
|
||||
if n := d.Name(base); n != name {
|
||||
t.Errorf("Name(%v) = %q; want %q", base, n, name)
|
||||
}
|
||||
if n := p.Sprint(Language(base)); n != fmtName {
|
||||
t.Errorf("Language(%v) = %q; want %q", base, n, fmtName)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScript(t *testing.T) {
|
||||
tests := []struct {
|
||||
dict string
|
||||
scr string
|
||||
name string
|
||||
}{
|
||||
{"nl", "Arab", "Arabisch"},
|
||||
{"en", "Arab", "Arabic"},
|
||||
{"en", "Zzzz", "Unknown Script"},
|
||||
{"zh-Hant", "Hang", "韓文字"},
|
||||
{"zh-Hant-HK", "Hang", "韓文字"},
|
||||
{"zh", "Arab", "阿拉伯文"},
|
||||
{"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
|
||||
{"zh-Hant", "Arab", "阿拉伯文"},
|
||||
{"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
|
||||
// Canonicalized form
|
||||
{"en", "Qaai", "Inherited"}, // deprecated script, now is Zinh
|
||||
{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
|
||||
{"en", "en", "Unknown Script"},
|
||||
// Don't introduce scripts with canonicalization.
|
||||
{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.dict+"/"+tt.scr, func(t *testing.T) {
|
||||
name, fmtName := splitName(tt.name)
|
||||
dict := language.MustParse(tt.dict)
|
||||
p := message.NewPrinter(dict)
|
||||
d := Scripts(dict)
|
||||
var tag language.Tag
|
||||
if unicode.IsUpper(rune(tt.scr[0])) {
|
||||
x := language.MustParseScript(tt.scr)
|
||||
if n := d.Name(x); n != name {
|
||||
t.Errorf("Name(%v) = %q; want %q", x, n, name)
|
||||
}
|
||||
if n := p.Sprint(Script(x)); n != fmtName {
|
||||
t.Errorf("Script(%v) = %q; want %q", x, n, fmtName)
|
||||
}
|
||||
tag, _ = language.Raw.Compose(x)
|
||||
} else {
|
||||
tag = language.Raw.MustParse(tt.scr)
|
||||
}
|
||||
if n := d.Name(tag); n != name {
|
||||
t.Errorf("Name(%v) = %q; want %q", tag, n, name)
|
||||
}
|
||||
if n := p.Sprint(Script(tag)); n != fmtName {
|
||||
t.Errorf("Script(%v) = %q; want %q", tag, n, fmtName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegion(t *testing.T) {
|
||||
tests := []struct {
|
||||
dict string
|
||||
reg string
|
||||
name string
|
||||
}{
|
||||
{"nl", "NL", "Nederland"},
|
||||
{"en", "US", "United States"},
|
||||
{"en", "ZZ", "Unknown Region"},
|
||||
{"en-GB", "NL", "Netherlands"},
|
||||
// Canonical equivalents
|
||||
{"en", "UK", "United Kingdom"},
|
||||
// No region
|
||||
{"en", "pt", "Unknown Region"},
|
||||
{"en", "und", "Unknown Region"},
|
||||
// Don't introduce regions with canonicalization.
|
||||
{"en", "mo", "Unknown Region"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.dict+"/"+tt.reg, func(t *testing.T) {
|
||||
dict := language.MustParse(tt.dict)
|
||||
p := message.NewPrinter(dict)
|
||||
d := Regions(dict)
|
||||
var tag language.Tag
|
||||
if unicode.IsUpper(rune(tt.reg[0])) {
|
||||
// Region
|
||||
x := language.MustParseRegion(tt.reg)
|
||||
if n := d.Name(x); n != tt.name {
|
||||
t.Errorf("Name(%v) = %q; want %q", x, n, tt.name)
|
||||
}
|
||||
if n := p.Sprint(Region(x)); n != tt.name {
|
||||
t.Errorf("Region(%v) = %q; want %q", x, n, tt.name)
|
||||
}
|
||||
tag, _ = language.Raw.Compose(x)
|
||||
} else {
|
||||
tag = language.Raw.MustParse(tt.reg)
|
||||
}
|
||||
if n := d.Name(tag); n != tt.name {
|
||||
t.Errorf("Name(%v) = %q; want %q", tag, n, tt.name)
|
||||
}
|
||||
if n := p.Sprint(Region(tag)); n != tt.name {
|
||||
t.Errorf("Region(%v) = %q; want %q", tag, n, tt.name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelf(t *testing.T) {
|
||||
tests := []struct {
|
||||
tag string
|
||||
name string
|
||||
}{
|
||||
{"nl", "Nederlands"},
|
||||
// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
|
||||
// Flemish in English, though. TODO: check if this is a CLDR bug.
|
||||
// {"nl-BE", "Vlaams"},
|
||||
{"nl-BE", "Nederlands"},
|
||||
{"en-GB", "British English"},
|
||||
{lastLang2zu.String(), "isiZulu"},
|
||||
{firstLang2aa.String(), ""}, // not defined
|
||||
{lastLang3zza.String(), ""}, // not defined
|
||||
{firstLang3ace.String(), ""}, // not defined
|
||||
{firstTagAr001.String(), "العربية الرسمية الحديثة"},
|
||||
{"ar", "العربية"},
|
||||
{lastTagZhHant.String(), "繁體中文"},
|
||||
{"aaa", ""},
|
||||
{"zzj", ""},
|
||||
// Drop entries that are not in the requested script, even if there is
|
||||
// an entry for the language.
|
||||
{"aa-Hans", ""},
|
||||
{"af-Arab", ""},
|
||||
{"zu-Cyrl", ""},
|
||||
// Append the country name in the language of the matching language.
|
||||
{"af-NA", "Afrikaans"},
|
||||
{"zh", "中文"},
|
||||
// zh-TW should match zh-Hant instead of zh!
|
||||
{"zh-TW", "繁體中文"},
|
||||
{"zh-Hant", "繁體中文"},
|
||||
{"zh-Hans", "简体中文"},
|
||||
{"zh-Hant-TW", "繁體中文"},
|
||||
{"zh-Hans-TW", "简体中文"},
|
||||
// Take the entry for sr which has the matching script.
|
||||
// TODO: Capitalization changed as of CLDR 26, but change seems
|
||||
// arbitrary. Revisit capitalization with revision 27. See
|
||||
// https://unicode.org/cldr/trac/ticket/8051.
|
||||
{"sr", "српски"},
|
||||
// TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
|
||||
// Croatian. This is an artifact of the current algorithm, which is the
|
||||
// way it is to have the preferred behavior for other languages such as
|
||||
// Chinese. We can hardwire this case in the table generator or package
|
||||
// code, but we first check if CLDR can be updated.
|
||||
// {"sr-ME", "Srpski"}, // Is Srpskohrvatski
|
||||
{"sr-Latn-ME", "srpskohrvatski"},
|
||||
{"sr-Cyrl-ME", "српски"},
|
||||
{"sr-NL", "српски"},
|
||||
// NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
|
||||
// dominant script. We do not have data for kk-Arab and we chose to not
|
||||
// fall back in such cases.
|
||||
{"kk-CN", ""},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
d := Self
|
||||
if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
|
||||
t.Errorf("%d:%s: was %q; want %q", i, tt.tag, n, tt.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEquivalence(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
namer Namer
|
||||
}{
|
||||
{"Self", Self},
|
||||
{"Tags", Tags(language.Romanian)},
|
||||
{"Languages", Languages(language.Romanian)},
|
||||
{"Scripts", Scripts(language.Romanian)},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
ro := tc.namer.Name(language.Raw.MustParse("ro-MD"))
|
||||
mo := tc.namer.Name(language.Raw.MustParse("mo"))
|
||||
if ro != mo {
|
||||
t.Errorf("%q != %q", ro, mo)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictionaryLang(t *testing.T) {
|
||||
tests := []struct {
|
||||
d *Dictionary
|
||||
tag string
|
||||
name string
|
||||
}{
|
||||
{English, "en", "English"},
|
||||
{Portuguese, "af", "africâner"},
|
||||
{EuropeanPortuguese, "af", "africanês"},
|
||||
{English, "nl-BE", "Flemish"},
|
||||
}
|
||||
for i, test := range tests {
|
||||
tag := language.MustParse(test.tag)
|
||||
if got := test.d.Tags().Name(tag); got != test.name {
|
||||
t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
|
||||
}
|
||||
if base, _ := language.Compose(tag.Base()); base == tag {
|
||||
if got := test.d.Languages().Name(base); got != test.name {
|
||||
t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictionaryRegion(t *testing.T) {
|
||||
tests := []struct {
|
||||
d *Dictionary
|
||||
region string
|
||||
name string
|
||||
}{
|
||||
{English, "FR", "France"},
|
||||
{Portuguese, "009", "Oceania"},
|
||||
{EuropeanPortuguese, "009", "Oceânia"},
|
||||
}
|
||||
for i, test := range tests {
|
||||
tag := language.MustParseRegion(test.region)
|
||||
if got := test.d.Regions().Name(tag); got != test.name {
|
||||
t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictionaryScript(t *testing.T) {
|
||||
tests := []struct {
|
||||
d *Dictionary
|
||||
script string
|
||||
name string
|
||||
}{
|
||||
{English, "Cyrl", "Cyrillic"},
|
||||
{EuropeanPortuguese, "Gujr", "guzerate"},
|
||||
}
|
||||
for i, test := range tests {
|
||||
tag := language.MustParseScript(test.script)
|
||||
if got := test.d.Scripts().Name(tag); got != test.name {
|
||||
t.Errorf("%d:%v: got %s; want %s", i, tag, got, test.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package display_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/language/display"
|
||||
"golang.org/x/text/message"
|
||||
)
|
||||
|
||||
func ExampleFormatter() {
|
||||
message.SetString(language.Dutch, "In %v people speak %v.", "In %v spreekt men %v.")
|
||||
|
||||
fr := language.French
|
||||
region, _ := fr.Region()
|
||||
for _, tag := range []string{"en", "nl"} {
|
||||
p := message.NewPrinter(language.Make(tag))
|
||||
|
||||
p.Printf("In %v people speak %v.", display.Region(region), display.Language(fr))
|
||||
p.Println()
|
||||
}
|
||||
|
||||
// Output:
|
||||
// In France people speak French.
|
||||
// In Frankrijk spreekt men Frans.
|
||||
}
|
||||
|
||||
func ExampleNamer() {
|
||||
supported := []string{
|
||||
"en-US", "en-GB", "ja", "zh", "zh-Hans", "zh-Hant", "pt", "pt-PT", "ko", "ar", "el", "ru", "uk", "pa",
|
||||
}
|
||||
|
||||
en := display.English.Languages()
|
||||
|
||||
for _, s := range supported {
|
||||
t := language.MustParse(s)
|
||||
fmt.Printf("%-20s (%s)\n", en.Name(t), display.Self.Name(t))
|
||||
}
|
||||
|
||||
// Output:
|
||||
// American English (American English)
|
||||
// British English (British English)
|
||||
// Japanese (日本語)
|
||||
// Chinese (中文)
|
||||
// Simplified Chinese (简体中文)
|
||||
// Traditional Chinese (繁體中文)
|
||||
// Portuguese (português)
|
||||
// European Portuguese (português europeu)
|
||||
// Korean (한국어)
|
||||
// Arabic (العربية)
|
||||
// Greek (Ελληνικά)
|
||||
// Russian (русский)
|
||||
// Ukrainian (українська)
|
||||
// Punjabi (ਪੰਜਾਬੀ)
|
||||
}
|
||||
|
||||
func ExampleTags() {
|
||||
n := display.Tags(language.English)
|
||||
fmt.Println(n.Name(language.Make("nl")))
|
||||
fmt.Println(n.Name(language.Make("nl-BE")))
|
||||
fmt.Println(n.Name(language.Make("nl-CW")))
|
||||
fmt.Println(n.Name(language.Make("nl-Arab")))
|
||||
fmt.Println(n.Name(language.Make("nl-Cyrl-RU")))
|
||||
|
||||
// Output:
|
||||
// Dutch
|
||||
// Flemish
|
||||
// Dutch (Curaçao)
|
||||
// Dutch (Arabic)
|
||||
// Dutch (Cyrillic, Russia)
|
||||
}
|
||||
|
||||
// ExampleDictionary shows how to reduce the amount of data linked into your
|
||||
// binary by only using the predefined Dictionary variables of the languages you
|
||||
// wish to support.
|
||||
func ExampleDictionary() {
|
||||
tags := []language.Tag{
|
||||
language.English,
|
||||
language.German,
|
||||
language.Japanese,
|
||||
language.Russian,
|
||||
}
|
||||
dicts := []*display.Dictionary{
|
||||
display.English,
|
||||
display.German,
|
||||
display.Japanese,
|
||||
display.Russian,
|
||||
}
|
||||
|
||||
m := language.NewMatcher(tags)
|
||||
|
||||
getDict := func(t language.Tag) *display.Dictionary {
|
||||
_, i, confidence := m.Match(t)
|
||||
// Skip this check if you want to support a fall-back language, which
|
||||
// will be the first one passed to NewMatcher.
|
||||
if confidence == language.No {
|
||||
return nil
|
||||
}
|
||||
return dicts[i]
|
||||
}
|
||||
|
||||
// The matcher will match Swiss German to German.
|
||||
n := getDict(language.Make("gsw")).Languages()
|
||||
fmt.Println(n.Name(language.German))
|
||||
fmt.Println(n.Name(language.Make("de-CH")))
|
||||
fmt.Println(n.Name(language.Make("gsw")))
|
||||
|
||||
// Output:
|
||||
// Deutsch
|
||||
// Schweizer Hochdeutsch
|
||||
// Schweizerdeutsch
|
||||
}
|
||||
@@ -0,0 +1,253 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package display
|
||||
|
||||
// This file contains common lookup code that is shared between the various
|
||||
// implementations of Namer and Dictionaries.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
type namer interface {
|
||||
// name gets the string for the given index. It should walk the
|
||||
// inheritance chain if a value is not present in the base index.
|
||||
name(idx int) string
|
||||
}
|
||||
|
||||
func nameLanguage(n namer, x interface{}) string {
|
||||
t, _ := language.All.Compose(x)
|
||||
for {
|
||||
i, _, _ := langTagSet.index(t.Raw())
|
||||
if s := n.name(i); s != "" {
|
||||
return s
|
||||
}
|
||||
if t = t.Parent(); t == language.Und {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func nameScript(n namer, x interface{}) string {
|
||||
t, _ := language.DeprecatedScript.Compose(x)
|
||||
_, s, _ := t.Raw()
|
||||
return n.name(scriptIndex.index(s.String()))
|
||||
}
|
||||
|
||||
func nameRegion(n namer, x interface{}) string {
|
||||
t, _ := language.DeprecatedRegion.Compose(x)
|
||||
_, _, r := t.Raw()
|
||||
return n.name(regionIndex.index(r.String()))
|
||||
}
|
||||
|
||||
func nameTag(langN, scrN, regN namer, x interface{}) string {
|
||||
t, ok := x.(language.Tag)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
const form = language.All &^ language.SuppressScript
|
||||
if c, err := form.Canonicalize(t); err == nil {
|
||||
t = c
|
||||
}
|
||||
_, sRaw, rRaw := t.Raw()
|
||||
i, scr, reg := langTagSet.index(t.Raw())
|
||||
for i != -1 {
|
||||
if str := langN.name(i); str != "" {
|
||||
if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
|
||||
ss, sr := "", ""
|
||||
if hasS {
|
||||
ss = scrN.name(scriptIndex.index(scr.String()))
|
||||
}
|
||||
if hasR {
|
||||
sr = regN.name(regionIndex.index(reg.String()))
|
||||
}
|
||||
// TODO: use patterns in CLDR or at least confirm they are the
|
||||
// same for all languages.
|
||||
if ss != "" && sr != "" {
|
||||
return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
|
||||
}
|
||||
if ss != "" || sr != "" {
|
||||
return fmt.Sprintf("%s (%s%s)", str, ss, sr)
|
||||
}
|
||||
}
|
||||
return str
|
||||
}
|
||||
scr, reg = sRaw, rRaw
|
||||
if t = t.Parent(); t == language.Und {
|
||||
return ""
|
||||
}
|
||||
i, _, _ = langTagSet.index(t.Raw())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// header contains the data and indexes for a single namer.
|
||||
// data contains a series of strings concatenated into one. index contains the
|
||||
// offsets for a string in data. For example, consider a header that defines
|
||||
// strings for the languages de, el, en, fi, and nl:
|
||||
//
|
||||
// header{
|
||||
// data: "GermanGreekEnglishDutch",
|
||||
// index: []uint16{0, 6, 11, 18, 18, 23},
|
||||
// }
|
||||
//
|
||||
// For a language with index i, the string is defined by
|
||||
// data[index[i]:index[i+1]]. So the number of elements in index is always one
|
||||
// greater than the number of languages for which header defines a value.
|
||||
// A string for a language may be empty, which means the name is undefined. In
|
||||
// the above example, the name for fi (Finnish) is undefined.
|
||||
type header struct {
|
||||
data string
|
||||
index []uint16
|
||||
}
|
||||
|
||||
// name looks up the name for a tag in the dictionary, given its index.
|
||||
func (h *header) name(i int) string {
|
||||
if 0 <= i && i < len(h.index)-1 {
|
||||
return h.data[h.index[i]:h.index[i+1]]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// tagSet is used to find the index of a language in a set of tags.
|
||||
type tagSet struct {
|
||||
single tagIndex
|
||||
long []string
|
||||
}
|
||||
|
||||
var (
|
||||
langTagSet = tagSet{
|
||||
single: langIndex,
|
||||
long: langTagsLong,
|
||||
}
|
||||
|
||||
// selfTagSet is used for indexing the language strings in their own
|
||||
// language.
|
||||
selfTagSet = tagSet{
|
||||
single: selfIndex,
|
||||
long: selfTagsLong,
|
||||
}
|
||||
|
||||
zzzz = language.MustParseScript("Zzzz")
|
||||
zz = language.MustParseRegion("ZZ")
|
||||
)
|
||||
|
||||
// index returns the index of the tag for the given base, script and region or
|
||||
// its parent if the tag is not available. If the match is for a parent entry,
|
||||
// the excess script and region are returned.
|
||||
func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
|
||||
lang := base.String()
|
||||
index := -1
|
||||
if (scr != language.Script{} || reg != language.Region{}) {
|
||||
if scr == zzzz {
|
||||
scr = language.Script{}
|
||||
}
|
||||
if reg == zz {
|
||||
reg = language.Region{}
|
||||
}
|
||||
|
||||
i := sort.SearchStrings(ts.long, lang)
|
||||
// All entries have either a script or a region and not both.
|
||||
scrStr, regStr := scr.String(), reg.String()
|
||||
for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
|
||||
if s := ts.long[i][len(lang)+1:]; s == scrStr {
|
||||
scr = language.Script{}
|
||||
index = i + ts.single.len()
|
||||
break
|
||||
} else if s == regStr {
|
||||
reg = language.Region{}
|
||||
index = i + ts.single.len()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if index == -1 {
|
||||
index = ts.single.index(lang)
|
||||
}
|
||||
return index, scr, reg
|
||||
}
|
||||
|
||||
func (ts *tagSet) Tags() []language.Tag {
|
||||
tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
|
||||
ts.single.keys(func(s string) {
|
||||
tags = append(tags, language.Raw.MustParse(s))
|
||||
})
|
||||
for _, s := range ts.long {
|
||||
tags = append(tags, language.Raw.MustParse(s))
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
func supportedScripts() []language.Script {
|
||||
scr := make([]language.Script, 0, scriptIndex.len())
|
||||
scriptIndex.keys(func(s string) {
|
||||
scr = append(scr, language.MustParseScript(s))
|
||||
})
|
||||
return scr
|
||||
}
|
||||
|
||||
func supportedRegions() []language.Region {
|
||||
reg := make([]language.Region, 0, regionIndex.len())
|
||||
regionIndex.keys(func(s string) {
|
||||
reg = append(reg, language.MustParseRegion(s))
|
||||
})
|
||||
return reg
|
||||
}
|
||||
|
||||
// tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
|
||||
// for each length, which can be used in combination with binary search to get
|
||||
// the index associated with a tag.
|
||||
// For example, a tagIndex{
|
||||
//
|
||||
// "arenesfrruzh", // 6 2-byte tags.
|
||||
// "barwae", // 2 3-byte tags.
|
||||
// "",
|
||||
//
|
||||
// }
|
||||
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
|
||||
// "wae" had an index of 7.
|
||||
type tagIndex [3]string
|
||||
|
||||
func (t *tagIndex) index(s string) int {
|
||||
sz := len(s)
|
||||
if sz < 2 || 4 < sz {
|
||||
return -1
|
||||
}
|
||||
a := t[sz-2]
|
||||
index := sort.Search(len(a)/sz, func(i int) bool {
|
||||
p := i * sz
|
||||
return a[p:p+sz] >= s
|
||||
})
|
||||
p := index * sz
|
||||
if end := p + sz; end > len(a) || a[p:end] != s {
|
||||
return -1
|
||||
}
|
||||
// Add the number of tags for smaller sizes.
|
||||
for i := 0; i < sz-2; i++ {
|
||||
index += len(t[i]) / (i + 2)
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// len returns the number of tags that are contained in the tagIndex.
|
||||
func (t *tagIndex) len() (n int) {
|
||||
for i, s := range t {
|
||||
n += len(s) / (i + 2)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// keys calls f for each tag.
|
||||
func (t *tagIndex) keys(f func(key string)) {
|
||||
for i, s := range *t {
|
||||
for ; s != ""; s = s[i+2:] {
|
||||
f(s[:i+2])
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,602 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
// Generator for display name tables.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output", "tables.go", "output file")
|
||||
|
||||
stats = flag.Bool("stats", false, "prints statistics to stderr")
|
||||
|
||||
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
|
||||
draft = flag.String("draft",
|
||||
"contributed",
|
||||
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|
||||
pkg = flag.String("package",
|
||||
"display",
|
||||
"the name of the package in which the generated file is to be included")
|
||||
|
||||
tags = newTagSet("tags",
|
||||
[]language.Tag{},
|
||||
"space-separated list of tags to include or empty for all")
|
||||
dict = newTagSet("dict",
|
||||
dictTags(),
|
||||
"space-separated list or tags for which to include a Dictionary. "+
|
||||
`"" means the common list from go.text/language.`)
|
||||
)
|
||||
|
||||
func dictTags() (tag []language.Tag) {
|
||||
// TODO: replace with language.Common.Tags() once supported.
|
||||
const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
|
||||
"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
|
||||
"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
|
||||
"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
|
||||
"zh zh-Hans zh-Hant zu"
|
||||
|
||||
for _, s := range strings.Split(str, " ") {
|
||||
tag = append(tag, language.MustParse(s))
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
// Read the CLDR zip file.
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("main", "supplemental")
|
||||
d.SetSectionFilter("localeDisplayNames")
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, "display")
|
||||
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b := builder{
|
||||
w: w,
|
||||
data: data,
|
||||
group: make(map[string]*group),
|
||||
}
|
||||
b.generate()
|
||||
}
|
||||
|
||||
const tagForm = language.All
|
||||
|
||||
// tagSet is used to parse command line flags of tags. It implements the
|
||||
// flag.Value interface.
|
||||
type tagSet map[language.Tag]bool
|
||||
|
||||
func newTagSet(name string, tags []language.Tag, usage string) tagSet {
|
||||
f := tagSet(make(map[language.Tag]bool))
|
||||
for _, t := range tags {
|
||||
f[t] = true
|
||||
}
|
||||
flag.Var(f, name, usage)
|
||||
return f
|
||||
}
|
||||
|
||||
// String implements the String method of the flag.Value interface.
|
||||
func (f tagSet) String() string {
|
||||
tags := []string{}
|
||||
for t := range f {
|
||||
tags = append(tags, t.String())
|
||||
}
|
||||
sort.Strings(tags)
|
||||
return strings.Join(tags, " ")
|
||||
}
|
||||
|
||||
// Set implements Set from the flag.Value interface.
|
||||
func (f tagSet) Set(s string) error {
|
||||
if s != "" {
|
||||
for _, s := range strings.Split(s, " ") {
|
||||
if s != "" {
|
||||
tag, err := tagForm.Parse(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f[tag] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f tagSet) contains(t language.Tag) bool {
|
||||
if len(f) == 0 {
|
||||
return true
|
||||
}
|
||||
return f[t]
|
||||
}
|
||||
|
||||
// builder is used to create all tables with display name information.
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
|
||||
data *cldr.CLDR
|
||||
|
||||
fromLocs []string
|
||||
|
||||
// destination tags for the current locale.
|
||||
toTags []string
|
||||
toTagIndex map[string]int
|
||||
|
||||
// list of supported tags
|
||||
supported []language.Tag
|
||||
|
||||
// key-value pairs per group
|
||||
group map[string]*group
|
||||
|
||||
// statistics
|
||||
sizeIndex int // total size of all indexes of headers
|
||||
sizeData int // total size of all data of headers
|
||||
totalSize int
|
||||
}
|
||||
|
||||
type group struct {
|
||||
// Maps from a given language to the Namer data for this language.
|
||||
lang map[language.Tag]keyValues
|
||||
headers []header
|
||||
|
||||
toTags []string
|
||||
threeStart int
|
||||
fourPlusStart int
|
||||
}
|
||||
|
||||
// set sets the typ to the name for locale loc.
|
||||
func (g *group) set(t language.Tag, typ, name string) {
|
||||
kv := g.lang[t]
|
||||
if kv == nil {
|
||||
kv = make(keyValues)
|
||||
g.lang[t] = kv
|
||||
}
|
||||
if kv[typ] == "" {
|
||||
kv[typ] = name
|
||||
}
|
||||
}
|
||||
|
||||
type keyValues map[string]string
|
||||
|
||||
type header struct {
|
||||
tag language.Tag
|
||||
data string
|
||||
index []uint16
|
||||
}
|
||||
|
||||
var versionInfo = `// Version is deprecated. Use CLDRVersion.
|
||||
const Version = %#v
|
||||
|
||||
`
|
||||
|
||||
var self = language.MustParse("mul")
|
||||
|
||||
// generate builds and writes all tables.
|
||||
func (b *builder) generate() {
|
||||
fmt.Fprintf(b.w, versionInfo, cldr.Version)
|
||||
|
||||
b.filter()
|
||||
b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
||||
if ldn.Languages != nil {
|
||||
for _, v := range ldn.Languages.Language {
|
||||
lang := v.Type
|
||||
if lang == "root" {
|
||||
// We prefer the data from "und"
|
||||
// TODO: allow both the data for root and und somehow.
|
||||
continue
|
||||
}
|
||||
tag := tagForm.MustParse(lang)
|
||||
if tags.contains(tag) {
|
||||
g.set(loc, tag.String(), v.Data())
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
||||
if ldn.Scripts != nil {
|
||||
for _, v := range ldn.Scripts.Script {
|
||||
code := language.MustParseScript(v.Type)
|
||||
if code.IsPrivateUse() { // Qaaa..Qabx
|
||||
// TODO: data currently appears to be very meager.
|
||||
// Reconsider if we have data for English.
|
||||
if loc == language.English {
|
||||
log.Fatal("Consider including data for private use scripts.")
|
||||
}
|
||||
continue
|
||||
}
|
||||
g.set(loc, code.String(), v.Data())
|
||||
}
|
||||
}
|
||||
})
|
||||
b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
||||
if ldn.Territories != nil {
|
||||
for _, v := range ldn.Territories.Territory {
|
||||
g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
b.makeSupported()
|
||||
|
||||
b.writeParents()
|
||||
|
||||
b.writeGroup("lang")
|
||||
b.writeGroup("script")
|
||||
b.writeGroup("region")
|
||||
|
||||
b.w.WriteConst("numSupported", len(b.supported))
|
||||
buf := bytes.Buffer{}
|
||||
for _, tag := range b.supported {
|
||||
fmt.Fprint(&buf, tag.String(), "|")
|
||||
}
|
||||
b.w.WriteConst("supported", buf.String())
|
||||
|
||||
b.writeDictionaries()
|
||||
|
||||
b.supported = []language.Tag{self}
|
||||
|
||||
// Compute the names of locales in their own language. Some of these names
|
||||
// may be specified in their parent locales. We iterate the maximum depth
|
||||
// of the parent three times to match successive parents of tags until a
|
||||
// possible match is found.
|
||||
for i := 0; i < 4; i++ {
|
||||
b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
|
||||
parent := tag
|
||||
if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
|
||||
parent, _ = language.Raw.Compose(b)
|
||||
}
|
||||
if ldn.Languages != nil {
|
||||
for _, v := range ldn.Languages.Language {
|
||||
key := tagForm.MustParse(v.Type)
|
||||
saved := key
|
||||
if key == parent {
|
||||
g.set(self, tag.String(), v.Data())
|
||||
}
|
||||
for k := 0; k < i; k++ {
|
||||
key = key.Parent()
|
||||
}
|
||||
if key == tag {
|
||||
g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
b.writeGroup("self")
|
||||
}
|
||||
|
||||
func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
|
||||
b.sizeIndex = 0
|
||||
b.sizeData = 0
|
||||
b.toTags = nil
|
||||
b.fromLocs = nil
|
||||
b.toTagIndex = make(map[string]int)
|
||||
|
||||
g := b.group[name]
|
||||
if g == nil {
|
||||
g = &group{lang: make(map[language.Tag]keyValues)}
|
||||
b.group[name] = g
|
||||
}
|
||||
for _, loc := range b.data.Locales() {
|
||||
// We use RawLDML instead of LDML as we are managing our own inheritance
|
||||
// in this implementation.
|
||||
ldml := b.data.RawLDML(loc)
|
||||
|
||||
// We do not support the POSIX variant (it is not a supported BCP 47
|
||||
// variant). This locale also doesn't happen to contain any data, so
|
||||
// we'll skip it by checking for this.
|
||||
tag, err := tagForm.Parse(loc)
|
||||
if err != nil {
|
||||
if ldml.LocaleDisplayNames != nil {
|
||||
log.Fatalf("setData: %v", err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
|
||||
f(g, tag, ldml.LocaleDisplayNames)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) filter() {
|
||||
filter := func(s *cldr.Slice) {
|
||||
if *short {
|
||||
s.SelectOnePerGroup("alt", []string{"short", ""})
|
||||
} else {
|
||||
s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
|
||||
}
|
||||
d, err := cldr.ParseDraft(*draft)
|
||||
if err != nil {
|
||||
log.Fatalf("filter: %v", err)
|
||||
}
|
||||
s.SelectDraft(d)
|
||||
}
|
||||
for _, loc := range b.data.Locales() {
|
||||
if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
|
||||
if ldn.Languages != nil {
|
||||
s := cldr.MakeSlice(&ldn.Languages.Language)
|
||||
if filter(&s); len(ldn.Languages.Language) == 0 {
|
||||
ldn.Languages = nil
|
||||
}
|
||||
}
|
||||
if ldn.Scripts != nil {
|
||||
s := cldr.MakeSlice(&ldn.Scripts.Script)
|
||||
if filter(&s); len(ldn.Scripts.Script) == 0 {
|
||||
ldn.Scripts = nil
|
||||
}
|
||||
}
|
||||
if ldn.Territories != nil {
|
||||
s := cldr.MakeSlice(&ldn.Territories.Territory)
|
||||
if filter(&s); len(ldn.Territories.Territory) == 0 {
|
||||
ldn.Territories = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// makeSupported creates a list of all supported locales.
|
||||
func (b *builder) makeSupported() {
|
||||
// tags across groups
|
||||
for _, g := range b.group {
|
||||
for t, _ := range g.lang {
|
||||
b.supported = append(b.supported, t)
|
||||
}
|
||||
}
|
||||
b.supported = b.supported[:unique(tagsSorter(b.supported))]
|
||||
|
||||
}
|
||||
|
||||
type tagsSorter []language.Tag
|
||||
|
||||
func (a tagsSorter) Len() int { return len(a) }
|
||||
func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||
func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
|
||||
|
||||
func (b *builder) writeGroup(name string) {
|
||||
g := b.group[name]
|
||||
|
||||
for _, kv := range g.lang {
|
||||
for t, _ := range kv {
|
||||
g.toTags = append(g.toTags, t)
|
||||
}
|
||||
}
|
||||
g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
|
||||
|
||||
// Allocate header per supported value.
|
||||
g.headers = make([]header, len(b.supported))
|
||||
for i, sup := range b.supported {
|
||||
kv, ok := g.lang[sup]
|
||||
if !ok {
|
||||
g.headers[i].tag = sup
|
||||
continue
|
||||
}
|
||||
data := []byte{}
|
||||
index := make([]uint16, len(g.toTags), len(g.toTags)+1)
|
||||
for j, t := range g.toTags {
|
||||
index[j] = uint16(len(data))
|
||||
data = append(data, kv[t]...)
|
||||
}
|
||||
index = append(index, uint16(len(data)))
|
||||
|
||||
// Trim the tail of the index.
|
||||
// TODO: indexes can be reduced in size quite a bit more.
|
||||
n := len(index)
|
||||
for ; n >= 2 && index[n-2] == index[n-1]; n-- {
|
||||
}
|
||||
index = index[:n]
|
||||
|
||||
// Workaround for a bug in CLDR 26.
|
||||
// See https://unicode.org/cldr/trac/ticket/8042.
|
||||
if cldr.Version == "26" && sup.String() == "hsb" {
|
||||
data = bytes.Replace(data, []byte{'"'}, nil, 1)
|
||||
}
|
||||
g.headers[i] = header{sup, string(data), index}
|
||||
}
|
||||
g.writeTable(b.w, name)
|
||||
}
|
||||
|
||||
type tagsBySize []string
|
||||
|
||||
func (l tagsBySize) Len() int { return len(l) }
|
||||
func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
|
||||
func (l tagsBySize) Less(i, j int) bool {
|
||||
a, b := l[i], l[j]
|
||||
// Sort single-tag entries based on size first. Otherwise alphabetic.
|
||||
if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
|
||||
return len(a) < len(b)
|
||||
}
|
||||
return a < b
|
||||
}
|
||||
|
||||
// parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
|
||||
// of tags[i].
|
||||
func parentIndices(tags []language.Tag) []int16 {
|
||||
index := make(map[language.Tag]int16)
|
||||
for i, t := range tags {
|
||||
index[t] = int16(i)
|
||||
}
|
||||
|
||||
// Construct default parents.
|
||||
parents := make([]int16, len(tags))
|
||||
for i, t := range tags {
|
||||
parents[i] = -1
|
||||
for t = t.Parent(); t != language.Und; t = t.Parent() {
|
||||
if j, ok := index[t]; ok {
|
||||
parents[i] = j
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return parents
|
||||
}
|
||||
|
||||
func (b *builder) writeParents() {
|
||||
parents := parentIndices(b.supported)
|
||||
fmt.Fprintf(b.w, "var parents = ")
|
||||
b.w.WriteArray(parents)
|
||||
}
|
||||
|
||||
// writeKeys writes keys to a special index used by the display package.
|
||||
// tags are assumed to be sorted by length.
|
||||
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
|
||||
w.Size += int(3 * reflect.TypeOf("").Size())
|
||||
w.WriteComment("Number of keys: %d", len(keys))
|
||||
fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
|
||||
for i := 2; i <= 4; i++ {
|
||||
sub := []string{}
|
||||
for _, t := range keys {
|
||||
if len(t) != i {
|
||||
break
|
||||
}
|
||||
sub = append(sub, t)
|
||||
}
|
||||
s := strings.Join(sub, "")
|
||||
w.WriteString(s)
|
||||
fmt.Fprintf(w, ",\n")
|
||||
keys = keys[len(sub):]
|
||||
}
|
||||
fmt.Fprintln(w, "\t}")
|
||||
if len(keys) > 0 {
|
||||
w.Size += int(reflect.TypeOf([]string{}).Size())
|
||||
fmt.Fprintf(w, "\t%sTagsLong = ", name)
|
||||
w.WriteSlice(keys)
|
||||
}
|
||||
fmt.Fprintln(w, ")\n")
|
||||
}
|
||||
|
||||
// identifier creates an identifier from the given tag.
|
||||
func identifier(t language.Tag) string {
|
||||
return strings.Replace(t.String(), "-", "", -1)
|
||||
}
|
||||
|
||||
func (h *header) writeEntry(w *gen.CodeWriter, name string) {
|
||||
if len(dict) > 0 && dict.contains(h.tag) {
|
||||
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
|
||||
fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
|
||||
fmt.Fprintln(w, "\t},")
|
||||
} else if len(h.data) == 0 {
|
||||
fmt.Fprintln(w, "\t\t{}, //", h.tag)
|
||||
} else {
|
||||
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
|
||||
w.WriteString(h.data)
|
||||
fmt.Fprintln(w, ",")
|
||||
w.WriteSlice(h.index)
|
||||
fmt.Fprintln(w, ",\n\t},")
|
||||
}
|
||||
}
|
||||
|
||||
// write the data for the given header as single entries. The size for this data
|
||||
// was already accounted for in writeEntry.
|
||||
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
|
||||
if len(dict) > 0 && dict.contains(h.tag) {
|
||||
tag := identifier(h.tag)
|
||||
w.WriteConst(tag+name+"Str", h.data)
|
||||
|
||||
// Note that we create a slice instead of an array. If we use an array
|
||||
// we need to refer to it as a[:] in other tables, which will cause the
|
||||
// array to always be included by the linker. See Issue 7651.
|
||||
w.WriteVar(tag+name+"Idx", h.index)
|
||||
}
|
||||
}
|
||||
|
||||
// writeTable writes an entry for a single Namer.
|
||||
func (g *group) writeTable(w *gen.CodeWriter, name string) {
|
||||
start := w.Size
|
||||
writeKeys(w, name, g.toTags)
|
||||
w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
|
||||
|
||||
fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
|
||||
|
||||
title := strings.Title(name)
|
||||
for _, h := range g.headers {
|
||||
h.writeEntry(w, title)
|
||||
}
|
||||
fmt.Fprintln(w, "}\n")
|
||||
|
||||
for _, h := range g.headers {
|
||||
h.writeSingle(w, title)
|
||||
}
|
||||
n := w.Size - start
|
||||
fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
|
||||
}
|
||||
|
||||
func (b *builder) writeDictionaries() {
|
||||
fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
|
||||
fmt.Fprintln(b.w, "var (")
|
||||
parents := parentIndices(b.supported)
|
||||
|
||||
for i, t := range b.supported {
|
||||
if dict.contains(t) {
|
||||
ident := identifier(t)
|
||||
fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
|
||||
if p := parents[i]; p == -1 {
|
||||
fmt.Fprintln(b.w, "\t\tnil,")
|
||||
} else {
|
||||
fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
|
||||
}
|
||||
fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
|
||||
fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
|
||||
fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
|
||||
fmt.Fprintln(b.w, "\t}")
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(b.w, ")")
|
||||
|
||||
var s string
|
||||
var a []uint16
|
||||
sz := reflect.TypeOf(s).Size()
|
||||
sz += reflect.TypeOf(a).Size()
|
||||
sz *= 3
|
||||
sz += reflect.TypeOf(&a).Size()
|
||||
n := int(sz) * len(dict)
|
||||
fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
|
||||
|
||||
b.w.Size += n
|
||||
}
|
||||
|
||||
// unique sorts the given lists and removes duplicate entries by swapping them
|
||||
// past position k, where k is the number of unique values. It returns k.
|
||||
func unique(a sort.Interface) int {
|
||||
if a.Len() == 0 {
|
||||
return 0
|
||||
}
|
||||
sort.Sort(a)
|
||||
k := 1
|
||||
for i := 1; i < a.Len(); i++ {
|
||||
if a.Less(k-1, i) {
|
||||
if k != i {
|
||||
a.Swap(k, i)
|
||||
}
|
||||
k++
|
||||
}
|
||||
}
|
||||
return k
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,98 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package language implements BCP 47 language tags and related functionality.
|
||||
//
|
||||
// The most important function of package language is to match a list of
|
||||
// user-preferred languages to a list of supported languages.
|
||||
// It alleviates the developer of dealing with the complexity of this process
|
||||
// and provides the user with the best experience
|
||||
// (see https://blog.golang.org/matchlang).
|
||||
//
|
||||
// # Matching preferred against supported languages
|
||||
//
|
||||
// A Matcher for an application that supports English, Australian English,
|
||||
// Danish, and standard Mandarin can be created as follows:
|
||||
//
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
//
|
||||
// This list of supported languages is typically implied by the languages for
|
||||
// which there exists translations of the user interface.
|
||||
//
|
||||
// User-preferred languages usually come as a comma-separated list of BCP 47
|
||||
// language tags.
|
||||
// The MatchString finds best matches for such strings:
|
||||
//
|
||||
// handler(w http.ResponseWriter, r *http.Request) {
|
||||
// lang, _ := r.Cookie("lang")
|
||||
// accept := r.Header.Get("Accept-Language")
|
||||
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||
//
|
||||
// // tag should now be used for the initialization of any
|
||||
// // locale-specific service.
|
||||
// }
|
||||
//
|
||||
// The Matcher's Match method can be used to match Tags directly.
|
||||
//
|
||||
// Matchers are aware of the intricacies of equivalence between languages, such
|
||||
// as deprecated subtags, legacy tags, macro languages, mutual
|
||||
// intelligibility between scripts and languages, and transparently passing
|
||||
// BCP 47 user configuration.
|
||||
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
|
||||
// and will know that Cantonese ("yue") is a good match for "zh-HK".
|
||||
//
|
||||
// # Using match results
|
||||
//
|
||||
// To guarantee a consistent user experience to the user it is important to
|
||||
// use the same language tag for the selection of any locale-specific services.
|
||||
// For example, it is utterly confusing to substitute spelled-out numbers
|
||||
// or dates in one language in text of another language.
|
||||
// More subtly confusing is using the wrong sorting order or casing
|
||||
// algorithm for a certain language.
|
||||
//
|
||||
// All the packages in x/text that provide locale-specific services
|
||||
// (e.g. collate, cases) should be initialized with the tag that was
|
||||
// obtained at the start of an interaction with the user.
|
||||
//
|
||||
// Note that Tag that is returned by Match and MatchString may differ from any
|
||||
// of the supported languages, as it may contain carried over settings from
|
||||
// the user tags.
|
||||
// This may be inconvenient when your application has some additional
|
||||
// locale-specific data for your supported languages.
|
||||
// Match and MatchString both return the index of the matched supported tag
|
||||
// to simplify associating such data with the matched tag.
|
||||
//
|
||||
// # Canonicalization
|
||||
//
|
||||
// If one uses the Matcher to compare languages one does not need to
|
||||
// worry about canonicalization.
|
||||
//
|
||||
// The meaning of a Tag varies per application. The language package
|
||||
// therefore delays canonicalization and preserves information as much
|
||||
// as possible. The Matcher, however, will always take into account that
|
||||
// two different tags may represent the same language.
|
||||
//
|
||||
// By default, only legacy and deprecated tags are converted into their
|
||||
// canonical equivalent. All other information is preserved. This approach makes
|
||||
// the confidence scores more accurate and allows matchers to distinguish
|
||||
// between variants that are otherwise lost.
|
||||
//
|
||||
// As a consequence, two tags that should be treated as identical according to
|
||||
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
|
||||
// Matcher handles such distinctions, though, and is aware of the
|
||||
// equivalence relations. The CanonType type can be used to alter the
|
||||
// canonicalization form.
|
||||
//
|
||||
// # References
|
||||
//
|
||||
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
|
||||
package language // import "golang.org/x/text/language"
|
||||
|
||||
// TODO: explanation on how to match languages for your own locale-specific
|
||||
// service.
|
||||
@@ -0,0 +1,411 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
func ExampleCanonType() {
|
||||
p := func(id string) {
|
||||
fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
|
||||
fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
|
||||
fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
|
||||
fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
|
||||
}
|
||||
p("en-Latn")
|
||||
p("sh")
|
||||
p("zh-cmn")
|
||||
p("bjd")
|
||||
p("iw-Latn-fonipa-u-cu-usd")
|
||||
// Output:
|
||||
// Default(en-Latn) -> en-Latn
|
||||
// BCP47(en-Latn) -> en
|
||||
// Macro(en-Latn) -> en-Latn
|
||||
// All(en-Latn) -> en
|
||||
// Default(sh) -> sr-Latn
|
||||
// BCP47(sh) -> sh
|
||||
// Macro(sh) -> sh
|
||||
// All(sh) -> sr-Latn
|
||||
// Default(zh-cmn) -> cmn
|
||||
// BCP47(zh-cmn) -> cmn
|
||||
// Macro(zh-cmn) -> zh
|
||||
// All(zh-cmn) -> zh
|
||||
// Default(bjd) -> drl
|
||||
// BCP47(bjd) -> drl
|
||||
// Macro(bjd) -> bjd
|
||||
// All(bjd) -> drl
|
||||
// Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
|
||||
// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
|
||||
// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
|
||||
// All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
|
||||
}
|
||||
|
||||
func ExampleTag_Base() {
|
||||
fmt.Println(language.Make("und").Base())
|
||||
fmt.Println(language.Make("und-US").Base())
|
||||
fmt.Println(language.Make("und-NL").Base())
|
||||
fmt.Println(language.Make("und-419").Base()) // Latin America
|
||||
fmt.Println(language.Make("und-ZZ").Base())
|
||||
// Output:
|
||||
// en Low
|
||||
// en High
|
||||
// nl High
|
||||
// es Low
|
||||
// en Low
|
||||
}
|
||||
|
||||
func ExampleTag_Script() {
|
||||
en := language.Make("en")
|
||||
sr := language.Make("sr")
|
||||
sr_Latn := language.Make("sr_Latn")
|
||||
fmt.Println(en.Script())
|
||||
fmt.Println(sr.Script())
|
||||
// Was a script explicitly specified?
|
||||
_, c := sr.Script()
|
||||
fmt.Println(c == language.Exact)
|
||||
_, c = sr_Latn.Script()
|
||||
fmt.Println(c == language.Exact)
|
||||
// Output:
|
||||
// Latn High
|
||||
// Cyrl Low
|
||||
// false
|
||||
// true
|
||||
}
|
||||
|
||||
func ExampleTag_Region() {
|
||||
ru := language.Make("ru")
|
||||
en := language.Make("en")
|
||||
fmt.Println(ru.Region())
|
||||
fmt.Println(en.Region())
|
||||
// Output:
|
||||
// RU Low
|
||||
// US Low
|
||||
}
|
||||
|
||||
func ExampleRegion_TLD() {
|
||||
us := language.MustParseRegion("US")
|
||||
gb := language.MustParseRegion("GB")
|
||||
uk := language.MustParseRegion("UK")
|
||||
bu := language.MustParseRegion("BU")
|
||||
|
||||
fmt.Println(us.TLD())
|
||||
fmt.Println(gb.TLD())
|
||||
fmt.Println(uk.TLD())
|
||||
fmt.Println(bu.TLD())
|
||||
|
||||
fmt.Println(us.Canonicalize().TLD())
|
||||
fmt.Println(gb.Canonicalize().TLD())
|
||||
fmt.Println(uk.Canonicalize().TLD())
|
||||
fmt.Println(bu.Canonicalize().TLD())
|
||||
// Output:
|
||||
// US <nil>
|
||||
// UK <nil>
|
||||
// UK <nil>
|
||||
// ZZ language: region is not a valid ccTLD
|
||||
// US <nil>
|
||||
// UK <nil>
|
||||
// UK <nil>
|
||||
// MM <nil>
|
||||
}
|
||||
|
||||
func ExampleCompose() {
|
||||
nl, _ := language.ParseBase("nl")
|
||||
us, _ := language.ParseRegion("US")
|
||||
de := language.Make("de-1901-u-co-phonebk")
|
||||
jp := language.Make("ja-JP")
|
||||
fi := language.Make("fi-x-ing")
|
||||
|
||||
u, _ := language.ParseExtension("u-nu-arabic")
|
||||
x, _ := language.ParseExtension("x-piglatin")
|
||||
|
||||
// Combine a base language and region.
|
||||
fmt.Println(language.Compose(nl, us))
|
||||
// Combine a base language and extension.
|
||||
fmt.Println(language.Compose(nl, x))
|
||||
// Replace the region.
|
||||
fmt.Println(language.Compose(jp, us))
|
||||
// Combine several tags.
|
||||
fmt.Println(language.Compose(us, nl, u))
|
||||
|
||||
// Replace the base language of a tag.
|
||||
fmt.Println(language.Compose(de, nl))
|
||||
fmt.Println(language.Compose(de, nl, u))
|
||||
// Remove the base language.
|
||||
fmt.Println(language.Compose(de, language.Base{}))
|
||||
// Remove all variants.
|
||||
fmt.Println(language.Compose(de, []language.Variant{}))
|
||||
// Remove all extensions.
|
||||
fmt.Println(language.Compose(de, []language.Extension{}))
|
||||
fmt.Println(language.Compose(fi, []language.Extension{}))
|
||||
// Remove all variants and extensions.
|
||||
fmt.Println(language.Compose(de.Raw()))
|
||||
|
||||
// An error is gobbled or returned if non-nil.
|
||||
fmt.Println(language.Compose(language.ParseRegion("ZA")))
|
||||
fmt.Println(language.Compose(language.ParseRegion("HH")))
|
||||
|
||||
// Compose uses the same Default canonicalization as Make.
|
||||
fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
|
||||
|
||||
// Call compose on a different CanonType for different results.
|
||||
fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))
|
||||
|
||||
// Output:
|
||||
// nl-US <nil>
|
||||
// nl-x-piglatin <nil>
|
||||
// ja-US <nil>
|
||||
// nl-US-u-nu-arabic <nil>
|
||||
// nl-1901-u-co-phonebk <nil>
|
||||
// nl-1901-u-co-phonebk-nu-arabic <nil>
|
||||
// und-1901-u-co-phonebk <nil>
|
||||
// de-u-co-phonebk <nil>
|
||||
// de-1901 <nil>
|
||||
// fi <nil>
|
||||
// de <nil>
|
||||
// und-ZA <nil>
|
||||
// und language: subtag "HH" is well-formed but unknown
|
||||
// en-Latn-GB <nil>
|
||||
// en-GB <nil>
|
||||
}
|
||||
|
||||
func ExampleParse_errors() {
|
||||
for _, s := range []string{"Foo", "Bar", "Foobar"} {
|
||||
_, err := language.Parse(s)
|
||||
if err != nil {
|
||||
if inv, ok := err.(language.ValueError); ok {
|
||||
fmt.Println(inv.Subtag())
|
||||
} else {
|
||||
fmt.Println(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
|
||||
_, err := language.Parse(s)
|
||||
switch e := err.(type) {
|
||||
case language.ValueError:
|
||||
fmt.Printf("%s: culprit %q\n", s, e.Subtag())
|
||||
case nil:
|
||||
// No error.
|
||||
default:
|
||||
// A syntax error.
|
||||
fmt.Printf("%s: ill-formed\n", s)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// foo
|
||||
// Foobar
|
||||
// aa-Uuuu: culprit "Uuuu"
|
||||
// AC: culprit "ac"
|
||||
// ac-u: ill-formed
|
||||
}
|
||||
|
||||
func ExampleTag_Parent() {
|
||||
p := func(tag string) {
|
||||
fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
|
||||
}
|
||||
p("zh-CN")
|
||||
|
||||
// Australian English inherits from World English.
|
||||
p("en-AU")
|
||||
|
||||
// If the tag has a different maximized script from its parent, a tag with
|
||||
// this maximized script is inserted. This allows different language tags
|
||||
// which have the same base language and script in common to inherit from
|
||||
// a common set of settings.
|
||||
p("zh-HK")
|
||||
|
||||
// If the maximized script of the parent is not identical, CLDR will skip
|
||||
// inheriting from it, as it means there will not be many entries in common
|
||||
// and inheriting from it is nonsensical.
|
||||
p("zh-Hant")
|
||||
|
||||
// The parent of a tag with variants and extensions is the tag with all
|
||||
// variants and extensions removed.
|
||||
p("de-1994-u-co-phonebk")
|
||||
|
||||
// Remove default script.
|
||||
p("de-Latn-LU")
|
||||
|
||||
// Output:
|
||||
// parent(zh-CN): zh
|
||||
// parent(en-AU): en-001
|
||||
// parent(zh-HK): zh-Hant
|
||||
// parent(zh-Hant): und
|
||||
// parent(de-1994-u-co-phonebk): de
|
||||
// parent(de-Latn-LU): de
|
||||
}
|
||||
|
||||
// ExampleMatcher_bestMatch gives some examples of getting the best match of
|
||||
// a set of tags to any of the tags of given set.
|
||||
func ExampleMatcher() {
|
||||
// This is the set of tags from which we want to pick the best match. These
|
||||
// can be, for example, the supported languages for some package.
|
||||
tags := []language.Tag{
|
||||
language.English, // en
|
||||
language.BritishEnglish, // en-GB
|
||||
language.French, // fr
|
||||
language.Afrikaans, // af
|
||||
language.BrazilianPortuguese, // pt-BR
|
||||
language.EuropeanPortuguese, // pt-PT
|
||||
language.SimplifiedChinese, // zh-Hans
|
||||
language.Raw.Make("iw-IL"), // Hebrew from Israel
|
||||
language.Raw.Make("iw"), // Hebrew
|
||||
language.Raw.Make("he"), // Hebrew
|
||||
}
|
||||
m := language.NewMatcher(tags)
|
||||
|
||||
// A simple match.
|
||||
fmt.Println(m.Match(language.Make("fr")))
|
||||
|
||||
// Australian English is closer to British English than American English.
|
||||
// The resulting match is "en-GB-u-rg-auzzzz". The first language listed,
|
||||
// "en-GB", is the matched language. Next is the region override prefix
|
||||
// "-u-rg-", the region override "au", and the region override suffix "zzzz".
|
||||
// The region override is for things like currency, dates, and measurement
|
||||
// systems.
|
||||
fmt.Println(m.Match(language.Make("en-AU")))
|
||||
|
||||
// Default to the first tag passed to the Matcher if there is no match.
|
||||
fmt.Println(m.Match(language.Make("ar")))
|
||||
|
||||
// Get the default tag.
|
||||
fmt.Println(m.Match())
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// We match SimplifiedChinese, but with Low confidence.
|
||||
fmt.Println(m.Match(language.TraditionalChinese))
|
||||
|
||||
// British English is closer to Australian English than Traditional Chinese
|
||||
// to Simplified Chinese.
|
||||
fmt.Println(m.Match(language.TraditionalChinese, language.Make("en-AU")))
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// In case a multiple variants of a language are available, the most spoken
|
||||
// variant is typically returned.
|
||||
fmt.Println(m.Match(language.Portuguese))
|
||||
|
||||
// Pick the first value passed to Match in case of a tie.
|
||||
fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
|
||||
fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// If a Matcher is initialized with a language and its deprecated version,
|
||||
// it will distinguish between them.
|
||||
fmt.Println(m.Match(language.Raw.Make("iw")))
|
||||
|
||||
// However, for non-exact matches, it will treat deprecated versions as
|
||||
// equivalent and consider other factors first.
|
||||
fmt.Println(m.Match(language.Raw.Make("he-IL")))
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// User settings passed to the Unicode extension are ignored for matching
|
||||
// and preserved in the returned tag.
|
||||
fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))
|
||||
|
||||
// Even if the matching language is different.
|
||||
fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))
|
||||
|
||||
// If there is no matching language, the options of the first preferred tag are used.
|
||||
fmt.Println(m.Match(language.Make("de-u-co-phonebk")))
|
||||
|
||||
// Output:
|
||||
// fr 2 Exact
|
||||
// en-GB-u-rg-auzzzz 1 High
|
||||
// en 0 No
|
||||
// en 0 No
|
||||
// ----
|
||||
// zh-Hans 6 Low
|
||||
// en-GB-u-rg-auzzzz 1 High
|
||||
// ----
|
||||
// pt-BR 4 Exact
|
||||
// fr-u-rg-bezzzz 2 High
|
||||
// af-u-rg-nazzzz 3 High
|
||||
// ----
|
||||
// iw-IL 7 Exact
|
||||
// he-u-rg-ilzzzz 9 Exact
|
||||
// ----
|
||||
// fr-u-cu-frf 2 Exact
|
||||
// fr-u-cu-frf 2 High
|
||||
// en-u-co-phonebk 0 No
|
||||
}
|
||||
|
||||
func ExampleMatchStrings() {
|
||||
// languages supported by this service:
|
||||
matcher := language.NewMatcher([]language.Tag{
|
||||
language.English, language.Dutch, language.German,
|
||||
})
|
||||
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
lang, _ := r.Cookie("lang")
|
||||
tag, _ := language.MatchStrings(matcher, lang.String(), r.Header.Get("Accept-Language"))
|
||||
|
||||
fmt.Println("User language:", tag)
|
||||
})
|
||||
}
|
||||
|
||||
func ExampleComprehends() {
|
||||
// Various levels of comprehensibility.
|
||||
fmt.Println(language.Comprehends(language.English, language.English))
|
||||
fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))
|
||||
|
||||
// An explicit Und results in no match.
|
||||
fmt.Println(language.Comprehends(language.English, language.Und))
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// There is usually no mutual comprehensibility between different scripts.
|
||||
fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))
|
||||
|
||||
// One exception is for Traditional versus Simplified Chinese, albeit with
|
||||
// a low confidence.
|
||||
fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))
|
||||
|
||||
fmt.Println("----")
|
||||
|
||||
// A Swiss German speaker will often understand High German.
|
||||
fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))
|
||||
|
||||
// The converse is not generally the case.
|
||||
fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))
|
||||
|
||||
// Output:
|
||||
// Exact
|
||||
// High
|
||||
// No
|
||||
// ----
|
||||
// No
|
||||
// Low
|
||||
// ----
|
||||
// High
|
||||
// No
|
||||
}
|
||||
|
||||
func ExampleTag_values() {
|
||||
us := language.MustParseRegion("US")
|
||||
en := language.MustParseBase("en")
|
||||
|
||||
lang, _, region := language.AmericanEnglish.Raw()
|
||||
fmt.Println(lang == en, region == us)
|
||||
|
||||
lang, _, region = language.BritishEnglish.Raw()
|
||||
fmt.Println(lang == en, region == us)
|
||||
|
||||
// Tags can be compared for exact equivalence using '=='.
|
||||
en_us, _ := language.Compose(en, us)
|
||||
fmt.Println(en_us == language.AmericanEnglish)
|
||||
|
||||
// Output:
|
||||
// true true
|
||||
// true false
|
||||
// true
|
||||
}
|
||||
@@ -0,0 +1,307 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output",
|
||||
"tables.go",
|
||||
"output file for generated tables")
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "language")
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeConstants()
|
||||
b.writeMatchData()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
hw io.Writer // MultiWriter for w and w.Hash
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func (b *builder) langIndex(s string) uint16 {
|
||||
return uint16(language.MustParseBase(s))
|
||||
}
|
||||
|
||||
func (b *builder) regionIndex(s string) int {
|
||||
return int(language.MustParseRegion(s))
|
||||
}
|
||||
|
||||
func (b *builder) scriptIndex(s string) int {
|
||||
return int(language.MustParseScript(s))
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
hw: io.MultiWriter(w, w.Hash),
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
|
||||
// writeConsts computes f(v) for all v in values and writes the results
|
||||
// as constants named _v to a single constant block.
|
||||
func (b *builder) writeConsts(f func(string) int, values ...string) {
|
||||
fmt.Fprintln(b.w, "const (")
|
||||
for _, v := range values {
|
||||
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
|
||||
}
|
||||
fmt.Fprintln(b.w, ")")
|
||||
}
|
||||
|
||||
// TODO: region inclusion data will probably not be use used in future matchers.
|
||||
|
||||
var langConsts = []string{
|
||||
"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
|
||||
}
|
||||
|
||||
var scriptConsts = []string{
|
||||
"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
|
||||
"Zzzz",
|
||||
}
|
||||
|
||||
var regionConsts = []string{
|
||||
"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
|
||||
"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
|
||||
}
|
||||
|
||||
func (b *builder) writeConstants() {
|
||||
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
|
||||
b.writeConsts(b.regionIndex, regionConsts...)
|
||||
b.writeConsts(b.scriptIndex, scriptConsts...)
|
||||
}
|
||||
|
||||
type mutualIntelligibility struct {
|
||||
want, have uint16
|
||||
distance uint8
|
||||
oneway bool
|
||||
}
|
||||
|
||||
type scriptIntelligibility struct {
|
||||
wantLang, haveLang uint16
|
||||
wantScript, haveScript uint8
|
||||
distance uint8
|
||||
// Always oneway
|
||||
}
|
||||
|
||||
type regionIntelligibility struct {
|
||||
lang uint16 // compact language id
|
||||
script uint8 // 0 means any
|
||||
group uint8 // 0 means any; if bit 7 is set it means inverse
|
||||
distance uint8
|
||||
// Always twoway.
|
||||
}
|
||||
|
||||
// writeMatchData writes tables with languages and scripts for which there is
|
||||
// mutual intelligibility. The data is based on CLDR's languageMatching data.
|
||||
// Note that we use a different algorithm than the one defined by CLDR and that
|
||||
// we slightly modify the data. For example, we convert scores to confidence levels.
|
||||
// We also drop all region-related data as we use a different algorithm to
|
||||
// determine region equivalence.
|
||||
func (b *builder) writeMatchData() {
|
||||
lm := b.supp.LanguageMatching.LanguageMatches
|
||||
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
|
||||
|
||||
regionHierarchy := map[string][]string{}
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
regions := strings.Split(g.Contains, " ")
|
||||
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
|
||||
}
|
||||
// Regions start at 1, so the slice must be one larger than the number of
|
||||
// regions.
|
||||
regionToGroups := make([]uint8, language.NumRegions+1)
|
||||
|
||||
idToIndex := map[string]uint8{}
|
||||
for i, mv := range lm[0].MatchVariable {
|
||||
if i > 6 {
|
||||
log.Fatalf("Too many groups: %d", i)
|
||||
}
|
||||
idToIndex[mv.Id] = uint8(i + 1)
|
||||
// TODO: also handle '-'
|
||||
for _, r := range strings.Split(mv.Value, "+") {
|
||||
todo := []string{r}
|
||||
for k := 0; k < len(todo); k++ {
|
||||
r := todo[k]
|
||||
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
|
||||
todo = append(todo, regionHierarchy[r]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
b.w.WriteVar("regionToGroups", regionToGroups)
|
||||
|
||||
// maps language id to in- and out-of-group region.
|
||||
paradigmLocales := [][3]uint16{}
|
||||
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
|
||||
for i := 0; i < len(locales); i += 2 {
|
||||
x := [3]uint16{}
|
||||
for j := 0; j < 2; j++ {
|
||||
pc := strings.SplitN(locales[i+j], "-", 2)
|
||||
x[0] = b.langIndex(pc[0])
|
||||
if len(pc) == 2 {
|
||||
x[1+j] = uint16(b.regionIndex(pc[1]))
|
||||
}
|
||||
}
|
||||
paradigmLocales = append(paradigmLocales, x)
|
||||
}
|
||||
b.w.WriteVar("paradigmLocales", paradigmLocales)
|
||||
|
||||
b.w.WriteType(mutualIntelligibility{})
|
||||
b.w.WriteType(scriptIntelligibility{})
|
||||
b.w.WriteType(regionIntelligibility{})
|
||||
|
||||
matchLang := []mutualIntelligibility{}
|
||||
matchScript := []scriptIntelligibility{}
|
||||
matchRegion := []regionIntelligibility{}
|
||||
// Convert the languageMatch entries in lists keyed by desired language.
|
||||
for _, m := range lm[0].LanguageMatch {
|
||||
// Different versions of CLDR use different separators.
|
||||
desired := strings.Replace(m.Desired, "-", "_", -1)
|
||||
supported := strings.Replace(m.Supported, "-", "_", -1)
|
||||
d := strings.Split(desired, "_")
|
||||
s := strings.Split(supported, "_")
|
||||
if len(d) != len(s) {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
continue
|
||||
}
|
||||
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
|
||||
switch len(d) {
|
||||
case 2:
|
||||
if desired == supported && desired == "*_*" {
|
||||
continue
|
||||
}
|
||||
// language-script pair.
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(d[0])),
|
||||
haveLang: uint16(b.langIndex(s[0])),
|
||||
wantScript: uint8(b.scriptIndex(d[1])),
|
||||
haveScript: uint8(b.scriptIndex(s[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
if m.Oneway != "true" {
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(s[0])),
|
||||
haveLang: uint16(b.langIndex(d[0])),
|
||||
wantScript: uint8(b.scriptIndex(s[1])),
|
||||
haveScript: uint8(b.scriptIndex(d[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
}
|
||||
case 1:
|
||||
if desired == supported && desired == "*" {
|
||||
continue
|
||||
}
|
||||
if distance == 1 {
|
||||
// nb == no is already handled by macro mapping. Check there
|
||||
// really is only this case.
|
||||
if d[0] != "no" || s[0] != "nb" {
|
||||
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
|
||||
}
|
||||
continue
|
||||
}
|
||||
// TODO: consider dropping oneway field and just doubling the entry.
|
||||
matchLang = append(matchLang, mutualIntelligibility{
|
||||
want: uint16(b.langIndex(d[0])),
|
||||
have: uint16(b.langIndex(s[0])),
|
||||
distance: uint8(distance),
|
||||
oneway: m.Oneway == "true",
|
||||
})
|
||||
case 3:
|
||||
if desired == supported && desired == "*_*_*" {
|
||||
continue
|
||||
}
|
||||
if desired != supported {
|
||||
// This is now supported by CLDR, but only one case, which
|
||||
// should already be covered by paradigm locales. For instance,
|
||||
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
|
||||
// testdata/CLDRLocaleMatcherTest.txt tests this.
|
||||
if supported != "en_*_GB" {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
continue
|
||||
}
|
||||
ri := regionIntelligibility{
|
||||
lang: b.langIndex(d[0]),
|
||||
distance: uint8(distance),
|
||||
}
|
||||
if d[1] != "*" {
|
||||
ri.script = uint8(b.scriptIndex(d[1]))
|
||||
}
|
||||
switch {
|
||||
case d[2] == "*":
|
||||
ri.group = 0x80 // not contained in anything
|
||||
case strings.HasPrefix(d[2], "$!"):
|
||||
ri.group = 0x80
|
||||
d[2] = "$" + d[2][len("$!"):]
|
||||
fallthrough
|
||||
case strings.HasPrefix(d[2], "$"):
|
||||
ri.group |= idToIndex[d[2]]
|
||||
}
|
||||
matchRegion = append(matchRegion, ri)
|
||||
default:
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
}
|
||||
sort.SliceStable(matchLang, func(i, j int) bool {
|
||||
return matchLang[i].distance < matchLang[j].distance
|
||||
})
|
||||
b.w.WriteComment(`
|
||||
matchLang holds pairs of langIDs of base languages that are typically
|
||||
mutually intelligible. Each pair is associated with a confidence and
|
||||
whether the intelligibility goes one or both ways.`)
|
||||
b.w.WriteVar("matchLang", matchLang)
|
||||
|
||||
b.w.WriteComment(`
|
||||
matchScript holds pairs of scriptIDs where readers of one script
|
||||
can typically also read the other. Each is associated with a confidence.`)
|
||||
sort.SliceStable(matchScript, func(i, j int) bool {
|
||||
return matchScript[i].distance < matchScript[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchScript", matchScript)
|
||||
|
||||
sort.SliceStable(matchRegion, func(i, j int) bool {
|
||||
return matchRegion[i].distance < matchRegion[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchRegion", matchRegion)
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// matcher is a language.Matcher configured for all supported languages.
|
||||
var matcher = language.NewMatcher([]language.Tag{
|
||||
language.BritishEnglish,
|
||||
language.Norwegian,
|
||||
language.German,
|
||||
})
|
||||
|
||||
// handler is an http.HandlerFunc.
|
||||
func handler(w http.ResponseWriter, r *http.Request) {
|
||||
t, q, err := language.ParseAcceptLanguage(r.Header.Get("Accept-Language"))
|
||||
// We ignore the error: the default language will be selected for t == nil.
|
||||
tag, _, _ := matcher.Match(t...)
|
||||
fmt.Printf("%17v (t: %6v; q: %3v; err: %v)\n", tag, t, q, err)
|
||||
}
|
||||
|
||||
func ExampleParseAcceptLanguage() {
|
||||
for _, al := range []string{
|
||||
"nn;q=0.3, en-us;q=0.8, en,",
|
||||
"gsw, en;q=0.7, en-US;q=0.8",
|
||||
"gsw, nl, da",
|
||||
"invalid",
|
||||
} {
|
||||
// Create dummy request with Accept-Language set and pass it to handler.
|
||||
r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
|
||||
r.Header.Set("Accept-Language", al)
|
||||
handler(nil, r)
|
||||
}
|
||||
|
||||
// Output:
|
||||
// en-GB (t: [ en en-US nn]; q: [ 1 0.8 0.3]; err: <nil>)
|
||||
// en-GB-u-rg-uszzzz (t: [ gsw en-US en]; q: [ 1 0.8 0.7]; err: <nil>)
|
||||
// de (t: [ gsw nl da]; q: [ 1 1 1]; err: <nil>)
|
||||
// en-GB (t: []; q: []; err: language: tag is not well-formed)
|
||||
}
|
||||
@@ -0,0 +1,605 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go -output tables.go
|
||||
|
||||
package language
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag compact.Tag
|
||||
|
||||
func makeTag(t language.Tag) (tag Tag) {
|
||||
return Tag(compact.Make(t))
|
||||
}
|
||||
|
||||
func (t *Tag) tag() language.Tag {
|
||||
return (*compact.Tag)(t).Tag()
|
||||
}
|
||||
|
||||
func (t *Tag) isCompact() bool {
|
||||
return (*compact.Tag)(t).IsCompact()
|
||||
}
|
||||
|
||||
// TODO: improve performance.
|
||||
func (t *Tag) lang() language.Language { return t.tag().LangID }
|
||||
func (t *Tag) region() language.Region { return t.tag().RegionID }
|
||||
func (t *Tag) script() language.Script { return t.tag().ScriptID }
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
return Default.Make(s)
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for c.Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func (c CanonType) Make(s string) Tag {
|
||||
t, _ := c.Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
func (t Tag) Raw() (b Base, s Script, r Region) {
|
||||
tt := t.tag()
|
||||
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
return compact.Tag(t).IsRoot()
|
||||
}
|
||||
|
||||
// CanonType can be used to enable or disable various types of canonicalization.
|
||||
type CanonType int
|
||||
|
||||
const (
|
||||
// Replace deprecated base languages with their preferred replacements.
|
||||
DeprecatedBase CanonType = 1 << iota
|
||||
// Replace deprecated scripts with their preferred replacements.
|
||||
DeprecatedScript
|
||||
// Replace deprecated regions with their preferred replacements.
|
||||
DeprecatedRegion
|
||||
// Remove redundant scripts.
|
||||
SuppressScript
|
||||
// Normalize legacy encodings. This includes legacy languages defined in
|
||||
// CLDR as well as bibliographic codes defined in ISO-639.
|
||||
Legacy
|
||||
// Map the dominant language of a macro language group to the macro language
|
||||
// subtag. For example cmn -> zh.
|
||||
Macro
|
||||
// The CLDR flag should be used if full compatibility with CLDR is required.
|
||||
// There are a few cases where language.Tag may differ from CLDR. To follow all
|
||||
// of CLDR's suggestions, use All|CLDR.
|
||||
CLDR
|
||||
|
||||
// Raw can be used to Compose or Parse without Canonicalization.
|
||||
Raw CanonType = 0
|
||||
|
||||
// Replace all deprecated tags with their preferred replacements.
|
||||
Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
|
||||
|
||||
// All canonicalizations recommended by BCP 47.
|
||||
BCP47 = Deprecated | SuppressScript
|
||||
|
||||
// All canonicalizations.
|
||||
All = BCP47 | Legacy | Macro
|
||||
|
||||
// Default is the canonicalization used by Parse, Make and Compose. To
|
||||
// preserve as much information as possible, canonicalizations that remove
|
||||
// potentially valuable information are not included. The Matcher is
|
||||
// designed to recognize similar tags that would be the same if
|
||||
// they were canonicalized using All.
|
||||
Default = Deprecated | Legacy
|
||||
|
||||
canonLang = DeprecatedBase | Legacy | Macro
|
||||
|
||||
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
|
||||
)
|
||||
|
||||
// canonicalize returns the canonicalized equivalent of the tag and
|
||||
// whether there was any change.
|
||||
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
|
||||
if c == Raw {
|
||||
return t, false
|
||||
}
|
||||
changed := false
|
||||
if c&SuppressScript != 0 {
|
||||
if t.LangID.SuppressScript() == t.ScriptID {
|
||||
t.ScriptID = 0
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
if c&canonLang != 0 {
|
||||
for {
|
||||
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
|
||||
switch aliasType {
|
||||
case language.Legacy:
|
||||
if c&Legacy != 0 {
|
||||
if t.LangID == _sh && t.ScriptID == 0 {
|
||||
t.ScriptID = _Latn
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
}
|
||||
case language.Macro:
|
||||
if c&Macro != 0 {
|
||||
// We deviate here from CLDR. The mapping "nb" -> "no"
|
||||
// qualifies as a typical Macro language mapping. However,
|
||||
// for legacy reasons, CLDR maps "no", the macro language
|
||||
// code for Norwegian, to the dominant variant "nb". This
|
||||
// change is currently under consideration for CLDR as well.
|
||||
// See https://unicode.org/cldr/trac/ticket/2698 and also
|
||||
// https://unicode.org/cldr/trac/ticket/1790 for some of the
|
||||
// practical implications. TODO: this check could be removed
|
||||
// if CLDR adopts this change.
|
||||
if c&CLDR == 0 || t.LangID != _nb {
|
||||
changed = true
|
||||
t.LangID = l
|
||||
}
|
||||
}
|
||||
case language.Deprecated:
|
||||
if c&DeprecatedBase != 0 {
|
||||
if t.LangID == _mo && t.RegionID == 0 {
|
||||
t.RegionID = _MD
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
// Other canonicalization types may still apply.
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
|
||||
t.LangID = _nb
|
||||
changed = true
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if c&DeprecatedScript != 0 {
|
||||
if t.ScriptID == _Qaai {
|
||||
changed = true
|
||||
t.ScriptID = _Zinh
|
||||
}
|
||||
}
|
||||
if c&DeprecatedRegion != 0 {
|
||||
if r := t.RegionID.Canonicalize(); r != t.RegionID {
|
||||
changed = true
|
||||
t.RegionID = r
|
||||
}
|
||||
}
|
||||
return t, changed
|
||||
}
|
||||
|
||||
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
|
||||
// First try fast path.
|
||||
if t.isCompact() {
|
||||
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// It is unlikely that one will canonicalize a tag after matching. So do
|
||||
// a slow but simple approach here.
|
||||
if tag, changed := canonicalize(c, t.tag()); changed {
|
||||
tag.RemakeString()
|
||||
return makeTag(tag), nil
|
||||
}
|
||||
return t, nil
|
||||
|
||||
}
|
||||
|
||||
// Confidence indicates the level of certainty for a given return value.
|
||||
// For example, Serbian may be written in Cyrillic or Latin script.
|
||||
// The confidence level indicates whether a value was explicitly specified,
|
||||
// whether it is typically the only possible value, or whether there is
|
||||
// an ambiguity.
|
||||
type Confidence int
|
||||
|
||||
const (
|
||||
No Confidence = iota // full confidence that there was no match
|
||||
Low // most likely value picked out of a set of alternatives
|
||||
High // value is generally assumed to be the correct match
|
||||
Exact // exact match or explicitly specified value
|
||||
)
|
||||
|
||||
var confName = []string{"No", "Low", "High", "Exact"}
|
||||
|
||||
func (c Confidence) String() string {
|
||||
return confName[c]
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
return t.tag().String()
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
return t.tag().MarshalText()
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
var tag language.Tag
|
||||
err := tag.UnmarshalText(text)
|
||||
*t = makeTag(tag)
|
||||
return err
|
||||
}
|
||||
|
||||
// Base returns the base language of the language tag. If the base language is
|
||||
// unspecified, an attempt will be made to infer it from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Base() (Base, Confidence) {
|
||||
if b := t.lang(); b != 0 {
|
||||
return Base{b}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
c := High
|
||||
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
|
||||
c = Low
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
|
||||
return Base{tag.LangID}, c
|
||||
}
|
||||
return Base{0}, No
|
||||
}
|
||||
|
||||
// Script infers the script for the language tag. If it was not explicitly given, it will infer
|
||||
// a most likely candidate.
|
||||
// If more than one script is commonly used for a language, the most likely one
|
||||
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||
// for Serbian.
|
||||
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
|
||||
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
|
||||
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
|
||||
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
|
||||
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
|
||||
// Note that an inferred script is never guaranteed to be the correct one. Latin is
|
||||
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||
// in the past. Also, the script that is commonly used may change over time.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Script() (Script, Confidence) {
|
||||
if scr := t.script(); scr != 0 {
|
||||
return Script{scr}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
sc, c := language.Script(_Zzzz), No
|
||||
if scr := tt.LangID.SuppressScript(); scr != 0 {
|
||||
// Note: it is not always the case that a language with a suppress
|
||||
// script value is only written in one script (e.g. kk, ms, pa).
|
||||
if tt.RegionID == 0 {
|
||||
return Script{scr}, High
|
||||
}
|
||||
sc, c = scr, High
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
if tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
} else {
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
}
|
||||
return Script{sc}, c
|
||||
}
|
||||
|
||||
// Region returns the region for the language tag. If it was not explicitly given, it will
|
||||
// infer a most likely candidate from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Region() (Region, Confidence) {
|
||||
if r := t.region(); r != 0 {
|
||||
return Region{r}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
if tt, err := tt.Maximize(); err == nil {
|
||||
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
|
||||
}
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
return Region{tag.RegionID}, Low
|
||||
}
|
||||
return Region{_ZZ}, No // TODO: return world instead of undetermined?
|
||||
}
|
||||
|
||||
// Variants returns the variants specified explicitly for this language tag.
|
||||
// or nil if no variant was specified.
|
||||
func (t Tag) Variants() []Variant {
|
||||
if !compact.Tag(t).MayHaveVariants() {
|
||||
return nil
|
||||
}
|
||||
v := []Variant{}
|
||||
x, str := "", t.tag().Variants()
|
||||
for str != "" {
|
||||
x, str = nextToken(str)
|
||||
v = append(v, Variant{x})
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
//
|
||||
// Parent returns a tag for a less specific language that is mutually
|
||||
// intelligible or Und if there is no such language. This may not be the same as
|
||||
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
|
||||
// is "zh-Hant", and the parent of "zh-Hant" is "und".
|
||||
func (t Tag) Parent() Tag {
|
||||
return Tag(compact.Tag(t).Parent())
|
||||
}
|
||||
|
||||
// nextToken returns token t and the rest of the string.
|
||||
func nextToken(s string) (t, tail string) {
|
||||
p := strings.Index(s[1:], "-")
|
||||
if p == -1 {
|
||||
return s[1:], ""
|
||||
}
|
||||
p++
|
||||
return s[1:p], s[p:]
|
||||
}
|
||||
|
||||
// Extension is a single BCP 47 extension.
|
||||
type Extension struct {
|
||||
s string
|
||||
}
|
||||
|
||||
// String returns the string representation of the extension, including the
|
||||
// type tag.
|
||||
func (e Extension) String() string {
|
||||
return e.s
|
||||
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (e Extension, err error) {
|
||||
ext, err := language.ParseExtension(s)
|
||||
return Extension{ext}, err
|
||||
}
|
||||
|
||||
// Type returns the one-byte extension type of e. It returns 0 for the zero
|
||||
// exception.
|
||||
func (e Extension) Type() byte {
|
||||
if e.s == "" {
|
||||
return 0
|
||||
}
|
||||
return e.s[0]
|
||||
}
|
||||
|
||||
// Tokens returns the list of tokens of e.
|
||||
func (e Extension) Tokens() []string {
|
||||
return strings.Split(e.s, "-")
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return Extension{}, false
|
||||
}
|
||||
e, ok := t.tag().Extension(x)
|
||||
return Extension{e}, ok
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []Extension {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return nil
|
||||
}
|
||||
e := []Extension{}
|
||||
for _, ext := range t.tag().Extensions() {
|
||||
e = append(e, Extension{ext})
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
//
|
||||
// If there are multiple types associated with a key, only the first will be
|
||||
// returned. If there is no type associated with a key, it returns the empty
|
||||
// string.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
if key != "rg" && key != "va" {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return t.tag().TypeForKey(key)
|
||||
}
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// An empty value removes an existing pair with the same key.
|
||||
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||
tt, err := t.tag().SetTypeForKey(key, value)
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// NumCompactTags is the number of compact tags. The maximum tag is
|
||||
// NumCompactTags-1.
|
||||
const NumCompactTags = compact.NumCompactTags
|
||||
|
||||
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func CompactIndex(t Tag) (index int, exact bool) {
|
||||
id, exact := compact.LanguageID(compact.Tag(t))
|
||||
return int(id), exact
|
||||
}
|
||||
|
||||
var root = language.Tag{}
|
||||
|
||||
// Base is an ISO 639 language code, used for encoding the base language
|
||||
// of a language tag.
|
||||
type Base struct {
|
||||
langID language.Language
|
||||
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Base, error) {
|
||||
l, err := language.ParseBase(s)
|
||||
return Base{l}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the base language.
|
||||
func (b Base) String() string {
|
||||
return b.langID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Base) ISO3() string {
|
||||
return b.langID.ISO3()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Base) IsPrivateUse() bool {
|
||||
return b.langID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||
// It is idiomatically represented in title case.
|
||||
type Script struct {
|
||||
scriptID language.Script
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
sc, err := language.ParseScript(s)
|
||||
return Script{sc}, err
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
return s.scriptID.String()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return s.scriptID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||
type Region struct {
|
||||
regionID language.Region
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
rid, err := language.EncodeM49(r)
|
||||
return Region{rid}, err
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
r, err := language.ParseRegion(s)
|
||||
return Region{r}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
return r.regionID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
return r.regionID.ISO3()
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||
// is not defined for r.
|
||||
func (r Region) M49() int {
|
||||
return r.regionID.M49()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r Region) IsPrivateUse() bool {
|
||||
return r.regionID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
return r.regionID.IsCountry()
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
return r.regionID.IsGroup()
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
return r.regionID.Contains(c.regionID)
|
||||
}
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
tld, err := r.regionID.TLD()
|
||||
return Region{tld}, err
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
return Region{r.regionID.Canonicalize()}
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
variant string
|
||||
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
v, err := language.ParseVariant(s)
|
||||
return Variant{v.String()}, err
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.variant
|
||||
}
|
||||
@@ -0,0 +1,788 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTagSize(t *testing.T) {
|
||||
id := Tag{}
|
||||
typ := reflect.TypeOf(id)
|
||||
if typ.Size() > 24 {
|
||||
t.Errorf("size of Tag was %d; want 24", typ.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRoot(t *testing.T) {
|
||||
loc := Tag{}
|
||||
if !loc.IsRoot() {
|
||||
t.Errorf("unspecified should be root.")
|
||||
}
|
||||
for i, tt := range parseTests() {
|
||||
loc, _ := Parse(tt.in)
|
||||
undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
|
||||
if loc.IsRoot() != undef {
|
||||
t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEquality(t *testing.T) {
|
||||
for i, tt := range parseTests() {
|
||||
s := tt.in
|
||||
tag := Make(s)
|
||||
t1 := Make(tag.String())
|
||||
if tag != t1 {
|
||||
t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
|
||||
}
|
||||
t2, _ := Compose(tag)
|
||||
if tag != t2 {
|
||||
t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
tests := []string{
|
||||
"no-u-rg-dkzzzz",
|
||||
}
|
||||
for i, s := range tests {
|
||||
tag := Make(s)
|
||||
if tag.String() != s {
|
||||
t.Errorf("%d:%s: got %s: want %s (%#v)", i, s, tag.String(), s, tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshal(t *testing.T) {
|
||||
testCases := []string{
|
||||
// TODO: these values will change with each CLDR update. This issue
|
||||
// will be solved if we decide to fix the indexes.
|
||||
"und",
|
||||
"ca-ES-valencia",
|
||||
"ca-ES-valencia-u-va-posix",
|
||||
"ca-ES-valencia-u-co-phonebk",
|
||||
"ca-ES-valencia-u-co-phonebk-va-posix",
|
||||
"x-klingon",
|
||||
"en-US",
|
||||
"en-US-u-va-posix",
|
||||
"en",
|
||||
"en-u-co-phonebk",
|
||||
"en-001",
|
||||
"sh",
|
||||
|
||||
"en-GB-u-rg-uszzzz",
|
||||
"en-GB-u-rg-uszzzz-va-posix",
|
||||
"en-GB-u-co-phonebk-rg-uszzzz",
|
||||
// Invalid tags should also roundtrip.
|
||||
"en-GB-u-co-phonebk-rg-uszz",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
var tag Tag
|
||||
err := tag.UnmarshalText([]byte(tc))
|
||||
if err != nil {
|
||||
t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
|
||||
}
|
||||
b, err := tag.MarshalText()
|
||||
if err != nil {
|
||||
t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
|
||||
}
|
||||
if got := string(b); got != tc {
|
||||
t.Errorf("%s: got %q; want %q", tc, got, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBase(t *testing.T) {
|
||||
tests := []struct {
|
||||
loc, lang string
|
||||
conf Confidence
|
||||
}{
|
||||
{"und", "en", Low},
|
||||
{"x-abc", "und", No},
|
||||
{"en", "en", Exact},
|
||||
{"und-Cyrl", "ru", High},
|
||||
// If a region is not included, the official language should be English.
|
||||
{"und-US", "en", High},
|
||||
// TODO: not-explicitly listed scripts should probably be und, No
|
||||
// Modify addTags to return info on how the match was derived.
|
||||
// {"und-Aghb", "und", No},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
loc, _ := Parse(tt.loc)
|
||||
lang, conf := loc.Base()
|
||||
if lang.String() != tt.lang {
|
||||
t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
|
||||
}
|
||||
if conf != tt.conf {
|
||||
t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBase(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"en", "en", true},
|
||||
{"EN", "en", true},
|
||||
{"nld", "nl", true},
|
||||
{"dut", "dut", true}, // bibliographic
|
||||
{"aaj", "und", false}, // unknown
|
||||
{"qaa", "qaa", true},
|
||||
{"a", "und", false},
|
||||
{"", "und", false},
|
||||
{"aaaa", "und", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, err := ParseBase(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScript(t *testing.T) {
|
||||
tests := []struct {
|
||||
loc, scr string
|
||||
conf Confidence
|
||||
}{
|
||||
{"und", "Latn", Low},
|
||||
{"en-Latn", "Latn", Exact},
|
||||
{"en", "Latn", High},
|
||||
{"sr", "Cyrl", Low},
|
||||
{"kk", "Cyrl", High},
|
||||
{"kk-CN", "Arab", Low},
|
||||
{"cmn", "Hans", Low},
|
||||
{"ru", "Cyrl", High},
|
||||
{"ru-RU", "Cyrl", High},
|
||||
{"yue", "Hant", Low},
|
||||
{"x-abc", "Zzzz", Low},
|
||||
{"und-zyyy", "Zyyy", Exact},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
loc, _ := Parse(tt.loc)
|
||||
sc, conf := loc.Script()
|
||||
if sc.String() != tt.scr {
|
||||
t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
|
||||
}
|
||||
if conf != tt.conf {
|
||||
t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseScript(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"Latn", "Latn", true},
|
||||
{"zzzz", "Zzzz", true},
|
||||
{"zyyy", "Zyyy", true},
|
||||
{"Latm", "Zzzz", false},
|
||||
{"Zzz", "Zzzz", false},
|
||||
{"", "Zzzz", false},
|
||||
{"Zzzxx", "Zzzz", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, err := ParseScript(tt.in)
|
||||
if x.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if err == nil {
|
||||
if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegion(t *testing.T) {
|
||||
tests := []struct {
|
||||
loc, reg string
|
||||
conf Confidence
|
||||
}{
|
||||
{"und", "US", Low},
|
||||
{"en", "US", Low},
|
||||
{"zh-Hant", "TW", Low},
|
||||
{"en-US", "US", Exact},
|
||||
{"cmn", "CN", Low},
|
||||
{"ru", "RU", Low},
|
||||
{"yue", "HK", Low},
|
||||
{"x-abc", "ZZ", Low},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
loc, _ := Raw.Parse(tt.loc)
|
||||
reg, conf := loc.Region()
|
||||
if reg.String() != tt.reg {
|
||||
t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
|
||||
}
|
||||
if conf != tt.conf {
|
||||
t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeM49(t *testing.T) {
|
||||
tests := []struct {
|
||||
m49 int
|
||||
code string
|
||||
ok bool
|
||||
}{
|
||||
{1, "001", true},
|
||||
{840, "US", true},
|
||||
{899, "ZZ", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
|
||||
t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
|
||||
}
|
||||
}
|
||||
for i := 1; i <= 1000; i++ {
|
||||
if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
|
||||
t.Errorf("%d has no error, but maps to undefined region", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRegion(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
out string
|
||||
ok bool
|
||||
}{
|
||||
{"001", "001", true},
|
||||
{"840", "US", true},
|
||||
{"899", "ZZ", false},
|
||||
{"USA", "US", true},
|
||||
{"US", "US", true},
|
||||
{"BC", "ZZ", false},
|
||||
{"C", "ZZ", false},
|
||||
{"CCCC", "ZZ", false},
|
||||
{"01", "ZZ", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, err := ParseRegion(tt.in)
|
||||
if r.String() != tt.out || err == nil != tt.ok {
|
||||
t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
|
||||
}
|
||||
if err == nil {
|
||||
if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
|
||||
t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsCountry(t *testing.T) {
|
||||
tests := []struct {
|
||||
reg string
|
||||
country bool
|
||||
}{
|
||||
{"US", true},
|
||||
{"001", false},
|
||||
{"958", false},
|
||||
{"419", false},
|
||||
{"203", true},
|
||||
{"020", true},
|
||||
{"900", false},
|
||||
{"999", false},
|
||||
{"QO", false},
|
||||
{"EU", false},
|
||||
{"AA", false},
|
||||
{"XK", true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, _ := ParseRegion(tt.reg)
|
||||
if r.IsCountry() != tt.country {
|
||||
t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsGroup(t *testing.T) {
|
||||
tests := []struct {
|
||||
reg string
|
||||
group bool
|
||||
}{
|
||||
{"US", false},
|
||||
{"001", true},
|
||||
{"958", false},
|
||||
{"419", true},
|
||||
{"203", false},
|
||||
{"020", false},
|
||||
{"900", false},
|
||||
{"999", false},
|
||||
{"QO", true},
|
||||
{"EU", true},
|
||||
{"AA", false},
|
||||
{"XK", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r, _ := ParseRegion(tt.reg)
|
||||
if r.IsGroup() != tt.group {
|
||||
t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestContains(t *testing.T) {
|
||||
tests := []struct {
|
||||
enclosing, contained string
|
||||
contains bool
|
||||
}{
|
||||
// A region contains itself.
|
||||
{"US", "US", true},
|
||||
{"001", "001", true},
|
||||
|
||||
// Direct containment.
|
||||
{"001", "002", true},
|
||||
{"039", "XK", true},
|
||||
{"150", "XK", true},
|
||||
{"EU", "AT", true},
|
||||
{"QO", "AQ", true},
|
||||
|
||||
// Indirect containemnt.
|
||||
{"001", "US", true},
|
||||
{"001", "419", true},
|
||||
{"001", "013", true},
|
||||
|
||||
// No containment.
|
||||
{"US", "001", false},
|
||||
{"155", "EU", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
r := MustParseRegion(tt.enclosing)
|
||||
con := MustParseRegion(tt.contained)
|
||||
if got := r.Contains(con); got != tt.contains {
|
||||
t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionCanonicalize(t *testing.T) {
|
||||
for i, tt := range []struct{ in, out string }{
|
||||
{"UK", "GB"},
|
||||
{"TP", "TL"},
|
||||
{"QU", "EU"},
|
||||
{"SU", "SU"},
|
||||
{"VD", "VN"},
|
||||
{"DD", "DE"},
|
||||
} {
|
||||
r := MustParseRegion(tt.in)
|
||||
want := MustParseRegion(tt.out)
|
||||
if got := r.Canonicalize(); got != want {
|
||||
t.Errorf("%d: got %v; want %v", i, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionTLD(t *testing.T) {
|
||||
for _, tt := range []struct {
|
||||
in, out string
|
||||
ok bool
|
||||
}{
|
||||
{"EH", "EH", true},
|
||||
{"FR", "FR", true},
|
||||
{"TL", "TL", true},
|
||||
|
||||
// In ccTLD before in ISO.
|
||||
{"GG", "GG", true},
|
||||
|
||||
// Non-standard assignment of ccTLD to ISO code.
|
||||
{"GB", "UK", true},
|
||||
|
||||
// Exceptionally reserved in ISO and valid ccTLD.
|
||||
{"UK", "UK", true},
|
||||
{"AC", "AC", true},
|
||||
{"EU", "EU", true},
|
||||
{"SU", "SU", true},
|
||||
|
||||
// Exceptionally reserved in ISO and invalid ccTLD.
|
||||
{"CP", "ZZ", false},
|
||||
{"DG", "ZZ", false},
|
||||
{"EA", "ZZ", false},
|
||||
{"FX", "ZZ", false},
|
||||
{"IC", "ZZ", false},
|
||||
{"TA", "ZZ", false},
|
||||
|
||||
// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
|
||||
// it is still being phased out.
|
||||
{"AN", "AN", true},
|
||||
{"TP", "TP", true},
|
||||
|
||||
// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
|
||||
// Defined in package language as it has a mapping in CLDR.
|
||||
{"BU", "ZZ", false},
|
||||
{"CS", "ZZ", false},
|
||||
{"NT", "ZZ", false},
|
||||
{"YU", "ZZ", false},
|
||||
{"ZR", "ZZ", false},
|
||||
// Not defined in package: SF.
|
||||
|
||||
// Indeterminately reserved in ISO.
|
||||
// Defined in package language as it has a legacy mapping in CLDR.
|
||||
{"DY", "ZZ", false},
|
||||
{"RH", "ZZ", false},
|
||||
{"VD", "ZZ", false},
|
||||
// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
|
||||
// RN, RP, WG, WL, WV, and YV.
|
||||
|
||||
// Not assigned in ISO, but legacy definitions in CLDR.
|
||||
{"DD", "ZZ", false},
|
||||
{"YD", "ZZ", false},
|
||||
|
||||
// Normal mappings but somewhat special status in ccTLD.
|
||||
{"BL", "BL", true},
|
||||
{"MF", "MF", true},
|
||||
{"BV", "BV", true},
|
||||
{"SJ", "SJ", true},
|
||||
|
||||
// Have values when normalized, but not as is.
|
||||
{"QU", "ZZ", false},
|
||||
|
||||
// ISO Private Use.
|
||||
{"AA", "ZZ", false},
|
||||
{"QM", "ZZ", false},
|
||||
{"QO", "ZZ", false},
|
||||
{"XA", "ZZ", false},
|
||||
{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
|
||||
} {
|
||||
if tt.in == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
r := MustParseRegion(tt.in)
|
||||
var want Region
|
||||
if tt.out != "ZZ" {
|
||||
want = MustParseRegion(tt.out)
|
||||
}
|
||||
tld, err := r.TLD()
|
||||
if got := err == nil; got != tt.ok {
|
||||
t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
|
||||
}
|
||||
if tld != want {
|
||||
t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanonicalize(t *testing.T) {
|
||||
// TODO: do a full test using CLDR data in a separate regression test.
|
||||
tests := []struct {
|
||||
in, out string
|
||||
option CanonType
|
||||
}{
|
||||
{"en-Latn", "en", SuppressScript},
|
||||
{"sr-Cyrl", "sr-Cyrl", SuppressScript},
|
||||
{"sh", "sr-Latn", Legacy},
|
||||
{"sh-HR", "sr-Latn-HR", Legacy},
|
||||
{"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
|
||||
{"tl", "fil", Legacy},
|
||||
{"no", "no", Legacy},
|
||||
{"no", "nb", Legacy | CLDR},
|
||||
{"cmn", "cmn", Legacy},
|
||||
{"cmn", "zh", Macro},
|
||||
{"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
|
||||
{"yue", "yue", Macro},
|
||||
{"nb", "no", Macro},
|
||||
{"nb", "nb", Macro | CLDR},
|
||||
{"no", "no", Macro},
|
||||
{"no", "no", Macro | CLDR},
|
||||
{"iw", "he", DeprecatedBase},
|
||||
{"iw", "he", Deprecated | CLDR},
|
||||
{"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
|
||||
{"alb", "sq", Legacy}, // bibliographic
|
||||
{"dut", "nl", Legacy}, // bibliographic
|
||||
// As of CLDR 25, mo is no longer considered a legacy mapping.
|
||||
{"mo", "mo", Legacy | CLDR},
|
||||
{"und-AN", "und-AN", Deprecated},
|
||||
{"und-YD", "und-YE", DeprecatedRegion},
|
||||
{"und-YD", "und-YD", DeprecatedBase},
|
||||
{"und-Qaai", "und-Zinh", DeprecatedScript},
|
||||
{"und-Qaai", "und-Qaai", DeprecatedBase},
|
||||
{"drh", "mn", All}, // drh -> khk -> mn
|
||||
|
||||
{"en-GB-u-rg-uszzzz", "en-GB-u-rg-uszzzz", Raw},
|
||||
{"en-GB-u-rg-USZZZZ", "en-GB-u-rg-uszzzz", Raw},
|
||||
// TODO: use different exact values for language and regional tag?
|
||||
{"en-GB-u-rg-uszzzz-va-posix", "en-GB-u-rg-uszzzz-va-posix", Raw},
|
||||
{"en-GB-u-rg-uszzzz-co-phonebk", "en-GB-u-co-phonebk-rg-uszzzz", Raw},
|
||||
// Invalid region specifications are left as is.
|
||||
{"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw},
|
||||
{"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw},
|
||||
{"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw},
|
||||
|
||||
// CVE-2020-28851
|
||||
// invalid key-value pair of -u- extension.
|
||||
{"ES-u-000-00", "es-u-000-00", Raw},
|
||||
{"ES-u-000-00-v-00", "es-u-000-00-v-00", Raw},
|
||||
// reordered and unknown extension.
|
||||
{"ES-v-00-u-000-00", "es-u-000-00-v-00", Raw},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
in, _ := Raw.Parse(tt.in)
|
||||
in, _ = tt.option.Canonicalize(in)
|
||||
if in.String() != tt.out {
|
||||
t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
|
||||
}
|
||||
}
|
||||
// Test idempotence.
|
||||
for _, base := range Supported.BaseLanguages() {
|
||||
tag, _ := Raw.Compose(base)
|
||||
got, _ := All.Canonicalize(tag)
|
||||
want, _ := All.Canonicalize(got)
|
||||
if got != want {
|
||||
t.Errorf("idem(%s): got %s; want %s", tag, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTypeForKey(t *testing.T) {
|
||||
tests := []struct{ key, in, out string }{
|
||||
{"co", "en", ""},
|
||||
{"co", "en-u-abc", ""},
|
||||
{"co", "en-u-co-phonebk", "phonebk"},
|
||||
{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
|
||||
{"co", "x-foo-u-co-phonebk", ""},
|
||||
{"va", "en-US-u-va-posix", "posix"},
|
||||
{"rg", "en-u-rg-gbzzzz", "gbzzzz"},
|
||||
{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
|
||||
{"kc", "cmn-u-co-stroke", ""},
|
||||
{"rg", "cmn-u-rg", ""},
|
||||
{"rg", "cmn-u-rg-co-stroke", ""},
|
||||
{"co", "cmn-u-rg-co-stroke", "stroke"},
|
||||
{"co", "cmn-u-co-rg-gbzzzz", ""},
|
||||
{"rg", "cmn-u-co-rg-gbzzzz", "gbzzzz"},
|
||||
{"rg", "cmn-u-rg-gbzzzz-nlzzzz", "gbzzzz"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
|
||||
t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParent(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
// Strip variants and extensions first
|
||||
{"de-u-co-phonebk", "de"},
|
||||
{"de-1994", "de"},
|
||||
{"de-Latn-1994", "de"}, // remove superfluous script.
|
||||
|
||||
// Ensure the canonical Tag for an entry is in the chain for base-script
|
||||
// pairs.
|
||||
{"zh-Hans", "zh"},
|
||||
|
||||
// Skip the script if it is the maximized version. CLDR files for the
|
||||
// skipped tag are always empty.
|
||||
{"zh-Hans-TW", "zh"},
|
||||
{"zh-Hans-CN", "zh"},
|
||||
|
||||
// Insert the script if the maximized script is not the same as the
|
||||
// maximized script of the base language.
|
||||
{"zh-TW", "zh-Hant"},
|
||||
{"zh-HK", "zh-Hant"},
|
||||
{"zh-Hant-TW", "zh-Hant"},
|
||||
{"zh-Hant-HK", "zh-Hant"},
|
||||
|
||||
// Non-default script skips to und.
|
||||
// CLDR
|
||||
{"az-Cyrl", "und"},
|
||||
{"bs-Cyrl", "und"},
|
||||
{"en-Dsrt", "und"},
|
||||
{"ha-Arab", "und"},
|
||||
{"mn-Mong", "und"},
|
||||
{"pa-Arab", "und"},
|
||||
{"shi-Latn", "und"},
|
||||
{"sr-Latn", "und"},
|
||||
{"uz-Arab", "und"},
|
||||
{"uz-Cyrl", "und"},
|
||||
{"vai-Latn", "und"},
|
||||
{"zh-Hant", "und"},
|
||||
// extra
|
||||
{"nl-Cyrl", "und"},
|
||||
|
||||
// World english inherits from en-001.
|
||||
{"en-150", "en-001"},
|
||||
{"en-AU", "en-001"},
|
||||
{"en-BE", "en-001"},
|
||||
{"en-GG", "en-001"},
|
||||
{"en-GI", "en-001"},
|
||||
{"en-HK", "en-001"},
|
||||
{"en-IE", "en-001"},
|
||||
{"en-IM", "en-001"},
|
||||
{"en-IN", "en-001"},
|
||||
{"en-JE", "en-001"},
|
||||
{"en-MT", "en-001"},
|
||||
{"en-NZ", "en-001"},
|
||||
{"en-PK", "en-001"},
|
||||
{"en-SG", "en-001"},
|
||||
|
||||
// Spanish in Latin-American countries have es-419 as parent.
|
||||
{"es-AR", "es-419"},
|
||||
{"es-BO", "es-419"},
|
||||
{"es-CL", "es-419"},
|
||||
{"es-CO", "es-419"},
|
||||
{"es-CR", "es-419"},
|
||||
{"es-CU", "es-419"},
|
||||
{"es-DO", "es-419"},
|
||||
{"es-EC", "es-419"},
|
||||
{"es-GT", "es-419"},
|
||||
{"es-HN", "es-419"},
|
||||
{"es-MX", "es-419"},
|
||||
{"es-NI", "es-419"},
|
||||
{"es-PA", "es-419"},
|
||||
{"es-PE", "es-419"},
|
||||
{"es-PR", "es-419"},
|
||||
{"es-PY", "es-419"},
|
||||
{"es-SV", "es-419"},
|
||||
{"es-US", "es-419"},
|
||||
{"es-UY", "es-419"},
|
||||
{"es-VE", "es-419"},
|
||||
// exceptions (according to CLDR)
|
||||
{"es-CW", "es"},
|
||||
|
||||
// Inherit from pt-PT, instead of pt for these countries.
|
||||
{"pt-AO", "pt-PT"},
|
||||
{"pt-CV", "pt-PT"},
|
||||
{"pt-GW", "pt-PT"},
|
||||
{"pt-MO", "pt-PT"},
|
||||
{"pt-MZ", "pt-PT"},
|
||||
{"pt-ST", "pt-PT"},
|
||||
{"pt-TL", "pt-PT"},
|
||||
|
||||
{"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
|
||||
{"en-GB-u-rg-uszzzz", "en-GB"},
|
||||
{"en-US-u-va-posix", "en-US"},
|
||||
|
||||
// Difference between language and regional tag.
|
||||
{"ca-ES-valencia", "ca-ES"},
|
||||
{"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"},
|
||||
{"en-US-u-va-variant", "en-US"},
|
||||
{"en-u-va-variant", "en"},
|
||||
{"en-u-rg-gbzzzz", "en"},
|
||||
{"en-US-u-rg-gbzzzz", "en-US"},
|
||||
{"nl-US-u-rg-gbzzzz", "nl-US"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tag := Raw.MustParse(tt.in)
|
||||
if p := Raw.MustParse(tt.out); p != tag.Parent() {
|
||||
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// Tags without error that don't need to be changed.
|
||||
benchBasic = []string{
|
||||
"en",
|
||||
"en-Latn",
|
||||
"en-GB",
|
||||
"za",
|
||||
"zh-Hant",
|
||||
"zh",
|
||||
"zh-HK",
|
||||
"ar-MK",
|
||||
"en-CA",
|
||||
"fr-CA",
|
||||
"fr-CH",
|
||||
"fr",
|
||||
"lv",
|
||||
"he-IT",
|
||||
"tlh",
|
||||
"ja",
|
||||
"ja-Jpan",
|
||||
"ja-Jpan-JP",
|
||||
"de-1996",
|
||||
"de-CH",
|
||||
"sr",
|
||||
"sr-Latn",
|
||||
}
|
||||
// Tags with extensions, not changes required.
|
||||
benchExt = []string{
|
||||
"x-a-b-c-d",
|
||||
"x-aa-bbbb-cccccccc-d",
|
||||
"en-x_cc-b-bbb-a-aaa",
|
||||
"en-c_cc-b-bbb-a-aaa-x-x",
|
||||
"en-u-co-phonebk",
|
||||
"en-Cyrl-u-co-phonebk",
|
||||
"en-US-u-co-phonebk-cu-xau",
|
||||
"en-nedix-u-co-phonebk",
|
||||
"en-t-t0-abcd",
|
||||
"en-t-nl-latn",
|
||||
"en-t-t0-abcd-x-a",
|
||||
"en_t_pt_MLt",
|
||||
"en-t-fr-est",
|
||||
}
|
||||
// Change, but not memory allocation required.
|
||||
benchSimpleChange = []string{
|
||||
"EN",
|
||||
"i-klingon",
|
||||
"en-latn",
|
||||
"zh-cmn-Hans-CN",
|
||||
"iw-NL",
|
||||
}
|
||||
// Change and memory allocation required.
|
||||
benchChangeAlloc = []string{
|
||||
"en-c_cc-b-bbb-a-aaa",
|
||||
"en-u-cu-xua-co-phonebk",
|
||||
"en-u-cu-xua-co-phonebk-a-cd",
|
||||
"en-u-def-abc-cu-xua-co-phonebk",
|
||||
"en-t-en-Cyrl-NL-1994",
|
||||
"en-t-en-Cyrl-NL-1994-t0-abc-def",
|
||||
}
|
||||
// Tags that result in errors.
|
||||
benchErr = []string{
|
||||
// IllFormed
|
||||
"x_A.-B-C_D",
|
||||
"en-u-cu-co-phonebk",
|
||||
"en-u-cu-xau-co",
|
||||
"en-t-nl-abcd",
|
||||
// Invalid
|
||||
"xx",
|
||||
"nl-Uuuu",
|
||||
"nl-QB",
|
||||
}
|
||||
benchChange = append(benchSimpleChange, benchChangeAlloc...)
|
||||
benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
|
||||
)
|
||||
|
||||
func doParse(b *testing.B, tag []string) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
// Use the modulo instead of looping over all tags so that we get a somewhat
|
||||
// meaningful ns/op.
|
||||
Parse(tag[i%len(tag)])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkParse(b *testing.B) {
|
||||
doParse(b, benchAll)
|
||||
}
|
||||
|
||||
func BenchmarkParseBasic(b *testing.B) {
|
||||
doParse(b, benchBasic)
|
||||
}
|
||||
|
||||
func BenchmarkParseError(b *testing.B) {
|
||||
doParse(b, benchErr)
|
||||
}
|
||||
|
||||
func BenchmarkParseSimpleChange(b *testing.B) {
|
||||
doParse(b, benchSimpleChange)
|
||||
}
|
||||
|
||||
func BenchmarkParseChangeAlloc(b *testing.B) {
|
||||
doParse(b, benchChangeAlloc)
|
||||
}
|
||||
@@ -0,0 +1,281 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRegionID(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{"_ ", ""},
|
||||
{"_000", ""},
|
||||
{"419", "419"},
|
||||
{"AA", "AA"},
|
||||
{"ATF", "TF"},
|
||||
{"HV", "HV"},
|
||||
{"CT", "CT"},
|
||||
{"DY", "DY"},
|
||||
{"IC", "IC"},
|
||||
{"FQ", "FQ"},
|
||||
{"JT", "JT"},
|
||||
{"ZZ", "ZZ"},
|
||||
{"EU", "EU"},
|
||||
{"QO", "QO"},
|
||||
{"FX", "FX"},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if tt.in[0] == '_' {
|
||||
id := tt.in[1:]
|
||||
if _, err := ParseRegion(id); err == nil {
|
||||
t.Errorf("%d:err(%s): found nil; want error", i, id)
|
||||
}
|
||||
continue
|
||||
}
|
||||
want, _ := ParseRegion(tt.in)
|
||||
if s := want.String(); s != tt.out {
|
||||
t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out)
|
||||
}
|
||||
if len(tt.in) == 2 {
|
||||
want, _ := ParseRegion(tt.in)
|
||||
if s := want.String(); s != tt.out {
|
||||
t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionISO3(t *testing.T) {
|
||||
tests := []struct {
|
||||
from, iso3, to string
|
||||
}{
|
||||
{" ", "ZZZ", "ZZ"},
|
||||
{"000", "ZZZ", "ZZ"},
|
||||
{"AA", "AAA", ""},
|
||||
{"CT", "CTE", ""},
|
||||
{"DY", "DHY", ""},
|
||||
{"EU", "QUU", ""},
|
||||
{"HV", "HVO", ""},
|
||||
{"IC", "ZZZ", "ZZ"},
|
||||
{"JT", "JTN", ""},
|
||||
{"PZ", "PCZ", ""},
|
||||
{"QU", "QUU", "EU"},
|
||||
{"QO", "QOO", ""},
|
||||
{"YD", "YMD", ""},
|
||||
{"FQ", "ATF", "TF"},
|
||||
{"TF", "ATF", ""},
|
||||
{"FX", "FXX", ""},
|
||||
{"ZZ", "ZZZ", ""},
|
||||
{"419", "ZZZ", "ZZ"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
r, _ := ParseRegion(tt.from)
|
||||
if s := r.ISO3(); s != tt.iso3 {
|
||||
t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
|
||||
}
|
||||
if tt.iso3 == "" {
|
||||
continue
|
||||
}
|
||||
want := tt.to
|
||||
if tt.to == "" {
|
||||
want = tt.from
|
||||
}
|
||||
r, _ = ParseRegion(want)
|
||||
if id, _ := ParseRegion(tt.iso3); id != r {
|
||||
t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionM49(t *testing.T) {
|
||||
fromTests := []struct {
|
||||
m49 int
|
||||
id string
|
||||
}{
|
||||
{0, ""},
|
||||
{-1, ""},
|
||||
{1000, ""},
|
||||
{10000, ""},
|
||||
|
||||
{001, "001"},
|
||||
{104, "MM"},
|
||||
{180, "CD"},
|
||||
{230, "ET"},
|
||||
{231, "ET"},
|
||||
{249, "FX"},
|
||||
{250, "FR"},
|
||||
{276, "DE"},
|
||||
{278, "DD"},
|
||||
{280, "DE"},
|
||||
{419, "419"},
|
||||
{626, "TL"},
|
||||
{736, "SD"},
|
||||
{840, "US"},
|
||||
{854, "BF"},
|
||||
{891, "CS"},
|
||||
{899, ""},
|
||||
{958, "AA"},
|
||||
{966, "QT"},
|
||||
{967, "EU"},
|
||||
{999, "ZZ"},
|
||||
}
|
||||
for _, tt := range fromTests {
|
||||
id, err := EncodeM49(tt.m49)
|
||||
if want, have := err != nil, tt.id == ""; want != have {
|
||||
t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
|
||||
continue
|
||||
}
|
||||
r, _ := ParseRegion(tt.id)
|
||||
if r != id {
|
||||
t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
|
||||
}
|
||||
}
|
||||
|
||||
toTests := []struct {
|
||||
m49 int
|
||||
id string
|
||||
}{
|
||||
{0, "000"},
|
||||
{0, "IC"}, // Some codes don't have an ID
|
||||
|
||||
{001, "001"},
|
||||
{104, "MM"},
|
||||
{104, "BU"},
|
||||
{180, "CD"},
|
||||
{180, "ZR"},
|
||||
{231, "ET"},
|
||||
{250, "FR"},
|
||||
{249, "FX"},
|
||||
{276, "DE"},
|
||||
{278, "DD"},
|
||||
{419, "419"},
|
||||
{626, "TL"},
|
||||
{626, "TP"},
|
||||
{729, "SD"},
|
||||
{826, "GB"},
|
||||
{840, "US"},
|
||||
{854, "BF"},
|
||||
{891, "YU"},
|
||||
{891, "CS"},
|
||||
{958, "AA"},
|
||||
{966, "QT"},
|
||||
{967, "EU"},
|
||||
{967, "QU"},
|
||||
{999, "ZZ"},
|
||||
// For codes that don't have an M49 code use the replacement value,
|
||||
// if available.
|
||||
{854, "HV"}, // maps to Burkino Faso
|
||||
}
|
||||
for _, tt := range toTests {
|
||||
r, _ := ParseRegion(tt.id)
|
||||
if r.M49() != tt.m49 {
|
||||
t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionDeprecation(t *testing.T) {
|
||||
tests := []struct{ in, out string }{
|
||||
{"BU", "MM"},
|
||||
{"BUR", "MM"},
|
||||
{"CT", "KI"},
|
||||
{"DD", "DE"},
|
||||
{"DDR", "DE"},
|
||||
{"DY", "BJ"},
|
||||
{"FX", "FR"},
|
||||
{"HV", "BF"},
|
||||
{"JT", "UM"},
|
||||
{"MI", "UM"},
|
||||
{"NH", "VU"},
|
||||
{"NQ", "AQ"},
|
||||
{"PU", "UM"},
|
||||
{"PZ", "PA"},
|
||||
{"QU", "EU"},
|
||||
{"RH", "ZW"},
|
||||
{"TP", "TL"},
|
||||
{"UK", "GB"},
|
||||
{"VD", "VN"},
|
||||
{"WK", "UM"},
|
||||
{"YD", "YE"},
|
||||
{"NL", "NL"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
rIn, _ := ParseRegion(tt.in)
|
||||
rOut, _ := ParseRegion(tt.out)
|
||||
r := rIn.Canonicalize()
|
||||
if rOut == rIn && r.String() == "ZZ" {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, r, tt.in)
|
||||
}
|
||||
if rOut != rIn && r != rOut {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, r, tt.out)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivateUse(t *testing.T) {
|
||||
type test struct {
|
||||
s string
|
||||
private bool
|
||||
}
|
||||
tests := []test{
|
||||
{"en", false},
|
||||
{"und", false},
|
||||
{"pzn", false},
|
||||
{"qaa", true},
|
||||
{"qtz", true},
|
||||
{"qua", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := ParseBase(tt.s)
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
tests = []test{
|
||||
{"001", false},
|
||||
{"419", false},
|
||||
{"899", false},
|
||||
{"900", false},
|
||||
{"957", false},
|
||||
{"958", true},
|
||||
{"AA", true},
|
||||
{"AC", false},
|
||||
{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
|
||||
{"QU", true}, // Canonicalizes to EU, User-assigned in ISO.
|
||||
{"QO", true}, // CLDR grouping, User-assigned in ISO.
|
||||
{"QA", false},
|
||||
{"QM", true},
|
||||
{"QZ", true},
|
||||
{"XA", true},
|
||||
{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
|
||||
{"XZ", true},
|
||||
{"ZW", false},
|
||||
{"ZZ", true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := ParseRegion(tt.s)
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
tests = []test{
|
||||
{"Latn", false},
|
||||
{"Laaa", false}, // invalid
|
||||
{"Qaaa", true},
|
||||
{"Qabx", true},
|
||||
{"Qaby", false},
|
||||
{"Zyyy", false},
|
||||
{"Zzzz", false},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
x, _ := ParseScript(tt.s)
|
||||
if b := x.IsPrivateUse(); b != tt.private {
|
||||
t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,735 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// A MatchOption configures a Matcher.
|
||||
type MatchOption func(*matcher)
|
||||
|
||||
// PreferSameScript will, in the absence of a match, result in the first
|
||||
// preferred tag with the same script as a supported tag to match this supported
|
||||
// tag. The default is currently true, but this may change in the future.
|
||||
func PreferSameScript(preferSame bool) MatchOption {
|
||||
return func(m *matcher) { m.preferSameScript = preferSame }
|
||||
}
|
||||
|
||||
// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
|
||||
// There doesn't seem to be too much need for multiple types.
|
||||
// Making it a concrete type allows MatchStrings to be a method, which will
|
||||
// improve its discoverability.
|
||||
|
||||
// MatchStrings parses and matches the given strings until one of them matches
|
||||
// the language in the Matcher. A string may be an Accept-Language header as
|
||||
// handled by ParseAcceptLanguage. The default language is returned if no
|
||||
// other language matched.
|
||||
func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
|
||||
for _, accept := range lang {
|
||||
desired, _, err := ParseAcceptLanguage(accept)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if tag, index, conf := m.Match(desired...); conf != No {
|
||||
return tag, index
|
||||
}
|
||||
}
|
||||
tag, index, _ = m.Match()
|
||||
return
|
||||
}
|
||||
|
||||
// Matcher is the interface that wraps the Match method.
|
||||
//
|
||||
// Match returns the best match for any of the given tags, along with
|
||||
// a unique index associated with the returned tag and a confidence
|
||||
// score.
|
||||
type Matcher interface {
|
||||
Match(t ...Tag) (tag Tag, index int, c Confidence)
|
||||
}
|
||||
|
||||
// Comprehends reports the confidence score for a speaker of a given language
|
||||
// to being able to comprehend the written form of an alternative language.
|
||||
func Comprehends(speaker, alternative Tag) Confidence {
|
||||
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
|
||||
return c
|
||||
}
|
||||
|
||||
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
|
||||
// against a list of supported tags based on written intelligibility, closeness
|
||||
// of dialect, equivalence of subtags and various other rules. It is initialized
|
||||
// with the list of supported tags. The first element is used as the default
|
||||
// value in case no match is found.
|
||||
//
|
||||
// Its Match method matches the first of the given Tags to reach a certain
|
||||
// confidence threshold. The tags passed to Match should therefore be specified
|
||||
// in order of preference. Extensions are ignored for matching.
|
||||
//
|
||||
// The index returned by the Match method corresponds to the index of the
|
||||
// matched tag in t, but is augmented with the Unicode extension ('u')of the
|
||||
// corresponding preferred tag. This allows user locale options to be passed
|
||||
// transparently.
|
||||
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
|
||||
return newMatcher(t, options)
|
||||
}
|
||||
|
||||
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
|
||||
var tt language.Tag
|
||||
match, w, c := m.getBest(want...)
|
||||
if match != nil {
|
||||
tt, index = match.tag, match.index
|
||||
} else {
|
||||
// TODO: this should be an option
|
||||
tt = m.default_.tag
|
||||
if m.preferSameScript {
|
||||
outer:
|
||||
for _, w := range want {
|
||||
script, _ := w.Script()
|
||||
if script.scriptID == 0 {
|
||||
// Don't do anything if there is no script, such as with
|
||||
// private subtags.
|
||||
continue
|
||||
}
|
||||
for i, h := range m.supported {
|
||||
if script.scriptID == h.maxScript {
|
||||
tt, index = h.tag, i
|
||||
break outer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: select first language tag based on script.
|
||||
}
|
||||
if w.RegionID != tt.RegionID && w.RegionID != 0 {
|
||||
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
|
||||
tt.RegionID = w.RegionID
|
||||
tt.RemakeString()
|
||||
} else if r := w.RegionID.String(); len(r) == 2 {
|
||||
// TODO: also filter macro and deprecated.
|
||||
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
|
||||
}
|
||||
}
|
||||
// Copy options from the user-provided tag into the result tag. This is hard
|
||||
// to do after the fact, so we do it here.
|
||||
// TODO: add in alternative variants to -u-va-.
|
||||
// TODO: add preferred region to -u-rg-.
|
||||
if e := w.Extensions(); len(e) > 0 {
|
||||
b := language.Builder{}
|
||||
b.SetTag(tt)
|
||||
for _, e := range e {
|
||||
b.AddExt(e)
|
||||
}
|
||||
tt = b.Make()
|
||||
}
|
||||
return makeTag(tt), index, c
|
||||
}
|
||||
|
||||
// ErrMissingLikelyTagsData indicates no information was available
|
||||
// to compute likely values of missing tags.
|
||||
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// func (t *Tag) setTagsFrom(id Tag) {
|
||||
// t.LangID = id.LangID
|
||||
// t.ScriptID = id.ScriptID
|
||||
// t.RegionID = id.RegionID
|
||||
// }
|
||||
|
||||
// Tag Matching
|
||||
// CLDR defines an algorithm for finding the best match between two sets of language
|
||||
// tags. The basic algorithm defines how to score a possible match and then find
|
||||
// the match with the best score
|
||||
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
|
||||
// Using scoring has several disadvantages. The scoring obfuscates the importance of
|
||||
// the various factors considered, making the algorithm harder to understand. Using
|
||||
// scoring also requires the full score to be computed for each pair of tags.
|
||||
//
|
||||
// We will use a different algorithm which aims to have the following properties:
|
||||
// - clarity on the precedence of the various selection factors, and
|
||||
// - improved performance by allowing early termination of a comparison.
|
||||
//
|
||||
// Matching algorithm (overview)
|
||||
// Input:
|
||||
// - supported: a set of supported tags
|
||||
// - default: the default tag to return in case there is no match
|
||||
// - desired: list of desired tags, ordered by preference, starting with
|
||||
// the most-preferred.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1) Set the best match to the lowest confidence level
|
||||
// 2) For each tag in "desired":
|
||||
// a) For each tag in "supported":
|
||||
// 1) compute the match between the two tags.
|
||||
// 2) if the match is better than the previous best match, replace it
|
||||
// with the new match. (see next section)
|
||||
// b) if the current best match is Exact and pin is true the result will be
|
||||
// frozen to the language found thusfar, although better matches may
|
||||
// still be found for the same language.
|
||||
// 3) If the best match so far is below a certain threshold, return "default".
|
||||
//
|
||||
// Ranking:
|
||||
// We use two phases to determine whether one pair of tags are a better match
|
||||
// than another pair of tags. First, we determine a rough confidence level. If the
|
||||
// levels are different, the one with the highest confidence wins.
|
||||
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
|
||||
// rules.
|
||||
//
|
||||
// The confidence level of matching a pair of tags is determined by finding the
|
||||
// lowest confidence level of any matches of the corresponding subtags (the
|
||||
// result is deemed as good as its weakest link).
|
||||
// We define the following levels:
|
||||
// Exact - An exact match of a subtag, before adding likely subtags.
|
||||
// MaxExact - An exact match of a subtag, after adding likely subtags.
|
||||
// [See Note 2].
|
||||
// High - High level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// Low - Low level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// No - No mutual intelligibility.
|
||||
//
|
||||
// The following levels can occur for each type of subtag:
|
||||
// Base: Exact, MaxExact, High, Low, No
|
||||
// Script: Exact, MaxExact [see Note 3], Low, No
|
||||
// Region: Exact, MaxExact, High
|
||||
// Variant: Exact, High
|
||||
// Private: Exact, No
|
||||
//
|
||||
// Any result with a confidence level of Low or higher is deemed a possible match.
|
||||
// Once a desired tag matches any of the supported tags with a level of MaxExact
|
||||
// or higher, the next desired tag is not considered (see Step 2.b).
|
||||
// Note that CLDR provides languageMatching data that defines close equivalence
|
||||
// classes for base languages, scripts and regions.
|
||||
//
|
||||
// Tie-breaking
|
||||
// If we get the same confidence level for two matches, we apply a sequence of
|
||||
// tie-breaking rules. The first that succeeds defines the result. The rules are
|
||||
// applied in the following order.
|
||||
// 1) Original language was defined and was identical.
|
||||
// 2) Original region was defined and was identical.
|
||||
// 3) Distance between two maximized regions was the smallest.
|
||||
// 4) Original script was defined and was identical.
|
||||
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
|
||||
// If there is still no winner after these rules are applied, the first match
|
||||
// found wins.
|
||||
//
|
||||
// Notes:
|
||||
// [2] In practice, as matching of Exact is done in a separate phase from
|
||||
// matching the other levels, we reuse the Exact level to mean MaxExact in
|
||||
// the second phase. As a consequence, we only need the levels defined by
|
||||
// the Confidence type. The MaxExact confidence level is mapped to High in
|
||||
// the public API.
|
||||
// [3] We do not differentiate between maximized script values that were derived
|
||||
// from suppressScript versus most likely tag data. We determined that in
|
||||
// ranking the two, one ranks just after the other. Moreover, the two cannot
|
||||
// occur concurrently. As a consequence, they are identical for practical
|
||||
// purposes.
|
||||
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
|
||||
// the MaxExact level to allow iw vs he to still be a closer match than
|
||||
// en-AU vs en-US, for example.
|
||||
// [5] In CLDR a locale inherits fields that are unspecified for this locale
|
||||
// from its parent. Therefore, if a locale is a parent of another locale,
|
||||
// it is a strong measure for closeness, especially when no other tie
|
||||
// breaker rule applies. One could also argue it is inconsistent, for
|
||||
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
|
||||
// though its parent is pt-PT according to the inheritance rules.
|
||||
//
|
||||
// Implementation Details:
|
||||
// There are several performance considerations worth pointing out. Most notably,
|
||||
// we preprocess as much as possible (within reason) at the time of creation of a
|
||||
// matcher. This includes:
|
||||
// - creating a per-language map, which includes data for the raw base language
|
||||
// and its canonicalized variant (if applicable),
|
||||
// - expanding entries for the equivalence classes defined in CLDR's
|
||||
// languageMatch data.
|
||||
// The per-language map ensures that typically only a very small number of tags
|
||||
// need to be considered. The pre-expansion of canonicalized subtags and
|
||||
// equivalence classes reduces the amount of map lookups that need to be done at
|
||||
// runtime.
|
||||
|
||||
// matcher keeps a set of supported language tags, indexed by language.
|
||||
type matcher struct {
|
||||
default_ *haveTag
|
||||
supported []*haveTag
|
||||
index map[language.Language]*matchHeader
|
||||
passSettings bool
|
||||
preferSameScript bool
|
||||
}
|
||||
|
||||
// matchHeader has the lists of tags for exact matches and matches based on
|
||||
// maximized and canonicalized tags for a given language.
|
||||
type matchHeader struct {
|
||||
haveTags []*haveTag
|
||||
original bool
|
||||
}
|
||||
|
||||
// haveTag holds a supported Tag and its maximized script and region. The maximized
|
||||
// or canonicalized language is not stored as it is not needed during matching.
|
||||
type haveTag struct {
|
||||
tag language.Tag
|
||||
|
||||
// index of this tag in the original list of supported tags.
|
||||
index int
|
||||
|
||||
// conf is the maximum confidence that can result from matching this haveTag.
|
||||
// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
|
||||
conf Confidence
|
||||
|
||||
// Maximized region and script.
|
||||
maxRegion language.Region
|
||||
maxScript language.Script
|
||||
|
||||
// altScript may be checked as an alternative match to maxScript. If altScript
|
||||
// matches, the confidence level for this match is Low. Theoretically there
|
||||
// could be multiple alternative scripts. This does not occur in practice.
|
||||
altScript language.Script
|
||||
|
||||
// nextMax is the index of the next haveTag with the same maximized tags.
|
||||
nextMax uint16
|
||||
}
|
||||
|
||||
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
|
||||
max := tag
|
||||
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
|
||||
max, _ = canonicalize(All, max)
|
||||
max, _ = max.Maximize()
|
||||
max.RemakeString()
|
||||
}
|
||||
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
|
||||
}
|
||||
|
||||
// altScript returns an alternative script that may match the given script with
|
||||
// a low confidence. At the moment, the langMatch data allows for at most one
|
||||
// script to map to another and we rely on this to keep the code simple.
|
||||
func altScript(l language.Language, s language.Script) language.Script {
|
||||
for _, alt := range matchScript {
|
||||
// TODO: also match cases where language is not the same.
|
||||
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
|
||||
language.Script(alt.haveScript) == s {
|
||||
return language.Script(alt.wantScript)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
|
||||
// Tags that have the same maximized values are linked by index.
|
||||
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
|
||||
h.original = h.original || exact
|
||||
// Don't add new exact matches.
|
||||
for _, v := range h.haveTags {
|
||||
if equalsRest(v.tag, n.tag) {
|
||||
return
|
||||
}
|
||||
}
|
||||
// Allow duplicate maximized tags, but create a linked list to allow quickly
|
||||
// comparing the equivalents and bail out.
|
||||
for i, v := range h.haveTags {
|
||||
if v.maxScript == n.maxScript &&
|
||||
v.maxRegion == n.maxRegion &&
|
||||
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
|
||||
for h.haveTags[i].nextMax != 0 {
|
||||
i = int(h.haveTags[i].nextMax)
|
||||
}
|
||||
h.haveTags[i].nextMax = uint16(len(h.haveTags))
|
||||
break
|
||||
}
|
||||
}
|
||||
h.haveTags = append(h.haveTags, &n)
|
||||
}
|
||||
|
||||
// header returns the matchHeader for the given language. It creates one if
|
||||
// it doesn't already exist.
|
||||
func (m *matcher) header(l language.Language) *matchHeader {
|
||||
if h := m.index[l]; h != nil {
|
||||
return h
|
||||
}
|
||||
h := &matchHeader{}
|
||||
m.index[l] = h
|
||||
return h
|
||||
}
|
||||
|
||||
func toConf(d uint8) Confidence {
|
||||
if d <= 10 {
|
||||
return High
|
||||
}
|
||||
if d < 30 {
|
||||
return Low
|
||||
}
|
||||
return No
|
||||
}
|
||||
|
||||
// newMatcher builds an index for the given supported tags and returns it as
|
||||
// a matcher. It also expands the index by considering various equivalence classes
|
||||
// for a given tag.
|
||||
func newMatcher(supported []Tag, options []MatchOption) *matcher {
|
||||
m := &matcher{
|
||||
index: make(map[language.Language]*matchHeader),
|
||||
preferSameScript: true,
|
||||
}
|
||||
for _, o := range options {
|
||||
o(m)
|
||||
}
|
||||
if len(supported) == 0 {
|
||||
m.default_ = &haveTag{}
|
||||
return m
|
||||
}
|
||||
// Add supported languages to the index. Add exact matches first to give
|
||||
// them precedence.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, _ := makeHaveTag(tt, i)
|
||||
m.header(tt.LangID).addIfNew(pair, true)
|
||||
m.supported = append(m.supported, &pair)
|
||||
}
|
||||
m.default_ = m.header(supported[0].lang()).haveTags[0]
|
||||
// Keep these in two different loops to support the case that two equivalent
|
||||
// languages are distinguished, such as iw and he.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, max := makeHaveTag(tt, i)
|
||||
if max != tt.LangID {
|
||||
m.header(max).addIfNew(pair, true)
|
||||
}
|
||||
}
|
||||
|
||||
// update is used to add indexes in the map for equivalent languages.
|
||||
// update will only add entries to original indexes, thus not computing any
|
||||
// transitive relations.
|
||||
update := func(want, have uint16, conf Confidence) {
|
||||
if hh := m.index[language.Language(have)]; hh != nil {
|
||||
if !hh.original {
|
||||
return
|
||||
}
|
||||
hw := m.header(language.Language(want))
|
||||
for _, ht := range hh.haveTags {
|
||||
v := *ht
|
||||
if conf < v.conf {
|
||||
v.conf = conf
|
||||
}
|
||||
v.nextMax = 0 // this value needs to be recomputed
|
||||
if v.altScript != 0 {
|
||||
v.altScript = altScript(language.Language(want), v.maxScript)
|
||||
}
|
||||
hw.addIfNew(v, conf == Exact && hh.original)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for languages with mutual intelligibility as defined by CLDR's
|
||||
// languageMatch data.
|
||||
for _, ml := range matchLang {
|
||||
update(ml.want, ml.have, toConf(ml.distance))
|
||||
if !ml.oneway {
|
||||
update(ml.have, ml.want, toConf(ml.distance))
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for possible canonicalizations. This is an optimization to
|
||||
// ensure that only one map lookup needs to be done at runtime per desired tag.
|
||||
// First we match deprecated equivalents. If they are perfect equivalents
|
||||
// (their canonicalization simply substitutes a different language code, but
|
||||
// nothing else), the match confidence is Exact, otherwise it is High.
|
||||
for i, lm := range language.AliasMap {
|
||||
// If deprecated codes match and there is no fiddling with the script
|
||||
// or region, we consider it an exact match.
|
||||
conf := Exact
|
||||
if language.AliasTypes[i] != language.Macro {
|
||||
if !isExactEquivalent(language.Language(lm.From)) {
|
||||
conf = High
|
||||
}
|
||||
update(lm.To, lm.From, conf)
|
||||
}
|
||||
update(lm.From, lm.To, conf)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// getBest gets the best matching tag in m for any of the given tags, taking into
|
||||
// account the order of preference of the given tags.
|
||||
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
|
||||
best := bestMatch{}
|
||||
for i, ww := range want {
|
||||
w := ww.tag()
|
||||
var max language.Tag
|
||||
// Check for exact match first.
|
||||
h := m.index[w.LangID]
|
||||
if w.LangID != 0 {
|
||||
if h == nil {
|
||||
continue
|
||||
}
|
||||
// Base language is defined.
|
||||
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
|
||||
// A region that is added through canonicalization is stronger than
|
||||
// a maximized region: set it in the original (e.g. mo -> ro-MD).
|
||||
if w.RegionID != max.RegionID {
|
||||
w.RegionID = max.RegionID
|
||||
}
|
||||
// TODO: should we do the same for scripts?
|
||||
// See test case: en, sr, nl ; sh ; sr
|
||||
max, _ = max.Maximize()
|
||||
} else {
|
||||
// Base language is not defined.
|
||||
if h != nil {
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
if equalsRest(have.tag, w) {
|
||||
return have, w, Exact
|
||||
}
|
||||
}
|
||||
}
|
||||
if w.ScriptID == 0 && w.RegionID == 0 {
|
||||
// We skip all tags matching und for approximate matching, including
|
||||
// private tags.
|
||||
continue
|
||||
}
|
||||
max, _ = w.Maximize()
|
||||
if h = m.index[max.LangID]; h == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
pin := true
|
||||
for _, t := range want[i+1:] {
|
||||
if w.LangID == t.lang() {
|
||||
pin = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// Check for match based on maximized tag.
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
if best.conf == Exact {
|
||||
for have.nextMax != 0 {
|
||||
have = h.haveTags[have.nextMax]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
}
|
||||
}
|
||||
if best.conf <= No {
|
||||
if len(want) != 0 {
|
||||
return nil, want[0].tag(), No
|
||||
}
|
||||
return nil, language.Tag{}, No
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
|
||||
// bestMatch accumulates the best match so far.
|
||||
type bestMatch struct {
|
||||
have *haveTag
|
||||
want language.Tag
|
||||
conf Confidence
|
||||
pinnedRegion language.Region
|
||||
pinLanguage bool
|
||||
sameRegionGroup bool
|
||||
// Cached results from applying tie-breaking rules.
|
||||
origLang bool
|
||||
origReg bool
|
||||
paradigmReg bool
|
||||
regGroupDist uint8
|
||||
origScript bool
|
||||
}
|
||||
|
||||
// update updates the existing best match if the new pair is considered to be a
|
||||
// better match. To determine if the given pair is a better match, it first
|
||||
// computes the rough confidence level. If this surpasses the current match, it
|
||||
// will replace it and update the tie-breaker rule cache. If there is a tie, it
|
||||
// proceeds with applying a series of tie-breaker rules. If there is no
|
||||
// conclusive winner after applying the tie-breaker rules, it leaves the current
|
||||
// match as the preferred match.
|
||||
//
|
||||
// If pin is true and have and tag are a strong match, it will henceforth only
|
||||
// consider matches for this language. This corresponds to the idea that most
|
||||
// users have a strong preference for the first defined language. A user can
|
||||
// still prefer a second language over a dialect of the preferred language by
|
||||
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
|
||||
// be false.
|
||||
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
|
||||
// Bail if the maximum attainable confidence is below that of the current best match.
|
||||
c := have.conf
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
// Don't change the language once we already have found an exact match.
|
||||
if m.pinLanguage && tag.LangID != m.want.LangID {
|
||||
return
|
||||
}
|
||||
// Pin the region group if we are comparing tags for the same language.
|
||||
if tag.LangID == m.want.LangID && m.sameRegionGroup {
|
||||
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
|
||||
if !sameGroup {
|
||||
return
|
||||
}
|
||||
}
|
||||
if c == Exact && have.maxScript == maxScript {
|
||||
// If there is another language and then another entry of this language,
|
||||
// don't pin anything, otherwise pin the language.
|
||||
m.pinLanguage = pin
|
||||
}
|
||||
if equalsRest(have.tag, tag) {
|
||||
} else if have.maxScript != maxScript {
|
||||
// There is usually very little comprehension between different scripts.
|
||||
// In a few cases there may still be Low comprehension. This possibility
|
||||
// is pre-computed and stored in have.altScript.
|
||||
if Low < m.conf || have.altScript != maxScript {
|
||||
return
|
||||
}
|
||||
c = Low
|
||||
} else if have.maxRegion != maxRegion {
|
||||
if High < c {
|
||||
// There is usually a small difference between languages across regions.
|
||||
c = High
|
||||
}
|
||||
}
|
||||
|
||||
// We store the results of the computations of the tie-breaker rules along
|
||||
// with the best match. There is no need to do the checks once we determine
|
||||
// we have a winner, but we do still need to do the tie-breaker computations.
|
||||
// We use "beaten" to keep track if we still need to do the checks.
|
||||
beaten := false // true if the new pair defeats the current one.
|
||||
if c != m.conf {
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Tie-breaker rules:
|
||||
// We prefer if the pre-maximized language was specified and identical.
|
||||
origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
|
||||
if !beaten && m.origLang != origLang {
|
||||
if m.origLang {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// We prefer if the pre-maximized region was specified and identical.
|
||||
origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
|
||||
if !beaten && m.origReg != origReg {
|
||||
if m.origReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
|
||||
if !beaten && m.regGroupDist != regGroupDist {
|
||||
if regGroupDist > m.regGroupDist {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
|
||||
if !beaten && m.paradigmReg != paradigmReg {
|
||||
if !paradigmReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Next we prefer if the pre-maximized script was specified and identical.
|
||||
origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
|
||||
if !beaten && m.origScript != origScript {
|
||||
if m.origScript {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Update m to the newly found best match.
|
||||
if beaten {
|
||||
m.have = have
|
||||
m.want = tag
|
||||
m.conf = c
|
||||
m.pinnedRegion = maxRegion
|
||||
m.sameRegionGroup = sameGroup
|
||||
m.origLang = origLang
|
||||
m.origReg = origReg
|
||||
m.paradigmReg = paradigmReg
|
||||
m.origScript = origScript
|
||||
m.regGroupDist = regGroupDist
|
||||
}
|
||||
}
|
||||
|
||||
func isParadigmLocale(lang language.Language, r language.Region) bool {
|
||||
for _, e := range paradigmLocales {
|
||||
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// regionGroupDist computes the distance between two regions based on their
|
||||
// CLDR grouping.
|
||||
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
|
||||
const defaultDistance = 4
|
||||
|
||||
aGroup := uint(regionToGroups[a]) << 1
|
||||
bGroup := uint(regionToGroups[b]) << 1
|
||||
for _, ri := range matchRegion {
|
||||
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
|
||||
group := uint(1 << (ri.group &^ 0x80))
|
||||
if 0x80&ri.group == 0 {
|
||||
if aGroup&bGroup&group != 0 { // Both regions are in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
} else {
|
||||
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return defaultDistance, true
|
||||
}
|
||||
|
||||
// equalsRest compares everything except the language.
|
||||
func equalsRest(a, b language.Tag) bool {
|
||||
// TODO: don't include extensions in this comparison. To do this efficiently,
|
||||
// though, we should handle private tags separately.
|
||||
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
|
||||
}
|
||||
|
||||
// isExactEquivalent returns true if canonicalizing the language will not alter
|
||||
// the script or region of a tag.
|
||||
func isExactEquivalent(l language.Language) bool {
|
||||
for _, o := range notEquivalent {
|
||||
if o == l {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var notEquivalent []language.Language
|
||||
|
||||
func init() {
|
||||
// Create a list of all languages for which canonicalization may alter the
|
||||
// script or region.
|
||||
for _, lm := range language.AliasMap {
|
||||
tag := language.Tag{LangID: language.Language(lm.From)}
|
||||
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
|
||||
notEquivalent = append(notEquivalent, language.Language(lm.From))
|
||||
}
|
||||
}
|
||||
// Maximize undefined regions of paradigm locales.
|
||||
for i, v := range paradigmLocales {
|
||||
t := language.Tag{LangID: language.Language(v[0])}
|
||||
max, _ := t.Maximize()
|
||||
if v[1] == 0 {
|
||||
paradigmLocales[i][1] = uint16(max.RegionID)
|
||||
}
|
||||
if v[2] == 0 {
|
||||
paradigmLocales[i][2] = uint16(max.RegionID)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,384 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
|
||||
|
||||
func TestCompliance(t *testing.T) {
|
||||
filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
r, err := os.Open(file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ucd.Parse(r, func(p *ucd.Parser) {
|
||||
name := strings.ReplaceAll(path.Join(p.String(0), p.String(1)), " ", "")
|
||||
if skip[name] {
|
||||
return
|
||||
}
|
||||
t.Run(info.Name()+"/"+short(name), func(t *testing.T) {
|
||||
supported := makeTagList(p.String(0))
|
||||
desired := makeTagList(p.String(1))
|
||||
gotCombined, index, conf := NewMatcher(supported).Match(desired...)
|
||||
|
||||
gotMatch := supported[index]
|
||||
wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
|
||||
if gotMatch != wantMatch {
|
||||
t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
|
||||
}
|
||||
if tag := strings.TrimSpace(p.String(3)); tag != "" {
|
||||
wantCombined := Raw.MustParse(tag)
|
||||
if err == nil && gotCombined != wantCombined {
|
||||
t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func short(s string) string {
|
||||
if len(s) <= 50 {
|
||||
return s
|
||||
}
|
||||
var i int
|
||||
for i = 1; i < utf8.UTFMax && !utf8.RuneStart(s[50-i]); i++ {
|
||||
}
|
||||
return s[:50-i] + "…"
|
||||
}
|
||||
|
||||
var skip = map[string]bool{
|
||||
// TODO: bugs
|
||||
// Honor the wildcard match. This may only be useful to select non-exact
|
||||
// stuff.
|
||||
"mul,af/nl": true, // match: got "af"; want "mul"
|
||||
|
||||
// TODO: include other extensions.
|
||||
// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
|
||||
"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
|
||||
|
||||
// Inconsistencies with Mark Davis' implementation where it is not clear
|
||||
// which is better.
|
||||
|
||||
// Inconsistencies in combined. I think the Go approach is more appropriate.
|
||||
// We could use -u-rg- as alternative.
|
||||
"und,fr/fr-BE-fonipa": true, // combined: got "fr"; want "fr-BE-fonipa"
|
||||
"und,fr-CA/fr-BE-fonipa": true, // combined: got "fr-CA"; want "fr-BE-fonipa"
|
||||
"und,fr-fonupa/fr-BE-fonipa": true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
|
||||
"und,no/nn-BE-fonipa": true, // combined: got "no"; want "no-BE-fonipa"
|
||||
"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
|
||||
|
||||
// The initial number is a threshold. As we don't use scoring, we will not
|
||||
// implement this.
|
||||
"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
|
||||
// match: got "und"; want "fr-Cyrl-CA-fonupa"
|
||||
// combined: got "und"; want "fr-Cyrl-BE-fonipa"
|
||||
|
||||
// Other interesting cases to test:
|
||||
// - Should same language or same script have the preference if there is
|
||||
// usually no understanding of the other script?
|
||||
// - More specific region in desired may replace enclosing supported.
|
||||
}
|
||||
|
||||
func makeTagList(s string) (tags []Tag) {
|
||||
for _, s := range strings.Split(s, ",") {
|
||||
tags = append(tags, mk(strings.TrimSpace(s)))
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
func TestMatchStrings(t *testing.T) {
|
||||
testCases := []struct {
|
||||
supported string
|
||||
desired string // strings separated by |
|
||||
tag string
|
||||
index int
|
||||
}{{
|
||||
supported: "en",
|
||||
desired: "",
|
||||
tag: "en",
|
||||
index: 0,
|
||||
}, {
|
||||
supported: "en",
|
||||
desired: "nl",
|
||||
tag: "en",
|
||||
index: 0,
|
||||
}, {
|
||||
supported: "en,nl",
|
||||
desired: "nl",
|
||||
tag: "nl",
|
||||
index: 1,
|
||||
}, {
|
||||
supported: "en,nl",
|
||||
desired: "nl|en",
|
||||
tag: "nl",
|
||||
index: 1,
|
||||
}, {
|
||||
supported: "en-GB,nl",
|
||||
desired: "en ; q=0.1,nl",
|
||||
tag: "nl",
|
||||
index: 1,
|
||||
}, {
|
||||
supported: "en-GB,nl",
|
||||
desired: "en;q=0.005 | dk; q=0.1,nl ",
|
||||
tag: "en-GB",
|
||||
index: 0,
|
||||
}, {
|
||||
// do not match faulty tags with und
|
||||
supported: "en,und",
|
||||
desired: "|en",
|
||||
tag: "en",
|
||||
index: 0,
|
||||
}}
|
||||
for _, tc := range testCases {
|
||||
t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
|
||||
m := NewMatcher(makeTagList(tc.supported))
|
||||
tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
|
||||
if tag.String() != tc.tag || index != tc.index {
|
||||
t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegionGroups(t *testing.T) {
|
||||
testCases := []struct {
|
||||
a, b string
|
||||
distance uint8
|
||||
}{
|
||||
{"zh-TW", "zh-HK", 5},
|
||||
{"zh-MO", "zh-HK", 4},
|
||||
{"es-ES", "es-AR", 5},
|
||||
{"es-ES", "es", 4},
|
||||
{"es-419", "es-MX", 4},
|
||||
{"es-AR", "es-MX", 4},
|
||||
{"es-ES", "es-MX", 5},
|
||||
{"es-PT", "es-MX", 5},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
a := MustParse(tc.a)
|
||||
aScript, _ := a.Script()
|
||||
b := MustParse(tc.b)
|
||||
bScript, _ := b.Script()
|
||||
|
||||
if aScript != bScript {
|
||||
t.Errorf("scripts differ: %q vs %q", aScript, bScript)
|
||||
continue
|
||||
}
|
||||
d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
|
||||
if d != tc.distance {
|
||||
t.Errorf("got %q; want %q", d, tc.distance)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsParadigmLocale(t *testing.T) {
|
||||
testCases := map[string]bool{
|
||||
"en-US": true,
|
||||
"en-GB": true,
|
||||
"en-VI": false,
|
||||
"es-GB": false,
|
||||
"es-ES": true,
|
||||
"es-419": true,
|
||||
}
|
||||
for str, want := range testCases {
|
||||
tt := Make(str)
|
||||
tag := tt.tag()
|
||||
got := isParadigmLocale(tag.LangID, tag.RegionID)
|
||||
if got != want {
|
||||
t.Errorf("isPL(%q) = %v; want %v", str, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Implementation of String methods for various types for debugging purposes.
|
||||
|
||||
func (m *matcher) String() string {
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintln(w, "Default:", m.default_)
|
||||
for tag, h := range m.index {
|
||||
fmt.Fprintf(w, " %s: %v\n", tag, h)
|
||||
}
|
||||
return w.String()
|
||||
}
|
||||
|
||||
func (h *matchHeader) String() string {
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprint(w, "haveTag: ")
|
||||
for _, h := range h.haveTags {
|
||||
fmt.Fprintf(w, "%v, ", h)
|
||||
}
|
||||
return w.String()
|
||||
}
|
||||
|
||||
func (t haveTag) String() string {
|
||||
return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
|
||||
}
|
||||
|
||||
func TestIssue43834(t *testing.T) {
|
||||
matcher := NewMatcher([]Tag{English})
|
||||
|
||||
// ZZ is the largest region code and should not cause overflow.
|
||||
desired, _, err := ParseAcceptLanguage("en-ZZ")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
_, i, _ := matcher.Match(desired...)
|
||||
if i != 0 {
|
||||
t.Errorf("got %v; want 0", i)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBestMatchAlloc(t *testing.T) {
|
||||
m := NewMatcher(makeTagList("en sr nl"))
|
||||
// Go allocates when creating a list of tags from a single tag!
|
||||
list := []Tag{English}
|
||||
avg := testtext.AllocsPerRun(100, func() {
|
||||
m.Match(list...)
|
||||
})
|
||||
if avg > 0 {
|
||||
t.Errorf("got %f; want 0", avg)
|
||||
}
|
||||
}
|
||||
|
||||
var benchHave = []Tag{
|
||||
mk("en"),
|
||||
mk("en-GB"),
|
||||
mk("za"),
|
||||
mk("zh-Hant"),
|
||||
mk("zh-Hans-CN"),
|
||||
mk("zh"),
|
||||
mk("zh-HK"),
|
||||
mk("ar-MK"),
|
||||
mk("en-CA"),
|
||||
mk("fr-CA"),
|
||||
mk("fr-US"),
|
||||
mk("fr-CH"),
|
||||
mk("fr"),
|
||||
mk("lt"),
|
||||
mk("lv"),
|
||||
mk("iw"),
|
||||
mk("iw-NL"),
|
||||
mk("he"),
|
||||
mk("he-IT"),
|
||||
mk("tlh"),
|
||||
mk("ja"),
|
||||
mk("ja-Jpan"),
|
||||
mk("ja-Jpan-JP"),
|
||||
mk("de"),
|
||||
mk("de-CH"),
|
||||
mk("de-AT"),
|
||||
mk("de-DE"),
|
||||
mk("sr"),
|
||||
mk("sr-Latn"),
|
||||
mk("sr-Cyrl"),
|
||||
mk("sr-ME"),
|
||||
}
|
||||
|
||||
var benchWant = [][]Tag{
|
||||
[]Tag{
|
||||
mk("en"),
|
||||
},
|
||||
[]Tag{
|
||||
mk("en-AU"),
|
||||
mk("de-HK"),
|
||||
mk("nl"),
|
||||
mk("fy"),
|
||||
mk("lv"),
|
||||
},
|
||||
[]Tag{
|
||||
mk("en-AU"),
|
||||
mk("de-HK"),
|
||||
mk("nl"),
|
||||
mk("fy"),
|
||||
},
|
||||
[]Tag{
|
||||
mk("ja-Hant"),
|
||||
mk("da-HK"),
|
||||
mk("nl"),
|
||||
mk("zh-TW"),
|
||||
},
|
||||
[]Tag{
|
||||
mk("ja-Hant"),
|
||||
mk("da-HK"),
|
||||
mk("nl"),
|
||||
mk("hr"),
|
||||
},
|
||||
}
|
||||
|
||||
func BenchmarkMatch(b *testing.B) {
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, want := range benchWant {
|
||||
m.getBest(want...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchExact(b *testing.B) {
|
||||
want := mk("en")
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
|
||||
want := mk("hr")
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
|
||||
want := mk("nn")
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchAltScriptPresent(b *testing.B) {
|
||||
want := mk("zh-Hant-CN")
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
|
||||
want := mk("fr-Cyrl")
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchLimitedExact(b *testing.B) {
|
||||
want := []Tag{mk("he-NL"), mk("iw-NL")}
|
||||
m := newMatcher(benchHave, nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.getBest(want...)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,256 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the
|
||||
// input is well-formed but the respective subtag is not recognized
|
||||
// as a valid value.
|
||||
type ValueError interface {
|
||||
error
|
||||
|
||||
// Subtag returns the subtag for which the error occurred.
|
||||
Subtag() string
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the default canonicalization type.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
return Default.Parse(s)
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the canonicalization type c.
|
||||
func (c CanonType) Parse(s string) (t Tag, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
t = Tag{}
|
||||
err = language.ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
tt, err := language.Parse(s)
|
||||
if err != nil {
|
||||
return makeTag(tt), err
|
||||
}
|
||||
tt, changed := canonicalize(c, tt)
|
||||
if changed {
|
||||
tt.RemakeString()
|
||||
}
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using the Default CanonType. If one or
|
||||
// more errors are encountered, one of the errors is returned.
|
||||
func Compose(part ...interface{}) (t Tag, err error) {
|
||||
return Default.Compose(part...)
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using CanonType c. If one or more errors
|
||||
// are encountered, one of the errors is returned.
|
||||
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
t = Tag{}
|
||||
err = language.ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
var b language.Builder
|
||||
if err = update(&b, part...); err != nil {
|
||||
return und, err
|
||||
}
|
||||
b.Tag, _ = canonicalize(c, b.Tag)
|
||||
return makeTag(b.Make()), err
|
||||
}
|
||||
|
||||
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
||||
|
||||
func update(b *language.Builder, part ...interface{}) (err error) {
|
||||
for _, x := range part {
|
||||
switch v := x.(type) {
|
||||
case Tag:
|
||||
b.SetTag(v.tag())
|
||||
case Base:
|
||||
b.Tag.LangID = v.langID
|
||||
case Script:
|
||||
b.Tag.ScriptID = v.scriptID
|
||||
case Region:
|
||||
b.Tag.RegionID = v.regionID
|
||||
case Variant:
|
||||
if v.variant == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.AddVariant(v.variant)
|
||||
case Extension:
|
||||
if v.s == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.SetExt(v.s)
|
||||
case []Variant:
|
||||
b.ClearVariants()
|
||||
for _, v := range v {
|
||||
b.AddVariant(v.variant)
|
||||
}
|
||||
case []Extension:
|
||||
b.ClearExtensions()
|
||||
for _, e := range v {
|
||||
b.SetExt(e.s)
|
||||
}
|
||||
// TODO: support parsing of raw strings based on morphology or just extensions?
|
||||
case error:
|
||||
if v != nil {
|
||||
err = v
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
||||
var errTagListTooLarge = errors.New("tag list exceeds max length")
|
||||
|
||||
// ParseAcceptLanguage parses the contents of an Accept-Language header as
|
||||
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||
// a list of corresponding quality weights. It is more permissive than RFC 2616
|
||||
// and may return non-nil slices even if the input is not valid.
|
||||
// The Tags will be sorted by highest weight first and then by first occurrence.
|
||||
// Tags with a weight of zero will be dropped. An error will be returned if the
|
||||
// input could not be parsed.
|
||||
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
tag = nil
|
||||
q = nil
|
||||
err = language.ErrSyntax
|
||||
}
|
||||
}()
|
||||
|
||||
if strings.Count(s, "-") > 1000 {
|
||||
return nil, nil, errTagListTooLarge
|
||||
}
|
||||
|
||||
var entry string
|
||||
for s != "" {
|
||||
if entry, s = split(s, ','); entry == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, weight := split(entry, ';')
|
||||
|
||||
// Scan the language.
|
||||
t, err := Parse(entry)
|
||||
if err != nil {
|
||||
id, ok := acceptFallback[entry]
|
||||
if !ok {
|
||||
return nil, nil, err
|
||||
}
|
||||
t = makeTag(language.Tag{LangID: id})
|
||||
}
|
||||
|
||||
// Scan the optional weight.
|
||||
w := 1.0
|
||||
if weight != "" {
|
||||
weight = consume(weight, 'q')
|
||||
weight = consume(weight, '=')
|
||||
// consume returns the empty string when a token could not be
|
||||
// consumed, resulting in an error for ParseFloat.
|
||||
if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
||||
return nil, nil, errInvalidWeight
|
||||
}
|
||||
// Drop tags with a quality weight of 0.
|
||||
if w <= 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
tag = append(tag, t)
|
||||
q = append(q, float32(w))
|
||||
}
|
||||
sort.Stable(&tagSort{tag, q})
|
||||
return tag, q, nil
|
||||
}
|
||||
|
||||
// consume removes a leading token c from s and returns the result or the empty
|
||||
// string if there is no such token.
|
||||
func consume(s string, c byte) string {
|
||||
if s == "" || s[0] != c {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(s[1:])
|
||||
}
|
||||
|
||||
func split(s string, c byte) (head, tail string) {
|
||||
if i := strings.IndexByte(s, c); i >= 0 {
|
||||
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
|
||||
}
|
||||
return strings.TrimSpace(s), ""
|
||||
}
|
||||
|
||||
// Add hack mapping to deal with a small number of cases that occur
|
||||
// in Accept-Language (with reasonable frequency).
|
||||
var acceptFallback = map[string]language.Language{
|
||||
"english": _en,
|
||||
"deutsch": _de,
|
||||
"italian": _it,
|
||||
"french": _fr,
|
||||
"*": _mul, // defined in the spec to match all languages.
|
||||
}
|
||||
|
||||
type tagSort struct {
|
||||
tag []Tag
|
||||
q []float32
|
||||
}
|
||||
|
||||
func (s *tagSort) Len() int {
|
||||
return len(s.q)
|
||||
}
|
||||
|
||||
func (s *tagSort) Less(i, j int) bool {
|
||||
return s.q[i] > s.q[j]
|
||||
}
|
||||
|
||||
func (s *tagSort) Swap(i, j int) {
|
||||
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
||||
s.q[i], s.q[j] = s.q[j], s.q[i]
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// equalTags compares language, script and region subtags only.
|
||||
func (t Tag) equalTags(a Tag) bool {
|
||||
return t.lang() == a.lang() &&
|
||||
t.script() == a.script() &&
|
||||
t.region() == a.region()
|
||||
}
|
||||
|
||||
var errSyntax = language.ErrSyntax
|
||||
|
||||
type parseTest struct {
|
||||
i int // the index of this test
|
||||
in string
|
||||
lang, script, region string
|
||||
variants, ext string
|
||||
extList []string // only used when more than one extension is present
|
||||
invalid bool
|
||||
rewrite bool // special rewrite not handled by parseTag
|
||||
changed bool // string needed to be reformatted
|
||||
}
|
||||
|
||||
func parseTests() []parseTest {
|
||||
tests := []parseTest{
|
||||
{in: "root", lang: "und"},
|
||||
{in: "und", lang: "und"},
|
||||
{in: "en", lang: "en"},
|
||||
|
||||
{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
|
||||
{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
|
||||
{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
|
||||
|
||||
{in: "xy", lang: "und", invalid: true},
|
||||
{in: "en-ZY", lang: "en", invalid: true},
|
||||
{in: "gsw", lang: "gsw"},
|
||||
{in: "sr_Latn", lang: "sr", script: "Latn"},
|
||||
{in: "af-Arab", lang: "af", script: "Arab"},
|
||||
{in: "nl-BE", lang: "nl", region: "BE"},
|
||||
{in: "es-419", lang: "es", region: "419"},
|
||||
{in: "und-001", lang: "und", region: "001"},
|
||||
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
|
||||
// Variants
|
||||
{in: "de-1901", lang: "de", variants: "1901"},
|
||||
// Accept with unsuppressed script.
|
||||
{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
|
||||
// Specialized.
|
||||
{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
|
||||
{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
|
||||
{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
|
||||
{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
|
||||
{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
|
||||
// Maximum number of variants while adhering to prefix rules.
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
|
||||
|
||||
// Sorting.
|
||||
{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
|
||||
|
||||
// Duplicates variants are removed, but not an error.
|
||||
{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
|
||||
|
||||
// Variants that do not have correct prefixes. We still accept these.
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
|
||||
{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
|
||||
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
|
||||
|
||||
// Invalid variant.
|
||||
{in: "de-1902", lang: "de", variants: "", invalid: true},
|
||||
|
||||
{in: "EN_CYRL", lang: "en", script: "Cyrl"},
|
||||
// private use and extensions
|
||||
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
|
||||
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
|
||||
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
|
||||
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
|
||||
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
|
||||
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
|
||||
{in: "en-v-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
|
||||
{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
|
||||
{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
|
||||
{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
|
||||
{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
|
||||
{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
|
||||
{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
|
||||
{in: "en-u-c", lang: "en", ext: "", invalid: true},
|
||||
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
|
||||
{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
|
||||
{in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
|
||||
{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
|
||||
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
|
||||
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
|
||||
{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
|
||||
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
|
||||
{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
|
||||
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
|
||||
// Invalid "u" extension. Drop invalid parts.
|
||||
{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true},
|
||||
{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true},
|
||||
// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
|
||||
// TODO: Consider eliminating duplicates and returning an error.
|
||||
{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
|
||||
{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
|
||||
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
|
||||
// Not necessary to have changed here.
|
||||
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
|
||||
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
|
||||
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
|
||||
{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
|
||||
{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
|
||||
{in: "fr-est", lang: "et", changed: true},
|
||||
{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
|
||||
{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
|
||||
// invalid
|
||||
{in: "", lang: "und", invalid: true},
|
||||
{in: "-", lang: "und", invalid: true},
|
||||
{in: "x", lang: "und", invalid: true},
|
||||
{in: "x-", lang: "und", invalid: true},
|
||||
{in: "x--", lang: "und", invalid: true},
|
||||
{in: "a-a-b-c-d", lang: "und", invalid: true},
|
||||
{in: "en-", lang: "en", invalid: true},
|
||||
{in: "enne-", lang: "und", invalid: true},
|
||||
{in: "en.", lang: "und", invalid: true},
|
||||
{in: "en.-latn", lang: "und", invalid: true},
|
||||
{in: "en.-en", lang: "en", invalid: true},
|
||||
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
|
||||
{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
|
||||
// TODO: check key-value validity
|
||||
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
|
||||
{in: "en-t-abcd", lang: "en", invalid: true},
|
||||
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
|
||||
// rewrites (more tests in TestGrandfathered)
|
||||
{in: "zh-min-nan", lang: "nan"},
|
||||
{in: "zh-yue", lang: "yue"},
|
||||
{in: "zh-xiang", lang: "hsn", rewrite: true},
|
||||
{in: "zh-guoyu", lang: "cmn", rewrite: true},
|
||||
{in: "iw", lang: "iw"},
|
||||
{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
|
||||
{in: "i-klingon", lang: "tlh", rewrite: true},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tests[i].i = i
|
||||
if tt.extList != nil {
|
||||
tests[i].ext = strings.Join(tt.extList, "-")
|
||||
}
|
||||
if tt.ext != "" && tt.extList == nil {
|
||||
tests[i].extList = []string{tt.ext}
|
||||
}
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
// partChecks runs checks for each part by calling the function returned by f.
|
||||
func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
|
||||
for i, tt := range parseTests() {
|
||||
tag, skip := f(&tt)
|
||||
if skip {
|
||||
continue
|
||||
}
|
||||
if l, _ := language.ParseBase(tt.lang); l != tag.lang() {
|
||||
t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l)
|
||||
}
|
||||
if sc, _ := language.ParseScript(tt.script); sc != tag.script() {
|
||||
t.Errorf("%d: script was %q; want %q", i, tag.script(), sc)
|
||||
}
|
||||
if r, _ := language.ParseRegion(tt.region); r != tag.region() {
|
||||
t.Errorf("%d: region was %q; want %q", i, tag.region(), r)
|
||||
}
|
||||
v := tag.tag().Variants()
|
||||
if v != "" {
|
||||
v = v[1:]
|
||||
}
|
||||
if v != tt.variants {
|
||||
t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
|
||||
}
|
||||
if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext {
|
||||
t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParse(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
|
||||
id, _ = Raw.Parse(tt.in)
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestErrors(t *testing.T) {
|
||||
mkInvalid := func(s string) error {
|
||||
return language.NewValueError([]byte(s))
|
||||
}
|
||||
tests := []struct {
|
||||
in string
|
||||
out error
|
||||
}{
|
||||
// invalid subtags.
|
||||
{"ac", mkInvalid("ac")},
|
||||
{"AC", mkInvalid("ac")},
|
||||
{"aa-Uuuu", mkInvalid("Uuuu")},
|
||||
{"aa-AB", mkInvalid("AB")},
|
||||
// ill-formed wins over invalid.
|
||||
{"ac-u", errSyntax},
|
||||
{"ac-u-ca", mkInvalid("ac")},
|
||||
{"ac-u-ca-co-pinyin", mkInvalid("ac")},
|
||||
{"noob", errSyntax},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
_, err := Parse(tt.in)
|
||||
if err != tt.out {
|
||||
t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompose1(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
|
||||
l, _ := ParseBase(tt.lang)
|
||||
s, _ := ParseScript(tt.script)
|
||||
r, _ := ParseRegion(tt.region)
|
||||
v := []Variant{}
|
||||
for _, x := range strings.Split(tt.variants, "-") {
|
||||
p, _ := ParseVariant(x)
|
||||
v = append(v, p)
|
||||
}
|
||||
e := []Extension{}
|
||||
for _, x := range tt.extList {
|
||||
p, _ := ParseExtension(x)
|
||||
e = append(e, p)
|
||||
}
|
||||
id, _ = Raw.Compose(l, s, r, v, e)
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompose2(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
|
||||
l, _ := ParseBase(tt.lang)
|
||||
s, _ := ParseScript(tt.script)
|
||||
r, _ := ParseRegion(tt.region)
|
||||
p := []interface{}{l, s, r, s, r, l}
|
||||
for _, x := range strings.Split(tt.variants, "-") {
|
||||
if x != "" {
|
||||
v, _ := ParseVariant(x)
|
||||
p = append(p, v)
|
||||
}
|
||||
}
|
||||
for _, x := range tt.extList {
|
||||
e, _ := ParseExtension(x)
|
||||
p = append(p, e)
|
||||
}
|
||||
id, _ = Raw.Compose(p...)
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompose3(t *testing.T) {
|
||||
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
|
||||
id, _ = Raw.Parse(tt.in)
|
||||
id, _ = Raw.Compose(id)
|
||||
return id, false
|
||||
})
|
||||
}
|
||||
|
||||
func mk(s string) Tag {
|
||||
return Raw.Make(s)
|
||||
}
|
||||
|
||||
func TestParseAcceptLanguage(t *testing.T) {
|
||||
type res struct {
|
||||
t Tag
|
||||
q float32
|
||||
}
|
||||
en := []res{{mk("en"), 1.0}}
|
||||
tests := []struct {
|
||||
out []res
|
||||
in string
|
||||
ok bool
|
||||
}{
|
||||
{en, "en", true},
|
||||
{en, " en", true},
|
||||
{en, "en ", true},
|
||||
{en, " en ", true},
|
||||
{en, "en,", true},
|
||||
{en, ",en", true},
|
||||
{en, ",,,en,,,", true},
|
||||
{en, ",en;q=1", true},
|
||||
|
||||
// We allow an empty input, contrary to spec.
|
||||
{nil, "", true},
|
||||
{[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
|
||||
|
||||
// errors
|
||||
{nil, ";", false},
|
||||
{nil, "$", false},
|
||||
{nil, "e;", false},
|
||||
{nil, "x;", false},
|
||||
{nil, "x", false},
|
||||
{nil, "ac", false}, // non-existing language
|
||||
{nil, "aa;q", false},
|
||||
{nil, "aa;q=", false},
|
||||
{nil, "aa;q=.", false},
|
||||
{nil, "00-t-0o", false},
|
||||
|
||||
// odd fallbacks
|
||||
{
|
||||
[]res{{mk("en"), 0.1}},
|
||||
" english ;q=.1",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
|
||||
" italian, deutsch, french",
|
||||
true,
|
||||
},
|
||||
|
||||
// lists
|
||||
{
|
||||
[]res{{mk("en"), 0.1}},
|
||||
"en;q=.1",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("mul"), 1.0}},
|
||||
"*",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
|
||||
"en,de",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("en"), 1.0}, {mk("de"), .5}},
|
||||
"en,de;q=0.5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
|
||||
" en ; q = 0.5 , , de;q=0.8",
|
||||
true,
|
||||
},
|
||||
{
|
||||
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
|
||||
"en,de,fr,i-klingon",
|
||||
true,
|
||||
},
|
||||
// sorting
|
||||
{
|
||||
[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
|
||||
"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
|
||||
true,
|
||||
},
|
||||
// dropping
|
||||
{
|
||||
[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
|
||||
"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
|
||||
true,
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
tags, qs, e := ParseAcceptLanguage(tt.in)
|
||||
if e == nil != tt.ok {
|
||||
t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
|
||||
}
|
||||
for j, tag := range tags {
|
||||
if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
|
||||
t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAcceptLanguageTooBig(t *testing.T) {
|
||||
s := strings.Repeat("en-x-a-", 333)
|
||||
_, _, err := ParseAcceptLanguage(s)
|
||||
if err != language.ErrSyntax {
|
||||
t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax)
|
||||
}
|
||||
s += "en-x-a"
|
||||
_, _, err = ParseAcceptLanguage(s)
|
||||
if err != errTagListTooLarge {
|
||||
t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,298 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// CLDRVersion is the CLDR version from which the tables in this package are derived.
|
||||
const CLDRVersion = "32"
|
||||
|
||||
const (
|
||||
_de = 269
|
||||
_en = 313
|
||||
_fr = 350
|
||||
_it = 505
|
||||
_mo = 784
|
||||
_no = 879
|
||||
_nb = 839
|
||||
_pt = 960
|
||||
_sh = 1031
|
||||
_mul = 806
|
||||
_und = 0
|
||||
)
|
||||
const (
|
||||
_001 = 1
|
||||
_419 = 31
|
||||
_BR = 65
|
||||
_CA = 73
|
||||
_ES = 111
|
||||
_GB = 124
|
||||
_MD = 189
|
||||
_PT = 239
|
||||
_UK = 307
|
||||
_US = 310
|
||||
_ZZ = 358
|
||||
_XA = 324
|
||||
_XC = 326
|
||||
_XK = 334
|
||||
)
|
||||
const (
|
||||
_Latn = 91
|
||||
_Hani = 57
|
||||
_Hans = 59
|
||||
_Hant = 60
|
||||
_Qaaa = 149
|
||||
_Qaai = 157
|
||||
_Qabx = 198
|
||||
_Zinh = 255
|
||||
_Zyyy = 260
|
||||
_Zzzz = 261
|
||||
)
|
||||
|
||||
var regionToGroups = []uint8{ // 359 elements
|
||||
// Entry 0 - 3F
|
||||
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
|
||||
// Entry 40 - 7F
|
||||
0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||
0x08, 0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04,
|
||||
// Entry 80 - BF
|
||||
0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00,
|
||||
0x00, 0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00,
|
||||
0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04,
|
||||
// Entry C0 - FF
|
||||
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
|
||||
0x01, 0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 100 - 13F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04,
|
||||
0x00, 0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00,
|
||||
// Entry 140 - 17F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
} // Size: 383 bytes
|
||||
|
||||
var paradigmLocales = [][3]uint16{ // 3 elements
|
||||
0: [3]uint16{0x139, 0x0, 0x7c},
|
||||
1: [3]uint16{0x13e, 0x0, 0x1f},
|
||||
2: [3]uint16{0x3c0, 0x41, 0xef},
|
||||
} // Size: 42 bytes
|
||||
|
||||
type mutualIntelligibility struct {
|
||||
want uint16
|
||||
have uint16
|
||||
distance uint8
|
||||
oneway bool
|
||||
}
|
||||
type scriptIntelligibility struct {
|
||||
wantLang uint16
|
||||
haveLang uint16
|
||||
wantScript uint8
|
||||
haveScript uint8
|
||||
distance uint8
|
||||
}
|
||||
type regionIntelligibility struct {
|
||||
lang uint16
|
||||
script uint8
|
||||
group uint8
|
||||
distance uint8
|
||||
}
|
||||
|
||||
// matchLang holds pairs of langIDs of base languages that are typically
|
||||
// mutually intelligible. Each pair is associated with a confidence and
|
||||
// whether the intelligibility goes one or both ways.
|
||||
var matchLang = []mutualIntelligibility{ // 113 elements
|
||||
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
|
||||
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
|
||||
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
|
||||
3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
|
||||
4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
|
||||
5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
|
||||
6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
|
||||
7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
|
||||
8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
|
||||
9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
|
||||
11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
|
||||
12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
|
||||
13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
|
||||
14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
|
||||
17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
|
||||
18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
|
||||
19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
|
||||
20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
|
||||
21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
|
||||
22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
|
||||
23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
|
||||
24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
|
||||
25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
|
||||
26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
|
||||
27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
|
||||
28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
|
||||
29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
|
||||
30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
|
||||
31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
|
||||
32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
|
||||
33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
|
||||
34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
|
||||
35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
|
||||
36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
|
||||
37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
|
||||
38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
|
||||
41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
|
||||
44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
|
||||
45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
|
||||
47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
|
||||
48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
|
||||
50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
|
||||
51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
|
||||
52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
|
||||
53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
|
||||
55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
|
||||
56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
|
||||
59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
|
||||
60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
|
||||
61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
|
||||
62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
|
||||
64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
|
||||
65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
|
||||
67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
|
||||
69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
|
||||
70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
|
||||
71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
|
||||
72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
|
||||
73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
|
||||
74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
|
||||
75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
|
||||
76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
|
||||
77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
|
||||
78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
|
||||
79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
|
||||
80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
|
||||
81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
|
||||
82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
|
||||
83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
|
||||
84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
|
||||
87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
|
||||
89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
|
||||
90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
|
||||
91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
|
||||
92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
|
||||
93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
|
||||
95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
|
||||
96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
|
||||
97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
|
||||
99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
|
||||
101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
|
||||
102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
|
||||
103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
|
||||
105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
|
||||
106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
|
||||
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
|
||||
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
|
||||
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
|
||||
} // Size: 702 bytes
|
||||
|
||||
// matchScript holds pairs of scriptIDs where readers of one script
|
||||
// can typically also read the other. Each is associated with a confidence.
|
||||
var matchScript = []scriptIntelligibility{ // 26 elements
|
||||
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x5b, haveScript: 0x20, distance: 0x5},
|
||||
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x20, haveScript: 0x5b, distance: 0x5},
|
||||
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x5b, distance: 0xa},
|
||||
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x20, distance: 0xa},
|
||||
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2e, haveScript: 0x5b, distance: 0xa},
|
||||
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x5b, distance: 0xa},
|
||||
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x53, haveScript: 0x5b, distance: 0xa},
|
||||
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x58, haveScript: 0x5b, distance: 0xa},
|
||||
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6f, haveScript: 0x5b, distance: 0xa},
|
||||
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x76, haveScript: 0x5b, distance: 0xa},
|
||||
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x22, haveScript: 0x5b, distance: 0xa},
|
||||
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x83, haveScript: 0x5b, distance: 0xa},
|
||||
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5b, distance: 0xa},
|
||||
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd6, haveScript: 0x5b, distance: 0xa},
|
||||
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5b, distance: 0xa},
|
||||
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe9, haveScript: 0x5b, distance: 0xa},
|
||||
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5b, distance: 0xa},
|
||||
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
|
||||
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
|
||||
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3e, haveScript: 0x5b, distance: 0xa},
|
||||
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3b, haveScript: 0x3c, distance: 0xf},
|
||||
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3c, haveScript: 0x3b, distance: 0x13},
|
||||
} // Size: 232 bytes
|
||||
|
||||
var matchRegion = []regionIntelligibility{ // 15 elements
|
||||
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
|
||||
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
|
||||
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
|
||||
3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
|
||||
4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
|
||||
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
|
||||
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
|
||||
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
|
||||
8: {lang: 0x529, script: 0x3c, group: 0x2, distance: 0x4},
|
||||
9: {lang: 0x529, script: 0x3c, group: 0x82, distance: 0x4},
|
||||
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
|
||||
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
|
||||
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
|
||||
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
|
||||
14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5},
|
||||
} // Size: 114 bytes
|
||||
|
||||
// Total table size 1473 bytes (1KiB); checksum: 7BB90B5C
|
||||
@@ -0,0 +1,145 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
// TODO: Various sets of commonly use tags and regions.
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func (c CanonType) MustParse(s string) Tag {
|
||||
t, err := c.Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Base {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag(compact.Afrikaans)
|
||||
Amharic Tag = Tag(compact.Amharic)
|
||||
Arabic Tag = Tag(compact.Arabic)
|
||||
ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
|
||||
Azerbaijani Tag = Tag(compact.Azerbaijani)
|
||||
Bulgarian Tag = Tag(compact.Bulgarian)
|
||||
Bengali Tag = Tag(compact.Bengali)
|
||||
Catalan Tag = Tag(compact.Catalan)
|
||||
Czech Tag = Tag(compact.Czech)
|
||||
Danish Tag = Tag(compact.Danish)
|
||||
German Tag = Tag(compact.German)
|
||||
Greek Tag = Tag(compact.Greek)
|
||||
English Tag = Tag(compact.English)
|
||||
AmericanEnglish Tag = Tag(compact.AmericanEnglish)
|
||||
BritishEnglish Tag = Tag(compact.BritishEnglish)
|
||||
Spanish Tag = Tag(compact.Spanish)
|
||||
EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
|
||||
LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
|
||||
Estonian Tag = Tag(compact.Estonian)
|
||||
Persian Tag = Tag(compact.Persian)
|
||||
Finnish Tag = Tag(compact.Finnish)
|
||||
Filipino Tag = Tag(compact.Filipino)
|
||||
French Tag = Tag(compact.French)
|
||||
CanadianFrench Tag = Tag(compact.CanadianFrench)
|
||||
Gujarati Tag = Tag(compact.Gujarati)
|
||||
Hebrew Tag = Tag(compact.Hebrew)
|
||||
Hindi Tag = Tag(compact.Hindi)
|
||||
Croatian Tag = Tag(compact.Croatian)
|
||||
Hungarian Tag = Tag(compact.Hungarian)
|
||||
Armenian Tag = Tag(compact.Armenian)
|
||||
Indonesian Tag = Tag(compact.Indonesian)
|
||||
Icelandic Tag = Tag(compact.Icelandic)
|
||||
Italian Tag = Tag(compact.Italian)
|
||||
Japanese Tag = Tag(compact.Japanese)
|
||||
Georgian Tag = Tag(compact.Georgian)
|
||||
Kazakh Tag = Tag(compact.Kazakh)
|
||||
Khmer Tag = Tag(compact.Khmer)
|
||||
Kannada Tag = Tag(compact.Kannada)
|
||||
Korean Tag = Tag(compact.Korean)
|
||||
Kirghiz Tag = Tag(compact.Kirghiz)
|
||||
Lao Tag = Tag(compact.Lao)
|
||||
Lithuanian Tag = Tag(compact.Lithuanian)
|
||||
Latvian Tag = Tag(compact.Latvian)
|
||||
Macedonian Tag = Tag(compact.Macedonian)
|
||||
Malayalam Tag = Tag(compact.Malayalam)
|
||||
Mongolian Tag = Tag(compact.Mongolian)
|
||||
Marathi Tag = Tag(compact.Marathi)
|
||||
Malay Tag = Tag(compact.Malay)
|
||||
Burmese Tag = Tag(compact.Burmese)
|
||||
Nepali Tag = Tag(compact.Nepali)
|
||||
Dutch Tag = Tag(compact.Dutch)
|
||||
Norwegian Tag = Tag(compact.Norwegian)
|
||||
Punjabi Tag = Tag(compact.Punjabi)
|
||||
Polish Tag = Tag(compact.Polish)
|
||||
Portuguese Tag = Tag(compact.Portuguese)
|
||||
BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
|
||||
EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
|
||||
Romanian Tag = Tag(compact.Romanian)
|
||||
Russian Tag = Tag(compact.Russian)
|
||||
Sinhala Tag = Tag(compact.Sinhala)
|
||||
Slovak Tag = Tag(compact.Slovak)
|
||||
Slovenian Tag = Tag(compact.Slovenian)
|
||||
Albanian Tag = Tag(compact.Albanian)
|
||||
Serbian Tag = Tag(compact.Serbian)
|
||||
SerbianLatin Tag = Tag(compact.SerbianLatin)
|
||||
Swedish Tag = Tag(compact.Swedish)
|
||||
Swahili Tag = Tag(compact.Swahili)
|
||||
Tamil Tag = Tag(compact.Tamil)
|
||||
Telugu Tag = Tag(compact.Telugu)
|
||||
Thai Tag = Tag(compact.Thai)
|
||||
Turkish Tag = Tag(compact.Turkish)
|
||||
Ukrainian Tag = Tag(compact.Ukrainian)
|
||||
Urdu Tag = Tag(compact.Urdu)
|
||||
Uzbek Tag = Tag(compact.Uzbek)
|
||||
Vietnamese Tag = Tag(compact.Vietnamese)
|
||||
Chinese Tag = Tag(compact.Chinese)
|
||||
SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
|
||||
TraditionalChinese Tag = Tag(compact.TraditionalChinese)
|
||||
Zulu Tag = Tag(compact.Zulu)
|
||||
)
|
||||
+389
@@ -0,0 +1,389 @@
|
||||
# TODO: this file has not yet been included in the main CLDR release.
|
||||
# The intent is to verify this file against the Go implementation and then
|
||||
# correct the cases and add merge in other interesting test cases.
|
||||
# See TestCLDRCompliance in match_test.go, as well as the list of exceptions
|
||||
# defined in the map skip below it, for the work in progress.
|
||||
|
||||
# Data-driven test for the XLocaleMatcher.
|
||||
# Format
|
||||
# • Everything after "#" is a comment
|
||||
# • Arguments are separated by ";". They are:
|
||||
|
||||
# supported ; desired ; expected
|
||||
|
||||
# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
|
||||
# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
|
||||
# The test code also supports reformatting this file, by setting the REFORMAT flag.
|
||||
|
||||
##################################################
|
||||
# testParentLocales
|
||||
|
||||
# es-419, es-AR, and es-MX are in a cluster; es is in a different one
|
||||
|
||||
es-419, es-ES ; es-AR ; es-419
|
||||
es-ES, es-419 ; es-AR ; es-419
|
||||
|
||||
es-419, es ; es-AR ; es-419
|
||||
es, es-419 ; es-AR ; es-419
|
||||
|
||||
es-MX, es ; es-AR ; es-MX
|
||||
es, es-MX ; es-AR ; es-MX
|
||||
|
||||
# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
|
||||
|
||||
en-GB, en-US ; en-AU ; en-GB
|
||||
en-US, en-GB ; en-AU ; en-GB
|
||||
|
||||
en-GB, en ; en-AU ; en-GB
|
||||
en, en-GB ; en-AU ; en-GB
|
||||
|
||||
en-NZ, en-US ; en-AU ; en-NZ
|
||||
en-US, en-NZ ; en-AU ; en-NZ
|
||||
|
||||
en-NZ, en ; en-AU ; en-NZ
|
||||
en, en-NZ ; en-AU ; en-NZ
|
||||
|
||||
# pt-AU and pt-PT in one cluster; pt-BR in another
|
||||
|
||||
pt-PT, pt-BR ; pt-AO ; pt-PT
|
||||
pt-BR, pt-PT ; pt-AO ; pt-PT
|
||||
|
||||
pt-PT, pt ; pt-AO ; pt-PT
|
||||
pt, pt-PT ; pt-AO ; pt-PT
|
||||
|
||||
zh-MO, zh-TW ; zh-HK ; zh-MO
|
||||
zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh-TW ; zh-HK ; zh-MO
|
||||
zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh-CN ; zh-HK ; zh-MO
|
||||
zh-CN, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh ; zh-HK ; zh-MO
|
||||
zh, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
##################################################
|
||||
# testChinese
|
||||
|
||||
zh-CN, zh-TW, iw ; zh-Hant-TW ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-TW ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh ; zh-CN
|
||||
|
||||
##################################################
|
||||
# testenGB
|
||||
|
||||
fr, en, en-GB, es-419, es-MX, es ; en-NZ ; en-GB
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-ES ; es
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-AR ; es-419
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-MX ; es-MX
|
||||
|
||||
##################################################
|
||||
# testFallbacks
|
||||
|
||||
91, en, hi ; sa ; hi
|
||||
|
||||
##################################################
|
||||
# testBasics
|
||||
|
||||
fr, en-GB, en ; en-GB ; en-GB
|
||||
fr, en-GB, en ; en ; en
|
||||
fr, en-GB, en ; fr ; fr
|
||||
fr, en-GB, en ; ja ; fr # return first if no match
|
||||
|
||||
##################################################
|
||||
# testFallback
|
||||
|
||||
# check that script fallbacks are handled right
|
||||
|
||||
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
|
||||
zh-CN, zh-TW, iw ; he-IT ; iw
|
||||
|
||||
##################################################
|
||||
# testSpecials
|
||||
|
||||
# check that nearby languages are handled
|
||||
|
||||
en, fil, ro, nn ; tl ; fil
|
||||
en, fil, ro, nn ; mo ; ro
|
||||
en, fil, ro, nn ; nb ; nn
|
||||
|
||||
# make sure default works
|
||||
|
||||
en, fil, ro, nn ; ja ; en
|
||||
|
||||
##################################################
|
||||
# testRegionalSpecials
|
||||
|
||||
# verify that en-AU is closer to en-GB than to en (which is en-US)
|
||||
|
||||
en, en-GB, es, es-419 ; es-MX ; es-419
|
||||
en, en-GB, es, es-419 ; en-AU ; en-GB
|
||||
en, en-GB, es, es-419 ; es-ES ; es
|
||||
|
||||
##################################################
|
||||
# testHK
|
||||
|
||||
# HK and MO are closer to each other for Hant than to TW
|
||||
|
||||
zh, zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
zh, zh-TW, zh-HK ; zh-MO ; zh-HK
|
||||
|
||||
##################################################
|
||||
# testMatch-exact
|
||||
|
||||
# see localeDistance.txt
|
||||
|
||||
##################################################
|
||||
# testMatch-none
|
||||
|
||||
# see localeDistance.txt
|
||||
|
||||
##################################################
|
||||
# testMatch-matchOnMazimized
|
||||
|
||||
zh, zh-Hant ; und-TW ; zh-Hant # und-TW should be closer to zh-Hant than to zh
|
||||
en-Hant-TW, und-TW ; zh-Hant ; und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
|
||||
en-Hant-TW, und-TW ; zh ; und-TW # zh should be closer to und-TW than to en-Hant-TW
|
||||
|
||||
##################################################
|
||||
# testMatchGrandfatheredCode
|
||||
|
||||
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-exactMatch
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-simpleVariantMatch
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
|
||||
|
||||
# Fallback.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-matchOnMaximized
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
en, ja ; ja-Jpan-JP, en-AU ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
|
||||
|
||||
en, ja ; ja-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
en, ja ; ja-Jpan-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-noMatchOnMaximized
|
||||
# Regression test for http://b/5714572 .
|
||||
# de maximizes to de-DE. Pick the exact match for the secondary language instead.
|
||||
en, de, fr, ja ; de-CH, fr ; de
|
||||
|
||||
##################################################
|
||||
# testBestMatchForTraditionalChinese
|
||||
|
||||
# Scenario: An application that only supports Simplified Chinese (and some other languages),
|
||||
# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
|
||||
# zh-Hans, it wouldn't make much of a difference.
|
||||
|
||||
# The script distance (simplified vs. traditional Han) is considered small enough
|
||||
# to be an acceptable match. The regional difference is considered almost insignificant.
|
||||
|
||||
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
|
||||
|
||||
# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
|
||||
# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
|
||||
# change your call to getBestMatch to include a 2nd language preference.
|
||||
# "en" is a better match since its distance to "en-US" is closer than the distance
|
||||
# from "zh-TW" to "zh-CN" (script distance).
|
||||
|
||||
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
|
||||
|
||||
##################################################
|
||||
# testUndefined
|
||||
# When the undefined language doesn't match anything in the list,
|
||||
# getBestMatch returns the default, as usual.
|
||||
|
||||
it, fr ; und ; it
|
||||
|
||||
# When it *does* occur in the list, bestMatch returns it, as expected.
|
||||
it, und ; und ; und
|
||||
|
||||
# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
|
||||
# tags, the undefined language would normally match English. But that would produce the
|
||||
# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
|
||||
# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
|
||||
|
||||
# To avoid that, we change the matcher's definitions of max
|
||||
# so that max("und")="und". That produces the following, more desirable
|
||||
# results:
|
||||
|
||||
it, en ; und ; it
|
||||
it, und ; en ; it
|
||||
|
||||
##################################################
|
||||
# testGetBestMatch-regionDistance
|
||||
|
||||
es-AR, es ; es-MX ; es-AR
|
||||
fr, en, en-GB ; en-CA ; en-GB
|
||||
de-AT, de-DE, de-CH ; de ; de-DE
|
||||
|
||||
##################################################
|
||||
# testAsymmetry
|
||||
|
||||
mul, nl ; af ; nl # af => nl
|
||||
mul, af ; nl ; mul # but nl !=> af
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-matchOnMaximized2
|
||||
|
||||
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja # Match for ja-JP, with likely region subtag
|
||||
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-closeEnoughMatchOnMaximized
|
||||
|
||||
en-GB, en, de, fr, ja ; de-CH, fr ; de
|
||||
en-GB, en, de, fr, ja ; en-US, ar, nl, de, ja ; en
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForPortuguese
|
||||
|
||||
# pt might be supported and not pt-PT
|
||||
|
||||
# European user who prefers Spanish over Brazillian Portuguese as a fallback.
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT # pt implicit
|
||||
|
||||
# Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
|
||||
# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
|
||||
# matchers as "pt-BR" is a much more common language.
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
|
||||
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt, es-419, pt-PT ; pt
|
||||
|
||||
pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
|
||||
|
||||
# Code that adds the user's country can get "pt-US" for a user's language.
|
||||
# That should fall back to "pt-BR".
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt-US, pt-PT ; pt-BR
|
||||
pt-PT, pt, es, es-419 ; pt-US, pt-PT, pt ; pt # pt-BR implicit
|
||||
|
||||
##################################################
|
||||
# testVariantWithScriptMatch 1 and 2
|
||||
|
||||
fr, en, sv ; en-GB ; en
|
||||
fr, en, sv ; en-GB ; en
|
||||
en, sv ; en-GB, sv ; en
|
||||
|
||||
##################################################
|
||||
# testLongLists
|
||||
|
||||
en, sv ; sv ; sv
|
||||
af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; sv ; sv
|
||||
af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; sv ; sv
|
||||
|
||||
##################################################
|
||||
# test8288
|
||||
|
||||
it, en ; und ; it
|
||||
it, en ; und, en ; en
|
||||
|
||||
# examples from
|
||||
# https://unicode.org/repos/cldr/tags/latest/common/bcp47/
|
||||
# https://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
|
||||
|
||||
##################################################
|
||||
# testUnHack
|
||||
|
||||
en-NZ, en-IT ; en-US ; en-NZ
|
||||
|
||||
##################################################
|
||||
# testEmptySupported => null
|
||||
; en ; null
|
||||
|
||||
##################################################
|
||||
# testVariantsAndExtensions
|
||||
##################################################
|
||||
# tests the .combine() method
|
||||
|
||||
und, fr ; fr-BE-fonipa ; fr ; fr-BE-fonipa
|
||||
und, fr-CA ; fr-BE-fonipa ; fr-CA ; fr-BE-fonipa
|
||||
und, fr-fonupa ; fr-BE-fonipa ; fr-fonupa ; fr-BE-fonipa
|
||||
und, no ; nn-BE-fonipa ; no ; no-BE-fonipa
|
||||
und, en-GB-u-sd-gbsct ; en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; en-GB-u-sd-gbsct ; en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
|
||||
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr-PSCRACK ; fr-PSCRACK
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr ; fr-PSCRACK
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; de-CH ; de-PSCRACK
|
||||
|
||||
##################################################
|
||||
# testClusters
|
||||
# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
|
||||
|
||||
und, es, es-MA, es-MX, es-419 ; es-AR ; es-419
|
||||
und, es-MA, es, es-419, es-MX ; es-AR ; es-419
|
||||
und, es, es-MA, es-MX, es-419 ; es-EA ; es
|
||||
und, es-MA, es, es-419, es-MX ; es-EA ; es
|
||||
|
||||
# of course, fall back to within cluster
|
||||
|
||||
und, es, es-MA, es-MX ; es-AR ; es-MX
|
||||
und, es-MA, es, es-MX ; es-AR ; es-MX
|
||||
und, es-MA, es-MX, es-419 ; es-EA ; es-MA
|
||||
und, es-MA, es-419, es-MX ; es-EA ; es-MA
|
||||
|
||||
# we favor es-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
|
||||
|
||||
und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB
|
||||
und, en-GU, en, en-GB, en-IN ; en-ZA ; en-GB
|
||||
und, en, en-GU, en-IN, en-GB ; en-VI ; en
|
||||
und, en-GU, en, en-GB, en-IN ; en-VI ; en
|
||||
|
||||
# of course, fall back to within cluster
|
||||
|
||||
und, en, en-GU, en-IN ; en-ZA ; en-IN
|
||||
und, en-GU, en, en-IN ; en-ZA ; en-IN
|
||||
und, en-GU, en-IN, en-GB ; en-VI ; en-GU
|
||||
und, en-GU, en-GB, en-IN ; en-VI ; en-GU
|
||||
|
||||
##################################################
|
||||
# testThreshold
|
||||
@Threshold=60
|
||||
|
||||
50, und, fr-CA-fonupa ; fr-BE-fonipa ; fr-CA-fonupa ; fr-BE-fonipa
|
||||
50, und, fr-Cyrl-CA-fonupa ; fr-BE-fonipa ; fr-Cyrl-CA-fonupa ; fr-Cyrl-BE-fonipa
|
||||
|
||||
@Threshold=-1 # restore
|
||||
|
||||
##################################################
|
||||
# testScriptFirst
|
||||
@DistanceOption=SCRIPT_FIRST
|
||||
@debug
|
||||
|
||||
ru, fr ; zh, pl ; fr
|
||||
ru, fr ; zh-Cyrl, pl ; ru
|
||||
hr, en-Cyrl; sr ; en-Cyrl
|
||||
da, ru, hr; sr ; ru
|
||||
+231
@@ -0,0 +1,231 @@
|
||||
# basics
|
||||
fr, en-GB, en ; en-GB ; en-GB
|
||||
fr, en-GB, en ; en-US ; en
|
||||
fr, en-GB, en ; fr-FR ; fr
|
||||
fr, en-GB, en ; ja-JP ; fr
|
||||
|
||||
# script fallbacks
|
||||
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
|
||||
zh-CN, zh-TW, iw ; he-IT ; iw ; iw-u-rg-itzzzz
|
||||
|
||||
# language-specific script fallbacks 1
|
||||
en, sr, nl ; sr-Latn ; sr
|
||||
en, sr, nl ; sh ; sr # different script, but seems okay and is as CLDR suggests
|
||||
en, sr, nl ; hr ; en
|
||||
en, sr, nl ; bs ; en
|
||||
en, sr, nl ; nl-Cyrl ; sr
|
||||
|
||||
# language-specific script fallbacks 2
|
||||
en, sh ; sr ; sh
|
||||
en, sh ; sr-Cyrl ; sh
|
||||
en, sh ; hr ; sh
|
||||
|
||||
# don't match hr to sr-Latn
|
||||
en, sr-Latn ; hr ; en
|
||||
|
||||
# both deprecated and not
|
||||
fil, tl, iw, he ; he-IT ; he
|
||||
fil, tl, iw, he ; he ; he
|
||||
fil, tl, iw, he ; iw ; iw
|
||||
fil, tl, iw, he ; fil-IT ; fil
|
||||
fil, tl, iw, he ; fil ; fil
|
||||
fil, tl, iw, he ; tl ; tl
|
||||
|
||||
# nearby languages
|
||||
en, fil, ro, nn ; tl ; fil
|
||||
en, fil, ro, nn ; mo ; ro
|
||||
en, fil, ro, nn ; nb ; nn
|
||||
en, fil, ro, nn ; ja ; en
|
||||
|
||||
# nearby languages: Nynorsk to Bokmål
|
||||
en, nb ; nn ; nb
|
||||
|
||||
# nearby languages: Danish does not match nn
|
||||
en, nn ; da ; en
|
||||
|
||||
# nearby languages: Danish matches no
|
||||
en, no ; da ; no
|
||||
|
||||
# nearby languages: Danish matches nb
|
||||
en, nb ; da ; nb
|
||||
|
||||
# prefer matching languages over language variants.
|
||||
nn, en-GB ; no, en-US ; en-GB
|
||||
nn, en-GB ; nb, en-US ; en-GB
|
||||
|
||||
# deprecated version is closer than same language with other differences
|
||||
nl, he, en-GB ; iw, en-US ; he
|
||||
|
||||
# macro equivalent is closer than same language with other differences
|
||||
nl, zh, en-GB, no ; cmn, en-US ; zh
|
||||
nl, zh, en-GB, no ; nb, en-US ; no
|
||||
|
||||
# legacy equivalent is closer than same language with other differences
|
||||
nl, fil, en-GB ; tl, en-US ; fil
|
||||
|
||||
# distinguish near equivalents
|
||||
en, ro, mo, ro-MD ; ro ; ro
|
||||
en, ro, mo, ro-MD ; mo ; mo
|
||||
en, ro, mo, ro-MD ; ro-MD ; ro-MD
|
||||
|
||||
# maximization of legacy
|
||||
sr-Cyrl, sr-Latn, ro, ro-MD ; sh ; sr-Latn
|
||||
sr-Cyrl, sr-Latn, ro, ro-MD ; mo ; ro-MD
|
||||
|
||||
# empty
|
||||
; fr ; und
|
||||
; en ; und
|
||||
|
||||
# private use subtags
|
||||
fr, en-GB, x-bork, es-ES, es-419 ; x-piglatin ; fr
|
||||
fr, en-GB, x-bork, es-ES, es-419 ; x-bork ; x-bork
|
||||
|
||||
# grandfathered codes
|
||||
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
|
||||
fr, i-klingon, en-Latn-US ; i-klingon ; tlh
|
||||
|
||||
|
||||
# simple variant match
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
|
||||
|
||||
# best match for traditional Chinese
|
||||
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
|
||||
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
|
||||
|
||||
# more specific script should win in case regions are identical
|
||||
af, af-Latn, af-Arab ; af ; af
|
||||
af, af-Latn, af-Arab ; af-ZA ; af
|
||||
af, af-Latn, af-Arab ; af-Latn-ZA ; af-Latn
|
||||
af, af-Latn, af-Arab ; af-Latn ; af-Latn
|
||||
|
||||
# more specific region should win
|
||||
nl, nl-NL, nl-BE ; nl ; nl
|
||||
nl, nl-NL, nl-BE ; nl-Latn ; nl
|
||||
nl, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
|
||||
nl, nl-NL, nl-BE ; nl-NL ; nl-NL
|
||||
|
||||
# region may replace matched if matched is enclosing
|
||||
es-419,es ; es-MX ; es-419 ; es-MX
|
||||
es-419,es ; es-SG ; es
|
||||
|
||||
# more specific region wins over more specific script
|
||||
nl, nl-Latn, nl-NL, nl-BE ; nl ; nl
|
||||
nl, nl-Latn, nl-NL, nl-BE ; nl-Latn ; nl-Latn
|
||||
nl, nl-Latn, nl-NL, nl-BE ; nl-NL ; nl-NL
|
||||
nl, nl-Latn, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
|
||||
|
||||
# region distance Portuguese
|
||||
pt, pt-PT ; pt-ES ; pt-PT
|
||||
|
||||
# if no preferred locale specified, pick top language, not regional
|
||||
en, fr, fr-CA, fr-CH ; fr-US ; fr ; fr-u-rg-uszzzz
|
||||
|
||||
# region distance German
|
||||
de-AT, de-DE, de-CH ; de ; de-DE
|
||||
|
||||
# en-AU is closer to en-GB than to en (which is en-US)
|
||||
en, en-GB, es-ES, es-419 ; en-AU ; en-GB
|
||||
en, en-GB, es-ES, es-419 ; es-MX ; es-419 ; es-MX
|
||||
en, en-GB, es-ES, es-419 ; es-PT ; es-ES
|
||||
|
||||
# undefined
|
||||
it, fr ; und ; it
|
||||
|
||||
# und does not match en
|
||||
it, en ; und ; it
|
||||
|
||||
# undefined in priority list
|
||||
it, und ; und ; und
|
||||
it, und ; en ; it
|
||||
|
||||
# undefined
|
||||
it, fr, zh ; und-FR ; fr
|
||||
it, fr, zh ; und-CN ; zh
|
||||
it, fr, zh ; und-Hans ; zh
|
||||
it, fr, zh ; und-Hant ; zh
|
||||
it, fr, zh ; und-Latn ; it
|
||||
|
||||
# match on maximized tag
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja
|
||||
|
||||
# pick best maximized tag
|
||||
ja, ja-Jpan-US, ja-JP, en, ru ; ja-Jpan, ru ; ja
|
||||
ja, ja-Jpan-US, ja-JP, en, ru ; ja-JP, ru ; ja-JP
|
||||
ja, ja-Jpan-US, ja-JP, en, ru ; ja-US, ru ; ja-Jpan-US
|
||||
|
||||
# termination: pick best maximized match
|
||||
ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan-JP, ru ; ja-JP
|
||||
ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan, ru ; ja-Jpan
|
||||
|
||||
# same language over exact, but distinguish when user is explicit
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
|
||||
en, de, fr, ja ; de-CH, fr ; de # TODO: ; de-u-rg-CH
|
||||
en-GB, nl ; en, nl ; en-GB
|
||||
en-GB, nl ; en, nl, en-GB ; nl
|
||||
|
||||
# parent relation preserved
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-150 ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-AU ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-BE ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GG ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GI ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-HK ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IE ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IM ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IN ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-JE ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-NZ ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-PK ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-SG ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-DE ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-AR ; es-419 ; es-AR
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-BO ; es-419 ; es-BO
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CL ; es-419 ; es-CL
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CO ; es-419 ; es-CO
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CR ; es-419 ; es-CR
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CU ; es-419 ; es-CU
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-DO ; es-419 ; es-DO
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-EC ; es-419 ; es-EC
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-GT ; es-419 ; es-GT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-HN ; es-419 ; es-HN
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-MX ; es-419 ; es-MX
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-NI ; es-419 ; es-NI
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PA ; es-419 ; es-PA
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PE ; es-419 ; es-PE
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PR ; es-419 ; es-PR
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PT ; es
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PY ; es-419 ; es-PY
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-SV ; es-419 ; es-SV
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-US ; es-419
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-UY ; es-419 ; es-UY
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-VE ; es-419 ; es-VE
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-AO ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-CV ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-GW ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MO ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MZ ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-ST ; pt-PT
|
||||
en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-TL ; pt-PT
|
||||
|
||||
# preserve extensions
|
||||
en, de, sl-nedis ; de-FR-u-co-phonebk ; de ; de-u-co-phonebk-rg-frzzzz
|
||||
en, de, sl-nedis ; sl-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
|
||||
en, de, sl-nedis ; sl-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
|
||||
en, de, sl-nedis ; sl-HR-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur-rg-hrzzzz
|
||||
en, de, sl-nedis ; de-t-m0-iso-i0-pinyin ; de ; de-t-m0-iso-i0-pinyin
|
||||
|
||||
und, nl ; nl-BE-fonipa ; nl ; nl-u-rg-bezzzz
|
||||
und, nl-CA ; nl-BE-fonipa ; nl-CA ; nl-CA-u-rg-bezzzz
|
||||
und, nl-fonupa ; nl-BE-fonipa ; nl-fonupa ; nl-fonupa-u-rg-bezzzz
|
||||
und, no ; nn-DK-fonipa ; no ; no-u-rg-dkzzzz
|
||||
und, en-GB-u-sd-usca ; en-US-fonipa-u-nu-Arab-ca-buddhist-sd-usdc-t-m0-iso-i0-pinyin ; en-GB-u-sd-usca ; en-GB-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-Arab-rg-uszzzz-sd-usca
|
||||
Reference in New Issue
Block a user