whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,417 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package catmsg contains support types for package x/text/message/catalog.
//
// This package contains the low-level implementations of Message used by the
// catalog package and provides primitives for other packages to implement their
// own. For instance, the plural package provides functionality for selecting
// translation strings based on the plural category of substitution arguments.
//
// # Encoding and Decoding
//
// Catalogs store Messages encoded as a single string. Compiling a message into
// a string both results in a more compact representation and speeds up evaluation.
//
// A Message must implement a Compile method to convert its arbitrary
// representation to a string. The Compile method takes an Encoder which
// facilitates serializing the message. Encoders also provide more context of
// the message's creation (such as for which language the message is intended),
// which may not be known at the time of the creation of the message.
//
// Each message type must also have an accompanying decoder registered to decode
// the message. This decoder takes a Decoder argument which provides the
// counterparts for the decoding.
//
// # Renderers
//
// A Decoder must be initialized with a Renderer implementation. These
// implementations must be provided by packages that use Catalogs, typically
// formatting packages such as x/text/message. A typical user will not need to
// worry about this type; it is only relevant to packages that do string
// formatting and want to use the catalog package to handle localized strings.
//
// A package that uses catalogs for selecting strings receives selection results
// as a sequence of substrings passed to the Renderer. The following snippet shows
// how to express the above example using the message package.
//
// message.Set(language.English, "You are %d minute(s) late.",
// catalog.Var("minutes", plural.Select(1, "one", "minute")),
// catalog.String("You are %[1]d ${minutes} late."))
//
// p := message.NewPrinter(language.English)
// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
//
// To evaluate the Printf, package message wraps the arguments in a Renderer
// that is passed to the catalog for message decoding. The call sequence that
// results from evaluating the above message, assuming the person is rather
// tardy, is:
//
// Render("You are %[1]d ")
// Arg(1)
// Render("minutes")
// Render(" late.")
//
// The call to Arg is caused by the plural.Select execution, which evaluates
// the argument to determine whether the singular or plural message form should
// be selected. The calls to Render report the partial results to the message
// package for further evaluation.
package catmsg
import (
"errors"
"fmt"
"strconv"
"strings"
"sync"
"golang.org/x/text/language"
)
// A Handle refers to a registered message type. Its value is the index of the
// corresponding Handler in the package-level handlers slice.
type Handle int
// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool
// Register adds a message type to the registry and returns the Handle under
// which it was stored. That Handle is what a Message passes to the Encoder's
// EncodeMessageType method. By convention name starts with the package path,
// optionally followed by a disambiguating string.
//
// Register panics if name has been registered before.
func Register(name string, handler Handler) Handle {
	mutex.Lock()
	defer mutex.Unlock()

	_, exists := names[name]
	if exists {
		panic(fmt.Errorf("catmsg: handler for %q already exists", name))
	}

	// The new handle is the position the handler is about to occupy.
	id := Handle(len(handlers))
	handlers = append(handlers, handler)
	names[name] = id
	return id
}
// These handlers require fixed positions in the handlers slice. The values
// are written into encoded messages as the message type, so they must not be
// reordered.
const (
msgVars Handle = iota
msgFirst
msgRaw
msgString
msgAffix
// Leave some arbitrary room for future expansion: 20 should suffice.
// numInternal is the initial length of the handlers slice, so handles
// returned by Register start at this value.
numInternal = 20
)
// prefix is prepended to the names of the message types built into this
// package.
const prefix = "golang.org/x/text/internal/catmsg."
var (
// TODO: find a more stable way to link handles to message types.
// mutex is held by Register while it updates names and handlers, and by the
// decoder while it looks up a handler.
mutex sync.Mutex
// names maps a registered message type name to its Handle.
names = map[string]Handle{
prefix + "Vars": msgVars,
prefix + "First": msgFirst,
prefix + "Raw": msgRaw,
prefix + "String": msgString,
prefix + "Affix": msgAffix,
}
// handlers is indexed by Handle. The first numInternal slots are reserved
// for the built-in types; slots not filled in by init stay nil.
handlers = make([]Handler, numInternal)
)
// init installs the handlers of the built-in message types at their fixed
// positions in the handlers slice.
func init() {
	// Vars wraps another message and gives the decoder its variable block.
	// When present it is always the first thing in an encoded message: a
	// block size, the block itself, then the wrapped message.
	handlers[msgVars] = func(d *Decoder) bool {
		n := int(d.DecodeUint())
		d.vars, d.data = d.data[:n], d.data[n:]
		return d.executeMessage()
	}

	// First runs the messages of a sequence in order and stops at the first
	// one that yields a match for the given substitution arguments.
	handlers[msgFirst] = func(d *Decoder) bool {
		matched := false
		for !matched && !d.Done() {
			matched = d.ExecuteMessage()
		}
		return matched
	}

	// Raw passes all remaining data to the renderer unmodified.
	handlers[msgRaw] = func(d *Decoder) bool {
		d.Render(d.data)
		return true
	}

	// String alternates between a string constant and a variable
	// substitution. Empty constants are not rendered.
	handlers[msgString] = func(d *Decoder) bool {
		for {
			if d.Done() {
				return true
			}
			if lit := d.DecodeString(); len(lit) > 0 {
				d.Render(lit)
			}
			if d.Done() {
				return true
			}
			d.ExecuteSubstitution()
		}
	}

	// Affix renders a prefix, the nested message and a suffix, and reports
	// whether the nested message matched.
	handlers[msgAffix] = func(d *Decoder) bool {
		// TODO: use an alternative method for common cases.
		pre := d.DecodeString()
		post := d.DecodeString()
		if len(pre) > 0 {
			d.Render(pre)
		}
		matched := d.ExecuteMessage()
		if len(post) > 0 {
			d.Render(post)
		}
		return matched
	}
}
// Sentinel errors reported while compiling and evaluating messages.
var (
// ErrIncomplete indicates a compiled message does not define translations
// for all possible argument values. If this message is returned, evaluating
// a message may result in the ErrNoMatch error.
ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
// ErrNoMatch indicates no translation message matched the given input
// parameters when evaluating a message. It is the error returned by
// Decoder.Execute when the executed message reports no match.
ErrNoMatch = errors.New("catmsg: no translation for inputs")
)
// A Message holds a collection of translations for the same phrase that may
// vary based on the values of substitution arguments.
type Message interface {
// Compile encodes the format string(s) of the message as a string for later
// evaluation.
//
// The first call Compile makes on the encoder must be EncodeMessageType.
// The handle passed to this call may either be a handle returned by
// Register to encode a single custom message, or HandleFirst followed by
// a sequence of calls to EncodeMessage.
//
// Compile must return ErrIncomplete if it is possible for evaluation to
// not match any translation for a given set of formatting parameters.
// For example, selecting a translation based on plural form may not yield
// a match if the form "Other" is not one of the selectors.
//
// Compile may return any other application-specific error. For backwards
// compatibility with packages like fmt, which often do not do sanity
// checking of format strings ahead of time, Compile should still make an
// effort to have some sensible fallback in case of an error.
Compile(e *Encoder) error
}
// Compile turns a Message into the data string that is stored in a Catalog.
// The string can later be evaluated by handing it to the Execute method of a
// Decoder.
//
// A data string is returned even when err is non-nil. The error is the one
// returned by m.Compile or, if that is nil, the first error recorded on the
// variable encoder.
func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
	// TODO: pass macros so they can be used for validation.
	vars := &Encoder{inBody: true} // collects the encoded variables
	vars.root = vars
	msgs := &Encoder{root: vars, parent: vars, tag: tag} // collects the message itself

	err = m.Compile(msgs)

	// This package serves the message package, which in turn is meant to be a
	// drop-in replacement for fmt. With the fmt package, format strings are
	// evaluated lazily and errors are handled by substituting strings in the
	// result, rather than returning an error. Dealing with multiple languages
	// makes it more important to check errors ahead of time. We chose to be
	// consistent and compatible and allow graceful degradation in case of
	// errors.
	body := msgs.buf[stripPrefix(msgs.buf):]
	if n := len(vars.buf); n > 0 {
		// Variables were defined: emit msgVars, the size of the variable
		// block and the block itself in front of the message body.
		out := make([]byte, 1+maxVarintBytes, 1+maxVarintBytes+n+len(body))
		out[0] = byte(msgVars)
		out = out[:1+encodeUint(out[1:], uint64(n))]
		out = append(out, vars.buf...)
		out = append(out, body...)
		body = out
	}

	if err == nil {
		err = vars.err
	}
	return string(body), err
}
// FirstOf is a message type that prints the first message in the sequence
// that resolves to a match for the given substitution arguments.
type FirstOf []Message

// Compile implements Message.
//
// It returns ErrIncomplete for an empty sequence, and otherwise the result of
// encoding the last message. A message that compiles without error anywhere
// but in last position is reported as an error, because it would always match
// and hide the messages after it.
func (s FirstOf) Compile(e *Encoder) error {
	e.EncodeMessageType(msgFirst)

	var err error = ErrIncomplete
	last := len(s) - 1
	for i, m := range s {
		err = e.EncodeMessage(m)
		if err == nil && i < last {
			return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i)
		}
	}
	return err
}
// Var defines a message that can be substituted for a placeholder of the same
// name. If an expression does not result in a string after evaluation, Name is
// used as the substitution. For example:
//
// Var{
// Name: "minutes",
// Message: plural.Select(1, "one", "minute"),
// }
//
// will resolve to minute for singular and minutes for plural forms.
type Var struct {
// Name is the placeholder name and the fallback substitution.
Name string
// Message is compiled to produce the substituted text.
Message Message
}
// errIsVar is returned by (*Var).Compile after the variable was registered
// successfully, to signal that a Var by itself is not a message.
var errIsVar = errors.New("catmsg: variable used as message")
// Compile implements Message.
//
// It only registers the variable with the encoder; no message is encoded.
// An error from the registration is returned as is. On success errIsVar is
// returned, because a Var on its own is not a message. When the Var is part
// of a sequence followed by messages that refer to it, that error is ignored.
func (v *Var) Compile(e *Encoder) error {
	err := e.addVar(v.Name, v.Message)
	if err == nil {
		err = errIsVar
	}
	return err
}
// Raw is a message made up of one format string that is handed to the
// Renderer unchanged.
//
// Note that a Renderer may still do its own variable substitution.
type Raw string

// Compile implements Message. It always returns nil.
func (r Raw) Compile(e *Encoder) (err error) {
	e.EncodeMessageType(msgRaw)
	// A raw string extends to the end of the message, so it is written
	// without the size prefix that EncodeString would add.
	e.buf = append(e.buf, string(r)...)
	return nil
}
// String is a message consisting of a single format string which contains
// placeholders that may be substituted with variables.
//
// Variable substitutions are marked with placeholders and a variable name of
// the form ${name}. Any other substitutions such as Go templates or
// printf-style substitutions are left to be done by the Renderer.
//
// When evaluating a string interpolation, a Renderer will receive separate
// calls for each placeholder and interstitial string. For example, for the
// message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
// is:
//
// d.Render("%[1]v ")
// d.Arg(1)
// d.Render(resultOfInvites)
// d.Render(" %[2]v to ")
// d.Arg(2)
// d.Render(resultOfTheir)
// d.Render(" party.")
//
// where the messages for "invites" and "their" both use a plural.Select
// referring to the first argument.
//
// Strings may also invoke macros. Macros are essentially variables that can be
// reused. Macros may, for instance, be used to make selections between
// different conjugations of a verb. See the catalog package description for an
// overview of macros.
type String string
// Compile implements Message. It scans the string for ${...} placeholders and
// encodes it as an alternation of literal text and substitutions.
//
// A placeholder of the form ${name} is a variable reference; ${name(N)} is a
// macro call with one unsigned numeric argument. Malformed placeholders are
// replaced by a "$!(...)" marker in the literal text and reported through the
// returned error; when several are malformed the last error wins. A string
// without any valid placeholder is encoded as a Raw message.
func (s String) Compile(e *Encoder) (err error) {
	const open = "${"
	text := string(s)

	var (
		lit     = []byte{} // literal text collected since the last substitution
		started bool       // whether the msgString header has been written
		pos     int        // read position in text
	)

	// emit writes the pending literal followed by one substitution, preceded
	// by the message type on first use.
	emit := func(name string, args ...int) {
		if !started {
			started = true
			e.EncodeMessageType(msgString)
		}
		e.EncodeString(string(lit))
		e.EncodeSubstitution(name, args...)
		lit = lit[:0]
	}

	for {
		start := strings.Index(text[pos:], open)
		if start < 0 {
			break
		}
		lit = append(lit, text[pos:pos+start]...)
		pos += start + len(open)

		end := strings.IndexByte(text[pos:], '}')
		if end < 0 {
			// Unterminated placeholder: the rest of the input is dropped.
			lit = append(lit, "$!(MISSINGBRACE)"...)
			err = fmt.Errorf("catmsg: missing '}'")
			pos = len(text)
			break
		}
		name := strings.TrimSpace(text[pos : pos+end])
		pos += end + 1

		lparen := strings.IndexByte(name, '(')
		if lparen < 0 {
			emit(name)
			continue
		}

		// rparen is relative to name[lparen:].
		rparen := strings.IndexByte(name[lparen:], ')')
		if rparen < 0 {
			// TODO: what should the error be?
			lit = append(lit, "$!(MISSINGPAREN)"...)
			err = fmt.Errorf("catmsg: missing ')'")
			continue
		}

		// TODO: handle more than one argument
		arg := strings.TrimSpace(name[lparen+1 : lparen+rparen])
		num, numErr := strconv.ParseUint(arg, 10, 32)
		if numErr != nil {
			lit = append(lit, "$!(BADNUM)"...)
			err = fmt.Errorf("catmsg: invalid number %q", arg)
			continue
		}
		emit(name[:lparen], int(num))
	}
	lit = append(lit, text[pos:]...)

	if started {
		if len(lit) > 0 {
			e.EncodeString(string(lit))
		}
		return err
	}
	// No substitution was encoded: simplify the string to a raw string.
	Raw(string(lit)).Compile(e)
	return err
}
// Affix is a message that adds a prefix and suffix to another message.
// This is mostly used to add back whitespace to a translation that was stripped
// before sending it out.
type Affix struct {
// Message is the wrapped message.
Message Message
// Prefix is rendered before Message, unless it is empty.
Prefix string
// Suffix is rendered after Message, unless it is empty.
Suffix string
}
// Compile implements Message.
//
// It encodes the prefix, the suffix and then the wrapped message, and returns
// the error reported for the wrapped message. The Affix handler passes on the
// match result of the wrapped message, so an incomplete inner message makes
// the Affix incomplete as well; returning that error, rather than dropping
// it, lets callers see ErrIncomplete and compile errors of the inner message.
func (a Affix) Compile(e *Encoder) (err error) {
	// TODO: consider adding a special message type that just adds a single
	// return. This is probably common enough to handle the majority of cases.
	// Get some stats first, though.
	e.EncodeMessageType(msgAffix)
	e.EncodeString(a.Prefix)
	e.EncodeString(a.Suffix)
	return e.EncodeMessage(a.Message)
}
@@ -0,0 +1,327 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"errors"
"strings"
"testing"
"golang.org/x/text/language"
)
// renderer is a test Renderer that records everything it is asked to render.
type renderer struct {
	args   []int  // values handed out by Arg
	result string // rendered fragments, joined by "|"
}

// Arg returns the i-th argument, or nil if i is past the end of args.
func (r *renderer) Arg(i int) interface{} {
	if i < len(r.args) {
		return r.args[i]
	}
	return nil
}

// Render appends s to the result. A "|" separator is inserted whenever the
// result is already non-empty.
func (r *renderer) Render(s string) {
	if len(r.result) > 0 {
		s = "|" + s
	}
	r.result += s
}
// TestCodec compiles each message in the table and then executes the
// resulting data with a shared Decoder. For every case it checks the
// compile error against encErr, the encoded bytes against enc (only when enc
// is non-empty), and for each entry in tests the decode error and the
// rendered output. Errors are matched by substring; see failErr.
func TestCodec(t *testing.T) {
type test struct {
args []int
out string
decErr string
}
// single builds a one-element test list without arguments.
single := func(out, err string) []test { return []test{{out: out, decErr: err}} }
testCases := []struct {
desc string
m Message
enc string
encErr string
tests []test
}{{
desc: "unused variable",
m: &Var{"name", String("foo")},
encErr: errIsVar.Error(),
tests: single("", ""),
}, {
desc: "empty",
m: empty{},
tests: single("", ""),
}, {
desc: "sequence with empty",
m: seq{empty{}},
tests: single("", ""),
}, {
desc: "raw string",
m: Raw("foo"),
tests: single("foo", ""),
}, {
desc: "raw string no sub",
m: Raw("${foo}"),
enc: "\x02${foo}",
tests: single("${foo}", ""),
}, {
desc: "simple string",
m: String("foo"),
tests: single("foo", ""),
}, {
desc: "affix",
m: &Affix{String("foo"), "\t", "\n"},
tests: single("\t|foo|\n", ""),
}, {
desc: "missing var",
m: String("foo${bar}"),
enc: "\x03\x03foo\x02\x03bar",
encErr: `unknown var "bar"`,
tests: single("foo|bar", ""),
}, {
desc: "empty var",
m: seq{
&Var{"bar", seq{}},
String("foo${bar}"),
},
enc: "\x00\x05\x04\x02bar\x03\x03foo\x00\x00",
// TODO: recognize that it is cheaper to substitute bar.
tests: single("foo|bar", ""),
}, {
desc: "var after value",
m: seq{
String("foo${bar}"),
&Var{"bar", String("baz")},
},
encErr: errIsVar.Error(),
tests: single("foo|bar", ""),
}, {
desc: "substitution",
m: seq{
&Var{"bar", String("baz")},
String("foo${bar}"),
},
tests: single("foo|baz", ""),
}, {
desc: "affix with substitution",
m: &Affix{seq{
&Var{"bar", String("baz")},
String("foo${bar}"),
}, "\t", "\n"},
tests: single("\t|foo|baz|\n", ""),
}, {
desc: "shadowed variable",
m: seq{
&Var{"bar", String("baz")},
seq{
&Var{"bar", String("BAZ")},
String("foo${bar}"),
},
},
tests: single("foo|BAZ", ""),
}, {
desc: "nested value",
m: nestedLang{nestedLang{empty{}}},
tests: single("nl|nl", ""),
}, {
desc: "not shadowed variable",
m: seq{
&Var{"bar", String("baz")},
seq{
String("foo${bar}"),
&Var{"bar", String("BAZ")},
},
},
encErr: errIsVar.Error(),
tests: single("foo|baz", ""),
}, {
desc: "duplicate variable",
m: seq{
&Var{"bar", String("baz")},
&Var{"bar", String("BAZ")},
String("${bar}"),
},
encErr: "catmsg: duplicate variable \"bar\"",
tests: single("baz", ""),
}, {
desc: "complete incomplete variable",
m: seq{
&Var{"bar", incomplete{}},
String("${bar}"),
},
enc: "\x00\t\b\x01\x01\x14\x04\x02bar\x03\x00\x00\x00",
// TODO: recognize that it is cheaper to substitute bar.
tests: single("bar", ""),
}, {
desc: "incomplete sequence",
m: seq{
incomplete{},
incomplete{},
},
encErr: ErrIncomplete.Error(),
tests: single("", ErrNoMatch.Error()),
}, {
desc: "compile error variable",
m: seq{
&Var{"bar", errorCompileMsg{}},
String("${bar}"),
},
encErr: errCompileTest.Error(),
tests: single("bar", ""),
}, {
desc: "compile error message",
m: errorCompileMsg{},
encErr: errCompileTest.Error(),
tests: single("", ""),
}, {
desc: "compile error sequence",
m: seq{
errorCompileMsg{},
errorCompileMsg{},
},
encErr: errCompileTest.Error(),
tests: single("", ""),
}, {
desc: "macro",
m: String("${exists(1)}"),
tests: single("you betya!", ""),
}, {
desc: "macro incomplete",
m: String("${incomplete(1)}"),
enc: "\x03\x00\x01\nincomplete\x01",
tests: single("incomplete", ""),
}, {
desc: "macro undefined at end",
m: String("${undefined(1)}"),
enc: "\x03\x00\x01\tundefined\x01",
tests: single("undefined", "catmsg: undefined macro \"undefined\""),
}, {
desc: "macro undefined with more text following",
m: String("${undefined(1)}."),
enc: "\x03\x00\x01\tundefined\x01\x01.",
tests: single("undefined|.", "catmsg: undefined macro \"undefined\""),
}, {
desc: "macro missing paren",
m: String("${missing(1}"),
encErr: "catmsg: missing ')'",
tests: single("$!(MISSINGPAREN)", ""),
}, {
desc: "macro bad num",
m: String("aa${bad(a)}"),
encErr: "catmsg: invalid number \"a\"",
tests: single("aa$!(BADNUM)", ""),
}, {
desc: "var missing brace",
m: String("a${missing"),
encErr: "catmsg: missing '}'",
tests: single("a$!(MISSINGBRACE)", ""),
}}
// One renderer and one decoder are reused for all cases; the renderer is
// reset before every execution below.
r := &renderer{}
dec := NewDecoder(language.Und, r, macros)
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
// Use a language other than Und so that we can test
// passing the language to nested values.
data, err := Compile(language.Dutch, macros, tc.m)
if failErr(err, tc.encErr) {
t.Errorf("encoding error: got %+q; want %+q", err, tc.encErr)
}
// The encoding is only compared when the case specifies one.
if tc.enc != "" && data != tc.enc {
t.Errorf("encoding: got %+q; want %+q", data, tc.enc)
}
for _, st := range tc.tests {
t.Run("", func(t *testing.T) {
*r = renderer{args: st.args}
if err = dec.Execute(data); failErr(err, st.decErr) {
t.Errorf("decoding error: got %+q; want %+q", err, st.decErr)
}
if r.result != st.out {
t.Errorf("decode: got %+q; want %+q", r.result, st.out)
}
})
}
})
}
}
// failErr reports whether got fails to meet the expectation want. An empty
// want means no error is expected; otherwise got must be non-nil and its
// message must contain want.
func failErr(got error, want string) bool {
	switch {
	case got == nil:
		return want != ""
	case want == "":
		return true
	default:
		return !strings.Contains(got.Error(), want)
	}
}
// seq is a test message that encodes its elements as a msgFirst sequence
// without any of the checks done by FirstOf.
type seq []Message

// Compile encodes all elements and returns the error of the last one, or
// ErrIncomplete if there are none. Erroneous or complete messages are
// accepted in any position to allow testing different scenarios.
func (s seq) Compile(e *Encoder) (err error) {
	e.EncodeMessageType(msgFirst)
	err = ErrIncomplete
	for i := range s {
		err = e.EncodeMessage(s[i])
	}
	return err
}
// empty is a test message whose Compile writes nothing to the encoder and
// reports success.
type empty struct{}
func (empty) Compile(e *Encoder) (err error) { return nil }
// msgIncomplete is the handle of a test message type whose handler never
// matches.
var msgIncomplete = Register(
"golang.org/x/text/internal/catmsg.incomplete",
func(d *Decoder) bool { return false })
// incomplete is a test message that encodes only the msgIncomplete type and
// reports ErrIncomplete.
type incomplete struct{}
func (incomplete) Compile(e *Encoder) (err error) {
e.EncodeMessageType(msgIncomplete)
return ErrIncomplete
}
// msgNested is the handle of a test message type whose handler renders the
// encoded string and then executes the nested message. The handler always
// reports a match, regardless of the result of the nested message.
var msgNested = Register(
"golang.org/x/text/internal/catmsg.nested",
func(d *Decoder) bool {
d.Render(d.DecodeString())
d.ExecuteMessage()
return true
})
// nestedLang is a test message that encodes the string form of the encoder's
// language followed by the wrapped message.
type nestedLang struct{ Message }
func (n nestedLang) Compile(e *Encoder) (err error) {
e.EncodeMessageType(msgNested)
e.EncodeString(e.Language().String())
// The error of the nested message is deliberately not propagated here.
e.EncodeMessage(n.Message)
return nil
}
// errorCompileMsg is a test message whose Compile always fails with
// errCompileTest, without writing anything to the encoder.
type errorCompileMsg struct{}
// errCompileTest is the error returned by errorCompileMsg.Compile.
var errCompileTest = errors.New("catmsg: compile error test")
func (errorCompileMsg) Compile(e *Encoder) (err error) {
return errCompileTest
}
// dictionary is the macro lookup used by the tests; it is backed by
// dictMessages.
type dictionary struct{}
var (
// macros is the dictionary value passed to Compile and NewDecoder.
macros = dictionary{}
// dictMessages maps macro names to compiled message data.
dictMessages = map[string]string{
"exists": compile(String("you betya!")),
"incomplete": compile(incomplete{}),
}
)
// Lookup returns the compiled data stored for key and whether key is present.
func (d dictionary) Lookup(key string) (data string, ok bool) {
data, ok = dictMessages[key]
return
}
// compile returns the data produced by Compile for m with language.Und.
func compile(m Message) (data string) {
// The error is dropped on purpose: this helper also compiles incomplete{},
// for which Compile reports ErrIncomplete.
data, _ = Compile(language.Und, macros, m)
return data
}
@@ -0,0 +1,407 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"errors"
"fmt"
"golang.org/x/text/language"
)
// A Renderer renders a Message. It receives the output produced while a
// Decoder executes a message.
type Renderer interface {
// Render renders the given string. The given string may be interpreted as a
// format string, such as the one used by the fmt package or a template.
Render(s string)
// Arg returns the i-th argument passed to format a message. This method
// should return nil if there is no such argument. Messages need access to
// arguments to allow selecting a message based on linguistic features of
// those arguments.
Arg(i int) interface{}
}
// A Dictionary specifies a source of messages, including variables or macros.
// The Decoder consults it to resolve macro substitutions.
type Dictionary interface {
// Lookup returns the message for the given key. It returns false for ok if
// such a message could not be found. The returned data is executed as an
// encoded message.
Lookup(key string) (data string, ok bool)
// TODO: consider returning an interface, instead of a string. This will
// allow implementations to do their own message type decoding.
}
// An Encoder serializes a Message to a string.
type Encoder struct {
// The root encoder is used for storing encoded variables.
root *Encoder
// The parent encoder provides the surrounding scopes for resolving variable
// names.
parent *Encoder
// tag is the language reported by Language.
tag language.Tag
// buf holds the encoded message so far. After a message completes encoding,
// the contents of buf, prefixed by the encoded length, are flushed to the
// parent buffer.
buf []byte
// vars is the lookup table of variables in the current scope.
vars []keyVal
// err holds the first error recorded with setError. Errors are always
// recorded on the root encoder.
err error
inBody bool // if false next call must be EncodeMessageType
}
// keyVal associates a variable name with the position of its encoded message.
type keyVal struct {
key string
// offset is the position in the root encoder's buffer at which the
// variable's message starts.
offset int
}
// Language returns the language for which the message being encoded will be
// stored in the Catalog.
func (e *Encoder) Language() language.Tag {
	return e.tag
}
// setError records err on the root encoder, unless an error was already
// recorded there.
func (e *Encoder) setError(err error) {
	if root := e.root; root.err == nil {
		root.err = err
	}
}
// EncodeUint appends x to the message as a variable-length integer.
//
// It panics if EncodeMessageType has not been called first.
func (e *Encoder) EncodeUint(x uint64) {
	e.checkInBody()
	var scratch [maxVarintBytes]byte
	width := encodeUint(scratch[:], x)
	e.buf = append(e.buf, scratch[:width]...)
}
// EncodeString appends s to the message: first its length in bytes, then the
// bytes themselves.
//
// It panics if EncodeMessageType has not been called first.
func (e *Encoder) EncodeString(s string) {
	e.checkInBody()
	size := uint64(len(s))
	e.EncodeUint(size)
	e.buf = append(e.buf, []byte(s)...)
}
// EncodeMessageType marks the current message to be of type h.
//
// It must be the first call of a Message's Compile method and panics when it
// is called after the message body was started.
func (e *Encoder) EncodeMessageType(h Handle) {
	if !e.inBody {
		e.inBody = true
		e.EncodeUint(uint64(h))
		return
	}
	panic("catmsg: EncodeMessageType not the first method called")
}
// EncodeMessage serializes the given message inline at the current position
// and returns the error reported by the message's Compile method.
//
// The message is compiled into a child encoder whose contents are then
// flushed into e. A *Var is not flushed: compiling it only registers a
// variable.
func (e *Encoder) EncodeMessage(m Message) error {
	child := &Encoder{root: e.root, parent: e, tag: e.tag}
	err := m.Compile(child)
	if _, isVar := m.(*Var); isVar {
		return err
	}
	child.flushTo(e)
	return err
}
// checkInBody panics if the message type has not been written yet, that is,
// if an encode method is used before EncodeMessageType.
func (e *Encoder) checkInBody() {
	if e.inBody {
		return
	}
	panic("catmsg: expected prior call to EncodeMessageType")
}
// stripPrefix returns how many leading bytes must be dropped from b to turn a
// single-element sequence into just that element, without the sequence type
// and the element's size prefix. It returns 0 if b is not a msgFirst sequence
// consisting of exactly one element. When the result n is non-zero, b[1:n]
// holds the size prefix.
func stripPrefix(b []byte) (n int) {
	if len(b) == 0 || Handle(b[0]) != msgFirst {
		return 0
	}
	// A lone element spans the remainder of b: type byte, size, payload.
	size, width, _ := decodeUint(b[1:])
	if 1+width+int(size) != len(b) {
		return 0
	}
	return 1 + width
}
// flushTo appends the message encoded in e to dst as a size-prefixed message.
//
// A single-element sequence is reduced to its element: only the leading
// sequence type byte is dropped, as the bytes that follow already start with
// the element's size. Anything else gets its size written in front.
func (e *Encoder) flushTo(dst *Encoder) {
	payload := e.buf
	if stripPrefix(payload) > 0 {
		payload = payload[1:]
	} else {
		// Prefix the size.
		dst.EncodeUint(uint64(len(payload)))
	}
	dst.buf = append(dst.buf, payload...)
}
// addVar compiles m as the value of the variable key, stores the result in
// the root encoder's buffer and records the variable in the scope of e's
// parent.
//
// It returns an error without registering anything if key is already defined
// in that scope. Otherwise it returns the error from compiling m, which may
// be ErrIncomplete; any other compile error is also recorded with setError.
func (e *Encoder) addVar(key string, m Message) error {
for _, v := range e.parent.vars {
if v.key == key {
err := fmt.Errorf("catmsg: duplicate variable %q", key)
e.setError(err)
return err
}
}
// Remember the scope now: e may be replaced by a wrapping encoder below.
scope := e.parent
// If a variable message is Incomplete, and does not evaluate to a message
// during execution, we fall back to the variable name. We encode this by
// appending the variable name if the message reports it's incomplete.
err := m.Compile(e)
if err != ErrIncomplete {
e.setError(err)
}
switch {
case len(e.buf) == 1 && Handle(e.buf[0]) == msgFirst: // empty sequence
// Discard the lone sequence header and treat it as an empty message.
e.buf = e.buf[:0]
e.inBody = false
fallthrough
case len(e.buf) == 0:
// Empty message.
if err := String(key).Compile(e); err != nil {
e.setError(err)
}
case err == ErrIncomplete:
if Handle(e.buf[0]) != msgFirst {
// Wrap the message in a sequence so a fallback can follow it.
seq := &Encoder{root: e.root, parent: e}
seq.EncodeMessageType(msgFirst)
e.flushTo(seq)
e = seq
}
// e contains a sequence; append the fallback string.
e.EncodeMessage(String(key))
}
// Flush result to variable heap.
offset := len(e.root.buf)
e.flushTo(e.root)
e.buf = e.buf[:0]
// Record variable offset in current scope.
scope.vars = append(scope.vars, keyVal{key: key, offset: offset})
return err
}
// Kinds of substitution written by EncodeSubstitution and read back by
// ExecuteSubstitution. The values are part of the encoding.
const (
// substituteVar is followed by the offset of a variable.
substituteVar = iota
// substituteMacro is followed by the macro name and its arguments.
substituteMacro
// substituteError is followed by the name that could not be resolved.
substituteError
)
// EncodeSubstitution inserts a resolved reference to a variable or macro.
//
// With arguments the reference is encoded as a macro call: the name followed
// by each argument. Without arguments name is looked up as a variable,
// starting in the scope of e and continuing through the enclosing scopes, and
// the variable's offset is encoded. If no such variable exists, the name
// itself is encoded and an "unknown var" error is recorded on the encoder.
//
// This call must be matched with a call to ExecuteSubstitution at decoding
// time.
func (e *Encoder) EncodeSubstitution(name string, arguments ...int) {
	if len(arguments) > 0 {
		// TODO: also resolve macros.
		e.EncodeUint(substituteMacro)
		e.EncodeString(name)
		for i := range arguments {
			e.EncodeUint(uint64(arguments[i]))
		}
		return
	}

	for s := e; s != nil; s = s.parent {
		for i := range s.vars {
			if s.vars[i].key == name {
				e.EncodeUint(substituteVar) // TODO: support arity > 0
				e.EncodeUint(uint64(s.vars[i].offset))
				return
			}
		}
	}

	// TODO: refer to dictionary-wide scoped variables.
	e.EncodeUint(substituteError)
	e.EncodeString(name)
	e.setError(fmt.Errorf("catmsg: unknown var %q", name))
}
// A Decoder deserializes and evaluates messages that are encoded by an encoder.
type Decoder struct {
// tag is the language reported by Language.
tag language.Tag
// dst receives the rendered output and supplies the arguments.
dst Renderer
// macros is consulted to resolve macro substitutions.
macros Dictionary
// err is the first error recorded with setError since the last Execute.
err error
// vars is the variable block of the message being executed, if any.
vars string
// data is the part of the current message that has not been decoded yet.
data string
// macroArg, when non-zero, is the argument index that Arg(1) is mapped to
// while a macro is being executed.
macroArg int // TODO: allow more than one argument
}
// NewDecoder returns a new Decoder that renders to r, resolves macros through
// macros and reports tag as its language.
//
// Decoders are designed to be reused for multiple invocations of Execute.
// Only one goroutine may call Execute at a time.
func NewDecoder(tag language.Tag, r Renderer, macros Dictionary) *Decoder {
	d := new(Decoder)
	d.tag = tag
	d.dst = r
	d.macros = macros
	return d
}
// setError records err unless an earlier error is already stored; only the
// first error of an execution is kept.
func (d *Decoder) setError(err error) {
	if d.err != nil {
		return
	}
	d.err = err
}
// Language returns the language in which the message is being rendered.
//
// The destination language may be a child language of the language used for
// encoding. For instance, a decoding language of "pt-PT" is consistent with an
// encoding language of "pt".
func (d *Decoder) Language() language.Tag {
	return d.tag
}
// Done reports whether all bytes of this message have been processed, that
// is, whether no data is left to decode.
func (d *Decoder) Done() bool {
	return d.data == ""
}
// Render implements Renderer by forwarding s to the destination Renderer.
func (d *Decoder) Render(s string) {
	d.dst.Render(s)
}
// Arg implements Renderer.
//
// During evaluation of macros, the argument positions may be mapped to
// arguments that differ from the original call: while a macro argument is
// set, only position 1 may be requested and it resolves to that argument.
// Requesting any other position in that state panics.
func (d *Decoder) Arg(i int) interface{} {
	if d.macroArg == 0 {
		return d.dst.Arg(i)
	}
	if i != 1 {
		panic("catmsg: only macros with single argument supported")
	}
	return d.dst.Arg(d.macroArg)
}
// DecodeUint decodes a number that was encoded with EncodeUint and advances
// the position past the bytes it read. A malformed number is recorded as an
// error on the decoder.
func (d *Decoder) DecodeUint() uint64 {
	value, width, err := decodeUintString(d.data)
	d.data = d.data[width:]
	if err == nil {
		return value
	}
	d.setError(err)
	return value
}
// DecodeString decodes a string that was encoded with EncodeString and
// advances the position.
//
// If the encoded size exceeds the data that is left, which can only happen
// with corrupt input, the remaining data is returned and an error is recorded
// on the decoder instead of panicking on an out-of-range slice.
func (d *Decoder) DecodeString() string {
	size := d.DecodeUint()
	if size > uint64(len(d.data)) {
		d.setError(errors.New("catmsg: string size exceeds available data"))
		size = uint64(len(d.data))
	}
	s := d.data[:size]
	d.data = d.data[size:]
	return s
}
// SkipMessage skips the message at the current location and advances the
// position.
//
// The size is checked as an unsigned value before it is used: a size larger
// than the remaining data, which can only happen with corrupt input, skips to
// the end of the data and records an error on the decoder instead of
// panicking on an out-of-range slice.
func (d *Decoder) SkipMessage() {
	size := d.DecodeUint()
	if size > uint64(len(d.data)) {
		d.setError(errors.New("catmsg: message size exceeds available data"))
		size = uint64(len(d.data))
	}
	d.data = d.data[size:]
}
// Execute decodes and evaluates msg.
//
// It returns ErrNoMatch if the message did not match, and otherwise the first
// error recorded during evaluation, if any.
//
// Only one goroutine may call Execute at a time.
func (d *Decoder) Execute(msg string) error {
	d.err = nil
	if matched := d.execute(msg); matched {
		return d.err
	}
	return ErrNoMatch
}
// execute runs msg as a complete message and reports whether it matched. The
// data the decoder was working on is put back afterwards, so execute can be
// called in the middle of decoding another message.
func (d *Decoder) execute(msg string) bool {
	outer := d.data
	d.data = msg
	matched := d.executeMessage()
	d.data = outer
	return matched
}
// executeMessageFromData is like execute, but also decodes a leading message
// size and clips the given string accordingly.
//
// It reports the number of bytes consumed, including the size prefix, and
// whether a message was selected. The decoder's own data is restored before
// returning.
func (d *Decoder) executeMessageFromData(s string) (n int, ok bool) {
	outer := d.data
	d.data = s

	size := int(d.DecodeUint())
	header := len(s) - len(d.data) // bytes taken up by the size prefix

	// Sanitize the setting. This allows skipping a size argument for
	// RawString and method Done.
	d.data = d.data[:size]
	ok = d.executeMessage()
	n = header + size - len(d.data)

	d.data = outer
	return n, ok
}
// errUnknownHandler is recorded when a message refers to a handle for which
// no handler is registered.
var errUnknownHandler = errors.New("catmsg: string contains unsupported handler")

// executeMessage reads the handle id, looks up its handler and executes the
// message. It is assumed that all of d.data is the single message.
//
// Empty data counts as a valid empty message. A handle without a registered
// handler records errUnknownHandler, renders a "$!(UNKNOWNMSGHANDLER=...)"
// marker and is still reported as a match.
func (d *Decoder) executeMessage() bool {
	if d.Done() {
		// We interpret no data as a valid empty message.
		return true
	}
	handle := d.DecodeUint()

	var fn Handler
	mutex.Lock()
	// Compare as unsigned values: converting a large handle to int can yield
	// a negative number (or alias a small one where int is 32 bits), which
	// would pass a signed bounds check and then fail on the index below.
	if handle < uint64(len(handlers)) {
		fn = handlers[handle]
	}
	mutex.Unlock()

	if fn == nil {
		d.setError(errUnknownHandler)
		// "\x02" is msgRaw: the marker is rendered as a raw string.
		d.execute(fmt.Sprintf("\x02$!(UNKNOWNMSGHANDLER=%#x)", handle))
		return true
	}
	return fn(d)
}
// ExecuteMessage decodes and executes the size-prefixed message at the
// current position, advances past it and reports whether it matched.
func (d *Decoder) ExecuteMessage() bool {
	consumed, matched := d.executeMessageFromData(d.data)
	d.data = d.data[consumed:]
	return matched
}
// ExecuteSubstitution executes the message corresponding to the substitution
// as encoded by EncodeSubstitution.
//
// A variable reference executes the message stored at the encoded offset in
// the variable block. A macro call looks the macro up in the dictionary and
// executes it with the encoded argument; if the macro is undefined, which is
// also recorded as an error, or does not match, the macro name is rendered
// instead. An error substitution renders the encoded name. Any other kind
// panics.
func (d *Decoder) ExecuteSubstitution() {
	kind := d.DecodeUint()

	if kind == substituteVar {
		offset := d.DecodeUint()
		d.executeMessageFromData(d.vars[offset:])
		return
	}

	if kind == substituteMacro {
		name := d.DecodeString()
		data, found := d.macros.Lookup(name)
		saved := d.macroArg
		// TODO: support macros of arity other than 1.
		d.macroArg = int(d.DecodeUint())
		if !found {
			// TODO: detect this at creation time.
			d.setError(fmt.Errorf("catmsg: undefined macro %q", name))
		}
		if !found || !d.execute(data) {
			d.dst.Render(name) // fall back to macro name.
		}
		d.macroArg = saved
		return
	}

	if kind == substituteError {
		d.dst.Render(d.DecodeString())
		return
	}

	panic("catmsg: unreachable")
}
@@ -0,0 +1,62 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
// This file implements varint encoding analogous to the one in encoding/binary.
// We need a string version of this function, so we add that here and then add
// the rest for consistency.
import "errors"
var (
// errIllegalVarint is returned when the input ends before the final byte
// of a varint was read.
errIllegalVarint = errors.New("catmsg: illegal varint")
// errVarintTooLarge is returned when a varint still continues after
// maxVarintBytes bytes.
errVarintTooLarge = errors.New("catmsg: varint too large for uint64")
)
const maxVarintBytes = 10 // maximum length of a varint
// encodeUint writes x to buf as a variable-sized integer and returns the
// number of bytes written. Each byte holds seven bits of x, least significant
// group first; the high bit is set on every byte except the last. buf must be
// at least maxVarintBytes long.
func encodeUint(buf []byte, x uint64) (n int) {
	for x >= 0x80 {
		buf[n] = byte(x) | 0x80
		x >>= 7
		n++
	}
	buf[n] = byte(x)
	return n + 1
}
func decodeUintString(s string) (x uint64, size int, err error) {
i := 0
for shift := uint(0); shift < 64; shift += 7 {
if i >= len(s) {
return 0, i, errIllegalVarint
}
b := uint64(s[i])
i++
x |= (b & 0x7F) << shift
if b&0x80 == 0 {
return x, i, nil
}
}
return 0, i, errVarintTooLarge
}
// decodeUint decodes a variable-sized integer, as written by encodeUint, from
// the start of b. It returns the decoded value and the number of bytes
// consumed.
//
// If b ends before the final byte of the varint, it returns errIllegalVarint.
// If the encoding does not fit in a uint64, it returns errVarintTooLarge. In
// both error cases the returned value is 0 and size is the number of bytes
// that were inspected.
func decodeUint(b []byte) (x uint64, size int, err error) {
	i := 0
	for shift := uint(0); shift < 64; shift += 7 {
		if i >= len(b) {
			return 0, i, errIllegalVarint
		}
		c := uint64(b[i])
		i++
		if shift == 63 && c > 1 {
			// Only the lowest bit of the tenth byte fits in a uint64. Any
			// other bit (including the continuation bit) means the value
			// overflows; without this check those bits would be silently
			// shifted out.
			return 0, i, errVarintTooLarge
		}
		x |= (c & 0x7F) << shift
		if c&0x80 == 0 {
			return x, i, nil
		}
	}
	return 0, i, errVarintTooLarge
}
@@ -0,0 +1,123 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"fmt"
"testing"
)
// TestEncodeUint checks the byte sequence encodeUint produces for values at
// the boundaries of each encoded length.
func TestEncodeUint(t *testing.T) {
	for _, tc := range []struct {
		x   uint64
		enc string
	}{
		{x: 0, enc: "\x00"},
		{x: 1, enc: "\x01"},
		{x: 2, enc: "\x02"},
		{x: 0x7f, enc: "\x7f"},
		{x: 0x80, enc: "\x80\x01"},
		{x: 1 << 14, enc: "\x80\x80\x01"},
		{x: 0xffffffff, enc: "\xff\xff\xff\xff\x0f"},
		{x: 0xffffffffffffffff, enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"},
	} {
		var buf [maxVarintBytes]byte
		n := encodeUint(buf[:], tc.x)
		if got := string(buf[:n]); got != tc.enc {
			t.Errorf("EncodeUint(%#x) = %q; want %q", tc.x, got, tc.enc)
		}
	}
}
// TestDecodeUint runs the same table of encodings through both the []byte and
// the string decoder and checks the decoded value, the reported size and the
// returned error.
func TestDecodeUint(t *testing.T) {
	type decodeCase struct {
		x    uint64
		size int
		enc  string
		err  error
	}
	testCases := []decodeCase{
		{x: 0, size: 0, enc: "", err: errIllegalVarint},
		{x: 0, size: 1, enc: "\x80", err: errIllegalVarint},
		{x: 0, size: 3, enc: "\x80\x80\x80", err: errIllegalVarint},
		{x: 0, size: 1, enc: "\x00"},
		{x: 1, size: 1, enc: "\x01"},
		{x: 2, size: 1, enc: "\x02"},
		{x: 0x7f, size: 1, enc: "\x7f"},
		{x: 0x80, size: 2, enc: "\x80\x01"},
		{x: 1 << 14, size: 3, enc: "\x80\x80\x01"},
		{x: 0xffffffff, size: 5, enc: "\xff\xff\xff\xff\x0f"},
		{x: 0xffffffffffffffff, size: 10, enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"},
		{x: 0xffffffffffffffff, size: 10, enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00"},
		{x: 0, size: 10, enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", err: errVarintTooLarge},
	}

	// Both decoders are exercised through a common string-based signature.
	decodeBytes := func(s string) (uint64, int, error) {
		return decodeUint([]byte(s))
	}
	forms := []struct {
		name   string
		decode func(s string) (uint64, int, error)
	}{
		{name: "decode", decode: decodeBytes},
		{name: "decodeString", decode: decodeUintString},
	}

	for _, f := range forms {
		for _, tc := range testCases {
			name := fmt.Sprintf("%s:%q", f.name, tc.enc)
			t.Run(name, func(t *testing.T) {
				gotX, gotSize, gotErr := f.decode(tc.enc)
				if gotErr != tc.err {
					t.Errorf("err = %q; want %q", gotErr, tc.err)
				}
				if gotSize != tc.size {
					t.Errorf("size = %d; want %d", gotSize, tc.size)
				}
				if gotX != tc.x {
					t.Errorf("decode = %#x; want %#x", gotX, tc.x)
				}
			})
		}
	}
}
@@ -0,0 +1,352 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cldrtree builds and generates a CLDR index file, including all
// inheritance.
package cldrtree
//go:generate go test -gen
// cldrtree stores CLDR data in a tree-like structure called Tree. In the CLDR
// data each branch in the tree is indicated by either an element name or an
// attribute value. A Tree does not distinguish between these two cases, but
// rather assumes that all branches can be accessed by an enum with a compact
// range of positive integer values starting from 0.
//
// Each Tree consists of three parts:
// - a slice mapping compact language identifiers to an offset into a set of
// indices,
// - a set of indices, stored as a large blob of uint16 values that encode
// the actual tree structure of data, and
// - a set of buckets that each holds a collection of strings.
// each of which is explained in more detail below.
//
//
// Tree lookup
// A tree lookup is done by providing a locale and a "path", which is a
// sequence of enum values. The search starts with getting the index for the
// given locale and then incrementally jumping into the index using the path
// values. If an element cannot be found in the index, the search starts anew
// for the locale's parent locale. The path may change during lookup by means
// of aliasing, described below.
//
// Buckets
// Buckets hold the actual string data of the leaf values of the CLDR tree.
// This data is stored in buckets, rather than one large string, for multiple
// reasons:
// - it allows representing leaf values more compactly, by storing all leaf
// values in a single bucket and then only needing a uint16 to index
// into this bucket for all leaf values,
// - (TBD) allow multiple trees to share subsets of buckets, mostly to allow
// linking in a smaller amount of data if only a subset of the buckets is
// needed,
// - to be nice to go fmt and the compiler.
//
// indices
// An index is a slice of uint16 for which the values are interpreted in one of
// two ways: as a node or a set of leaf values.
// A set of leaf values has the following form:
// <max_size>, <bucket>, <offset>...
// max_size indicates the maximum enum value for which an offset is defined.
// An offset value of 0xFFFF (missingValue) also indicates an undefined value.
// If defined, offset indicates the offset within the given bucket of the string.
// A node value has the following form:
// <max_size>, <offset_or_alias>...
// max_size indicates the maximum value for which an offset is defined.
// A missing offset may also be indicated with 0. If the high bit (0x8000, or
// inheritMask) is not set, the offset points to the offset within the index
// for the current locale.
// An offset with high bit set is an alias. In this case the uint16 has the form
// bits:
// 15: 1
// 14-12: negative offset into path relative to current position
// 0-11: new enum value for path element.
// On encountering an alias, the path is modified accordingly and the lookup is
// restarted for the given locale.
import (
"fmt"
"reflect"
"regexp"
"strings"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
// TODO:
// - allow two Trees to share the same set of buckets.
// A Builder allows storing CLDR data in compact form.
//
// Strings are stored length-prefixed in byte buckets and deduplicated through
// strToBucket; the first error encountered while building is kept in err.
type Builder struct {
// NOTE(review): table is not referenced by the code visible here — confirm
// its use elsewhere in the package.
table []string
// rootMeta is the metaData shared by the root Index of every locale.
rootMeta *metaData
// locales lists the locales registered with Locale, in registration order.
locales []locale
// strToBucket maps a length-prefixed string to where it was stored.
strToBucket map[string]stringInfo
// buckets holds the string data; New creates the first (empty) bucket.
buckets [][]byte
// enums lists the enums registered by options created with EnumFunc.
enums []*enum
// err is the first error recorded with setError.
err error
// Stats
// size is the number of bytes appended to buckets.
size int
// sizeAll is the total length of all strings passed through makeString,
// including their length prefix and regardless of deduplication.
sizeAll int
// bucketWaste counts bytes stored again because the string already existed
// in a different bucket.
bucketWaste int
}
// Limits on the stored string data.
//
// addString starts a new bucket once adding a string would make the current
// one reach maxBucketSize. makeString truncates strings longer than maxStrlen
// so that the length fits in the single-byte length prefix.
const (
maxBucketSize = 8 * 1024 // 8K
maxStrlen = 254 // allow 0xFF sentinel
)
// setError records err as the Builder's error unless an earlier error has
// already been recorded; only the first error is kept.
func (b *Builder) setError(err error) {
	if b.err != nil {
		return
	}
	b.err = err
}
// addString stores data, in length-prefixed form, and returns its location.
// A string that was stored before is not stored again. A new string is
// appended to the last bucket, unless that would make the bucket reach
// maxBucketSize, in which case a new bucket is started.
func (b *Builder) addString(data string) stringInfo {
	data = b.makeString(data)
	info, known := b.strToBucket[data]
	if known {
		return info
	}
	b.size += len(data)
	last := len(b.buckets) - 1
	if cur := b.buckets[last]; len(cur)+len(data) >= maxBucketSize {
		// The current bucket is full: open a new one holding just this string.
		info.bucket = uint16(len(b.buckets))
		info.bucketPos = 0
		b.buckets = append(b.buckets, []byte(data))
	} else {
		info.bucket = uint16(last)
		info.bucketPos = uint16(len(cur))
		b.buckets[last] = append(cur, data...)
	}
	b.strToBucket[data] = info
	return info
}
// addStringToBucket stores data, in length-prefixed form, in the given bucket
// and returns its location. If the string is already present in that bucket,
// the existing location is returned. If it is only present in another bucket,
// it is stored again and the duplicated bytes are counted as bucket waste.
func (b *Builder) addStringToBucket(data string, bucket uint16) stringInfo {
	data = b.makeString(data)
	info, known := b.strToBucket[data]
	if known && info.bucket == bucket {
		return info
	}
	if known {
		b.bucketWaste += len(data)
	}
	b.size += len(data)
	dst := b.buckets[bucket]
	info.bucket = bucket
	info.bucketPos = uint16(len(dst))
	b.buckets[bucket] = append(dst, data...)
	b.strToBucket[data] = info
	return info
}
// makeString returns data prefixed with a single byte holding its length.
//
// A string longer than maxStrlen is reported as an error on the Builder and
// truncated to maxStrlen bytes; the truncated string is then cut at the last
// rune start found among its final three bytes, if any. The length of the
// returned string is added to the sizeAll statistic.
func (b *Builder) makeString(data string) string {
	if len(data) > maxStrlen {
		b.setError(fmt.Errorf("string %q exceeds maximum length of %d", data, maxStrlen))
		data = data[:maxStrlen]
		end := len(data)
		for back := 1; back <= 3; back++ {
			if pos := end - back; utf8.RuneStart(data[pos]) {
				data = data[:pos]
				break
			}
		}
	}
	prefixed := string([]byte{byte(len(data))}) + data
	b.sizeAll += len(prefixed)
	return prefixed
}
// stringInfo records where a length-prefixed string is stored.
type stringInfo struct {
// NOTE(review): bufferPos is not set by the code visible here — confirm its
// use elsewhere in the package.
bufferPos uint32
// bucket is the index of the bucket holding the string.
bucket uint16
// bucketPos is the byte offset of the string within that bucket.
bucketPos uint16
}
// New creates a new Builder with one empty string bucket and a root metaData
// that refers back to the Builder. The tableName argument is not used by this
// function.
func New(tableName string) *Builder {
	b := new(Builder)
	b.strToBucket = make(map[string]stringInfo)
	b.buckets = [][]byte{nil} // initialize with first bucket.
	b.rootMeta = &metaData{b: b, typeInfo: new(typeInfo)}
	return b
}
// Gen builds the tree for the collected data and writes all of its tables and
// types to w. It returns the error from building the tree or, if building
// succeeded, from generating the code.
func (b *Builder) Gen(w *gen.CodeWriter) error {
	tree, err := build(b)
	if err == nil {
		err = generate(b, tree, w)
	}
	return err
}
// GenTestData writes to w the tables that are useful for testing data
// generated with Gen.
func (b *Builder) GenTestData(w *gen.CodeWriter) error {
	err := generateTestData(b, w)
	return err
}
// locale pairs a language tag with the root Index created for it by
// Builder.Locale.
type locale struct {
tag language.Tag
root *Index
}
// Locale creates the root index for the given locale, registers it with the
// Builder and returns it. The index uses the Builder's root metaData.
func (b *Builder) Locale(t language.Tag) *Index {
	root := &Index{meta: b.rootMeta}
	b.locales = append(b.locales, locale{root: root, tag: t})
	return root
}
// An Index holds a map of either leaf values or other indices.
//
// An Index has either sub-indices or values, never both: subIndexForKey
// panics if values exist and SetValue panics if sub-indices exist.
type Index struct {
// meta identifies this Index; subIndexForKey reuses an existing sub-index
// whose meta matches.
meta *metaData
subIndex []*Index
values []keyValue
}
// setError forwards err to the Builder that owns this Index.
func (i *Index) setError(err error) {
	owner := i.meta.b
	owner.setError(err)
}
// keyValue is a leaf entry of an Index: an enum key together with the
// location of the stored string value.
type keyValue struct {
key enumIndex
value stringInfo
}
// Element is a CLDR XML element.
type Element interface {
// GetCommon returns the cldr.Common data of the element.
GetCommon() *cldr.Common
}
// Index creates a subindex where the type and enum values are not shared
// with siblings by default. The name is derived from the element name of elem.
// If elem is an alias reference, the alias will be resolved and linked. Index
// returns nil if elem is nil or is an interface wrapping a nil value.
func (i *Index) Index(elem Element, opt ...Option) *Index {
	if elem == nil {
		return nil
	}
	if reflect.ValueOf(elem).IsNil() {
		return nil
	}
	common := elem.GetCommon()
	o := &options{parent: i, name: common.GetCommon().Element()}
	o.fill(opt)
	o.setAlias(elem)
	return i.subIndexForKey(o)
}
// IndexWithName is like Index but uses the given name as key instead of
// deriving it from an element. No alias is configured.
func (i *Index) IndexWithName(name string, opt ...Option) *Index {
	o := new(options)
	o.parent, o.name = i, name
	o.fill(opt)
	return i.subIndexForKey(o)
}
// IndexFromType creates a subindex with the value of the type attribute of
// elem as key. It also configures the Index to share the enumeration values
// with all sibling values. If elem is an alias, it will be resolved and
// linked.
func (i *Index) IndexFromType(elem Element, opts ...Option) *Index {
	key := elem.GetCommon().Type
	o := &options{parent: i, name: key}
	o.fill(opts)
	o.setAlias(elem)
	// Applied after the caller's options, so it always determines sharedType.
	share := useSharedType()
	share(o)
	return i.subIndexForKey(o)
}
// IndexFromAlt creates a subindex with the value of the alt attribute of elem
// as key. It also configures the Index to share the enumeration values with
// all sibling values. If elem is an alias, it will be resolved and linked.
func (i *Index) IndexFromAlt(elem Element, opts ...Option) *Index {
	key := elem.GetCommon().Alt
	o := &options{parent: i, name: key}
	o.fill(opts)
	o.setAlias(elem)
	// Applied after the caller's options, so it always determines sharedType.
	share := useSharedType()
	share(o)
	return i.subIndexForKey(o)
}
// subIndexForKey returns the sub-index of i for the key named by opts,
// creating it if it does not exist yet. It panics if i already holds values.
//
// When a new sub-index is created for an element that carries an alias, the
// alias path is parsed: each leading "../" decrements the inherit offset of
// the sub-index's metaData, and the remaining path determines the inherit
// index (the attribute value if the first path element has one, otherwise the
// element name). Unsupported or unparsable aliases are recorded as errors.
func (i *Index) subIndexForKey(opts *options) *Index {
	name := opts.name
	if len(i.values) > 0 {
		panic(fmt.Errorf("cldrtree: adding Index for %q when value already exists", name))
	}
	meta := i.meta.sub(name, opts)
	for _, existing := range i.subIndex {
		if existing.meta == meta {
			return existing
		}
	}
	if elem := opts.alias; elem != nil {
		if a := elem.GetCommon().Alias; a != nil {
			if a.Source != "locale" {
				i.setError(fmt.Errorf("cldrtree: non-locale alias not supported %v", a.Path))
			}
			if meta.inheritOffset < 0 {
				i.setError(fmt.Errorf("cldrtree: alias was already set %v", a.Path))
			}
			// Every leading "../" moves the alias target one level up.
			rest := a.Path
			for strings.HasPrefix(rest, "../") {
				rest = rest[len("../"):]
				meta.inheritOffset--
			}
			if m := aliasRe.FindStringSubmatch(rest); m != nil {
				target := m[4]
				if target == "" {
					target = m[1]
				}
				meta.inheritIndex = target
			} else {
				i.setError(fmt.Errorf("cldrtree: could not parse alias %q", a.Path))
			}
		}
	}
	created := &Index{meta: meta}
	i.subIndex = append(i.subIndex, created)
	return created
}
// aliasRe matches the first element of an alias path, anchored at the start.
// Submatch 1 is the element name; if the optional [@attr='value'] selector is
// present, submatch 3 is the attribute name and submatch 4 its value.
var aliasRe = regexp.MustCompile(`^([a-zA-Z]+)(\[@([a-zA-Z-]+)='([a-zA-Z-]+)'\])?`)
// SetValue sets the value for the given key to the data of the CLDR XML
// element value. It panics if the Index already has sub-indices. Aliases are
// not supported here: an element carrying one is recorded as an error, after
// which the element's data is still stored.
func (i *Index) SetValue(key string, value Element, opt ...Option) {
	if len(i.subIndex) > 0 {
		panic(fmt.Errorf("adding value for key %q when index already exists", key))
	}
	o := &options{parent: i}
	o.fill(opt)
	common := value.GetCommon()
	if alias := common.Alias; alias != nil {
		i.setError(fmt.Errorf("cldrtree: alias not supported for SetValue %v", alias.Path))
	}
	i.setValue(key, common.Data(), o)
}
// setValue resolves key to an enum index, stores data and appends the
// resulting key/value pair to the Index. The first value of an Index may go
// into any bucket; later values are placed in the same bucket as the first.
func (i *Index) setValue(key, data string, o *options) {
	idx, _ := i.meta.typeInfo.lookupSubtype(key, o)
	var stored stringInfo
	if len(i.values) == 0 {
		stored = i.meta.b.addString(data)
	} else {
		// Add string to the same bucket as the other values.
		stored = i.meta.b.addStringToBucket(data, i.values[0].value.bucket)
	}
	i.values = append(i.values, keyValue{key: idx, value: stored})
}
@@ -0,0 +1,457 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldrtree
import (
"bytes"
"flag"
"log"
"math/rand"
"os"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language/compact"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
// genOutput is set with the -gen flag. When set, TestGen writes the generated
// code to the testdata output files instead of comparing against them.
var genOutput = flag.Bool("gen", false, "generate output files")
// TestAliasRegexp checks the submatches aliasRe produces for alias paths with
// and without an attribute selector, and that paths starting with "../" do
// not match.
func TestAliasRegexp(t *testing.T) {
	for _, tc := range []struct {
		alias string
		want  []string
	}{
		{
			alias: "miscPatterns[@numberSystem='latn']",
			want:  []string{"miscPatterns[@numberSystem='latn']", "miscPatterns", "[@numberSystem='latn']", "numberSystem", "latn"},
		},
		{
			alias: `calendar[@type='greg-foo']/days/`,
			want:  []string{"calendar[@type='greg-foo']", "calendar", "[@type='greg-foo']", "type", "greg-foo"},
		},
		{
			alias: "eraAbbr",
			want:  []string{"eraAbbr", "eraAbbr", "", "", ""},
		},
		{
			// match must be anchored at beginning.
			alias: `../calendar[@type='gregorian']/days/`,
		},
	} {
		t.Run(tc.alias, func(t *testing.T) {
			if got := aliasRe.FindStringSubmatch(tc.alias); !reflect.DeepEqual(got, tc.want) {
				t.Errorf("got %v; want %v", got, tc.want)
			}
		})
	}
}
// TestBuild builds the trees for the "test1" and "test2" test data and checks
// the string found for a number of locale/path combinations, including
// lookups that are resolved through aliases or through parent locales.
func TestBuild(t *testing.T) {
	tree1, _ := loadTestdata(t, "test1")
	tree2, _ := loadTestdata(t, "test2")

	// Constants for second test
	const (
		calendar = iota
		field
	)
	const (
		month = iota
		era
		filler
		cyclicNameSet
	)
	const (
		abbreviated = iota
		narrow
		wide
	)

	type buildCase struct {
		desc      string
		tree      *Tree
		locale    string
		path      []uint16
		isFeature bool
		result    string
	}
	testCases := []buildCase{
		{desc: "und/chinese month format wide m1", tree: tree1, locale: "und", path: path(calendar, 0, month, 0, wide, 1), result: "cM01"},
		{desc: "und/chinese month format wide m12", tree: tree1, locale: "und", path: path(calendar, 0, month, 0, wide, 12), result: "cM12"},
		{desc: "und/non-existing value", tree: tree1, locale: "und", path: path(calendar, 0, month, 0, wide, 13), result: ""},
		{desc: "und/dangi:chinese month format wide", tree: tree1, locale: "und", path: path(calendar, 1, month, 0, wide, 1), result: "cM01"},
		{desc: "und/chinese month format abbreviated:wide", tree: tree1, locale: "und", path: path(calendar, 0, month, 0, abbreviated, 1), result: "cM01"},
		{desc: "und/chinese month format narrow:wide", tree: tree1, locale: "und", path: path(calendar, 0, month, 0, narrow, 1), result: "cM01"},
		{desc: "und/gregorian month format wide", tree: tree1, locale: "und", path: path(calendar, 2, month, 0, wide, 2), result: "gM02"},
		{desc: "und/gregorian month format:stand-alone narrow", tree: tree1, locale: "und", path: path(calendar, 2, month, 0, narrow, 1), result: "1"},
		{desc: "und/gregorian month stand-alone:format abbreviated", tree: tree1, locale: "und", path: path(calendar, 2, month, 1, abbreviated, 1), result: "gM01"},
		{desc: "und/gregorian month stand-alone:format wide ", tree: tree1, locale: "und", path: path(calendar, 2, month, 1, abbreviated, 1), result: "gM01"},
		{desc: "und/dangi:chinese month format narrow:wide ", tree: tree1, locale: "und", path: path(calendar, 1, month, 0, narrow, 4), result: "cM04"},
		{desc: "und/field era displayname 0", tree: tree2, locale: "und", path: path(field, 0, 0, 0), result: "Era"},
		{desc: "en/field era displayname 0", tree: tree2, locale: "en", path: path(field, 0, 0, 0), result: "era"},
		{desc: "und/calendar hebrew format wide 7-leap", tree: tree2, locale: "und", path: path(calendar, 7, month, 0, wide, 0), result: "Adar II"},
		{desc: "en-GB:en-001:en:und/calendar hebrew format wide 7-leap", tree: tree2, locale: "en-GB", path: path(calendar, 7, month, 0, wide, 0), result: "Adar II"},
		{desc: "und/buddhist month format wide 11", tree: tree2, locale: "und", path: path(calendar, 0, month, 0, wide, 12), result: "genWideM12"},
		{desc: "en-GB/gregorian month stand-alone narrow 2", tree: tree2, locale: "en-GB", path: path(calendar, 6, month, 1, narrow, 3), result: "gbNarrowM3"},
		{desc: "en-GB/gregorian month format narrow 3/missing in en-GB", tree: tree2, locale: "en-GB", path: path(calendar, 6, month, 0, narrow, 4), result: "enNarrowM4"},
		{desc: "en-GB/gregorian month format narrow 3/missing in en and en-GB", tree: tree2, locale: "en-GB", path: path(calendar, 6, month, 0, narrow, 7), result: "gregNarrowM7"},
		{desc: "en-GB/gregorian month format narrow 3/missing in en and en-GB", tree: tree2, locale: "en-GB", path: path(calendar, 6, month, 0, narrow, 7), result: "gregNarrowM7"},
		{desc: "en-GB/gregorian era narrow", tree: tree2, locale: "en-GB", path: path(calendar, 6, era, abbreviated, 0, 1), isFeature: true, result: "AD"},
		{desc: "en-GB/gregorian era narrow", tree: tree2, locale: "en-GB", path: path(calendar, 6, era, narrow, 0, 0), isFeature: true, result: "BC"},
		{desc: "en-GB/gregorian era narrow", tree: tree2, locale: "en-GB", path: path(calendar, 6, era, wide, 1, 0), isFeature: true, result: "Before Common Era"},
		{desc: "en-GB/dangi:chinese cyclicName, months, format, narrow:abbreviated 2", tree: tree2, locale: "en-GB", path: path(calendar, 1, cyclicNameSet, 3, 0, 1, 2), isFeature: true, result: "year2"},
		{desc: "en-GB/field era-narrow ", tree: tree2, locale: "en-GB", path: path(field, 2, 0, 0), result: "era"},
		{desc: "en-GB/field month-narrow relativeTime future one", tree: tree2, locale: "en-GB", path: path(field, 5, 2, 0, 1), isFeature: true, result: "001NarrowFutMOne"},
		// Don't fall back to the one of "en".
		{desc: "en-GB/field month-short relativeTime past one:other", tree: tree2, locale: "en-GB", path: path(field, 4, 2, 1, 1), isFeature: true, result: "001ShortPastMOther"},
		{desc: "en-GB/field month relativeTime future two:other", tree: tree2, locale: "en-GB", path: path(field, 3, 2, 0, 2), isFeature: true, result: "enFutMOther"},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			tag, _ := compact.RegionalID(compact.Tag(language.MustParse(tc.locale)))
			if got := tc.tree.lookup(tag, tc.isFeature, tc.path...); got != tc.result {
				t.Errorf("got %q; want %q", got, tc.result)
			}
		})
	}
}
// path returns its arguments as a slice; it keeps the lookup paths in the
// test tables short.
func path(e ...uint16) []uint16 {
	return e
}
// TestGen generates the code for each test data set and compares it with the
// golden file testdata/<name>/output.go. When the -gen flag is set, the golden
// file is rewritten instead and the comparison is skipped.
func TestGen(t *testing.T) {
	// Sizes may vary per architecture, so they are masked before comparing.
	// The patterns are compiled once instead of once per subtest.
	sizeRe := regexp.MustCompile("// Size: [0-9]*")
	totalRe := regexp.MustCompile("// Total table size [0-9]*")

	testCases := []string{"test1", "test2"}
	for _, tc := range testCases {
		t.Run(tc, func(t *testing.T) {
			_, got := loadTestdata(t, tc)

			got = sizeRe.ReplaceAllLiteral(got, []byte("// Size: xxxx"))
			got = totalRe.ReplaceAllLiteral(got, []byte("// Total table size: xxxx"))

			file := filepath.Join("testdata", tc, "output.go")
			if *genOutput {
				// A failed write must not be reported as a mere skip.
				if err := os.WriteFile(file, got, 0700); err != nil {
					t.Fatalf("failed to write file: %v", err)
				}
				t.SkipNow()
			}

			want, err := os.ReadFile(file)
			if err != nil {
				t.Fatalf("failed to open file: %v", err)
			}
			if !bytes.Equal(got, want) {
				t.Log(string(got))
				t.Errorf("files differ")
			}
		})
	}
}
// loadTestdata decodes the CLDR data in testdata/<test>, feeds the calendar
// and field data of every locale into a new Builder, and returns the built
// tree together with the generated Go source (tables plus test data).
//
// Any failure to decode, build or generate aborts the calling test.
func loadTestdata(t *testing.T, test string) (tree *Tree, file []byte) {
	t.Helper()

	b := New("test")

	var d cldr.Decoder
	data, err := d.DecodePath(filepath.Join("testdata", test))
	if err != nil {
		t.Fatalf("error decoding testdata: %v", err)
	}

	context := Enum("context")
	widthMap := func(s string) string {
		// Align era with width values.
		if r, ok := map[string]string{
			"eraAbbr":   "abbreviated",
			"eraNarrow": "narrow",
			"eraNames":  "wide",
		}[s]; ok {
			s = r
		}
		return "w" + strings.Title(s)
	}
	width := EnumFunc("width", widthMap, "abbreviated", "narrow", "wide")
	month := Enum("month", "leap7")
	relative := EnumFunc("relative", func(s string) string {
		x, err := strconv.ParseInt(s, 10, 8)
		if err != nil {
			log.Fatal("Invalid number:", err)
		}
		return []string{
			"before1",
			"current",
			"after1",
		}[x+1]
	})
	cycleType := EnumFunc("cycleType", func(s string) string {
		return "cyc" + strings.Title(s)
	})

	// A fixed seed keeps the filler data, and thus the output, reproducible.
	r := rand.New(rand.NewSource(0))

	for _, loc := range data.Locales() {
		ldml := data.RawLDML(loc)
		x := b.Locale(language.Make(loc))

		if x := x.Index(ldml.Dates.Calendars); x != nil {
			for _, cal := range ldml.Dates.Calendars.Calendar {
				x := x.IndexFromType(cal)
				if x := x.Index(cal.Months); x != nil {
					for _, mc := range cal.Months.MonthContext {
						x := x.IndexFromType(mc, context)
						for _, mw := range mc.MonthWidth {
							x := x.IndexFromType(mw, width)
							for _, m := range mw.Month {
								x.SetValue(m.Yeartype+m.Type, m, month)
							}
						}
					}
				}
				if x := x.Index(cal.CyclicNameSets); x != nil {
					for _, cns := range cal.CyclicNameSets.CyclicNameSet {
						x := x.IndexFromType(cns, cycleType)
						for _, cc := range cns.CyclicNameContext {
							x := x.IndexFromType(cc, context)
							for _, cw := range cc.CyclicNameWidth {
								x := x.IndexFromType(cw, width)
								for _, c := range cw.CyclicName {
									x.SetValue(c.Type, c)
								}
							}
						}
					}
				}
				if x := x.Index(cal.Eras); x != nil {
					opts := []Option{width, SharedType()}
					if x := x.Index(cal.Eras.EraNames, opts...); x != nil {
						for _, e := range cal.Eras.EraNames.Era {
							x.IndexFromAlt(e).SetValue(e.Type, e)
						}
					}
					if x := x.Index(cal.Eras.EraAbbr, opts...); x != nil {
						for _, e := range cal.Eras.EraAbbr.Era {
							x.IndexFromAlt(e).SetValue(e.Type, e)
						}
					}
					if x := x.Index(cal.Eras.EraNarrow, opts...); x != nil {
						for _, e := range cal.Eras.EraNarrow.Era {
							x.IndexFromAlt(e).SetValue(e.Type, e)
						}
					}
				}
				{
					// Ensure having more than 2 buckets.
					f := x.IndexWithName("filler")
					b := make([]byte, maxStrlen)
					opt := &options{parent: x}
					r.Read(b)
					f.setValue("0", string(b), opt)
				}
			}
		}

		if x := x.Index(ldml.Dates.Fields); x != nil {
			for _, f := range ldml.Dates.Fields.Field {
				x := x.IndexFromType(f)
				for _, d := range f.DisplayName {
					x.Index(d).SetValue("", d)
				}
				for _, r := range f.Relative {
					x.Index(r).SetValue(r.Type, r, relative)
				}
				for _, rt := range f.RelativeTime {
					x := x.Index(rt).IndexFromType(rt)
					for _, p := range rt.RelativeTimePattern {
						x.SetValue(p.Count, p)
					}
				}
				for _, rp := range f.RelativePeriod {
					x.Index(rp).SetValue("", rp)
				}
			}
		}
	}

	tree, err = build(b)
	if err != nil {
		t.Fatal("error building tree:", err)
	}
	w := gen.NewCodeWriter()
	// Generation errors would otherwise surface only as a confusing diff
	// against the golden file, so report them directly.
	if err := generate(b, tree, w); err != nil {
		t.Fatal("error generating tables:", err)
	}
	if err := generateTestData(b, w); err != nil {
		t.Fatal("error generating test data:", err)
	}
	buf := &bytes.Buffer{}
	if _, err = w.WriteGo(buf, "test", ""); err != nil {
		t.Log(buf.String())
		t.Fatal("error generating code:", err)
	}
	return tree, buf.Bytes()
}
@@ -0,0 +1,208 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldrtree
import (
"bytes"
"fmt"
"io"
"reflect"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
)
// generate writes the Go code for tree t to w: the import and the tree
// variable, a comment with the Builder's statistics, one uint16 type per
// registered enum, the enum constants, the locales and indices tables, and
// the string buckets. It returns an error if the enum data is inconsistent.
func generate(b *Builder, t *Tree, w *gen.CodeWriter) error {
	fmt.Fprintln(w, `import "golang.org/x/text/internal/cldrtree"`)
	fmt.Fprintln(w)

	fmt.Fprintf(w, "var tree = &cldrtree.Tree{locales, indices, buckets}\n\n")

	w.WriteComment("Path values:\n" + b.stats())
	fmt.Fprintln(w)

	// Declare one named type per registered enum.
	for _, enumType := range b.enums {
		w.WriteComment("%s specifies a property of a CLDR field.", enumType.name)
		fmt.Fprintf(w, "type %s uint16\n", enumType.name)
	}

	data, err := getEnumData(b)
	if err != nil {
		return err
	}
	fmt.Fprintln(w, "const (")
	for idx := range data.keys {
		key := data.keys[idx]
		fmt.Fprintf(w, "%s %s = %d // %s\n", toCamel(key), data.enums[idx], data.m[key], key)
	}
	fmt.Fprintln(w, ")")

	w.WriteVar("locales", t.Locales)
	w.WriteVar("indices", t.Indices)

	// The buckets slice only references the per-bucket variables below.
	fmt.Fprintln(w, "var buckets = []string{")
	for n := 0; n < len(t.Buckets); n++ {
		fmt.Fprintf(w, "bucket%d,\n", n)
	}
	fmt.Fprint(w, "}\n\n")
	// Account for the string headers of the buckets slice.
	w.Size += int(reflect.TypeOf("").Size()) * len(t.Buckets)

	// Write the contents of each bucket as its own variable.
	for n, contents := range t.Buckets {
		w.WriteVar(fmt.Sprint("bucket", n), contents)
	}
	return nil
}
// generateTestData writes to w an enumMap variable that maps every enum key,
// and the empty string, to its numeric value. It returns an error if the enum
// data is inconsistent.
func generateTestData(b *Builder, w *gen.CodeWriter) error {
	data, err := getEnumData(b)
	if err != nil {
		return err
	}

	fmt.Fprintln(w)
	fmt.Fprintln(w, "var enumMap = map[string]uint16{")
	fmt.Fprintln(w, `"": 0,`)
	for idx := range data.keys {
		key := data.keys[idx]
		fmt.Fprintf(w, "%q: %d,\n", key, data.m[key])
	}
	fmt.Fprintln(w, "}")
	return nil
}
// toCamel turns a dash-separated key into a camel-case identifier: every
// dash-separated part after the first is title-cased, the parts are joined,
// and any slashes are removed.
func toCamel(s string) string {
	parts := strings.Split(s, "-")
	for idx := 1; idx < len(parts); idx++ {
		parts[idx] = strings.Title(parts[idx])
	}
	joined := strings.Join(parts, "")
	return strings.ReplaceAll(joined, "/", "")
}
// stats validates the root metaData and returns a textual overview of the
// Builder: the values of every registered enum, the enum structure starting
// at the root type, and the string storage statistics.
func (b *Builder) stats() string {
	var out bytes.Buffer

	b.rootMeta.validate()
	for _, e := range b.enums {
		fmt.Fprintf(&out, "<%s>\n", e.name)
		printEnumValues(&out, e, 1, nil)
	}
	fmt.Fprintln(&out)
	printEnums(&out, b.rootMeta.typeInfo, 0)
	fmt.Fprintln(&out)

	fmt.Fprintln(&out, "Nr elem: ", len(b.strToBucket))
	fmt.Fprintln(&out, "uniqued size: ", b.size)
	fmt.Fprintln(&out, "total string size: ", b.sizeAll)
	fmt.Fprintln(&out, "bucket waste: ", b.bucketWaste)
	return out.String()
}
// printEnums writes the enum of s to w at the given indentation. A named enum
// is written as "<name>"; an unnamed enum has its values listed. If the keys
// of s are shared, one of its entries is printed as representative for all of
// them. It panics if s has entries but no enum.
func printEnums(w io.Writer, s *typeInfo, indent int) {
	prefix := strings.Repeat(" ", indent) + "- "
	values := s.enum
	if values == nil {
		if len(s.entries) > 0 {
			panic(fmt.Errorf("has entries but no enum values: %#v", s.entries))
		}
		return
	}
	if values.name == "" {
		printEnumValues(w, values, indent, s)
	} else {
		fmt.Fprintf(w, "%s<%s>\n", prefix, values.name)
	}
	if !s.sharedKeys() {
		return
	}
	// Only the first entry encountered is printed.
	for _, child := range s.entries {
		printEnums(w, child, indent+1)
		break
	}
}
// printEnumValues writes the keys of e to w, one list item per line at the
// given indentation. A run of consecutive integer keys is collapsed into a
// single "first..last" item and the empty key is written as "". If info is
// non-nil and its keys are not shared, the entry of info for the current
// index is printed below the item, one level deeper.
func printEnumValues(w io.Writer, e *enum, indent int, info *typeInfo) {
idStr := strings.Repeat(" ", indent) + "- "
for i := 0; i < len(e.keys); i++ {
fmt.Fprint(w, idStr)
k := e.keys[i]
if u, err := strconv.ParseUint(k, 10, 16); err == nil {
fmt.Fprintf(w, "%s", k)
// Skip contiguous integers
// NOTE: this inner loop advances the outer loop's index i. It stops at the
// first key that is not an integer (leaving it in k with err set) or at the
// end of the keys.
var v, last uint64
for i++; i < len(e.keys); i++ {
k = e.keys[i]
if v, err = strconv.ParseUint(k, 10, 16); err != nil {
break
}
last = v
}
if u < last {
fmt.Fprintf(w, `..%d`, last)
}
fmt.Fprintln(w)
// The non-integer key that ended the run is written as its own item.
if err != nil {
fmt.Fprintf(w, "%s%s\n", idStr, k)
}
} else if k == "" {
fmt.Fprintln(w, `""`)
} else {
fmt.Fprintf(w, "%s\n", k)
}
// After an integer run, i refers to the key that ended the run, or equals
// len(e.keys) if the run reached the end.
if info != nil && !info.sharedKeys() {
if e := info.entries[enumIndex(i)]; e != nil {
printEnums(w, e, indent+1)
}
}
}
}
// getEnumData collects the enum keys and values reachable from the root type
// of b. It returns an error if a key is assigned conflicting values.
func getEnumData(b *Builder) (*enumData, error) {
	data := &enumData{m: make(map[string]int)}
	errStr := data.insert(b.rootMeta.typeInfo)
	if errStr == "" {
		return data, nil
	}
	// TODO: consider returning the error.
	return nil, fmt.Errorf("cldrtree: %s", errStr)
}
// enumData holds the enum keys collected by insert.
type enumData struct {
// m maps each key to its enum value.
m map[string]int
// keys lists the non-empty keys in the order they were first encountered.
keys []string
// enums holds, for each element of keys, the name of the enum that defined
// it.
enums []string
}
// insert records the keys of the enum of t and then, recursively, those of
// the entries of t. Keys are processed in order until the first key that
// parses as an integer; enums with integer values are not included. It
// returns a non-empty description if a key is found with two different
// values, prefixed with the keys of the enclosing types.
func (d *enumData) insert(t *typeInfo) (errStr string) {
	if t.enum == nil {
		return ""
	}
	for pos, key := range t.enum.keys {
		if _, err := strconv.ParseUint(key, 10, 16); err == nil {
			// We don't include any enum that has integer values.
			break
		}
		prev, seen := d.m[key]
		switch {
		case !seen:
			d.m[key] = pos
			if key != "" {
				d.keys = append(d.keys, key)
				d.enums = append(d.enums, t.enum.name)
			}
		case prev != pos:
			return fmt.Sprintf("%q has value %d and %d", key, pos, prev)
		}
	}
	for pos, key := range t.enum.keys {
		child := t.entries[enumIndex(pos)]
		if child == nil {
			continue
		}
		if msg := d.insert(child); msg != "" {
			return fmt.Sprintf("%q>%v", key, msg)
		}
	}
	return ""
}
@@ -0,0 +1,86 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldrtree
import (
"reflect"
"golang.org/x/text/unicode/cldr"
)
// An Option configures an Index.
type Option func(*options)
// options collects the settings used to create or look up a sub-index.
type options struct {
// parent is the Index the sub-index or value is added to.
parent *Index
// name is the key of the sub-index.
name string
// alias is the common data of the element the sub-index was derived from,
// as set by setAlias; it is inspected for an alias when the sub-index is
// created.
alias *cldr.Common
// sharedType is set by the SharedType and useSharedType options.
sharedType *typeInfo
// sharedEnums is set by options created with EnumFunc.
sharedEnums *enum
}
// fill applies the given options to o, in order.
func (o *options) fill(opt []Option) {
	for idx := range opt {
		opt[idx](o)
	}
}
// setAlias records the common data of node n as the alias source of o. It
// does nothing if n is nil or is an interface wrapping a nil value.
func (o *options) setAlias(n Element) {
	if n == nil || reflect.ValueOf(n).IsNil() {
		return
	}
	o.alias = n.GetCommon()
}
// Enum defines an enumeration type. The resulting option may be passed for the
// construction of multiple Indexes, which will then share the same enum
// values. Calling Gen on a Builder will generate the Enum for the given name.
// The optional values fix the values for the given identifiers to their
// argument position (starting at 0). Other values may still be added and will
// be assigned to subsequent values.
func Enum(name string, value ...string) Option {
	var noRename func(string) string
	return EnumFunc(name, noRename, value...)
}
// EnumFunc is like Enum but also takes a function that allows rewriting keys.
//
// When the returned option is applied, the enum is registered with the
// Builder of the option's parent Index, unless an enum with the same name has
// been registered already, and is set as the shared enum of the options.
func EnumFunc(name string, rename func(string) string, value ...string) Option {
	shared := &enum{name: name, rename: rename, keyMap: map[string]enumIndex{}}
	// Looking the values up in order fixes them to their argument position.
	for _, key := range value {
		shared.lookup(key)
	}
	return func(o *options) {
		builder := o.parent.meta.b
		registered := false
		for _, known := range builder.enums {
			if known.name == shared.name {
				registered = true
				break
			}
		}
		if !registered {
			builder.enums = append(builder.enums, shared)
		}
		o.sharedEnums = shared
	}
}
// SharedType returns an option which causes all Indexes to which this option
// is passed to have the same type. Every call creates a new type, so the
// returned option itself must be reused to share it.
func SharedType() Option {
	shared := new(typeInfo)
	return func(o *options) {
		o.sharedType = shared
	}
}
// useSharedType returns an option that sets the shared type of the options to
// the key type of the parent Index's type, creating that key type on first
// use.
func useSharedType() Option {
	return func(o *options) {
		parentType := o.parent.meta.typeInfo
		if parentType.keyTypeInfo == nil {
			parentType.keyTypeInfo = &typeInfo{}
		}
		o.sharedType = parentType.keyTypeInfo
	}
}
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8" ?>
<ldml>
<identity>
<language type="root"/>
</identity>
<dates>
<calendars>
<calendar type="chinese">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">cM01</month>
<month type="2">cM02</month>
<month type="3">cM03</month>
<month type="4">cM04</month>
<month type="5">cM05</month>
<month type="6">cM06</month>
<month type="7">cM07</month>
<month type="8">cM08</month>
<month type="9">cM09</month>
<month type="10">cM10</month>
<month type="11">cM11</month>
<month type="12">cM12</month>
</monthWidth>
</monthContext>
</months>
</calendar>
<calendar type="dangi">
<months>
<alias source="locale" path="../../calendar[@type='chinese']/months"/>
</months>
</calendar>
<calendar type="gregorian">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">gM01</month>
<month type="2">gM02</month>
<month type="3">gM03</month>
<month type="4">gM04</month>
<month type="5">gM05</month>
<month type="6">gM06</month>
<month type="7">gM07</month>
<month type="8">gM08</month>
<month type="9">gM09</month>
<month type="10">gM10</month>
<month type="11">gM11</month>
<month type="12">gM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
<month type="5">5</month>
<month type="6">6</month>
<month type="7">7</month>
<month type="8">8</month>
<month type="9">9</month>
<month type="10">10</month>
<month type="11">11</month>
<month type="12">12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
</calendar>
</calendars>
</dates>
</ldml>
@@ -0,0 +1,353 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package test
import "golang.org/x/text/internal/cldrtree"
var tree = &cldrtree.Tree{locales, indices, buckets}
// Path values:
// <context>
// - format
// - stand-alone
// <width>
// - wAbbreviated
// - wNarrow
// - wWide
// <month>
// - leap7
// - 1..12
//
// - calendars
// - chinese
// - dangi
// - gregorian
// - months
// - <context>
// - <width>
// - <month>
// - filler
// - 0
//
// Nr elem: 39
// uniqued size: 912
// total string size: 912
// bucket waste: 0
// context specifies a property of a CLDR field.
type context uint16
// width specifies a property of a CLDR field.
type width uint16
// month specifies a property of a CLDR field.
type month uint16
const (
calendars = 0 // calendars
chinese = 0 // chinese
dangi = 1 // dangi
gregorian = 2 // gregorian
months = 0 // months
filler = 1 // filler
format context = 0 // format
standAlone context = 1 // stand-alone
wAbbreviated width = 0 // wAbbreviated
wNarrow width = 1 // wNarrow
wWide width = 2 // wWide
leap7 month = 0 // leap7
)
var locales = []uint32{ // 775 elements
// Entry 0 - 1F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 20 - 3F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 40 - 5F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 60 - 7F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 80 - 9F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry A0 - BF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry C0 - DF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry E0 - FF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 100 - 11F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 120 - 13F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 140 - 15F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 160 - 17F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 180 - 19F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1A0 - 1BF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1C0 - 1DF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1E0 - 1FF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 200 - 21F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 220 - 23F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 240 - 25F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 260 - 27F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 280 - 29F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2A0 - 2BF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2C0 - 2DF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2E0 - 2FF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 300 - 31F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000,
} // Size: xxxx bytes
var indices = []uint16{ // 86 elements
// Entry 0 - 3F
0x0001, 0x0002, 0x0003, 0x0006, 0x0021, 0x0027, 0x0002, 0x0009,
0x001e, 0x0001, 0x000b, 0x0003, 0x8002, 0x8002, 0x000f, 0x000d,
0x0000, 0xffff, 0x0000, 0x0005, 0x000a, 0x000f, 0x0014, 0x0019,
0x001e, 0x0023, 0x0028, 0x002d, 0x0032, 0x0037, 0x0001, 0x0000,
0x003c, 0x0002, 0x9000, 0x0024, 0x0001, 0x0000, 0x013b, 0x0002,
0x002a, 0x0053, 0x0002, 0x002d, 0x0040, 0x0003, 0x8002, 0x9001,
0x0031, 0x000d, 0x0000, 0xffff, 0x023a, 0x023f, 0x0244, 0x0249,
0x024e, 0x0253, 0x0258, 0x025d, 0x0262, 0x0267, 0x026c, 0x0271,
// Entry 40 - 7F
0x0003, 0x9000, 0x0044, 0x9000, 0x000d, 0x0000, 0xffff, 0x0276,
0x0278, 0x027a, 0x027c, 0x027e, 0x0280, 0x0282, 0x0284, 0x0286,
0x0288, 0x028b, 0x028e, 0x0001, 0x0000, 0x0291,
} // Size: xxxx bytes
var buckets = []string{
bucket0,
}
var bucket0 string = "" + // Size: xxxx bytes
"\x04cM01\x04cM02\x04cM03\x04cM04\x04cM05\x04cM06\x04cM07\x04cM08\x04cM09" +
"\x04cM10\x04cM11\x04cM12\xfe\x01\x94\xfd\xc2\xfa/\xfc\xc0A\xd3\xff\x12" +
"\x04[s\xc8nO\xf9_\xf6b\xa5\xee\xe8*\xbd\xf4J-\x0bu\xfb\x18\x0d\xafH\xa7" +
"\x9e\xe0\xb1\x0d9FQ\x85\x0fԡx\x89.\xe2\x85\xec\xe1Q\x14Ux\x08u\xd6N\xe2" +
"\xd3\xd0\xd0\xdek\xf8\xf9\xb4L\xe8_\xf0DƱ\xf8;\x8e\x88;\xbf\x85z\xab\x99" +
"ŲR\xc7B\x9c2\U000e8bb7\x9e\xf8V\xf6Y\xc1\x8f\x0d\xce\xccw\xc7^z\x81\xbf" +
"\xde'_g\xcf\xe2B\xcf<\xc3T\xf3\xed\xe2־\xccN\xa3\xae^\x88Rj\x9fJW\x8b˞" +
"\xf2ԦS\x14v\x8dm)\x97a\xea\x9eOZ\xa6\xae\xc3\xfcxƪ\xe0\x81\xac\x81 \xc7 " +
"\xef\xcdlꄶ\x92^`{\xe0cqo\x96\xdd\xcd\xd0\x1du\x04\\?\x00\x0f\x8ayk\xcelQ" +
",8\x01\xaa\xca\xee߭[Pfd\xe8\xc0\xe4\xa7q\xecื\xc1\x96]\x91\x81%\x1b|\x9c" +
"\x9c\xa5 Z\xfc\x16\xa26\xa2\xef\xcd\xd2\xd1-*y\xd0\xfet\xa8(\x0a\xe9C" +
"\x9e\xb0֮\xca\x08#\xae\x02\xd6}\x86j\xc2\xc4\xfeJrPS\xda\x11\x9b\x9dOQQ@" +
"\xa2\xd7#\x9c@\xb4ZÕ\x0d\x94\x1f\xc4\xfe\x1c\x0c\xb9j\xd3\x22\xd6\x22" +
"\x82)_\xbf\xe1\x1e&\xa43\x07m\xb5\xc1DL:4\xd3*\\J\x7f\xfb\xe8с\xf7\xed;" +
"\x8c\xfe\x90O\x93\xf8\xf0m)\xbc\xd9\xed\x84{\x18.\x04d\x10\xf4Kİ\xf3\xf0" +
":\x0d\x06\x82\x0a0\xf2W\xf8\x11A0g\x8a\xc0E\x86\xc1\xe3\xc94,\x8b\x80U" +
"\xc4f؆D\x1d%\x99\x06͉֚K\x96\x8a\xe9\xf0띖\\\xe6\xa4i<N\xbe\x88\x15\x01" +
"\xb7لkf\xeb\x02\xb5~\\\xda{l\xbah\x91\xd6\x16\xbdhl7\xb84a:Ⱥ\xa2,\x00" +
"\x8f\xfeh\x83RsJ\xe4\xe3\xf1!z\xcd_\x83'\x08\x140\x18g\xb5\xd0g\x11\xb28" +
"\x00\x1cyW\xb2w\x19\xce?1\x88\xdf\xe5}\xee\xbfo\x82YZ\x10\xf7\xbbV,\xa0M" +
"\\='\x04gM01\x04gM02\x04gM03\x04gM04\x04gM05\x04gM06\x04gM07\x04gM08\x04" +
"gM09\x04gM10\x04gM11\x04gM12\x011\x012\x013\x014\x015\x016\x017\x018\x01" +
"9\x0210\x0211\x0212\xfe\x94)X\xc6\xdb2bg\x06I\xf3\xbc\x97٢1g5\xed悥\xdf" +
"\xe6\xf1\xa0\x11\xfbɊ\xd0\xfb\xe7\x90\x00<\x01\xe8\xe9\x96w\x03\xaff^" +
"\x9fr@\x7fK\x03\xd4\xfd\xb4t\xaa\xfe\x8a\x0d>\x05\x15\xddFP\xcfQ\x17+" +
"\x81$\x8b\xcb\x7f\x96\x9e@\x0bl[\x12wh\xb1\xc4\x12\xfa\xe9\x8c\xf5v1\xcf" +
"7\x03;KJ\xba}~\xd3\x19\xba\x14rI\xc9\x08\xacp\xd1\xc4\x06\xda\xde\x0e" +
"\x82\x8e\xb6\xba\x0dʨ\x82\x85T>\x10!<d?\xc8`;X`#fp\xba\xbc\xad\x0b\xd7" +
"\xf4\xc4\x19\x0e26#\xa8h\xd1\xea\xe1v\x9f@\xa2f1C\x1b;\xd5!V\x05\xd2\x08" +
"o\xeaԙ\xacc\xa4e=\x12(=V\x01\x9c7\x95\xa9\x8a\x12m\x09\xcf\xcb\xe3l\xdc" +
"\xc97\x88\xa5@\x9f\x8bnB\xc2݃\xaaFa\x18R\xad\x0bP(w\\w\x16\x90\xb6\x85N" +
"\x05\xb3w$\x1es\xa8\x83\xddw\xaf\xf00,m\xa8f\\B4\x1d\xdaJ\xda\xea"
var enumMap = map[string]uint16{
"": 0,
"calendars": 0,
"chinese": 0,
"dangi": 1,
"gregorian": 2,
"months": 0,
"filler": 1,
"format": 0,
"stand-alone": 1,
"wAbbreviated": 0,
"wNarrow": 1,
"wWide": 2,
"leap7": 0,
}
// Total table size: xxxx bytes (4KiB); checksum: 7EA7DE6
@@ -0,0 +1,171 @@
<?xml version="1.0" encoding="UTF-8" ?>
<ldml>
<identity>
<language type="en"/>
</identity>
<dates>
<calendars>
<calendar type="buddhist">
<eras>
<eraAbbr>
<era type="0">BE</era>
</eraAbbr>
</eras>
</calendar>
<calendar type="chinese">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<month type="1">Mo1</month>
<month type="2">Mo2</month>
<month type="3">Mo3</month>
<month type="4">Mo4</month>
<month type="5">Mo5</month>
<month type="6">Mo6</month>
<month type="7">Mo7</month>
<month type="8">Mo8</month>
<month type="9">Mo9</month>
<month type="10">Mo10</month>
<month type="11">Mo11</month>
<month type="12">Mo12</month>
</monthWidth>
<monthWidth type="wide">
<month type="1">First Month</month>
<month type="2">Second Month</month>
<month type="3">Third Month</month>
<month type="4">Fourth Month</month>
<month type="5">Fifth Month</month>
<month type="6">Sixth Month</month>
<month type="7">Seventh Month</month>
<month type="8">Eighth Month</month>
<month type="9">Ninth Month</month>
<month type="10">Tenth Month</month>
<month type="11">Eleventh Month</month>
<month type="12">Twelfth Month</month>
</monthWidth>
</monthContext>
</months>
<cyclicNameSets>
<cyclicNameSet type="zodiacs">
<cyclicNameContext type="format">
<cyclicNameWidth type="abbreviated">
<cyclicName type="1">Rat</cyclicName>
<cyclicName type="2">Ox</cyclicName>
<cyclicName type="3">Tiger</cyclicName>
<cyclicName type="4">Rabbit</cyclicName>
<cyclicName type="5">Dragon</cyclicName>
<cyclicName type="6">Snake</cyclicName>
<cyclicName type="7">Horse</cyclicName>
<cyclicName type="8">Goat</cyclicName>
<cyclicName type="9">Monkey</cyclicName>
<cyclicName type="10">Rooster</cyclicName>
<cyclicName type="11">Dog</cyclicName>
<cyclicName type="12">Pig</cyclicName>
</cyclicNameWidth>
</cyclicNameContext>
</cyclicNameSet>
</cyclicNameSets>
</calendar>
<calendar type="generic">
</calendar>
<calendar type="gregorian">
<months>
<monthContext type="format">
<monthWidth type="wide">
<month type="1">enWideM1</month>
<month type="2">enWideM2</month>
<month type="3">enWideM3</month>
<month type="4">enWideM4</month>
<month type="5">enWideM5</month>
<month type="6">enWideM6</month>
<month type="7">enWideM7</month>
<month type="8">enWideM8</month>
<month type="9">enWideM9</month>
<month type="10">enWideM10</month>
<month type="11">enWideM11</month>
<month type="12">enWideM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="narrow">
<month type="1">enNarrowM1</month>
<month type="2">enNarrowM2</month>
<month type="3">enNarrowM3</month>
<month type="4">enNarrowM4</month>
<month type="5">enNarrowM5</month>
<month type="6">enNarrowM6</month>
<!-- missing -->
<month type="8">enNarrowM8</month>
<month type="9">enNarrowM9</month>
<month type="10">enNarrowM10</month>
<month type="11">enNarrowM11</month>
<month type="12">enNarrowM12</month>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<era type="0">Before Christ</era>
<era type="0" alt="variant">Before Common Era</era>
<era type="1">Anno Domini</era>
<era type="1" alt="variant">Common Era</era>
</eraNames>
<eraAbbr>
<era type="0">BC</era>
<era type="0" alt="variant">BCE</era>
<era type="1">AD</era>
<era type="1" alt="variant">CE</era>
</eraAbbr>
<!-- nothing for eraNarrow -->
</eras>
</calendar>
<calendar type="hebrew">
<eras>
<eraAbbr>
<era type="0">AM</era>
</eraAbbr>
</eras>
</calendar>
<calendar type="islamic">
<eras>
<eraAbbr>
<era type="0">AH</era>
</eraAbbr>
</eras>
</calendar>
</calendars>
<fields>
<field type="era">
<displayName>era</displayName>
</field>
<field type="month">
<displayName>month</displayName>
<relative type="-1">last month</relative>
<relative type="0">this month</relative>
<relative type="1">next month</relative>
<relativeTime type="future">
<relativeTimePattern count="one">enFutMOne</relativeTimePattern>
<relativeTimePattern count="other">enFutMOther</relativeTimePattern>
</relativeTime>
<relativeTime type="past">
<relativeTimePattern count="one">enPastMOne</relativeTimePattern>
<relativeTimePattern count="other">enPastMOther</relativeTimePattern>
</relativeTime>
</field>
<field type="month-short">
<displayName>mo.</displayName>
<relative type="-1">last mo.</relative>
<relative type="0">this mo.</relative>
<relative type="1">next mo.</relative>
<relativeTime type="future">
<relativeTimePattern count="one">enShortFutMOne</relativeTimePattern>
<relativeTimePattern count="other">enShortFutMOther</relativeTimePattern>
</relativeTime>
<relativeTime type="past">
<relativeTimePattern count="one">enShortPastMOne</relativeTimePattern>
<relativeTimePattern count="other">enShortPastMOther</relativeTimePattern>
</relativeTime>
</field>
</fields>
</dates>
</ldml>
@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8" ?>
<ldml>
<identity>
<language type="en"/>
<territory type="001"/>
</identity>
<dates>
<calendars>
<calendar type="chinese">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<month type="1">001AbbrMo1</month>
<month type="2">001AbbrMo2</month>
<month type="3">001AbbrMo3</month>
<month type="4">001AbbrMo4</month>
<month type="5">001AbbrMo5</month>
<month type="6">001AbbrMo6</month>
<month type="7">001AbbrMo7</month>
<month type="8">001AbbrMo8</month>
<month type="9">001AbbrMo9</month>
<month type="10">001AbbrMo10</month>
<month type="11">001AbbrMo11</month>
<month type="12">001AbbrMo12</month>
</monthWidth>
</monthContext>
</months>
</calendar>
<calendar type="generic">
</calendar>
<calendar type="gregorian">
</calendar>
</calendars>
<fields>
<field type="month-short">
<displayName>mo</displayName>
<relativeTime type="future">
<relativeTimePattern count="one">001ShortFutMOne</relativeTimePattern>
<relativeTimePattern count="other">001ShortFutMOther</relativeTimePattern>
</relativeTime>
<relativeTime type="past">
<!-- missing -->
<relativeTimePattern count="other">001ShortPastMOther</relativeTimePattern>
</relativeTime>
</field>
<field type="month-narrow">
<displayName>mo</displayName>
<relativeTime type="future">
<relativeTimePattern count="one">001NarrowFutMOne</relativeTimePattern>
<relativeTimePattern count="two">001NarrowFutMTwo</relativeTimePattern>
<relativeTimePattern count="other">001NarrowFutMOther</relativeTimePattern>
</relativeTime>
<relativeTime type="past">
<relativeTimePattern count="one">001NarrowPastMOne</relativeTimePattern>
<relativeTimePattern count="other">001NarrowPastMOther</relativeTimePattern>
</relativeTime>
</field>
</fields>
</dates>
</ldml>
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" ?>
<ldml>
<identity>
<language type="en"/>
<territory type="GB"/>
</identity>
<dates>
<calendars>
<calendar type="gregorian">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<month type="1">gbAbbrM1</month>
<month type="2">gbAbbrM2</month>
<month type="3">gbAbbrM3</month>
<month type="4">gbAbbrM4</month>
<month type="5">gbAbbrM5</month>
<month type="6">gbAbbrM6</month>
<month type="7">gbAbbrM7</month>
<month type="8">gbAbbrM8</month>
<month type="9">gbAbbrM9</month>
<month type="10">gbAbbrM10</month>
<month type="11">gbAbbrM11</month>
<month type="12">gbAbbrM12</month>
</monthWidth>
<monthWidth type="wide">
<month type="1">gbWideM1</month>
<month type="2">gbWideM2</month>
<month type="3">gbWideM3</month>
<month type="4">gbWideM4</month>
<month type="5">gbWideM5</month>
<month type="6">gbWideM6</month>
<month type="7">gbWideM7</month>
<month type="8">gbWideM8</month>
<month type="9">gbWideM9</month>
<month type="10">gbWideM10</month>
<month type="11">gbWideM11</month>
<month type="12">gbWideM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="narrow">
<month type="1">gbNarrowM1</month>
<month type="2">gbNarrowM2</month>
<month type="3">gbNarrowM3</month>
<!-- missing -->
<month type="5">gbNarrowM5</month>
<month type="6">gbNarrowM6</month>
<!-- missing -->
<month type="8">gbNarrowM8</month>
<month type="9">gbNarrowM9</month>
<month type="10">gbNarrowM10</month>
<month type="11">gbNarrowM11</month>
<month type="12">gbNarrowM12</month>
</monthWidth>
</monthContext>
</months>
</calendar>
<calendar type="islamic">
</calendar>
</calendars>
</dates>
</ldml>
@@ -0,0 +1,646 @@
<?xml version="1.0" encoding="UTF-8" ?>
<ldml>
<identity>
<language type="root"/>
</identity>
<dates>
<calendars>
<calendar type="buddhist">
<months>
<alias source="locale" path="../../calendar[@type='generic']/months"/> <!-- gregorian in original -->
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">BE</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="chinese">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">chineseWideM01</month>
<month type="2">chineseWideM02</month>
<month type="3">chineseWideM03</month>
<month type="4">chineseWideM04</month>
<month type="5">chineseWideM05</month>
<month type="6">chineseWideM06</month>
<month type="7">chineseWideM07</month>
<month type="8">chineseWideM08</month>
<month type="9">chineseWideM09</month>
<month type="10">chineseWideM10</month>
<month type="11">chineseWideM11</month>
<month type="12">chineseWideM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">chineseNarrowM1</month>
<month type="2">chineseNarrowM2</month>
<month type="3">chineseNarrowM3</month>
<month type="4">chineseNarrowM4</month>
<month type="5">chineseNarrowM5</month>
<month type="6">chineseNarrowM6</month>
<month type="7">chineseNarrowM7</month>
<month type="8">chineseNarrowM8</month>
<month type="9">chineseNarrowM9</month>
<month type="10">chineseNarrowM10</month>
<month type="11">chineseNarrowM11</month>
<month type="12">chineseNarrowM12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<!-- chinese eras are computed, and don't fall back to gregorian -->
<cyclicNameSets>
<cyclicNameSet type="dayParts">
<cyclicNameContext type="format">
<cyclicNameWidth type="abbreviated">
<cyclicName type="1">dpAbbr1</cyclicName>
<cyclicName type="2">dpAbbr2</cyclicName>
<cyclicName type="3">dpAbbr3</cyclicName>
<cyclicName type="4">dpAbbr4</cyclicName>
<cyclicName type="5">dpAbbr5</cyclicName>
<cyclicName type="6">dpAbbr6</cyclicName>
<cyclicName type="7">dpAbbr7</cyclicName>
<cyclicName type="8">dpAbbr8</cyclicName>
<cyclicName type="9">dpAbbr9</cyclicName>
<cyclicName type="10">dpAbbr10</cyclicName>
<cyclicName type="11">dpAbbr11</cyclicName>
<cyclicName type="12">dpAbbr12</cyclicName>
</cyclicNameWidth>
<cyclicNameWidth type="narrow">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
<cyclicNameWidth type="wide">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
</cyclicNameContext>
</cyclicNameSet>
<cyclicNameSet type="days">
<alias source="locale" path="../cyclicNameSet[@type='years']"/>
</cyclicNameSet>
<cyclicNameSet type="months">
<alias source="locale" path="../cyclicNameSet[@type='years']"/>
</cyclicNameSet>
<cyclicNameSet type="years">
<cyclicNameContext type="format">
<cyclicNameWidth type="abbreviated">
<cyclicName type="1">year1</cyclicName>
<cyclicName type="2">year2</cyclicName>
<cyclicName type="3">year3</cyclicName>
<cyclicName type="4">year4</cyclicName>
<cyclicName type="5">year5</cyclicName>
<cyclicName type="6">year6</cyclicName>
<cyclicName type="7">year7</cyclicName>
<cyclicName type="8">year8</cyclicName>
<cyclicName type="9">year9</cyclicName>
<cyclicName type="10">year10</cyclicName>
<cyclicName type="11">year11</cyclicName>
<cyclicName type="12">year12</cyclicName>
<cyclicName type="13">year13</cyclicName>
<cyclicName type="14">year14</cyclicName>
<cyclicName type="15">year15</cyclicName>
<cyclicName type="16">year16</cyclicName>
<cyclicName type="17">year17</cyclicName>
<cyclicName type="18">year18</cyclicName>
<cyclicName type="19">year19</cyclicName>
<cyclicName type="20">year20</cyclicName>
<cyclicName type="21">year21</cyclicName>
<cyclicName type="22">year22</cyclicName>
<cyclicName type="23">year23</cyclicName>
<cyclicName type="24">year24</cyclicName>
<cyclicName type="25">year25</cyclicName>
<cyclicName type="26">year26</cyclicName>
<cyclicName type="27">year27</cyclicName>
<cyclicName type="28">year28</cyclicName>
<cyclicName type="29">year29</cyclicName>
<cyclicName type="30">year30</cyclicName>
<cyclicName type="31">year31</cyclicName>
<cyclicName type="32">year32</cyclicName>
<cyclicName type="33">year33</cyclicName>
<cyclicName type="34">year34</cyclicName>
<cyclicName type="35">year35</cyclicName>
<cyclicName type="36">year36</cyclicName>
<cyclicName type="37">year37</cyclicName>
<cyclicName type="38">year38</cyclicName>
<cyclicName type="39">year39</cyclicName>
<cyclicName type="40">year40</cyclicName>
<cyclicName type="41">year41</cyclicName>
<cyclicName type="42">year42</cyclicName>
<cyclicName type="43">year43</cyclicName>
<cyclicName type="44">year44</cyclicName>
<cyclicName type="45">year45</cyclicName>
<cyclicName type="46">year46</cyclicName>
<cyclicName type="47">year47</cyclicName>
<cyclicName type="48">year48</cyclicName>
<cyclicName type="49">year49</cyclicName>
<cyclicName type="50">year50</cyclicName>
<cyclicName type="51">year51</cyclicName>
<cyclicName type="52">year52</cyclicName>
<cyclicName type="53">year53</cyclicName>
<cyclicName type="54">year54</cyclicName>
<cyclicName type="55">year55</cyclicName>
<cyclicName type="56">year56</cyclicName>
<cyclicName type="57">year57</cyclicName>
<cyclicName type="58">year58</cyclicName>
<cyclicName type="59">year59</cyclicName>
<cyclicName type="60">year60</cyclicName>
</cyclicNameWidth>
<cyclicNameWidth type="narrow">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
<cyclicNameWidth type="wide">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
</cyclicNameContext>
</cyclicNameSet>
<cyclicNameSet type="zodiacs">
<cyclicNameContext type="format">
<cyclicNameWidth type="abbreviated">
<alias source="locale" path="../../../cyclicNameSet[@type='dayParts']/cyclicNameContext[@type='format']/cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
<cyclicNameWidth type="narrow">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
<cyclicNameWidth type="wide">
<alias source="locale" path="../cyclicNameWidth[@type='abbreviated']"/>
</cyclicNameWidth>
</cyclicNameContext>
</cyclicNameSet>
</cyclicNameSets>
</calendar>
<calendar type="dangi">
<months>
<alias source="locale" path="../../calendar[@type='chinese']/months"/>
</months>
<cyclicNameSets>
<alias source="locale" path="../../calendar[@type='chinese']/cyclicNameSets"/>
</cyclicNameSets>
</calendar>
<calendar type="ethiopic">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">Meskerem</month>
<month type="2">Tekemt</month>
<month type="3">Hedar</month>
<month type="4">Tahsas</month>
<month type="5">Ter</month>
<month type="6">Yekatit</month>
<month type="7">Megabit</month>
<month type="8">Miazia</month>
<month type="9">Genbot</month>
<month type="10">Sene</month>
<month type="11">Hamle</month>
<month type="12">Nehasse</month>
<month type="13">Pagumen</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
<month type="5">5</month>
<month type="6">6</month>
<month type="7">7</month>
<month type="8">8</month>
<month type="9">9</month>
<month type="10">10</month>
<month type="11">11</month>
<month type="12">12</month>
<month type="13">13</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">ERA0</era>
<era type="1">ERA1</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="ethiopic-amete-alem">
<months>
<alias source="locale" path="../../calendar[@type='ethiopic']/months"/>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">ERA0</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="generic">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">genWideM01</month>
<month type="2">genWideM02</month>
<month type="3">genWideM03</month>
<month type="4">genWideM04</month>
<month type="5">genWideM05</month>
<month type="6">genWideM06</month>
<month type="7">genWideM07</month>
<month type="8">genWideM08</month>
<month type="9">genWideM09</month>
<month type="10">genWideM10</month>
<month type="11">genWideM11</month>
<month type="12">genWideM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">genNarrowM1</month>
<month type="2">genNarrowM2</month>
<month type="3">genNarrowM3</month>
<month type="4">genNarrowM4</month>
<month type="5">genNarrowM5</month>
<month type="6">genNarrowM6</month>
<month type="7">genNarrowM7</month>
<month type="8">genNarrowM8</month>
<month type="9">genNarrowM9</month>
<month type="10">genNarrowM10</month>
<month type="11">genNarrowM11</month>
<month type="12">genNarrowM12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">ERA0</era>
<era type="1">ERA1</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="gregorian">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">gregWideM01</month>
<month type="2">gregWideM02</month>
<month type="3">gregWideM03</month>
<month type="4">gregWideM04</month>
<month type="5">gregWideM05</month>
<month type="6">gregWideM06</month>
<month type="7">gregWideM07</month>
<month type="8">gregWideM08</month>
<month type="9">gregWideM09</month>
<month type="10">gregWideM10</month>
<month type="11">gregWideM11</month>
<month type="12">gregWideM12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">gregNarrowM1</month>
<month type="2">gregNarrowM2</month>
<month type="3">gregNarrowM3</month>
<month type="4">gregNarrowM4</month>
<month type="5">gregNarrowM5</month>
<month type="6">gregNarrowM6</month>
<month type="7">gregNarrowM7</month>
<month type="8">gregNarrowM8</month>
<month type="9">gregNarrowM9</month>
<month type="10">gregNarrowM10</month>
<month type="11">gregNarrowM11</month>
<month type="12">gregNarrowM12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">BCE</era>
<era type="1">CE</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="hebrew">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">Tishri</month>
<month type="2">Heshvan</month>
<month type="3">Kislev</month>
<month type="4">Tevet</month>
<month type="5">Shevat</month>
<month type="6">Adar I</month>
<month type="7">Adar</month>
<month type="7" yeartype="leap">Adar II</month>
<month type="8">Nisan</month>
<month type="9">Iyar</month>
<month type="10">Sivan</month>
<month type="11">Tamuz</month>
<month type="12">Av</month>
<month type="13">Elul</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
<month type="5">5</month>
<month type="6">6</month>
<month type="7">7</month>
<month type="7" yeartype="leap">7</month>
<month type="8">8</month>
<month type="9">9</month>
<month type="10">10</month>
<month type="11">11</month>
<month type="12">12</month>
<month type="13">13</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">AM</era>
<!-- HY = Anno Mundi = -180799862400000 milliseconds since 1/1/1970 AD -->
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="islamic">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<month type="1">islAbbr1</month>
<month type="2">islAbbr2</month>
<month type="3">islAbbr3</month>
<month type="4">islAbbr4</month>
<month type="5">islAbbr5</month>
<month type="6">islAbbr6</month>
<month type="7">islAbbr7</month>
<month type="8">islAbbr8</month>
<month type="9">islAbbr9</month>
<month type="10">islAbbr10</month>
<month type="11">islAbbr11</month>
<month type="12">islAbbr12</month>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">islWide1</month>
<month type="2">islWide2</month>
<month type="3">islWide3</month>
<month type="4">islWide4</month>
<month type="5">islWide5</month>
<month type="6">islWide6</month>
<month type="7">islWide7</month>
<month type="8">islWide8</month>
<month type="9">islWide9</month>
<month type="10">islWide10</month>
<month type="11">islWide11</month>
<month type="12">islWide12</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
<month type="5">5</month>
<month type="6">6</month>
<month type="7">7</month>
<month type="8">8</month>
<month type="9">9</month>
<month type="10">10</month>
<month type="11">11</month>
<month type="12">12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">AH</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
<calendar type="islamic-civil">
<months>
<alias source="locale" path="../../calendar[@type='islamic']/months"/>
</months>
<eras>
<alias source="locale" path="../../calendar[@type='islamic']/eras"/>
</eras>
</calendar>
<calendar type="islamic-rgsa">
<months>
<alias source="locale" path="../../calendar[@type='islamic']/months"/>
</months>
<eras>
<alias source="locale" path="../../calendar[@type='islamic']/eras"/>
</eras>
</calendar>
<calendar type="islamic-tbla">
<months>
<alias source="locale" path="../../calendar[@type='islamic']/months"/>
</months>
</calendar>
<calendar type="islamic-umalqura">
<months>
<alias source="locale" path="../../calendar[@type='islamic']/months"/>
</months>
</calendar>
<calendar type="persian">
<months>
<monthContext type="format">
<monthWidth type="abbreviated">
<alias source="locale" path="../monthWidth[@type='wide']"/>
</monthWidth>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">Farvardin</month>
<month type="2">Ordibehesht</month>
<month type="3">Khordad</month>
<month type="4">Tir</month>
<month type="5">Mordad</month>
<month type="6">Shahrivar</month>
<month type="7">Mehr</month>
<month type="8">Aban</month>
<month type="9">Azar</month>
<month type="10">Dey</month>
<month type="11">Bahman</month>
<month type="12">Esfand</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="abbreviated">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='abbreviated']"/>
</monthWidth>
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
<month type="5">5</month>
<month type="6">6</month>
<month type="7">7</month>
<month type="8">8</month>
<month type="9">9</month>
<month type="10">10</month>
<month type="11">11</month>
<month type="12">12</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
<eras>
<eraNames>
<alias source="locale" path="../eraAbbr"/>
</eraNames>
<eraAbbr>
<era type="0">AP</era>
</eraAbbr>
<eraNarrow>
<alias source="locale" path="../eraAbbr"/>
</eraNarrow>
</eras>
</calendar>
</calendars>
<fields>
<field type="era">
<displayName>Era</displayName>
</field>
<field type="era-short">
<alias source="locale" path="../field[@type='era']"/>
</field>
<field type="era-narrow">
<alias source="locale" path="../field[@type='era-short']"/>
</field>
<field type="month">
<displayName>Month</displayName>
<relative type="-1">last month</relative>
<relative type="0">this month</relative>
<relative type="1">next month</relative>
<relativeTime type="future">
<relativeTimePattern count="other">+{0} m</relativeTimePattern>
</relativeTime>
<relativeTime type="past">
<relativeTimePattern count="other">-{0} m</relativeTimePattern>
</relativeTime>
</field>
<field type="month-short">
<alias source="locale" path="../field[@type='month']"/>
</field>
<field type="month-narrow">
<alias source="locale" path="../field[@type='month-short']"/>
</field>
</fields>
</dates>
</ldml>
@@ -0,0 +1,892 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package test
import "golang.org/x/text/internal/cldrtree"
var tree = &cldrtree.Tree{locales, indices, buckets}
// Path values:
// <width>
// - wAbbreviated
// - wNarrow
// - wWide
// <context>
// - format
// - stand-alone
// <month>
// - leap7
// - 1..13
// <cycleType>
// - cycDayParts
// - cycDays
// - cycMonths
// - cycYears
// - cycZodiacs
// <relative>
// - before1
// - current
// - after1
//
// - calendars
// - buddhist
// - chinese
// - dangi
// - ethiopic
// - ethiopic-amete-alem
// - generic
// - gregorian
// - hebrew
// - islamic
// - islamic-civil
// - islamic-rgsa
// - islamic-tbla
// - islamic-umalqura
// - persian
// - months
// - <context>
// - <width>
// - <month>
// - eras
// - <width>
// - ""
// - variant
// - 0..1
// - filler
// - 0
// - cyclicNameSets
// - <cycleType>
// - <context>
// - <width>
// - 0..60
// - fields
// - era
// - era-short
// - era-narrow
// - month
// - month-short
// - month-narrow
// - displayName
// - ""
// - relative
// - <relative>
// - relativeTime
// - future
// - past
// - other
// - one
// - two
//
// Nr elem: 394
// uniqued size: 9778
// total string size: 9931
// bucket waste: 0
// width specifies a property of a CLDR field.
type width uint16
// context specifies a property of a CLDR field.
type context uint16
// month specifies a property of a CLDR field.
type month uint16
// cycleType specifies a property of a CLDR field.
type cycleType uint16
// relative specifies a property of a CLDR field.
type relative uint16
const (
calendars = 0 // calendars
fields = 1 // fields
buddhist = 0 // buddhist
chinese = 1 // chinese
dangi = 2 // dangi
ethiopic = 3 // ethiopic
ethiopicAmeteAlem = 4 // ethiopic-amete-alem
generic = 5 // generic
gregorian = 6 // gregorian
hebrew = 7 // hebrew
islamic = 8 // islamic
islamicCivil = 9 // islamic-civil
islamicRgsa = 10 // islamic-rgsa
islamicTbla = 11 // islamic-tbla
islamicUmalqura = 12 // islamic-umalqura
persian = 13 // persian
months = 0 // months
eras = 1 // eras
filler = 2 // filler
cyclicNameSets = 3 // cyclicNameSets
format context = 0 // format
standAlone context = 1 // stand-alone
wAbbreviated width = 0 // wAbbreviated
wNarrow width = 1 // wNarrow
wWide width = 2 // wWide
leap7 month = 0 // leap7
variant = 1 // variant
cycDayParts cycleType = 0 // cycDayParts
cycDays cycleType = 1 // cycDays
cycMonths cycleType = 2 // cycMonths
cycYears cycleType = 3 // cycYears
cycZodiacs cycleType = 4 // cycZodiacs
era = 0 // era
eraShort = 1 // era-short
eraNarrow = 2 // era-narrow
month = 3 // month
monthShort = 4 // month-short
monthNarrow = 5 // month-narrow
displayName = 0 // displayName
relative = 1 // relative
relativeTime = 2 // relativeTime
before1 relative = 0 // before1
current relative = 1 // current
after1 relative = 2 // after1
future = 0 // future
past = 1 // past
other = 0 // other
one = 1 // one
two = 2 // two
)
var locales = []uint32{ // 775 elements
// Entry 0 - 1F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 20 - 3F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 40 - 5F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 60 - 7F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 80 - 9F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x0000027a, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000027a,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000027a, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
// Entry A0 - BF
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x000003dd, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000027a,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000027a, 0x0000037f, 0x0000027a,
// Entry C0 - DF
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000027a, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
// Entry E0 - FF
0x0000037f, 0x0000037f, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000027a, 0x0000027a, 0x0000037f,
0x0000037f, 0x0000027a, 0x0000037f, 0x0000037f,
0x0000037f, 0x0000037f, 0x0000037f, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 100 - 11F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 120 - 13F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 140 - 15F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 160 - 17F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 180 - 19F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1A0 - 1BF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1C0 - 1DF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 1E0 - 1FF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 200 - 21F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 220 - 23F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 240 - 25F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 260 - 27F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 280 - 29F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2A0 - 2BF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2C0 - 2DF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 2E0 - 2FF
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
// Entry 300 - 31F
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x0000027a,
} // Size: xxxx bytes
var indices = []uint16{ // 1070 elements
// Entry 0 - 3F
0x0002, 0x0003, 0x0259, 0x000e, 0x0012, 0x0022, 0x00b9, 0x00c1,
0x00fd, 0x010d, 0x0147, 0x0181, 0x01bc, 0x0204, 0x020b, 0x0212,
0x0219, 0x0220, 0x0003, 0x9005, 0x0016, 0x001f, 0x0003, 0x001a,
0x8000, 0x8000, 0x0001, 0x001c, 0x0001, 0x0000, 0x0000, 0x0001,
0x0000, 0x0003, 0x0004, 0x0027, 0x0000, 0x00b6, 0x0050, 0x0002,
0x002a, 0x003d, 0x0003, 0x8002, 0x9001, 0x002e, 0x000d, 0x0000,
0xffff, 0x0102, 0x0111, 0x0120, 0x012f, 0x013e, 0x014d, 0x015c,
0x016b, 0x017a, 0x0189, 0x0198, 0x01a7, 0x0003, 0x9000, 0x0041,
// Entry 40 - 7F
0x9000, 0x000d, 0x0000, 0xffff, 0x01b6, 0x01c6, 0x01d6, 0x01e6,
0x01f6, 0x0206, 0x0216, 0x0226, 0x0236, 0x0246, 0x0257, 0x0268,
0x0005, 0x0056, 0x8003, 0x8003, 0x006b, 0x00b0, 0x0001, 0x0058,
0x0003, 0x005c, 0x8000, 0x8000, 0x000d, 0x0000, 0xffff, 0x0279,
0x0281, 0x0289, 0x0291, 0x0299, 0x02a1, 0x02a9, 0x02b1, 0x02b9,
0x02c1, 0x02ca, 0x02d3, 0x0001, 0x006d, 0x0003, 0x0071, 0x8000,
0x8000, 0x003d, 0x0000, 0xffff, 0x02dc, 0x02e2, 0x02e8, 0x02ee,
0x02f4, 0x02fa, 0x0300, 0x0306, 0x030c, 0x0312, 0x0319, 0x0320,
// Entry 80 - BF
0x0327, 0x032e, 0x0335, 0x033c, 0x0343, 0x034a, 0x0351, 0x0358,
0x035f, 0x0366, 0x036d, 0x0374, 0x037b, 0x0382, 0x0389, 0x0390,
0x0397, 0x039e, 0x03a5, 0x03ac, 0x03b3, 0x03ba, 0x03c1, 0x03c8,
0x03cf, 0x03d6, 0x03dd, 0x03e4, 0x03eb, 0x03f2, 0x03f9, 0x0400,
0x0407, 0x040e, 0x0415, 0x041c, 0x0423, 0x042a, 0x0431, 0x0438,
0x043f, 0x0446, 0x044d, 0x0454, 0x045b, 0x0462, 0x0469, 0x0470,
0x0001, 0x00b2, 0x0003, 0xa000, 0x8000, 0x8000, 0x0001, 0x0000,
0x0477, 0x0004, 0x9001, 0x0000, 0x00be, 0x9001, 0x0001, 0x0000,
// Entry C0 - FF
0x0576, 0x0003, 0x00c5, 0x00f0, 0x00fa, 0x0002, 0x00c8, 0x00dc,
0x0003, 0x8002, 0x9001, 0x00cc, 0x000e, 0x0000, 0xffff, 0x0675,
0x067e, 0x0685, 0x068b, 0x0692, 0x0696, 0x069e, 0x06a6, 0x06ad,
0x06b4, 0x06b9, 0x06bf, 0x06c7, 0x0003, 0x9000, 0x00e0, 0x9000,
0x000e, 0x0000, 0xffff, 0x06cf, 0x06d1, 0x06d3, 0x06d5, 0x06d7,
0x06d9, 0x06db, 0x06dd, 0x06df, 0x06e1, 0x06e4, 0x06e7, 0x06ea,
0x0003, 0x00f4, 0x8000, 0x8000, 0x0001, 0x00f6, 0x0002, 0x0000,
0x06ed, 0x06f2, 0x0001, 0x0000, 0x06f7, 0x0003, 0x9003, 0x0101,
// Entry 100 - 13F
0x010a, 0x0003, 0x0105, 0x8000, 0x8000, 0x0001, 0x0107, 0x0001,
0x0000, 0x06ed, 0x0001, 0x0000, 0x07f6, 0x0003, 0x0111, 0x013a,
0x0144, 0x0002, 0x0114, 0x0127, 0x0003, 0x8002, 0x9001, 0x0118,
0x000d, 0x0000, 0xffff, 0x08f5, 0x0900, 0x090b, 0x0916, 0x0921,
0x092c, 0x0937, 0x0942, 0x094d, 0x0958, 0x0963, 0x096e, 0x0003,
0x9000, 0x012b, 0x9000, 0x000d, 0x0000, 0xffff, 0x0979, 0x0985,
0x0991, 0x099d, 0x09a9, 0x09b5, 0x09c1, 0x09cd, 0x09d9, 0x09e5,
0x09f2, 0x09ff, 0x0003, 0x013e, 0x8000, 0x8000, 0x0001, 0x0140,
// Entry 140 - 17F
0x0002, 0x0000, 0x06ed, 0x06f2, 0x0001, 0x0000, 0x0a0c, 0x0003,
0x014b, 0x0174, 0x017e, 0x0002, 0x014e, 0x0161, 0x0003, 0x8002,
0x9001, 0x0152, 0x000d, 0x0000, 0xffff, 0x0b0b, 0x0b17, 0x0b23,
0x0b2f, 0x0b3b, 0x0b47, 0x0b53, 0x0b5f, 0x0b6b, 0x0b77, 0x0b83,
0x0b8f, 0x0003, 0x9000, 0x0165, 0x9000, 0x000d, 0x0000, 0xffff,
0x0b9b, 0x0ba8, 0x0bb5, 0x0bc2, 0x0bcf, 0x0bdc, 0x0be9, 0x0bf6,
0x0c03, 0x0c10, 0x0c1e, 0x0c2c, 0x0003, 0x0178, 0x8000, 0x8000,
0x0001, 0x017a, 0x0002, 0x0000, 0x0c3a, 0x0c3e, 0x0001, 0x0000,
// Entry 180 - 1BF
0x0c41, 0x0003, 0x0185, 0x01b0, 0x01b9, 0x0002, 0x0188, 0x019c,
0x0003, 0x8002, 0x9001, 0x018c, 0x000e, 0x0000, 0x0d6f, 0x0d40,
0x0d47, 0x0d4f, 0x0d56, 0x0d5c, 0x0d63, 0x0d6a, 0x0d77, 0x0d7d,
0x0d82, 0x0d88, 0x0d8e, 0x0d91, 0x0003, 0x9000, 0x01a0, 0x9000,
0x000e, 0x0000, 0x06db, 0x06cf, 0x06d1, 0x06d3, 0x06d5, 0x06d7,
0x06d9, 0x06db, 0x06dd, 0x06df, 0x06e1, 0x06e4, 0x06e7, 0x06ea,
0x0003, 0x01b4, 0x8000, 0x8000, 0x0001, 0x01b6, 0x0001, 0x0000,
0x0d96, 0x0001, 0x0000, 0x0d99, 0x0003, 0x01c0, 0x01f8, 0x0201,
// Entry 1C0 - 1FF
0x0002, 0x01c3, 0x01e5, 0x0003, 0x01c7, 0x9001, 0x01d6, 0x000d,
0x0000, 0xffff, 0x0e98, 0x0ea1, 0x0eaa, 0x0eb3, 0x0ebc, 0x0ec5,
0x0ece, 0x0ed7, 0x0ee0, 0x0ee9, 0x0ef3, 0x0efd, 0x000d, 0x0000,
0xffff, 0x0f07, 0x0f10, 0x0f19, 0x0f22, 0x0f2b, 0x0f34, 0x0f3d,
0x0f46, 0x0f4f, 0x0f58, 0x0f62, 0x0f6c, 0x0003, 0x9000, 0x01e9,
0x9000, 0x000d, 0x0000, 0xffff, 0x06cf, 0x06d1, 0x06d3, 0x06d5,
0x06d7, 0x06d9, 0x06db, 0x06dd, 0x06df, 0x06e1, 0x06e4, 0x06e7,
0x0003, 0x01fc, 0x8000, 0x8000, 0x0001, 0x01fe, 0x0001, 0x0000,
// Entry 200 - 23F
0x0f76, 0x0001, 0x0000, 0x0f79, 0x0003, 0x9008, 0x9008, 0x0208,
0x0001, 0x0000, 0x1078, 0x0003, 0x9008, 0x9008, 0x020f, 0x0001,
0x0000, 0x1177, 0x0003, 0x9008, 0x0000, 0x0216, 0x0001, 0x0000,
0x1276, 0x0003, 0x9008, 0x0000, 0x021d, 0x0001, 0x0000, 0x1375,
0x0003, 0x0224, 0x024d, 0x0256, 0x0002, 0x0227, 0x023a, 0x0003,
0x8002, 0x9001, 0x022b, 0x000d, 0x0000, 0xffff, 0x1474, 0x147e,
0x148a, 0x1492, 0x1496, 0x149d, 0x14a7, 0x14ac, 0x14b1, 0x14b6,
0x14ba, 0x14c1, 0x0003, 0x9000, 0x023e, 0x9000, 0x000d, 0x0000,
// Entry 240 - 27F
0xffff, 0x06cf, 0x06d1, 0x06d3, 0x06d5, 0x06d7, 0x06d9, 0x06db,
0x06dd, 0x06df, 0x06e1, 0x06e4, 0x06e7, 0x0003, 0x0251, 0x8000,
0x8000, 0x0001, 0x0253, 0x0001, 0x0000, 0x14c8, 0x0001, 0x0000,
0x14cb, 0x0006, 0x0260, 0x8000, 0x8001, 0x0265, 0x8003, 0x8004,
0x0001, 0x0262, 0x0001, 0x0000, 0x15ca, 0x0003, 0x0269, 0x026c,
0x0271, 0x0001, 0x0000, 0x15ce, 0x0003, 0x0000, 0x15d4, 0x15df,
0x15ea, 0x0002, 0x0274, 0x0277, 0x0001, 0x0000, 0x15f5, 0x0001,
0x0000, 0x15fc, 0x0002, 0x0003, 0x00cc, 0x0009, 0x000d, 0x001b,
// Entry 280 - 2BF
0x0000, 0x0000, 0x0000, 0x0060, 0x0067, 0x00b0, 0x00be, 0x0003,
0x0000, 0x0011, 0x0018, 0x0001, 0x0013, 0x0001, 0x0015, 0x0001,
0x0000, 0x0000, 0x0001, 0x0000, 0x1603, 0x0004, 0x0020, 0x0000,
0x005d, 0x0044, 0x0001, 0x0022, 0x0003, 0x0026, 0x0000, 0x0035,
0x000d, 0x0000, 0xffff, 0x1702, 0x1706, 0x170a, 0x170e, 0x1712,
0x1716, 0x171a, 0x171e, 0x1722, 0x1726, 0x172b, 0x1730, 0x000d,
0x0000, 0xffff, 0x1735, 0x1741, 0x174e, 0x175a, 0x1767, 0x1773,
0x177f, 0x178d, 0x179a, 0x17a6, 0x17b2, 0x17c1, 0x0005, 0x0000,
// Entry 2C0 - 2FF
0x0000, 0x0000, 0x0000, 0x004a, 0x0001, 0x004c, 0x0001, 0x004e,
0x000d, 0x0000, 0xffff, 0x17cf, 0x17d3, 0x17d6, 0x17dc, 0x17e3,
0x17ea, 0x17f0, 0x17f6, 0x17fb, 0x1802, 0x180a, 0x180e, 0x0001,
0x0000, 0x1812, 0x0003, 0x0000, 0x0000, 0x0064, 0x0001, 0x0000,
0x1911, 0x0003, 0x006b, 0x0093, 0x00ad, 0x0002, 0x006e, 0x0081,
0x0003, 0x0000, 0x0000, 0x0072, 0x000d, 0x0000, 0xffff, 0x1a10,
0x1a19, 0x1a22, 0x1a2b, 0x1a34, 0x1a3d, 0x1a46, 0x1a4f, 0x1a58,
0x1a61, 0x1a6b, 0x1a75, 0x0002, 0x0000, 0x0084, 0x000d, 0x0000,
// Entry 300 - 33F
0xffff, 0x1a7f, 0x1a8a, 0x1a95, 0x1aa0, 0x1aab, 0x1ab6, 0xffff,
0x1ac1, 0x1acc, 0x1ad7, 0x1ae3, 0x1aef, 0x0003, 0x00a2, 0x0000,
0x0097, 0x0002, 0x009a, 0x009e, 0x0002, 0x0000, 0x1afb, 0x1b1b,
0x0002, 0x0000, 0x1b09, 0x1b27, 0x0002, 0x00a5, 0x00a9, 0x0002,
0x0000, 0x1b32, 0x1b35, 0x0002, 0x0000, 0x0c3a, 0x0c3e, 0x0001,
0x0000, 0x1b38, 0x0003, 0x0000, 0x00b4, 0x00bb, 0x0001, 0x00b6,
0x0001, 0x00b8, 0x0001, 0x0000, 0x0d96, 0x0001, 0x0000, 0x1c37,
0x0003, 0x0000, 0x00c2, 0x00c9, 0x0001, 0x00c4, 0x0001, 0x00c6,
// Entry 340 - 37F
0x0001, 0x0000, 0x0f76, 0x0001, 0x0000, 0x1d36, 0x0005, 0x00d2,
0x0000, 0x0000, 0x00d7, 0x00ee, 0x0001, 0x00d4, 0x0001, 0x0000,
0x1e35, 0x0003, 0x00db, 0x00de, 0x00e3, 0x0001, 0x0000, 0x1e39,
0x0003, 0x0000, 0x15d4, 0x15df, 0x15ea, 0x0002, 0x00e6, 0x00ea,
0x0002, 0x0000, 0x1e49, 0x1e3f, 0x0002, 0x0000, 0x1e60, 0x1e55,
0x0003, 0x00f2, 0x00f5, 0x00fa, 0x0001, 0x0000, 0x1e6d, 0x0003,
0x0000, 0x1e71, 0x1e7a, 0x1e83, 0x0002, 0x00fd, 0x0101, 0x0002,
0x0000, 0x1e9b, 0x1e8c, 0x0002, 0x0000, 0x1ebc, 0x1eac, 0x0002,
// Entry 380 - 3BF
0x0003, 0x0033, 0x0007, 0x0000, 0x000b, 0x0000, 0x0000, 0x0000,
0x0025, 0x002c, 0x0003, 0x000f, 0x0000, 0x0022, 0x0001, 0x0011,
0x0001, 0x0013, 0x000d, 0x0000, 0xffff, 0x1ece, 0x1ed9, 0x1ee4,
0x1eef, 0x1efa, 0x1f05, 0x1f10, 0x1f1b, 0x1f26, 0x1f31, 0x1f3d,
0x1f49, 0x0001, 0x0001, 0x0000, 0x0003, 0x0000, 0x0000, 0x0029,
0x0001, 0x0001, 0x00ff, 0x0003, 0x0000, 0x0000, 0x0030, 0x0001,
0x0001, 0x01fe, 0x0006, 0x0000, 0x0000, 0x0000, 0x0000, 0x003a,
0x004b, 0x0003, 0x003e, 0x0000, 0x0041, 0x0001, 0x0001, 0x02fd,
// Entry 3C0 - 3FF
0x0002, 0x0044, 0x0048, 0x0002, 0x0001, 0x0310, 0x0300, 0x0001,
0x0001, 0x0322, 0x0003, 0x004f, 0x0000, 0x0052, 0x0001, 0x0001,
0x02fd, 0x0002, 0x0055, 0x005a, 0x0003, 0x0001, 0x0357, 0x0335,
0x0346, 0x0002, 0x0001, 0x037c, 0x036a, 0x0001, 0x0002, 0x0009,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000c, 0x0000,
0x004a, 0x0003, 0x0010, 0x0000, 0x0047, 0x0002, 0x0013, 0x0035,
0x0003, 0x0017, 0x0000, 0x0026, 0x000d, 0x0001, 0xffff, 0x0390,
0x0399, 0x03a2, 0x03ab, 0x03b4, 0x03bd, 0x03c6, 0x03cf, 0x03d8,
// Entry 400 - 43F
0x03e1, 0x03eb, 0x03f5, 0x000d, 0x0001, 0xffff, 0x03ff, 0x0408,
0x0411, 0x041a, 0x0423, 0x042c, 0x0435, 0x043e, 0x0447, 0x0450,
0x045a, 0x0464, 0x0002, 0x0000, 0x0038, 0x000d, 0x0001, 0xffff,
0x046e, 0x0479, 0x0484, 0xffff, 0x048f, 0x049a, 0xffff, 0x04a5,
0x04b0, 0x04bb, 0x04c7, 0x04d3, 0x0001, 0x0001, 0x04df, 0x0003,
0x0000, 0x0000, 0x004e, 0x0001, 0x0001, 0x05de,
} // Size: xxxx bytes
var buckets = []string{
bucket0,
bucket1,
}
var bucket0 string = "" + // Size: xxxx bytes
"\x02BE\xfe\x01\x94\xfd\xc2\xfa/\xfc\xc0A\xd3\xff\x12\x04[s\xc8nO\xf9_" +
"\xf6b\xa5\xee\xe8*\xbd\xf4J-\x0bu\xfb\x18\x0d\xafH\xa7\x9e\xe0\xb1\x0d9F" +
"Q\x85\x0fԡx\x89.\xe2\x85\xec\xe1Q\x14Ux\x08u\xd6N\xe2\xd3\xd0\xd0\xdek" +
"\xf8\xf9\xb4L\xe8_\xf0DƱ\xf8;\x8e\x88;\xbf\x85z\xab\x99ŲR\xc7B\x9c2" +
"\U000e8bb7\x9e\xf8V\xf6Y\xc1\x8f\x0d\xce\xccw\xc7^z\x81\xbf\xde'_g\xcf" +
"\xe2B\xcf<\xc3T\xf3\xed\xe2־\xccN\xa3\xae^\x88Rj\x9fJW\x8b˞\xf2ԦS\x14v" +
"\x8dm)\x97a\xea\x9eOZ\xa6\xae\xc3\xfcxƪ\xe0\x81\xac\x81 \xc7 \xef\xcdlꄶ" +
"\x92^`{\xe0cqo\x96\xdd\xcd\xd0\x1du\x04\\?\x00\x0f\x8ayk\xcelQ,8\x01\xaa" +
"\xca\xee߭[Pfd\xe8\xc0\xe4\xa7q\xecื\xc1\x96]\x91\x81%\x1b|\x9c\x9c\xa5 Z" +
"\xfc\x16\xa26\xa2\xef\xcd\xd2\xd1-*y\xd0\x0echineseWideM01\x0echineseWid" +
"eM02\x0echineseWideM03\x0echineseWideM04\x0echineseWideM05\x0echineseWid" +
"eM06\x0echineseWideM07\x0echineseWideM08\x0echineseWideM09\x0echineseWid" +
"eM10\x0echineseWideM11\x0echineseWideM12\x0fchineseNarrowM1\x0fchineseNa" +
"rrowM2\x0fchineseNarrowM3\x0fchineseNarrowM4\x0fchineseNarrowM5\x0fchine" +
"seNarrowM6\x0fchineseNarrowM7\x0fchineseNarrowM8\x0fchineseNarrowM9\x10c" +
"hineseNarrowM10\x10chineseNarrowM11\x10chineseNarrowM12\x07dpAbbr1\x07dp" +
"Abbr2\x07dpAbbr3\x07dpAbbr4\x07dpAbbr5\x07dpAbbr6\x07dpAbbr7\x07dpAbbr8" +
"\x07dpAbbr9\x08dpAbbr10\x08dpAbbr11\x08dpAbbr12\x05year1\x05year2\x05yea" +
"r3\x05year4\x05year5\x05year6\x05year7\x05year8\x05year9\x06year10\x06ye" +
"ar11\x06year12\x06year13\x06year14\x06year15\x06year16\x06year17\x06year" +
"18\x06year19\x06year20\x06year21\x06year22\x06year23\x06year24\x06year25" +
"\x06year26\x06year27\x06year28\x06year29\x06year30\x06year31\x06year32" +
"\x06year33\x06year34\x06year35\x06year36\x06year37\x06year38\x06year39" +
"\x06year40\x06year41\x06year42\x06year43\x06year44\x06year45\x06year46" +
"\x06year47\x06year48\x06year49\x06year50\x06year51\x06year52\x06year53" +
"\x06year54\x06year55\x06year56\x06year57\x06year58\x06year59\x06year60" +
"\xfet\xa8(\x0a\xe9C\x9e\xb0֮\xca\x08#\xae\x02\xd6}\x86j\xc2\xc4\xfeJrPS" +
"\xda\x11\x9b\x9dOQQ@\xa2\xd7#\x9c@\xb4ZÕ\x0d\x94\x1f\xc4\xfe\x1c\x0c\xb9" +
"j\xd3\x22\xd6\x22\x82)_\xbf\xe1\x1e&\xa43\x07m\xb5\xc1DL:4\xd3*\\J\x7f" +
"\xfb\xe8с\xf7\xed;\x8c\xfe\x90O\x93\xf8\xf0m)\xbc\xd9\xed\x84{\x18.\x04d" +
"\x10\xf4Kİ\xf3\xf0:\x0d\x06\x82\x0a0\xf2W\xf8\x11A0g\x8a\xc0E\x86\xc1" +
"\xe3\xc94,\x8b\x80U\xc4f؆D\x1d%\x99\x06͉֚K\x96\x8a\xe9\xf0띖\\\xe6\xa4i<N" +
"\xbe\x88\x15\x01\xb7لkf\xeb\x02\xb5~\\\xda{l\xbah\x91\xd6\x16\xbdhl7\xb8" +
"4a:Ⱥ\xa2,\x00\x8f\xfeh\x83RsJ\xe4\xe3\xf1!z\xcd_\x83'\x08\x140\x18g\xb5" +
"\xd0g\x11\xb28\x00\x1cyW\xb2w\x19\xce?1\x88\xdf\xe5}\xee\xbfo\x82YZ\x10" +
"\xf7\xbbV,\xa0M\\='\xfe\x94)X\xc6\xdb2bg\x06I\xf3\xbc\x97٢1g5\xed悥\xdf" +
"\xe6\xf1\xa0\x11\xfbɊ\xd0\xfb\xe7\x90\x00<\x01\xe8\xe9\x96w\x03\xaff^" +
"\x9fr@\x7fK\x03\xd4\xfd\xb4t\xaa\xfe\x8a\x0d>\x05\x15\xddFP\xcfQ\x17+" +
"\x81$\x8b\xcb\x7f\x96\x9e@\x0bl[\x12wh\xb1\xc4\x12\xfa\xe9\x8c\xf5v1\xcf" +
"7\x03;KJ\xba}~\xd3\x19\xba\x14rI\xc9\x08\xacp\xd1\xc4\x06\xda\xde\x0e" +
"\x82\x8e\xb6\xba\x0dʨ\x82\x85T>\x10!<d?\xc8`;X`#fp\xba\xbc\xad\x0b\xd7" +
"\xf4\xc4\x19\x0e26#\xa8h\xd1\xea\xe1v\x9f@\xa2f1C\x1b;\xd5!V\x05\xd2\x08" +
"o\xeaԙ\xacc\xa4e=\x12(=V\x01\x9c7\x95\xa9\x8a\x12m\x09\xcf\xcb\xe3l\xdc" +
"\xc97\x88\xa5@\x9f\x8bnB\xc2݃\xaaFa\x18R\xad\x0bP(w\\w\x16\x90\xb6\x85N" +
"\x05\xb3w$\x1es\xa8\x83\xddw\xaf\xf00,m\xa8f\\B4\x1d\xdaJ\xda\xea\x08Mes" +
"kerem\x06Tekemt\x05Hedar\x06Tahsas\x03Ter\x07Yekatit\x07Megabit\x06Miazi" +
"a\x06Genbot\x04Sene\x05Hamle\x07Nehasse\x07Pagumen\x011\x012\x013\x014" +
"\x015\x016\x017\x018\x019\x0210\x0211\x0212\x0213\x04ERA0\x04ERA1\xfeYZ" +
"\xb1\x89_\x96RH\x9d\xd2δ\x9c$t0<\xbbD¹C\x03\xdbf,\x9cf\xb8x)\x05\x19\x0f" +
"\x1e\x165\xb6>4\x87\x8d?$o\xad\xfc\xe3D\xe7N\xf8\x13\x09\x0f\x800\xbc" +
"\xd5%\xac\x10e?\xf1\x82\xe0\x01 \xf7\xe1\xf7\x96\xfa\x0f\xc1k\xa7\xbb" +
"\x90\xbe*3\xe8|=`\xabb\x84q\xa4 \x83C\x83f\x18\x01\xbb\x0b\xfd\x8el\x14" +
"\x00q\xdb\x1e\xb2\xf7\xa1\x81\x94\xf1\xa0E\xa9L\x07\x885\xc7]\xff/>\x83a" +
"\x80\xba\xad\x9e\x95]\xa8@\xdct\xc4\xdc$\x98\xf8\xc2\x01\xae\xc2T\xa0" +
"\xe3dv\xb2\xee\xb1$\xfdƯ\xc1\xb7\xd8\x09\xc5\xe0\x8b^\x0e\x84Z\xaf\x9bl9" +
"W\xe9Z\xb4\xaa\x8e\x10|ۇ?-\xacR\x7f\x16\xc4լ\x87`v\x8aq^Fi˄\x0c%1\x7f" +
"\x9a6\x87t\xe5\x064\x1a\xfbFP>(\xe9.Q\xbd\x7f}KS\xb9\x02=V\xf9\xb9\xec" +
"\x99\x1a©ټE\xffd\xbb+\xf1M@Q\xa7`K\xfe(\xba\xd4M\x98\xbf\xe3\x0eT\xeb" +
"\xc0\x7f\xa4_b\xaa\xbe9\\\xc9O\xa0\xa0\xf2F\xb5ҋ.?m\xeb)\x90\x18pX\xe4" +
"\xbf\xd2\xd1d\x06S\xfc8\xa3\x0b\x0f\x83#\x1a\x96[A;\x0f&\x92~\x0d\x03." +
"\x83\x0bs+\u07b3\x09L\xb1\xa5\xfam\xec\x9f\x067^\xa2_\xe5|(S\xea\x092" +
"\x0aȀ9v\xeaʠ\x95\xc0/\x86\x9f\xd7\xdc1\x07$u\x94\x0c7Q\xd5b\x83Ğ/\xef" +
"\xd4\x1d\xf6v\xbd\xcbXU\xa0G\x0e\xfd-\xabzr\xcc^_9\xff~\xea\x0fC:\x9f綦u" +
"\xbc*\xc5\x0c\xd2\x18\xc0\x09\xe2\x1f\x91\x0f\x9d\xdb\x09\xa0\xd0Y\xc4" +
"\xcd},\xa6Z#I\xdfz\x86}\xbe݁\xe9ԉ\x16\x19\xc8<B\x89\\\xe1\xb6q\xcbzK\xca" +
"\xed\x910\xab\x1d\xd4\xcc-\x81G\xa1YPV\xb5_\x92\xa3U\xdbvZ܍=\xf8\x8e\xb9" +
"=R\x7f\x7f~\xc8i\xa7W\x03\xba\x86Գa\x10\xe9\xa0DY<\x96h\x15\xd1S\x0agenW" +
"ideM01\x0agenWideM02\x0agenWideM03\x0agenWideM04\x0agenWideM05\x0agenWid" +
"eM06\x0agenWideM07\x0agenWideM08\x0agenWideM09\x0agenWideM10\x0agenWideM" +
"11\x0agenWideM12\x0bgenNarrowM1\x0bgenNarrowM2\x0bgenNarrowM3\x0bgenNarr" +
"owM4\x0bgenNarrowM5\x0bgenNarrowM6\x0bgenNarrowM7\x0bgenNarrowM8\x0bgenN" +
"arrowM9\x0cgenNarrowM10\x0cgenNarrowM11\x0cgenNarrowM12\xfefS\x87\xdc8" +
"\xe5\x07\xe7E\x8d\xf3\xe6\xb0\xf0@5\xef\x94\x19\x88>\x03\xc0\x8e-u;\x08" +
"\xc9\x09\x0a\xab\xf1u\xfd\xb6>\x8c\xf9\xa5\xf0x7\x04\xc7A\xc1\x95\x15v&@" +
"\x1d\x94\x9e\xaam\xbd\x04\u05ed\xe5t\x9e\xabTp\xbf^\x9c\x18\xccyݤ\xe1." +
"\xfeVNˊ@\x19\xe1\xc4\x1f-\x82\x17\xc0ä7\x12\xae\x22o\xcewf1\xae\x19\xb3&" +
"\xa4\x11\xa2\x84t\x1b\xe0\x1f\xb4\xf3\xae\xfc]\uf58e\xb6\xcc\xeb\x86\x04" +
"\x86KK\x9a\xd3sˬ\x10\xea~f[)J\x8ay\x06\x91\xaaRF\xe6\xff\x8fз\xfb\x9b" +
"\x9aj\x95\x8e\xbf(\xec^\x8f\xaacJu*\xc9q\xc0\xbc\x0ccp\x04\xce\xe2b\xce" +
"\xf1.|\xf6\xd9\xcdwrQ=\xbdFav\xa0z\xb7\xc4\xf4e\xfe\x09twy\xc3\x14\x95扶_" +
"U{\x0aJ\xf6SX\x80\xb8%S\xd1&\xffr\x13T)\x05Zdt\x95\x993>\x96U\xb4:\xa3g(" +
"\xbbc\xbd(d'D\x0bgregWideM01\x0bgregWideM02\x0bgregWideM03\x0bgregWideM0" +
"4\x0bgregWideM05\x0bgregWideM06\x0bgregWideM07\x0bgregWideM08\x0bgregWid" +
"eM09\x0bgregWideM10\x0bgregWideM11\x0bgregWideM12\x0cgregNarrowM1\x0cgre" +
"gNarrowM2\x0cgregNarrowM3\x0cgregNarrowM4\x0cgregNarrowM5\x0cgregNarrowM" +
"6\x0cgregNarrowM7\x0cgregNarrowM8\x0cgregNarrowM9\x0dgregNarrowM10\x0dgr" +
"egNarrowM11\x0dgregNarrowM12\x03BCE\x02CE\xfe\x1b\xaa\x9f0]\\%\xe0R)\xbb" +
"3/~\x83u\xb7\xc4^\x1e\xa0F\x1d3<<r_tg\xb4A\xb7\xd0\xf5\xe8\x02B\xb7\xa4" +
"\xa1\x8e\xda\xe8z\xf2b\xa1\x0d3\xfc\x0d}\x9a\x0aF4\xf0{\xea\\Z\x00!/\xbc" +
"Y\x1b\xdd\xfe\xbb\x943OJ-\x92\x86s\xd2b\xad\xab\xaa\x82\x98;\x94\x96_U˒" +
"\x8ch?GB\xc1 \x99\xb72\xbd\x03c\x9c\x19yu-\x83u\x18$;t\xd6s\x01$^\xfeVa" +
"\xea\xa0B\x89\x17\xf5ZX\xcc3\xdb(M\x1f,\xaa\x05\xf1\xfd{f\x02\x98\x0f" +
"\x06\xd1\x07#\x0b\xf3\x10\xb4\x8c\xf6)B\x01}\xd6h\x0e\xb3\xab\x131\x0e" +
"\xca\x15\x81\xaf\xb3Ŷ\x19\xe5\xce\x06\x82\xd0\xdf\xc1\xfa\xde9(\x17\x9a" +
"\x9d\u008c\xd1p\xb5\xb5TN\x7f\x9bc\xb8=\xa3t\xaf\xa2\x8e\x14x\xdc\\)\x97" +
"\xa9\x83G\xec\x1eqQN\xb2h\x22\x16-\xc7Ù/\xd4\x1f\x0b,\xcc&\xe5^{\xd8\xf3" +
"\xfa7!_wKR\x16\xb5\x06Tishri\x07Heshvan\x06Kislev\x05Tevet\x06Shevat\x06" +
"Adar I\x04Adar\x07Adar II\x05Nisan\x04Iyar\x05Sivan\x05Tamuz\x02Av\x04El" +
"ul\x02AM\xfe\xb8r\xb6\xc28\x8d\xd9P\x16\x0e?\xfa;\xf0b<C\x86U\xbb\\\x8cv" +
"\x8a\xb3:\xe2\xee9!\x86t\x00\x86e\xe2\x0a:\xcd\xf8J\xbe\xf3\\\xab\xccH" +
"\x91X\xc0\x85?տ\xa9T\x22a9\xfcD\xc4\xf5\x06nQ\xdbr\xb73\xe9\xff\xb1|\xbf" +
"\x9eeIl\x07\u05c9g7͐x\x1c\xb7re\x02\xc8Ar\xcaYh\xffɽ\xd0H\x0e|\xe6\xe2_}" +
"\x0b\x80\xbe\x87B\x93\x18\xc8P8g\x9b\xa0e\xbc\x9f\xdf\xfa\xe3\xfa\x84" +
"\x86\xfc\xef݂\xce\x09\xaf\x9e\xa5\xa6H\x87\xe0k\xa7g@=\x0bx\x8a\xb0\xae" +
"\x9e#\xc1\xf7\x89\x1f\x8c\x00\x07\x0dN\xf5Ak\xf1\u0558\xac^\xb59\xf1\x13" +
"\x97\xf0g\xeb\xd1\xe4\x91\xd5\xfc\xf6\x1a\xb5\xee*\xb8/\xb7w\xadS\x8c" +
"\xfc\x11|<Pҳ\xf9\x14=Uw\x0c\x85s7\x8f+\xa3C\x84\xcc\x13\xdc\x1c+<\x93" +
"\xa3K\xbbp\xdbh)\xf2ŋ\xed\x1c|\xf8\xf7\xa2\xbaA\x0apЭI\x06\x03U\xb9\xde" +
"\x08islAbbr1\x08islAbbr2\x08islAbbr3\x08islAbbr4\x08islAbbr5\x08islAbbr6" +
"\x08islAbbr7\x08islAbbr8\x08islAbbr9\x09islAbbr10\x09islAbbr11\x09islAbb" +
"r12\x08islWide1\x08islWide2\x08islWide3\x08islWide4\x08islWide5\x08islWi" +
"de6\x08islWide7\x08islWide8\x08islWide9\x09islWide10\x09islWide11\x09isl" +
"Wide12\x02AH\xfe\xb9p\x124V\x03\xd9\xd0\xd1ܰޟ\xcc\xd2 \x13kc\xa31b\x06" +
"\xe6\x889\x80+\xa7\xc3\x0c\xa3\xf8\xddj[\x8fú\x16\x0e\x91\x83\x80W\xa8ZG" +
"\x09S0\xf8\x82\xe6M\xbb\x12\x80\x0cS\xefm\xbe.\x85,\xe6\x0b\xbb\x09\xc4." +
"\\X\xa9\xf6,Y\x04\xd6_\xbc\xb0\xa3X\xd6\xe12\xe7}ٷ\xb9\x89>\x86Nm\xac" +
"\x1a\xac\xb8\x87\xbd\x03\x01裑:\xact%\xd2٦\xf6\xee+T\x93\xac\xb09\xac(E" +
"\xeb\x0e\xfa.\xdd\x0a<\xf9k\xa9z\xd7\x1d\xae\x00U`\xab\xa2\xa2\x00z\x0f" +
"\xa0Hc\xcbiF\x9f\x94\xa3n\x89\x1e9\xad\xcb̛^4\xca(\x13\xd1\xd7CZ\xc8\xfc" +
"\xacv\xa8\x96T<}\xcdn\xd0F\x01\x1f3\x0b\xcc\xe8H\x0d4&\x8eg$\x02q\xe3M" +
"\xd9\x13\xd5\xfd\xe1d\xa1\xe0\x14\xc9\x17ދ\xd4q\xb8\xe7\x0bww\x0b\x05h" +
"\x022k8?n:\x11^\xc9\\\xb3\x01\xc7y2\x1d9\x1a\x140\xdaR\x8d,\xfe\xf0;po" +
"\x9d\x12T\x96\x90\x9b\xa8\xeex\x04\xc1\x98L,C\xb6\x89\xb53\xddƇVZ\xf5i" +
"\xfcg7\x9e\xac\xb2F9\xeczw*\x17N Y\x8fg\xbc\xb5\xebfn\xef\xcd\xe0ʇ'\xad" +
"\xfa\xb2WB\x8a\x8f2\xa8˄l\xff\xe5:-\xe15\xb4\xfe/\x0di^+\xc6\xe7\x07\xc0" +
"\xafi\x17\x88\x10\xcay\xf4.x@!LxF\x06\xab\x9b_!\xf3N\x9d\xae\x83Z?\xa8" +
"\x01\xf0{錒'>\xc6D\x7fW\xe7\x89\x18r_/X\xfd\x9d\x04\x07\x14L\xce*^}kz\xae" +
"\x1b\x9cPg\x89\x0e\x05toS\xf5g\xd4VlA\xdb\xc1:\x092\x88\xf5\xd0\xe6\x00" +
"\x1dp\x90m\x80x\x9ek:\xf6e\xa9\x12\xb8\xfb\xbfx\xf6\x86\x1dm\xb48g\x97#" +
"\xf3\xf1\xc5s\x1e\xfeh\xce\x19Cӽ\x8b\xe3\x08\xac\xd4D0\xf6}\xfbj\xfd\xf5" +
"\x22{\x8f\xf1\x0d\x87\xcf~\xeb\x0e\xbc\x03\x1d\xf9\x1c\xbcE\xad\xc6gz" +
"\x971\x11+j\xe9\x85\xe0\xfe\xc5FУ\x8d\xe1=~p\x9e1(\x89\x89\xc7l\xbd\x90" +
"\xd2h\xb35\xf0\xd2A\xf7o@KT}\xc4^=`\xe4\xa1\\\x00nNK\x86&j`\x95)\x88\xf6" +
"\xb1O\xde\x11\x92\x9e\xe5\x9b S\xcfV\x04\xdf\x09hf4\xf26\xac\x14\x16&d" +
"\x0b\xe0\x9dL\xf9\xa7\xb6\xc90'\x95j\xef\xef[b\x9e̺u\x97\xb2o\xe2\x8e" +
"\xc0\xae\xa3\xf42\xd5&\x02:\xb1b\x89\x00\xd6Y\xe0IE\x16F\xba\xfb\xbcm" +
"\x14s\x84\x91\x08\xf9\xa3\xd4:\x8b\x0f\xb7&_o\x0d\xd4X\x7fX\x90\x1b\xb4" +
"\xa3^<\\\xc8X\x1f\xb8+\x1b\xa1J\xaf\x11\xaaL8C\xb3l\xa9\x13\xa7\xb8h7" +
"\x97\xed\xa5\xb6\x90\x14~o\xe5}o\x8f\x05\xbd%.\xc2\xe1\xcf(\x1dO\x89u" +
"\xdc\xff!\xaf\xe4\x11\x99\x97yj\x88?\xb1\x1eY\xe40\\I8\x22h\xe8\xbda\xe4" +
"\x19\xf2m\x15nޕ\x98>d\xf3y*X&\xa2\xfe:r\x15\x22\xa4\xb16\x0dyw\x09\x98" +
"\x9d,\xfe\x93\x98 {\xb1u\xf0\x1e\x8b\xea2\xa8\xfc\xe3\xc1\xf0b\x9f\xe6" +
"\x08\xf9\xe8\xf1OÒ\x18r\x0cK\xb1\x88\x82\xa4܈\x95\x0b\x99a\x89\xa9&\xfb" +
"\xd6p\x814\xbf\x96\xfe\x0c\xce\x12mhI\x8f\xbf\x9f2B\xaa\x8a1|\xe3\xb4" +
"\xf5\xfdD\x0fl\x10\x8d㕄\xab\xa34Dž\xf8\x8d\x16\xd46\x1f\x04m1߭\xe7MA\x93" +
"\xd1G\xeeǐ\xd2[2$\x09\xcbA\xdb\x0dVd\xc7\x05\xb1W\xf88%)%\xa0#\xaa\xd5" +
"\xe7+:Ly+\x0a\xa7Ytئ\xc4jj\xd1x\x22\xd5\x14\x94\xebYHc\xd6&\xfb.\xfab" +
"\x0e\xa4=\xd14X\x22m\x22.\x22\xb4E\x9f\xef\x7f\xff7\xebP\xb6\xcf\xe4\xa7" +
"{վ\xa6\xfe\xc6\xe5\xf4\x02\x10\xf3\x9dØMI`\xce\xe8*\xd0\x0ac=\xe0um\x13w" +
"\xfd*\xa4\x11\xf7_$\xbfb\xf57>\x91\\J%`\x12\x10\x91\x02}\x06#\xb5\xcb%" +
"\x1d=,\x01\x95\xc0\xb1\x8b*\xdb\x10۸\x17\xc8\xe3\xfeo\xb0\xdeZ\xb1\x8e" +
"\xad\x0e\u0557!s\xb8M`\xa2u\xee>o\\\x0c*d\x81\xe7zf`\xce\xf5\x84\x11\x05" +
"\x1d\xfdů\x89\xc1\xa0\x14k\x05\x9a\x08\x9c\xbe\x0c\xa3\xc3s\x83_h\x85" +
"\xeb\x0a\xf6\u0090\xac\x1e\xf4A\x02\xe2\x8c^\xb0sS\x08\xcf_|\xee۱\xcaji." +
"4ň\xb5\x96w\x91A~\xfc\xe1:$^\x92\xd3p\xbf\xe7_\x0b\xb8]Z\x85\xbbF\x95x" +
"\xbe\x83D\x14\x01R\x18\x15R\xa2\xf0\xb0\x0b\xe3\x9d\xc9J kU\x00\x04\x97R" +
"o\xae\xd4ct\xb7\x8aeX\xe5$\xe8\x10\x0f\x1eTV\xfe\xa9vAU\xedw\x06~`\xd6" +
"\xc2\xefhƹ>\xd1k\x0f9(\x9c6\xa3-<ù\xde\x0dп]\x92-\x02\xd9i\xc7Oܭ\x82\x0c" +
"\x992\x9c6K\xec\xb6kI\xd6\xecZ+j\xff\x92\xd4?pVP\xa9\xe1\x03g\xb4\xb1" +
"\xf6d\x85!XqTu\xd1\xe5w~\xec\x91u\xe1\xcau\x99^!\x12\xf7N\x17\xac\xfa" +
"\xeb\x1e\x09Farvardin\x0bOrdibehesht\x07Khordad\x03Tir\x06Mordad\x09Shah" +
"rivar\x04Mehr\x04Aban\x04Azar\x03Dey\x06Bahman\x06Esfand\x02AP\xfeo4E" +
"\xf1\xca6\xd8\xc0>\xf0x\x90Գ\x09\xfe\xf7\x01\xaf\xd1Y7x\x89\x0e\xe4/\xb9" +
"\x8f{@\xdb@\xa1~\xf4\x83T\xc9D\xb5\xb1;\x1fe\xe2F\x8a|P\xe0\xf2\xb9\xdc." +
"9\xf2\x88\x17\xb5\xf8\xb6(\xb1\xa34\x94\xd6\xcd1\xa9_&\xdbñҎ\x01\xf0\xce" +
"yX\xd5\xffY\xe9*sBR\xb4\xa7\x92uh\xd14gn H\xab\x09\x86*\x11\x91j\xb5\xb1" +
"\x00\x95\x93f?\x17\xdc\x03\x06\xc1\xb1\xe8n\x1d\xf7\xdaw\xdat\xa5%\xaa:b" +
"'\x81\x977B;M=\x1c\xeb\x8a\xfa\xac\xcf\xf5f\x0c;+\x98\xb0ꅴ\xf37L\xa5\x93" +
"(\x08sG\x06\xf8\xbe\x0d\xfd\x1f\x18\x87\x12ݷ\x0d\x05\xe1w\xb3t\xb4e ka" +
"\x8dD\xa4-\xeaP\u05f7\x8d\xcbU2`WV\xf1\xc3G\xfd\x95Y;\x22\x8f\x8a\x0c;" +
"\xcdp֙\xf7.1o\xd2\u0590\xa1\xe7cla\xfcJ\x99\xbd>\xc73]r\x8eCk!\x95Jo\xd5" +
"\xe7W\xd1\xc3\x03Era\x05Month\x0alast month\x0athis month\x0anext month" +
"\x06+{0} m\x06-{0} m\xfeQHs\xc6\xd4tx*\xf5b\xe27\xdaT\xee\x1a\xb1\x84" +
"\x14\xb1\xd2E\x95R=\x9d\x00u\xe5u\x7fT\xd5\x14\xe0\xdf\xd5\x18\xe5q\x8e" +
"\xb4\x15S\x0c\x94\u05ff\xd3.vE\xacn\x99\xb1\xf9(ƃ\xcc\xef\xeej32y\xc0" +
"\xc1\x03X\xf4\x02\xc2\x084\x9b\xa3;\xaf\xb0X1æ\xe68\x8f\xa9E8=U\xefӐB4" +
"\xff\xc4O\xc9R\xab\xafN\x05H\xc9\x1d\xa2\x15U\x80\x9c\xd0\xc8\x1ay\xbb*r" +
"f\x9cW\x16^\xa4\xaf_/\xbc\xf2\xe7\xf68\xcf\xdc\xd8q\xcaRE\x00Yp06\x9a" +
"\xc90\xa3\x08\xce\x19Y\xff\x22H\x83\xbf\x00`\x94\x06r\x85\x965\xc9\x0d^J" +
"{Ks,\xe3o\xed(\x1f$\x10ݱ\x9a\xbf{J^3\xf5_\x9a\x1d\xb6\xd4m\x1a2P\xafR`" +
"\xbeTB+\xb9\x1b<\x08&\xa8\x8a\x18\xf8\x8cy\xc0\xcb\xed\xf1@}\x0b\xbf\xac" +
"H\x048\xf9\x0co\x92\xfa!$\x9b6\xabnY\xc05\x0cݷ\xf3\xa5\x0dE\x97\x03Mo1" +
"\x03Mo2\x03Mo3\x03Mo4\x03Mo5\x03Mo6\x03Mo7\x03Mo8\x03Mo9\x04Mo10\x04Mo11" +
"\x04Mo12\x0bFirst Month\x0cSecond Month\x0bThird Month\x0cFourth Month" +
"\x0bFifth Month\x0bSixth Month\x0dSeventh Month\x0cEighth Month\x0bNinth" +
" Month\x0bTenth Month\x0eEleventh Month\x0dTwelfth Month\x03Rat\x02Ox" +
"\x05Tiger\x06Rabbit\x06Dragon\x05Snake\x05Horse\x04Goat\x06Monkey\x07Roo" +
"ster\x03Dog\x03Pig\xfeѝ\xe0T\xfc\x12\xac\xf3cD\xd0<\xe5Wu\xa5\xc45\x0b" +
"\x9al\x9f?ium\xfc\x96\xb4\xf4\x7f\xfb\xc6\xf1\xff\x9e\x22\xe2\xc9m\x8f" +
"\xd25rg\x87L\x15Y\x10\x80\xd2t\xb5\xe5\x90\x08xH7\xfa\xdb\x02\xf70\x1fИJ" +
"\x88G\x99\xd6\x1a\x83\xb8\xbdz<\xf1\xc9t\U000953c1\xa5N\xa8\x0e\xbe\x05." +
"n\x87R\xf1\xbf\xc8>m%O?@4\xd4\xe8\xf1\x04Y\xb1_\x11\x1b\xb3\x17\xc8R\xed" +
"EHn\xa5\xf7>\xaf9:1?\x9eG\x0cg\xd0M \xbc\xcf+)\x86A\xd2qo\xbd\x18\x12N" +
"\xe4`:\x8fk|?\x8d/\x90\x8c\xe7d\xe4\x08\x9e\x8dO\x15L\x92@\xa5w}F\x7f" +
"\x84r7u\x10\x12/AΞ\xc0\xf9\x89\xb57\x1ct\xbe\x9e&\x9e\xfba\x85;\u05cb" +
"\xc2S\xc0\x97\xe3\x81]\xedg\xf6\xf6t\xd2\xfc\x1ezM\xf0\x08\x87\xeb\x12" +
"\x8f\xffd\x8a>\x09\xa5\xaa\x9ag&?\x0d\xadV\x93x!Xi{\x99\x04\xf4A r\xfeho" +
"\xd1\xffQ\x8f\xd4\xc1\xe1\x83i\x88\x9a\xfe\xfc<\x14\xd3G\x10\x94GA|\x17M" +
"2\x13\x22W@\x07\x8c-F\x81A\xe1\xb4y$S\xf18\x87v)\x07\x9b\x13R\x02\xf7<" +
"\x86\x1eD,3\xf6\xc9̳\xc3\xc3)HYX\xfbmI\x86\x93^\xe5\xa9\xe9\x12!\x82Y" +
"\xcf}a*-y\xf3\x1e6&\x91N\xe2\xec\x14\x95\x16,\x80\x1e=[E\x80\xca\xc9]." +
"\xed\x0fH:X\xd1lN}\x1d\xe0\xf1\xba'\xd6\x04\xf6u\x06\xc2\xdf\xd1g\x032" +
"\xabp55Yu'\xef\x1e>\x7f\x07\x92\xa7\x0eg\x12\xbb\xdaX\xe0p\x9c;\xd82." +
"\xa4\xc9w\xfa!\xfb\x9eD\xdd\xe7\xb7\xe2\xfa\xb9\xd8ٽ\xf4mB\x9a\xa1\xafo" +
"\x83ˣŷ#m<\x86WU\x8e\xea\xa5p:\xd4e_ܜ\xd2\xcf\x1e\u07fb$W\x96i\xa0\xc1" +
"\x00\x15o\xf8\x10\xb6h\xc2ײ:\x80\xfdO\xf5\xed\xf0\xcf4\x8d!L\x03Dc\xf2&" +
"\x8c\xcf\x15\xf6\xe3\xc3L\xbak\x08enWideM1\x08enWideM2\x08enWideM3\x08en" +
"WideM4\x08enWideM5\x08enWideM6\x08enWideM7\x08enWideM8\x08enWideM9\x09en" +
"WideM10\x09enWideM11\x09enWideM12\x0aenNarrowM1\x0aenNarrowM2\x0aenNarro" +
"wM3\x0aenNarrowM4\x0aenNarrowM5\x0aenNarrowM6\x0aenNarrowM8\x0aenNarrowM" +
"9\x0benNarrowM10\x0benNarrowM11\x0benNarrowM12\x0dBefore Christ\x11Befor" +
"e Common Era\x0bAnno Domini\x0aCommon Era\x02BC\x02AD\xfe\xfe\x1f8{\x91g" +
"\xb7\xd7\xcd\xde#Xk\xe6\x85\xd8Ì\x8e\xf7g\xf0\x10\xd02\xbdJN\x8f\xf8\x15" +
"A\xad\xfd\xcae\xac\xb6\xf7\xe1$9\xb9\xa2 \xb5\x8a\xf1f\x1d/N\xd0\xff\xb2" +
"_\xaaC͑Y\x1d\xcd$ua[\xaa\x1e\x01I\xf0\xbc\xb7\x0b\xc426\x15Ș\x19\x88\x94" +
"\x8b\xd5\xf7\xb0\xa4\xbd\\\xdb=\xafZ\x98A\xa9\xbc'\xdc\xec\xa9wCB\xaf" +
"\xe0\xdb\xf3\xb9\x03\xa2\xa0\x1ad\x98ـ-\xb4C\xa45K\xb5\xa6\x15\x87\xa9" +
"\xe9\x94j?\xb1\x9e\x10\xdf\x0dv\x7f\x1ai \x087\xe5\x17\xd2!y\x93M[\xa7ܳ" +
"\xfa\xae1ר\xe5\xfe\xe9y\xb9\xfc\x80F}Zje\xed\xbc\xc8Y.h\xfb\xb5 * S\xba" +
"\xba\xa8\xce\u07be\x03\xa6\x05\xcf\xe7,\x16i\x0ap\xbd\x16\xd6\xda$\xaf}0" +
"\xf1&\x0bCT\x19\x82x\xd5\x0c\xc7\x13\xf8\xa2R&~\x0b\xa5F\x8f\xa6\x8cݺ\\_" +
"\x06\xf8\xfc$\xbc\xda\xc1H\xe2\xf4\x7f\x84}L\x83\xfb{\xfe@\x09\xa8HF\xaf" +
"\xedRx\x9f\xbd\x0c\x0d\x06\xa5b\xebm\x9e#\xebwI\xfeDp}K\xc1\xd7\xe0\x86#" +
"\x1c;\x0f\xed\x0e`\x05\x9b\x86EI5w\xd9\x05\xfe\xb0zx\xc7T0v֚?S\xaf\xb2" +
"\x9b\x1a\x86\x12ꔚg\x14FB\xe8\x8fKvͫ\xfaz\x9c\x82\x87e\x08\x1f\x9c\x97" +
"\xc3\xc2 \x7f\x1a\xd2M#\x1f\xc2B\xcdJ\x05\xf5\x22\x94ʸ\x11\x05\xf9̄PA" +
"\x15\x8f\x0e5\xf3\xa6v\\ll\xd89y\x06\x08\x01!~\x06\xe3\x04_\xa3\x97j\xec" +
"\xeamZ\xb0\x10\x13\xdaW\x18pN\x1a\xab!\xf2k<\xea\xca\xe9%\x19\xf1\xb9" +
"\x0a\x84\xc1\x06\x84\xcb\x08\xe4\xe2\x037\xf2\x92ǭ\xd4\x0c\xf3;4b<\xc5.%" +
"\xc2!\x079\x8b\x9dG\xc9U\x86\xe6\\22\xf6\xee\xb5lʆ%\xbd\x9e\xfeQV\xf3u" +
"\xa7\xd4r \xecV\xc8V\xb1\x96\xb4\x9f2D\x88m\x13\x94\xa6X瘳\xc9\xcc\xe8K[y" +
"\xa4L\x01'IPP\xfe\xaaI+\xef)l\x86lE\xb8\xd4=\x81\x0f\x0b9렭\xf7_H\xaa\xf1" +
"\x0c\x17\xcf6\xa4\x02\xe1T\xf9\x14\xe9\x0e\xd5WmE}\xa5)\xe7s\xfc\x0c16" +
"\xd4U\xaa\x8d\xc9\xe0m\xd6\x0a\x0e\xf5ȷ9\xfen_\x02=U&vcX\x80EY U\x93\x02" +
"9\x02A\x86\xe5HGX\xf4\xed\x9ckFx(\xa2?\xfa7\x17\x8eCce\xb9\x0f5\xac\xbc" +
"\xf4\xa6\xe2C5\xdd\x08{\x1e\xd9c\x96>K\xc3\xf83\xaaܾ%\xf3\x91\x1b\xf8U" +
"\x1f\xfa<\xfd\xefв\x1b̹\x19f\xb2O\x81>f渃@\xf47l\xc9k\x13F\x1a\xa3\x84" +
"\xad\xa0\xda=_z\xf1́\x13l\xf6J\xd0\xdb\xe6\xed\x9d^ݹ\x19\x0fK\xa1H\x0b-" +
"\x7f\xed\xa8\xde&V\xbc\x9ak\xb8\x15\xc2\x12bWU\x08N1#\xe1W9ޗӬ\xacG\x80" +
"\xb2\x83ozH\xcd?\xd0T\x04ϭ\x03\xccfi\x05\xec\x02k\x9ej\x94\xa9S\xf2\xd4" +
"\xf8\x16r\x03era\x05month\x09enFutMOne\x0benFutMOther\x0aenPastMOne\x0ce" +
"nPastMOther\x03mo.\x08last mo.\x08this mo.\x08next mo.\x0eenShortFutMOne" +
"\x10enShortFutMOther\x0fenShortPastMOne\x11enShortPastMOther\x0a001AbbrM" +
"o1\x0a001AbbrMo2\x0a001AbbrMo3\x0a001AbbrMo4\x0a001AbbrMo5\x0a001AbbrMo6" +
"\x0a001AbbrMo7\x0a001AbbrMo8\x0a001AbbrMo9\x0b001AbbrMo10\x0b001AbbrMo11" +
"\x0b001AbbrMo12"
var bucket1 string = "" + // Size: xxxx bytes
"\xfe\x99ҧ\xa2ݭ\x8a\xb6\xc7&\xe6\xbe.\xca:\xec\xeb4\x0f\xd7;\xfc\x09xhhkw" +
"'\x1f\x0fb\xfb8\xe3UU^S%0XxD\x83Zg\xff\xe7\x1ds\x97n\xef\xf95\xd3k\xbf$:" +
"\x99\xbbnU\xba:n\xdeM.\xa4st\xa6E\x0eG\xf5\xf0\xd6.Q-\x1e8\x87\x11X\xf2" +
"\x19\xc1J\xacI57\xdc\x07\xf0\x87\xc1cMc\x9e\xdc\x0a\xb3%\xff\x03\xe2aR" +
"\x06,\xbf!4J\x8b]4ΙWš\x1dY2\x88:\xb9Q\x16\xfc\xb5r\xf7\xc5d^\x97\x08\xce" +
"\x04EG@\u05fa\x88\x885\x08\x8c/\x83r\x92\xb8\x96\xd4\xfa\x8d\x18\x0fF" +
"\xfd\xa2\x01\xfb\xb0\xa0ڐӔ\xca\xcd\xf7@=\xe2\x96\x03\x87\x8aH\xfa\xc3L" +
"\xa2\xe90H\x93\xf6\x80\x8ck\x05)u{d\xa4\x19D\xd4{\xfd\xb8\xc5\xc0)\xea" +
"\x01\x9b\xcb&\x12\x87y\xf6{\xbb\xcdm\x0az/\xcb\xce#\x1c\x86R\xccy\xdbC" +
"\x7f\xa2\x96\x94\xc2\x22O/\xe4t\xfe\xba4 \xc3\xf1Hdy{܃L\x9aG\xa3\xa9\xea" +
"!LmW\x05\x9d$\x01\xe5wp\x8a'<\xc1\xcao\x8d\x1b\x8d\xd8h\xccX\xdc\xe4\xfd" +
"j\xf6\x0b\xa5'\xad\xe2\x1a\x16\x8fD\xde5\x0d\xaeL\xeft\xe1\x1f/\xd8\xec" +
"\xc9\xc0\xc6C#\x18\xfa\x93\x11Wt\x82\xfc\xa7\x1c\x82\x1b\xfd\x95`\xbd" +
"\x9f;[\xb3\x9e'\xe8DmA/^Ŭ]\x15-\xf9ـb\xea\xe9\x1f\xd9̂\x92\xc8\xddL%\xaf" +
"\xd0\xcc\xc7\x02L\xbb:P3\x22\xbfU\x81\U000d06a1\xa2\xf9q\x96\xbc2\x8e" +
"\x8f\xb4\x80\xbe\x06`\x8b\x0b\xaf\xd2\xd2J\xccV>\xc7d\xf5\xfd\x1c?\xbc7⏟" +
"\xb9%\xf0\xc4\xfe\xf3P\xed\x13V<CG\x99\x12\xc2 \x8e1\x0e\xcej\x12}\xe2P" +
"\xe4\x22\xba\xf5\xb0\x98q|A\xc5\xc8\xc2\xc8\x18w\xb7yb;n\x97\\6F\xe8\xd1" +
"1\xe2Eh\x0b\x95\x09v\xc0Ze?y\xeb\xcc\xe2\xa3\xf26ۦ\xec\x02f\x11oto\xe9z" +
"\x89\xfe\x0e\x7fh\xd1\xf3\x02\x86\xde]\x86\x0a\xbeq7R\xb0\xa1\xf8\x9eLj" +
"\x05\xc4\xfd\xa7B\xe4\xbc8\xff\x1ao\xff\xc7r\xb2I\xec\x94a\x84ԯ\xe6\x919" +
"\x8a\xe3\xc5kM\xe1\x09\x02?\x18jtstj\xbe\xe3P}G\xd0e\xc8Ď\x1e\xaa$\x97" +
"\xbce\x18mx6\xaf\xe10vP\xde\xc5#\xb0\xca\xc7N\x94駢\xf0\xc35\xf6\xb6c\x00" +
":\x18\x22\x1dM\xd2\x1a\\\xdb`\xc1v\x18\xfdh\xae\x1d\xc0\x96}{mSXn]\xd29" +
"\xfdϧT\xccu\xd5\xc0\x88JΆ\x9c:\xd3\x7fﺉc\x1a%Œ:\x9cln\xaa\x08\x1e\x85U+e" +
"\x958V `\xc3C\x0d\xd9ُB\xb0ҁt\xa7\x16\x90_\x84\xc1e\xd4m\x17M\x04\xbe*`" +
"\x9dS\x0e\x01M\xa6\xb7va\xa0\xeb\xf9\xb6\xaeP>ܦ\xd7FR\x9b7\xabPu\xaa\xcf" +
"\xfca;k\xb2+\xe0zXKL\xbd\xce\xde.&\xf5ԛ\xbck\x1b\xd4F\x84\xac\x08#\x02mo" +
"\x0f001ShortFutMOne\x11001ShortFutMOther\x12001ShortPastMOther\x10001Nar" +
"rowFutMOne\x10001NarrowFutMTwo\x12001NarrowFutMOther\x11001NarrowPastMOn" +
"e\x13001NarrowPastMOther\x08gbAbbrM1\x08gbAbbrM2\x08gbAbbrM3\x08gbAbbrM4" +
"\x08gbAbbrM5\x08gbAbbrM6\x08gbAbbrM7\x08gbAbbrM8\x08gbAbbrM9\x09gbAbbrM1" +
"0\x09gbAbbrM11\x09gbAbbrM12\x08gbWideM1\x08gbWideM2\x08gbWideM3\x08gbWid" +
"eM4\x08gbWideM5\x08gbWideM6\x08gbWideM7\x08gbWideM8\x08gbWideM9\x09gbWid" +
"eM10\x09gbWideM11\x09gbWideM12\x0agbNarrowM1\x0agbNarrowM2\x0agbNarrowM3" +
"\x0agbNarrowM5\x0agbNarrowM6\x0agbNarrowM8\x0agbNarrowM9\x0bgbNarrowM10" +
"\x0bgbNarrowM11\x0bgbNarrowM12\xfeG*:*\x8e\xf9f̷\xb2p\xaa\xb9\x12{и\xf7c" +
"\u0088\xdb\x0ce\xfd\xd7\xfc_T\x0f\x05\xf9\xf1\xc1(\x80\xa2)H\x09\x02\x15" +
"\xe8Y!\xc2\xc8\xc3뿓d\x03vԧi%\xb5\xc0\x8c\x05m\x87\x0d\x02y\xe9F\xa9\xe1" +
"\xe1!e\xbc\x1a\x8d\xa0\x93q\x8b\x0c߮\xcdF\xd1Kpx\x87/is\xcc\xdd\xe0\xafʍ" +
"~\xfeҜl\xc2B\xc5\x1a\xa4#ث,kF\xe89\xec\xe6~\xaa\x12\xbf\x1a\xf6L\x0a\xba" +
"\xa9\x96n\xf1\x03Ӊ<\xf5\x03\x84dp\x98\xe1d\xf7'\x94\xe6\x97\x1a4/\x05" +
"\x99\x8f.\x7foH@\xe9\x1a\xda6`MQ\xad\x0d\x08\x99؟+\xe53\xbf\x97\x88~\xe6" +
"eh\xb7\xaf\xaf<\xe1|\xb9\x0cF\xe0\xda\xf2\xbd\xff\x19\xaa\x95\x9b\x81" +
"\xc3\x04\xe3\x1f\xd5o]$\xf5\x0f\xbbzU\xf2a\xb0\x92[\xfeX\x03\x1f\xdc\x0c" +
"\xd5I\xc0a_\xbd\xd8\xde\u009a\x1a@t\x1e\x7f\x8f&\x0c\x8d\xfeM\xd7ڟX\x90" +
"\x97\xfe%\xa3'\x88\x81\xb5\x14l\x0bL\xd9>\x8d\x99\xe2=ƭu,\x9aT \x06\xc1y" +
"\\\x01wf\xdcx\xab\xa1\xee\xec\x82\x1e8\xb09$\x88\xfe<\xb5\x13g\x95\x15NS" +
"\x83`vx\xb9\xb7\xd8h\xc7 \x9e\x9fL\x06\x9a\xadtV\xc9\x13\x85\x0d8\xc15R" +
"\xe5\xadEL\xf0\x0f\x8b:\xf6\x90\x16i۰W\x9dv\xee\xb6B\x80`Ωb\xc7w\x11\xa3" +
"N\x17\xee\xb7\xe0\xbf\xd4a\x0a\x8a\x18g\xb82\x8e\xaaVCG\xc3Ip\xc0^6\xa8N" +
"\xf1\xebt\xa6\xa4\x0cO\xd9c\x97\x8f\xfa\x11)\x1bHY\xa2ӄ\x1bLc\xd6\x08" +
"\x06\xbfj`?3s\x89\xb8\x82(\xaf\xef\x84\xdfz\xc3\x12\xf1b\xd4\xf7ir\xe8," +
"\x8apœ\x00F\xa6b+\xfa}\x03\x14..\xcb1l\xac\x93\xee\x19\x12\xaa\xbbo\x95" +
"\xf3?ݔ7\x84\xb2b\x0c4\x81\x17\xf2K@\xde\x18\x99Q\x17n\xe5?\xdao\xc6(\xfc" +
"\x9b\xees\xc6V\x91\x0dْ\x1d\x06g9o"
// enumMap maps names to their uint16 enum values.
//
// Values are not unique across the map: the numbering restarts at 0 several
// times (for example at "calendars", "buddhist" and "months").
// NOTE(review): presumably each run is a separate enum type flattened into one
// map — confirm against the generator.
var enumMap = map[string]uint16{
"": 0,
"calendars": 0,
"fields": 1,
"buddhist": 0,
"chinese": 1,
"dangi": 2,
"ethiopic": 3,
"ethiopic-amete-alem": 4,
"generic": 5,
"gregorian": 6,
"hebrew": 7,
"islamic": 8,
"islamic-civil": 9,
"islamic-rgsa": 10,
"islamic-tbla": 11,
"islamic-umalqura": 12,
"persian": 13,
"months": 0,
"eras": 1,
"filler": 2,
"cyclicNameSets": 3,
"format": 0,
"stand-alone": 1,
"wAbbreviated": 0,
"wNarrow": 1,
"wWide": 2,
"leap7": 0,
"variant": 1,
"cycDayParts": 0,
"cycDays": 1,
"cycMonths": 2,
"cycYears": 3,
"cycZodiacs": 4,
"era": 0,
"era-short": 1,
"era-narrow": 2,
"month": 3,
"month-short": 4,
"month-narrow": 5,
"displayName": 0,
"relative": 1,
"relativeTime": 2,
"before1": 0,
"current": 1,
"after1": 2,
"future": 0,
"past": 1,
"other": 0,
"one": 1,
"two": 2,
}
// Total table size: xxxx bytes (14KiB); checksum: 4A3B660
@@ -0,0 +1,181 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldrtree
import (
"golang.org/x/text/internal/language/compact"
"golang.org/x/text/language"
)
// Constants describing the uint16 entries stored in Tree.Indices.
const (
// inheritOffsetShift is the number of bits by which the path offset is
// shifted left inside an inheritance marker.
inheritOffsetShift = 12
// inheritMask is the bit that flags an entry of a non-leaf node as an
// inheritance marker instead of the offset of a subindex.
inheritMask uint16 = 0x8000
// inheritValueMask selects the replacement path value that is stored in the
// low bits of an inheritance marker.
inheritValueMask uint16 = 0x0FFF
// missingValue marks a leaf slot for which no value is stored.
missingValue uint16 = 0xFFFF
)
// Tree holds a tree of CLDR data.
type Tree struct {
// Locales maps a compact language index to the offset in Indices at which
// the index used for that locale starts.
Locales []uint32
// Indices holds the index nodes of all locales, stored back to back.
Indices []uint16
// Buckets holds the strings that leaf nodes point into. A value is stored as
// one length byte followed by that many bytes of data.
Buckets []string
}
// Lookup looks up CLDR data for the given path. The lookup adheres to the alias
// and locale inheritance rules as defined in CLDR.
//
// Each subsequent element in path indicates which subtree to select data from.
// The last element of the path must select a leaf node. All other elements
// of the path select a subindex.
//
// Lookup returns the empty string if no value is found.
func (t *Tree) Lookup(tag compact.ID, path ...uint16) string {
return t.lookup(tag, false, path...)
}
// LookupFeature is like Lookup, but if the selected leaf node has no value for
// the last path element, it first falls back to the "other" value (leaf key 0)
// of that same node before traversing the inheritance chain.
func (t *Tree) LookupFeature(tag compact.ID, path ...uint16) string {
return t.lookup(tag, true, path...)
}
// lookup resolves path for tag and returns the selected value, or "" if there
// is none.
//
// Starting at tag, the index of a locale is walked along path. If the walk
// hits a missing entry, the lookup is retried with the parent of tag until the
// root (tag 0) has been tried. If the walk hits an inheritance marker (an
// entry with inheritMask set), the path element designated by the marker is
// replaced with the marker's value and the lookup restarts from the original
// tag.
//
// If isFeature is set and the leaf has no value for the last path element, the
// value stored for leaf key 0 ("other") of the same node is returned instead
// of continuing with the parent locale.
//
// The caller's path slice is never modified: it is copied before the first
// alias rewrite.
func (t *Tree) lookup(tag compact.ID, isFeature bool, path ...uint16) string {
	origLang := tag
	ownPath := false // whether path has been detached from the caller's slice
outer:
	for {
		index := t.Indices[t.Locales[tag]:]

		k := uint16(0)
		for i := range path {
			size := index[k] // number of slots of the current node
			if i < len(path)-1 {
				// index (non-leaf)
				if path[i] >= size {
					break
				}
				k = index[k+1+path[i]]
				if k == 0 {
					break
				}
				if v := k &^ inheritMask; k != v {
					offset := v >> inheritOffsetShift
					value := v & inheritValueMask
					if !ownPath {
						// path may share its backing array with a slice
						// owned by the caller; rewrite a private copy.
						path = append([]uint16(nil), path...)
						ownPath = true
					}
					path[uint16(i)-offset] = value
					tag = origLang
					continue outer
				}
			} else {
				// leaf value
				offset := missingValue
				if path[i] < size {
					offset = index[k+2+path[i]]
				}
				if offset == missingValue {
					if !isFeature {
						break
					}
					// "other" feature must exist
					offset = index[k+2]
				}
				data := t.Buckets[index[k+1]]
				// Compute the bounds as int so that they cannot wrap around
				// the way uint16 arithmetic could.
				start := int(offset) + 1
				return data[start : start+int(data[offset])]
			}
		}
		if tag == 0 {
			break
		}
		tag = tag.Parent()
	}
	return ""
}
// build flattens the locale trees and buckets collected by b into a Tree.
//
// All work is done even if b has recorded an error; in that case the error is
// returned and the Tree is discarded.
func build(b *Builder) (*Tree, error) {
	tree := &Tree{
		Locales: make([]uint32, language.NumCompactTags),
	}

	// Append the index of every locale and remember where it starts.
	for _, loc := range b.locales {
		id, _ := language.CompactIndex(loc.tag)
		tree.Locales[id] = uint32(len(tree.Indices))

		var ib indexBuilder
		ib.add(loc.root)
		tree.Indices = append(tree.Indices, ib.index...)
	}

	// Set locales for which we don't have data to the parent's data.
	for i := range tree.Locales {
		start := tree.Locales[i]
		for id := compact.ID(i); start == 0 && id != 0; {
			id = id.Parent()
			start = tree.Locales[id]
		}
		tree.Locales[i] = start
	}

	for _, bucket := range b.buckets {
		tree.Buckets = append(tree.Buckets, string(bucket))
	}

	if b.err != nil {
		return nil, b.err
	}
	return tree, nil
}
// indexBuilder accumulates the flattened index nodes written by add.
type indexBuilder struct {
index []uint16 // nodes appended so far
}
// add appends the encoding of node i, including all of its descendants, to
// b.index and returns the uint16 that a parent node stores to refer to it.
//
//   - A node with values becomes a leaf: its slot count, the bucket of its
//     first value, and one bucket position per key, with missingValue in the
//     slots of absent keys. The offset of the leaf is returned.
//   - A node with subindexes becomes an inner node: its slot count followed by
//     one entry per child; entries of absent children stay 0. The offset of
//     the node is returned.
//   - A node with neither, but with a negative inheritOffset, is not stored;
//     an inheritance marker is returned instead.
//
// For any other node 0 is returned.
func (b *indexBuilder) add(i *Index) uint16 {
	start := len(b.index)

	if len(i.values) > 0 {
		top := enumIndex(0)
		for _, v := range i.values {
			if v.key > top {
				top = v.key
			}
		}
		// Header (slot count and bucket) plus one slot per key in [0, top].
		b.index = append(b.index, make([]uint16, top+3)...)
		b.index[start] = uint16(top) + 1
		b.index[start+1] = i.values[0].value.bucket

		slots := b.index[start+2:]
		for j := range slots {
			slots[j] = missingValue
		}
		for _, v := range i.values {
			slots[v.key] = v.value.bucketPos
		}
		return uint16(start)
	}

	if len(i.subIndex) > 0 {
		top := enumIndex(0)
		for _, child := range i.subIndex {
			if child.meta.index > top {
				top = child.meta.index
			}
		}
		// Reserve this node before the children are appended behind it.
		b.index = append(b.index, make([]uint16, top+2)...)
		b.index[start] = uint16(top) + 1
		for _, child := range i.subIndex {
			ref := b.add(child)
			b.index[start+1+int(child.meta.index)] = ref
		}
		return uint16(start)
	}

	if up := i.meta.inheritOffset; up < 0 {
		// Walk up one ancestor per level of the inheritance offset.
		anc := i.meta
		for n := up; n < 0; n++ {
			anc = anc.parent
		}
		marker := uint16(-(up + 1)) << inheritOffsetShift
		marker += uint16(anc.typeInfo.enum.lookup(i.meta.inheritIndex))
		return marker | inheritMask
	}

	return 0
}
@@ -0,0 +1,139 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldrtree
import (
"log"
"strconv"
)
// enumIndex is the numerical value of an enum value.
type enumIndex int

// An enum is a collection of enum values.
type enum struct {
	name   string              // the Go type of the enum
	rename func(string) string // optional; applied to every key before lookup
	keyMap map[string]enumIndex // key to index
	keys   []string            // index to key
}

// lookup returns the index for the enum corresponding to the string. If s
// currently does not exist it will add the entry.
//
// A key that parses as an unsigned decimal integer is assigned its own
// numerical value as index; all lower indices that are still unused are filled
// with their decimal representation. Any other key gets the next free index.
//
// The process is terminated with log.Fatalf if an integer key cannot be given
// its own value, either because that index is already taken by another key or
// because the key is not in canonical form (such as "01").
func (e *enum) lookup(s string) enumIndex {
	if e.rename != nil {
		s = e.rename(s)
	}
	if x, ok := e.keyMap[s]; ok {
		return x
	}
	if e.keyMap == nil {
		e.keyMap = map[string]enumIndex{}
	}

	if u, err := strconv.ParseUint(s, 10, 32); err == nil {
		for len(e.keys) <= int(u) {
			x := enumIndex(len(e.keys))
			name := strconv.Itoa(int(x))
			e.keyMap[name] = x
			e.keys = append(e.keys, name)
		}
		// Use the comma-ok form: a plain index expression yields 0 for a
		// missing key, which would let "0" silently share index 0 with
		// whatever key already occupies it.
		if x, ok := e.keyMap[s]; !ok || x != enumIndex(u) {
			// TODO: handle more gracefully.
			log.Fatalf("cldrtree: mix of integer and non-integer for %q %v", s, e.keys)
		}
		return enumIndex(u)
	}

	x := enumIndex(len(e.keys))
	e.keyMap[s] = x
	e.keys = append(e.keys, s)
	return x
}
// A typeInfo indicates the set of possible enum values and a mapping from
// these values to subtypes.
type typeInfo struct {
enum *enum // translates keys to enum values; set by lookupSubtype if nil
entries map[enumIndex]*typeInfo // subtype per enum value, filled by lookupSubtype
keyTypeInfo *typeInfo // NOTE(review): not used by the methods below — confirm usage elsewhere
shareKeys bool // whether the most recent lookupSubtype call had a shared type
}
// sharedKeys reports whether the most recent lookupSubtype call on t was made
// with options that carry a shared type.
func (t *typeInfo) sharedKeys() bool {
return t.shareKeys
}
// lookupSubtype returns the enum value x for key s together with the typeInfo
// of the subtype that is registered for x, creating both if necessary.
//
// If t has no enum yet, it adopts opts.sharedEnums, or a fresh enum if that is
// nil. If t already has an enum that differs from a non-nil opts.sharedEnums,
// lookupSubtype panics. A new subtype is opts.sharedType if set and a fresh
// typeInfo otherwise.
func (t *typeInfo) lookupSubtype(s string, opts *options) (x enumIndex, sub *typeInfo) {
	shared := opts.sharedEnums
	switch {
	case t.enum == nil && shared != nil:
		t.enum = shared
	case t.enum == nil:
		t.enum = &enum{}
	case shared != nil && t.enum != shared:
		panic("incompatible enums defined")
	}

	x = t.enum.lookup(s)

	if t.entries == nil {
		t.entries = make(map[enumIndex]*typeInfo)
	}
	if existing, found := t.entries[x]; found {
		sub = existing
	} else {
		if sub = opts.sharedType; sub == nil {
			sub = &typeInfo{}
		}
		t.entries[x] = sub
	}

	t.shareKeys = opts.sharedType != nil // For analysis purposes.
	return x, sub
}
// metaData includes information about subtypes, possibly sharing commonality
// with sibling branches, and information about inheritance, which may differ
// per branch.
type metaData struct {
b *Builder // copied from the parent when created by sub
parent *metaData // node on which sub was called to create this one
index enumIndex // index into the parent's subtype index
key string // key passed to sub when this node was created
elem string // XML element corresponding to this type.
typeInfo *typeInfo // type of this node, as returned by the parent's lookupSubtype
lookup map[enumIndex]*metaData // children by enum value
subs []*metaData // children in order of creation
inheritOffset int // always negative when applicable
inheritIndex string // new value for field indicated by inheritOffset
// inheritType *metaData
}
// sub returns the child of m for key, creating and registering it on first
// use. The key is translated to an enum value through m's typeInfo; children
// are identified by that value.
func (m *metaData) sub(key string, opts *options) *metaData {
	if m.lookup == nil {
		m.lookup = make(map[enumIndex]*metaData)
	}

	idx, info := m.typeInfo.lookupSubtype(key, opts)
	if child := m.lookup[idx]; child != nil {
		return child
	}

	child := &metaData{
		b:        m.b,
		parent:   m,
		index:    idx,
		key:      key,
		typeInfo: info,
	}
	m.lookup[idx] = child
	m.subs = append(m.subs, child)
	return child
}
// validate walks the metaData tree rooted at m. It performs no checks of its
// own at present; it only recurses into every child in m.subs.
func (m *metaData) validate() {
	for i := range m.subs {
		m.subs[i].validate()
	}
}
@@ -0,0 +1,121 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab_test
// This file contains tests which need to import package collate, which causes
// an import cycle when done within package colltab itself.
import (
"bytes"
"testing"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// assigned is used to only test runes that are inside the scope of the Unicode
// version used to generate the collation table.
var assigned = rangetable.Assigned(collate.UnicodeVersion)
// TestNonDigits checks that, with numeric collation enabled, no assigned rune
// outside of Nd collates inside the range spanned by "0" and "999999".
//
// The scan covers every rune from the start of the first 16-bit range of
// unicode.N up to the end of its first 32-bit range.
func TestNonDigits(t *testing.T) {
	c := collate.New(language.English, collate.Loose, collate.Numeric)

	first := rune(unicode.N.R16[0].Lo)
	last := rune(unicode.N.R32[0].Hi)

	// Verify that all non-digit numbers sort outside of the number range.
	for r := first; r <= last; r++ {
		if unicode.In(r, unicode.Nd) {
			continue
		}
		if !unicode.In(r, assigned) {
			continue
		}
		a := string(r)
		if c.CompareString(a, "0") == -1 || c.CompareString(a, "999999") == 1 {
			continue
		}
		t.Errorf("%+q non-digit number is collated as digit", a)
	}
}
// TestNumericCompare runs testDigitCompare on every range of decimal digits
// listed in unicode.Nd.
func TestNumericCompare(t *testing.T) {
	c := collate.New(language.English, collate.Loose, collate.Numeric)

	// Iterate over all digits.
	digits := unicode.Nd
	for i := range digits.R16 {
		testDigitCompare(t, c, rune(digits.R16[i].Lo), rune(digits.R16[i].Hi))
	}
	for i := range digits.R32 {
		testDigitCompare(t, c, rune(digits.R32[i].Lo), rune(digits.R32[i].Hi))
	}
}
// testDigitCompare checks numeric comparison for the digit range [zero, nine].
// For several ASCII prefixes it appends a digit taken from the range and
// compares the result against eleven consecutive ASCII numbers; the expected
// outcome is derived from the digit's value relative to the position in that
// list. Ranges whose first rune is not assigned are skipped. The function
// stops at the first mismatch.
func testDigitCompare(t *testing.T, c *collate.Collator, zero, nine rune) {
	if !unicode.In(zero, assigned) {
		return
	}
	n := int(nine - zero + 1)
	if n%10 != 0 {
		t.Fatalf("len([%+q, %+q]) = %d; want a multiple of 10", zero, nine, n)
	}

	type digitCase struct {
		prefix string
		b      [11]string
	}
	cases := []digitCase{
		{"", [11]string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"}},
		{"1", [11]string{"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"}},
		{"0", [11]string{"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10"}},
		{"00", [11]string{"000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010"}},
		{"9", [11]string{"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100"}},
	}

	for _, tc := range cases {
		for k := 0; k <= n; k++ {
			// Only the value of the digit (0-9) matters for the expectation.
			i := k % 10
			a := tc.prefix + string(zero+rune(i))
			for j, b := range tc.b {
				var want int
				if i < j {
					want = -1
				} else if i > j {
					want = 1
				}
				if got := c.CompareString(a, b); got != want {
					t.Errorf("Compare(%+q, %+q) = %d; want %d", a, b, got, want)
					return
				}
			}
		}
	}
}
// BenchmarkNumericWeighter measures comparing a mixed text/number input with
// itself under numeric collation. Both operands are counted towards the
// reported throughput.
func BenchmarkNumericWeighter(b *testing.B) {
	c := collate.New(language.English, collate.Numeric)
	chunk := []byte("Testing, testing 123...")
	input := bytes.Repeat(chunk, 100)
	b.SetBytes(int64(2 * len(input)))
	for n := b.N; n > 0; n-- {
		c.Compare(input, input)
	}
}
@@ -0,0 +1,376 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
// The comparison levels, numbered consecutively from 0 in declaration order.
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
// NumLevels equals the number of Level values declared before it.
NumLevels
)
// Default and maximum weights used by the Elem encodings in this file.
const (
defaultSecondary = 0x20 // secondary reported by layouts that do not store one
defaultTertiary = 0x2 // tertiary reported by the ceType1 layout
maxTertiary = 0x1F // 5 bits; also the mask cleared when a tertiary is replaced
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32

// Value ranges that partition the Elem space by kind of element; ctype maps a
// value to its kind using the upper bounds declared here.
const (
	maxCE       Elem = 0xAFFFFFFF
	PrivateUse       = minContract
	minContract      = 0xC0000000
	maxContract      = 0xDFFFFFFF
	minExpand        = 0xE0000000
	maxExpand        = 0xEFFFFFFF
	minDecomp        = 0xF0000000
)

// ceType classifies an Elem by the range its value falls in.
type ceType int

const (
	ceNormal           ceType = iota // ceNormal includes implicits (ce == 0)
	ceContractionIndex               // rune can be a start of a contraction
	ceExpansionIndex                 // rune expands into a sequence of collation elements
	ceDecompose                      // rune expands using a compatibility decomposition
)

// ctype reports which kind of element ce is. Values up to maxCE are normal
// elements, values up to maxContract are contraction indexes, values up to
// maxExpand are expansion indexes and everything above is a decomposition.
func (ce Elem) ctype() ceType {
	switch {
	case ce <= maxCE:
		return ceNormal
	case ce <= maxContract:
		return ceContractionIndex
	case ce <= maxExpand:
		return ceExpansionIndex
	default:
		return ceDecompose
	}
}
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
//
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
//
// Collation elements with a secondary value are of the form
//
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
// Masks, type tags, field widths and shifts for the element layouts described
// in the comment above.
const (
ceTypeMask = 0xC0000000 // top two bits
ceTypeMaskExt = 0xE0000000 // top three bits
ceIgnoreMask = 0xF00FFFFF // excludes bits 20-27, where MakeElem stores the CCC of a ceType4 element
ceType1 = 0x40000000 // 01: primary and 8-bit secondary
ceType2 = 0x00000000 // 00: primary, secondary offset and 5-bit tertiary
ceType3or4 = 0x80000000 // 1: top bit shared by the two layouts that carry a CCC
ceType4 = 0xA0000000 // 101: no primary; full secondary, tertiary and CCC
ceTypeQ = 0xC0000000 // 11: quaternary value only
Ignore = ceType4 // Quaternary returns 0 for elements equal to this after masking with ceIgnoreMask
firstNonPrimary = 0x80000000 // Primary treats elements below this as storing the primary at primaryShift
lastSpecialPrimary = 0xA0000000 // Primary returns 0 for elements above this
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000 // bit tested first by Tertiary
primaryValueMask = 0x3FFFFE00 // bits 9-29
maxPrimaryBits = 21
compactPrimaryBits = 16
maxSecondaryBits = 12
maxTertiaryBits = 8
maxCCCBits = 8
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryCompactBits = 5
primaryShift = 9
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4 // secondary encoded as offset 0 in the ceType2 layout
)
// makeImplicitCE returns a ceType1 element holding the given primary weight
// and the default secondary weight.
func makeImplicitCE(primary int) Elem {
	ce := Elem(primary << primaryShift)
	ce |= ceType1
	ce |= defaultSecondary
	return ce
}
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
//
// The layout is selected from the arguments:
//   - primary == 0: secondary, tertiary and ccc are stored in full (ceType4).
//   - primary != 0 and ccc != 0: a 16-bit primary, the ccc and a 5-bit
//     tertiary are stored; the secondary must be the default.
//   - primary != 0, ccc == 0 and the default tertiary: the primary and an
//     8-bit secondary are stored (ceType1).
//   - otherwise: the primary, the secondary as a small offset from the
//     default, and a 5-bit tertiary are stored (ceType2).
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
	if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
		return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
	}
	if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
		return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
	}
	if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
		return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
	}
	ce := Elem(0)
	if primary != 0 {
		if ccc != 0 {
			if primary >= 1<<compactPrimaryBits {
				return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
			}
			if secondary != defaultSecondary {
				return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
			}
			// This layout has only 5 bits for the tertiary weight. A larger
			// value would overwrite the type bits above it and produce an
			// element that decodes as a different type.
			if tertiary > maxTertiary {
				return 0, fmt.Errorf("makeCE: tertiary weight with non-zero CCC out of bounds: %x > %x", tertiary, maxTertiary)
			}
			ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
			ce |= Elem(ccc) << compactPrimaryBits
			ce |= Elem(primary)
			ce |= ceType3or4
		} else if tertiary == defaultTertiary {
			if secondary >= 1<<maxSecondaryCompactBits {
				return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
			}
			ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
			ce |= ceType1
		} else {
			// The secondary is stored as a 4-bit offset from
			// defaultSecondary - 4 (minCompactSecondary).
			d := secondary - defaultSecondary + maxSecondaryDiffBits
			if d >= 1<<maxSecondaryDiffBits || d < 0 {
				return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
			}
			if tertiary >= 1<<maxTertiaryCompactBits {
				return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
			}
			ce = Elem(primary<<maxSecondaryDiffBits + d)
			ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
		}
	} else {
		ce = Elem(secondary<<maxTertiaryBits + tertiary)
		ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
		ce |= ceType4
	}
	return ce, nil
}
// MakeQuaternary returns an Elem of the quaternary type (ceTypeQ) that
// carries v as its quaternary value.
func MakeQuaternary(v int) Elem {
	shifted := Elem(v << primaryShift)
	return shifted | ceTypeQ
}
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
//
// NOTE(review): the body below ignores both ce and l and always returns 0, so
// the behavior described above is not implemented here. Confirm whether any
// caller relies on the result.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class stored in ce. Elements whose top
// bit is clear do not store a class and yield 0.
func (ce Elem) CCC() uint8 {
	if ce&ceType3or4 == 0 {
		return 0
	}
	// The class sits at bit 16 when the top three bits are 100 and at bit 20
	// otherwise.
	shift := uint(20)
	if ce&ceType4 == ceType3or4 {
		shift = 16
	}
	return uint8(ce >> shift)
}
// Primary returns the primary collation weight for ce, or 0 for elements that
// do not store one.
func (ce Elem) Primary() int {
	switch {
	case ce < firstNonPrimary:
		// Layouts with the top bit clear keep the primary above primaryShift.
		return int(ce&primaryValueMask) >> primaryShift
	case ce > lastSpecialPrimary:
		return 0
	default:
		// The primary occupies the low 16 bits.
		return int(uint16(ce))
	}
}
// Secondary returns the secondary collation weight for ce. The decoding
// depends on the top two bits of ce; quaternary elements yield 0.
func (ce Elem) Secondary() int {
	typ := ce & ceTypeMask
	if typ == ceType1 {
		return int(uint8(ce))
	}
	if typ == ceType2 {
		// Stored as a 4-bit offset from minCompactSecondary.
		return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
	}
	if typ == ceTypeQ {
		return 0
	}
	if typ != ceType3or4 {
		panic("should not reach here")
	}
	if ce >= ceType4 {
		return int(ce>>8) & 0xFFF
	}
	return defaultSecondary
}
// Tertiary returns the tertiary collation weight for ce. Elements of the
// ceType1 layout report defaultTertiary and quaternary elements report 0.
func (ce Elem) Tertiary() uint8 {
	switch {
	case ce&hasTertiaryMask != 0:
		if ce&ceTypeMask == ceType1 {
			return defaultTertiary
		}
		// ce is a quaternary value.
		return 0
	case ce&ceType3or4 == 0:
		// Top two bits 00: the tertiary is in the low five bits.
		return uint8(ce & 0x1F)
	case ce&ceType4 == ceType4:
		// Top three bits 101: the tertiary is the low byte.
		return uint8(ce)
	default:
		// Top three bits 100: the tertiary sits directly below the type bits.
		return uint8(ce>>24) & 0x1F
	}
}
// updateTertiary returns ce with its tertiary weight replaced by t. A ceType1
// element, which has no tertiary field, is re-encoded in the layout that
// stores the secondary as an offset from minCompactSecondary next to the
// tertiary.
func (ce Elem) updateTertiary(t uint8) Elem {
	switch {
	case ce&ceTypeMask == ceType1:
		diff := uint8(ce) - minCompactSecondary
		return ce&primaryValueMask | Elem(diff)<<compactSecondaryShift | Elem(t)
	case ce&ceTypeMaskExt == ceType3or4:
		// The tertiary sits directly below the three type bits.
		return ce&^Elem(maxTertiary<<24) | Elem(t)<<24
	default:
		// The tertiary is in the low bits.
		return ce&^Elem(maxTertiary) | Elem(t)
	}
}
// Quaternary returns the quaternary value of ce: the stored value for
// elements of the quaternary type, 0 for elements that equal Ignore once the
// bits excluded by ceIgnoreMask are disregarded, and MaxQuaternary for
// everything else.
// Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int {
	switch {
	case ce&ceTypeMask == ceTypeQ:
		return int(ce&primaryValueMask) >> primaryShift
	case ce&ceIgnoreMask == Ignore:
		return 0
	default:
		return MaxQuaternary
	}
}
// Weight returns the weight of ce at level l by dispatching to Primary,
// Secondary, Tertiary or Quaternary. Any other level yields 0 (ignore).
func (ce Elem) Weight(l Level) int {
	if l == Primary {
		return ce.Primary()
	}
	if l == Secondary {
		return ce.Secondary()
	}
	if l == Tertiary {
		return int(ce.Tertiary())
	}
	if l == Quaternary {
		return ce.Quaternary()
	}
	return 0
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
//
// See contract.go for details on the contraction trie.
const (
maxNBits = 4 // width of the n field
maxTrieIndexBits = 12 // width of the i field
maxContractOffsetBits = 13 // width of the b field
)
// splitContractIndex unpacks a contraction element into the trie index, the
// size n of the first trie node and the offset, which are stored from the
// lowest bits upwards as n, index and offset.
func splitContractIndex(ce Elem) (index, n, offset int) {
	const (
		nMask      = 1<<maxNBits - 1
		indexMask  = 1<<maxTrieIndexBits - 1
		offsetMask = 1<<maxContractOffsetBits - 1
	)
	n = int(ce & nMask)
	index = int(ce >> maxNBits & indexMask)
	offset = int(ce >> (maxNBits + maxTrieIndexBits) & offsetMask)
	return index, n, offset
}
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16

// splitExpandIndex returns the expansion index held in the low
// maxExpandIndexBits bits of ce.
func splitExpandIndex(ce Elem) (index int) {
	index = int(ce & (1<<maxExpandIndexBits - 1))
	return index
}
// Some runes can be expanded using NFKD decomposition. Rather than storing
// the full sequence of collation elements, the rune is decomposed, the
// collation elements of each rune in the decomposition are looked up, and
// their tertiary weights are modified.
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
//   - v* is the replacement tertiary weight for the first rune,
//   - w* is the replacement tertiary weight for the second rune,
//
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
//
// splitDecompose returns v as t1 and w as t2.
func splitDecompose(ce Elem) (t1, t2 uint8) {
	t1 = uint8(ce)
	t2 = uint8(ce >> 8)
	return t1, t2
}
// Rune ranges that implicitPrimary distinguishes among ideographic runes.
const (
// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
// Offsets that implicitPrimary adds to a rune to form its primary weight.
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune) // the value implicitPrimary yields for unicode.MaxRune
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for the a rune
// for which there is no entry for the rune in the collation table.
// We take a different approach from the one specified in
// https://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}
@@ -0,0 +1,183 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"testing"
"unicode"
)
// String formats e as "[primary, secondary, tertiary, quaternary]", printing
// "max" in place of a quaternary value equal to MaxQuaternary.
func (e Elem) String() string {
	quat := "max"
	if v := e.Quaternary(); v != MaxQuaternary {
		quat = fmt.Sprint(v)
	}
	p, s, t := e.Primary(), e.Secondary(), e.Tertiary()
	return fmt.Sprintf("[%d, %d, %d, %s]", p, s, t, quat)
}
// ceTest is one round-trip case for TestColElem.
type ceTest struct {
// f builds an Elem from inout, writes the values decoded from that Elem
// back into inout, and returns the Elem with its expected type.
f func(inout []int) (Elem, ceType)
// arg holds the input values, which are also the expected decoded values.
arg []int
}
// makeCE builds an Elem from weights, given as primary, secondary, tertiary
// and CCC in that order. Any error from MakeElem is discarded, as this helper
// is only meant for test fixtures.
func makeCE(weights []int) Elem {
	primary, secondary, tertiary, ccc := weights[0], weights[1], weights[2], uint8(weights[3])
	ce, _ := MakeElem(primary, secondary, tertiary, ccc)
	return ce
}
// defaultValues lists the weights (primary, secondary, tertiary, CCC) that e
// substitutes for arguments it is not given.
var defaultValues = []int{0, defaultSecondary, defaultTertiary, 0}

// e builds an Elem from up to four leading weights, taking the remaining ones
// from defaultValues.
func e(w ...int) Elem {
	missing := defaultValues[len(w):]
	weights := append(w, missing...)
	return makeCE(weights)
}
// makeContractIndex packs a contraction element from n in the lowest bits,
// index above it and offset above that, and adds the contraction type tag.
func makeContractIndex(index, n, offset int) Elem {
	const (
		contractID            = 0xC0000000
		maxNBits              = 4
		maxTrieIndexBits      = 12
		maxContractOffsetBits = 13
	)
	offsetBits := Elem(offset << (maxNBits + maxTrieIndexBits))
	indexBits := Elem(index << maxNBits)
	return Elem(contractID) + offsetBits + indexBits + Elem(n)
}
// makeExpandIndex returns an expansion element that refers to index.
func makeExpandIndex(index int) Elem {
	const expandID = 0xE0000000
	ce := Elem(index)
	ce += expandID
	return ce
}
// makeDecompose returns a decomposition element with t1 in the lowest byte
// and t2 in the byte above it.
func makeDecompose(t1, t2 int) Elem {
	const decompID = 0xF0000000
	ce := Elem(t2<<8 + t1)
	ce += decompID
	return ce
}
// normalCE encodes inout (primary, secondary, tertiary, CCC) as a normal
// element and overwrites inout with the values decoded from that element.
func normalCE(inout []int) (ce Elem, t ceType) {
	ce = makeCE(inout)
	decoded := [...]int{
		ce.Primary(),
		ce.Secondary(),
		int(ce.Tertiary()),
		int(ce.CCC()),
	}
	copy(inout, decoded[:])
	t = ceNormal
	return ce, t
}
// expandCE encodes inout[0] as an expansion index and overwrites it with the
// index decoded from the resulting element.
func expandCE(inout []int) (ce Elem, t ceType) {
	ce = makeExpandIndex(inout[0])
	inout[0] = splitExpandIndex(ce)
	t = ceExpansionIndex
	return ce, t
}
// contractCE encodes inout (index, n, offset) as a contraction element and
// overwrites inout with the values decoded from that element.
func contractCE(inout []int) (ce Elem, t ceType) {
	ce = makeContractIndex(inout[0], inout[1], inout[2])
	inout[0], inout[1], inout[2] = splitContractIndex(ce)
	t = ceContractionIndex
	return ce, t
}
// decompCE encodes inout (t1, t2) as a decomposition element and overwrites
// inout with the values decoded from that element.
func decompCE(inout []int) (ce Elem, t ceType) {
	ce = makeDecompose(inout[0], inout[1])
	first, second := splitDecompose(ce)
	inout[0] = int(first)
	inout[1] = int(second)
	t = ceDecompose
	return ce, t
}
// ceTests lists encode/decode round-trip cases for each kind of element. For
// normalCE the values are primary, secondary, tertiary and CCC; for contractCE
// index, n and offset; for expandCE the index; for decompCE t1 and t2.
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}},
{normalCE, []int{0, 30, 3, 0xFF}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
{normalCE, []int{100, defaultSecondary, 3, 0}},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
{contractCE, []int{0, 0, 0}},
{contractCE, []int{1, 1, 1}},
{contractCE, []int{1, (1 << maxNBits) - 1, 1}},
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}},
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}},
{expandCE, []int{0}},
{expandCE, []int{5}},
{expandCE, []int{(1 << maxExpandIndexBits) - 1}},
{decompCE, []int{0, 0}},
{decompCE, []int{1, 1}},
{decompCE, []int{0x1F, 0x1F}},
}
// TestColElem checks, for every entry of ceTests, that the constructed
// element has the expected type and that decoding it yields the original
// arguments.
func TestColElem(t *testing.T) {
	for i, tt := range ceTests {
		// Work on a copy so that tt.arg keeps the expected values.
		got := make([]int, len(tt.arg))
		copy(got, tt.arg)
		ce, wantType := tt.f(got)
		if typ := ce.ctype(); typ != wantType {
			t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, typ, wantType, ce)
		}
		for j := range tt.arg {
			if got[j] == tt.arg[j] {
				continue
			}
			t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, got[j], tt.arg[j], ce)
		}
	}
}
// implicitTest pairs a rune with the primary weight expected from
// implicitPrimary.
type implicitTest struct {
r rune // input rune
p int // expected primary weight
}
// implicitTests probes runes on both sides of the range boundaries that
// implicitPrimary distinguishes. The leading hex digit(s) of each expected
// value reflect the offset that was added to the rune.
var implicitTests = []implicitTest{
{0x33FF, 0x533FF},
{0x3400, 0x23400},
{0x4DC0, 0x54DC0},
{0x4DFF, 0x54DFF},
{0x4E00, 0x14E00},
{0x9FCB, 0x19FCB},
{0xA000, 0x5A000},
{0xF8FF, 0x5F8FF},
{0xF900, 0x1F900},
{0xFA23, 0x1FA23},
{0xFAD9, 0x1FAD9},
{0xFB00, 0x5FB00},
{0x20000, 0x40000},
{0x2B81C, 0x4B81C},
{unicode.MaxRune, 0x15FFFF}, // maximum primary value
}
// TestImplicit compares implicitPrimary against the expected weights listed
// in implicitTests.
func TestImplicit(t *testing.T) {
	for i := range implicitTests {
		tc := implicitTests[i]
		got := implicitPrimary(tc.r)
		if got == tc.p {
			continue
		}
		t.Errorf("%U: was %X; want %X", tc.r, got, tc.p)
	}
}
// TestUpdateTertiary checks updateTertiary against hand-encoded elements.
func TestUpdateTertiary(t *testing.T) {
	type testCase struct {
		in, out Elem
		t       uint8
	}
	cases := []testCase{
		{0x4000FE20, 0x0000FE8A, 0x0A},
		{0x4000FE21, 0x0000FEAA, 0x0A},
		{0x0000FE8B, 0x0000FE83, 0x03},
		{0x82FF0188, 0x9BFF0188, 0x1B},
		{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
	}
	for i, tc := range cases {
		got := tc.in.updateTertiary(tc.t)
		if got == tc.out {
			continue
		}
		t.Errorf("%d: was %X; want %X", i, got, tc.out)
	}
}
@@ -0,0 +1,105 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"
import (
"sort"
"golang.org/x/text/language"
)
// MatchLang finds the index of t in tags, using a matching algorithm used for
// collation and search. tags[0] must be language.Und, the remaining tags should
// be sorted alphabetically.
//
// Language matching for collation and search is different from the matching
// defined by language.Matcher: the (inferred) base language must be an exact
// match for the relevant fields. For example, "gsw" should not match "de".
// Also the parent relation is different, as a parent may have a different
// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
// zh.
//
// MatchLang returns 0, the index of the root entry, when no entry matches.
func MatchLang(t language.Tag, tags []language.Tag) int {
// Canonicalize the values, including collapsing macro languages.
t, _ = language.All.Canonicalize(t)
base, conf := t.Base()
// Estimate the base language, but only use high-confidence values.
if conf < language.High {
// The root locale supports "search" and "standard". We assume that any
// implementation will only use one of both.
return 0
}
// Maximize base and script and normalize the tag.
if _, s, r := t.Raw(); (r != language.Region{}) {
p, _ := language.Raw.Compose(base, s, r)
// Taking the parent forces the script to be maximized.
p = p.Parent()
// Add back region and extensions.
t, _ = language.Raw.Compose(p, r, t.Extensions())
} else {
// Set the maximized base language.
t, _ = language.Raw.Compose(base, s, t.Extensions())
}
// Find start index of the language tag.
// The search runs over tags[1:], skipping the root entry, hence the +1
// offsets on both the result and the index used inside the callback.
start := 1 + sort.Search(len(tags)-1, func(i int) bool {
b, _, _ := tags[i+1].Raw()
return base.String() <= b.String()
})
// If the first candidate has a different base language, no entry for base
// exists. When start == len(tags) the loops below have nothing to iterate
// over and the final return yields 0 as well.
if start < len(tags) {
if b, _, _ := tags[start].Raw(); b != base {
return 0
}
}
// Besides the base language, script and region, only the collation type and
// the custom variant defined in the 'u' extension are used to distinguish a
// locale.
// Strip all variants and extensions and add back the custom variant.
tdef, _ := language.Raw.Compose(t.Raw())
tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
// First search for a specialized collation type, if present.
try := []language.Tag{tdef}
if co := t.TypeForKey("co"); co != "" {
tco, _ := tdef.SetTypeForKey("co", co)
try = []language.Tag{tco, tdef}
}
// For each candidate, walk up its chain of structural parents and look for
// an exact match among the entries that share the base language.
for _, tx := range try {
for ; tx != language.Und; tx = parent(tx) {
// Note that t below shadows the parameter t.
for i, t := range tags[start:] {
if b, _, _ := t.Raw(); b != base {
break
}
if tx == t {
return start + i
}
}
}
}
return 0
}
// parent computes the structural parent. This means inheritance may change
// script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
//
// One component is removed per call, in this order of precedence: the "va"
// extension type, the region, the script, and finally the base language.
// A tag with none of these yields language.Und.
func parent(t language.Tag) language.Tag {
	if t.TypeForKey("va") != "" {
		stripped, _ := t.SetTypeForKey("va", "")
		return stripped
	}
	switch b, s, r := t.Raw(); {
	case r != (language.Region{}):
		up, _ := language.Raw.Compose(b, s, t.Extensions())
		return up
	case s != (language.Script{}):
		up, _ := language.Raw.Compose(b, t.Extensions())
		return up
	case b != (language.Base{}):
		up, _ := language.Raw.Compose(t.Extensions())
		return up
	}
	return language.Und
}
@@ -0,0 +1,66 @@
package colltab
import (
"testing"
"golang.org/x/text/language"
)
// TestMatchLang checks MatchLang against a fixed list of supported tags. Each
// case names the tag to look up and the index into that list that is expected
// back; index 0 is the root entry and doubles as the "no match" result.
func TestMatchLang(t *testing.T) {
	supported := []language.Tag{
		0:  language.Und,
		1:  language.MustParse("bs"),
		2:  language.German,
		3:  language.English,
		4:  language.AmericanEnglish,
		5:  language.MustParse("en-US-u-va-posix"),
		6:  language.Portuguese,
		7:  language.Serbian,
		8:  language.MustParse("sr-Latn"),
		9:  language.Chinese,
		10: language.MustParse("zh-u-co-stroke"),
		11: language.MustParse("zh-Hant-u-co-pinyin"),
		12: language.TraditionalChinese,
	}

	type matchCase struct {
		x int
		t language.Tag
	}
	cases := []matchCase{
		{0, language.Und},
		{0, language.Persian}, // Default to first element when no match.
		{3, language.English},
		{4, language.AmericanEnglish},
		{5, language.MustParse("en-US-u-va-posix")},   // Ext. variant match.
		{4, language.MustParse("en-US-u-va-noposix")}, // Ext. variant mismatch.
		{3, language.MustParse("en-UK-u-va-noposix")}, // Ext. variant mismatch.
		{7, language.Serbian},
		{0, language.Croatian},             // Don't match to close language!
		{0, language.MustParse("gsw")},     // Don't match to close language!
		{1, language.MustParse("bs-Cyrl")}, // Odd, but correct.
		{1, language.MustParse("bs-Latn")}, // Estimated script drops.
		{8, language.MustParse("sr-Latn")},
		{9, language.Chinese},
		{9, language.SimplifiedChinese},
		{12, language.TraditionalChinese},
		{11, language.MustParse("zh-Hant-u-co-pinyin")},
		// TODO: should this be 12? Either inherited value (10) or default is
		// fine in this case, though. Other locales are not affected.
		{10, language.MustParse("zh-Hant-u-co-stroke")},
		// There is no "phonebk" sorting order for zh-Hant, so use default.
		{12, language.MustParse("zh-Hant-u-co-phonebk")},
		{10, language.MustParse("zh-u-co-stroke")},
		{12, language.MustParse("und-TW")},     // Infer script and language.
		{12, language.MustParse("und-HK")},     // Infer script and language.
		{6, language.MustParse("und-BR")},      // Infer script and language.
		{6, language.MustParse("und-PT")},      // Infer script and language.
		{2, language.MustParse("und-Latn-DE")}, // Infer language.
		{0, language.MustParse("und-Jpan-BR")}, // Infers "ja", so no match.
		{0, language.MustParse("zu")},          // No match past index.
	}

	for i, tc := range cases {
		name := tc.t.String()
		t.Run(name, func(t *testing.T) {
			got := MatchLang(tc.t, supported)
			if got == tc.x {
				return
			}
			t.Errorf("%d: MatchLang(%q, tags) = %d; want %d", i, tc.t, got, tc.x)
		})
	}
}
@@ -0,0 +1,145 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import "unicode/utf8"
// For a description of ContractTrieSet, see text/collate/build/contract.go.
//
// As used by the scanners in this file: L is the byte an entry matches, or
// the low end of a byte range; N is the size of the node to continue with, or
// final when the entry ends a match; H is the high end of the byte range when
// N == final, and otherwise contributes to the position of the next node; I
// is the result index, or noIndex when the entry carries none.
type ContractTrieSet []struct{ L, H, N, I uint8 }
// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
states ContractTrieSet // trie entries, starting at the current node
s []byte // input being matched
n int // number of entries in the current node
index int // index of the last match; first value returned by result
pindex int // input position just past the last match; second value returned by result
done bool // set by scan once an entry with N == final has matched
}
// ctScannerString is the counterpart of ctScanner for string input; its
// fields have the same meaning.
type ctScannerString struct {
states ContractTrieSet // trie entries, starting at the current node
s string // input being matched
n int // number of entries in the current node
index int // index of the last match; first value returned by result
pindex int // input position just past the last match; second value returned by result
done bool // set by scan once an entry with N == final has matched
}
// scanner returns a ctScanner over b whose first node consists of the n
// entries of t starting at index.
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
	var sc ctScanner
	sc.states = t[index:]
	sc.s = b
	sc.n = n
	return sc
}
// scannerString returns a ctScannerString over str whose first node consists
// of the n entries of t starting at index.
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
	var sc ctScannerString
	sc.states = t[index:]
	sc.s = str
	sc.n = n
	return sc
}
// result reports the outcome of the scan so far: the offset i of the last
// match and the number of input bytes p consumed by it. Both are 0 if no
// suffix matched.
func (s *ctScanner) result() (i, p int) {
	i = s.index
	p = s.pindex
	return i, p
}
// result reports the outcome of the scan so far: the offset i of the last
// match and the number of input bytes p consumed by it. Both are 0 if no
// suffix matched.
func (s *ctScannerString) result() (i, p int) {
	i = s.index
	p = s.pindex
	return i, p
}
// Sentinel values for the N and I fields of ContractTrieSet entries.
const (
final = 0 // N value of an entry that ends a match
noIndex = 0xFF // I value of an entry that carries no result index
)
// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
//
// p is the position in the input at which to start matching. The returned
// value is also a position in the input: the position just past the match
// when a final entry matched, and otherwise the last position reached at
// which a rune starts (or the end of the input). The index and position of
// the most recent match are recorded for result.
func (s *ctScanner) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
// i walks the entries of the current node. It is not advanced in the loop
// header because a match restarts it at 0 for the next node.
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
// Exact match on the entry's byte.
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
// Continue with the node that this entry points to.
i, states, n = 0, states[int(e.H)+n:], int(e.N)
// Only commit the new state at a rune boundary, so that a later call
// resumes from a complete rune.
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
// A final entry matches any byte in [L, H]; the result index is offset
// by the byte's position within that range.
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
//
// p is the position in the input at which to start matching. The returned
// value is also a position in the input: the position just past the match
// when a final entry matched, and otherwise the last position reached at
// which a rune starts (or the end of the input). The index and position of
// the most recent match are recorded for result.
func (s *ctScannerString) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
// i walks the entries of the current node. It is not advanced in the loop
// header because a match restarts it at 0 for the next node.
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
// Exact match on the entry's byte.
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
// Continue with the node that this entry points to.
i, states, n = 0, states[int(e.H)+n:], int(e.N)
// Only commit the new state at a rune boundary, so that a later call
// resumes from a complete rune.
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
// A final entry matches any byte in [L, H]; the result index is offset
// by the byte's position within that range.
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
@@ -0,0 +1,131 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// lookupStrings is one input together with the result expected from a
// contraction lookup on it.
type lookupStrings struct {
str string // input to scan
offset int // expected result offset
n int // bytes consumed from input
}
// LookupTest bundles a contraction trie with the lookups to run against it.
type LookupTest struct {
lookup []lookupStrings // inputs and expected results
n int // number of entries in the first node of tries
tries ContractTrieSet // trie under test
}
// lookupTests holds the tries and lookups exercised by TestLookupContraction.
// A lookup with offset 0 and n 0 expects no match.
var lookupTests = []LookupTest{{
[]lookupStrings{
{"abc", 1, 3},
{"a", 0, 0},
{"b", 0, 0},
{"c", 0, 0},
{"d", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"abe", 3, 3},
{"a", 0, 0},
{"ab", 0, 0},
{"d", 0, 0},
{"f", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'e', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"ab", 2, 2},
{"a", 3, 1},
{"abcd", 1, 3},
{"abe", 2, 2},
},
1,
ContractTrieSet{
{'a', 0, 1, 3},
{'b', 0, 1, 2},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"ab", 3, 2},
{"ac", 4, 2},
{"a", 5, 1},
{"b", 6, 1},
{"ba", 6, 1},
},
2,
ContractTrieSet{
{'b', 'b', 0, 6},
{'a', 0, 2, 5},
{'c', 'c', 0, 4},
{'b', 0, 1, 3},
{'c', 'd', 0, 1},
},
}, {
[]lookupStrings{
{"bcde", 2, 4},
{"bc", 7, 2},
{"ab", 6, 2},
{"bcd", 5, 3},
{"abcd", 1, 4},
{"abc", 4, 3},
{"bcdf", 3, 4},
},
2,
ContractTrieSet{
{'b', 3, 1, 0xFF},
{'a', 0, 1, 0xFF},
{'b', 0, 1, 6},
{'c', 0, 1, 4},
{'d', 'd', 0, 1},
{'c', 0, 1, 7},
{'d', 0, 1, 5},
{'e', 'f', 0, 2},
},
}}
// lookup scans s once from its start against the trie c, whose first node has
// nnode entries, and returns the matched offset i and the number of bytes n
// consumed by the match.
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
	sc := c.scanner(0, nnode, s)
	sc.scan(0)
	i, n = sc.result()
	return i, n
}
// TestLookupContraction runs every lookup of lookupTests twice, once on the
// input itself and once with a trailing "X" appended, and checks the reported
// offset and number of bytes consumed. The extra byte must not change the
// result.
func TestLookupContraction(t *testing.T) {
	for i, tt := range lookupTests {
		cts := ContractTrieSet(tt.tries)
		for j, lu := range tt.lookup {
			str := lu.str
			for _, s := range []string{str, str + "X"} {
				const msg = `%d:%d: %s of "%s" %v; want %v`
				offset, n := lookup(&cts, tt.n, []byte(s))
				if offset != lu.offset {
					t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
				}
				if n != lu.n {
					// Report the value that was actually compared against. The
					// length of the input is not the expectation: a lookup
					// without a match expects 0 bytes consumed.
					t.Errorf(msg, i, j, "bytes consumed", s, n, lu.n)
				}
			}
		}
	}
}
@@ -0,0 +1,178 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// An Iter incrementally converts chunks of the input text to collation
// elements, while ensuring that the collation elements are in normalized order
// (that is, they are in the order as if the input text were normalized first).
type Iter struct {
// Weighter produces the collation elements for the input.
Weighter Weighter
// Elems holds the collation elements obtained so far.
Elems []Elem
// N is the number of elements in Elems that will not be reordered on
// subsequent iterations, N <= len(Elems).
N int
// The input text is held in either bytes or str. bytes is used when it is
// non-nil, str otherwise.
bytes []byte
str string
// Because the Elems buffer may contain collation elements that are needed
// for look-ahead, we need two positions in the text (bytes or str): one for
// the end position in the text for the current iteration and one for the
// start of the next call to appendNext.
pEnd int // end position in text corresponding to N.
pNext int // pEnd <= pNext.
}
// Reset repositions the iterator at offset p of the current input text and
// drops all collation elements collected so far. The backing storage of Elems
// is kept for reuse.
func (i *Iter) Reset(p int) {
	i.pEnd, i.pNext = p, p
	i.N = 0
	i.Elems = i.Elems[:0]
}
// Len returns the length in bytes of the input text, taken from the byte
// slice when one is set and from the string otherwise.
func (i *Iter) Len() int {
	if i.bytes == nil {
		return len(i.str)
	}
	return len(i.bytes)
}
// Discard drops the first N collation elements by moving the remaining ones
// to the front of Elems, and resets N to 0.
func (i *Iter) Discard() {
	// TODO: change this such that only modifiers following starters will have
	// to be copied.
	kept := copy(i.Elems, i.Elems[i.N:])
	i.Elems = i.Elems[:kept]
	i.N = 0
}
// End returns the position in the input text up to which Next has returned
// results.
func (i *Iter) End() int {
	end := i.pEnd
	return end
}
// SetInput makes the byte slice s the input of i, clears any string input and
// restarts iteration at position 0.
func (i *Iter) SetInput(s []byte) {
	i.str, i.bytes = "", s
	i.Reset(0)
}
// SetInputString makes the string s the input of i, clears any byte slice
// input and restarts iteration at position 0.
func (i *Iter) SetInputString(s string) {
	i.bytes, i.str = nil, s
	i.Reset(0)
}
// done reports whether the read position has reached the end of both the
// string input and the byte slice input.
func (i *Iter) done() bool {
	pending := i.pNext < len(i.str) || i.pNext < len(i.bytes)
	return !pending
}
// appendNext asks the Weighter for the collation elements of the text at the
// read position, appends them to Elems and advances the read position. It
// returns false, without doing anything, when the input is exhausted.
func (i *Iter) appendNext() bool {
	if i.done() {
		return false
	}
	var consumed int
	if i.bytes != nil {
		i.Elems, consumed = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
	} else {
		i.Elems, consumed = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
	}
	// Always advance by at least one byte so that iteration makes progress.
	if consumed == 0 {
		consumed = 1
	}
	i.pNext += consumed
	return true
}
// Next appends Elems to the internal array. On each iteration, it will either
// add starters or modifiers. In the majority of cases, an Elem with a primary
// value > 0 will have a CCC of 0. The CCC values of collation elements are also
// used to detect if the input string was not normalized and to adjust the
// result accordingly.
//
// Next updates N and the end position to cover the elements that are final
// after this call. It returns false when no further elements are available.
func (i *Iter) Next() bool {
// Only fetch more input when all buffered elements have been handed out.
if i.N == len(i.Elems) && !i.appendNext() {
return false
}
// Check if the current segment starts with a starter.
prevCCC := i.Elems[len(i.Elems)-1].CCC()
if prevCCC == 0 {
// The buffer ends in an element with CCC 0; everything in it is final.
i.N = len(i.Elems)
i.pEnd = i.pNext
return true
} else if i.Elems[i.N].CCC() == 0 {
// set i.N to only cover part of i.Elems for which prevCCC == 0 and
// use rest for the next call to next.
for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
}
i.pEnd = i.pNext
return true
}
// The current (partial) segment starts with modifiers. We need to collect
// all successive modifiers to ensure that they are normalized.
for {
// p is the index of the first element added by the appendNext call below.
p := len(i.Elems)
i.pEnd = i.pNext
if !i.appendNext() {
break
}
if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
// Leave the starter for the next iteration. This ensures that we
// do not return sequences of collation elements that cross two
// segments.
//
// TODO: handle large number of combining characters by fully
// normalizing the input segment before iteration. This ensures
// results are consistent across the text repo.
i.N = p
return true
} else if ccc < prevCCC {
i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
} else {
prevCCC = ccc
}
}
// The input is exhausted. Report whether any element was left to hand out.
done := len(i.Elems) != i.N
i.N = len(i.Elems)
return done
}
// nextNoNorm is a variant of Next that appends the next chunk of collation
// elements without reordering them, and marks every buffered element as
// final. It returns false when the input is exhausted.
func (i *Iter) nextNoNorm() bool {
	// TODO: remove this function. Using this instead of next does not seem
	// to improve performance in any significant way. We retain this until
	// later for evaluation purposes.
	if !i.appendNext() {
		// appendNext only fails when the input is exhausted.
		return false
	}
	i.N = len(i.Elems)
	return true
}
// maxCombiningCharacters bounds the number of pending collation elements that
// Next collects while gathering successive modifiers; once the count exceeds
// it, Next stops extending the segment.
const maxCombiningCharacters = 30
// doNorm reorders the collation elements in i.Elems.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
//
// p is the index of the first element of the block that was appended last and
// ccc is the CCC of that element. The block is moved in front of the
// preceding elements whose CCC is greater than ccc, without going below i.N.
func (i *Iter) doNorm(p int, ccc uint8) {
n := len(i.Elems)
k := p
// Find the insertion point. The element just before the block is not
// tested; the search starts at the one before it.
for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
}
// Move Elems[p:k] behind the block: append a copy of it at the end, shift
// everything from k onwards down to p, and cut off the surplus tail.
i.Elems = append(i.Elems, i.Elems[p:k]...)
copy(i.Elems[p:], i.Elems[k:])
i.Elems = i.Elems[:n]
}
@@ -0,0 +1,63 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// TestDoNorm checks that doNorm moves the block of collation elements that
// follows the divider to the position dictated by its CCC, and that it does
// not reorder elements sharing the same CCC.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	tests := []struct {
		in, out []int
	}{{
		in:  []int{4, div, 3},
		out: []int{3, 4},
	}, {
		in:  []int{4, div, 3, 3, 3},
		out: []int{3, 3, 3, 4},
	}, {
		in:  []int{0, 4, div, 3},
		out: []int{0, 3, 4},
	}, {
		in:  []int{0, 0, 4, 5, div, 3, 3},
		out: []int{0, 0, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 3, 3},
		out: []int{0, 0, 1, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 4, 4},
		out: []int{0, 0, 1, 4, 4, 4, 5},
	},
	}
	for j, tt := range tests {
		i := Iter{}
		var w, p int
		for k, cc := range tt.in {
			if cc == div {
				// Elements after the divider get a distinct primary weight so
				// that they can be told apart from the ones before it.
				w = 100
				p = k
				continue
			}
			i.Elems = append(i.Elems, makeCE([]int{w, defaultSecondary, 2, cc}))
		}
		i.doNorm(p, i.Elems[p].CCC())
		if len(i.Elems) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.Elems), len(tt.out))
			// The per-element checks index tt.out and would be out of range.
			continue
		}
		prevCCC := uint8(0)
		for k, ce := range i.Elems {
			if int(ce.CCC()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
			}
			// Within a run of equal CCC values the moved block (primary 100)
			// must never end up in front of an original element (primary 0).
			if k > 0 && ce.CCC() == prevCCC && i.Elems[k-1].Primary() > ce.Primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
			// Track the previous element's CCC; without this the check above
			// would only ever compare against 0.
			prevCCC = ce.CCC()
		}
	}
	// Combining rune overflow is tested in search/pattern_test.go.
}
@@ -0,0 +1,236 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode"
"unicode/utf8"
)
// NewNumericWeighter wraps w to replace individual digits to sort based on their
// numeric value.
//
// Weighter w must have a free primary weight after the primary weight for 9.
// If this is not the case, numeric value will sort at the same primary level
// as the first primary sorting after 9.
//
// The digit boundaries are obtained by asking w for the collation elements of
// a few reference digits, so w must define elements for each of them.
func NewNumericWeighter(w Weighter) Weighter {
	// getElem returns the first collation element w produces for s.
	getElem := func(s string) Elem {
		elems, _ := w.AppendNextString(nil, s)
		return elems[0]
	}
	nine := getElem("9")

	// Numbers should order before zero, but the DUCET has no room for this.
	// TODO: move before zero once we use fractional collation elements.
	ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)

	return &numericWeighter{
		Weighter: w,

		// We assume that w sorts digits of different kinds in order of numeric
		// value and that the tertiary weight order is preserved.
		//
		// TODO: evaluate whether it is worth basing the ranges on the Elem
		// encoding itself once the move to fractional weights is complete.
		//
		// The non-ASCII reference digits are written as escapes so that they
		// cannot be silently lost or altered by tools handling this file.
		zero:          getElem("0"),
		zeroSpecialLo: getElem("\uff10"), // U+FF10 FULLWIDTH DIGIT ZERO
		zeroSpecialHi: getElem("\u2080"), // U+2080 SUBSCRIPT ZERO
		nine:          nine,
		nineSpecialHi: getElem("\u2089"), // U+2089 SUBSCRIPT NINE
		numberStart:   ns,
	}
}
// A numericWeighter translates a stream of digits into a stream of weights
// representing the numeric value.
//
// It embeds the wrapped Weighter, so every method it does not override is
// served by that Weighter directly.
type numericWeighter struct {
Weighter
// The Elems below all demarcate boundaries of specific ranges. With the
// current element encoding digits are in two ranges: normal (default
// tertiary value) and special. For most languages, digits have collation
// elements in the normal range.
//
// Note: the range tests are very specific for the element encoding used by
// this implementation. The tests in collate_test.go are designed to fail
// if this code is not updated when an encoding has changed.
zero Elem // normal digit zero
zeroSpecialLo Elem // special digit zero, low tertiary value
zeroSpecialHi Elem // special digit zero, high tertiary value
nine Elem // normal digit nine
nineSpecialHi Elem // special digit nine
// numberStart is the marker element inserted in front of every converted
// number.
numberStart Elem
}
// AppendNext calls the namesake of the underlying weighter, but replaces single
// digits with weights representing their value.
//
// If s does not start with a digit, the result of the wrapped Weighter is
// returned unchanged. Otherwise the whole run of digits is consumed and
// replaced by a number-start marker, a length element and the digits without
// leading zeros. The elements of the character that terminates the run are
// appended as well, and its bytes are included in n.
func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
	ce, n = nw.Weighter.AppendNext(buf, s)
	nc := numberConverter{
		elems: buf,
		w:     nw,
		b:     s,
	}
	isZero, ok := nc.checkNextDigit(ce)
	if !ok {
		return ce, n
	}
	// ce might have been grown already, so take it instead of buf.
	nc.init(ce, len(buf), isZero)
	for n < len(s) {
		ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
		// Point the converter at the character that produced ce, so that
		// isDigit inspects the current rune rather than the first one of s.
		nc.b = s[n:]
		n += sz
		if !nc.update(ce) {
			break
		}
	}
	return nc.result(), n
}
// AppendNextString calls the namesake of the underlying weighter, but replaces
// single digits with weights representing their value.
//
// If s does not start with a digit, the result of the wrapped Weighter is
// returned unchanged. Otherwise the whole run of digits is consumed and
// replaced by a number-start marker, a length element and the digits without
// leading zeros. The elements of the character that terminates the run are
// appended as well, and its bytes are included in n.
func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
	ce, n = nw.Weighter.AppendNextString(buf, s)
	nc := numberConverter{
		elems: buf,
		w:     nw,
		s:     s,
	}
	isZero, ok := nc.checkNextDigit(ce)
	if !ok {
		return ce, n
	}
	// ce might have been grown already, so take it instead of buf.
	nc.init(ce, len(buf), isZero)
	for n < len(s) {
		ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
		// Point the converter at the character that produced ce, so that
		// isDigit inspects the current rune rather than the first one of s.
		nc.s = s[n:]
		n += sz
		if !nc.update(ce) {
			break
		}
	}
	return nc.result(), n
}
// numberConverter holds the state needed to turn a run of digit collation
// elements into the numeric representation produced by numericWeighter.
type numberConverter struct {
// w supplies the digit range boundaries and the number-start marker.
w *numericWeighter
// elems is the buffer of collation elements accepted so far.
elems []Elem
// nDigits counts the digits recorded so far; leading zeros are not counted.
nDigits int
// lenIndex is the index in elems of the length element filled in by result.
lenIndex int
s string // set if the input was of type string
b []byte // set if the input was of type []byte
}
// init finishes setting up nc after the first digit has been seen, so that
// further digits can be added. elems is expected to hold the elements of that
// first digit starting at index oldLen.
//
// A number-start marker and a placeholder for the digit count are inserted at
// oldLen. A leading zero is discarded; any other digit is kept after the
// placeholder and counted.
func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
	nc.lenIndex = oldLen + 1
	if isZero {
		// Overwrite the zero digit with the marker and the placeholder.
		nc.elems = append(elems[:oldLen], nc.w.numberStart, 0)
		return
	}
	// Make room for two elements and shift the digit behind them.
	elems = append(elems, 0, 0)
	copy(elems[oldLen+2:], elems[oldLen:])
	elems[oldLen], elems[oldLen+1] = nc.w.numberStart, 0
	nc.elems = elems
	nc.nDigits = 1
}
// checkNextDigit reports whether bufNew adds a single digit relative to the old
// buffer. If it does, it also reports whether this digit is zero.
//
// The old buffer is nc.elems; the elements under test are those of bufNew
// from index len(nc.elems) onwards.
func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
// Nothing was added.
if len(nc.elems) >= len(bufNew) {
return false, false
}
// e is the first newly added element; it must lie within the overall range
// spanned by the special and normal digit elements.
e := bufNew[len(nc.elems)]
if e < nc.w.zeroSpecialLo || nc.w.nine < e {
// Not a number.
return false, false
}
if e < nc.w.zero {
// e sorts below the normal digits, so it must be in the special range.
if e > nc.w.nineSpecialHi {
// Not a number.
return false, false
}
if !nc.isDigit() {
return false, false
}
isZero = e <= nc.w.zeroSpecialHi
} else {
// This is the common case if we encounter a digit.
isZero = e == nc.w.zero
}
// Test the remaining added collation elements have a zero primary value.
if n := len(bufNew) - len(nc.elems); n > 1 {
for i := len(nc.elems) + 1; i < len(bufNew); i++ {
if bufNew[i].Primary() != 0 {
return false, false
}
}
// In some rare cases, collation elements will encode runes in
// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
// are not in Nd. Also some digits that clearly belong in unicode.No,
// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
// collation elements indistinguishable from normal digits.
// Unfortunately, this means we need to make this check for nearly all
// non-Latin digits.
//
// TODO: check the performance impact and find something better if it is
// an issue.
if !nc.isDigit() {
return false, false
}
}
return isZero, true
}
// isDigit reports whether the first rune of the converter's input is in the
// Unicode category Nd. The byte slice input is used when it is non-nil, the
// string input otherwise.
func (nc *numberConverter) isDigit() bool {
	var first rune
	if nc.b == nil {
		first, _ = utf8.DecodeRuneInString(nc.s)
	} else {
		first, _ = utf8.DecodeRune(nc.b)
	}
	return unicode.In(first, unicode.Nd)
}
// maxDigits is the number of digits after which update stops accepting more
// digits for a single number.
//
// We currently support a maximum of about 2M digits (the number of primary
// values). Such numbers will compare correctly against small numbers, but their
// comparison against other large numbers is undefined.
//
// TODO: define a proper fallback, such as comparing large numbers textually or
// actually allowing numbers of unlimited length.
//
// TODO: cap this to a lower number (like 100) and maybe allow a larger number
// in an option?
const maxDigits = 1<<maxPrimaryBits - 1
// update processes the elements returned for the next character. elems is the
// current buffer extended with that character's elements. It reports whether
// the caller should keep feeding characters.
//
// Leading zeros are dropped by not adopting elems. For anything else elems
// becomes the new buffer, including the elements of a non-digit that ends the
// number.
func (nc *numberConverter) update(elems []Elem) bool {
	isZero, ok := nc.checkNextDigit(elems)
	if isZero && nc.nDigits == 0 {
		return true
	}
	nc.elems = elems
	if ok {
		nc.nDigits++
		return nc.nDigits < maxDigits
	}
	return false
}
// result stores the number of digits in the placeholder element reserved by
// init and returns the finished buffer of collation elements.
func (nc *numberConverter) result() []Elem {
	length, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
	nc.elems[nc.lenIndex] = length
	return nc.elems
}
@@ -0,0 +1,159 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"reflect"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
)
// digSec and digTert are the secondary and tertiary weights given to the
// plain digits of the test weighter.
const (
digSec = defaultSecondary
digTert = defaultTertiary
)
// tPlus3 is a collation element with a zero primary weight; the test weighter
// appends it to some digits to exercise multi-element digits.
var tPlus3 = e(0, 50, digTert+3)
// numWeighter is a testWeighter used for testing numericWeighter.
//
// Fullwidth digits are written as escapes: each key in the map literal must be
// distinct, and the escapes keep these characters from being lost or altered
// by tools handling this file.
var numWeighter = testWeighter{
	"0":      p(100),
	"\uff10": []Elem{e(100, digSec, digTert+1)}, // U+FF10 FULLWIDTH DIGIT ZERO
	"₀":      []Elem{e(100, digSec, digTert+5)}, // U+2080 SUBSCRIPT ZERO

	"1": p(101),
	// Allow non-primary collation elements to be inserted.
	"١": append(p(101), tPlus3), // U+0661 ARABIC-INDIC DIGIT ONE
	// Allow varying tertiary weight if the number is Nd.
	"\uff11": []Elem{e(101, digSec, digTert+1)}, // U+FF11 FULLWIDTH DIGIT ONE
	"2":      p(102),
	// Allow non-primary collation elements to be inserted.
	"٢": append(p(102), tPlus3), // U+0662 ARABIC-INDIC DIGIT TWO
	// Varying tertiary weights should be ignored.
	"\uff12": []Elem{e(102, digSec, digTert+3)}, // U+FF12 FULLWIDTH DIGIT TWO
	"3":      p(103),
	"4":      p(104),
	"5":      p(105),
	"6":      p(106),
	"7":      p(107),
	// Weights must be strictly monotonically increasing, but do not need to be
	// consecutive.
	"8": p(118),
	"9": p(119),
	// Allow non-primary collation elements to be inserted.
	"٩": append(p(119), tPlus3), // U+0669 ARABIC-INDIC DIGIT NINE
	// Varying tertiary weights should be ignored.
	"\uff19": []Elem{e(119, digSec, digTert+1)}, // U+FF19 FULLWIDTH DIGIT NINE
	"₉":      []Elem{e(119, digSec, digTert+5)}, // U+2089 SUBSCRIPT NINE

	"a":   p(5),
	"b":   p(6),
	"c":   p(8, 2),
	"klm": p(99),
	"nop": p(121),
	"x":   p(200),
	"y":   p(201),
}
// p builds one collation element per given primary weight, each with the
// digit secondary and tertiary weights. It returns nil when called without
// arguments.
func p(w ...int) (elems []Elem) {
	for i := range w {
		ce, _ := MakeElem(w[i], digSec, digTert, 0)
		elems = append(elems, ce)
	}
	return elems
}
// TestNumericAppendNext feeds each input through AppendNext and
// AppendNextString of a numeric weighter until it is fully consumed, and
// compares the accumulated collation elements with the expected ones.
func TestNumericAppendNext(t *testing.T) {
	for _, tt := range []struct {
		in string
		w  []Elem
	}{
		{"a", p(5)},
		{"klm", p(99)},
		{"aa", p(5, 5)},
		{"1", p(120, 1, 101)},
		{"0", p(120, 0)},
		{"01", p(120, 1, 101)},
		{"0001", p(120, 1, 101)},
		{"10", p(120, 2, 101, 100)},
		{"99", p(120, 2, 119, 119)},
		{"9999", p(120, 4, 119, 119, 119, 119)},
		{"1a", p(120, 1, 101, 5)},
		{"0b", p(120, 0, 6)},
		{"01c", p(120, 1, 101, 8, 2)},
		{"10x", p(120, 2, 101, 100, 200)},
		{"99y", p(120, 2, 119, 119, 201)},
		{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},

		// Allow follow-up collation elements if they have a zero primary
		// weight.
		{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
		{
			// Fullwidth digits one, two and nine (U+FF11, U+FF12, U+FF19);
			// the expected elements carry their distinct tertiary weights.
			"\uff11\uff12\uff19",
			[]Elem{
				e(120), e(3),
				e(101, digSec, digTert+1),
				e(102, digSec, digTert+3),
				e(119, digSec, digTert+1),
			},
		},

		// Ensure AppendNext* adds to the given buffer.
		{"a10", p(5, 120, 2, 101, 100)},
	} {
		nw := NewNumericWeighter(numWeighter)

		b := []byte(tt.in)
		got := []Elem(nil)
		for n, sz := 0, 0; n < len(b); {
			got, sz = nw.AppendNext(got, b[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNext(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}

		got = nil
		for n, sz := 0, 0; n < len(tt.in); {
			got, sz = nw.AppendNextString(got, tt.in[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNextString(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}
	}
}
// TestNumericOverflow checks that a run of more than maxDigits digits is cut
// off after maxDigits digits, and that the length element records maxDigits.
func TestNumericOverflow(t *testing.T) {
	manyDigits := strings.Repeat("9", maxDigits+1) + "a"

	nw := NewNumericWeighter(numWeighter)

	got, n := nw.AppendNextString(nil, manyDigits)

	if n != maxDigits {
		t.Errorf("n: got %d; want %d", n, maxDigits)
	}

	// got[0] is the number-start marker; got[1] holds the digit count.
	if primary := got[1].Primary(); primary != maxDigits {
		t.Errorf("primary(e[1]): got %d; want %d", primary, maxDigits)
	}
}
// TestNumericWeighterAlloc checks that wrapping a weighter in a numeric
// weighter does not add allocations to AppendNextString.
func TestNumericWeighterAlloc(t *testing.T) {
	scratch := make([]Elem, 100)
	numeric := NewNumericWeighter(numWeighter)
	input := "1234567890a"

	baseline := testtext.AllocsPerRun(3, func() { numWeighter.AppendNextString(scratch, input) })
	wrapped := testtext.AllocsPerRun(3, func() { numeric.AppendNextString(scratch, input) })
	if extra := wrapped - baseline; extra > 0 {
		t.Errorf("got %f; want 0", extra)
	}
}
@@ -0,0 +1,275 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// Table holds all collation data for a given collation ordering.
type Table struct {
Index Trie // main trie
// expansion info
// Each expansion is stored as a count followed by that many elements.
ExpandElem []uint32
// contraction info
ContractTries ContractTrieSet
ContractElem []uint32
MaxContractLen int
// VariableTop is the value returned by Top.
VariableTop uint32
}
// AppendNext appends the weights for the next rune or contraction in b to w.
// It returns the extended slice and the number of bytes consumed from b.
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b})
}
// AppendNextString appends the weights for the next rune or contraction in s
// to w. It returns the extended slice and the number of bytes consumed from s.
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
return t.appendNext(w, source{str: s})
}
// Start is not implemented for Table and always panics.
func (t *Table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
// StartString is not implemented for Table and always panics.
func (t *Table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
// Domain is not implemented for Table and always panics.
func (t *Table) Domain() []string {
// TODO: implement
panic("not implemented")
}
// Top returns the table's VariableTop value.
func (t *Table) Top() uint32 {
return t.VariableTop
}
// source is the input handed to appendNext, given either as a string or as a
// byte slice. Its methods use bytes when it is non-nil and str otherwise.
type source struct {
str string
bytes []byte
}
// lookup returns the value t's main trie holds for the start of the input,
// together with the size the trie reports for it.
func (src *source) lookup(t *Table) (ce Elem, sz int) {
	if src.bytes != nil {
		return t.Index.lookup(src.bytes)
	}
	return t.Index.lookupString(src.str)
}
// tail drops the first sz bytes from the input.
func (src *source) tail(sz int) {
	if src.bytes != nil {
		src.bytes = src.bytes[sz:]
		return
	}
	src.str = src.str[sz:]
}
// nfd writes the NFD form of the first end bytes of the input into buf,
// starting at the beginning of buf, and returns the result.
func (src *source) nfd(buf []byte, end int) []byte {
	dst := buf[:0]
	if src.bytes != nil {
		return norm.NFD.Append(dst, src.bytes[:end]...)
	}
	return norm.NFD.AppendString(dst, src.str[:end])
}
// rune decodes the first rune of the input and returns it with its width in
// bytes.
func (src *source) rune() (r rune, sz int) {
	if src.bytes != nil {
		return utf8.DecodeRune(src.bytes)
	}
	return utf8.DecodeRuneInString(src.str)
}
// properties returns the properties that normalization form f reports for the
// start of the input.
func (src *source) properties(f norm.Form) norm.Properties {
	if src.bytes != nil {
		return f.Properties(src.bytes)
	}
	return f.PropertiesString(src.str)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
// The type of the looked-up element decides how it expands into weights.
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
// The trie has no value for this input: handle Hangul syllables by
// decomposition and give everything else an implicit weight.
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
// Remember the size of the syllable; sz is reused by the loop below.
n = sz
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.Index.lookup(b)
w = append(w, ce)
}
return w, n
}
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
} else if tp == ceExpansionIndex {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
// This n shadows the named result; it is the number of bytes matched
// after the first rune and is added to sz below.
n := 0
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, source{bytes: nfkd})
}
// The first appended element gets t1, the second t2 and all further
// ones maxTertiary.
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
w[i] = w[i].updateTertiary(t2)
for i++; i < len(w); i++ {
w[i] = w[i].updateTertiary(maxTertiary)
}
}
}
return w, sz
}
// appendExpansion appends the expansion that ce refers to. The expansion is
// stored in t.ExpandElem as a count followed by that many elements.
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
	idx := splitExpandIndex(ce)
	count := int(t.ExpandElem[idx])
	first := idx + 1
	for _, v := range t.ExpandElem[first : first+count] {
		w = append(w, Elem(v))
	}
	return w
}
// matchContraction appends to w the weights of the contraction that ce refers
// to, matched against suffix, followed by the weights of any skipped
// (interstitial) runes. It returns the extended slice and the length reported
// by the scanner's result, which appendNext adds to the bytes it consumed.
func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scanner(index, n, suffix)
// buf collects the bytes of skipped runes; bufp is how many of them are
// processed after the contraction.
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.Properties(suffix[p:])
// Stop at a starter or when the CCC does not increase.
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.

// matchContractionString is the string counterpart of matchContraction: it
// appends to w the weights of the contraction that ce refers to, matched
// against suffix, followed by the weights of any skipped (interstitial) runes.
// It returns the extended slice and the length reported by the scanner's
// result.
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scannerString(index, n, suffix)
// buf collects the bytes of skipped runes; bufp is how many of them are
// processed after the contraction.
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.PropertiesString(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.PropertiesString(suffix[p:])
// Stop at a starter or when the CCC does not increase.
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
@@ -0,0 +1,159 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character in an
// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table of collation elements.
// For a full description, see go.text/collate/build/trie.go.
package colltab
// blockSize is the number of entries in one block of the trie tables. The
// lookup code addresses blocks with a shift by 6, i.e. in steps of 64.
const blockSize = 64
// Trie maps the first UTF-8 encoded character of an input to a collation
// element value.
type Trie struct {
Index0 []uint16 // index for first byte (0xC0-0xFF)
Values0 []uint32 // values for single-byte input (0x00-0x7F)
// Index holds the blocks consulted for the intermediate bytes of three- and
// four-byte sequences.
Index []uint16
// Values holds the blocks consulted for the last byte of a multi-byte
// sequence.
Values []uint32
}
// Byte thresholds used by lookup and lookupString to classify the first byte
// of a sequence by length and to validate continuation bytes, which must lie
// in [tx, t2).
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
)
// lookupValue returns the entry for byte b in value block n. The byte is added
// to the block offset as is, without masking.
func (t *Trie) lookupValue(n uint16, b byte) Elem {
	offset := int(n)<<6 + int(b)
	return Elem(t.Values[offset])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
//
// For an invalid encoding the value is 0 and the size is the number of bytes
// that were accepted before the offending byte, with a minimum of 1.
func (t *Trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
// Single-byte input.
return Elem(t.Values0[c0]), 1
case c0 < t2:
// A continuation byte cannot start a sequence.
return 0, 1
case c0 < t3:
// Two-byte sequence.
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
// Three-byte sequence.
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
// Four-byte sequence.
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding, in the same way lookup does for a byte
// slice. len(s) must be greater than 0.
//
// The body of lookupString is a verbatim copy of that of lookup.
func (t *Trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
// Single-byte input.
return Elem(t.Values0[c0]), 1
case c0 < t2:
// A continuation byte cannot start a sequence.
return 0, 1
case c0 < t3:
// Two-byte sequence.
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
// Three-byte sequence.
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
// Four-byte sequence.
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
@@ -0,0 +1,106 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
//
// TestLookupTrie expects the lookup of the rune at index i to yield value i.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
// Test cases for illegal runes.
//
// trietest pairs an input with the size lookup is expected to report for it.
type trietest struct {
size int
bytes []byte
}
// tests lists malformed and truncated inputs. For each of them lookup must
// return value 0 and the given size.
var tests = []trietest{
// illegal runes
{1, []byte{0x80}},
{1, []byte{0xFF}},
{1, []byte{t2, tx - 1}},
{1, []byte{t2, t2}},
{2, []byte{t3, tx, tx - 1}},
{2, []byte{t3, tx, t2}},
{1, []byte{t3, tx - 1, tx}},
{3, []byte{t4, tx, tx, tx - 1}},
{3, []byte{t4, tx, tx, t2}},
{1, []byte{t4, t2, tx, tx - 1}},
{2, []byte{t4, tx, t2, tx - 1}},
// short runes
{0, []byte{t2}},
{0, []byte{t3, tx}},
{0, []byte{t4, tx, tx}},
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
{1, []byte{t5, tx, tx, tx, tx}},
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
// TestLookupTrie checks lookup on the test trie: every rune of testRunes must
// map to its own index with the full encoded width, and every malformed input
// of tests must map to value 0 with the size listed for it.
func TestLookupTrie(t *testing.T) {
	for want, r := range testRunes {
		encoded := []byte(string(r))
		got, width := testTrie.lookup(encoded)
		if int(got) != want {
			t.Errorf("lookup(%U): found value %#x, expected %#x", r, got, want)
		}
		if width != len(encoded) {
			t.Errorf("lookup(%U): found size %d, expected %d", r, width, len(encoded))
		}
	}
	for idx, tc := range tests {
		got, width := testTrie.lookup(tc.bytes)
		if int(got) != 0 {
			t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", idx, got)
		}
		if width != tc.size {
			t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", idx, width, tc.size)
		}
	}
}
// test data is taken from exp/collate/locale/build/trie_test.go
//
// testValues is the value table of the test trie. The non-zero entries are
// the indexes of the corresponding runes in testRunes.
var testValues = [832]uint32{
0x000c: 0x00000001,
0x007f: 0x00000002,
0x00c0: 0x00000003,
0x0100: 0x00000004,
0x0140: 0x0000000c, 0x0141: 0x0000000d, 0x0142: 0x0000000e,
0x0150: 0x0000000f,
0x0155: 0x00000010,
0x01bf: 0x00000005,
0x01c0: 0x00000006,
0x0219: 0x00000007,
0x027f: 0x00000008,
0x0280: 0x00000009,
0x02c1: 0x0000000a,
0x033f: 0x0000000b,
}
// testLookup is the index table of the test trie.
var testLookup = [640]uint16{
0x0e0: 0x05, 0x0e6: 0x06,
0x13f: 0x07,
0x140: 0x08, 0x144: 0x09,
0x190: 0x03,
0x1ff: 0x0a,
0x20f: 0x05,
0x242: 0x01, 0x244: 0x02,
0x248: 0x03,
0x25f: 0x04,
0x260: 0x01,
0x26f: 0x02,
0x270: 0x04, 0x274: 0x06,
}
// testTrie is the trie under test. The fields are given positionally as
// Index0, Values0, Index and Values; Index0 is the part of testLookup that
// starts at block 6, and both value fields share testValues.
var testTrie = Trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}
@@ -0,0 +1,31 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab // import "golang.org/x/text/internal/colltab"
// A Weighter can be used as a source for Collator and Searcher.
//
// It produces collation elements (Elems) for input given either as a byte
// slice or as a string; each operation is offered in both variants.
type Weighter interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
// Top returns the highest variable primary value.
Top() uint32
}
@@ -0,0 +1,42 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// testWeighter is a simple Weighter that returns weights from a user-defined map.
// The map keys are the characters or contractions it knows; the values are
// their collation elements.
type testWeighter map[string][]Elem
// Start, StartString, Domain and Top are stubs that return zero values.
func (t testWeighter) Start(int, []byte) int { return 0 }
func (t testWeighter) StartString(int, string) int { return 0 }
func (t testWeighter) Domain() []string { return nil }
func (t testWeighter) Top() uint32 { return 0 }
// maxContractBytes is the maximum length of any key in the map.
// The AppendNext methods of testWeighter never try a longer prefix.
const maxContractBytes = 10
// AppendNext appends the elements of the longest prefix of s that is a key of
// the map, trying at most maxContractBytes bytes, and returns the extended
// buffer and the length of that prefix. It panics if no prefix is a key.
func (t testWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) {
	limit := maxContractBytes
	if len(s) < limit {
		limit = len(s)
	}
	for end := limit; end > 0; end-- {
		elems, found := t[string(s[:end])]
		if !found {
			continue
		}
		return append(buf, elems...), end
	}
	panic("incomplete testWeighter: could not find " + string(s))
}
// AppendNextString appends the elements of the longest prefix of s that is a
// key of the map, trying at most maxContractBytes bytes, and returns the
// extended buffer and the length of that prefix. It panics if no prefix is a
// key.
func (t testWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) {
	limit := maxContractBytes
	if len(s) < limit {
		limit = len(s)
	}
	for end := limit; end > 0; end-- {
		elems, found := t[s[:end]]
		if !found {
			continue
		}
		return append(buf, elems...), end
	}
	panic("incomplete testWeighter: could not find " + s)
}
@@ -0,0 +1,4 @@
The export directory contains packages that are generated using the x/text
infrastructure, but live elsewhere.
At some point we can expose some of the infrastructure, but for now this
is not done.
@@ -0,0 +1,55 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package idna
// This file contains code that is common between the generation code and the
// package's test code.
import (
"log"
"golang.org/x/text/internal/ucd"
)
// catFromEntry derives the category of the current entry of p from its status
// field (field 1) and its optional exception field (field 3). An exception is
// only accepted on "valid" entries and turns the category into validNV8 or
// validXV8. Any unknown or inconsistent value terminates the program through
// log.Fatalf.
func catFromEntry(p *ucd.Parser) (cat category) {
	r := p.Rune(0)

	status := p.String(1)
	switch status {
	case "valid":
		cat = valid
	case "disallowed":
		cat = disallowed
	case "disallowed_STD3_valid":
		cat = disallowedSTD3Valid
	case "disallowed_STD3_mapped":
		cat = disallowedSTD3Mapped
	case "mapped":
		cat = mapped
	case "deviation":
		cat = deviation
	case "ignored":
		cat = ignored
	default:
		log.Fatalf("%U: Unknown category %q", r, status)
	}

	exception := p.String(3)
	if exception == "" {
		return cat
	}
	if cat != valid {
		log.Fatalf(`%U: %s defined for %q; want "valid"`, r, exception, p.String(1))
	}
	switch exception {
	case "NV8":
		return validNV8
	case "XV8":
		return validXV8
	}
	log.Fatalf("%U: Unexpected exception %q", r, exception)
	return cat
}
// joinType maps the single-letter joining type codes to the corresponding
// info constants.
var joinType = map[string]info{
"L": joiningL,
"D": joiningD,
"T": joiningT,
"R": joiningR,
}
@@ -0,0 +1,72 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.10
package idna
import (
"fmt"
"strings"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// TestConformance runs the entries of the Unicode IdnaTest.txt file (version
// 10.0.0) through ToUnicode and ToASCII of a transitional and a
// non-transitional profile, as selected by the first field of each entry. It
// is skipped unless long tests are enabled.
func TestConformance(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUnicodeFile("idna", "10.0.0", "IdnaTest.txt")
	defer r.Close()

	section := "main"

	p := ucd.New(r)
	transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup())
	nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup())
	for p.Next() {
		// What to test
		var profiles []*Profile
		switch kind := p.String(0); kind {
		case "T":
			profiles = []*Profile{transitional}
		case "N":
			profiles = []*Profile{nonTransitional}
		case "B":
			profiles = []*Profile{transitional, nonTransitional}
		}

		// An empty expectation defaults to the preceding column.
		src := unescape(p.String(1))
		wantToUnicode := unescape(p.String(2))
		if wantToUnicode == "" {
			wantToUnicode = src
		}
		wantToASCII := unescape(p.String(3))
		if wantToASCII == "" {
			wantToASCII = wantToUnicode
		}

		// An expectation starting with "[" denotes an error, not a result.
		wantErrToUnicode := ""
		if strings.HasPrefix(wantToUnicode, "[") {
			wantErrToUnicode, wantToUnicode = wantToUnicode, ""
		}
		wantErrToASCII := ""
		if strings.HasPrefix(wantToASCII, "[") {
			wantErrToASCII, wantToASCII = wantToASCII, ""
		}

		// TODO: also do IDNA tests.
		// invalidInIDNA2008 := p.String(4) == "NV8"

		for _, profile := range profiles {
			name := fmt.Sprintf("%s:%s", section, profile)
			doTest(t, profile.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
			doTest(t, profile.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
		}
	}
}
@@ -0,0 +1,67 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna_test
import (
"fmt"
"golang.org/x/text/internal/export/idna"
)
// ExampleProfile converts domain names to ASCII with the package-level
// function and with the predefined Punycode, Lookup and Registration profiles.
func ExampleProfile() {
// Raw Punycode has no restrictions and does no mappings.
fmt.Println(idna.ToASCII(""))
fmt.Println(idna.ToASCII("*.GÖPHER.com"))
fmt.Println(idna.Punycode.ToASCII("*.GÖPHER.com"))
// Rewrite IDN for lookup.
fmt.Println(idna.Lookup.ToASCII(""))
fmt.Println(idna.Lookup.ToASCII("www.GÖPHER.com"))
// Convert an IDN to ASCII for registration purposes.
// This reports an error if the input was ill-formed.
fmt.Println(idna.Registration.ToASCII("www.GÖPHER.com"))
fmt.Println(idna.Registration.ToASCII("www.göpher.com"))
// Output:
// <nil>
// *.xn--GPHER-1oa.com <nil>
// *.xn--GPHER-1oa.com <nil>
// <nil>
// www.xn--gpher-jua.com <nil>
// www.xn--GPHER-1oa.com idna: disallowed rune U+0047
// www.xn--gpher-jua.com <nil>
}
// ExampleNew builds profiles from different option sets and applies each of
// them to the same wildcard domain name.
func ExampleNew() {
var p *idna.Profile
// Raw Punycode has no restrictions and does no mappings.
p = idna.New()
fmt.Println(p.ToASCII("*.faß.com"))
// Do mappings. Note that star is not allowed in a DNS lookup.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true)) // Map ß -> ss
fmt.Println(p.ToASCII("*.faß.com"))
// Lookup for registration. Also does not allow '*'.
p = idna.New(idna.ValidateForRegistration())
fmt.Println(p.ToUnicode("*.faß.com"))
// Set up a profile that maps for lookup, but allows wildcards.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true), // Map ß -> ss
idna.StrictDomainName(false)) // Set more permissive ASCII rules.
fmt.Println(p.ToASCII("*.faß.com"))
// Output:
// *.xn--fa-hia.com <nil>
// *.fass.com idna: disallowed rune U+002A
// *.faß.com idna: disallowed rune U+002A
// *.fass.com <nil>
}
@@ -0,0 +1,281 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
// This program generates the trie for idna operations. The Unicode casing
// algorithm requires the lookup of various properties and mappings for each
// rune. The table generated by this generator combines several of the most
// frequently used of these into a single trie so that they can be accessed
// with a single lookup.
package main
import (
"fmt"
"io"
"log"
"unicode"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/bidi"
)
// main initializes the generator framework, generates the idna tables and
// then repackages the source files that are shared between the generator and
// package idna.
func main() {
gen.Init()
genTables()
// gen_trieval.go and gen_common.go are shared with package idna; they are
// re-emitted under the given file names for package "idna".
// NOTE(review): the exact rewriting done by gen.Repackage is not visible
// here — confirm against internal/gen.
gen.Repackage("gen_trieval.go", "trieval.go", "idna")
gen.Repackage("gen_common.go", "common_test.go", "idna")
}
// runes accumulates, per rune, the attribute bits (mayNeedNorm, modifier,
// virama, rtl and joining type) that genTables collects from the Unicode data
// files before merging them into the trie values of non-mapped runes.
var runes = map[rune]info{}
// genTables builds the idna trie from the Unicode data files and writes it,
// together with the mappings, mappingIndex and xorData tables, to tables.go.
//
// The attribute files are processed first to fill the runes map; the IDNA
// mapping table is processed last and combines, per rune, the category, the
// mapping entry produced by makeEntry and (for non-mapped runes only) the
// collected attribute bits.
func genTables() {
t := triegen.NewTrie("idna")
// Flag every rune that has an NFC_QC entry as possibly needing
// normalization.
ucd.Parse(gen.OpenUCDFile("DerivedNormalizationProps.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
if p.String(1) == "NFC_QC" { // p.String(2) is "N" or "M"
runes[r] = mayNeedNorm
}
})
// Collect the virama, modifier and right-to-left attributes.
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
const cccVirama = 9
if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
// This assignment replaces any bits recorded for the rune so far.
runes[p.Rune(0)] = viramaModifier
}
switch {
case unicode.In(r, unicode.Mark):
runes[r] |= modifier | mayNeedNorm
}
// TODO: by using UnicodeData.txt we don't mark undefined codepoints
// that are earmarked as RTL properly. However, an undefined cp will
// always fail, so there is no need to store this info.
switch p, _ := bidi.LookupRune(r); p.Class() {
case bidi.R, bidi.AL, bidi.AN:
// The rtl value shares the attribute bits with the modifier values, so
// a rune must not be both. Only a bare mayNeedNorm may be overwritten.
if x := runes[r]; x != 0 && x != mayNeedNorm {
log.Fatalf("%U: rune both modifier and RTL letter/number", r)
}
runes[r] = rtl
}
})
// Add the joining type for the four types that have an entry in joinType.
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
switch v := p.String(1); v {
case "L", "D", "T", "R":
runes[p.Rune(0)] |= joinType[v] << joinShift
}
})
// Insert one trie value per rune of the IDNA mapping table.
ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
// The mappings table explicitly defines surrogates as invalid.
if !utf8.ValidRune(r) {
return
}
cat := catFromEntry(p)
isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
if !isMapped {
// Only include additional category information for non-mapped
// runes. The additional information is only used after mapping and
// the bits would clash with mapping information.
// TODO: it would be possible to inline this data and avoid
// additional lookups. This is quite tedious, though, so let's first
// see if we need this.
cat |= category(runes[r])
}
s := string(p.Runes(2))
if s != "" && !isMapped {
log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
}
// The trie value is the mapping entry plus the category bits.
t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
})
w := gen.NewCodeWriter()
// Deferred so that the file is written when genTables returns; this is not
// reached if log.Fatal below exits the program first.
defer w.WriteVersionedGoFile("tables.go", "idna")
gen.WriteUnicodeVersion(w)
w.WriteVar("mappings", string(mappings))
w.WriteVar("mappingIndex", mappingIndex)
w.WriteVar("xorData", string(xorData))
sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
// Tables filled by makeEntry while the trie is being built.
var (
// mappings contains replacement strings for mapped runes.
mappings = []byte{}
// mappingIndex contains offsets into mappings, each representing the start
// of a mapping. The next entry in mappingIndex points past the end of the
// string.
mappingIndex = []uint16{0}
// mapCache maps a replacement string that has already been stored to its
// index in mappingIndex.
mapCache = map[string]int{}
// xorData is like mappings, except that it contains XOR data.
// We split these two tables so that we don't get an overflow.
xorData = []byte{}
// xorCache maps an XOR pattern that has already been stored to its offset
// in xorData.
xorCache = map[string]int{}
)
// makeEntry creates the mapping part of the trie entry for rune r, whose
// replacement string is mapped (empty for runes without a mapping).
//
// If the UTF-8 encoding of r and mapped differ in length, mapped is stored
// as is in the mappings table and the returned value holds its index into
// mappingIndex. Otherwise the mapping is expressed as a per-byte XOR pattern
// relative to the encoding of r: a one-byte pattern is inlined in the
// returned value, longer patterns are stored length-prefixed in xorData and
// the returned value holds their offset. Identical strings and patterns are
// stored only once.
//
// The generator aborts if a table outgrows what the uint16 offsets or the
// index bits of an info value can represent, rather than silently truncating
// the value and emitting corrupt tables.
func makeEntry(r rune, mapped string) info {
	// maxIndex is the largest index that still fits in an info value after
	// being shifted left by indexShift.
	const maxIndex = int(^info(0) >> indexShift)

	orig := string(r)
	if len(orig) != len(mapped) {
		// Store the mapped value as is in the mappings table.
		index, ok := mapCache[mapped]
		if !ok {
			index = len(mappingIndex) - 1
			mapCache[mapped] = index
			mappings = append(mappings, mapped...)
			if len(mappings) > int(^uint16(0)) {
				log.Fatalf("%U: mappings table exceeds %d bytes", r, ^uint16(0))
			}
			mappingIndex = append(mappingIndex, uint16(len(mappings)))
		}
		if index > maxIndex {
			log.Fatalf("%U: mapping index %d does not fit in a trie value", r, index)
		}
		return info(index) << indexShift
	}

	// Create per-byte XOR mask.
	b := make([]byte, len(orig))
	for i := range b {
		b[i] = orig[i] ^ mapped[i]
	}
	// Remove leading 0 bytes, but keep at least one byte.
	for len(b) > 1 && b[0] == 0 {
		b = b[1:]
	}
	if len(b) == 1 {
		return xorBit | inlineXOR | info(b[0])<<indexShift
	}

	// Store the pattern, prefixed by its length, in the xorData table.
	pattern := string(b)
	index, ok := xorCache[pattern]
	if !ok {
		index = len(xorData)
		xorCache[pattern] = index
		xorData = append(xorData, byte(len(pattern)))
		xorData = append(xorData, pattern...)
	}
	if index > maxIndex {
		log.Fatalf("%U: xor index %d does not fit in a trie value", r, index)
	}
	entry := xorBit | info(index)<<indexShift
	// The inlineXOR bits mark an inlined pattern; an offset that happens to
	// set all of them could not be told apart from an inlined one.
	if entry&inlineXOR == inlineXOR {
		log.Fatalf("%U: xor index %d collides with the inline XOR marker", r, index)
	}
	return entry
}
// The following code implements a triegen.Compacter that was originally
// designed for normalization. The IDNA table has some similarities with the
// norm table. Using this compacter, together with the XOR pattern approach,
// reduces the table size by roughly 100K. It can probably be compressed further
// by also including elements of the compacter used by cases, but for now it is
// good enough.
// maxSparseEntries is the largest number of sparse entries (as counted by
// countSparseEntries) a block may have for normCompacter to accept it.
const maxSparseEntries = 16
// normCompacter collects the blocks handed to Store and prints them as
// sparse value ranges.
type normCompacter struct {
// sparseBlocks holds the stored blocks in the order they were stored.
sparseBlocks [][]uint64
// sparseOffset holds, for each stored block, the value of sparseCount at
// the time the block was stored.
sparseOffset []uint16
// sparseCount is the total number of value ranges over all stored blocks,
// counting one extra range per block.
sparseCount int
}
// mostFrequentStride returns the non-negative difference between consecutive
// values of a that occurs most often. Differences following a zero value are
// not counted. Ties are resolved in favor of the smaller stride, and 0 is
// returned if no difference was counted at all.
func mostFrequentStride(a []uint64) int {
	freq := map[int]int{}
	prev := 0
	for _, x := range a {
		cur := int(x)
		if prev != 0 {
			if d := cur - prev; d >= 0 {
				freq[d]++
			}
		}
		prev = cur
	}

	best, bestCount := 0, 0
	for d, n := range freq {
		switch {
		case n > bestCount, n == bestCount && d < best:
			best, bestCount = d, n
		}
	}
	return best
}
// countSparseEntries returns the number of non-zero values in a that do not
// continue the most frequent stride, that is, whose difference with the
// preceding value (0 for the first one) is not equal to that stride.
func countSparseEntries(a []uint64) int {
	stride := mostFrequentStride(a)
	n, prev := 0, 0
	for _, val := range a {
		cur := int(val)
		if val != 0 && cur-prev != stride {
			n++
		}
		prev = cur
	}
	return n
}
// Size reports the size used for block v when stored sparsely: four per
// sparse entry plus one extra entry, plus two. ok is false, and sz is 0, if
// v has more than maxSparseEntries sparse entries.
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
	n := countSparseEntries(v)
	if n > maxSparseEntries {
		return 0, false
	}
	return 4*(n+1) + 2, true
}
// Store records block v for printing and returns its handle, which is the
// number of blocks stored before it.
func (c *normCompacter) Store(v []uint64) uint32 {
	handle := len(c.sparseOffset)
	c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
	c.sparseBlocks = append(c.sparseBlocks, v)
	// One range per sparse entry plus one extra range per block.
	c.sparseCount += 1 + countSparseEntries(v)
	return uint32(handle)
}
// Handler returns the name of the lookup function, as it is to appear in the
// generated code, for blocks stored by this compacter.
// NOTE(review): idnaSparse.lookup is defined outside this file — confirm it
// matches the tables emitted by Print.
func (c *normCompacter) Handler() string {
return "idnaSparse.lookup"
}
// Print writes the Go declarations of idnaSparseOffset and idnaSparseValues
// for all stored blocks to w. It returns the first write error encountered,
// if any.
func (c *normCompacter) Print(w io.Writer) (retErr error) {
// p formats to w and remembers only the first error.
p := func(f string, x ...interface{}) {
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
retErr = err
}
}
ls := len(c.sparseBlocks)
p("// idnaSparseOffset: %d entries, %d bytes\n", ls, ls*2)
p("var idnaSparseOffset = %#v\n\n", c.sparseOffset)
ns := c.sparseCount
p("// idnaSparseValues: %d entries, %d bytes\n", ns, ns*4)
p("var idnaSparseValues = [%d]valueRange {", ns)
for i, b := range c.sparseBlocks {
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
var v int
stride := mostFrequentStride(b)
n := countSparseEntries(b)
// The first range of a block is a header holding the stride and the
// number of ranges that follow.
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
for i, nv := range b {
// A value that does not continue the stride ends the current range, if
// one is open, and starts a new one unless it is zero.
// NOTE(review): the 0x80 offset presumably maps block positions to
// UTF-8 continuation bytes — confirm against the lookup code.
if int(nv)-v != stride {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = int(nv)
}
// Close the range that is still open at the end of the block.
if v != 0 {
p(",hi:%#02x},", 0x80+len(b)-1)
}
}
p("\n}\n\n")
return
}
@@ -0,0 +1,93 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.10
package idna
import (
"testing"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// TestTables checks the generated trie against the Unicode data files it was
// generated from: category and mappings, virama and bidi attributes, and
// joining types. It is skipped unless long tests are enabled.
func TestTables(t *testing.T) {
testtext.SkipIfNotLong(t)
// lookup returns the trie value for r.
lookup := func(r rune) info {
v, _ := trie.lookupString(string(r))
return info(v)
}
ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
if got, want := x.category(), catFromEntry(p); got != want {
t.Errorf("%U:category: got %x; want %x", r, got, want)
}
mapped := false
switch p.String(1) {
case "mapped", "disallowed_STD3_mapped", "deviation":
mapped = true
}
if x.isMapped() != mapped {
t.Errorf("%U:isMapped: got %v; want %v", r, x.isMapped(), mapped)
}
if !mapped {
return
}
want := string(p.Runes(2))
got := string(x.appendMapping(nil, string(r)))
if got != want {
t.Errorf("%U:mapping: got %+q; want %+q", r, got, want)
}
// NOTE(review): at this point mapped is true, so the check below is only
// reached when the isMapped comparison above already failed. The mark
// check therefore looks unreachable in the passing case — confirm whether
// it was meant to run for non-mapped runes.
if x.isMapped() {
return
}
wantMark := unicode.In(r, unicode.Mark)
gotMark := x.isModifier()
if gotMark != wantMark {
t.Errorf("IsMark(%U) = %v; want %v", r, gotMark, wantMark)
}
})
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
got := x.isViramaModifier()
const cccVirama = 9
want := p.Int(ucd.CanonicalCombiningClass) == cccVirama
if got != want {
t.Errorf("IsVirama(%U) = %v; want %v", r, got, want)
}
rtl := false
switch p.String(ucd.BidiClass) {
case "R", "AL", "AN":
rtl = true
}
// The bidi attribute is only compared for non-mapped runes.
if got := x.isBidi("A"); got != rtl && !x.isMapped() {
t.Errorf("IsBidi(%U) = %v; want %v", r, got, rtl)
}
})
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
// Joining types are only stored for non-mapped runes.
if x.isMapped() {
return
}
got := x.joinType()
want := joinType[p.String(1)]
if got != want {
t.Errorf("JoinType(%U) = %x; want %x", r, got, want)
}
})
}
@@ -0,0 +1,84 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.10
package idna
import (
"testing"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// TestTables checks the generated trie against the Unicode data files it was
// generated from: category and mappings, the virama attribute, and joining
// types. It is skipped unless long tests are enabled.
func TestTables(t *testing.T) {
testtext.SkipIfNotLong(t)
// lookup returns the trie value for r.
lookup := func(r rune) info {
v, _ := trie.lookupString(string(r))
return info(v)
}
ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
if got, want := x.category(), catFromEntry(p); got != want {
t.Errorf("%U:category: got %x; want %x", r, got, want)
}
mapped := false
switch p.String(1) {
case "mapped", "disallowed_STD3_mapped", "deviation":
mapped = true
}
if x.isMapped() != mapped {
t.Errorf("%U:isMapped: got %v; want %v", r, x.isMapped(), mapped)
}
if !mapped {
return
}
want := string(p.Runes(2))
got := string(x.appendMapping(nil, string(r)))
if got != want {
t.Errorf("%U:mapping: got %+q; want %+q", r, got, want)
}
// NOTE(review): at this point mapped is true, so the check below is only
// reached when the isMapped comparison above already failed. The mark
// check therefore looks unreachable in the passing case — confirm whether
// it was meant to run for non-mapped runes.
if x.isMapped() {
return
}
wantMark := unicode.In(r, unicode.Mark)
gotMark := x.isModifier()
if gotMark != wantMark {
t.Errorf("IsMark(%U) = %v; want %v", r, gotMark, wantMark)
}
})
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
got := x.isViramaModifier()
const cccVirama = 9
want := p.Int(ucd.CanonicalCombiningClass) == cccVirama
if got != want {
t.Errorf("IsVirama(%U) = %v; want %v", r, got, want)
}
})
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
// Joining types are only stored for non-mapped runes.
if x.isMapped() {
return
}
got := x.joinType()
want := joinType[p.String(1)]
if got != want {
t.Errorf("JoinType(%U) = %x; want %x", r, got, want)
}
})
}
@@ -0,0 +1,59 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
// This file contains code that is common between the generation code and the
// package's test code.
import (
"log"
"golang.org/x/text/internal/ucd"
)
// catFromEntry returns the category of the mapping table entry that p is
// positioned at. Field 1 holds the status of the rune and field 3 an
// optional exception ("NV8" or "XV8"), which is only accepted for valid
// runes and refines the category to validNV8 or validXV8. Any unexpected
// field value terminates the program.
func catFromEntry(p *ucd.Parser) (cat category) {
	r := p.Rune(0)

	status := p.String(1)
	switch status {
	case "valid":
		cat = valid
	case "disallowed":
		cat = disallowed
	case "disallowed_STD3_valid":
		cat = disallowedSTD3Valid
	case "disallowed_STD3_mapped":
		cat = disallowedSTD3Mapped
	case "mapped":
		cat = mapped
	case "deviation":
		cat = deviation
	case "ignored":
		cat = ignored
	default:
		log.Fatalf("%U: Unknown category %q", r, status)
	}

	exception := p.String(3)
	if exception == "" {
		return cat
	}
	if cat != valid {
		log.Fatalf(`%U: %s defined for %q; want "valid"`, r, exception, p.String(1))
	}
	switch exception {
	case "NV8":
		return validNV8
	case "XV8":
		return validXV8
	}
	log.Fatalf("%U: Unexpected exception %q", r, exception)
	return cat
}
// joinType maps the joining type names "L", "D", "T" and "R", as they are
// looked up from field 1 of DerivedJoiningType.txt, to the corresponding
// joining constants.
var joinType = map[string]info{
"L": joiningL,
"D": joiningD,
"T": joiningT,
"R": joiningR,
}
@@ -0,0 +1,123 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
// input rune. This technique is used by the cases packages and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
// if mapped {
// if inlinedXOR {
// 15..13 inline XOR marker
// 12..11 unused
// 10..3 inline XOR mask
// } else {
// 15..3 index into xor or mapping table
// }
// } else {
// 15..14 unused
// 13 mayNeedNorm
// 12..11 attributes
// 10..8 joining type
// 7..3 category type
// }
// 2 use xor pattern
// 1..0 mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16

const (
	catSmallMask = 0x3  // bits holding a mapped category
	catBigMask   = 0xF8 // bits holding a non-mapped category
	indexShift   = 3    // position of the index or inline XOR mask
	xorBit       = 0x4  // interpret the index as an xor pattern

	// inlineXOR bits are all set if the XOR pattern is inlined.
	inlineXOR = 0xE000

	joinShift = 8 // position of the joining type
	joinMask  = 0x07

	// Attributes
	attributesMask = 0x1800
	viramaModifier = 0x1800
	modifier       = 0x1000
	rtl            = 0x0800

	mayNeedNorm = 0x2000
)

// A category corresponds to a category defined in the IDNA mapping table.
type category uint16

// Categories of mapped runes. They are stored in the bits selected by
// catSmallMask.
const (
	unknown              category = 0 // not currently defined in unicode.
	mapped               category = 1
	disallowedSTD3Mapped category = 2
	deviation            category = 3
)

// Categories of non-mapped runes. They are stored in the bits selected by
// catBigMask.
const (
	valid               category = 0x08
	validNV8            category = 0x18
	validXV8            category = 0x28
	disallowed          category = 0x40
	disallowedSTD3Valid category = 0x80
	ignored             category = 0xC0
)

// join types and additional rune information
const (
	joiningL = iota + 1
	joiningD
	joiningT
	joiningR

	// The following types are derived during processing.
	joinZWJ
	joinZWNJ
	joinVirama
	numJoinTypes
)

// isMapped reports whether c holds one of the mapped categories.
func (c info) isMapped() bool {
	return c&catSmallMask != 0
}

// category returns the category stored in c: the mapped category if one is
// set, and the non-mapped category otherwise.
func (c info) category() category {
	if c.isMapped() {
		return category(c & catSmallMask)
	}
	return category(c & catBigMask)
}

// joinType returns the joining type stored in c, or 0 if c is mapped.
func (c info) joinType() info {
	if !c.isMapped() {
		return (c >> joinShift) & joinMask
	}
	return 0
}

// isModifier reports whether c is not mapped and has the modifier bit set.
func (c info) isModifier() bool {
	return !c.isMapped() && c&modifier != 0
}

// isViramaModifier reports whether c is not mapped and its attribute bits
// equal viramaModifier.
func (c info) isViramaModifier() bool {
	return !c.isMapped() && c&attributesMask == viramaModifier
}
@@ -0,0 +1,11 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.18
package idna
// transitionalLookup is the value of the transitional option used by the
// predefined Lookup profile.
//
// Transitional processing is disabled by default in Go 1.18.
// https://golang.org/issue/47510
const transitionalLookup = false
@@ -0,0 +1,769 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.10
//go:generate go run gen.go gen_trieval.go gen_common.go
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/text/internal/export/idna"
import (
"fmt"
"strings"
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/bidi"
"golang.org/x/text/unicode/norm"
)
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
// specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
// string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
// needed, and document that the return string may not be empty in case of
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
// ToASCII converts s to its ASCII form using the Punycode profile. It is a
// wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
	return Punycode.ToASCII(s)
}
// ToUnicode converts s to its Unicode form using the Punycode profile. It is
// a wrapper for Punycode.ToUnicode.
//
// It delegates to the method rather than calling process directly, so that
// it behaves exactly like Punycode.ToUnicode (which always disables
// transitional processing) even if the exported Punycode variable has been
// replaced.
func ToUnicode(s string) (string, error) {
	return Punycode.ToUnicode(s)
}
// An Option configures a Profile at creation time. Options are applied in
// the order in which they are passed to New, so a later Option can override
// the settings of an earlier one.
type Option func(*options)
// Transitional sets whether a Profile uses the Transitional mapping as
// defined in UTS #46. With transitional processing, "ß" is, for example,
// mapped to "ss". The transitional mapping is a compromise between IDNA2003
// and IDNA2008 compatibility and is used by some browsers when resolving
// domain names. This option is only meaningful if combined with
// MapForLookup.
func Transitional(transitional bool) Option {
	set := func(o *options) {
		o.transitional = transitional
	}
	return set
}
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
	set := func(o *options) {
		o.verifyDNSLength = verify
	}
	return set
}
// RemoveLeadingDots sets whether leading label separators are removed.
// Leading runes that map to dots, such as U+3002 IDEOGRAPHIC FULL STOP, are
// removed as well.
func RemoveLeadingDots(remove bool) Option {
	set := func(o *options) {
		o.removeLeadingDots = remove
	}
	return set
}
// ValidateLabels sets whether to check the mandatory label validation
// criteria as defined in Section 5.4 of RFC 5891. This includes testing for
// correct use of hyphens ('-'), normalization, validity of runes, and the
// context rules. In particular, ValidateLabels also sets the CheckHyphens
// and CheckJoiners flags in UTS #46.
func ValidateLabels(enable bool) Option {
	return func(o *options) {
		// An existing mapping is kept. If none is set, install one that at
		// least checks normalization.
		if enable && o.mapping == nil {
			o.mapping = normalize
		}
		o.trie = trie
		o.checkJoiners = enable
		o.checkHyphens = enable

		// Labels decoded from Punycode are only validated when enabled.
		o.fromPuny = nil
		if enable {
			o.fromPuny = validateFromPunycode
		}
	}
}
// CheckHyphens sets whether to check for correct use of hyphens ('-') in
// labels. Most web browsers do not have this option set, since labels such
// as "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
	set := func(o *options) {
		o.checkHyphens = enable
	}
	return set
}
// CheckJoiners sets whether to check the ContextJ rules as defined in
// Appendix A of RFC 5892, concerning the use of joiner runes.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
	set := func(o *options) {
		// The trie is set regardless of the value of enable.
		o.trie, o.checkJoiners = trie, enable
	}
	return set
}
// StrictDomainName sets whether the permissible ASCII characters are limited
// to those allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and
// the hyphen). This is set by default for MapForLookup and
// ValidateForRegistration, but is only useful if ValidateLabels is set.
//
// Disabling this option is useful, for instance, for browsers that allow
// characters outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
	set := func(o *options) {
		o.useSTD3Rules = use
	}
	return set
}
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
	set := func(o *options) {
		o.bidirule = bidirule.ValidString
	}
	return set
}
// ValidateForRegistration sets validation options to verify that a given IDN
// is properly formatted for registration as defined by Section 4 of RFC
// 5891. It installs the registration mapping and then applies
// StrictDomainName, ValidateLabels, VerifyDNSLength and BidiRule.
func ValidateForRegistration() Option {
	return func(o *options) {
		o.mapping = validateRegistration
		for _, opt := range []Option{
			StrictDomainName(true),
			ValidateLabels(true),
			VerifyDNSLength(true),
			BidiRule(),
		} {
			opt(o)
		}
	}
}
// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out
// in Section 5 of RFC 5891. The mappings follow the recommendations of RFC
// 5894, RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule
// option to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option {
	return func(o *options) {
		o.mapping = validateAndMap
		for _, opt := range []Option{
			StrictDomainName(true),
			ValidateLabels(true),
		} {
			opt(o)
		}
	}
}
// options holds the settings of a Profile. The fields are set by the Option
// functions or directly by the predefined profiles.
type options struct {
// transitional selects the transitional treatment of deviation runes; see
// Transitional.
transitional bool
// useSTD3Rules restricts the permissible ASCII characters; see
// StrictDomainName.
useSTD3Rules bool
// checkHyphens and checkJoiners enable the checks of the same name; see
// CheckHyphens and CheckJoiners.
checkHyphens bool
checkJoiners bool
// verifyDNSLength enables the length checks; see VerifyDNSLength.
verifyDNSLength bool
// removeLeadingDots causes leading dots to be stripped; see
// RemoveLeadingDots.
removeLeadingDots bool
// trie is set by ValidateLabels and CheckJoiners and by the predefined
// profiles other than Punycode.
trie *idnaTrie
// fromPuny calls validation rules when converting A-labels to U-labels.
fromPuny func(p *Profile, s string) error
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool
}
// A Profile defines the configuration of an IDNA mapper. It embeds the
// options that were applied when it was created.
type Profile struct {
options
}
// apply calls each of opts on o, in order.
func apply(o *options, opts []Option) {
	for i := range opts {
		opts[i](o)
	}
}
// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals
// the Punycode Profile. Options can be passed to further restrict the
// Profile. The MapForLookup and ValidateForRegistration options set a
// collection of options, for lookup and registration purposes respectively,
// which can be tailored by adding more fine-grained options, where later
// options override earlier options.
func New(o ...Option) *Profile {
	p := new(Profile)
	for _, opt := range o {
		opt(&p.options)
	}
	return p
}
// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
	const toASCII = true
	return p.process(s, toASCII)
}
// ToUnicode converts a domain or domain label to its Unicode form. For
// example, ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com",
// and ToUnicode("golang") is "golang". If an error is encountered it will
// return an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
	// Work on a copy with transitional processing switched off, leaving the
	// receiver untouched.
	nonTransitional := *p
	nonTransitional.transitional = false
	const toASCII = false
	return nonTransitional.process(s, toASCII)
}
// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
func (p *Profile) String() string {
	var b strings.Builder
	if p.transitional {
		b.WriteString("Transitional")
	} else {
		b.WriteString("NonTransitional")
	}
	// Append one suffix for every flag that is set, in a fixed order.
	for _, flag := range [...]struct {
		set    bool
		suffix string
	}{
		{p.useSTD3Rules, ":UseSTD3Rules"},
		{p.checkHyphens, ":CheckHyphens"},
		{p.checkJoiners, ":CheckJoiners"},
		{p.verifyDNSLength, ":VerifyDNSLength"},
	} {
		if flag.set {
			b.WriteString(flag.suffix)
		}
	}
	return b.String()
}
// Predefined profiles. Each exported variable is initialized with the
// corresponding unexported profile defined further down in this block.
var (
// Punycode is a Profile that does raw punycode processing with a minimum
// of validation.
Punycode *Profile = punycode
// Lookup is the recommended profile for looking up domain names, according
// to Section 5 of RFC 5891. The exact configuration of this profile may
// change over time.
Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
Display *Profile = display
// Registration is the recommended profile for checking whether a given
// IDN is valid for registration, according to Section 4 of RFC 5891.
Registration *Profile = registration
// punycode has no options set.
punycode = &Profile{}
// lookup validates and maps; whether it uses transitional processing is
// determined by transitionalLookup.
lookup = &Profile{options{
transitional: transitionalLookup,
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
// display is configured like lookup, except that transitional is left at
// its zero value.
display = &Profile{options{
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
// registration validates without mapping and additionally verifies the
// DNS length.
registration = &Profile{options{
useSTD3Rules: true,
verifyDNSLength: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
bidirule: bidirule.ValidString,
}}
// TODO: profiles
// Register: recommended for approving domain names: don't do any mappings
// but rather reject on invalid input. Bundle or block deviation characters.
)
// labelError is the error reported for an invalid label. It records the
// offending label and a short code identifying the rule that was violated.
type labelError struct {
	label string
	code_ string
}

// code returns the code of the violated rule.
func (e labelError) code() string {
	return e.code_
}

// Error returns a message that quotes the offending label.
func (e labelError) Error() string {
	msg := fmt.Sprintf("idna: invalid label %q", e.label)
	return msg
}
// runeError is the error reported for a disallowed rune; its value is the
// offending rune.
type runeError rune

// code returns the code of the violated rule, which is always "P1".
func (e runeError) code() string {
	return "P1"
}

// Error returns a message that shows the rune in U+XXXX notation.
func (e runeError) Error() string {
	msg := fmt.Sprintf("idna: disallowed rune %U", e)
	return msg
}
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It maps s with the profile's mapping function, if any, decodes and
// validates each label, applies the Bidi rule if it is configured and needed
// and, if toASCII is set, encodes non-ASCII labels with the ACE prefix.
// Processing continues after an error: only the first error is kept and it
// is returned together with the (partially) processed string.
func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error
var isBidi bool
if p.mapping != nil {
s, isBidi, err = p.mapping(p, s)
}
// Remove leading empty labels.
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
}
// TODO: allow for a quick check of the tables data.
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" {
err = &labelError{s, "A4"}
}
// First pass: decode labels that carry the ACE prefix and validate all
// labels.
labels := labelIter{orig: s}
for ; !labels.done(); labels.next() {
label := labels.label()
if label == "" {
// Empty labels are not okay. The label iterator skips the last
// label if it is empty.
if err == nil && p.verifyDNSLength {
err = &labelError{s, "A4"}
}
continue
}
if strings.HasPrefix(label, acePrefix) {
u, err2 := decode(label[len(acePrefix):])
if err2 != nil {
if err == nil {
err = err2
}
// Spec says keep the old label.
continue
}
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
// Replace the label by its decoded form, even if validation fails below.
labels.set(u)
if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
// This should be called on NonTransitional, according to the
// spec, but that currently does not have any effect. Use the
// original profile to preserve options.
err = p.validateLabel(u)
}
} else if err == nil {
err = p.validateLabel(label)
}
}
// Second pass: check the Bidi rule on every label, but only if some label
// was found to be bidirectional and no error occurred so far.
if isBidi && p.bidirule != nil && err == nil {
for labels.reset(); !labels.done(); labels.next() {
if !p.bidirule(labels.label()) {
err = &labelError{s, "B"}
break
}
}
}
// Third pass: encode non-ASCII labels and verify the length of each label.
if toASCII {
for labels.reset(); !labels.done(); labels.next() {
label := labels.label()
if !ascii(label) {
a, err2 := encode(acePrefix, label)
if err == nil {
err = err2
}
// The encoded label is used even if encode reported an error.
label = a
labels.set(a)
}
n := len(label)
if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
err = &labelError{label, "A4"}
}
}
}
s = labels.result()
if toASCII && p.verifyDNSLength && err == nil {
// Compute the length of the domain name minus the root label and its dot.
n := len(s)
if n > 0 && s[n-1] == '.' {
n--
}
if len(s) < 1 || n > 253 {
err = &labelError{s, "A4"}
}
}
return s, err
}
// normalize is the mapping function that only normalizes: it returns the NFC
// form of s and reports whether the direction of the result is
// right-to-left. It never returns an error and does not use p.
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
	// TODO: consider first doing a quick check to see if any of these checks
	// need to be done. This will make it slower in the general case, but
	// faster in the common case.
	mapped = norm.NFC.String(s)
	direction := bidirule.DirectionString(mapped)
	return mapped, direction == bidi.RightToLeft, nil
}
// validateRegistration is the mapping function used for registration. It
// does not map: s is always returned unchanged. It reports an error if s is
// not in NFC, if it contains a byte sequence the trie cannot look up, or at
// the first rune whose simplified category is disallowed, mapped, unknown or
// ignored. bidi reports whether a bidirectional rune was seen before
// returning.
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
	// TODO: filter need for normalization in loop below.
	if !norm.NFC.IsNormalString(s) {
		return s, false, &labelError{s, "V1"}
	}
	for rest := s; rest != ""; {
		v, sz := trie.lookupString(rest)
		if sz == 0 {
			return s, bidi, runeError(utf8.RuneError)
		}
		if !bidi {
			bidi = info(v).isBidi(rest)
		}
		switch p.simplify(info(v).category()) {
		// TODO: handle the NV8 defined in the Unicode idna data set to allow
		// for strict conformance to IDNA2008.
		case disallowed, mapped, unknown, ignored:
			r, _ := utf8.DecodeRuneInString(rest)
			return s, bidi, runeError(r)
		case valid, deviation:
			// Acceptable for registration as is.
		}
		rest = rest[sz:]
	}
	return s, bidi, nil
}
// isBidi reports whether the rune that c was looked up for is a
// right-to-left rune. For non-mapped runes this is read from the attribute
// bits of c. For mapped runes the bidi class of the first rune of s, which
// must be the string c was looked up with, is used instead.
func (c info) isBidi(s string) bool {
	if c.isMapped() {
		// TODO: also store bidi info for mapped data. This is possible, but a
		// bit cumbersome and not for the common case.
		props, _ := bidi.LookupString(s)
		class := props.Class()
		return class == bidi.R || class == bidi.AL || class == bidi.AN
	}
	return c&attributesMask == rtl
}
// validateAndMap is the mapping function used for lookup. It replaces mapped
// runes (and deviation runes, if simplify keeps them as such) by their
// mapping, drops ignored runes, replaces unknown runes by U+FFFD and
// normalizes the result to NFC where needed. Disallowed runes are kept in
// the output; the first one is reported as the error. bidi reports whether a
// bidirectional rune was seen.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
var (
// b holds the output once the first change was made; k is the number of
// bytes of s that have been handled (copied, replaced or dropped) so far.
b []byte
k int
)
// combinedInfoBits contains the or-ed bits of all runes. We use this
// to derive the mayNeedNorm bit later. This may trigger normalization
// overeagerly, but it will not do so in the common case. The end result
// is another 10% saving on BenchmarkProfile for the common case.
var combinedInfoBits info
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
if sz == 0 {
// The lookup failed: replace the remainder of s by U+FFFD and stop.
b = append(b, s[k:i]...)
b = append(b, "\ufffd"...)
k = len(s)
if err == nil {
err = runeError(utf8.RuneError)
}
break
}
combinedInfoBits |= info(v)
bidi = bidi || info(v).isBidi(s[i:])
start := i
i += sz
// Runes that are kept as is are not copied here; they are copied together
// with the next change or at the end.
switch p.simplify(info(v).category()) {
case valid:
continue
case disallowed:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r)
}
continue
case mapped, deviation:
b = append(b, s[k:start]...)
b = info(v).appendMapping(b, s[start:i])
case ignored:
b = append(b, s[k:start]...)
// drop the rune
case unknown:
b = append(b, s[k:start]...)
b = append(b, "\ufffd"...)
}
k = i
}
if k == 0 {
// No changes so far.
if combinedInfoBits&mayNeedNorm != 0 {
s = norm.NFC.String(s)
}
} else {
// Copy the unchanged tail and normalize only if the quick check fails.
b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) {
b = norm.NFC.Bytes(b)
}
// TODO: the punycode converters require strings as input.
s = string(b)
}
return s, bidi, err
}
// A labelIter allows iterating over domain name labels.
//
// Labels are read directly from orig until the first call to set, which
// splits orig into slice; from then on labels are read from and written to
// slice. label must be called before next while iterating over orig, as it
// determines where the current label ends.
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

// reset moves the iterator back to the first label.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether all labels have been visited.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the domain name with all replacements made by set applied.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	if dot := strings.IndexByte(l.orig[l.curStart:], '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		l.curStart = l.curEnd + 1
		if last := len(l.orig) - 1; l.curStart == last && l.orig[last] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	n := len(l.slice)
	if l.i >= n || (l.i == n-1 && l.slice[l.i] == "") {
		// Marks the iterator as done.
		l.curStart = len(l.orig)
	}
}

// set replaces the current label by s.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}
// acePrefix is the ASCII Compatible Encoding (ACE) prefix. It is the prefix
// argument passed to encode when a label is converted to its punycode form.
const acePrefix = "xn--"
// simplify reduces cat to one of the basic categories, resolving the
// categories whose meaning depends on the options of p:
//   - the STD3 categories become disallowed when useSTD3Rules is set and
//     mapped or valid otherwise;
//   - deviation becomes valid unless transitional processing is enabled;
//   - validNV8 and validXV8 are treated as valid.
//
// Any other category is returned unchanged.
func (p *Profile) simplify(cat category) category {
	switch cat {
	case disallowedSTD3Mapped:
		if p.useSTD3Rules {
			return disallowed
		}
		return mapped
	case disallowedSTD3Valid:
		if p.useSTD3Rules {
			return disallowed
		}
		return valid
	case deviation:
		if p.transitional {
			return deviation
		}
		return valid
	case validNV8, validXV8:
		// TODO: handle V2008
		return valid
	}
	return cat
}
// validateFromPunycode checks a label that was decoded from punycode. It
// returns a labelError with code "V1" if s is not in NFC form, a runeError if
// the trie lookup cannot consume a rune, and a labelError with code "V6" if a
// rune's simplified category is neither valid nor deviation.
func validateFromPunycode(p *Profile, s string) error {
	if !norm.NFC.IsNormalString(s) {
		return &labelError{s, "V1"}
	}
	// TODO: detect whether string may have to be normalized in the following
	// loop.
	i := 0
	for i < len(s) {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			return runeError(utf8.RuneError)
		}
		switch p.simplify(info(v).category()) {
		case valid, deviation:
			// Acceptable in a decoded label.
		default:
			return &labelError{s, "V6"}
		}
		i += sz
	}
	return nil
}
// Zero-width non-joiner and zero-width joiner, as UTF-8 strings. validateLabel
// compares them against each rune of a label.
const (
zwnj = "\u200c"
zwj = "\u200d"
)
// joinState is a state of the joiner-checking state machine that validateLabel
// runs over the runes of a label.
type joinState int8
// The states of the joiner state machine. stateStart is the zero value.
// validateLabel rejects a label whose final state is stateFAIL or stateAfter.
const (
stateStart joinState = iota
stateVirama
stateBefore
stateBeforeVirama
stateAfter
stateFAIL
)
// joinStates is the transition table of the joiner state machine, indexed
// first by the current state and then by a join type. Entries omitted from a
// row hold the zero value, which is stateStart.
var joinStates = [][numJoinTypes]joinState{
stateStart: {
joiningL: stateBefore,
joiningD: stateBefore,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateVirama,
},
stateVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
},
stateBefore: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
joinZWNJ: stateAfter,
joinZWJ: stateFAIL,
joinVirama: stateBeforeVirama,
},
stateBeforeVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
},
stateAfter: {
joiningL: stateFAIL,
joiningD: stateBefore,
joiningT: stateAfter,
joiningR: stateStart,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateAfter, // no-op as we can't accept joiners here
},
// stateFAIL is absorbing: every listed input leads back to stateFAIL.
stateFAIL: {
0: stateFAIL,
joiningL: stateFAIL,
joiningD: stateFAIL,
joiningT: stateFAIL,
joiningR: stateFAIL,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateFAIL,
},
}
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
//
// The returned error, if any, is a *labelError whose code names the failed
// check: "A4" for an empty label when verifyDNSLength is set, "V2" and "V3"
// for hyphen placement when checkHyphens is set, "V5" when the first rune is a
// modifier, and "C" when the joiner state machine rejects the label. A
// runeError is returned if, during the joiner check, the trie lookup cannot
// consume a rune.
func (p *Profile) validateLabel(s string) (err error) {
	if s == "" {
		if p.verifyDNSLength {
			return &labelError{s, "A4"}
		}
		return nil
	}
	if p.checkHyphens {
		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
			return &labelError{s, "V2"}
		}
		if s[0] == '-' || s[len(s)-1] == '-' {
			return &labelError{s, "V3"}
		}
	}
	if !p.checkJoiners {
		return nil
	}
	trie := p.trie // p.checkJoiners is only set if trie is set.
	// TODO: merge the use of this in the trie.
	v, sz := trie.lookupString(s)
	x := info(v)
	if x.isModifier() {
		return &labelError{s, "V5"}
	}
	// Quickly return in the absence of zero-width (non) joiners.
	if !strings.Contains(s, zwj) && !strings.Contains(s, zwnj) {
		return nil
	}
	st := stateStart
	for i := 0; ; {
		if sz == 0 {
			// The lookup consumed nothing, so i would never advance and the
			// loop would not terminate. Report it the same way the other
			// trie loops in this file do.
			return runeError(utf8.RuneError)
		}
		jt := x.joinType()
		// The joiners themselves are recognized by value rather than through
		// the join type stored in the trie.
		switch s[i : i+sz] {
		case zwj:
			jt = joinZWJ
		case zwnj:
			jt = joinZWNJ
		}
		st = joinStates[st][jt]
		if x.isViramaModifier() {
			st = joinStates[st][joinVirama]
		}
		if i += sz; i == len(s) {
			break
		}
		v, sz = trie.lookupString(s[i:])
		x = info(v)
	}
	if st == stateFAIL || st == stateAfter {
		return &labelError{s, "C"}
	}
	return nil
}
// ascii reports whether s consists solely of ASCII bytes, that is, bytes
// below utf8.RuneSelf. The empty string is considered ASCII.
func ascii(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
@@ -0,0 +1,162 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.10
package idna
import "testing"
// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// https://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
func TestLabelErrors(t *testing.T) {
// encode shadows the package-level encode: it always uses acePrefix and
// discards the error.
encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
// kind pairs a conversion function with the name that is passed to doTest.
type kind struct {
name string
f func(string) (string, error)
}
punyA := kind{"PunycodeA", punycode.ToASCII}
resolve := kind{"ResolveA", Lookup.ToASCII}
display := kind{"ToUnicode", Display.ToUnicode}
// The remaining kinds use custom profiles; p is reused for each of them.
p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
lengthU := kind{"CheckLengthU", p.ToUnicode}
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
p = New(MapForLookup(), CheckHyphens(false))
hyphens := kind{"CheckHyphens", p.ToASCII}
p = New(MapForLookup(), Transitional(true))
transitional := kind{"Transitional", p.ToASCII}
p = New(MapForLookup(), Transitional(false))
nontransitional := kind{"Nontransitional", p.ToASCII}
// Each case gives the conversion to run, its input, the expected output and
// the expected error code ("" for no error).
testCases := []struct {
kind
input string
want string
wantErr string
}{
{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
{lengthA, "", "", "A4"},
{lengthU, "xn--", "", "A4"},
{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
{lengthU, "xn--.foo", ".foo", "A4"},
{lengthU, "foo.xn--.bar", "foo..bar", "A4"},
{display, "xn--", "", ""},
{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
{display, "xn--.foo", ".foo", ""},
{display, "foo.xn--.bar", "foo..bar", ""},
{lengthA, "a..b", "a..b", "A4"},
{punyA, ".b", ".b", ""},
// For backwards compatibility, the Punycode profile does not map runes.
{punyA, "\u3002b", "xn--b-83t", ""},
{punyA, "..b", "..b", ""},
{lengthA, ".b", ".b", "A4"},
{lengthA, "\u3002b", ".b", "A4"},
{lengthA, "..b", "..b", "A4"},
{lengthA, "b..", "b..", ""},
// Sharpened Bidi rules for Unicode 10.0.0. Apply for ALL labels if ANY
// of the labels is RTL.
{lengthA, "\ufe05\u3002\u3002\U0002603e\u1ce0", "..xn--t6f5138v", "A4"},
{lengthA, "FAX\u2a77\U0001d186\u3002\U0001e942\U000e0181\u180c", "", "B6"},
{resolve, "a..b", "a..b", ""},
// Note that leading dots are not stripped. This is to be consistent
// with the Punycode profile as well as the conformance test.
{resolve, ".b", ".b", ""},
{resolve, "\u3002b", ".b", ""},
{resolve, "..b", "..b", ""},
{resolve, "b..", "b..", ""},
{resolve, "\xed", "", "P1"},
// Raw punycode
{punyA, "", "", ""},
{punyA, "*.foo.com", "*.foo.com", ""},
{punyA, "Foo.com", "Foo.com", ""},
// STD3 rules
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},
// Hyphens
{display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
{hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
{display, "-label-.com", "-label-.com", "V3"},
{hyphens, "-label-.com", "-label-.com", ""},
// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, modern Firefox, Safari, and IE.
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
{display, "lab⒐be", "lab⒐be", "P1"},
{transitional, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
// Transitional vs Nontransitional processing
{transitional, "Plan9faß.de", "plan9fass.de", ""},
{nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""},
// Chrome 54.0 recognizes the error and treats this input verbatim as a
// search string.
// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
// punycode on the result using transitional mapping.
// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
// seems to be nested punycode encodings.
{transitional, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
{transitional, "a\u200Cb", "ab", ""},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
{
// Notice how the string gets transformed, even with an error.
// Chrome will use the original string if it finds an error, so not
// the transformed one.
display,
"gr\ufecb\ufeae\ufe91\ufef2.de",
"gr\u0639\u0631\u0628\u064a.de",
"B",
},
{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
// normalize input
{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
{display, "a\u0323\u0322", "\u1ea1\u0322", ""},
// Non-normalized strings are not normalized when they originate from
// punycode. Despite the error, Chrome, Safari and Firefox will attempt
// to look up the input punycode.
{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
}
for _, tc := range testCases {
doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
}
}
// TestTransitionalDefault checks that Lookup.ToASCII converts "straße.de"
// according to transitionalLookup: "ß" is expected to be mapped to "ss" when
// it is set, and to be punycode-encoded otherwise.
func TestTransitionalDefault(t *testing.T) {
	var want string
	if transitionalLookup {
		want = "strasse.de"
	} else {
		want = "xn--strae-oqa.de"
	}
	doTest(t, Lookup.ToASCII, "Lookup", "straße.de", want, "")
}
@@ -0,0 +1,732 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.10
//go:generate go run gen.go gen_trieval.go gen_common.go
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications) is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/text/internal/export/idna"
import (
"fmt"
"strings"
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/norm"
)
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
// specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
// string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
// needed, and document that the return string may not be empty in case of
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
// ToASCII converts s to its ASCII form using the Punycode profile. It is a
// wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
	const toASCII = true
	return Punycode.process(s, toASCII)
}
// ToUnicode converts s to its Unicode form using the Punycode profile. It is
// a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
	const toASCII = false
	return Punycode.process(s, toASCII)
}
// An Option configures a Profile at creation time. Options are applied in the
// order in which they are passed to New, each one modifying the options of the
// Profile under construction.
type Option func(*options)
// Transitional sets whether a Profile uses the Transitional mapping as defined
// in UTS #46. With the transitional mapping, "ß" is for example mapped to
// "ss". It is a compromise between IDNA2003 and IDNA2008 compatibility and is
// used by some browsers when resolving domain names. This option is only
// meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
	opt := func(o *options) {
		o.transitional = transitional
	}
	return opt
}
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
	opt := func(o *options) {
		o.verifyDNSLength = verify
	}
	return opt
}
// RemoveLeadingDots sets whether leading label separators are removed.
// Leading runes that map to dots, such as U+3002 IDEOGRAPHIC FULL STOP, are
// removed as well.
func RemoveLeadingDots(remove bool) Option {
	opt := func(o *options) {
		o.removeLeadingDots = remove
	}
	return opt
}
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
// in UTS #46.
func ValidateLabels(enable bool) Option {
	return func(o *options) {
		o.trie = trie
		o.checkJoiners = enable
		o.checkHyphens = enable
		if !enable {
			o.fromPuny = nil
			return
		}
		o.fromPuny = validateFromPunycode
		// Don't override existing mappings, but set one that at least checks
		// normalization if it is not set.
		if o.mapping == nil {
			o.mapping = normalize
		}
	}
}
// CheckHyphens sets whether to check for correct use of hyphens ('-') in
// labels. Most web browsers do not have this option set, since labels such as
// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
	opt := func(o *options) {
		o.checkHyphens = enable
	}
	return opt
}
// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
// A of RFC 5892, concerning the use of joiner runes. The option also sets the
// trie of the Profile, which the joiner check reads.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
	opt := func(o *options) {
		o.checkJoiners = enable
		o.trie = trie
	}
	return opt
}
// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
// but is only useful if ValidateLabels is set.
//
// Turning it off is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
	opt := func(o *options) {
		o.useSTD3Rules = use
	}
	return opt
}
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
	opt := func(o *options) {
		o.bidirule = bidirule.ValidString
	}
	return opt
}
// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
//
// It installs validateRegistration as the mapping step and then applies
// StrictDomainName(true), ValidateLabels(true), VerifyDNSLength(true) and
// BidiRule, in that order.
func ValidateForRegistration() Option {
	return func(o *options) {
		o.mapping = validateRegistration
		apply(o, []Option{
			StrictDomainName(true),
			ValidateLabels(true),
			VerifyDNSLength(true),
			BidiRule(),
		})
	}
}
// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option {
	return func(o *options) {
		o.mapping = validateAndMap
		apply(o, []Option{
			StrictDomainName(true),
			ValidateLabels(true),
			RemoveLeadingDots(true),
		})
	}
}
// options holds the configuration of a Profile. The fields are set through
// Option functions or directly by the predefined profiles.
type options struct {
// transitional keeps deviation characters as deviations; when it is
// false, simplify treats them as valid.
transitional bool
// useSTD3Rules makes simplify treat the disallowedSTD3 categories as
// disallowed instead of mapped or valid.
useSTD3Rules bool
// checkHyphens and checkJoiners enable the corresponding checks in
// validateLabel.
checkHyphens bool
checkJoiners bool
// verifyDNSLength makes process report empty or overlong labels and
// domain names with error code "A4".
verifyDNSLength bool
// removeLeadingDots makes process strip leading '.' bytes after mapping.
removeLeadingDots bool
// trie is the table validateLabel uses for the joiner check.
trie *idnaTrie
// fromPuny calls validation rules when converting A-labels to U-labels.
fromPuny func(p *Profile, s string) error
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (string, error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool
}
// A Profile defines the configuration of an IDNA mapper. It embeds options,
// so the individual settings are accessible as fields of the Profile.
type Profile struct {
options
}
// apply runs every Option in opts on o, in order, so that later options
// override earlier ones.
func apply(o *options, opts []Option) {
	for i := range opts {
		opts[i](o)
	}
}
// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile {
	p := new(Profile)
	apply(&p.options, o)
	return p
}
// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
	const toASCII = true
	return p.process(s, toASCII)
}
// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
//
// The conversion runs on a copy of p with transitional processing turned off;
// p itself is not modified.
func (p *Profile) ToUnicode(s string) (string, error) {
	nonTransitional := *p
	nonTransitional.transitional = false
	return nonTransitional.process(s, false)
}
// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
//
// The description starts with "Transitional" or "NonTransitional", followed
// by a ":"-prefixed name for each enabled flag.
func (p *Profile) String() string {
	s := "NonTransitional"
	if p.transitional {
		s = "Transitional"
	}
	flags := []struct {
		on   bool
		name string
	}{
		{p.useSTD3Rules, ":UseSTD3Rules"},
		{p.checkHyphens, ":CheckHyphens"},
		{p.checkJoiners, ":CheckJoiners"},
		{p.verifyDNSLength, ":VerifyDNSLength"},
	}
	for _, f := range flags {
		if f.on {
			s += f.name
		}
	}
	return s
}
// The exported profiles below alias unexported variables that hold the actual
// configuration.
var (
// Punycode is a Profile that does raw punycode processing with a minimum
// of validation.
Punycode *Profile = punycode
// Lookup is the recommended profile for looking up domain names, according
// to Section 5 of RFC 5891. The exact configuration of this profile may
// change over time.
Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
Display *Profile = display
// Registration is the recommended profile for checking whether a given
// IDN is valid for registration, according to Section 4 of RFC 5891.
Registration *Profile = registration
// punycode has no options set.
punycode = &Profile{}
// lookup maps with validateAndMap and uses transitional processing.
lookup = &Profile{options{
transitional: true,
removeLeadingDots: true,
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
// display is configured like lookup, but without transitional processing.
display = &Profile{options{
useSTD3Rules: true,
removeLeadingDots: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
// registration validates with validateRegistration instead of mapping,
// verifies DNS lengths and does not remove leading dots.
registration = &Profile{options{
useSTD3Rules: true,
verifyDNSLength: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
bidirule: bidirule.ValidString,
}}
// TODO: profiles
// Register: recommended for approving domain names: don't do any mappings
// but rather reject on invalid input. Bundle or block deviation characters.
)
// labelError reports a label that failed validation, together with a short
// code identifying the check that failed.
type labelError struct {
	label string
	code_ string
}

// code returns the code of the failed check.
func (e labelError) code() string {
	return e.code_
}

// Error implements the error interface. The message quotes the offending
// label but does not include the code.
func (e labelError) Error() string {
	const format = "idna: invalid label %q"
	return fmt.Sprintf(format, e.label)
}
// runeError reports a rune that is not allowed in a domain name. The value of
// the error is the offending rune.
type runeError rune

// code returns "P1", the code used for every runeError.
func (e runeError) code() string {
	const disallowedRune = "P1"
	return disallowedRune
}

// Error implements the error interface. The rune is printed in U+XXXX form.
func (e runeError) Error() string {
	const format = "idna: disallowed rune %U"
	return fmt.Sprintf(format, e)
}
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// s is first run through p.mapping, if set. Its labels are then validated,
// with labels that carry the ACE prefix being decoded first. When toASCII is
// true, labels that are not pure ASCII are encoded again afterwards. Only the
// first error encountered is kept; processing continues after an error and
// the (partially) processed string is returned together with it.
func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error
if p.mapping != nil {
s, err = p.mapping(p, s)
}
// Remove leading empty labels.
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
}
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" {
err = &labelError{s, "A4"}
}
// First pass: decode and validate each label.
labels := labelIter{orig: s}
for ; !labels.done(); labels.next() {
label := labels.label()
if label == "" {
// Empty labels are not okay. The label iterator skips the last
// label if it is empty.
if err == nil && p.verifyDNSLength {
err = &labelError{s, "A4"}
}
continue
}
if strings.HasPrefix(label, acePrefix) {
u, err2 := decode(label[len(acePrefix):])
if err2 != nil {
if err == nil {
err = err2
}
// Spec says keep the old label.
continue
}
// The decoded label replaces the encoded one even if it then fails
// validation.
labels.set(u)
if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
// This should be called on NonTransitional, according to the
// spec, but that currently does not have any effect. Use the
// original profile to preserve options.
err = p.validateLabel(u)
}
} else if err == nil {
err = p.validateLabel(label)
}
}
// Second pass, for ToASCII only: encode non-ASCII labels and check the
// length of each label.
if toASCII {
for labels.reset(); !labels.done(); labels.next() {
label := labels.label()
if !ascii(label) {
a, err2 := encode(acePrefix, label)
if err == nil {
err = err2
}
label = a
labels.set(a)
}
n := len(label)
if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
err = &labelError{label, "A4"}
}
}
}
s = labels.result()
if toASCII && p.verifyDNSLength && err == nil {
// Compute the length of the domain name minus the root label and its dot.
n := len(s)
if n > 0 && s[n-1] == '.' {
n--
}
if len(s) < 1 || n > 253 {
err = &labelError{s, "A4"}
}
}
return s, err
}
// normalize is the mapping step that only normalizes: it returns s in NFC
// form and never reports an error. The profile p is not used.
func normalize(p *Profile, s string) (string, error) {
	normalized := norm.NFC.String(s)
	return normalized, nil
}
// validateRegistration is the mapping step used for registration: it performs
// no mapping and only validates. s is always returned unchanged, together
// with a labelError with code "V1" if s is not in NFC form, or a runeError for
// the first rune that cannot be looked up or whose simplified category is
// disallowed, mapped, unknown or ignored.
func validateRegistration(p *Profile, s string) (string, error) {
	if !norm.NFC.IsNormalString(s) {
		return s, &labelError{label: s, code_: "V1"}
	}
	i := 0
	for i < len(s) {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			return s, runeError(utf8.RuneError)
		}
		// TODO: handle the NV8 defined in the Unicode idna data set to allow
		// for strict conformance to IDNA2008.
		switch p.simplify(info(v).category()) {
		case disallowed, mapped, unknown, ignored:
			r, _ := utf8.DecodeRuneInString(s[i:])
			return s, runeError(r)
		}
		i += sz
	}
	return s, nil
}
// validateAndMap is the mapping step used for lookup and display. It maps the
// runes of s according to their simplified category and returns the result
// normalized to NFC. Only the first error is recorded: a disallowed rune is
// kept in the output and reported as a runeError, while processing of the
// remaining runes continues.
func validateAndMap(p *Profile, s string) (string, error) {
var (
err error
// b holds the output built so far; it stays empty as long as no rune
// needed to be changed.
b []byte
// k is the offset in s up to which the input has been copied to b.
k int
)
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
if sz == 0 {
// The lookup could not consume a rune: emit U+FFFD, drop the rest of
// the input and stop.
b = append(b, s[k:i]...)
b = append(b, "\ufffd"...)
k = len(s)
if err == nil {
err = runeError(utf8.RuneError)
}
break
}
start := i
i += sz
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
case valid:
continue
case disallowed:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r)
}
continue
case mapped, deviation:
b = append(b, s[k:start]...)
b = info(v).appendMapping(b, s[start:i])
case ignored:
b = append(b, s[k:start]...)
// drop the rune
case unknown:
b = append(b, s[k:start]...)
b = append(b, "\ufffd"...)
}
k = i
}
if k == 0 {
// No changes so far.
s = norm.NFC.String(s)
} else {
// Append the unchanged tail, then normalize only if needed.
b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) {
b = norm.NFC.Bytes(b)
}
// TODO: the punycode converters require strings as input.
s = string(b)
}
return s, err
}
// labelIter walks over the dot-separated labels of a domain name.
//
// It starts out scanning orig in place. The first call to set splits orig into
// slice; from then on labels are read from and written to slice.
type labelIter struct {
	orig     string   // the domain name being iterated
	slice    []string // labels of orig; nil until set is first called
	curStart int      // byte offset in orig where the current label starts
	curEnd   int      // byte offset in orig just past the current label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. A slice created by set is
// kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iterator has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the domain name, with any labels replaced through set joined
// back together with dots.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. While scanning orig in place it also
// records the end of the label in curEnd, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	if dot := strings.IndexByte(l.orig[l.curStart:], '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		// Step over the dot that terminated the current label.
		l.curStart = l.curEnd + 1
		if last := len(l.orig) - 1; l.curStart == last && l.orig[last] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	n := len(l.slice)
	if l.i >= n || (l.i == n-1 && l.slice[l.i] == "") {
		// Mark the iterator as done; done only looks at curStart.
		l.curStart = len(l.orig)
	}
}

// set replaces the current label with s, splitting orig into slice on first
// use.
func (l *labelIter) set(s string) {
	labels := l.slice
	if labels == nil {
		labels = strings.Split(l.orig, ".")
		l.slice = labels
	}
	labels[l.i] = s
}
// acePrefix is the ASCII Compatible Encoding (ACE) prefix. process treats a
// label that starts with it as punycode-encoded, and passes it to encode when
// producing ASCII output.
const acePrefix = "xn--"
// simplify reduces cat to one of the basic categories, resolving the
// categories whose meaning depends on the options of p:
//   - the STD3 categories become disallowed when useSTD3Rules is set and
//     mapped or valid otherwise;
//   - deviation becomes valid unless transitional processing is enabled;
//   - validNV8 and validXV8 are treated as valid.
//
// Any other category is returned unchanged.
func (p *Profile) simplify(cat category) category {
	switch cat {
	case disallowedSTD3Mapped:
		if p.useSTD3Rules {
			return disallowed
		}
		return mapped
	case disallowedSTD3Valid:
		if p.useSTD3Rules {
			return disallowed
		}
		return valid
	case deviation:
		if p.transitional {
			return deviation
		}
		return valid
	case validNV8, validXV8:
		// TODO: handle V2008
		return valid
	}
	return cat
}
// validateFromPunycode checks a label that was decoded from punycode. It
// returns a labelError with code "V1" if s is not in NFC form, a runeError if
// the trie lookup cannot consume a rune, and a labelError with code "V6" if a
// rune's simplified category is neither valid nor deviation.
func validateFromPunycode(p *Profile, s string) error {
	if !norm.NFC.IsNormalString(s) {
		return &labelError{label: s, code_: "V1"}
	}
	i := 0
	for i < len(s) {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			return runeError(utf8.RuneError)
		}
		switch p.simplify(info(v).category()) {
		case valid, deviation:
			// Acceptable in a decoded label.
		default:
			return &labelError{label: s, code_: "V6"}
		}
		i += sz
	}
	return nil
}
// Zero-width non-joiner and zero-width joiner, as UTF-8 strings. validateLabel
// compares them against each rune of a label.
const (
zwnj = "\u200c"
zwj = "\u200d"
)
// joinState is a state of the joiner-checking state machine that validateLabel
// runs over the runes of a label.
type joinState int8
// The states of the joiner state machine. stateStart is the zero value.
// validateLabel rejects a label whose final state is stateFAIL or stateAfter.
const (
stateStart joinState = iota
stateVirama
stateBefore
stateBeforeVirama
stateAfter
stateFAIL
)
// joinStates is the transition table of the joiner state machine, indexed
// first by the current state and then by a join type. Entries omitted from a
// row hold the zero value, which is stateStart.
var joinStates = [][numJoinTypes]joinState{
stateStart: {
joiningL: stateBefore,
joiningD: stateBefore,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateVirama,
},
stateVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
},
stateBefore: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
joinZWNJ: stateAfter,
joinZWJ: stateFAIL,
joinVirama: stateBeforeVirama,
},
stateBeforeVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
},
stateAfter: {
joiningL: stateFAIL,
joiningD: stateBefore,
joiningT: stateAfter,
joiningR: stateStart,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateAfter, // no-op as we can't accept joiners here
},
// stateFAIL is absorbing: every listed input leads back to stateFAIL.
stateFAIL: {
0: stateFAIL,
joiningL: stateFAIL,
joiningD: stateFAIL,
joiningT: stateFAIL,
joiningR: stateFAIL,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateFAIL,
},
}
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
//
// The returned error, if any, is a *labelError whose code names the failed
// check: "A4" for an empty label when verifyDNSLength is set, "B" when the
// configured bidirule rejects the label, "V2" and "V3" for hyphen placement
// when checkHyphens is set, "V5" when the first rune is a modifier, and "C"
// when the joiner state machine rejects the label. A runeError is returned
// if, during the joiner check, the trie lookup cannot consume a rune.
func (p *Profile) validateLabel(s string) error {
	if s == "" {
		if p.verifyDNSLength {
			return &labelError{s, "A4"}
		}
		return nil
	}
	if p.bidirule != nil && !p.bidirule(s) {
		return &labelError{s, "B"}
	}
	if p.checkHyphens {
		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
			return &labelError{s, "V2"}
		}
		if s[0] == '-' || s[len(s)-1] == '-' {
			return &labelError{s, "V3"}
		}
	}
	if !p.checkJoiners {
		return nil
	}
	trie := p.trie // p.checkJoiners is only set if trie is set.
	// TODO: merge the use of this in the trie.
	v, sz := trie.lookupString(s)
	x := info(v)
	if x.isModifier() {
		return &labelError{s, "V5"}
	}
	// Quickly return in the absence of zero-width (non) joiners.
	if !strings.Contains(s, zwj) && !strings.Contains(s, zwnj) {
		return nil
	}
	st := stateStart
	for i := 0; ; {
		if sz == 0 {
			// The lookup consumed nothing, so i would never advance and the
			// loop would not terminate. Report it the same way the other
			// trie loops in this file do.
			return runeError(utf8.RuneError)
		}
		jt := x.joinType()
		// The joiners themselves are recognized by value rather than through
		// the join type stored in the trie.
		switch s[i : i+sz] {
		case zwj:
			jt = joinZWJ
		case zwnj:
			jt = joinZWNJ
		}
		st = joinStates[st][jt]
		if x.isViramaModifier() {
			st = joinStates[st][joinVirama]
		}
		if i += sz; i == len(s) {
			break
		}
		v, sz = trie.lookupString(s[i:])
		x = info(v)
	}
	if st == stateFAIL || st == stateAfter {
		return &labelError{s, "C"}
	}
	return nil
}
// ascii reports whether s consists solely of ASCII bytes, that is, bytes
// below utf8.RuneSelf. The empty string is considered ASCII.
func ascii(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
@@ -0,0 +1,151 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.10
package idna
import "testing"
// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// https://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
func TestLabelErrors(t *testing.T) {
// encode shadows the package-level encode: it always uses acePrefix and
// discards the error.
encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
// kind pairs a conversion function with the name that is passed to doTest.
type kind struct {
name string
f func(string) (string, error)
}
punyA := kind{"PunycodeA", punycode.ToASCII}
resolve := kind{"ResolveA", Lookup.ToASCII}
display := kind{"ToUnicode", Display.ToUnicode}
// The remaining kinds use custom profiles; p is reused for each of them.
p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
lengthU := kind{"CheckLengthU", p.ToUnicode}
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
p = New(MapForLookup(), CheckHyphens(false))
hyphens := kind{"CheckHyphens", p.ToASCII}
p = New(MapForLookup(), Transitional(true))
transitional := kind{"Transitional", p.ToASCII}
p = New(MapForLookup(), Transitional(false))
nontransitional := kind{"Nontransitional", p.ToASCII}
// Each case gives the conversion to run, its input, the expected output and
// the expected error code ("" for no error).
testCases := []struct {
kind
input string
want string
wantErr string
}{
{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
{lengthA, "", "", "A4"},
{lengthU, "xn--", "", "A4"},
{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
{lengthU, "xn--.foo", ".foo", "A4"},
{lengthU, "foo.xn--.bar", "foo..bar", "A4"},
{display, "xn--", "", ""},
{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
{display, "xn--.foo", ".foo", ""},
{display, "foo.xn--.bar", "foo..bar", ""},
{lengthA, "a..b", "a..b", "A4"},
{punyA, ".b", ".b", ""},
// For backwards compatibility, the Punycode profile does not map runes.
{punyA, "\u3002b", "xn--b-83t", ""},
{punyA, "..b", "..b", ""},
// Only strip leading empty labels for certain profiles. Stripping
// leading empty labels here but not for "empty" punycode above seems
// inconsistent, but seems to be applied by both the conformance test
// and Chrome. So we turn it off by default, support it as an option,
// and enable it in profiles where it seems commonplace.
{lengthA, ".b", "b", ""},
{lengthA, "\u3002b", "b", ""},
{lengthA, "..b", "b", ""},
{lengthA, "b..", "b..", ""},
{resolve, "a..b", "a..b", ""},
{resolve, ".b", "b", ""},
{resolve, "\u3002b", "b", ""},
{resolve, "..b", "b", ""},
{resolve, "b..", "b..", ""},
{resolve, "\xed", "", "P1"},
// Raw punycode
{punyA, "", "", ""},
{punyA, "*.foo.com", "*.foo.com", ""},
{punyA, "Foo.com", "Foo.com", ""},
// STD3 rules
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},
// Hyphens
{display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
{hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
{display, "-label-.com", "-label-.com", "V3"},
{hyphens, "-label-.com", "-label-.com", ""},
// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, modern Firefox, Safari, and IE.
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
{display, "lab⒐be", "lab⒐be", "P1"},
{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
// Transitional vs Nontransitional processing
{transitional, "Plan9faß.de", "plan9fass.de", ""},
{nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""},
// Chrome 54.0 recognizes the error and treats this input verbatim as a
// search string.
// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
// punycode on the result using transitional mapping.
// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
// seems to be nested punycode encodings.
{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
{resolve, "a\u200Cb", "ab", ""},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
{
// Notice how the string gets transformed, even with an error.
// Chrome will use the original string if it finds an error, so not
// the transformed one.
display,
"gr\ufecb\ufeae\ufe91\ufef2.de",
"gr\u0639\u0631\u0628\u064a.de",
"B",
},
{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
// normalize input
{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
{display, "a\u0323\u0322", "\u1ea1\u0322", ""},
// Non-normalized strings are not normalized when they originate from
// punycode. Despite the error, Chrome, Safari and Firefox will attempt
// to look up the input punycode.
{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
}
for _, tc := range testCases {
doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
}
}
@@ -0,0 +1,118 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"encoding/hex"
"fmt"
"regexp"
"strconv"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
)
func TestAllocToUnicode(t *testing.T) {
avg := testtext.AllocsPerRun(1000, func() {
ToUnicode("www.golang.org")
})
if avg > 0 {
t.Errorf("got %f; want 0", avg)
}
}
func TestAllocToASCII(t *testing.T) {
avg := testtext.AllocsPerRun(1000, func() {
ToASCII("www.golang.org")
})
if avg > 0 {
t.Errorf("got %f; want 0", avg)
}
}
// TestProfiles checks that each predefined profile has the same %#v rendering
// as a profile assembled from options through New.
func TestProfiles(t *testing.T) {
	type profileCase struct {
		name      string
		want, got *Profile
	}
	cases := []profileCase{
		{"Punycode", punycode, New()},
		{"Registration", registration, New(ValidateForRegistration())},
		{"Registration", registration, New(
			ValidateForRegistration(),
			VerifyDNSLength(true),
			BidiRule(),
		)},
		{"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(transitionalLookup))},
		{"Display", display, New(MapForLookup(), BidiRule())},
	}
	for _, pc := range cases {
		// Functions are not comparable, but the printed version will include
		// their pointers, so the comparison is done on the formatted text.
		gotRepr := fmt.Sprintf("%#v", pc.got)
		wantRepr := fmt.Sprintf("%#v", pc.want)
		if gotRepr == wantRepr {
			continue
		}
		t.Errorf("%s: \ngot %#v,\nwant %#v", pc.name, gotRepr, wantRepr)
	}
}
// doTest runs f(input) as a subtest and checks both the returned string and
// the returned error.
//
// want is compared against the result only when it is non-empty. errors lists
// the acceptable error codes (surrounding square brackets are stripped); an
// empty list means no error is expected. The codes follow the categories of
// https://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
//
//	P: Processing
//	V: Validity
//	A: to ASCII
//	B: Bidi
//	C: Context J
func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
	// coder is the shape an error must have for its code to be inspected;
	// the type assertion below panics for any other error value.
	type coder interface {
		code() string
	}

	errors = strings.Trim(errors, "[]")
	outcome := "ok"
	if errors != "" {
		outcome = "err:" + errors
	}

	// Replace some of the escape sequences so individual subtests are easier
	// to single out by name on the command line.
	quoted := strings.Trim(strconv.QuoteToASCII(input), `"`)
	quoted = strings.ReplaceAll(quoted, `\u`, "#")
	quoted = strings.ReplaceAll(quoted, `\U`, "#")
	name = fmt.Sprintf("%s/%s/%s", name, quoted, outcome)

	testtext.Run(t, name, func(t *testing.T) {
		got, err := f(input)
		switch {
		case err != nil:
			code := err.(coder).code()
			if !strings.Contains(errors, code) {
				t.Errorf("error %q not in set of expected errors {%v}", code, errors)
			}
		case errors != "":
			t.Errorf("no errors; want error in {%v}", errors)
		}
		if want != "" && got != want {
			t.Errorf(`string: got %+q; want %+q`, got, want)
		}
	})
}
// unescapeRE matches a literal backslash-u followed by exactly four
// hexadecimal digits.
var unescapeRE = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)

// unescape replaces every \uXXXX escape in s (XXXX being four hexadecimal
// digits) with the UTF-8 encoding of that code point. Text that does not form
// such an escape is returned unchanged.
func unescape(s string) string {
	return unescapeRE.ReplaceAllStringFunc(s, func(v string) string {
		var d [2]byte
		// v is `\u` plus four hex digits; decode the digits into two bytes.
		if _, err := hex.Decode(d[:], []byte(v[2:])); err != nil {
			// Cannot happen with the pattern above, but never turn an
			// undecodable escape into U+0000 silently.
			return v
		}
		return string(rune(d[0])<<8 | rune(d[1]))
	})
}
// BenchmarkProfile measures Lookup.ToASCII on a fixed ASCII domain name.
func BenchmarkProfile(b *testing.B) {
	const domain = "www.yahoogle.com"
	for n := b.N; n > 0; n-- {
		Lookup.ToASCII(domain)
	}
}
@@ -0,0 +1,9 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.18
package idna
// transitionalLookup is the build-specific argument handed to the
// Transitional option when the Lookup profile is assembled (see its use in
// TestProfiles). This file is selected by the !go1.18 build constraint.
const transitionalLookup = true
@@ -0,0 +1,215 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// This file implements the Punycode algorithm from RFC 3492.
import (
"math"
"strings"
"unicode/utf8"
)
// These parameter values are specified in section 5.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
// base is the step of k in the digit loops of decode and encode and the
// number of distinct digit values handled by encodeDigit/decodeDigit.
base int32 = 36
// damp divides delta in adapt on the first adaptation.
damp int32 = 700
// initialBias is the starting value of bias in decode and encode.
initialBias int32 = 72
// initialN is the starting value of n in decode and encode.
initialN int32 = 128
// skew is added to delta in the denominator of adapt's result.
skew int32 = 38
// tmax is the upper clamp for the per-digit threshold t.
tmax int32 = 26
// tmin is the lower clamp for the per-digit threshold t.
tmin int32 = 1
)
// punyError returns a *labelError built from s and the error code "A3".
func punyError(s string) error {
	return &labelError{s, "A3"}
}
// decode decodes a string as specified in section 6.2.
//
// It returns the decoded string, or a punyError carrying the input when the
// input is malformed, a computation overflows int32, the decoded code point
// is outside [0, utf8.MaxRune], or the output would exceed 1024 runes.
func decode(encoded string) (string, error) {
if encoded == "" {
return "", nil
}
// pos is the index just past the last '-', or 0 if there is none.
pos := 1 + strings.LastIndex(encoded, "-")
if pos == 1 {
// The last '-' is at index 0, so nothing precedes the delimiter.
return "", punyError(encoded)
}
if pos == len(encoded) {
// The input ends in '-': everything before it is returned verbatim.
return encoded[:len(encoded)-1], nil
}
output := make([]rune, 0, len(encoded))
if pos != 0 {
// Copy the part before the last '-' to the output as is.
for _, r := range encoded[:pos-1] {
output = append(output, r)
}
}
i, n, bias := int32(0), initialN, initialBias
overflow := false
for pos < len(encoded) {
// Decode one variable-length integer, accumulating it into i.
oldI, w := i, int32(1)
for k := base; ; k += base {
if pos == len(encoded) {
// Input ended in the middle of an integer.
return "", punyError(encoded)
}
digit, ok := decodeDigit(encoded[pos])
if !ok {
return "", punyError(encoded)
}
pos++
i, overflow = madd(i, digit, w)
if overflow {
return "", punyError(encoded)
}
// t is k-bias clamped to [tmin, tmax].
t := k - bias
if k <= bias {
t = tmin
} else if k >= bias+tmax {
t = tmax
}
if digit < t {
// A digit below the threshold terminates the integer.
break
}
w, overflow = madd(0, w, base-t)
if overflow {
return "", punyError(encoded)
}
}
// Refuse to grow the output beyond 1024 runes.
if len(output) >= 1024 {
return "", punyError(encoded)
}
x := int32(len(output) + 1)
bias = adapt(i-oldI, x, oldI == 0)
// i encodes both how far to advance n and the insertion index.
n += i / x
i %= x
if n < 0 || n > utf8.MaxRune {
return "", punyError(encoded)
}
// Insert n at index i, shifting the tail one slot to the right.
output = append(output, 0)
copy(output[i+1:], output[i:])
output[i] = n
i++
}
return string(output), nil
}
// encode encodes a string as specified in section 6.3 and prepends prefix to
// the result.
//
// The "while h < length(input)" line in the specification becomes "for
// remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
//
// It returns a punyError carrying s when the delta computation overflows
// int32.
func encode(prefix, s string) (string, error) {
output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
copy(output, prefix)
delta, n, bias := int32(0), initialN, initialBias
// b counts the runes below 0x80, which are copied through unchanged;
// remaining counts the runes that still have to be encoded.
b, remaining := int32(0), int32(0)
for _, r := range s {
if r < 0x80 {
b++
output = append(output, byte(r))
} else {
remaining++
}
}
// h is the number of runes handled so far.
h := b
if b > 0 {
// Separate the copied runes from the encoded deltas.
output = append(output, '-')
}
overflow := false
for remaining != 0 {
// m becomes the smallest rune in s that is >= n.
m := int32(0x7fffffff)
for _, r := range s {
if m > r && r >= n {
m = r
}
}
delta, overflow = madd(delta, m-n, h+1)
if overflow {
return "", punyError(s)
}
n = m
for _, r := range s {
if r < n {
delta++
if delta < 0 {
// delta wrapped around int32.
return "", punyError(s)
}
continue
}
if r > n {
continue
}
// r == n: emit delta as a variable-length integer.
q := delta
for k := base; ; k += base {
// t is k-bias clamped to [tmin, tmax].
t := k - bias
if k <= bias {
t = tmin
} else if k >= bias+tmax {
t = tmax
}
if q < t {
break
}
output = append(output, encodeDigit(t+(q-t)%(base-t)))
q = (q - t) / (base - t)
}
output = append(output, encodeDigit(q))
bias = adapt(delta, h+1, h == b)
delta = 0
h++
remaining--
}
delta++
n++
}
return string(output), nil
}
// madd computes a + (b * c), detecting overflow.
//
// The product is formed in 64 bits. overflow is true, and next is 0, when
// adding the product to a would exceed math.MaxInt32.
func madd(a, b, c int32) (next int32, overflow bool) {
	product := int64(b) * int64(c)
	headroom := int64(math.MaxInt32) - int64(a)
	if product <= headroom {
		return a + int32(product), false
	}
	return 0, true
}
// decodeDigit maps an ASCII byte to its digit value: letters of either case
// give 0..25 and '0'..'9' give 26..35. ok is false for any other byte.
func decodeDigit(x byte) (digit int32, ok bool) {
	if x >= '0' && x <= '9' {
		return int32(x-'0') + 26, true
	}
	if x >= 'A' && x <= 'Z' {
		return int32(x - 'A'), true
	}
	if x >= 'a' && x <= 'z' {
		return int32(x - 'a'), true
	}
	return 0, false
}
// encodeDigit maps a digit value to its ASCII byte: 0..25 give 'a'..'z' and
// 26..35 give '0'..'9'. It panics for any other value.
func encodeDigit(digit int32) byte {
	if digit >= 0 && digit < 26 {
		return byte('a' + digit)
	}
	if digit >= 26 && digit < 36 {
		return byte('0' + (digit - 26))
	}
	panic("idna: internal error in punycode encoding")
}
// adapt is the bias adaptation function specified in section 6.1.
//
// delta is first scaled down (by damp on the first call, by 2 afterwards) and
// increased by its share per point. It is then repeatedly divided by
// base-tmin, adding base to the result each time, until it is small enough
// for the closing formula.
func adapt(delta, numPoints int32, firstTime bool) int32 {
	divisor := int32(2)
	if firstTime {
		divisor = damp
	}
	delta /= divisor
	delta += delta / numPoints

	const span = base - tmin
	var k int32
	for delta > (span*tmax)/2 {
		delta /= span
		k += base
	}
	return k + (span+1)*delta/(delta+skew)
}
@@ -0,0 +1,199 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"strings"
"testing"
)
var punycodeTestCases = [...]struct {
s, encoded string
}{
{"", ""},
{"-", "--"},
{"-a", "-a-"},
{"-a-", "-a--"},
{"a", "a-"},
{"a-", "a--"},
{"a-b", "a-b-"},
{"books", "books-"},
{"bücher", "bcher-kva"},
{"Hello世界", "Hello-ck1hg65u"},
{"ü", "tda"},
{"üý", "tdac"},
// The test cases below come from RFC 3492 section 7.1 with Errata 3026.
{
// (A) Arabic (Egyptian).
"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
"egbpdaj6bu4bxfgehfvwxn",
},
{
// (B) Chinese (simplified).
"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
"ihqwcrb4cv8a8dqg056pqjye",
},
{
// (C) Chinese (traditional).
"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
"ihqwctvzc91f659drss3x8bo0yb",
},
{
// (D) Czech.
"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
"\u0065\u0073\u006B\u0079",
"Proprostnemluvesky-uyb24dma41a",
},
{
// (E) Hebrew.
"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
"\u05D1\u05E8\u05D9\u05EA",
"4dbcagdahymbxekheh6e0a7fei0b",
},
{
// (F) Hindi (Devanagari).
"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
"\u0939\u0948\u0902",
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
},
{
// (G) Japanese (kanji and hiragana).
"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
},
{
// (H) Korean (Hangul syllables).
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
"psd879ccm6fea98c",
},
{
// (I) Russian (Cyrillic).
"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
"\u0438",
"b1abfaaepdrnnbgefbadotcwatmq2g4l",
},
{
// (J) Spanish.
"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
"\u0061\u00F1\u006F\u006C",
"PorqunopuedensimplementehablarenEspaol-fmd56a",
},
{
// (K) Vietnamese.
"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
"\u0056\u0069\u1EC7\u0074",
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
},
{
// (L) 3<nen>B<gumi><kinpachi><sensei>.
"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
"3B-ww4c5e180e575a65lsy2b",
},
{
// (M) <amuro><namie>-with-SUPER-MONKEYS.
"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
"\u004F\u004E\u004B\u0045\u0059\u0053",
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
},
{
// (N) Hello-Another-Way-<sorezore><no><basho>.
"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
"Hello-Another-Way--fc4qua05auwb3674vfr0b",
},
{
// (O) <hitotsu><yane><no><shita>2.
"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
"2-u9tlzr9756bt3uc0v",
},
{
// (P) Maji<de>Koi<suru>5<byou><mae>
"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
"\u308B\u0035\u79D2\u524D",
"MajiKoi5-783gue6qz075azm5e",
},
{
// (Q) <pafii>de<runba>
"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
"de-jg4avhby1noc0d",
},
{
// (R) <sono><supiido><de>
"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
"d9juau41awczczp",
},
{
// (S) -> $1.00 <-
"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
"\u003C\u002D",
"-> $1.00 <--",
},
}
// TestPunycode runs every entry of punycodeTestCases through decode and
// encode (with an empty prefix) and checks the round trip in both directions.
func TestPunycode(t *testing.T) {
	for _, tc := range punycodeTestCases {
		decoded, err := decode(tc.encoded)
		switch {
		case err != nil:
			t.Errorf("decode(%q): %v", tc.encoded, err)
		case decoded != tc.s:
			t.Errorf("decode(%q): got %q, want %q", tc.encoded, decoded, tc.s)
		}

		encoded, err := encode("", tc.s)
		switch {
		case err != nil:
			t.Errorf(`encode("", %q): %v`, tc.s, err)
		case encoded != tc.encoded:
			t.Errorf(`encode("", %q): got %q, want %q`, tc.s, encoded, tc.encoded)
		}
	}
}
// punycodeErrorTestCases lists inputs that must make decode or encode return
// an error. Each entry starts with the operation name ("decode " or
// "encode "); the rest of the string is the argument passed to it.
var punycodeErrorTestCases = [...]string{
"decode -", // A sole '-' is invalid.
"decode foo\x00bar", // '\x00' is not in [0-9A-Za-z].
"decode foo#bar", // '#' is not in [0-9A-Za-z].
"decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z].
"decode 9", // "9a" decodes to codepoint \u00A3; "9" is truncated.
"decode 99999a", // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
"decode 9999999999a", // "9999999999a" overflows the int32 calculation.
"encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow.
"encode " + strings.Repeat("x", 65666) + "\uffff", // int32 overflow. issue #28233
}
// TestPunycodeErrors checks that every entry of punycodeErrorTestCases makes
// the named operation return an error.
func TestPunycodeErrors(t *testing.T) {
	// Both operation prefixes ("decode " and "encode ") are 7 bytes long.
	const opLen = len("decode ")
	for _, tc := range punycodeErrorTestCases {
		var err error
		if strings.HasPrefix(tc, "decode ") {
			_, err = decode(tc[opLen:])
		} else if strings.HasPrefix(tc, "encode ") {
			_, err = encode("", tc[opLen:])
		}
		if err != nil {
			continue
		}
		// Abbreviate very long inputs so the failure message stays readable.
		if len(tc) > 256 {
			tc = tc[:100] + "..." + tc[len(tc)-100:]
		}
		t.Errorf("no error for %s", tc)
	}
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,49 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// Sparse block handling code.
// valueRange is one entry of a sparse block. An entry is either a block
// header or a range: sparseBlocks.lookup reads a header's value as the stride
// and its lo as the number of range entries that follow; for a range entry,
// value is the result for byte lo and [lo, hi] are the inclusive bounds.
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
// sparseBlocks holds the sparse blocks searched by lookup.
type sparseBlocks struct {
// values stores, per block, a header entry followed by its range entries.
values []valueRange
// offset maps a block number to the index of its header in values.
offset []uint16
}
// idnaSparse wraps the idnaSparseValues and idnaSparseOffset tables (defined
// outside this file) so they can be searched with sparseBlocks.lookup.
var idnaSparse = sparseBlocks{
values: idnaSparseValues[:],
offset: idnaSparseOffset[:],
}
// trie is the package-wide IDNA trie instance.
// Don't use newIdnaTrie to avoid unconditional linking in of the table.
var trie = &idnaTrie{}
// lookup returns the value for byte b in sparse block n.
//
// The block starts at t.values[t.offset[n]] with a header entry whose lo
// field is the number of range entries that follow and whose value field is
// the stride. The ranges are binary-searched; for a range r containing b the
// result is r.value + (b - r.lo) * stride. If no range contains b, lookup
// returns 0.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
	start := t.offset[n]
	header := t.values[start]
	lo := start + 1
	hi := lo + uint16(header.lo)
	for lo < hi {
		mid := lo + (hi-lo)/2
		r := t.values[mid]
		switch {
		case b < r.lo:
			hi = mid
		case b > r.hi:
			lo = mid + 1
		default:
			return r.value + uint16(b-r.lo)*header.value
		}
	}
	return 0
}
@@ -0,0 +1,28 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.16
package idna
// appendMapping appends the mapping for the respective rune. isMapped must be
// true. A mapping is a categorization of a rune as defined in UTS #46.
//
// When the xor bit is clear, the index selects a length-prefixed entry in
// mappings and that entry's payload is appended. Otherwise s is appended and
// its trailing bytes are XORed, either with the inline mask or with the
// pattern stored in xorData.
func (c info) appendMapping(b []byte, s string) []byte {
	idx := int(c >> indexShift)
	if c&xorBit == 0 {
		// The first byte of the entry is the length of the mapped string.
		entry := mappings[idx:]
		return append(b, entry[1:entry[0]+1]...)
	}
	b = append(b, s...)
	if c&inlineXOR == inlineXOR {
		// TODO: support and handle two-byte inline masks
		b[len(b)-1] ^= byte(idx)
		return b
	}
	// xorData[idx] is the pattern length; the pattern bytes follow it.
	end := len(b)
	for p := end - int(xorData[idx]); p < end; p++ {
		idx++
		b[p] ^= xorData[idx]
	}
	return b
}
@@ -0,0 +1,28 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.16
package idna
// appendMapping appends the mapping for the respective rune. isMapped must be
// true. A mapping is a categorization of a rune as defined in UTS #46.
//
// When the xor bit is clear, the index selects the entry of mappings bounded
// by two consecutive mappingIndex offsets. Otherwise s is appended and its
// trailing bytes are XORed, either with the inline mask or with the pattern
// stored in xorData.
func (c info) appendMapping(b []byte, s string) []byte {
	idx := int(c >> indexShift)
	if c&xorBit == 0 {
		from, to := mappingIndex[idx], mappingIndex[idx+1]
		return append(b, mappings[from:to]...)
	}
	b = append(b, s...)
	if c&inlineXOR == inlineXOR {
		// TODO: support and handle two-byte inline masks
		b[len(b)-1] ^= byte(idx)
		return b
	}
	// xorData[idx] is the pattern length; the pattern bytes follow it.
	end := len(b)
	for p := end - int(xorData[idx]); p < end; p++ {
		idx++
		b[p] ^= xorData[idx]
	}
	return b
}
@@ -0,0 +1,119 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package idna
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
// input rune. This technique is used by the cases packages and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
// if mapped {
// if inlinedXOR {
// 15..13 inline XOR marker
// 12..11 unused
// 10..3 inline XOR mask
// } else {
// 15..3 index into xor or mapping table
// }
// } else {
// 15..14 unused
// 13 mayNeedNorm
// 12..11 attributes
// 10..8 joining type
// 7..3 category type
// }
// 2 use xor pattern
// 1..0 mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
// Masks and shifts for extracting the fields of an info value.
const (
// catSmallMask selects bits 1..0; a non-zero result is itself the category.
catSmallMask = 0x3
// catBigMask selects bits 7..3, used as the category when bits 1..0 are zero.
catBigMask = 0xF8
// indexShift is the right shift that yields the table index of a mapping.
indexShift = 3
xorBit = 0x4 // interpret the index as an xor pattern
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
// joinShift and joinMask extract the joining type (bits 10..8).
joinShift = 8
joinMask = 0x07
// Attributes
attributesMask = 0x1800
viramaModifier = 0x1800
modifier = 0x1000
rtl = 0x0800
// mayNeedNorm is bit 13 of an unmapped value.
mayNeedNorm = 0x2000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
// Categories stored in bits 1..0 of an info value (see catSmallMask). Any
// non-zero value here makes info.isMapped report true.
const (
unknown category = 0 // not currently defined in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
)
// Categories stored in bits 7..3 of an info value (see catBigMask);
// info.category returns these when bits 1..0 are zero.
const (
valid category = 0x08
validNV8 category = 0x18
validXV8 category = 0x28
disallowed category = 0x40
disallowedSTD3Valid category = 0x80
ignored category = 0xC0
)
// join types and additional rune information
//
// The values start at 1 and increase by one per constant; numJoinTypes is
// therefore one more than the last real join type.
const (
joiningL = (iota + 1)
joiningD
joiningT
joiningR
//the following types are derived during processing
joinZWJ
joinZWNJ
joinVirama
numJoinTypes
)
// isMapped reports whether either of the two low bits of c is set.
func (c info) isMapped() bool {
	return c&catSmallMask != 0
}
// category returns the category encoded in c: the two low bits when they are
// non-zero, otherwise the bits selected by catBigMask.
func (c info) category() category {
	if low := c & catSmallMask; low != 0 {
		return category(low)
	}
	return category(c & catBigMask)
}
// joinType returns the joining type bits of c, or 0 when c is a mapped value.
func (c info) joinType() info {
	if !c.isMapped() {
		return (c >> joinShift) & joinMask
	}
	return 0
}
// isModifier reports whether c has the modifier bit set while both low
// (mapped-category) bits are clear.
func (c info) isModifier() bool {
	const mask = modifier | catSmallMask
	return c&mask == modifier
}
// isViramaModifier reports whether the attribute bits of c equal
// viramaModifier while both low (mapped-category) bits are clear.
func (c info) isViramaModifier() bool {
	const mask = attributesMask | catSmallMask
	return c&mask == viramaModifier
}
@@ -0,0 +1,13 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package unicode generates the Unicode tables in core.
package unicode
// This package is defined here, instead of core, as Go does not allow any
// standard packages to have non-standard imports, even if imported in files
// with a build ignore tag.
//go:generate go run gen.go -tables=all
//go:generate mv tables.go $GOROOT/src/unicode
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,28 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unicode
import (
"testing"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// TestScripts checks, for every entry of Scripts.txt, that the rune is
// reported by unicode.Is as a member of the script named on that line, which
// indirectly checks that each script exists. testtext.SkipIfNotLong is
// consulted first.
func TestScripts(t *testing.T) {
	testtext.SkipIfNotLong(t)

	checkEntry := func(p *ucd.Parser) {
		r, script := p.Rune(0), p.String(1)
		if unicode.Is(unicode.Scripts[script], r) {
			return
		}
		t.Errorf("%U: not in script %q", r, script)
	}
	ucd.Parse(gen.OpenUCDFile("Scripts.txt"), checkEntry)
}
@@ -0,0 +1,41 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package format contains types for defining language-specific formatting of
// values.
//
// This package is internal now, but will eventually be exposed after the API
// settles.
package format // import "golang.org/x/text/internal/format"
import (
"fmt"
"golang.org/x/text/language"
)
// State represents the printer state passed to custom formatters. It provides
// access to the fmt.State interface and the sentence and language-related
// context.
type State interface {
// fmt.State is embedded, so every State also offers its methods.
fmt.State
// Language reports the requested language in which to render a message.
Language() language.Tag
// TODO: consider this and removing rune from the Format method in the
// Formatter interface.
//
// Verb returns the format variant to render, analogous to the types used
// in fmt. Use 'v' for the default or only variant.
// Verb() rune
// TODO: more info:
// - sentence context such as linguistic features passed by the translator.
}
// Formatter is analogous to fmt.Formatter.
type Formatter interface {
// Format receives a State (instead of the fmt.State taken by
// fmt.Formatter) together with the verb being formatted.
Format(state State, verb rune)
}
@@ -0,0 +1,358 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package format
import (
"reflect"
"unicode/utf8"
)
// A Parser parses a format string. The results of each Scan call are stored
// in the struct fields.
type Parser struct {
// Verb is the verb of the most recently scanned substitution.
Verb rune
// WidthPresent and PrecPresent report whether Width and Prec were set by
// the most recently scanned substitution.
WidthPresent bool
PrecPresent bool
// Flags of the most recently scanned substitution: '-', '+', '#', ' ' and
// '0' respectively. ClearFlags resets them.
Minus bool
Plus bool
Sharp bool
Space bool
Zero bool
// For the formats %+v %#v, we set the plusV/sharpV flags
// and clear the plus/sharp flags since %+v and %#v are in effect
// different, flagless formats set at the top level.
PlusV bool
SharpV bool
// HasIndex reports whether an explicit argument index such as [3]
// directly preceded the verb.
HasIndex bool
Width int
Prec int // precision
// retain arguments across calls.
Args []interface{}
// retain current argument number across calls
ArgNum int
// Reordered records whether the format string used argument reordering.
Reordered bool
// goodArgNum records whether the most recent reordering directive was valid.
goodArgNum bool
// position info: format is the string being parsed and
// format[startPos:endPos] is the part consumed by the last Scan.
format string
startPos int
endPos int
// Status is the result type of the last call to Scan.
Status Status
}
// Reset prepares the parser for scanning format strings against args: it
// installs args, rewinds the argument counter and the start position, and
// clears the reordering marker.
func (p *Parser) Reset(args []interface{}) {
	p.Args, p.ArgNum = args, 0
	p.startPos, p.Reordered = 0, false
}
// Text returns the part of the format string that was parsed by the last call
// to Scan. It returns the original substitution clause if the current scan
// parsed a substitution.
func (p *Parser) Text() string {
	from, to := p.startPos, p.endPos
	return p.format[from:to]
}
// SetFormat sets a new format string to parse and rewinds the scan position
// to its beginning. It does not reset the argument count.
func (p *Parser) SetFormat(format string) {
	p.startPos, p.endPos = 0, 0
	p.format = format
}
// Status indicates the result type of a call to Scan.
type Status int
const (
// StatusText: Scan consumed literal text (or a "%%" sequence).
StatusText Status = iota
// StatusSubstitution: Scan consumed a substitution clause.
StatusSubstitution
// StatusBadWidthSubstitution: a '*' width did not get a usable integer argument.
StatusBadWidthSubstitution
// StatusBadPrecSubstitution: a '*' precision did not get a usable integer argument.
StatusBadPrecSubstitution
// StatusNoVerb: the format string ended before a verb was found.
StatusNoVerb
// StatusBadArgNum: an argument index or its placement was invalid.
StatusBadArgNum
// StatusMissingArg: no argument was left for the verb.
StatusMissingArg
)
// ClearFlags resets every per-substitution flag of the parser to false.
// Width, Prec and the argument bookkeeping are left untouched.
func (p *Parser) ClearFlags() {
	p.WidthPresent, p.PrecPresent = false, false
	p.Minus, p.Plus, p.Sharp, p.Space, p.Zero = false, false, false, false, false
	p.PlusV, p.SharpV = false, false
	p.HasIndex = false
}
// Scan scans the next part of the format string and sets the status to
// indicate whether it scanned a string literal, substitution or error.
//
// It returns false once the whole format string has been consumed, and true
// otherwise. After a true result, Text returns the consumed part, Status its
// kind, and the flag, width, precision and verb fields describe a scanned
// substitution.
func (p *Parser) Scan() bool {
p.Status = StatusText
format := p.format
end := len(format)
if p.endPos >= end {
return false
}
afterIndex := false // previous item in format was an index like [3].
p.startPos = p.endPos
p.goodArgNum = true
// Consume a run of literal text up to the next '%', if there is one.
i := p.startPos
for i < end && format[i] != '%' {
i++
}
if i > p.startPos {
p.endPos = i
return true
}
// Process one verb
i++
p.Status = StatusSubstitution
// Do we have flags?
p.ClearFlags()
simpleFormat:
for ; i < end; i++ {
c := p.format[i]
switch c {
case '#':
p.Sharp = true
case '0':
p.Zero = !p.Minus // Only allow zero padding to the left.
case '+':
p.Plus = true
case '-':
p.Minus = true
p.Zero = false // Do not pad with zeros to the right.
case ' ':
p.Space = true
default:
// Fast path for common case of ascii lower case simple verbs
// without precision or width or argument indices.
if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
if c == 'v' {
// Go syntax
p.SharpV = p.Sharp
p.Sharp = false
// Struct-field syntax
p.PlusV = p.Plus
p.Plus = false
}
p.Verb = rune(c)
p.ArgNum++
p.endPos = i + 1
return true
}
// Format is more complex than simple flags and a verb or is malformed.
break simpleFormat
}
}
// Do we have an explicit argument index?
i, afterIndex = p.updateArgNumber(format, i)
// Do we have width?
if i < end && format[i] == '*' {
i++
p.Width, p.WidthPresent = p.intFromArg()
if !p.WidthPresent {
p.Status = StatusBadWidthSubstitution
}
// We have a negative width, so take its value and ensure
// that the minus flag is set
if p.Width < 0 {
p.Width = -p.Width
p.Minus = true
p.Zero = false // Do not pad with zeros to the right.
}
afterIndex = false
} else {
p.Width, p.WidthPresent, i = parsenum(format, i, end)
if afterIndex && p.WidthPresent { // "%[3]2d"
p.goodArgNum = false
}
}
// Do we have precision?
// NOTE(review): because of the i+1 < end test, a '.' that is the very last
// byte of the format is not treated as a precision marker and is decoded
// as the verb below; confirm this is the intended handling.
if i+1 < end && format[i] == '.' {
i++
if afterIndex { // "%[3].2d"
p.goodArgNum = false
}
i, afterIndex = p.updateArgNumber(format, i)
if i < end && format[i] == '*' {
i++
p.Prec, p.PrecPresent = p.intFromArg()
// Negative precision arguments don't make sense
if p.Prec < 0 {
p.Prec = 0
p.PrecPresent = false
}
if !p.PrecPresent {
p.Status = StatusBadPrecSubstitution
}
afterIndex = false
} else {
p.Prec, p.PrecPresent, i = parsenum(format, i, end)
// A '.' without digits means an explicit precision of zero.
if !p.PrecPresent {
p.Prec = 0
p.PrecPresent = true
}
}
}
if !afterIndex {
i, afterIndex = p.updateArgNumber(format, i)
}
p.HasIndex = afterIndex
// The format ended before a verb was seen.
if i >= end {
p.endPos = i
p.Status = StatusNoVerb
return true
}
verb, w := utf8.DecodeRuneInString(format[i:])
p.endPos = i + w
p.Verb = verb
switch {
case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
// Report only the final '%' as literal text.
p.startPos = p.endPos - 1
p.Status = StatusText
case !p.goodArgNum:
p.Status = StatusBadArgNum
case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
p.Status = StatusMissingArg
p.ArgNum++
case verb == 'v':
// Go syntax
p.SharpV = p.Sharp
p.Sharp = false
// Struct-field syntax
p.PlusV = p.Plus
p.Plus = false
fallthrough
default:
p.ArgNum++
}
return true
}
// intFromArg reads Args[ArgNum] as an int and advances ArgNum. isInt reports
// whether the argument has an integer type whose value fits in an int and is
// not tooLarge; otherwise num is 0. When no argument is left, it returns
// (0, false) without advancing ArgNum.
func (p *Parser) intFromArg() (num int, isInt bool) {
	if p.ArgNum >= len(p.Args) {
		return 0, false
	}
	arg := p.Args[p.ArgNum]
	if num, isInt = arg.(int); !isInt { // The direct assertion is almost always OK.
		// Work harder: accept any integer kind whose value fits in an int.
		rv := reflect.ValueOf(arg)
		switch rv.Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			if wide := rv.Int(); int64(int(wide)) == wide {
				num, isInt = int(wide), true
			}
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			if wide := rv.Uint(); int64(wide) >= 0 && uint64(int(wide)) == wide {
				num, isInt = int(wide), true
			}
		}
	}
	p.ArgNum++
	if tooLarge(num) {
		return 0, false
	}
	return num, isInt
}
// parseArgNumber returns the value of the bracketed number, minus 1
// (explicit argument numbers are one-indexed but we want zero-indexed).
// The opening bracket is known to be present at format[0].
// The returned values are the index, the number of bytes to consume
// up to and including the closing bracket, if present, and whether the
// number parsed ok. The bytes to consume will be 1 if no closing bracket is
// present.
func parseArgNumber(format string) (index int, wid int, ok bool) {
	// There must be at least 3 bytes: [n].
	if len(format) < 3 {
		return 0, 1, false
	}
	for i := 1; i < len(format); i++ {
		if format[i] != ']' {
			continue
		}
		// Everything between the brackets must be a single number.
		n, isNum, stop := parsenum(format, 1, i)
		if isNum && stop == i {
			return n - 1, i + 1, true // arg numbers are one-indexed; skip the bracket.
		}
		return 0, i + 1, false
	}
	return 0, 1, false
}
// updateArgNumber handles an explicit argument index such as [3] at
// format[i:]. If none starts there, it returns i and false. Otherwise it
// marks the parser as Reordered and returns the index of the next byte to
// process. On a valid, in-range index it stores the index in ArgNum and
// returns found = true; on any other index it clears goodArgNum and found
// reports whether the bracketed number at least parsed.
func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
	if i >= len(format) || format[i] != '[' {
		return i, false
	}
	p.Reordered = true
	index, wid, ok := parseArgNumber(format[i:])
	newi = i + wid
	if !ok || index < 0 || index >= len(p.Args) {
		p.goodArgNum = false
		return newi, ok
	}
	p.ArgNum = index
	return newi, true
}
// tooLarge reports whether the magnitude of the integer is
// too large to be used as a formatting width or precision.
// Values from -1e6 to 1e6 inclusive are accepted.
func tooLarge(x int) bool {
	const limit int = 1e6
	return x < -limit || x > limit
}
// parsenum converts the ASCII digits in s[start:end] to an integer. num is 0
// (and isnum is false) if no number is present. newi is the index of the
// first byte that was not consumed. When start >= end, or when the
// accumulated number becomes tooLarge, it returns (0, false, end).
func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
	if start >= end {
		return 0, false, end
	}
	newi = start
	for newi < end {
		c := s[newi]
		if c < '0' || c > '9' {
			break
		}
		if tooLarge(num) {
			return 0, false, end // Overflow; crazy long number most likely.
		}
		num = num*10 + int(c-'0')
		isnum = true
		newi++
	}
	return num, isnum, newi
}
@@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package format
import "testing"
// TODO: most of Parser is tested in x/message. Move some tests here.
// TestParsenum checks parsenum against a table of inputs, covering a leading
// non-digit, empty ranges, digits followed by other bytes and a full match.
func TestParsenum(t *testing.T) {
	type parsenumCase struct {
		s          string
		start, end int
		num        int
		isnum      bool
		newi       int
	}
	cases := []parsenumCase{
		{"a123", 0, 4, 0, false, 0},
		{"1234", 1, 1, 0, false, 1},
		{"123a", 0, 4, 123, true, 3},
		{"12a3", 0, 4, 12, true, 2},
		{"1234", 0, 4, 1234, true, 4},
		{"1a234", 1, 3, 0, false, 1},
	}
	for _, tt := range cases {
		num, isnum, newi := parsenum(tt.s, tt.start, tt.end)
		if num == tt.num && isnum == tt.isnum && newi == tt.newi {
			continue
		}
		t.Errorf("parsenum(%q, %d, %d) = %d, %v, %d, want %d, %v, %d", tt.s, tt.start, tt.end, num, isnum, newi, tt.num, tt.isnum, tt.newi)
	}
}
@@ -0,0 +1,226 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bitfield converts annotated structs into integer values.
//
// Any field that is marked with a bitfield tag is compacted. The tag value has
// two parts. The part before the comma determines the number of bits to use for
// the representation. If left blank, the bit size of the field's type is used
// (1 bit for a bool). The part after the comma determines the method name for a
// generated type. If left blank the name of the field is used.
package bitfield
import (
"bytes"
"fmt"
"io"
"reflect"
"strconv"
"strings"
)
// Config determines settings for packing and generation. If a Config is used,
// the same Config should be used for packing and generation.
type Config struct {
// NumBits fixes the maximum allowed bits for the integer representation.
// If NumBits is not 8, 16, 32, or 64, the actual underlying integer size
// will be the next largest available. If NumBits is 0, the size is derived
// from the total number of bits used by the tagged fields.
NumBits uint
// If Package is set, code generation will write a "Code generated" header
// and a package clause.
Package string
// TypeName is the name for the generated type. By default it is the name
// of the type of the value passed to Gen.
TypeName string
}
// nullConfig is the Config used when a nil *Config is passed to pack or Gen.
var nullConfig = &Config{}
// Pack returns the annotated bit ranges of struct x packed into one integer.
// A nil Config selects the default settings.
//
// Only fields that have a "bitfield" tag are compacted.
func Pack(x interface{}, c *Config) (packed uint64, err error) {
	value, _, packErr := pack(x, c)
	return value, packErr
}
// pack packs the bitfield-tagged fields of x, which must be a struct or a
// non-nil pointer to a struct, into a single integer. Fields are laid out in
// declaration order starting at the most significant bit of the result.
//
// It returns the packed value and the number of bits of the representation:
// c.NumBits if that is non-zero, otherwise the smallest of 8, 16, 32 or 64
// that holds all tagged fields. A nil c selects the default configuration.
//
// An error is returned if x is not a struct, if c.NumBits exceeds 64, if a tag
// is malformed, if a field value is negative or does not fit in its bits, or
// if the fields need more bits than are available.
func pack(x interface{}, c *Config) (packed uint64, nBit uint, err error) {
	if c == nil {
		c = nullConfig
	}
	nBits := c.NumBits
	if nBits > 64 {
		// Without this check 64-nBits below would wrap around.
		return 0, 0, fmt.Errorf("bitfield: NumBits %d exceeds 64", nBits)
	}
	v := reflect.Indirect(reflect.ValueOf(x))
	if v.Kind() != reflect.Struct {
		// Covers nil, nil pointers and non-struct values, all of which would
		// otherwise make the reflect calls below panic.
		return 0, 0, fmt.Errorf("bitfield: cannot pack %T; want a struct or non-nil pointer to struct", x)
	}
	t := v.Type()

	// pos counts the bits consumed so far, measured from bit 63 downwards.
	// With a fixed NumBits the unused high bits count as already consumed.
	pos := 64 - nBits
	if nBits == 0 {
		pos = 0
	}
	for i := 0; i < v.NumField(); i++ {
		sf := t.Field(i)
		f, err := parseField(sf)
		if err != nil {
			return 0, 0, err
		}
		if f.nBits == 0 {
			continue // field carries no bitfield tag
		}
		fv := v.Field(i)
		var value uint64
		switch fv.Kind() {
		case reflect.Bool:
			if fv.Bool() {
				value = 1
			}
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			value = fv.Uint()
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			n := fv.Int()
			if n < 0 {
				return 0, 0, fmt.Errorf("bitfield: negative value for field %q not allowed", sf.Name)
			}
			value = uint64(n)
		}
		// For f.nBits >= 64 the shift yields 0 and the limit becomes the
		// maximum uint64, so every value passes this check.
		if value > (1<<f.nBits)-1 {
			return 0, 0, fmt.Errorf("bitfield: value %#x of field %q does not fit in %d bits", value, sf.Name, f.nBits)
		}
		if pos+f.nBits > 64 {
			return 0, 0, fmt.Errorf("bitfield: no more bits left for field %q", sf.Name)
		}
		shift := 64 - pos - f.nBits
		pos += f.nBits
		packed |= value << shift
	}
	if nBits == 0 {
		// No fixed size: shift the value down into the smallest integer size
		// that holds all fields.
		nBits = posToBits(pos)
		packed >>= (64 - nBits)
	}
	return packed, nBits, nil
}
// field holds the settings parsed from the bitfield tag of one struct field.
type field struct {
// name is the method name to generate: the part of the tag after the comma,
// or the struct field's own name if that part is empty.
name string
// value is not referenced by the code visible in this file.
// NOTE(review): possibly vestigial — confirm before removing.
value uint64
// nBits is the number of bits the field occupies; 0 means the field has no
// bitfield tag and is skipped.
nBits uint
}
// parseField parses the "bitfield" struct tag of field. The tag value has the
// form
//
//	[<nBits>][,<name>]
//
// where nBits is a decimal bit count (at most 255) and name is the name of the
// accessor method to generate.
//
// If the field has no bitfield tag, the zero field (nBits == 0) and a nil
// error are returned. If nBits is omitted or 0, it defaults to 1 for a bool
// and to the bit size of the field's type otherwise. If name is omitted, the
// struct field's name is used.
//
// An error is returned if the field is not a bool or integer type, or if nBits
// is not a valid number.
func parseField(field reflect.StructField) (f field, err error) {
	tag, ok := field.Tag.Lookup("bitfield")
	if !ok {
		return f, nil
	}
	switch field.Type.Kind() {
	case reflect.Bool,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		// Supported kinds.
	default:
		return f, fmt.Errorf("bitfield: field %q is not an integer or bool type", field.Name)
	}

	// Split the tag at the first comma: bit count before, method name after.
	bits := tag
	if i := strings.IndexByte(tag, ','); i >= 0 {
		bits = tag[:i]
		f.name = tag[i+1:]
	}
	if bits != "" {
		n, perr := strconv.ParseUint(bits, 10, 8)
		if perr != nil {
			return f, fmt.Errorf("bitfield: invalid bit size for field %q: %v", field.Name, perr)
		}
		f.nBits = uint(n)
	}
	if f.nBits == 0 {
		// No explicit size: use the natural size of the type.
		if field.Type.Kind() == reflect.Bool {
			f.nBits = 1
		} else {
			f.nBits = uint(field.Type.Bits())
		}
	}
	if f.name == "" {
		f.name = field.Name
	}
	return f, nil
}
// posToBits returns the smallest standard integer width (8, 16, 32 or 64)
// that can hold pos bits. It panics if pos exceeds 64.
func posToBits(pos uint) (bits uint) {
	for _, width := range [...]uint{8, 16, 32, 64} {
		if pos <= width {
			return width
		}
	}
	panic("unreachable")
}
// Gen generates code for unpacking integers created with Pack.
//
// x must be a struct or a non-nil pointer to a struct; its bitfield tags
// determine the generated accessor methods. The code declares an unsigned
// integer type named c.TypeName (by default the name of x's type) that is wide
// enough for the packed representation, plus one method per tagged field. If
// c.Package is set, a "Code generated" header and a package clause are written
// first. A nil c selects the default configuration; c is not modified.
//
// Gen returns an error if x cannot be packed with c, if no type name can be
// determined, or if writing to w fails.
func Gen(w io.Writer, x interface{}, c *Config) error {
	if c == nil {
		c = nullConfig
	}
	v := reflect.Indirect(reflect.ValueOf(x))
	if v.Kind() != reflect.Struct {
		return fmt.Errorf("bitfield: cannot generate code for %T; want a struct or non-nil pointer to struct", x)
	}
	t := v.Type()

	// Packing validates the tags and yields the width of the representation.
	_, nBits, err := pack(x, c)
	if err != nil {
		return err
	}
	if nBits > 64 {
		return fmt.Errorf("bitfield: %d bits do not fit in a 64-bit integer", nBits)
	}

	// Resolve the type name locally so that neither the caller's Config nor
	// the shared default Config is modified.
	typeName := c.TypeName
	if typeName == "" {
		typeName = t.Name()
	}
	if typeName == "" {
		return fmt.Errorf("bitfield: unnamed type %T requires Config.TypeName", x)
	}
	receiver := []rune(typeName)[0]

	// emit writes one formatted line and remembers the first write error.
	var werr error
	emit := func(w io.Writer, format string, args ...interface{}) {
		if _, e := fmt.Fprintf(w, format+"\n", args...); e != nil && werr == nil {
			werr = fmt.Errorf("bitfield: write failed: %v", e)
		}
	}

	// The methods are buffered so the type declaration can precede them.
	buf := &bytes.Buffer{}
	pos := uint(0)
	for i := 0; i < t.NumField(); i++ {
		sf := t.Field(i)
		f, err := parseField(sf)
		if err != nil {
			return err
		}
		if f.nBits == 0 {
			continue
		}
		shift := nBits - pos - f.nBits
		pos += f.nBits
		retType := sf.Type.Name()
		emit(buf, "\nfunc (%c %s) %s() %s {", receiver, typeName, f.name, retType)
		if sf.Type.Kind() == reflect.Bool {
			emit(buf, "\tconst bit = 1 << %d", shift)
			emit(buf, "\treturn %c&bit == bit", receiver)
		} else {
			// Compute the mask as uint64 so that a 64-bit field yields
			// 0xffffffffffffffff rather than a negative constant.
			mask := uint64(1)<<f.nBits - 1
			emit(buf, "\treturn %s((%c >> %d) & %#x)", retType, receiver, shift, mask)
		}
		emit(buf, "}")
	}

	if c.Package != "" {
		emit(w, "// Code generated by golang.org/x/text/internal/gen/bitfield. DO NOT EDIT.\n")
		emit(w, "package %s\n", c.Package)
	}
	// The shifts above are relative to nBits, so the integer type must be
	// wide enough for nBits, not merely for the bits used by the fields.
	emit(w, "type %s uint%d", typeName, posToBits(nBits))
	if werr != nil {
		return werr
	}
	if _, err := io.Copy(w, buf); err != nil {
		return fmt.Errorf("bitfield: write failed: %v", err)
	}
	return nil
}
@@ -0,0 +1,230 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bitfield
import (
"bytes"
"fmt"
"os"
"testing"
)
// myUint8 is a named integer type used to check that generated accessors
// return the field's declared type.
type myUint8 uint8
// test1 mixes tagged and untagged fields, explicit and default sizes, and
// explicit and default method names.
type test1 struct { // 28 bits
foo uint16 `bitfield:",fob"`
Bar int8 `bitfield:"5,baz"`
Foo uint64
bar myUint8 `bitfield:"3"`
Bool bool `bitfield:""`
Baz int8 `bitfield:"3"`
}
// test2 declares two fields of 32 bits each, more than the fields' own type
// provides.
type test2 struct {
larger1 uint16 `bitfield:"32"`
larger2 uint16 `bitfield:"32"`
}
// tooManyBits declares six 12-bit fields, 72 bits in total.
type tooManyBits struct {
u1 uint16 `bitfield:"12"`
u2 uint16 `bitfield:"12"`
u3 uint16 `bitfield:"12"`
u4 uint16 `bitfield:"12"`
u5 uint16 `bitfield:"12"`
u6 uint16 `bitfield:"12"`
}
// just64 has a single uint64 field with a default (type-sized) bit count.
type just64 struct {
foo uint64 `bitfield:""`
}
// toUint8 has a single bool field.
type toUint8 struct {
foo bool `bitfield:""`
}
// toUint16 has a single 9-bit field.
type toUint16 struct {
foo int `bitfield:"9"`
}
// faultySize has a tag whose bit size is not a number.
type faultySize struct {
foo uint64 `bitfield:"a"`
}
// faultyType tags a field that is neither an integer nor a bool.
type faultyType struct {
foo *int `bitfield:"5"`
}
// Values of test1 shared by the tests below.
var (
// maxed sets every tagged field to the largest value that fits in its bits.
// Foo carries no bitfield tag.
maxed = test1{
foo: 0xffff,
Bar: 0x1f,
Foo: 0xffff,
bar: 0x7,
Bool: true,
Baz: 0x7,
}
// alternate1 sets the fields that alternate2 leaves zero.
alternate1 = test1{
foo: 0xffff,
bar: 0x7,
Baz: 0x7,
}
// alternate2 sets the fields that alternate1 leaves zero.
alternate2 = test1{
Bar: 0x1f,
Bool: true,
}
// overflow stores a 6-bit value in the 5-bit field Bar.
overflow = test1{
Bar: 0x3f,
}
// negative stores a negative value in Bar.
negative = test1{
Bar: -1,
}
)
// TestPack checks the packed value and the success or failure of Pack for a
// table of inputs and NumBits settings.
func TestPack(t *testing.T) {
	type packCase struct {
		desc  string
		x     interface{}
		nBits uint
		out   uint64
		ok    bool
	}
	cases := []packCase{
		{"maxed out fields", maxed, 0, 0xfffffff0, true},
		{"maxed using less bits", maxed, 28, 0x0fffffff, true},
		{"alternate1", alternate1, 0, 0xffff0770, true},
		{"alternate2", alternate2, 0, 0x0000f880, true},
		{"just64", &just64{0x0f0f0f0f}, 00, 0xf0f0f0f, true},
		{"just64", &just64{0x0f0f0f0f}, 64, 0xf0f0f0f, true},
		{"just64", &just64{0xffffFFFF}, 64, 0xffffffff, true},
		{"to uint8", &toUint8{true}, 0, 0x80, true},
		{"to uint16", &toUint16{1}, 0, 0x0080, true},
		// errors
		{"overflow", overflow, 0, 0, false},
		{"too many bits", &tooManyBits{}, 0, 0, false},
		{"fault size", &faultySize{}, 0, 0, false},
		{"fault type", &faultyType{}, 0, 0, false},
		{"negative", negative, 0, 0, false},
		{"not enough bits", maxed, 27, 0, false},
	}
	for _, c := range cases {
		name := fmt.Sprintf("%T/%s", c.x, c.desc)
		t.Run(name, func(t *testing.T) {
			got, err := Pack(c.x, &Config{NumBits: c.nBits})
			succeeded := err == nil
			if got != c.out || succeeded != c.ok {
				t.Errorf("got %#x, %v; want %#x, %v (%v)", got, succeeded, c.out, c.ok, err)
			}
		})
	}
}
// TestRoundtrip packs several test1 values and checks that the accessors of
// myInt recover every tagged field.
func TestRoundtrip(t *testing.T) {
	inputs := []test1{maxed, alternate1, alternate2}
	for _, in := range inputs {
		t.Run("", func(t *testing.T) {
			packed, err := Pack(in, nil)
			if err != nil {
				t.Fatal(err)
			}
			want := in
			want.Foo = 0 // not stored
			m := myInt(packed)
			got := test1{
				foo:  m.fob(),
				Bar:  m.baz(),
				bar:  m.bar(),
				Bool: m.Bool(),
				Baz:  m.Baz(),
			}
			if got != want {
				t.Errorf("\ngot %#v\nwant %#v (%#x)", got, want, packed)
			}
		})
	}
}
// TestGen compares the output of Gen with the expected source text for several
// configurations, and checks that a too-small NumBits is rejected.
func TestGen(t *testing.T) {
	type genCase struct {
		desc   string
		x      interface{}
		config *Config
		ok     bool
		out    string
	}
	cases := []genCase{
		{
			desc: "test1",
			x:    &test1{},
			ok:   true,
			out:  test1Gen,
		},
		{
			desc:   "test1 with options",
			x:      &test1{},
			config: &Config{Package: "bitfield", TypeName: "myInt"},
			ok:     true,
			out:    mustRead("gen1_test.go"),
		},
		{
			desc:   "test1 with alternative bits",
			x:      &test1{},
			config: &Config{NumBits: 28, Package: "bitfield", TypeName: "myInt2"},
			ok:     true,
			out:    mustRead("gen2_test.go"),
		},
		{
			desc:   "failure",
			x:      &test1{},
			config: &Config{NumBits: 27}, // Too few bits.
			ok:     false,
			out:    "",
		},
	}
	for _, c := range cases {
		t.Run(c.desc, func(t *testing.T) {
			var w bytes.Buffer
			err := Gen(&w, c.x, c.config)
			if succeeded := err == nil; succeeded != c.ok {
				t.Fatalf("got %v; want %v (%v)", succeeded, c.ok, err)
			}
			if got := w.String(); got != c.out {
				t.Errorf("got:\n%s\nwant:\n%s", got, c.out)
			}
		})
	}
}
// test1Gen is the output TestGen expects from Gen for test1 with a nil Config.
// It is compared byte-for-byte with the generated text.
const test1Gen = `type test1 uint32
func (t test1) fob() uint16 {
return uint16((t >> 16) & 0xffff)
}
func (t test1) baz() int8 {
return int8((t >> 11) & 0x1f)
}
func (t test1) bar() myUint8 {
return myUint8((t >> 8) & 0x7)
}
func (t test1) Bool() bool {
const bit = 1 << 7
return t&bit == bit
}
func (t test1) Baz() int8 {
return int8((t >> 4) & 0x7)
}
`
// mustRead returns the contents of the named file as a string. It panics with
// the underlying error if the file cannot be read.
func mustRead(filename string) string {
	data, err := os.ReadFile(filename)
	if err == nil {
		return string(data)
	}
	panic(err)
}
@@ -0,0 +1,26 @@
// Code generated by golang.org/x/text/internal/gen/bitfield. DO NOT EDIT.
package bitfield
type myInt uint32
func (m myInt) fob() uint16 {
return uint16((m >> 16) & 0xffff)
}
func (m myInt) baz() int8 {
return int8((m >> 11) & 0x1f)
}
func (m myInt) bar() myUint8 {
return myUint8((m >> 8) & 0x7)
}
func (m myInt) Bool() bool {
const bit = 1 << 7
return m&bit == bit
}
func (m myInt) Baz() int8 {
return int8((m >> 4) & 0x7)
}
@@ -0,0 +1,26 @@
// Code generated by golang.org/x/text/internal/gen/bitfield. DO NOT EDIT.
package bitfield
type myInt2 uint32
func (m myInt2) fob() uint16 {
return uint16((m >> 12) & 0xffff)
}
func (m myInt2) baz() int8 {
return int8((m >> 7) & 0x1f)
}
func (m myInt2) bar() myUint8 {
return myUint8((m >> 4) & 0x7)
}
func (m myInt2) Bool() bool {
const bit = 1 << 3
return m&bit == bit
}
func (m myInt2) Baz() int8 {
return int8((m >> 0) & 0x7)
}
@@ -0,0 +1,375 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gen
import (
"bytes"
"encoding/gob"
"fmt"
"hash"
"hash/fnv"
"io"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// This file contains utilities for generating code.
// TODO: other write methods like:
// - slices, maps, types, etc.
// CodeWriter is a utility for writing structured code. It computes the content
// hash and size of written content. It ensures there are newlines between
// written code blocks.
type CodeWriter struct {
// buf accumulates the generated source text until WriteGo is called.
buf bytes.Buffer
// Size is the running total of the sizes of the values written so far.
Size int
Hash hash.Hash32 // content hash
// gob encodes written values into Hash.
gob *gob.Encoder
// For comments we skip the usual one-line separator if they are followed by
// a code block.
skipSep bool
}
// Write appends p to the writer's internal buffer, which makes a *CodeWriter
// usable as the destination of fmt.Fprintf and similar calls.
func (w *CodeWriter) Write(p []byte) (n int, err error) {
	n, err = w.buf.Write(p)
	return n, err
}
// NewCodeWriter returns a new CodeWriter whose content hash is a 32-bit FNV
// hash fed through a gob encoder.
func NewCodeWriter() *CodeWriter {
	w := &CodeWriter{Hash: fnv.New32()}
	w.gob = gob.NewEncoder(w.Hash)
	return w
}
// WriteGoFile appends the total size of all created structures to the buffer
// and writes the buffer as a Go file with the given package name to the named
// file. It calls log.Fatalf if the file cannot be created or written.
func (w *CodeWriter) WriteGoFile(filename, pkg string) {
	out, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer out.Close()
	_, err = w.WriteGo(out, pkg, "")
	if err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// WriteVersionedGoFile appends the total size of all created structures to the
// buffer and writes the buffer as a Go file with the given package name and
// the build tags for the current Unicode version.
//
// If there are build tags, the Unicode version is inserted into the file name
// before its ".go" or "_test.go" suffix, and the build constraint lines of
// existing files for other versions are updated. It calls log.Fatalf if the
// file cannot be created or written.
func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) {
	target := filename
	tags := buildTags()
	if tags != "" {
		pattern := fileToPattern(filename)
		updateBuildTags(pattern)
		target = fmt.Sprintf(pattern, UnicodeVersion())
	}
	out, err := os.Create(target)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", target, err)
	}
	defer out.Close()
	_, err = w.WriteGo(out, pkg, tags)
	if err != nil {
		log.Fatalf("Error writing file %s: %v", target, err)
	}
}
// WriteGo appends a comment with the total size of all created structures and
// the content checksum to the buffer (if the size is non-zero), then writes
// the buffer as a Go file with the given package name and build tags to out.
// The buffer is reset afterwards.
func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) {
	defer w.buf.Reset()
	if total := w.Size; total > 0 {
		w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", total, total/1024, w.Hash.Sum32())
	}
	src := w.buf.Bytes()
	return WriteGo(out, pkg, tags, src)
}
// printf formats its arguments according to f and appends the result to the
// writer (via Write). The error returned by fmt.Fprintf is discarded.
func (w *CodeWriter) printf(f string, x ...interface{}) {
fmt.Fprintf(w, f, x...)
}
// insertSep writes the blank-line separator that precedes a code block. If
// skipSep is set, as WriteComment does, it clears the flag and writes nothing,
// so a comment stays attached to the block that follows it.
func (w *CodeWriter) insertSep() {
	if !w.skipSep {
		// Use at least two newlines to ensure a blank space between the
		// previous block. WriteGoFile will remove extraneous newlines.
		w.printf("\n\n")
		return
	}
	w.skipSep = false
}
// WriteComment formats comment with args and writes it as a comment block in
// which every line is prefixed with "//". Leading and trailing newlines are
// dropped, and the indentation of the first line is stripped from the lines
// that follow it. The separator before the next code block is suppressed.
func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
	text := strings.Trim(fmt.Sprintf(comment, args...), "\n")
	// Use at least two newlines to ensure a blank space between the previous
	// block. WriteGoFile will remove extraneous newlines.
	w.printf("\n\n// ")
	w.skipSep = true
	// Measure the indentation (tabs and spaces) of the first line.
	indent := 0
	for indent < len(text) && (text[indent] == '\t' || text[indent] == ' ') {
		indent++
	}
	// A newline followed by that same indentation starts a new comment line,
	// as does any other newline.
	sep := "\n" + text[:indent]
	text = text[indent:]
	strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, text)
	w.printf("\n")
}
// writeSizeInfo writes a "// Size: N bytes" comment line for the given size.
func (w *CodeWriter) writeSizeInfo(size int) {
w.printf("// Size: %d bytes\n", size)
}
// WriteConst writes a constant declaration of the given name and value.
// String constants are declared with an explicit type and written through
// WriteString; all other values are written in Go syntax via %#v.
func (w *CodeWriter) WriteConst(name string, x interface{}) {
	w.insertSep()
	v := reflect.ValueOf(x)
	if v.Type().Kind() != reflect.String {
		w.printf("const %s = %#v\n", name, x)
		return
	}
	w.printf("const %s %s = ", name, typeName(x))
	w.WriteString(v.String())
	w.printf("\n")
}
// WriteVar writes a variable declaration of the given name and value and adds
// the size of the value to w.Size. For every kind except string a
// "// Size" comment with the growth of w.Size is written after the value.
// Structs and values of other basic kinds are also fed to the content hash.
func (w *CodeWriter) WriteVar(name string, x interface{}) {
	w.insertSep()
	v := reflect.ValueOf(x)
	sizeBefore := w.Size
	w.Size += int(v.Type().Size())
	switch kind := v.Type().Kind(); kind {
	case reflect.String:
		w.printf("var %s %s = ", name, typeName(x))
		w.WriteString(v.String())
	case reflect.Struct, reflect.Slice, reflect.Array:
		if kind == reflect.Struct {
			w.gob.Encode(x)
		}
		w.printf("var %s = ", name)
		w.writeValue(v)
		w.writeSizeInfo(w.Size - sizeBefore)
	default:
		w.printf("var %s %s = ", name, typeName(x))
		w.gob.Encode(x)
		w.writeValue(v)
		w.writeSizeInfo(w.Size - sizeBefore)
	}
	w.printf("\n")
}
// writeValue writes v as a Go expression. Strings, arrays and slices are
// delegated to WriteString and writeSlice, structs are written field by field,
// and every other kind is written via %#v.
func (w *CodeWriter) writeValue(v reflect.Value) {
	val := v.Interface()
	switch v.Kind() {
	case reflect.String:
		w.WriteString(v.String())
	case reflect.Array:
		// Don't double count: callers of WriteArray count on the size being
		// added, so we need to discount it here.
		w.Size -= int(v.Type().Size())
		w.writeSlice(val, true)
	case reflect.Slice:
		w.writeSlice(val, false)
	case reflect.Struct:
		w.printf("%s{\n", typeName(v.Interface()))
		st := v.Type()
		for i, n := 0, v.NumField(); i < n; i++ {
			w.printf("%s: ", st.Field(i).Name)
			w.writeValue(v.Field(i))
			w.printf(",\n")
		}
		w.printf("}")
	default:
		w.printf("%#v", val)
	}
}
// WriteString writes a string literal. The string is added to the content hash
// and its length to w.Size. Strings of at most maxInline bytes are written as
// a single quoted literal; longer strings are written as a concatenation of
// literals spread over multiple lines.
func (w *CodeWriter) WriteString(s string) {
io.WriteString(w.Hash, s) // content hash
w.Size += len(s)
const maxInline = 40
if len(s) <= maxInline {
w.printf("%q", s)
return
}
// We will render the string as a multi-line string.
const maxWidth = 80 - 4 - len(`"`) - len(`" +`)
// When starting on its own line, go fmt indents line 2+ an extra level.
// n is the room left on the current line; max is the room on later lines.
n, max := maxWidth, maxWidth-4
// As per https://golang.org/issue/18078, the compiler has trouble
// compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN,
// for large N. We insert redundant, explicit parentheses to work around
// that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 +
// ... + s127) + etc + (etc + ... + sN).
explicitParens, extraComment := len(s) > 128*1024, ""
if explicitParens {
w.printf(`(`)
extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078"
}
// Print "" +\n, if a string does not start on its own line.
b := w.buf.Bytes()
if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment)
n, max = maxWidth, maxWidth
}
w.printf(`"`)
// Emit s rune by rune, escaping where needed and breaking the literal
// whenever the current line is full.
for sz, p, nLines := 0, 0, 0; p < len(s); {
var r rune
r, sz = utf8.DecodeRuneInString(s[p:])
out := s[p : p+sz]
chars := 1
if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
// Escape according to the encoded size of the rune.
switch sz {
case 1:
out = fmt.Sprintf("\\x%02x", s[p])
case 2, 3:
out = fmt.Sprintf("\\u%04x", r)
case 4:
out = fmt.Sprintf("\\U%08x", r)
}
chars = len(out)
} else if r == '\\' {
out = "\\" + string(r)
chars = 2
}
if n -= chars; n < 0 {
nLines++
// Close and reopen the parentheses after every 64th line.
if explicitParens && nLines&63 == 63 {
w.printf("\") + (\"")
}
w.printf("\" +\n\"")
n = max - len(out)
}
w.printf("%s", out)
p += sz
}
w.printf(`"`)
if explicitParens {
w.printf(`)`)
}
}
// WriteSlice writes a slice value as a composite literal, annotated with its
// number of elements.
func (w *CodeWriter) WriteSlice(x interface{}) {
w.writeSlice(x, false)
}
// WriteArray writes an array value as a composite literal with an explicit
// length.
func (w *CodeWriter) WriteArray(x interface{}) {
w.writeSlice(x, true)
}
// writeSlice writes the slice or array x as a composite literal and adds the
// size of its elements to w.Size. If isArray is set, the literal is given an
// explicit array length; otherwise it is annotated with the element count.
//
// Supported element kinds are strings, integers, structs and arrays; any
// other element kind causes a panic. String, integer and struct data is also
// fed to the content hash. Zero-valued struct elements are omitted from the
// literal, so struct elements are written with explicit indices.
func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
	v := reflect.ValueOf(x)
	w.gob.Encode(v.Len())
	w.Size += v.Len() * int(v.Type().Elem().Size())
	name := typeName(x)
	if isArray {
		// Replace whatever precedes the element type with an explicit length.
		name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
		w.printf("%s{\n", name)
	} else {
		w.printf("%s{ // %d elements\n", name, v.Len())
	}

	switch kind := v.Type().Elem().Kind(); kind {
	case reflect.String:
		// Use reflection rather than a type assertion to []string so that
		// string arrays and named string slice types work as well.
		for i := 0; i < v.Len(); i++ {
			w.WriteString(v.Index(i).String())
			w.printf(",\n")
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		// nLine and nBlock are the number of elements per line and block.
		nLine, nBlock, format := 8, 64, "%d,"
		switch kind {
		case reflect.Uint8:
			format = "%#02x,"
		case reflect.Uint16:
			format = "%#04x,"
		case reflect.Uint32:
			nLine, nBlock, format = 4, 32, "%#08x,"
		case reflect.Uint, reflect.Uint64:
			nLine, nBlock, format = 4, 32, "%#016x,"
		case reflect.Int8:
			nLine = 16
		}
		n := nLine
		for i := 0; i < v.Len(); i++ {
			if i%nBlock == 0 && v.Len() > nBlock {
				w.printf("// Entry %X - %X\n", i, i+nBlock-1)
			}
			elem := v.Index(i).Interface()
			w.gob.Encode(elem)
			w.printf(format, elem)
			if n--; n == 0 {
				n = nLine
				w.printf("\n")
			}
		}
		w.printf("\n")
	case reflect.Struct:
		zero := reflect.Zero(v.Type().Elem()).Interface()
		for i := 0; i < v.Len(); i++ {
			elem := v.Index(i).Interface()
			// NOTE(review): this hashes the whole slice once per element; it
			// is kept as is because changing it would change the checksums.
			w.gob.EncodeValue(v)
			if !reflect.DeepEqual(zero, elem) {
				line := fmt.Sprintf("%#v,\n", elem)
				line = line[strings.IndexByte(line, '{'):]
				w.printf("%d: ", i)
				// line is data, not a format string: it may contain '%'.
				w.printf("%s", line)
			}
		}
	case reflect.Array:
		for i := 0; i < v.Len(); i++ {
			w.printf("%d: %#v,\n", i, v.Index(i).Interface())
		}
	default:
		panic("gen: slice elem type not supported")
	}
	w.printf("}")
}
// WriteType writes a struct type definition for the type of x, listing each
// field with its name and type, and returns the type name.
func (w *CodeWriter) WriteType(x interface{}) string {
	t := reflect.TypeOf(x)
	name := t.Name()
	w.printf("type %s struct {\n", name)
	for i, n := 0, t.NumField(); i < n; i++ {
		f := t.Field(i)
		w.printf("\t%s %s\n", f.Name, f.Type)
	}
	w.printf("}\n")
	return name
}
// typeName returns the name of the Go type of x with the first occurrence of
// the "main." package qualifier removed.
func typeName(x interface{}) string {
	full := fmt.Sprint(reflect.ValueOf(x).Type())
	return strings.Replace(full, "main.", "", 1)
}
@@ -0,0 +1,354 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gen contains common code for the various code generation tools in the
// text repository. Its usage ensures consistency between tools.
//
// This package defines command line flags that are common to most generation
// tools. The flags allow for specifying specific Unicode and CLDR versions
// in the public Unicode data repository (https://www.unicode.org/Public).
//
// A local Unicode data mirror can be set through the flag -local or the
// environment variable UNICODE_DIR. The former takes precedence. The local
// directory should follow the same structure as the public repository.
//
// IANA data can also optionally be mirrored by putting it in the iana directory
// rooted at the top of the local mirror. Beware, though, that IANA data is not
// versioned. So it is up to the developer to use the right version.
package gen // import "golang.org/x/text/internal/gen"
import (
"bytes"
"flag"
"fmt"
"go/build"
"go/format"
"io"
"log"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"sync"
"unicode"
"golang.org/x/text/unicode/cldr"
)
// Command line flags shared by the generation tools.
var (
// url is the root URL from which Unicode data files are fetched.
url = flag.String("url",
"https://www.unicode.org/Public",
"URL of Unicode database directory")
// iana is the root URL from which IANA files are fetched.
iana = flag.String("iana",
"http://www.iana.org",
"URL of the IANA repository")
// unicodeVersion defaults to the UNICODE_VERSION environment variable and,
// if that is empty, to unicode.Version.
unicodeVersion = flag.String("unicode",
getEnv("UNICODE_VERSION", unicode.Version),
"unicode version to use")
// cldrVersion defaults to the CLDR_VERSION environment variable and, if
// that is empty, to cldr.Version.
cldrVersion = flag.String("cldr",
getEnv("CLDR_VERSION", cldr.Version),
"cldr version to use")
)
// getEnv returns the value of the environment variable name, or def if the
// variable is unset or empty.
func getEnv(name, def string) string {
	value := os.Getenv(name)
	if value == "" {
		return def
	}
	return value
}
// Init performs common initialization for a gen command: it configures the
// standard logger to print the file name and line number without a prefix,
// and parses the command line flags.
func Init() {
	log.SetFlags(log.Lshortfile)
	log.SetPrefix("")
	flag.Parse()
}
// header is the comment that WriteGo places at the top of every generated
// file.
const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
`
// UnicodeVersion reports the requested Unicode version, that is, the value of
// the -unicode flag.
func UnicodeVersion() string {
return *unicodeVersion
}
// CLDRVersion reports the requested CLDR version, that is, the value of the
// -cldr flag.
func CLDRVersion() string {
return *cldrVersion
}
// tags lists, for each supported Unicode version, the build tags of the files
// generated for that version. The tags are comma-separated; tagLines joins
// them with "&&" to form a //go:build line.
var tags = []struct{ version, buildTags string }{
{"9.0.0", "!go1.10"},
{"10.0.0", "go1.10,!go1.13"},
{"11.0.0", "go1.13,!go1.14"},
{"12.0.0", "go1.14,!go1.16"},
{"13.0.0", "go1.16,!go1.21"},
{"15.0.0", "go1.21"},
}
// buildTags reports the build tags used for the current Unicode version. It
// calls log.Fatalf if the version has no entry in tags.
func buildTags() string {
	version := UnicodeVersion()
	for i := range tags {
		if tags[i].version == version {
			return tags[i].buildTags
		}
	}
	log.Fatalf("Unknown build tags for Unicode version %q.", version)
	return ""
}
// IsLocal reports whether data files are available locally, which it decides
// by checking that the README file of the local data directory exists.
func IsLocal() bool {
	readme, err := localReadmeFile()
	if err != nil {
		return false
	}
	_, err = os.Stat(readme)
	return err == nil
}
// OpenUCDFile opens the requested UCD file. The file is specified relative to
// the "ucd" directory of the requested Unicode version under the public
// Unicode root directory. It will call log.Fatal if there are any errors.
func OpenUCDFile(file string) io.ReadCloser {
return openUnicode(path.Join(*unicodeVersion, "ucd", file))
}
// OpenCLDRCoreZip opens the CLDR core zip file ("core.zip") of the requested
// CLDR version. It will call log.Fatal if there are any errors.
func OpenCLDRCoreZip() io.ReadCloser {
return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
}
// OpenUnicodeFile opens category/version/file relative to the root of the
// Unicode data archive. If version is "", the requested Unicode version is
// used. It will call log.Fatal if there are any errors.
func OpenUnicodeFile(category, version, file string) io.ReadCloser {
	v := version
	if v == "" {
		v = UnicodeVersion()
	}
	rel := path.Join(category, v, file)
	return openUnicode(rel)
}
// OpenIANAFile opens the requested IANA file. The file is specified relative
// to the IANA root, which is typically either http://www.iana.org or the
// iana directory in the local mirror. It will call log.Fatal if there are any
// errors.
//
// Note that the parameter name shadows the imported path package inside this
// function.
func OpenIANAFile(path string) io.ReadCloser {
return Open(*iana, "iana", path)
}
var (
// dirMutex serializes getLocalDir.
dirMutex sync.Mutex
// localDir is not referenced by the code visible in this file.
// NOTE(review): possibly vestigial — confirm before removing.
localDir string
)
// permissions is the mode passed to os.MkdirAll and os.WriteFile when the
// local data directory and the files in it are created.
const permissions = 0755
// localReadmeFile returns the path of the README file in the DATA directory
// of the golang.org/x/text package, as located by go/build. It returns an
// error if the package cannot be found.
func localReadmeFile() (string, error) {
	pkg, err := build.Import("golang.org/x/text", "", build.FindOnly)
	if err == nil {
		return filepath.Join(pkg.Dir, "DATA", "README"), nil
	}
	return "", fmt.Errorf("Could not locate package: %v", err)
}
// getLocalDir returns the local data directory. If its README file does not
// exist yet, the directory is created and the README is written. It calls
// log.Fatal if the package cannot be located or the directory cannot be
// created; a failure to write the README is ignored.
func getLocalDir() string {
	dirMutex.Lock()
	defer dirMutex.Unlock()
	readme, err := localReadmeFile()
	if err != nil {
		log.Fatal(err)
	}
	dir := filepath.Dir(readme)
	if _, statErr := os.Stat(readme); statErr == nil {
		return dir
	}
	if mkErr := os.MkdirAll(dir, permissions); mkErr != nil {
		log.Fatalf("Could not create directory: %v", mkErr)
	}
	os.WriteFile(readme, []byte(readmeTxt), permissions)
	return dir
}
// readmeTxt is the content of the README file that getLocalDir writes into a
// newly created local data directory.
//
// NOTE(review): "times" in the text below is presumably a typo for "items"; it
// is left unchanged here because the text is written to disk verbatim.
const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT.
This directory contains downloaded files used to generate the various tables
in the golang.org/x/text subrepo.
Note that the language subtag repo (iana/assignments/language-subtag-registry)
and all other times in the iana subdirectory are not versioned and will need
to be periodically manually updated. The easiest way to do this is to remove
the entire iana directory. This is mostly of concern when updating the language
package.
`
// Open opens subdir/path if a local directory is specified and the file exists,
// where subdir is a directory relative to the local root, or fetches it from
// urlRoot/path otherwise. It will call log.Fatal if there are any errors.
func Open(urlRoot, subdir, path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path))
return open(file, urlRoot, path)
}
func openUnicode(path string) io.ReadCloser {
file := filepath.Join(getLocalDir(), filepath.FromSlash(path))
return open(file, *url, path)
}
// TODO: automatically periodically update non-versioned files.

// open returns the local file if it can be opened. Otherwise it downloads
// urlRoot/path, stores the data in file (creating parent directories as
// needed) and returns a reader over the downloaded bytes. It calls log.Fatalf
// if the download or the write fails.
func open(file, urlRoot, path string) io.ReadCloser {
	if local, err := os.Open(file); err == nil {
		return local
	}
	body := get(urlRoot, path)
	defer body.Close()
	data, err := io.ReadAll(body)
	if err != nil {
		log.Fatalf("Could not download file: %v", err)
	}
	// A failure to create the directory surfaces in the write below.
	os.MkdirAll(filepath.Dir(file), permissions)
	if werr := os.WriteFile(file, data, permissions); werr != nil {
		log.Fatalf("Could not create file: %v", werr)
	}
	return io.NopCloser(bytes.NewReader(data))
}
// get fetches root/path over HTTP and returns the response body, which the
// caller must close. Progress is printed to standard output. It calls
// log.Fatalf if the request fails or the status code is not 200.
func get(root, path string) io.ReadCloser {
	target := root + "/" + path
	fmt.Printf("Fetching %s...", target)
	defer fmt.Println(" done.")
	resp, err := http.Get(target)
	switch {
	case err != nil:
		log.Fatalf("HTTP GET: %v", err)
	case resp.StatusCode != 200:
		log.Fatalf("Bad GET status for %q: %q", target, resp.Status)
	}
	return resp.Body
}
// TODO: use Write*Version in all applicable packages.

// WriteUnicodeVersion writes to w a documented constant named UnicodeVersion
// holding the Unicode version from which the tables are generated.
func WriteUnicodeVersion(w io.Writer) {
	version := UnicodeVersion()
	fmt.Fprint(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
	fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", version)
}
// WriteCLDRVersion writes to w a documented constant named CLDRVersion holding
// the CLDR version from which the tables are generated.
func WriteCLDRVersion(w io.Writer) {
	version := CLDRVersion()
	fmt.Fprint(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
	fmt.Fprintf(w, "const CLDRVersion = %q\n\n", version)
}
// WriteGoFile prepends a standard file comment and package statement to the
// given bytes, applies gofmt, and writes the result to a file with the given
// name. It will call log.Fatal if there are any errors.
func WriteGoFile(filename, pkg string, b []byte) {
	out, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer out.Close()
	_, err = WriteGo(out, pkg, "", b)
	if err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// fileToPattern turns a Go file name into a format pattern with a "%s" verb
// inserted before the "_test.go" suffix, or else before the final ".go"; for
// example "tables.go" becomes "tables%s.go".
func fileToPattern(filename string) string {
	ext := ".go"
	if strings.HasSuffix(filename, "_test.go") {
		ext = "_test.go"
	}
	stem := filename[:len(filename)-len(ext)]
	return fmt.Sprint(stem, "%s", ext)
}
// tagLines returns the //go:build line to add to the file. The
// comma-separated terms in tags are joined with " && ".
func tagLines(tags string) string {
	terms := strings.Split(tags, ",")
	return "//go:build " + strings.Join(terms, " && ") + "\n"
}
// updateBuildTags rewrites the //go:build lines of the existing versioned
// files that match pattern so that they carry the build tags listed in tags.
// pattern is a format string with one %s verb for the Unicode version, as
// produced by fileToPattern. Versions whose file cannot be read are skipped.
// It calls log.Fatal if a file cannot be written back.
func updateBuildTags(pattern string) {
	// Compile the expression once rather than on every iteration.
	buildLine := regexp.MustCompile(`//go:build.*\n`)
	for _, t := range tags {
		oldFile := fmt.Sprintf(pattern, t.version)
		b, err := os.ReadFile(oldFile)
		if err != nil {
			continue // no file was generated for this version
		}
		b = buildLine.ReplaceAll(b, []byte(tagLines(t.buildTags)))
		if err := os.WriteFile(oldFile, b, 0644); err != nil {
			log.Fatal(err)
		}
	}
}
// WriteVersionedGoFile prepends a standard file comment, the build tags for
// the current Unicode version and a package statement to the given bytes,
// applies gofmt, and writes the result to a file whose name is the given name
// with the Unicode version inserted before its suffix. The build constraint
// lines of existing files for other versions are updated first. It will call
// log.Fatal if there are any errors.
func WriteVersionedGoFile(filename, pkg string, b []byte) {
	pattern := fileToPattern(filename)
	updateBuildTags(pattern)
	target := fmt.Sprintf(pattern, UnicodeVersion())
	out, err := os.Create(target)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", target, err)
	}
	defer out.Close()
	_, err = WriteGo(out, pkg, buildTags(), b)
	if err != nil {
		log.Fatalf("Error writing file %s: %v", target, err)
	}
}
// WriteGo prepends a standard file comment, an optional //go:build line for
// tags, and a package statement to the given bytes, applies gofmt, and writes
// the result to w. If formatting fails, the unformatted source is written
// instead and the formatting error is returned.
func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) {
	src := []byte(header)
	if tags != "" {
		src = append(src, tagLines(tags)...)
		src = append(src, '\n')
	}
	src = append(src, fmt.Sprintf("package %s\n\n", pkg)...)
	src = append(src, b...)

	formatted, fmtErr := format.Source(src)
	if fmtErr == nil {
		return w.Write(formatted)
	}
	// Print the generated code even in case of an error so that the
	// returned error can be meaningfully interpreted.
	n, _ = w.Write(src)
	return n, fmtErr
}
// Repackage rewrites a Go file from belonging to package main to belonging to
// the given package. Everything up to and including the "package main" clause
// of inFile is replaced by the standard header and the new package clause, and
// the result is written to outFile. It calls log.Fatalf if inFile cannot be
// read or does not contain the package clause.
func Repackage(inFile, outFile, pkg string) {
	src, err := os.ReadFile(inFile)
	if err != nil {
		log.Fatalf("reading %s: %v", inFile, err)
	}
	const toDelete = "package main\n\n"
	at := bytes.Index(src, []byte(toDelete))
	if at < 0 {
		log.Fatalf("Could not find %q in %s.", toDelete, inFile)
	}
	rest := src[at+len(toDelete):]
	WriteGoFile(outFile, pkg, rest)
}
@@ -0,0 +1,49 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package internal contains non-exported functionality that are used by
// packages in the text repository.
package internal // import "golang.org/x/text/internal"
import (
"sort"
"golang.org/x/text/language"
)
// SortTags sorts tags in place, ordered by their string representation.
func SortTags(tags []language.Tag) {
	byString := sorter(tags)
	sort.Sort(byString)
}
// sorter implements sort.Interface for a slice of tags, ordering them by
// their string representation.
type sorter []language.Tag

// Len returns the number of tags.
func (s sorter) Len() int { return len(s) }

// Swap exchanges the tags at positions i and j.
func (s sorter) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the string form of tag i sorts before that of tag j.
func (s sorter) Less(i, j int) bool {
	a, b := s[i].String(), s[j].String()
	return a < b
}
// UniqueTags sorts tags in place, removes tags whose string representation
// duplicates an earlier one, and returns the leading part of the same slice
// that holds the unique tags.
func UniqueTags(tags []language.Tag) []language.Tag {
	if len(tags) <= 1 {
		return tags
	}
	SortTags(tags)
	// last is the index of the most recently kept tag.
	last := 0
	for _, tag := range tags[1:] {
		if tags[last].String() < tag.String() {
			last++
			tags[last] = tag
		}
	}
	return tags[:last+1]
}
@@ -0,0 +1,38 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"fmt"
"strings"
"testing"
"golang.org/x/text/language"
)
// TestUnique checks that UniqueTags sorts its input and drops duplicates, for
// space-separated lists of language tags.
func TestUnique(t *testing.T) {
	type uniqueCase struct {
		in, want string
	}
	cases := []uniqueCase{
		{"", "[]"},
		{"en", "[en]"},
		{"en en", "[en]"},
		{"en en en", "[en]"},
		{"en-u-cu-eur en", "[en en-u-cu-eur]"},
		{"nl en", "[en nl]"},
		{"pt-Pt pt", "[pt pt-PT]"},
	}
	for _, c := range cases {
		tags := []language.Tag{}
		for _, s := range strings.Split(c.in, " ") {
			if s == "" {
				continue
			}
			tags = append(tags, language.MustParse(s))
		}
		got := fmt.Sprint(UniqueTags(tags))
		if got != c.want {
			t.Errorf("Unique(%s) = %s; want %s", c.in, got, c.want)
		}
	}
}
@@ -0,0 +1,16 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package language
// This file contains code common to the maketables.go and the package code.
// AliasType is the type of an alias in AliasMap.
type AliasType int8
// The alias kinds are numbered from zero in declaration order;
// AliasTypeUnknown is the explicit out-of-band value -1.
const (
Deprecated AliasType = iota
Macro
Legacy
AliasTypeUnknown AliasType = -1
)
@@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// CompactCoreInfo is a compact integer with the three core tags encoded.
//
// As packed by GetCompactCore, the language ID is shifted left by 20 bits,
// the script ID by 12 bits, and the region ID occupies the lowest bits.
type CompactCoreInfo uint32
// GetCompactCore packs the language, script, and region of t into a single
// CompactCoreInfo that is guaranteed to be unique for different language,
// region, and script values.
//
// It returns ok == false (and a zero value) when the language ID of t lies
// above langNoIndexOffset.
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
	if t.LangID > langNoIndexOffset {
		return 0, false
	}
	lang := CompactCoreInfo(t.LangID) << (8 + 12)
	script := CompactCoreInfo(t.ScriptID) << 12
	region := CompactCoreInfo(t.RegionID)
	return lang | script | region, true
}
// Tag unpacks c into a Tag. The language is taken from the bits above bit
// 20, the script from the 8 bits starting at bit 12, and the region from the
// lowest 10 bits. All other fields of the returned Tag are left at their
// zero values.
func (c CompactCoreInfo) Tag() Tag {
	var t Tag
	t.LangID = Language(c >> 20)
	t.ScriptID = Script(c>>12) & 0xff
	t.RegionID = Region(c & 0x3ff)
	return t
}
@@ -0,0 +1,61 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package compact defines a compact representation of language tags.
//
// Common language tags (at least all for which locale information is defined
// in CLDR) are assigned a unique index. Each Tag is associated with such an
// ID for selecting language-related resources (such as translations) as well
// as one for selecting regional defaults (currency, number formatting, etc.)
//
// We may want to export this functionality at some point, but for now it is
// only available for use within x/text.
package compact // import "golang.org/x/text/internal/language/compact"
import (
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// ID is an integer identifying a single tag.
//
// IDs below len(coreTags) index into coreTags; larger IDs index into
// specialTags after subtracting len(coreTags) (see ID.Tag).
type ID uint16
// getCoreIndex looks up the compact core encoding of t in coreTags using a
// binary search and returns its position as an ID. ok is false when t has no
// compact core encoding or when that encoding is not present in coreTags.
func getCoreIndex(t language.Tag) (id ID, ok bool) {
	key, valid := language.GetCompactCore(t)
	if !valid {
		return 0, false
	}
	// Find the first entry that is not smaller than key.
	pos := sort.Search(len(coreTags), func(k int) bool {
		return coreTags[k] >= key
	})
	if pos < len(coreTags) && coreTags[pos] == key {
		return ID(pos), true
	}
	return 0, false
}
// Parent returns the ID of the parent or the root ID if id is already the root.
//
// The result is read directly from the parents table; id is used as the
// index without a bounds check.
func (id ID) Parent() ID {
return parents[id]
}
// Tag converts id to an internal language Tag. IDs that fall inside coreTags
// are decoded from their compact core encoding; IDs at or beyond
// len(coreTags) are looked up in specialTags.
func (id ID) Tag() language.Tag {
	idx := int(id)
	if n := len(coreTags); idx >= n {
		return specialTags[idx-n]
	}
	return coreTags[id].Tag()
}
// specialTags holds the parsed form of the space-separated tags listed in
// specialTagsStr, in the same order. It is filled in by init.
var specialTags []language.Tag

// init parses every tag named in specialTagsStr into specialTags.
func init() {
	names := strings.Split(specialTagsStr, " ")
	specialTags = make([]language.Tag, 0, len(names))
	for _, name := range names {
		specialTags = append(specialTags, language.MustParse(name))
	}
}
@@ -0,0 +1,64 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
// Language tag table generator.
// Data read from the web.
package main
import (
"flag"
"fmt"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/unicode/cldr"
)
// Command-line flags of the table generator.
var (
// test selects comparing existing tables against freshly read data instead
// of generating new ones, according to its usage text.
// NOTE(review): no read of this flag is visible in this part of the file.
test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data.")
// outputFile names the file the generated tables are written to; it
// defaults to "tables.go".
outputFile = flag.String("output",
"tables.go",
"output file for generated tables")
)
// main generates the compact index tables for package compact: it emits the
// import of the internal language package, the CLDR version, and the compact
// index, and writes the result as a Go file when it returns.
func main() {
	gen.Init()

	w := gen.NewCodeWriter()
	// Write to the file named by the -output flag rather than a hard-coded
	// name. The flag defaults to "tables.go", so the default behavior is
	// unchanged. The argument is evaluated here, after gen.Init, which is
	// presumably where the flags are parsed.
	defer w.WriteGoFile(*outputFile, "compact")

	fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)

	b := newBuilder(w)
	gen.WriteCLDRVersion(w)

	b.writeCompactIndex()
}
// builder bundles the state shared by the table-writing steps.
type builder struct {
// w receives the generated Go source.
w *gen.CodeWriter
// data is the decoded CLDR archive.
data *cldr.CLDR
// supp is the supplemental data of data, cached by newBuilder.
supp *cldr.SupplementalData
}
// newBuilder opens the CLDR core archive, decodes it, and returns a builder
// that writes to w. The program is terminated via log.Fatal if decoding
// fails.
func newBuilder(w *gen.CodeWriter) *builder {
	zip := gen.OpenCLDRCoreZip()
	defer zip.Close()

	var dec cldr.Decoder
	data, err := dec.DecodeZip(zip)
	if err != nil {
		log.Fatal(err)
	}
	return &builder{
		w:    w,
		data: data,
		supp: data.Supplemental(),
	}
}
@@ -0,0 +1,113 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
// This file generates derivative tables based on the language package itself.
import (
"fmt"
"log"
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// Compact indices:
// Note -va-X variants only apply to localization variants.
// BCP variants only ever apply to language.
// The only ambiguity between tags is with regions.
//
// writeCompactIndex collects the set of tags found in the CLDR locales and
// plural rules, splits them into "core" tags (no variants, no extensions) and
// "special" tags (everything else), and writes:
//   - the NumCompactTags constant (the total number of collected tags),
//   - one ID constant per tag, core tags first and special tags after them,
//   - the sorted coreTags table, and
//   - specialTagsStr, the special tags joined by spaces.
func (b *builder) writeCompactIndex() {
// Collect all language tags for which we have any data in CLDR.
m := map[language.Tag]bool{}
for _, lang := range b.data.Locales() {
// We include all locales unconditionally to be consistent with en_US.
// We want en_US, even though it has no data associated with it.
// TODO: put any of the languages for which no data exists at the end
// of the index. This allows all components based on ICU to use that
// as the cutoff point.
// if x := data.RawLDML(lang); false ||
// x.LocaleDisplayNames != nil ||
// x.Characters != nil ||
// x.Delimiters != nil ||
// x.Measurement != nil ||
// x.Dates != nil ||
// x.Numbers != nil ||
// x.Units != nil ||
// x.ListPatterns != nil ||
// x.Collations != nil ||
// x.Segmentations != nil ||
// x.Rbnf != nil ||
// x.Annotations != nil ||
// x.Metadata != nil {
// TODO: support POSIX natively, albeit non-standard.
// A "_POSIX" locale suffix is rewritten to the "-u-va-posix" extension
// before the tag is made.
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
m[tag] = true
// }
}
// TODO: plural rules are also defined for the deprecated tags:
// iw mo sh tl
// Consider removing these as compact tags.
// Include locales for plural rules, which uses a different structure.
for _, plurals := range b.supp.Plurals {
for _, rules := range plurals.PluralRules {
for _, lang := range strings.Split(rules.Locales, " ") {
m[language.Make(lang)] = true
}
}
}
var coreTags []language.CompactCoreInfo
var special []string
for t := range m {
// The only extension the index knows how to handle is u-va-posix.
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
log.Fatalf("Unexpected extension %v in %v", x, t)
}
// Tags without variants and extensions are stored by their compact core
// encoding; all others are stored by their string form.
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
cci, ok := language.GetCompactCore(t)
if !ok {
log.Fatalf("Locale for non-basic language %q", t)
}
coreTags = append(coreTags, cci)
} else {
special = append(special, t.String())
}
}
w := b.w
// Map iteration order is random, so both lists are sorted to make the
// generated output deterministic.
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
sort.Strings(special)
w.WriteComment(`
NumCompactTags is the number of common tags. The maximum tag is
NumCompactTags-1.`)
w.WriteConst("NumCompactTags", len(m))
fmt.Fprintln(w, "const (")
for i, t := range coreTags {
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
}
// Special tags are numbered directly after the core tags.
for i, t := range special {
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
}
fmt.Fprintln(w, ")")
w.WriteVar("coreTags", coreTags)
w.WriteConst("specialTagsStr", strings.Join(special, " "))
}
// ident turns the string form of a tag into the name of its ID constant by
// removing every hyphen and appending "Index"; for example "en-US" becomes
// "enUSIndex".
func ident(s string) string {
	compacted := strings.ReplaceAll(s, "-", "")
	return compacted + "Index"
}
@@ -0,0 +1,54 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
import (
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language"
"golang.org/x/text/internal/language/compact"
"golang.org/x/text/unicode/cldr"
)
// main generates parents.go for package compact. For every CLDR locale that
// has an exact compact index it records the compact index of the nearest
// ancestor that also has an exact compact index, falling back to 0 (und).
func main() {
	zip := gen.OpenCLDRCoreZip()
	defer zip.Close()

	var dec cldr.Decoder
	data, err := dec.DecodeZip(zip)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	defer w.WriteGoFile("parents.go", "compact")

	// Create parents table. The element type is a local type named ID.
	type ID uint16
	parents := make([]ID, compact.NumCompactTags)
	for _, loc := range data.Locales() {
		tag := language.MustParse(loc)
		index, ok := compact.FromTag(tag)
		if !ok {
			continue
		}
		var parentIndex compact.ID // zero value is und
		// Walk up the parent chain until an ancestor with an exact compact
		// index is found or the root is reached.
		for p := tag.Parent(); p != language.Und; p = p.Parent() {
			x, found := compact.FromTag(p)
			if !found {
				continue
			}
			parentIndex = x
			break
		}
		parents[index] = ID(parentIndex)
	}

	w.WriteComment(`
parents maps a compact index of a tag to the compact index of the parent of
this tag.`)
	w.WriteVar("parents", parents)
}
@@ -0,0 +1,38 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
import (
"testing"
"golang.org/x/text/internal/language"
)
// TestParents verifies selected entries of the generated parents table: for
// each pair, the table entry at the compact index of tag must equal the
// compact index of parent.
func TestParents(t *testing.T) {
	cases := []struct {
		tag, parent string
	}{
		{"af", "und"},
		{"en", "und"},
		{"en-001", "en"},
		{"en-AU", "en-001"},
		{"en-US", "en"},
		{"en-US-u-va-posix", "en-US"},
		{"ca-ES-valencia", "ca-ES"},
	}
	for _, c := range cases {
		child, found := LanguageID(Make(language.MustParse(c.tag)))
		if !found {
			t.Fatalf("Could not get index of flag %s", c.tag)
		}
		want, found := LanguageID(Make(language.MustParse(c.parent)))
		if !found {
			t.Fatalf("Could not get index of parent %s of tag %s", c.parent, c.tag)
		}
		got := parents[child]
		if got == want {
			continue
		}
		t.Errorf("Parent[%s] = %d; want %d (%s)", c.tag, got, want, c.parent)
	}
}
@@ -0,0 +1,260 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go
package compact
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"strings"
"golang.org/x/text/internal/language"
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
// NOTE: exported tags will become part of the public API.
// language is the compact ID reported by LanguageID.
language ID
// locale is the compact ID reported by RegionalID; it differs from
// language when Make found a valid "rg" region override.
locale ID
// full is set by Make when the IDs are not an exact match for the
// original tag; it is nil for a compact tag (see IsCompact).
full fullTag // always a language.Tag for now.
}
// _und is the compact ID that IsRoot compares against to detect the root
// language "und".
const _und = 0
// fullTag is the set of operations Tag needs from the full tag stored in
// Tag.full.
type fullTag interface {
// IsRoot is what Tag.IsRoot delegates to when a full tag is present.
IsRoot() bool
// Parent is what Tag.Parent delegates to when a full tag is present.
Parent() language.Tag
}
// Make creates a compact Tag from a fully specified internal language Tag.
//
// When t carries an "rg" key of the form "<region>zzzz" with a parseable
// two-letter region, the language ID is computed from t without the "rg" key
// and the locale ID from the same tag with its region replaced by the "rg"
// region. Otherwise both IDs are the ID of t itself. The original tag is
// retained in the result whenever an ID is not an exact match.
func Make(t language.Tag) (tag Tag) {
	rg := t.TypeForKey("rg")
	if len(rg) == 6 && rg[2:] == "zzzz" {
		if region, err := language.ParseRegion(rg[:2]); err == nil {
			original := t
			t, _ = t.SetTypeForKey("rg", "")
			// TODO: should we not consider "va" for the language tag?
			langID, langExact := FromTag(t)
			t.RegionID = region
			localeID, localeExact := FromTag(t)
			tag.language, tag.locale = langID, localeID
			if !(langExact && localeExact) {
				tag.full = original
			}
			return tag
		}
	}
	id, exact := FromTag(t)
	tag.language, tag.locale = id, id
	if !exact {
		tag.full = t
	}
	return tag
}
// Tag returns an internal language Tag version of this tag. A stored full
// tag is returned as is. Otherwise the tag is rebuilt from the language ID
// and, when the locale ID differs, extended with an "rg" key holding the
// lower-cased region of the locale followed by "zzzz".
func (t Tag) Tag() language.Tag {
	if t.full != nil {
		return t.full.(language.Tag)
	}
	base := t.language.Tag()
	if t.language == t.locale {
		return base
	}
	region := t.locale.Tag().RegionID.String()
	base, _ = base.SetTypeForKey("rg", strings.ToLower(region)+"zzzz")
	return base
}
// IsCompact reports whether this tag is fully defined in terms of ID, that
// is, whether no full tag had to be retained for it.
//
// It uses a value receiver like every other method of Tag, so it can also be
// called on non-addressable values such as the result of Make.
func (t Tag) IsCompact() bool {
	return t.full == nil
}
// MayHaveVariants reports whether a tag may have variants. A false result
// guarantees that the tag has none. Variants are possible when a full tag is
// stored or when the language ID lies outside the core tags.
func (t Tag) MayHaveVariants() bool {
	if t.full != nil {
		return true
	}
	return int(t.language) >= len(coreTags)
}
// MayHaveExtensions reports whether a tag may have extensions. A false
// result guarantees that the tag has none. Extensions are possible when a
// full tag is stored, when the language ID lies outside the core tags, or
// when the language and locale IDs differ.
func (t Tag) MayHaveExtensions() bool {
	switch {
	case t.full != nil:
		return true
	case int(t.language) >= len(coreTags):
		return true
	default:
		return t.language != t.locale
	}
}
// IsRoot returns true if t is equal to language "und". A stored full tag
// answers the question itself; otherwise the language ID is compared with
// the ID of "und".
func (t Tag) IsRoot() bool {
	if t.full == nil {
		return t.language == _und
	}
	return t.full.IsRoot()
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
	switch {
	case t.full != nil:
		return Make(t.full.Parent())
	case t.language != t.locale:
		// Simulate stripping -u-rg-xxxxxx
		return Tag{language: t.language, locale: t.language}
	}
	// TODO: use parent lookup table once cycle from internal package is
	// removed. Probably by internalizing the table and declaring this fast
	// enough.
	// lang := compactID(internal.Parent(uint16(t.language)))
	parentID, _ := FromTag(t.language.Tag().Parent())
	return Tag{language: parentID, locale: parentID}
}
// nextToken skips the first byte of s (the separator) and returns the token
// t that follows it, up to but excluding the next "-", together with the
// remainder of the string. The remainder starts at that "-" and is empty
// when there is no further separator. s must not be empty.
func nextToken(s string) (t, tail string) {
	rest := s[1:]
	end := strings.IndexByte(rest, '-')
	if end < 0 {
		return rest, ""
	}
	return rest[:end], rest[end:]
}
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
// for which data exists in the text repository. The index will change over
// time and should not be stored in persistent storage. If t does not match a
// compact index, exact will be false and the compact index will be returned
// for the first match after repeatedly taking the Parent of t.
func LanguageID(t Tag) (id ID, exact bool) {
	id = t.language
	exact = t.full == nil
	return id, exact
}
// RegionalID returns the ID for the regional variant of this tag. This index
// is used to indicate region-specific overrides, such as default currency,
// default calendar and week data, default time cycle, and default measurement
// system and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
// settings for currency, number formatting, etc. The CompactIndex for this tag
// will be that for en-GB, while the RegionalID will be the one corresponding to
// en-US.
//
// exact is false when the tag is not fully described by its IDs.
func RegionalID(t Tag) (id ID, exact bool) {
	id = t.locale
	exact = t.full == nil
	return id, exact
}
// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of a regional and variant subtag "rg"
// and "va" in the "u" extension.
//
// For a compact tag the result simply uses the language ID for both the
// language and the locale. Otherwise the full tag is rebuilt without the
// "rg" and "va" keys and compacted again with Make.
func (t Tag) LanguageTag() Tag {
	if t.full == nil {
		return Tag{language: t.language, locale: t.language}
	}
	tt := t.Tag()
	// SetTypeForKey hands back the updated tag as its result (every other
	// call in this file assigns it), so the result must be stored; discarding
	// it left both keys in place. A key that cannot be removed is kept
	// best-effort: the tag is only replaced when the call succeeds.
	for _, key := range []string{"rg", "va"} {
		if stripped, err := tt.SetTypeForKey(key, ""); err == nil {
			tt = stripped
		}
	}
	return Make(tt)
}
// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
//
// For a compact tag (t.full == nil) the result uses t.locale for both the
// language and the locale ID.
//
// NOTE(review): for a non-compact tag the function fills a language.Builder
// with the locale tag plus the variants and extensions of the full tag, but
// the builder's result is never used and t itself is returned unchanged.
// This looks unfinished; confirm the intended return value before relying
// on this path.
func (t Tag) RegionalTag() Tag {
rt := Tag{language: t.locale, locale: t.locale}
if t.full == nil {
return rt
}
b := language.Builder{}
tag := t.Tag()
// tag, _ = tag.SetTypeForKey("rg", "")
b.SetTag(t.locale.Tag())
if v := tag.Variants(); v != "" {
for _, v := range strings.Split(v, "-") {
b.AddVariant(v)
}
}
for _, e := range tag.Extensions() {
b.AddExt(e)
}
// The builder b is not consulted here; see the NOTE(review) above.
return t
}
// FromTag reports closest matching ID for an internal language Tag.
//
// exact is true only when the returned ID represents t without loss. The
// lookup proceeds in stages:
//  1. Private-use tags have no entries and yield (0, false).
//  2. Tags with variants or a "va" type in the "u" extension are reduced to
//     their core components plus those parts and searched in specialTags.
//  3. The (possibly reduced) tag is looked up among the core tags.
//  4. If the tag has a region but no script, the maximized tag is tried.
//  5. Finally the parent chain is walked until a core tag matches or the
//     root is reached, in which case ID 0 is returned.
//
// Every result from stage 4 onward is inexact.
func FromTag(t language.Tag) (id ID, exact bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will excluded some
// possibilities for optimization, so don't do this quite yet.
exact = true
b, s, r := t.Raw()
if t.HasString() {
if t.IsPrivateUse() {
// We have no entries for user-defined tags.
return 0, false
}
hasExtra := false
if t.HasVariants() {
if t.HasExtensions() {
// Rebuild the tag from its core components and variants only,
// dropping the extensions; the match can then no longer be exact.
build := language.Builder{}
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
build.AddVariant(t.Variants())
exact = false
t = build.Make()
}
hasExtra = true
} else if _, ok := t.Extension('u'); ok {
// TODO: va may mean something else. Consider not considering it.
// Strip all but the 'va' entry.
old := t
variant := t.TypeForKey("va")
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
if variant != "" {
t, _ = t.SetTypeForKey("va", variant)
hasExtra = true
}
// The match is exact only if stripping changed nothing.
exact = old == t
} else {
exact = false
}
if hasExtra {
// We have some variants.
for i, s := range specialTags {
if s == t {
return ID(i + len(coreTags)), exact
}
}
exact = false
}
}
if x, ok := getCoreIndex(t); ok {
return x, exact
}
exact = false
if r != 0 && s == 0 {
// Deal with cases where an extra script is inserted for the region.
// The := deliberately shadows t: the maximized tag is used for this one
// lookup only, and the parent walk below starts from the outer t.
t, _ := t.Maximize()
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
for t = t.Parent(); t != root; t = t.Parent() {
// No variants specified: just compare core components.
// The key has the form lllssrrr, where l, s, and r are nibbles for
// respectively the langID, scriptID, and regionID.
if x, ok := getCoreIndex(t); ok {
return x, exact
}
}
return 0, exact
}
// root is the zero language.Tag; FromTag stops walking up the parent chain
// when it reaches this value.
var root = language.Tag{}
@@ -0,0 +1,236 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
import (
"reflect"
"testing"
"golang.org/x/text/internal/language"
)
// mustParse parses s with language.Parse and returns the compact form of the
// result. It panics with the parse error if s cannot be parsed.
func mustParse(s string) Tag {
	parsed, err := language.Parse(s)
	if err == nil {
		return Make(parsed)
	}
	panic(err)
}
// TestTagSize guards against Tag growing beyond 24 bytes.
func TestTagSize(t *testing.T) {
	const maxSize = 24
	if size := reflect.TypeOf(Tag{}).Size(); size > maxSize {
		t.Errorf("size of Tag was %d; want 24", size)
	}
}
func TestNoPublic(t *testing.T) {
noExportedField(t, reflect.TypeOf(Tag{}))
}
// noExportedField reports a test error for every exported field of the
// struct type typ. Embedded fields are checked recursively.
func noExportedField(t *testing.T, typ reflect.Type) {
	count := typ.NumField()
	for idx := 0; idx < count; idx++ {
		field := typ.Field(idx)
		// PkgPath is empty exactly for exported fields.
		if field.PkgPath == "" {
			t.Errorf("Tag may not have exported fields, but has field %q", field.Name)
		}
		if !field.Anonymous {
			continue
		}
		noExportedField(t, field.Type)
	}
}
// TestEquality checks that a tag survives a round trip through its string
// form: re-parsing tag.Tag().String() must yield an equal compact Tag.
func TestEquality(t *testing.T) {
	for idx, tc := range parseTests() {
		in := tc.in
		want := mk(in)
		got := mustParse(want.Tag().String())
		if want != got {
			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", idx, in, got, want)
		}
	}
}
// compactTest is one expectation for the ID lookup functions.
type compactTest struct {
// tag is the input in string form; it is parsed with mustParse.
tag string
// index is the expected ID.
index ID
// ok is the expected value of the "exact" result.
ok bool
}
// compactTests are the cases shared by TestLanguageID and TestRegionalID;
// for these tags both lookups are expected to give the same result.
var compactTests = []compactTest{
// TODO: these values will change with each CLDR update. This issue
// will be solved if we decide to fix the indexes.
{"und", undIndex, true},
{"ca-ES-valencia", caESvalenciaIndex, true},
{"ca-ES-valencia-u-va-posix", caESvalenciaIndex, false},
{"ca-ES-valencia-u-co-phonebk", caESvalenciaIndex, false},
{"ca-ES-valencia-u-co-phonebk-va-posix", caESvalenciaIndex, false},
{"x-klingon", 0, false},
{"en-US", enUSIndex, true},
{"en-US-u-va-posix", enUSuvaposixIndex, true},
{"en", enIndex, true},
{"en-u-co-phonebk", enIndex, false},
{"en-001", en001Index, true},
{"zh-Hant-HK", zhHantHKIndex, true},
{"zh-HK", zhHantHKIndex, false}, // maximized to zh-Hant-HK
{"nl-Beng", 0, false}, // parent skips script
{"nl-NO", nlIndex, false}, // region is ignored
{"nl-Latn-NO", nlIndex, false},
{"nl-Latn-NO-u-co-phonebk", nlIndex, false},
{"nl-Latn-NO-valencia", nlIndex, false},
{"nl-Latn-NO-oxendict", nlIndex, false},
{"sh", shIndex, true}, // From plural rules.
}
func TestLanguageID(t *testing.T) {
tests := append(compactTests, []compactTest{
{"en-GB", enGBIndex, true},
{"en-GB-u-rg-uszzzz", enGBIndex, true},
{"en-GB-u-rg-USZZZZ", enGBIndex, true},
{"en-GB-u-rg-uszzzz-va-posix", enGBIndex, false},
{"en-GB-u-co-phonebk-rg-uszzzz", enGBIndex, false},
// Invalid region specifications are ignored.
{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
}...)
for _, tt := range tests {
x, ok := LanguageID(mustParse(tt.tag))
if ID(x) != tt.index || ok != tt.ok {
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
}
}
}
func TestRegionalID(t *testing.T) {
tests := append(compactTests, []compactTest{
{"en-GB", enGBIndex, true},
{"en-GB-u-rg-uszzzz", enUSIndex, true},
{"en-GB-u-rg-USZZZZ", enUSIndex, true},
// TODO: use different exact values for language and regional tag?
{"en-GB-u-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
{"en-GB-u-co-phonebk-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
{"en-GB-u-co-phonebk-rg-uszzzz", enUSIndex, false},
// Invalid region specifications are ignored.
{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
}...)
for _, tt := range tests {
x, ok := RegionalID(mustParse(tt.tag))
if ID(x) != tt.index || ok != tt.ok {
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
}
}
}
// TestParent checks Tag.Parent: for every pair, the parent of the parsed
// "in" tag must equal the parsed "out" tag.
func TestParent(t *testing.T) {
tests := []struct{ in, out string }{
// Strip variants and extensions first
{"de-u-co-phonebk", "de"},
{"de-1994", "de"},
{"de-Latn-1994", "de"}, // remove superfluous script.
// Ensure the canonical Tag for an entry is in the chain for base-script
// pairs.
{"zh-Hans", "zh"},
// Skip the script if it is the maximized version. CLDR files for the
// skipped tag are always empty.
{"zh-Hans-TW", "zh"},
{"zh-Hans-CN", "zh"},
// Insert the script if the maximized script is not the same as the
// maximized script of the base language.
{"zh-TW", "zh-Hant"},
{"zh-HK", "zh-Hant"},
{"zh-Hant-TW", "zh-Hant"},
{"zh-Hant-HK", "zh-Hant"},
// Non-default script skips to und.
// CLDR
{"az-Cyrl", "und"},
{"bs-Cyrl", "und"},
{"en-Dsrt", "und"},
{"ha-Arab", "und"},
{"mn-Mong", "und"},
{"pa-Arab", "und"},
{"shi-Latn", "und"},
{"sr-Latn", "und"},
{"uz-Arab", "und"},
{"uz-Cyrl", "und"},
{"vai-Latn", "und"},
{"zh-Hant", "und"},
// extra
{"nl-Cyrl", "und"},
// World english inherits from en-001.
{"en-150", "en-001"},
{"en-AU", "en-001"},
{"en-BE", "en-001"},
{"en-GG", "en-001"},
{"en-GI", "en-001"},
{"en-HK", "en-001"},
{"en-IE", "en-001"},
{"en-IM", "en-001"},
{"en-IN", "en-001"},
{"en-JE", "en-001"},
{"en-MT", "en-001"},
{"en-NZ", "en-001"},
{"en-PK", "en-001"},
{"en-SG", "en-001"},
// Spanish in Latin-American countries have es-419 as parent.
{"es-AR", "es-419"},
{"es-BO", "es-419"},
{"es-CL", "es-419"},
{"es-CO", "es-419"},
{"es-CR", "es-419"},
{"es-CU", "es-419"},
{"es-DO", "es-419"},
{"es-EC", "es-419"},
{"es-GT", "es-419"},
{"es-HN", "es-419"},
{"es-MX", "es-419"},
{"es-NI", "es-419"},
{"es-PA", "es-419"},
{"es-PE", "es-419"},
{"es-PR", "es-419"},
{"es-PY", "es-419"},
{"es-SV", "es-419"},
{"es-US", "es-419"},
{"es-UY", "es-419"},
{"es-VE", "es-419"},
// exceptions (according to CLDR)
{"es-CW", "es"},
// Inherit from pt-PT, instead of pt for these countries.
{"pt-AO", "pt-PT"},
{"pt-CV", "pt-PT"},
{"pt-GW", "pt-PT"},
{"pt-MO", "pt-PT"},
{"pt-MZ", "pt-PT"},
{"pt-ST", "pt-PT"},
{"pt-TL", "pt-PT"},
{"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
{"en-GB-u-rg-uszzzz", "en-GB"},
{"en-US-u-va-posix", "en-US"},
// Difference between language and regional tag.
{"ca-ES-valencia", "ca-ES"},
{"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"}, // t.full != nil
{"en-US-u-va-variant", "en-US"},
{"en-u-va-variant", "en"}, // t.full != nil
{"en-u-rg-gbzzzz", "en"},
{"en-US-u-rg-gbzzzz", "en-US"},
{"nl-US-u-rg-gbzzzz", "nl-US"}, // t.full != nil
}
for _, tt := range tests {
tag := mustParse(tt.in)
// Compare compact tags directly; both sides go through Make.
if p := mustParse(tt.out); p != tag.Parent() {
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
}
}
}
@@ -0,0 +1,120 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package compact
// parents maps a compact index of a tag to the compact index of the parent of
// this tag.
var parents = []ID{ // 775 elements
// Entry 0 - 3F
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
// Entry 40 - 7F
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
// Entry C0 - FF
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
// Entry 100 - 13F
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
// Entry 140 - 17F
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
// Entry 200 - 23F
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
// Entry 240 - 27F
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
// Entry 280 - 2BF
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
// Entry 2C0 - 2FF
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
// Entry 300 - 33F
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
} // Size: 1574 bytes
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
@@ -0,0 +1,201 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
import (
"strings"
"testing"
"golang.org/x/text/internal/language"
)
// errSyntax is a package-local alias for language.ErrSyntax.
var errSyntax = language.ErrSyntax
// parseTest describes one parsing case: the input string and the components
// the parsed tag is expected to have.
type parseTest struct {
i int // the index of this test
in string // the tag string to parse
lang, script, region string // expected core components
variants, ext string // expected variants and (single) extension
extList []string // only used when more than one extension is present
invalid bool // the input is marked as invalid
rewrite bool // special rewrite not handled by parseTag
changed bool // string needed to be reformatted
}
func parseTests() []parseTest {
tests := []parseTest{
{in: "root", lang: "und"},
{in: "und", lang: "und"},
{in: "en", lang: "en"},
{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
{in: "xy", lang: "und", invalid: true},
{in: "en-ZY", lang: "en", invalid: true},
{in: "gsw", lang: "gsw"},
{in: "sr_Latn", lang: "sr", script: "Latn"},
{in: "af-Arab", lang: "af", script: "Arab"},
{in: "nl-BE", lang: "nl", region: "BE"},
{in: "es-419", lang: "es", region: "419"},
{in: "und-001", lang: "und", region: "001"},
{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
// Variants
{in: "de-1901", lang: "de", variants: "1901"},
// Accept with unsuppressed script.
{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
// Specialized.
{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
// Maximum number of variants while adhering to prefix rules.
{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
// Sorting.
{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
// Duplicates variants are removed, but not an error.
{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
// Variants that do not have correct prefixes. We still accept these.
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
// Invalid variant.
{in: "de-1902", lang: "de", variants: "", invalid: true},
{in: "EN_CYRL", lang: "en", script: "Cyrl"},
// private use and extensions
{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
{in: "en-v-c", lang: "en", ext: "", invalid: true},
{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
{in: "en-u-c", lang: "en", ext: "", invalid: true},
{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
// Invalid "u" extension. Drop invalid parts.
{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
// TODO: Consider eliminating duplicates and returning an error.
{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
// Not necessary to have changed here.
{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
{in: "fr-est", lang: "et", changed: true},
{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
// invalid
{in: "", lang: "und", invalid: true},
{in: "-", lang: "und", invalid: true},
{in: "x", lang: "und", invalid: true},
{in: "x-", lang: "und", invalid: true},
{in: "x--", lang: "und", invalid: true},
{in: "a-a-b-c-d", lang: "und", invalid: true},
{in: "en-", lang: "en", invalid: true},
{in: "enne-", lang: "und", invalid: true},
{in: "en.", lang: "und", invalid: true},
{in: "en.-latn", lang: "und", invalid: true},
{in: "en.-en", lang: "en", invalid: true},
{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
// TODO: check key-value validity
// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
{in: "en-t-abcd", lang: "en", invalid: true},
{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
// rewrites (more tests in TestGrandfathered)
{in: "zh-min-nan", lang: "nan"},
{in: "zh-yue", lang: "yue"},
{in: "zh-xiang", lang: "hsn", rewrite: true},
{in: "zh-guoyu", lang: "cmn", rewrite: true},
{in: "iw", lang: "iw"},
{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
{in: "i-klingon", lang: "tlh", rewrite: true},
}
for i, tt := range tests {
tests[i].i = i
if tt.extList != nil {
tests[i].ext = strings.Join(tt.extList, "-")
}
if tt.ext != "" && tt.extList == nil {
tests[i].extList = []string{tt.ext}
}
}
return tests
}
// partChecks iterates over all parseTests entries, obtains a Tag for each one
// from f, and verifies that the tag's language, script, region, variants and
// extensions agree with the expectations recorded in the test entry. Entries
// for which f reports skip are ignored.
func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
	tests := parseTests()
	for i := range tests {
		tt := tests[i]
		tag, skip := f(&tt)
		if skip {
			continue
		}
		full := tag.Tag()

		wantLang, _ := language.ParseBase(tt.lang)
		if wantLang != full.LangID {
			t.Errorf("%d: lang was %q; want %q", i, full.LangID, wantLang)
		}
		wantScript, _ := language.ParseScript(tt.script)
		if wantScript != full.ScriptID {
			t.Errorf("%d: script was %q; want %q", i, full.ScriptID, wantScript)
		}
		wantRegion, _ := language.ParseRegion(tt.region)
		if wantRegion != full.RegionID {
			t.Errorf("%d: region was %q; want %q", i, full.RegionID, wantRegion)
		}

		// Variants() includes the leading '-'; drop it before comparing.
		gotVariants := full.Variants()
		if len(gotVariants) > 0 {
			gotVariants = gotVariants[1:]
		}
		if gotVariants != tt.variants {
			t.Errorf("%d: variants was %q; want %q", i, gotVariants, tt.variants)
		}

		gotExt := strings.Join(full.Extensions(), "-")
		if gotExt != tt.ext {
			t.Errorf("%d: extensions were %q; want %q", i, gotExt, tt.ext)
		}
	}
}
// mk parses s with language.Parse, discarding any parse error, and converts
// the result to a compact Tag via Make.
func mk(s string) Tag {
	parsed, _ := language.Parse(s)
	return Make(parsed)
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,91 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package compact
// Predefined compact tags. Und and the unexported und are the zero Tag; every
// other entry sets its language and locale fields to the same index constant
// (afIndex, amIndex, ...; these constants are defined outside this file).
var (
und = Tag{}
Und Tag = Tag{}
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
Amharic Tag = Tag{language: amIndex, locale: amIndex}
Arabic Tag = Tag{language: arIndex, locale: arIndex}
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
Catalan Tag = Tag{language: caIndex, locale: caIndex}
Czech Tag = Tag{language: csIndex, locale: csIndex}
Danish Tag = Tag{language: daIndex, locale: daIndex}
German Tag = Tag{language: deIndex, locale: deIndex}
Greek Tag = Tag{language: elIndex, locale: elIndex}
English Tag = Tag{language: enIndex, locale: enIndex}
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
Spanish Tag = Tag{language: esIndex, locale: esIndex}
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
Estonian Tag = Tag{language: etIndex, locale: etIndex}
Persian Tag = Tag{language: faIndex, locale: faIndex}
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
Filipino Tag = Tag{language: filIndex, locale: filIndex}
French Tag = Tag{language: frIndex, locale: frIndex}
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
Italian Tag = Tag{language: itIndex, locale: itIndex}
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
Kannada Tag = Tag{language: knIndex, locale: knIndex}
Korean Tag = Tag{language: koIndex, locale: koIndex}
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
Lao Tag = Tag{language: loIndex, locale: loIndex}
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
Malay Tag = Tag{language: msIndex, locale: msIndex}
Burmese Tag = Tag{language: myIndex, locale: myIndex}
Nepali Tag = Tag{language: neIndex, locale: neIndex}
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
Polish Tag = Tag{language: plIndex, locale: plIndex}
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
Romanian Tag = Tag{language: roIndex, locale: roIndex}
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
Slovak Tag = Tag{language: skIndex, locale: skIndex}
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
Serbian Tag = Tag{language: srIndex, locale: srIndex}
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
Swedish Tag = Tag{language: svIndex, locale: svIndex}
Swahili Tag = Tag{language: swIndex, locale: swIndex}
Tamil Tag = Tag{language: taIndex, locale: taIndex}
Telugu Tag = Tag{language: teIndex, locale: teIndex}
Thai Tag = Tag{language: thIndex, locale: thIndex}
Turkish Tag = Tag{language: trIndex, locale: trIndex}
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
Urdu Tag = Tag{language: urIndex, locale: urIndex}
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
)
@@ -0,0 +1,167 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"sort"
"strings"
)
// A Builder allows constructing a Tag from individual components.
// Its main user is Compose in the top-level language package.
//
// The zero value is ready to use: SetTag, AddExt, SetExt and AddVariant
// accumulate state, and Make assembles the resulting Tag.
type Builder struct {
// Tag supplies the language, script and region of the tag being built.
Tag Tag
private string // the x extension
// variants holds the variant subtags without a leading '-'; Make sorts them.
variants []string
// extensions holds the non-private extensions, each starting with its
// singleton letter; Make sorts them.
extensions []string
}
// Make returns a new Tag from the current settings.
//
// If the Builder holds variants or non-private extensions, the variants are
// sorted by their variantIndex order, the extensions are sorted
// lexically, and the private-use ("x") extension, if any, is placed last. The
// assembled string is then re-parsed to obtain the final Tag; the parse error
// is deliberately ignored, as the components are expected to be valid.
//
// If only a private-use extension is set, it becomes the tag's string and
// RemakeString prepends the language, script and region as needed.
//
// Make sorts b.variants and b.extensions in place but does not otherwise
// modify the Builder, so it may be called repeatedly (optionally with further
// AddExt/SetExt calls in between) without duplicating the private-use part.
func (b *Builder) Make() Tag {
	t := b.Tag

	if len(b.extensions) == 0 && len(b.variants) == 0 {
		if b.private != "" {
			t.str = b.private
			t.RemakeString()
		}
		return t
	}

	sort.Sort(sortVariants(b.variants))
	sort.Strings(b.extensions)

	// Build the extension list for this call locally. Appending the
	// private-use extension to b.extensions itself would leave it in the
	// Builder, so a later Make would append it a second time.
	exts := b.extensions
	if b.private != "" {
		// The full slice expression caps the capacity so that append copies
		// instead of writing into b.extensions' backing array.
		exts = append(exts[:len(exts):len(exts)], b.private)
	}

	n := maxCoreSize + tokenLen(b.variants...) + tokenLen(exts...)
	buf := make([]byte, n)
	p := t.genCoreBytes(buf)
	t.pVariant = byte(p)
	p += appendTokens(buf[p:], b.variants...)
	t.pExt = uint16(p)
	p += appendTokens(buf[p:], exts...)
	t.str = string(buf[:p])

	// We may not always need to remake the string, but when or when not
	// to do so is rather tricky.
	scan := makeScanner(buf[:p])
	t, _ = parse(&scan, "")
	return t
}
// SetTag replaces the Builder's state with the components of t: its language,
// region and script, its variants, and its extensions. Anything set on the
// Builder before the call is dropped.
func (b *Builder) SetTag(t Tag) {
	b.Tag.LangID, b.Tag.RegionID, b.Tag.ScriptID = t.LangID, t.RegionID, t.ScriptID

	// TODO: optimize
	b.variants = b.variants[:0]
	if v := t.Variants(); v != "" {
		// Variants() includes the leading '-'; skip it before splitting.
		b.variants = append(b.variants, strings.Split(v[1:], "-")...)
	}

	b.private = ""
	b.extensions = b.extensions[:0]
	for _, ext := range t.Extensions() {
		b.AddExt(ext)
	}
}
// AddExt adds extension e to the tag. e must be a valid, non-empty extension
// as returned by Tag.Extension. If an extension with the same singleton
// already exists, e is discarded, except for a -u extension, where the
// key-type pairs of e are appended to the existing one. A private-use ("x")
// extension is only recorded if none has been set yet.
func (b *Builder) AddExt(e string) {
	singleton := e[0]
	if singleton == 'x' {
		if b.private == "" {
			b.private = e
		}
		return
	}
	for i := range b.extensions {
		if b.extensions[i][0] != singleton {
			continue
		}
		if singleton == 'u' {
			// e[1:] starts with '-', so this appends e's key-type pairs.
			b.extensions[i] += e[1:]
		}
		return
	}
	b.extensions = append(b.extensions, e)
}
// SetExt sets extension e on the tag. e must be a valid, non-empty extension
// as returned by Tag.Extension. An existing extension with the same singleton
// is overwritten, except for a -u extension, where e is placed in front of
// the existing key-type pairs. A private-use ("x") extension always replaces
// the current one.
func (b *Builder) SetExt(e string) {
	singleton := e[0]
	if singleton == 'x' {
		b.private = e
		return
	}
	for i, cur := range b.extensions {
		if cur[0] != singleton {
			continue
		}
		switch singleton {
		case 'u':
			// cur[1:] starts with '-': keep the old pairs after the new ones.
			b.extensions[i] = e + cur[1:]
		default:
			b.extensions[i] = e
		}
		return
	}
	b.extensions = append(b.extensions, e)
}
// AddVariant appends each non-empty string in v to the Builder's variants.
func (b *Builder) AddVariant(v ...string) {
	for i := range v {
		if v[i] == "" {
			continue
		}
		b.variants = append(b.variants, v[i])
	}
}
// ClearVariants drops every variant held by the Builder, whether it was added
// with AddVariant or copied from a Tag by SetTag. The backing storage is kept
// for reuse.
func (b *Builder) ClearVariants() {
	kept := b.variants[:0]
	b.variants = kept
}
// ClearExtensions drops every extension held by the Builder, including the
// private-use extension and any extensions copied from a Tag by SetTag.
func (b *Builder) ClearExtensions() {
	b.extensions, b.private = b.extensions[:0], ""
}
// tokenLen returns the number of bytes needed to write all tokens, each
// preceded by a one-byte separator.
func tokenLen(token ...string) (n int) {
	n = len(token) // one separator per token
	for i := range token {
		n += len(token[i])
	}
	return n
}
// appendTokens writes each token into b, preceded by a '-', starting at the
// beginning of b, and returns the total number of bytes accounted for. The
// caller must size b to hold at least tokenLen(token...) bytes.
func appendTokens(b []byte, token ...string) int {
	pos := 0
	for _, tok := range token {
		b[pos] = '-'
		pos++
		copy(b[pos:], tok)
		pos += len(tok)
	}
	return pos
}
// sortVariants implements sort.Interface for a list of variant subtags,
// ordering them by their value in variantIndex.
type sortVariants []string

// Len reports the number of variants.
func (s sortVariants) Len() int { return len(s) }

// Swap exchanges the variants at positions i and j.
func (s sortVariants) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the variant at i has a smaller variantIndex value than
// the variant at j.
func (s sortVariants) Less(i, j int) bool {
	left, right := variantIndex[s[i]], variantIndex[s[j]]
	return left < right
}
@@ -0,0 +1,67 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"strings"
"testing"
)
// parseBase returns the zero Language for an empty string and otherwise
// defers to MustParseBase.
func parseBase(s string) Language {
	if s != "" {
		return MustParseBase(s)
	}
	return 0
}
// parseScript returns the zero Script for an empty string and otherwise
// defers to MustParseScript.
func parseScript(s string) Script {
	if s != "" {
		return MustParseScript(s)
	}
	return 0
}
// parseRegion returns the zero Region for an empty string and otherwise
// defers to MustParseRegion.
func parseRegion(s string) Region {
	if s != "" {
		return MustParseRegion(s)
	}
	return 0
}
// TestBuilder rebuilds every test tag from its individual parts (language,
// script, region, variants, extensions) with a Builder and checks that the
// result equals the tag produced by Make for the same input.
func TestBuilder(t *testing.T) {
	partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
		want := Make(tt.in)

		var b Builder
		core := Tag{
			LangID:   parseBase(tt.lang),
			ScriptID: parseScript(tt.script),
			RegionID: parseRegion(tt.region),
		}
		b.SetTag(core)
		if tt.variants != "" {
			parts := strings.Split(tt.variants, "-")
			b.AddVariant(parts...)
		}
		for _, ext := range want.Extensions() {
			b.AddExt(ext)
		}

		got := b.Make()
		if got != want {
			t.Errorf("%s: got %v; want %v", tt.in, got, want)
		}
		return got, false
	})
}
// TestSetTag checks that copying a tag into a Builder with SetTag and calling
// Make reproduces the original tag.
func TestSetTag(t *testing.T) {
	partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
		want := Make(tt.in)

		var b Builder
		b.SetTag(want)

		got := b.Make()
		if got != want {
			t.Errorf("%s: got %v; want %v", tt.in, got, want)
		}
		return got, false
	})
}
@@ -0,0 +1,28 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
// BaseLanguages returns the list of all supported base languages. The list is
// assembled in two passes over the internal tables: first every ID below
// langNoIndexOffset, then one ID per set bit in langNoIndex.
func BaseLanguages() []Language {
	langs := make([]Language, 0, NumLanguages)

	for id := 0; id < langNoIndexOffset; id++ {
		// We included "und" already for the value 0.
		if id == nonCanonicalUnd {
			continue
		}
		langs = append(langs, Language(id))
	}

	// Each element of langNoIndex covers 8 consecutive IDs, lowest bit first.
	next := langNoIndexOffset
	for _, bits := range langNoIndex {
		for stop := next + 8; next < stop; next++ {
			if bits&1 == 1 {
				langs = append(langs, Language(next))
			}
			bits >>= 1
		}
	}
	return langs
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,20 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ignore
package main
// This file contains code common to the maketables.go and the package code.
// AliasType is the type of an alias in AliasMap.
type AliasType int8
// The known alias types. Deprecated, Macro and Legacy take the values 0, 1
// and 2 through iota; AliasTypeUnknown is explicitly -1.
const (
Deprecated AliasType = iota
Macro
Legacy
AliasTypeUnknown AliasType = -1
)
@@ -0,0 +1,627 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go -output tables.go
package language // import "golang.org/x/text/internal/language"
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
import (
"errors"
"fmt"
"strings"
)
// Buffer sizes used when assembling tag strings.
const (
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
maxCoreSize = 12
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
// is large enough to hold at least 99% of the BCP 47 tags.
// RemakeString uses it for its stack buffer.
max99thPercentileSize = 32
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
// SetTypeForKey adds it to maxCoreSize to size its scratch buffer.
maxSimpleUExtensionSize = 14
)
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed. The zero value of Tag is Und.
type Tag struct {
// TODO: the following fields have the form TagTypeID. This name is chosen
// to allow refactoring the public package without conflicting with its
// Base, Script, and Region methods. Once the transition is fully completed
// the ID can be stripped from the name.
// LangID is the raw base language, as returned by Raw.
LangID Language
// RegionID is the raw region, as returned by Raw; 0 means no region.
RegionID Region
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
// storing lang, region, and ScriptID codes, store only the compact index and
// have a lookup table from this code to its expansion. This greatly speeds
// up table lookup, speed up common variant cases.
// This will also immediately free up 3 extra bytes. Also, the pVariant
// field can now be moved to the lookup table, as the compact index uniquely
// determines the offset of a possible variant.
// ScriptID is the raw script, as returned by Raw; 0 means no script.
ScriptID Script
pVariant byte // offset in str, includes preceding '-'
pExt uint16 // offset of first extension, includes preceding '-'
// str is the string representation of the Tag. It will only be used if the
// tag has variants or extensions.
str string
}
// Make parses s with Parse and returns the resulting Tag, dropping the error.
// When parsing fails, the Tag that Parse returned alongside the error is used
// as a sensible default.
func Make(s string) Tag {
	tag, _ := Parse(s)
	return tag
}
// Raw returns the base language, script and region exactly as stored in the
// tag, without attempting to infer missing values.
// TODO: consider removing
func (t Tag) Raw() (b Language, s Script, r Region) {
	b, s, r = t.LangID, t.ScriptID, t.RegionID
	return b, s, r
}
// equalTags reports whether t and a have the same language, script and
// region. Variants and extensions are not considered.
func (t Tag) equalTags(a Tag) bool {
	if t.LangID != a.LangID {
		return false
	}
	if t.ScriptID != a.ScriptID {
		return false
	}
	return t.RegionID == a.RegionID
}
// IsRoot reports whether t is the language "und": it must carry no string
// data past pVariant and must match Und in language, script and region.
func (t Tag) IsRoot() bool {
	return int(t.pVariant) >= len(t.str) && t.equalTags(Und)
}
// IsPrivateUse reports whether the Tag consists solely of a private use tag,
// i.e. it has a string representation whose variant offset is zero.
func (t Tag) IsPrivateUse() bool {
	return t.pVariant == 0 && t.str != ""
}
// RemakeString regenerates t.str after the language, script or region has
// changed. pExt and pVariant are assumed to still point at the start of their
// respective parts in the old string; they are adjusted to match the new one.
// Tags without a string representation are left untouched.
func (t *Tag) RemakeString() {
	if t.str == "" {
		return
	}

	// Everything after the core, without the leading '-' if there is one.
	tail := t.str[t.pVariant:]
	if t.pVariant > 0 {
		tail = tail[1:]
	}

	// "und" followed by a private-use part is represented by that part alone.
	if t.equalTags(Und) && strings.HasPrefix(tail, "x-") {
		t.str, t.pVariant, t.pExt = tail, 0, 0
		return
	}

	var scratch [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
	out := scratch[:t.genCoreBytes(scratch[:])]

	if tail == "" {
		t.pVariant = uint8(len(out))
		t.pExt = uint16(len(out))
		t.str = string(out)
		return
	}

	// Shift both offsets by the change in length of the core part.
	shift := len(out) - int(t.pVariant)
	out = append(out, '-')
	out = append(out, tail...)
	t.pVariant = uint8(int(t.pVariant) + shift)
	t.pExt = uint16(int(t.pExt) + shift)
	t.str = string(out)
}
// genCoreBytes writes the base language and, when set, the script and region
// (each preceded by '-') into buf and returns the number of bytes written. It
// never writes more than maxCoreSize bytes.
func (t *Tag) genCoreBytes(buf []byte) int {
	written := t.LangID.StringToBuf(buf)
	if id := t.ScriptID; id != 0 {
		written += copy(buf[written:], "-")
		written += copy(buf[written:], id.String())
	}
	if id := t.RegionID; id != 0 {
		written += copy(buf[written:], "-")
		written += copy(buf[written:], id.String())
	}
	return written
}
// String returns the canonical string representation of the language tag:
// the stored string if there is one, the bare language if neither script nor
// region is set, and otherwise the freshly generated core string.
func (t Tag) String() string {
	switch {
	case t.str != "":
		return t.str
	case t.ScriptID == 0 && t.RegionID == 0:
		return t.LangID.String()
	}
	var core [maxCoreSize]byte
	n := t.genCoreBytes(core[:])
	return string(core[:n])
}
// MarshalText implements encoding.TextMarshaler. It produces the same bytes
// as String and never returns an error.
func (t Tag) MarshalText() (text []byte, err error) {
	switch {
	case t.str != "":
		text = append(text, t.str...)
	case t.ScriptID == 0 && t.RegionID == 0:
		text = append(text, t.LangID.String()...)
	default:
		var core [maxCoreSize]byte
		text = core[:t.genCoreBytes(core[:])]
	}
	return text, nil
}
// UnmarshalText implements encoding.TextUnmarshaler. *t is always overwritten
// with the Tag returned by Parse, even when Parse also reports an error.
func (t *Tag) UnmarshalText(text []byte) error {
	var err error
	*t, err = Parse(string(text))
	return err
}
// Variants returns the portion of the tag string that holds the variants,
// including the leading '-', or the empty string when the variant offset is
// zero.
func (t Tag) Variants() string {
	if t.pVariant != 0 {
		return t.str[t.pVariant:t.pExt]
	}
	return ""
}
// VariantOrPrivateUseTags returns variants or private use tags: the string
// from pVariant up to pExt when an extension offset is recorded, and
// everything from pVariant onwards otherwise.
func (t Tag) VariantOrPrivateUseTags() string {
	if t.pExt == 0 {
		return t.str[t.pVariant:]
	}
	return t.str[t.pVariant:t.pExt]
}
// HasString reports whether the tag carries a string representation, which is
// the case when it defines more than just the raw components.
func (t Tag) HasString() bool {
	return len(t.str) > 0
}
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
//
// Summary of the cases handled below:
//   - A tag with variants or extensions yields the same tag without them
//     (reduced to the bare language if it has only a script and that script
//     is the one addTags reports for the language).
//   - A tag with a region yields the entry from the parents table if one
//     matches, and otherwise the language with or without script.
//   - A tag with only a script yields the bare language if the script is the
//     one addTags reports for the language, and Und otherwise.
//   - Everything else yields Und.
func (t Tag) Parent() Tag {
if t.str != "" {
// Strip the variants and extensions.
b, s, r := t.Raw()
t = Tag{LangID: b, ScriptID: s, RegionID: r}
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
// NOTE(review): addTags presumably fills in the likely script and
// region for the language; confirm against its definition.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID == t.ScriptID {
return Tag{LangID: t.LangID}
}
}
return t
}
if t.LangID != 0 {
if t.RegionID != 0 {
// Use the explicit script if present, otherwise the script that
// addTags assigns to t.
maxScript := t.ScriptID
if maxScript == 0 {
max, _ := addTags(t)
maxScript = max.ScriptID
}
// Look for a parents entry with this language and script whose
// fromRegion list contains t's region.
for i := range parents {
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
for _, r := range parents[i].fromRegion {
if Region(r) == t.RegionID {
return Tag{
LangID: t.LangID,
ScriptID: Script(parents[i].script),
RegionID: Region(parents[i].toRegion),
}
}
}
}
}
// Strip the script if it is the default one.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != maxScript {
return Tag{LangID: t.LangID, ScriptID: maxScript}
}
return Tag{LangID: t.LangID}
} else if t.ScriptID != 0 {
// The parent for a base-script pair with a non-default script is
// "und" instead of the base language.
base, _ := addTags(Tag{LangID: t.LangID})
if base.ScriptID != t.ScriptID {
return Und
}
return Tag{LangID: t.LangID}
}
}
return Und
}
// ParseExtension parses s as a single extension and returns its lower-cased
// form on success. It returns ErrSyntax if s does not start with a
// one-character token, if the parsed extension does not span all of s, or if
// parsing panics.
func ParseExtension(s string) (ext string, err error) {
	defer func() {
		if r := recover(); r != nil {
			ext, err = "", ErrSyntax
		}
	}()

	sc := makeScannerString(s)
	if len(sc.token) != 1 {
		return "", ErrSyntax
	}
	sc.toLower(0, len(sc.b))
	if parseExtension(&sc) != len(s) {
		return "", ErrSyntax
	}
	return string(sc.b), nil
}
// HasVariants reports whether t has variants, i.e. whether the variant offset
// lies before the extension offset.
func (t Tag) HasVariants() bool {
	return t.pExt > uint16(t.pVariant)
}
// HasExtensions reports whether t has extensions, i.e. whether the tag string
// continues past the extension offset.
func (t Tag) HasExtensions() bool {
	return len(t.str) > int(t.pExt)
}
// Extension returns the extension of t whose singleton is x. If t has no such
// extension, ok is false and the returned extension is the empty string,
// which is not a valid extension.
func (t Tag) Extension(x byte) (ext string, ok bool) {
	pos, limit := int(t.pExt), len(t.str)-1
	for pos < limit {
		var cur string
		pos, cur = getExtension(t.str, pos)
		if cur[0] == x {
			return cur, true
		}
	}
	return "", false
}
// Extensions returns all extensions of t in the order they appear in the tag
// string. The result is an empty, non-nil slice when t has no extensions.
func (t Tag) Extensions() []string {
	all := make([]string, 0)
	pos, limit := int(t.pExt), len(t.str)-1
	for pos < limit {
		var cur string
		pos, cur = getExtension(t.str, pos)
		all = append(all, cur)
	}
	return all
}
// TypeForKey returns the type associated with the given key, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// When a key has several types, only the first one is returned. When the key
// has no type, the empty string is returned.
func (t Tag) TypeForKey(key string) string {
	_, start, end, _ := t.findTypeForKey(key)
	if end == start {
		return ""
	}
	typ := t.str[start:end]
	// Keep only the first of possibly several '-'-separated types.
	if dash := strings.IndexByte(typ, '-'); dash >= 0 {
		typ = typ[:dash]
	}
	return typ
}
// Errors returned by SetTypeForKey.
var (
// errPrivateUse is returned when the receiver is a private use tag.
errPrivateUse = errors.New("cannot set a key on a private use tag")
// errInvalidArguments is returned when the key or value has an invalid length.
errInvalidArguments = errors.New("invalid key or type")
)
// SetTypeForKey returns a new Tag with the key set to type, where key and type
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// An empty value removes an existing pair with the same key.
//
// It returns errPrivateUse for a private use tag, errInvalidArguments if key
// is not 2 bytes long or a non-empty value is not 3 to 8 bytes long, and the
// scanner's error if the resulting "u-key-value" extension does not parse.
// On error the unmodified tag is returned.
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
if t.IsPrivateUse() {
return t, errPrivateUse
}
if len(key) != 2 {
return t, errInvalidArguments
}
// Remove the setting if value is "".
if value == "" {
start, sep, end, _ := t.findTypeForKey(key)
// start != sep means the key was found.
if start != sep {
// Remove a possible empty extension.
switch {
case t.str[start-2] != '-': // has previous elements.
case end == len(t.str), // end of string
end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
start -= 2
}
// If nothing but the core remains, drop the string altogether.
if start == int(t.pVariant) && end == len(t.str) {
t.str = ""
t.pVariant, t.pExt = 0, 0
} else {
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
}
}
return t, nil
}
if len(value) < 3 || len(value) > 8 {
return t, errInvalidArguments
}
var (
buf [maxCoreSize + maxSimpleUExtensionSize]byte
uStart int // start of the -u extension.
)
// Generate the tag string if needed.
if t.str == "" {
uStart = t.genCoreBytes(buf[:])
buf[uStart] = '-'
uStart++
}
// Create new key-type pair and parse it to verify.
// b holds "u-" + key + "-" + value.
b := buf[uStart:]
copy(b, "u-")
copy(b[2:], key)
b[4] = '-'
b = b[:5+copy(b[5:], value)]
scan := makeScanner(b)
if parseExtensions(&scan); scan.err != nil {
return t, scan.err
}
// Assemble the replacement string.
if t.str == "" {
// The tag had no string: it becomes core + "-" + b, with both offsets
// pointing at the '-' in front of the new extension.
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, sep, end, hasExt := t.findTypeForKey(key)
if start == sep {
// Key not present: insert the pair at the reported position. If a -u
// extension already exists, only "key-value" is inserted.
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
} else {
// Key present: keep "-key" (3 bytes from start) and replace its value.
t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
}
}
return t, nil
}
// findTypeForKey returns the start and end position for the type corresponding
// to key or the point at which to insert the key-value pair if the type
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
//
// When the key is not found, start, sep and end are all equal to the
// insertion point. When it is found, start is the offset of the '-' before
// the key and end is the offset just past its type.
func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
p := int(t.pExt)
// Bail out for a malformed key or a tag without extensions.
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, p, false
}
s := t.str
// Find the correct extension.
for p++; s[p] != 'u'; p++ {
// A singleton past 'u' means there is no -u extension (presumably the
// extensions are stored in sorted order); the insertion point is the '-'
// in front of this extension.
if s[p] > 'u' {
p--
return p, p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
p++
// curKey is the key currently being processed.
curKey := ""
// Iterate over keys until we get the end of a section.
for {
end = p
// Advance p to the next '-' or the end of the string.
for p++; p < len(s) && s[p] != '-'; p++ {
}
// n is the length of the token between the '-' at end and position p.
n := p - end - 1
if n <= 2 && curKey == key {
if sep < end {
sep++
}
return start, sep, end, true
}
switch n {
case 0, // invalid string
1: // next extension
return end, end, end, true
case 2:
// next key
curKey = s[end+1 : p]
// Keys beyond the requested one: insert here.
if curKey > key {
return end, end, end, true
}
start = end
sep = p
}
}
}
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language
// identifier. For any other failure, including a string of the wrong length
// or a panic during lookup, it returns ErrSyntax or the lookup's error.
func ParseBase(s string) (l Language, err error) {
	defer func() {
		if r := recover(); r != nil {
			l, err = 0, ErrSyntax
		}
	}()

	switch len(s) {
	case 2, 3:
		// acceptable length
	default:
		return 0, ErrSyntax
	}
	var code [3]byte
	n := copy(code[:], s)
	return getLangID(code[:n])
}
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script
// identifier. For any other failure, including a string of the wrong length
// or a panic during lookup, it returns ErrSyntax or the lookup's error.
func ParseScript(s string) (scr Script, err error) {
	defer func() {
		if r := recover(); r != nil {
			scr, err = 0, ErrSyntax
		}
	}()

	if len(s) != 4 {
		return 0, ErrSyntax
	}
	var code [4]byte
	n := copy(code[:], s)
	return getScriptID(script, code[:n])
}
// EncodeM49 looks up the Region for the UN M.49 code r.
// An error is returned when r is not a valid code.
func EncodeM49(r int) (Region, error) {
	region, err := getRegionM49(r)
	return region, err
}
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region
// identifier. For any other failure, including a string of the wrong length
// or a panic during lookup, it returns ErrSyntax or the lookup's error.
func ParseRegion(s string) (r Region, err error) {
	defer func() {
		if rec := recover(); rec != nil {
			r, err = 0, ErrSyntax
		}
	}()

	switch len(s) {
	case 2, 3:
		// acceptable length
	default:
		return 0, ErrSyntax
	}
	var code [3]byte
	n := copy(code[:], s)
	return getRegionID(code[:n])
}
// IsCountry reports whether this region is a country or autonomous area. This
// includes non-standard definitions from CLDR. The zero region, groups, and
// private use regions other than XK are not countries.
func (r Region) IsCountry() bool {
	switch {
	case r == 0, r.IsGroup():
		return false
	case r.IsPrivateUse():
		return r == _XK
	}
	return true
}
// IsGroup reports whether this region defines a collection of regions. This
// includes non-standard definitions from CLDR. The zero region is never a
// group.
func (r Region) IsGroup() bool {
	return r != 0 && int(regionInclusion[r]) < len(regionContainment)
}
// Contains reports whether Region c is contained by Region r. A region always
// contains itself.
func (r Region) Contains(c Region) bool {
	if r == c {
		return true
	}
	outer := regionInclusion[r]
	if outer >= nRegionGroups {
		// r is not a group, so it cannot contain a different region.
		return false
	}
	mask := regionContainment[outer]

	inner := regionInclusion[c]
	bits := regionInclusionBits[inner]

	// A contained country may belong to multiple disjoint groups. Matching any
	// of these indicates containment. If the contained region is a group, it
	// must strictly be a subset.
	if inner < nRegionGroups {
		return bits&^mask == 0
	}
	return bits&mask != 0
}
// errNoTLD is returned by Region.TLD for regions without a ccTLD.
var errNoTLD = errors.New("language: region is not a valid ccTLD")

// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
// In all other cases it returns either the region itself or an error.
//
// This method may return an error for a region for which there exists a
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
// region will already be canonicalized if it was obtained from a Tag that was
// obtained using any of the default methods.
func (r Region) TLD() (Region, error) {
	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
	// difference between ISO 3166-1 and IANA ccTLD.
	tld := r
	if tld == _GB {
		tld = _UK
	}
	if tld.typ()&ccTLD != 0 {
		return tld, nil
	}
	return 0, errNoTLD
}
// Canonicalize returns the region or a possible replacement if the region is
// deprecated. It will not return a replacement for deprecated regions that
// are split into multiple regions.
func (r Region) Canonicalize() Region {
	replacement := normRegion(r)
	if replacement == 0 {
		// No replacement is registered; the region is returned as is.
		return r
	}
	return replacement
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
// ID is the value stored for the variant in variantIndex.
ID uint8
// str is the lower-cased variant string; it is what String returns.
str string
}
// ParseVariant parses and returns a Variant. The lookup is case-insensitive:
// s is lower-cased before it is looked up in variantIndex. A ValueError is
// returned if s is not a known variant, and ErrSyntax if the lookup panics.
func ParseVariant(s string) (v Variant, err error) {
	// Turn any panic raised below into a plain syntax error.
	defer func() {
		if p := recover(); p != nil {
			v, err = Variant{}, ErrSyntax
		}
	}()

	lower := strings.ToLower(s)
	id, known := variantIndex[lower]
	if !known {
		return Variant{}, NewValueError([]byte(lower))
	}
	return Variant{ID: id, str: lower}, nil
}
// String returns the string representation of the variant, which is the
// stored str field. The zero Variant yields the empty string.
func (v Variant) String() string {
return v.str
}
@@ -0,0 +1,746 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"reflect"
"testing"
"golang.org/x/text/internal/testtext"
)
// TestTagSize guards against Tag growing beyond 32 bytes.
func TestTagSize(t *testing.T) {
	if size := reflect.TypeOf(Tag{}).Size(); size > 32 {
		t.Errorf("size of Tag was %d; want <= 32", size)
	}
}
// TestIsRoot checks that the zero Tag is root and that a parsed tag is root
// exactly when its test case expects language "und" with no script, region
// or extension.
func TestIsRoot(t *testing.T) {
	var zero Tag
	if !zero.IsRoot() {
		t.Errorf("unspecified should be root.")
	}
	for i, tt := range parseTests() {
		tag, _ := Parse(tt.in)
		wantRoot := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
		if got := tag.IsRoot(); got != wantRoot {
			t.Errorf("%d: was %v; want %v", i, got, wantRoot)
		}
	}
}
// TestEquality checks that a tag survives a round trip through its string
// form: Make(tag.String()) must compare equal to tag.
func TestEquality(t *testing.T) {
	for i, tt := range parseTests() {
		want := Make(tt.in)
		got := Make(want.String())
		if got == want {
			continue
		}
		t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, tt.in, got, want)
	}
}
// TestMakeString checks that RemakeString rebuilds the string form of a tag
// after its components were overwritten with setTagsFrom, that doing so twice
// gives the same result, and that it needs at most one allocation.
func TestMakeString(t *testing.T) {
// in is the tag to start from; out is both the tag whose components are
// copied into it and the expected resulting string.
tests := []struct{ in, out string }{
{"und", "und"},
{"und", "und-CW"},
{"nl", "nl-NL"},
{"de-1901", "nl-1901"},
{"de-1901", "de-Arab-1901"},
{"x-a-b", "de-Arab-x-a-b"},
{"x-a-b", "x-a-b"},
}
for i, tt := range tests {
id, _ := Parse(tt.in)
mod, _ := Parse(tt.out)
id.setTagsFrom(mod)
// Run twice to verify that remaking an already remade string is stable.
for j := 0; j < 2; j++ {
id.RemakeString()
if str := id.String(); str != tt.out {
t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
}
}
// The bytes to string conversion as used in remakeString
// occasionally measures as more than one alloc, breaking this test.
// To alleviate this we set the number of runs to more than 1.
if n := testtext.AllocsPerRun(8, id.RemakeString); n > 1 {
t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
}
}
}
// TestMarshal checks that UnmarshalText followed by MarshalText reproduces
// the input text for a set of already canonical tags.
func TestMarshal(t *testing.T) {
	inputs := []string{
		// TODO: these values will change with each CLDR update. This issue
		// will be solved if we decide to fix the indexes.
		"und",
		"ca-ES-valencia",
		"ca-ES-valencia-u-va-posix",
		"ca-ES-valencia-u-co-phonebk",
		"ca-ES-valencia-u-co-phonebk-va-posix",
		"x-klingon",
		"en-US",
		"en-US-u-va-posix",
		"en",
		"en-u-co-phonebk",
		"en-001",
		"sh",
	}
	for _, in := range inputs {
		var tag Tag
		if err := tag.UnmarshalText([]byte(in)); err != nil {
			t.Errorf("UnmarshalText(%q): unexpected error: %v", in, err)
		}
		text, err := tag.MarshalText()
		if err != nil {
			t.Errorf("MarshalText(%q): unexpected error: %v", in, err)
		}
		if got := string(text); got != in {
			t.Errorf("%s: got %q; want %q", in, got, in)
		}
	}
}
// TestParseBase checks ParseBase against a table of inputs and verifies that
// the result matches the language component of Make(out).
func TestParseBase(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	tests := []testCase{
		{"en", "en", true},
		{"EN", "en", true},
		{"nld", "nl", true},
		{"dut", "dut", true},  // bibliographic
		{"aaj", "und", false}, // unknown
		{"qaa", "qaa", true},
		{"a", "und", false},
		{"", "und", false},
		{"aaaa", "und", false},
	}
	for i, tt := range tests {
		got, err := ParseBase(tt.in)
		ok := err == nil
		if got.String() != tt.out || ok != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, got, ok, tt.out, tt.ok)
		}
		want, _, _ := Make(tt.out).Raw()
		if got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, got, want)
		}
	}
}
// TestParseScript checks ParseScript against a table of inputs and, for
// successful parses, verifies that the result matches the script component of
// Make("und-" + out).
func TestParseScript(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	tests := []testCase{
		{"Latn", "Latn", true},
		{"zzzz", "Zzzz", true},
		{"zyyy", "Zyyy", true},
		{"Latm", "Zzzz", false},
		{"Zzz", "Zzzz", false},
		{"", "Zzzz", false},
		{"Zzzxx", "Zzzz", false},
	}
	for i, tt := range tests {
		got, err := ParseScript(tt.in)
		ok := err == nil
		if got.String() != tt.out || ok != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, got, ok, tt.out, tt.ok)
		}
		if !ok {
			continue
		}
		if _, want, _ := Make("und-" + tt.out).Raw(); got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, got, want)
		}
	}
}
// TestEncodeM49 checks EncodeM49 against a few known codes and verifies that
// every code in 1..1000 that is accepted maps to a region with a non-zero
// M49 value.
func TestEncodeM49(t *testing.T) {
	type testCase struct {
		m49  int
		code string
		ok   bool
	}
	tests := []testCase{
		{1, "001", true},
		{840, "US", true},
		{899, "ZZ", false},
	}
	for i, tt := range tests {
		r, err := EncodeM49(tt.m49)
		ok := err == nil
		if r.String() != tt.code || ok != tt.ok {
			t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, ok, tt.code, tt.ok)
		}
	}
	for code := 1; code <= 1000; code++ {
		r, err := EncodeM49(code)
		if err != nil {
			continue
		}
		if r.M49() == 0 {
			t.Errorf("%d has no error, but maps to undefined region", code)
		}
	}
}
// TestParseRegion checks ParseRegion against a table of inputs and, for
// successful parses, verifies that the result matches the region component of
// Make("und-" + out).
func TestParseRegion(t *testing.T) {
	type testCase struct {
		in  string
		out string
		ok  bool
	}
	tests := []testCase{
		{"001", "001", true},
		{"840", "US", true},
		{"899", "ZZ", false},
		{"USA", "US", true},
		{"US", "US", true},
		{"BC", "ZZ", false},
		{"C", "ZZ", false},
		{"CCCC", "ZZ", false},
		{"01", "ZZ", false},
	}
	for i, tt := range tests {
		got, err := ParseRegion(tt.in)
		ok := err == nil
		if got.String() != tt.out || ok != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, got, ok, tt.out, tt.ok)
		}
		if !ok {
			continue
		}
		if _, _, want := Make("und-" + tt.out).Raw(); got != want {
			t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, got, want)
		}
	}
}
// TestIsCountry checks Region.IsCountry for a table of region codes. Codes
// that getRegionID rejects are tested as the zero Region.
func TestIsCountry(t *testing.T) {
	type testCase struct {
		reg     string
		country bool
	}
	tests := []testCase{
		{"US", true},
		{"001", false},
		{"958", false},
		{"419", false},
		{"203", true},
		{"020", true},
		{"900", false},
		{"999", false},
		{"QO", false},
		{"EU", false},
		{"AA", false},
		{"XK", true},
	}
	for i, tt := range tests {
		r, _ := getRegionID([]byte(tt.reg))
		if got := r.IsCountry(); got != tt.country {
			t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, got, tt.country)
		}
	}
}
// TestIsGroup checks Region.IsGroup for a table of region codes. Codes that
// getRegionID rejects are tested as the zero Region.
func TestIsGroup(t *testing.T) {
	type testCase struct {
		reg   string
		group bool
	}
	tests := []testCase{
		{"US", false},
		{"001", true},
		{"958", false},
		{"419", true},
		{"203", false},
		{"020", false},
		{"900", false},
		{"999", false},
		{"QO", true},
		{"EU", true},
		{"AA", false},
		{"XK", false},
	}
	for i, tt := range tests {
		r, _ := getRegionID([]byte(tt.reg))
		if got := r.IsGroup(); got != tt.group {
			t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, got, tt.group)
		}
	}
}
// TestContains checks Region.Contains for self, direct, indirect and absent
// containment.
func TestContains(t *testing.T) {
	type testCase struct {
		enclosing, contained string
		contains             bool
	}
	tests := []testCase{
		// A region contains itself.
		{"US", "US", true},
		{"001", "001", true},

		// Direct containment.
		{"001", "002", true},
		{"039", "XK", true},
		{"150", "XK", true},
		{"EU", "AT", true},
		{"QO", "AQ", true},

		// Indirect containment.
		{"001", "US", true},
		{"001", "419", true},
		{"001", "013", true},

		// No containment.
		{"US", "001", false},
		{"155", "EU", false},
	}
	for i, tt := range tests {
		outer, _ := getRegionID([]byte(tt.enclosing))
		inner, _ := getRegionID([]byte(tt.contained))
		if got := outer.Contains(inner); got != tt.contains {
			t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
		}
	}
}
// TestRegionCanonicalize checks Region.Canonicalize against a table of
// deprecated region codes and their expected results.
func TestRegionCanonicalize(t *testing.T) {
	tests := []struct{ in, out string }{
		{"UK", "GB"},
		{"TP", "TL"},
		{"QU", "EU"},
		{"SU", "SU"},
		{"VD", "VN"},
		{"DD", "DE"},
	}
	for i, tt := range tests {
		region := MustParseRegion(tt.in)
		want := MustParseRegion(tt.out)
		got := region.Canonicalize()
		if got != want {
			t.Errorf("%d: got %v; want %v", i, got, want)
		}
	}
}
// TestRegionTLD checks Region.TLD against a table of region codes. An out
// value of "ZZ" means the zero Region is expected together with an error.
func TestRegionTLD(t *testing.T) {
	type testCase struct {
		in, out string
		ok      bool
	}
	tests := []testCase{
		{"EH", "EH", true},
		{"FR", "FR", true},
		{"TL", "TL", true},

		// In ccTLD before in ISO.
		{"GG", "GG", true},

		// Non-standard assignment of ccTLD to ISO code.
		{"GB", "UK", true},

		// Exceptionally reserved in ISO and valid ccTLD.
		{"UK", "UK", true},
		{"AC", "AC", true},
		{"EU", "EU", true},
		{"SU", "SU", true},

		// Exceptionally reserved in ISO and invalid ccTLD.
		{"CP", "ZZ", false},
		{"DG", "ZZ", false},
		{"EA", "ZZ", false},
		{"FX", "ZZ", false},
		{"IC", "ZZ", false},
		{"TA", "ZZ", false},

		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
		// it is still being phased out.
		{"AN", "AN", true},
		{"TP", "TP", true},

		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
		// Defined in package language as it has a mapping in CLDR.
		{"BU", "ZZ", false},
		{"CS", "ZZ", false},
		{"NT", "ZZ", false},
		{"YU", "ZZ", false},
		{"ZR", "ZZ", false},
		// Not defined in package: SF.

		// Indeterminately reserved in ISO.
		// Defined in package language as it has a legacy mapping in CLDR.
		{"DY", "ZZ", false},
		{"RH", "ZZ", false},
		{"VD", "ZZ", false},
		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
		// RN, RP, WG, WL, WV, and YV.

		// Not assigned in ISO, but legacy definitions in CLDR.
		{"DD", "ZZ", false},
		{"YD", "ZZ", false},

		// Normal mappings but somewhat special status in ccTLD.
		{"BL", "BL", true},
		{"MF", "MF", true},
		{"BV", "BV", true},
		{"SJ", "SJ", true},

		// Have values when normalized, but not as is.
		{"QU", "ZZ", false},

		// ISO Private Use.
		{"AA", "ZZ", false},
		{"QM", "ZZ", false},
		{"QO", "ZZ", false},
		{"XA", "ZZ", false},
		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
	}
	for _, tt := range tests {
		if tt.in == "" {
			continue
		}
		region := MustParseRegion(tt.in)

		// "ZZ" stands for the zero Region, which is what TLD returns on error.
		var want Region
		if tt.out != "ZZ" {
			want = MustParseRegion(tt.out)
		}

		tld, err := region.TLD()
		if ok := err == nil; ok != tt.ok {
			t.Errorf("error(%v): got %v; want %v", region, ok, tt.ok)
		}
		if tld != want {
			t.Errorf("TLD(%v): got %v; want %v", region, tld, want)
		}
	}
}
// TestTypeForKey checks Tag.TypeForKey for keys that are present, absent, or
// hidden behind a private-use extension.
func TestTypeForKey(t *testing.T) {
	tests := []struct{ key, in, out string }{
		{"co", "en", ""},
		{"co", "en-u-abc", ""},
		{"co", "en-u-co-phonebk", "phonebk"},
		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
		{"co", "x-foo-u-co-phonebk", ""},
		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
		{"kc", "cmn-u-co-stroke", ""},
	}
	for _, tt := range tests {
		got := Make(tt.in).TypeForKey(tt.key)
		if got == tt.out {
			continue
		}
		t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, got, tt.out)
	}
}
// TestSetTypeForKey checks Tag.SetTypeForKey: replacing, adding and removing
// key-type pairs in the -u extension, and rejecting invalid keys and values.
// For short tags it additionally repeats the call on a copy whose cached
// string and offsets have been cleared.
func TestSetTypeForKey(t *testing.T) {
tests := []struct {
key, value, in, out string
err bool
}{
// replace existing value
{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
{"co", "pinyin", "en-u-co-x-x", "en-u-co-pinyin-x-x", false},
{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
{"nu", "arabic", "en-u-co-phonebk-nu", "en-u-co-phonebk-nu-arabic", false},
// add to existing -u extension
{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
// remove pair
{"co", "", "en-u-co-phonebk", "en", false},
{"co", "", "en-u-co", "en", false},
{"co", "", "en-u-co-v", "en", false},
{"co", "", "en-u-co-v-", "en", false},
{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
{"co", "", "en-u-co-nu-arabic", "en-u-nu-arabic", false},
{"co", "", "en", "en", false},
// add -u extension
{"co", "pinyin", "en", "en-u-co-pinyin", false},
{"co", "pinyin", "und", "und-u-co-pinyin", false},
{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
// error on invalid values
{"co", "pinyinxxx", "en", "en", true},
{"co", "piny.n", "en", "en", true},
{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
{"col", "pinyin", "en", "en", true},
{"co", "cu", "en", "en", true},
// error when setting on a private use tag
{"co", "phonebook", "x-foo", "x-foo", true},
}
for i, tt := range tests {
tag := Make(tt.in)
// Check, in order: the resulting string, whether an error was reported,
// and (on success) that the value can be read back with TypeForKey.
if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
} else if (err != nil) != tt.err {
t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
}
if len(tag.String()) <= 3 {
// Simulate a tag for which the string has not been set.
tag.str, tag.pExt, tag.pVariant = "", 0, 0
// Note: tag and err are deliberately shadowed by the result of the call.
if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
}
}
}
}
}
// TestFindKeyAndType checks the offsets returned by Tag.findTypeForKey.
func TestFindKeyAndType(t *testing.T) {
	// out is either the matched type in case of a match or the original
	// string up till the insertion point.
	type testCase struct {
		key     string
		hasExt  bool
		in, out string
	}
	tests := []testCase{
		// Don't search past a private use extension.
		{"co", false, "en-x-foo-u-co-pinyin", "en"},
		{"co", false, "x-foo-u-co-pinyin", ""},
		{"co", false, "en-s-fff-x-foo", "en-s-fff"},
		// Insertion points in absence of -u extension.
		{"cu", false, "en", ""}, // t.str is ""
		{"cu", false, "en-v-va", "en"},
		{"cu", false, "en-a-va", "en-a-va"},
		{"cu", false, "en-a-va-v-va", "en-a-va"},
		{"cu", false, "en-x-a", "en"},
		// Tags with the -u extension.
		{"nu", true, "en-u-cu-nu", "en-u-cu"},
		{"cu", true, "en-u-cu-nu", "en-u"},
		{"co", true, "en-u-co-standard", "standard"},
		{"co", true, "yue-u-co-pinyin", "pinyin"},
		{"co", true, "en-u-co-abc", "abc"},
		{"co", true, "en-u-co-abc-def", "abc-def"},
		{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
		{"co", true, "en-u-co-standard-nu-arab", "standard"},
		{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
		// Insertion points.
		{"cu", true, "en-u-co-standard", "en-u-co-standard"},
		{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
		{"cu", true, "en-u-co-abc", "en-u-co-abc"},
		{"cu", true, "en-u-nu-arabic", "en-u"},
		{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
	}
	for i, tt := range tests {
		start, sep, end, hasExt := Make(tt.in).findTypeForKey(tt.key)

		// A non-empty [sep, end) range means the key was found with a type.
		if sep != end {
			if got := tt.in[sep:end]; got != tt.out {
				t.Errorf("%d:%s: was %q; want %q", i, tt.in, got, tt.out)
			}
			continue
		}

		// Otherwise start marks the insertion point.
		if hasExt != tt.hasExt {
			t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
			continue
		}
		if got := tt.in[:start]; got != tt.out {
			t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, got, tt.out)
		}
	}
}
// TestParent checks Tag.Parent against a table of tags and their expected
// parents.
func TestParent(t *testing.T) {
	tests := []struct{ in, out string }{
		// Strip variants and extensions first
		{"de-u-co-phonebk", "de"},
		{"de-1994", "de"},
		{"de-Latn-1994", "de"}, // remove superfluous script.

		// Ensure the canonical Tag for an entry is in the chain for base-script
		// pairs.
		{"zh-Hans", "zh"},

		// Skip the script if it is the maximized version. CLDR files for the
		// skipped tag are always empty.
		{"zh-Hans-TW", "zh"},
		{"zh-Hans-CN", "zh"},

		// Insert the script if the maximized script is not the same as the
		// maximized script of the base language.
		{"zh-TW", "zh-Hant"},
		{"zh-HK", "zh-Hant"},
		{"zh-Hant-TW", "zh-Hant"},
		{"zh-Hant-HK", "zh-Hant"},

		// Non-default script skips to und.
		// CLDR
		{"az-Cyrl", "und"},
		{"bs-Cyrl", "und"},
		{"en-Dsrt", "und"},
		{"ha-Arab", "und"},
		{"mn-Mong", "und"},
		{"pa-Arab", "und"},
		{"shi-Latn", "und"},
		{"sr-Latn", "und"},
		{"uz-Arab", "und"},
		{"uz-Cyrl", "und"},
		{"vai-Latn", "und"},
		{"zh-Hant", "und"},
		// extra
		{"nl-Cyrl", "und"},

		// World english inherits from en-001.
		{"en-150", "en-001"},
		{"en-AU", "en-001"},
		{"en-BE", "en-001"},
		{"en-GG", "en-001"},
		{"en-GI", "en-001"},
		{"en-HK", "en-001"},
		{"en-IE", "en-001"},
		{"en-IM", "en-001"},
		{"en-IN", "en-001"},
		{"en-JE", "en-001"},
		{"en-MT", "en-001"},
		{"en-NZ", "en-001"},
		{"en-PK", "en-001"},
		{"en-SG", "en-001"},

		// Spanish in Latin-American countries have es-419 as parent.
		{"es-AR", "es-419"},
		{"es-BO", "es-419"},
		{"es-CL", "es-419"},
		{"es-CO", "es-419"},
		{"es-CR", "es-419"},
		{"es-CU", "es-419"},
		{"es-DO", "es-419"},
		{"es-EC", "es-419"},
		{"es-GT", "es-419"},
		{"es-HN", "es-419"},
		{"es-MX", "es-419"},
		{"es-NI", "es-419"},
		{"es-PA", "es-419"},
		{"es-PE", "es-419"},
		{"es-PR", "es-419"},
		{"es-PY", "es-419"},
		{"es-SV", "es-419"},
		{"es-US", "es-419"},
		{"es-UY", "es-419"},
		{"es-VE", "es-419"},
		// exceptions (according to CLDR)
		{"es-CW", "es"},

		// Inherit from pt-PT, instead of pt for these countries.
		{"pt-AO", "pt-PT"},
		{"pt-CV", "pt-PT"},
		{"pt-GW", "pt-PT"},
		{"pt-MO", "pt-PT"},
		{"pt-MZ", "pt-PT"},
		{"pt-ST", "pt-PT"},
		{"pt-TL", "pt-PT"},
	}
	for _, tt := range tests {
		tag := MustParse(tt.in)
		want := MustParse(tt.out)
		if got := tag.Parent(); got != want {
			t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), want)
		}
	}
}
// Input sets for the Parse benchmarks, grouped by how much work parsing them
// is expected to take.
var (
// Tags without error that don't need to be changed.
benchBasic = []string{
"en",
"en-Latn",
"en-GB",
"za",
"zh-Hant",
"zh",
"zh-HK",
"ar-MK",
"en-CA",
"fr-CA",
"fr-CH",
"fr",
"lv",
"he-IT",
"tlh",
"ja",
"ja-Jpan",
"ja-Jpan-JP",
"de-1996",
"de-CH",
"sr",
"sr-Latn",
}
// Tags with extensions, no changes required.
benchExt = []string{
"x-a-b-c-d",
"x-aa-bbbb-cccccccc-d",
"en-x_cc-b-bbb-a-aaa",
"en-c_cc-b-bbb-a-aaa-x-x",
"en-u-co-phonebk",
"en-Cyrl-u-co-phonebk",
"en-US-u-co-phonebk-cu-xau",
"en-nedix-u-co-phonebk",
"en-t-t0-abcd",
"en-t-nl-latn",
"en-t-t0-abcd-x-a",
"en_t_pt_MLt",
"en-t-fr-est",
}
// Change, but no memory allocation required.
benchSimpleChange = []string{
"EN",
"i-klingon",
"en-latn",
"zh-cmn-Hans-CN",
"iw-NL",
}
// Change and memory allocation required.
benchChangeAlloc = []string{
"en-c_cc-b-bbb-a-aaa",
"en-u-cu-xua-co-phonebk",
"en-u-cu-xua-co-phonebk-a-cd",
"en-u-def-abc-cu-xua-co-phonebk",
"en-t-en-Cyrl-NL-1994",
"en-t-en-Cyrl-NL-1994-t0-abc-def",
}
// Tags that result in errors.
benchErr = []string{
// IllFormed
"x_A.-B-C_D",
"en-u-cu-co-phonebk",
"en-u-cu-xau-co",
"en-t-nl-abcd",
// Invalid
"xx",
"nl-Uuuu",
"nl-QB",
}
// benchChange is the concatenation of both change sets.
benchChange = append(benchSimpleChange, benchChangeAlloc...)
// benchAll is the concatenation of every set above.
benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
)
// doParse is the shared benchmark body: it calls Parse b.N times, cycling
// through the strings in tag and discarding the results.
func doParse(b *testing.B, tag []string) {
for i := 0; i < b.N; i++ {
// Use the modulo instead of looping over all tags so that we get a somewhat
// meaningful ns/op.
Parse(tag[i%len(tag)])
}
}
// BenchmarkParse measures Parse over all benchmark inputs.
func BenchmarkParse(b *testing.B) {
doParse(b, benchAll)
}
// BenchmarkParseBasic measures Parse over the benchBasic inputs.
func BenchmarkParseBasic(b *testing.B) {
doParse(b, benchBasic)
}
// BenchmarkParseError measures Parse over the benchErr inputs.
func BenchmarkParseError(b *testing.B) {
doParse(b, benchErr)
}
// BenchmarkParseSimpleChange measures Parse over the benchSimpleChange inputs.
func BenchmarkParseSimpleChange(b *testing.B) {
doParse(b, benchSimpleChange)
}
// BenchmarkParseChangeAlloc measures Parse over the benchChangeAlloc inputs.
func BenchmarkParseChangeAlloc(b *testing.B) {
doParse(b, benchChangeAlloc)
}
@@ -0,0 +1,412 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"fmt"
"sort"
"strconv"
"golang.org/x/text/internal/tag"
)
// findIndex looks up key in idx after normalizing its case to match form
// (key is modified in place by tag.FixCase). It returns ErrSyntax if the case
// cannot be fixed and a ValueError if key is not present in idx.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
	if ok := tag.FixCase(form, key); !ok {
		return 0, ErrSyntax
	}
	switch pos := idx.Index(key); pos {
	case -1:
		return 0, NewValueError(key)
	default:
		return pos, nil
	}
}
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) if there is no such index. imap must be sorted in ascending
// order, as required by sort.Search.
func searchUint(imap []uint16, key uint16) int {
	atLeastKey := func(i int) bool {
		return !(imap[i] < key)
	}
	return sort.Search(len(imap), atLeastKey)
}
type Language uint16
// getLangID returns the langID of s if s is a canonical subtag
// or langUnknown if s is not a canonical subtag.
func getLangID(s []byte) (Language, error) {
if len(s) == 2 {
return getLangISO2(s)
}
return getLangISO3(s)
}
// TODO language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.

// Canonicalize returns the result of normLang for id: the mapped language and
// its alias type, or id itself with AliasTypeUnknown when no alias exists.
func (id Language) Canonicalize() (Language, AliasType) {
return normLang(id)
}
// normLang returns the language that id maps to in AliasMap together with the
// type of that alias. If id has no entry, it returns id unchanged and
// AliasTypeUnknown. AliasMap is searched with a binary search on From.
func normLang(id Language) (Language, AliasType) {
	from := uint16(id)
	k := sort.Search(len(AliasMap), func(i int) bool {
		return AliasMap[i].From >= from
	})
	if k >= len(AliasMap) || AliasMap[k].From != from {
		return id, AliasTypeUnknown
	}
	return Language(AliasMap[k].To), AliasTypes[k]
}
// getLangISO2 returns the Language for the given 2-letter ISO language code.
// s is lower-cased in place. It returns ErrSyntax if s is not well-formed and
// a ValueError if no 2-letter entry exists for it.
func getLangISO2(s []byte) (Language, error) {
	if !tag.FixCase("zz", s) {
		return 0, ErrSyntax
	}
	i := lang.Index(s)
	// Entries whose fourth byte is zero are 3-letter codes and do not count
	// as a match for a 2-letter lookup.
	if i == -1 || lang.Elem(i)[3] == 0 {
		return 0, NewValueError(s)
	}
	return Language(i), nil
}
// base is the radix used by strToInt and intToStr: one digit per letter a-z.
const base = 'z' - 'a' + 1

// strToInt interprets s as a base-26 number in which 'a' is the digit 0 and
// 'z' the digit 25, most significant digit first.
func strToInt(s []byte) uint {
	var n uint
	for _, c := range s {
		n = n*base + uint(c-'a')
	}
	return n
}

// intToStr is the inverse of strToInt: it fills s with the letters encoding v.
// len(s) must match the number of characters originally passed to strToInt.
func intToStr(v uint, s []byte) {
	for pos := len(s); pos > 0; pos-- {
		s[pos-1] = 'a' + byte(v%base)
		v /= base
	}
}
// getLangISO3 returns the Language for the given 3-letter ISO language code.
// s is lower-cased in place. It returns ErrSyntax if s is not well-formed and
// a ValueError if the code is unknown. The lookup tries, in order: canonical
// 3-letter entries, alternative ISO3 codes, the bitmap of codes without an
// index entry, and finally non-canonical 3-letter forms of 2-letter codes.
func getLangISO3(s []byte) (Language, error) {
	if !tag.FixCase("und", s) {
		return 0, ErrSyntax
	}

	// first try to match canonical 3-letter entries
	head2 := s[:2]
	for i := lang.Index(head2); i != -1; i = lang.Next(head2, i) {
		e := lang.Elem(i)
		if e[3] != 0 || e[2] != s[2] {
			continue
		}
		// We treat "und" as special and always translate it to "unspecified".
		// Note that ZZ and Zzzz are private use and are not treated as
		// unspecified by default.
		if id := Language(i); id != nonCanonicalUnd {
			return id, nil
		}
		return 0, nil
	}

	if i := altLangISO3.Index(s); i != -1 {
		return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
	}

	// Codes without an index entry are recorded as one bit each in
	// langNoIndex, addressed by their base-26 value.
	n := strToInt(s)
	if langNoIndex[n/8]&(1<<(n%8)) != 0 {
		return Language(n) + langNoIndexOffset, nil
	}

	// Check for non-canonical uses of ISO3.
	head1 := s[:1]
	for i := lang.Index(head1); i != -1; i = lang.Next(head1, i) {
		if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
			return Language(i), nil
		}
	}
	return 0, NewValueError(s)
}
// StringToBuf writes the string form of id to b and returns the number of
// bytes written. cap(b) must be >= 3.
func (id Language) StringToBuf(b []byte) int {
	switch {
	case id >= langNoIndexOffset:
		// Codes without an index entry are reconstructed from their number.
		intToStr(uint(id)-langNoIndexOffset, b[:3])
		return 3
	case id == 0:
		return copy(b, "und")
	}
	// Each entry in lang occupies four bytes; a zero fourth byte marks a
	// 3-letter code, otherwise only the first two bytes form the code.
	entry := lang[id<<2:]
	n := 2
	if entry[3] == 0 {
		n = 3
	}
	return copy(b, entry[:n])
}
// String returns the BCP 47 representation of the langID.
// Use b as variable name, instead of id, to ensure the variable
// used is consistent with that of Base in which this type is embedded.
func (b Language) String() string {
	switch {
	case b == 0:
		return "und"
	case b >= langNoIndexOffset:
		// Codes without an index entry are reconstructed from their number.
		var code [3]byte
		intToStr(uint(b-langNoIndexOffset), code[:])
		return string(code[:])
	}
	// A zero fourth byte marks a 3-letter code; otherwise the code is the
	// first two bytes of the entry.
	entry := lang.Elem(int(b))
	if entry[3] != 0 {
		return entry[:2]
	}
	return entry[:3]
}
// ISO3 returns the ISO 639-3 language code.
func (b Language) ISO3() string {
	if b == 0 || b >= langNoIndexOffset {
		return b.String()
	}
	entry := lang.Elem(int(b))
	switch {
	case entry[3] == 0:
		// The entry itself is a 3-letter code.
		return entry[:3]
	case entry[2] == 0:
		// The fourth byte indexes the table of alternative ISO3 codes.
		return altLangISO3.Elem(int(entry[3]))[:3]
	default:
		// This allocation will only happen for 3-letter ISO codes
		// that are non-canonical BCP 47 language identifiers.
		return entry[0:1] + entry[2:4]
	}
}
// IsPrivateUse reports whether this language code is reserved for private
// use, i.e. lies in the inclusive range [langPrivateStart, langPrivateEnd].
func (b Language) IsPrivateUse() bool {
	return b >= langPrivateStart && b <= langPrivateEnd
}
// SuppressScript returns the script marked as SuppressScript in the IANA
// language tag repository, or 0 if there is no such script. Languages at or
// above langNoIndexOffset never have one.
func (b Language) SuppressScript() Script {
	if b >= langNoIndexOffset {
		return 0
	}
	return Script(suppressScript[b])
}
type Region uint16
// getRegionID returns the region id for s if s is a valid 2-letter region code
// or unknownRegion.
func getRegionID(s []byte) (Region, error) {
if len(s) == 3 {
if isAlpha(s[0]) {
return getRegionISO3(s)
}
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
return getRegionM49(int(i))
}
}
return getRegionISO2(s)
}
// getRegionISO2 returns the Region for the given 2-letter ISO country code.
// s is upper-cased in place. The error is whatever findIndex reports:
// ErrSyntax for malformed input or a ValueError for an unknown code.
func getRegionISO2(s []byte) (Region, error) {
	idx, err := findIndex(regionISO, s, "ZZ")
	if err == nil {
		// ISO regions are numbered after the block reserved below
		// isoRegionOffset.
		return Region(idx) + isoRegionOffset, nil
	}
	return 0, err
}
// getRegionISO3 returns the Region for the given 3-letter ISO country code.
// s is upper-cased in place. It returns ErrSyntax if s is not well-formed and
// a ValueError if the code is unknown.
func getRegionISO3(s []byte) (Region, error) {
	if !tag.FixCase("ZZZ", s) {
		return 0, ErrSyntax
	}

	// Scan the entries that share the first letter and compare the stored
	// second and third letters of the 3-letter form.
	first := s[:1]
	for i := regionISO.Index(first); i != -1; i = regionISO.Next(first, i) {
		if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
			return Region(i) + isoRegionOffset, nil
		}
	}

	// Fall back to the table of alternative codes, stored as consecutive
	// 3-byte records with a parallel table of region IDs.
	for off := 0; off < len(altRegionISO3); off += 3 {
		if tag.Compare(altRegionISO3[off:off+3], s) == 0 {
			return Region(altRegionIDs[off/3]), nil
		}
	}
	return 0, NewValueError(s)
}
// getRegionM49 returns the Region for the UN M.49 code n. Codes outside
// 1..999 and codes without a table entry yield a ValueError that records the
// decimal form of n (when it fits in the error's fixed-size buffer).
//
// fromM49 is partitioned into buckets by the upper bits of the code
// (n >> searchBits); m49Index holds the bucket boundaries. Each entry packs
// the low searchBits bits of the code above the regionBits bits of the
// region ID, so a binary search inside the bucket finds the code.
func getRegionM49(n int) (Region, error) {
	if 0 < n && n <= 999 {
		const (
			searchBits = 7
			regionBits = 9
			regionMask = 1<<regionBits - 1
		)
		idx := n >> searchBits
		buf := fromM49[m49Index[idx]:m49Index[idx+1]]
		val := uint16(n) << regionBits // we rely on bits shifting out
		i := sort.Search(len(buf), func(i int) bool {
			return buf[i] >= val
		})
		// Only accept a hit inside this bucket. When the search runs off the
		// end (i == len(buf)) there is no entry for n; looking at the next
		// bucket could yield a false match for a different code, or an
		// out-of-range index on the last bucket.
		if i < len(buf) {
			if r := buf[i]; r&^regionMask == val {
				return Region(r & regionMask), nil
			}
		}
	}
	var e ValueError
	// Start the buffer at length zero so that the digits are written into
	// e.v itself rather than appended after its existing bytes.
	fmt.Fprint(bytes.NewBuffer(e.v[:0]), n)
	return 0, e
}
// normRegion returns the replacement region if r is deprecated or 0 otherwise.
// regionOldMap is searched with a binary search on From.
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
// TODO: consider mapping split up regions to new most populous one (like CLDR).
func normRegion(r Region) Region {
	from := uint16(r)
	k := sort.Search(len(regionOldMap), func(i int) bool {
		return regionOldMap[i].From >= from
	})
	if k >= len(regionOldMap) || regionOldMap[k].From != from {
		return 0
	}
	return Region(regionOldMap[k].To)
}
// Bit flags stored per region in regionTypes and read through Region.typ.
const (
// iso3166UserAssigned marks regions with ISO 3166 User-assigned status;
// it is what Region.IsPrivateUse tests.
iso3166UserAssigned = 1 << iota
// ccTLD marks regions that are valid country code top-level domains; it
// is what Region.TLD tests.
ccTLD
// bcp47Region presumably marks regions that are valid BCP 47 region
// subtags (not used in this part of the file; verify against callers).
bcp47Region
)
// typ returns the type flags recorded for r in regionTypes (a combination of
// iso3166UserAssigned, ccTLD and bcp47Region).
func (r Region) typ() byte {
return regionTypes[r]
}
// String returns the BCP 47 representation for the region.
// It returns "ZZ" for an unspecified region. Regions below isoRegionOffset
// are rendered as their zero-padded 3-digit UN M.49 code; all others as their
// 2-letter ISO code.
func (r Region) String() string {
	if r >= isoRegionOffset {
		return regionISO.Elem(int(r - isoRegionOffset))[:2]
	}
	if r == 0 {
		return "ZZ"
	}
	return fmt.Sprintf("%03d", r.M49())
}
// ISO3 returns the 3-letter ISO code of r.
// Note that not all regions have a 3-letter ISO code.
// In such cases this method returns "ZZZ".
func (r Region) ISO3() string {
	if r < isoRegionOffset {
		return "ZZZ"
	}
	entry := regionISO.Elem(int(r - isoRegionOffset))
	if marker := entry[2]; marker == 0 {
		// The fourth byte is an offset into the table of alternative codes.
		start := entry[3]
		return altRegionISO3[start:][:3]
	} else if marker == ' ' {
		// A space marks a region without a 3-letter code.
		return "ZZZ"
	}
	// The 3-letter code is the first letter followed by the two stored
	// trailing letters.
	return entry[0:1] + entry[2:4]
}
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
// is not defined for r. The value is read directly from the m49 table.
func (r Region) M49() int {
return int(m49[r])
}
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
// may include private-use tags that are assigned by CLDR and used in this
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
//
// The status is read from the iso3166UserAssigned bit of r's type flags.
func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0
}
// Script is the compact numeric identifier of a script subtag.
type Script uint16
// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
//
// s is case-normalized in place to the form "Zzzz" by findIndex, whose index
// and error are returned; on error the index, and thus the Script, is 0.
func getScriptID(idx tag.Index, s []byte) (Script, error) {
i, err := findIndex(idx, s, "Zzzz")
return Script(i), err
}
// String returns the script code in title case.
// It returns "Zzzz" for an unspecified script.
func (s Script) String() string {
	if s != 0 {
		return script.Elem(int(s))
	}
	return "Zzzz"
}
// IsPrivateUse reports whether this script code is reserved for private use,
// i.e. lies in the inclusive range [_Qaaa, _Qabx].
func (s Script) IsPrivateUse() bool {
	return s >= _Qaaa && s <= _Qabx
}
const (
// maxAltTaglen is the length of the longest tag in grandfatheredMap.
maxAltTaglen = len("en-US-POSIX")
// maxLen is the size of the fixed-length byte-array keys of
// grandfatheredMap.
maxLen = maxAltTaglen
)
var (
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
// their base language or index to more elaborate tag.
//
// Keys are lower-cased tags, zero-padded to maxLen bytes. A non-negative
// value is a language ID; a negative value -v selects the replacement tag
// altTags[altTagIndex[v-1]:altTagIndex[v]].
grandfatheredMap = map[[maxLen]byte]int16{
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
// Grandfathered tags with no modern replacement will be converted as
// follows:
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
// CLDR-specific tag.
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX
}
// altTagIndex holds the boundaries of the individual tags inside altTags.
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
// altTags is the concatenation of the replacement tags selected by the
// negative values in grandfatheredMap.
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)
// grandfathered looks up the zero-padded, lower-cased tag s in
// grandfatheredMap. It reports false if s is not listed. Otherwise it returns
// either a Tag with only the mapped language set, or, for entries with a
// negative value, the Tag made from the corresponding slice of altTags.
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
	v, found := grandfatheredMap[s]
	switch {
	case !found:
		return t, false
	case v < 0:
		// Negative values are 1-based indexes into altTagIndex.
		n := -v
		return Make(altTags[altTagIndex[n-1]:altTagIndex[n]]), true
	}
	t.LangID = Language(v)
	return t, true
}
@@ -0,0 +1,457 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"testing"
"golang.org/x/text/internal/tag"
)
// b is a test shorthand that converts s to a fresh byte slice.
func b(s string) []byte {
return []byte(s)
}
// TestLangID exercises the language ID lookups (getLangID, getLangISO2,
// getLangISO3), normalization via normLang, and the String and ISO3
// renderings of the resulting ID.
func TestLangID(t *testing.T) {
	type langCase struct {
		id, bcp47, iso3, norm string
		err                   error
	}
	cases := []langCase{
		{id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
		{id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
		{id: "es", bcp47: "es", iso3: "spa"},
		{id: "spa", bcp47: "es", iso3: "spa"},
		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "kw", bcp47: "kw", iso3: "cor"},
		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "kur", bcp47: "ku", iso3: "kur"},
		{id: "nl", bcp47: "nl", iso3: "nld"},
		{id: "NL", bcp47: "nl", iso3: "nld"},
		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
		{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
	}
	for i, tc := range cases {
		langID, err := getLangID(b(tc.id))
		if err != tc.err {
			t.Errorf("%d:err(%s): found %q; want %q", i, tc.id, err, tc.err)
		}
		if err != nil {
			continue
		}
		// The two-letter lookup is always performed, but its result is only
		// compared when the BCP 47 form really is two letters long.
		iso2ID, _ := getLangISO2(b(tc.bcp47))
		if len(tc.bcp47) == 2 && langID != iso2ID {
			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tc.bcp47, iso2ID, langID)
		}
		if len(tc.iso3) == 3 {
			if iso3ID, _ := getLangISO3(b(tc.iso3)); langID != iso3ID {
				t.Errorf("%d:getISO3(%s): found %q; want %q", i, tc.iso3, iso3ID, langID)
			}
			if viaID, _ := getLangID(b(tc.iso3)); langID != viaID {
				t.Errorf("%d:getID3(%s): found %v; want %v", i, tc.iso3, viaID, langID)
			}
		}
		// Without an explicit norm the ID is expected to normalize to itself.
		wantNorm := langID
		if tc.norm != "" {
			wantNorm, _ = getLangID(b(tc.norm))
		}
		if gotNorm, _ := normLang(langID); gotNorm != wantNorm {
			t.Errorf("%d:norm(%s): found %v; want %v", i, tc.id, gotNorm, wantNorm)
		}
		if s := langID.String(); tc.bcp47 != s {
			t.Errorf("%d:String(): found %s; want %s", i, s, tc.bcp47)
		}
		if s := langID.ISO3(); tc.iso3[:3] != s {
			t.Errorf("%d:iso3(): found %s; want %s", i, s, tc.iso3[:3])
		}
	}
}
// TestGrandfathered checks that Make maps each legacy or grandfathered input
// to the same Tag that MustParse produces for the expected replacement.
func TestGrandfathered(t *testing.T) {
	cases := []struct{ in, out string }{
		{"art-lojban", "jbo"},
		{"i-ami", "ami"},
		{"i-bnn", "bnn"},
		{"i-hak", "hak"},
		{"i-klingon", "tlh"},
		{"i-lux", "lb"},
		{"i-navajo", "nv"},
		{"i-pwn", "pwn"},
		{"i-tao", "tao"},
		{"i-tay", "tay"},
		{"i-tsu", "tsu"},
		{"no-bok", "nb"},
		{"no-nyn", "nn"},
		{"sgn-BE-FR", "sfb"},
		{"sgn-BE-NL", "vgt"},
		{"sgn-CH-DE", "sgg"},
		{"sgn-ch-de", "sgg"},
		{"zh-guoyu", "cmn"},
		{"zh-hakka", "hak"},
		{"zh-min-nan", "nan"},
		{"zh-xiang", "hsn"},

		// Grandfathered tags with no modern replacement will be converted as follows:
		{"cel-gaulish", "xtg-x-cel-gaulish"},
		{"en-GB-oed", "en-GB-oxendict"},
		{"en-gb-oed", "en-GB-oxendict"},
		{"i-default", "en-x-i-default"},
		{"i-enochian", "und-x-i-enochian"},
		{"i-mingo", "see-x-i-mingo"},
		{"zh-min", "nan-x-zh-min"},

		{"root", "und"},
		{"en_US_POSIX", "en-US-u-va-posix"},
		{"en_us_posix", "en-US-u-va-posix"},
		{"en-us-posix", "en-US-u-va-posix"},
	}
	for _, tc := range cases {
		got, want := Make(tc.in), MustParse(tc.out)
		if got == want {
			continue
		}
		t.Errorf("%s: got %q; want %q", tc.in, got, want)
	}
}
// TestRegionID checks getRegionID and getRegionISO2. Inputs starting with '_'
// are expected to be rejected (the underscore is stripped before the lookup);
// all other inputs must render as the given output string.
func TestRegionID(t *testing.T) {
	cases := []struct {
		in, out string
	}{
		{"_ ", ""},
		{"_000", ""},
		{"419", "419"},
		{"AA", "AA"},
		{"ATF", "TF"},
		{"HV", "HV"},
		{"CT", "CT"},
		{"DY", "DY"},
		{"IC", "IC"},
		{"FQ", "FQ"},
		{"JT", "JT"},
		{"ZZ", "ZZ"},
		{"EU", "EU"},
		{"QO", "QO"},
		{"FX", "FX"},
	}
	for i, tc := range cases {
		if tc.in[0] == '_' {
			raw := tc.in[1:]
			if _, err := getRegionID(b(raw)); err == nil {
				t.Errorf("%d:err(%s): found nil; want error", i, raw)
			}
			continue
		}
		region, _ := getRegionID(b(tc.in))
		if got := region.String(); got != tc.out {
			t.Errorf("%d:%s: found %q; want %q", i, tc.in, got, tc.out)
		}
		if len(tc.in) != 2 {
			continue
		}
		// Two-letter inputs must also resolve through the ISO2 lookup.
		iso2, _ := getRegionISO2(b(tc.in))
		if got := iso2.String(); got != tc.out {
			t.Errorf("%d:getISO2(%s): found %q; want %q", i, tc.in, got, tc.out)
		}
	}
}
// TestRegionType checks the type bits reported by Region.typ for a selection
// of regular, reserved, deprecated and user-assigned region codes.
func TestRegionType(t *testing.T) {
	cases := []struct {
		r string
		t byte
	}{
		{"NL", bcp47Region | ccTLD},
		{"EU", bcp47Region | ccTLD}, // exceptionally reserved
		{"AN", bcp47Region | ccTLD}, // transitionally reserved

		{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
		{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47

		{"XA", iso3166UserAssigned | bcp47Region},
		{"ZZ", iso3166UserAssigned | bcp47Region},
		{"AA", iso3166UserAssigned | bcp47Region},
		{"QO", iso3166UserAssigned | bcp47Region},
		{"QM", iso3166UserAssigned | bcp47Region},
		{"XK", iso3166UserAssigned | bcp47Region},

		{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
	}
	for _, tc := range cases {
		region := MustParseRegion(tc.r)
		got := region.typ()
		if got == tc.t {
			continue
		}
		t.Errorf("Type(%s): got %x; want %x", tc.r, got, tc.t)
	}
}
// TestRegionISO3 checks Region.ISO3 for each "from" code and the reverse
// lookup through getRegionISO3. The reverse lookup is expected to yield the
// region named by "to", or by "from" when "to" is empty.
func TestRegionISO3(t *testing.T) {
	cases := []struct {
		from, iso3, to string
	}{
		{" ", "ZZZ", "ZZ"},
		{"000", "ZZZ", "ZZ"},
		{"AA", "AAA", ""},
		{"CT", "CTE", ""},
		{"DY", "DHY", ""},
		{"EU", "QUU", ""},
		{"HV", "HVO", ""},
		{"IC", "ZZZ", "ZZ"},
		{"JT", "JTN", ""},
		{"PZ", "PCZ", ""},
		{"QU", "QUU", "EU"},
		{"QO", "QOO", ""},
		{"YD", "YMD", ""},
		{"FQ", "ATF", "TF"},
		{"TF", "ATF", ""},
		{"FX", "FXX", ""},
		{"ZZ", "ZZZ", ""},
		{"419", "ZZZ", "ZZ"},
	}
	for _, tc := range cases {
		source, _ := getRegionID(b(tc.from))
		if got := source.ISO3(); got != tc.iso3 {
			t.Errorf("iso3(%q): found %q; want %q", tc.from, got, tc.iso3)
		}
		if tc.iso3 == "" {
			continue
		}
		target := tc.from
		if tc.to != "" {
			target = tc.to
		}
		wantRegion, _ := getRegionID(b(target))
		if gotRegion, _ := getRegionISO3(b(tc.iso3)); gotRegion != wantRegion {
			t.Errorf("%s: found %q; want %q", tc.iso3, gotRegion, target)
		}
	}
}
// TestRegionM49 checks the conversion between UN M.49 numeric codes and
// regions in both directions: getRegionM49 (code to region) and Region.M49
// (region to code).
func TestRegionM49(t *testing.T) {
	// An empty id means getRegionM49 is expected to return an error.
	fromTests := []struct {
		m49 int
		id  string
	}{
		{0, ""},
		{-1, ""},
		{1000, ""},
		{10000, ""},

		{001, "001"},
		{104, "MM"},
		{180, "CD"},
		{230, "ET"},
		{231, "ET"},
		{249, "FX"},
		{250, "FR"},
		{276, "DE"},
		{278, "DD"},
		{280, "DE"},
		{419, "419"},
		{626, "TL"},
		{736, "SD"},
		{840, "US"},
		{854, "BF"},
		{891, "CS"},
		{899, ""},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{999, "ZZ"},
	}
	for _, tt := range fromTests {
		id, err := getRegionM49(tt.m49)
		// have is what the lookup actually did; want is what the table
		// expects. The names match the labels used in the failure message.
		if have, want := err != nil, tt.id == ""; have != want {
			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
			continue
		}
		r, _ := getRegionID(b(tt.id))
		if r != id {
			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
		}
	}

	toTests := []struct {
		m49 int
		id  string
	}{
		{0, "000"},
		{0, "IC"}, // Some codes don't have an ID

		{001, "001"},
		{104, "MM"},
		{104, "BU"},
		{180, "CD"},
		{180, "ZR"},
		{231, "ET"},
		{250, "FR"},
		{249, "FX"},
		{276, "DE"},
		{278, "DD"},
		{419, "419"},
		{626, "TL"},
		{626, "TP"},
		{729, "SD"},
		{826, "GB"},
		{840, "US"},
		{854, "BF"},
		{891, "YU"},
		{891, "CS"},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{967, "QU"},
		{999, "ZZ"},
		// For codes that don't have an M49 code use the replacement value,
		// if available.
		{854, "HV"}, // maps to Burkina Faso
	}
	for _, tt := range toTests {
		r, _ := getRegionID(b(tt.id))
		if r.M49() != tt.m49 {
			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
		}
	}
}
// TestRegionDeprecation checks normRegion. When the input already denotes the
// expected region, normRegion must return 0; otherwise it must return the
// expected replacement region.
func TestRegionDeprecation(t *testing.T) {
	cases := []struct{ in, out string }{
		{"BU", "MM"},
		{"BUR", "MM"},
		{"CT", "KI"},
		{"DD", "DE"},
		{"DDR", "DE"},
		{"DY", "BJ"},
		{"FX", "FR"},
		{"HV", "BF"},
		{"JT", "UM"},
		{"MI", "UM"},
		{"NH", "VU"},
		{"NQ", "AQ"},
		{"PU", "UM"},
		{"PZ", "PA"},
		{"QU", "EU"},
		{"RH", "ZW"},
		{"TP", "TL"},
		{"UK", "GB"},
		{"VD", "VN"},
		{"WK", "UM"},
		{"YD", "YE"},
		{"NL", "NL"},
	}
	for _, tc := range cases {
		source, _ := getRegionID([]byte(tc.in))
		target, _ := getRegionISO2([]byte(tc.out))
		got := normRegion(source)
		switch {
		case target == source:
			// Nothing to replace: normRegion should report no change.
			if got != 0 {
				t.Errorf("%s: was %q; want %q", tc.in, got, tc.in)
			}
		default:
			if got != target {
				t.Errorf("%s: was %q; want %q", tc.in, got, tc.out)
			}
		}
	}
}
// TestGetScriptID checks getScriptID against a small hand-built index.
// A result of 0 must always be accompanied by a non-nil error.
func TestGetScriptID(t *testing.T) {
	index := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
	cases := []struct {
		in  string
		out Script
	}{
		{" ", 0},
		{" ", 0},
		{" ", 0},
		{"", 0},
		{"Aaaa", 0},
		{"Bbbb", 1},
		{"Dddd", 2},
		{"dddd", 2},
		{"dDDD", 2},
		{"Eeee", 3},
		{"Zzzz", 4},
	}
	for i, tc := range cases {
		got, err := getScriptID(index, b(tc.in))
		switch {
		case got != tc.out:
			t.Errorf("%d:%s: found %d; want %d", i, tc.in, got, tc.out)
		case got == 0 && err == nil:
			t.Errorf("%d:%s: no error; expected one", i, tc.in)
		}
	}
}
// TestIsPrivateUse checks the IsPrivateUse method of language, region and
// script IDs, each against its own table of inputs.
func TestIsPrivateUse(t *testing.T) {
	type privCase struct {
		s       string
		private bool
	}

	langCases := []privCase{
		{"en", false},
		{"und", false},
		{"pzn", false},
		{"qaa", true},
		{"qtz", true},
		{"qua", false},
	}
	for i, tc := range langCases {
		id, _ := getLangID([]byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}

	regionCases := []privCase{
		{"001", false},
		{"419", false},
		{"899", false},
		{"900", false},
		{"957", false},
		{"958", true},
		{"AA", true},
		{"AC", false},
		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
		{"QO", true},  // CLDR grouping, User-assigned in ISO.
		{"QA", false},
		{"QM", true},
		{"QZ", true},
		{"XA", true},
		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
		{"XZ", true},
		{"ZW", false},
		{"ZZ", true},
	}
	for i, tc := range regionCases {
		id, _ := getRegionID([]byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}

	scriptCases := []privCase{
		{"Latn", false},
		{"Laaa", false}, // invalid
		{"Qaaa", true},
		{"Qabx", true},
		{"Qaby", false},
		{"Zyyy", false},
		{"Zzzz", false},
	}
	for i, tc := range scriptCases {
		id, _ := getScriptID(script, []byte(tc.s))
		if got := id.IsPrivateUse(); got != tc.private {
			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tc.s, got, tc.private)
		}
	}
}
@@ -0,0 +1,226 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import "errors"
// scriptRegionFlags is a uint8 bit set. The constants below are the bits that
// addTags tests on the flags field of likely* table entries.
// NOTE(review): the constants are declared untyped and the tables' field
// types are not visible here; confirm that flags is a scriptRegionFlags.
type scriptRegionFlags uint8
const (
// isList (bit 0): when set on an entry, addTags treats the entry as a
// reference to a range in the corresponding *List table rather than as a
// rule of its own.
isList = 1 << iota
// scriptInFrom (bit 1): addTags requires this bit when matching an entry by
// script and requires it to be clear when matching by region. Presumably it
// marks rules whose source tag specified a script; confirm against the
// table generator.
scriptInFrom
// regionInFrom (bit 2): presumably the region counterpart of scriptInFrom;
// confirm against the table generator.
regionInFrom
)
// setUndefinedLang assigns id to t.LangID, but only when t.LangID is still
// zero (undefined). An already defined language is left untouched.
func (t *Tag) setUndefinedLang(id Language) {
	if t.LangID != 0 {
		return
	}
	t.LangID = id
}
// setUndefinedScript assigns id to t.ScriptID, but only when t.ScriptID is
// still zero (undefined). An already defined script is left untouched.
func (t *Tag) setUndefinedScript(id Script) {
	if t.ScriptID != 0 {
		return
	}
	t.ScriptID = id
}
// setUndefinedRegion assigns id to t.RegionID when the region is still zero
// (undefined) or when the current region reports that it Contains id. In all
// other cases the existing region is kept.
func (t *Tag) setUndefinedRegion(id Region) {
	if t.RegionID != 0 && !t.RegionID.Contains(id) {
		return
	}
	t.RegionID = id
}
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. addTags returns it when no
// earlier rule matched and the tag's language ID is not below
// langNoIndexOffset.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
// addLikelySubtags expands t with the most likely values for its subtags, as
// computed by addTags. Usually this only fills in undefined fields, but an
// existing value may be changed in some cases. If addTags fails, t is
// returned unchanged together with the error. If the expansion leaves the
// language, script and region as they were, t itself is returned; otherwise
// the expanded tag is returned after its string form has been rebuilt.
func (t Tag) addLikelySubtags() (Tag, error) {
	expanded, err := addTags(t)
	switch {
	case err != nil:
		return t, err
	case expanded.equalTags(t):
		return t, nil
	}
	expanded.RemakeString()
	return expanded, nil
}
// specializeRegion attempts to specialize a group region. It reports whether
// t's region is a region group (its regionInclusion value is below
// nRegionGroups). For a group, the region is replaced by the group's likely
// region only when the likely entry's language and script both equal t's.
func specializeRegion(t *Tag) bool {
	group := regionInclusion[t.RegionID]
	if group >= nRegionGroups {
		return false
	}
	likely := likelyRegionGroup[group]
	if Language(likely.lang) == t.LangID && Script(likely.script) == t.ScriptID {
		t.RegionID = Region(likely.region)
	}
	return true
}
// Maximize returns a copy of t with its missing language, script and region
// subtags filled in by addTags, together with any error addTags reports.
func (t Tag) Maximize() (Tag, error) {
	full, err := addTags(t)
	return full, err
}
// addTags returns a copy of t whose undefined (zero) LangID, ScriptID and
// RegionID fields are filled in from the likely* tables. Rules are tried in
// this order:
//
//  1. private-use tags are returned unchanged;
//  2. script and region both set (lang-script-region, und-script-region);
//  3. language set: lang-script, then lang-region;
//  4. language unset: und-script, then und-region;
//  5. language alone, falling back to _en when the language is still unset.
//
// It returns ErrMissingLikelyTagsData when no earlier rule returned and
// t.LangID is not below langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
// We leave private use identifiers alone.
if t.IsPrivateUse() {
return t, nil
}
if t.ScriptID != 0 && t.RegionID != 0 {
if t.LangID != 0 {
// already fully specified
specializeRegion(&t)
return t, nil
}
// Search matches for und-script-region. Note that for these cases
// region will never be a group so there is no need to check for this.
// Start with the single likelyRegion entry for this region.
list := likelyRegion[t.RegionID : t.RegionID+1]
if x := list[0]; x.flags&isList != 0 {
// For list entries, lang is the start offset into likelyRegionList and
// script is the number of entries.
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
}
for _, x := range list {
// Deviating from the spec. See match_test.go for details.
if Script(x.script) == t.ScriptID {
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// No entry for this region has the requested script: fall through to the
// remaining rules.
}
if t.LangID != 0 {
// Search matches for lang-script and lang-region, where lang != und.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
// For list entries, region is the start offset into likelyLangList and
// script is the number of entries.
list := likelyLangList[x.region : x.region+uint16(x.script)]
if t.ScriptID != 0 {
// lang-script: take the region of the first entry with a matching script
// that has scriptInFrom set.
for _, x := range list {
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
t.setUndefinedRegion(Region(x.region))
return t, nil
}
}
} else if t.RegionID != 0 {
// lang-region: count matching entries. tt accumulates the candidate
// result; goodScript stays true as long as every match has the same
// script as the first one.
count := 0
goodScript := true
tt := t
for _, x := range list {
// We visit all entries for which the script was not
// defined, including the ones where the region was not
// defined. This allows for proper disambiguation within
// regions.
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
tt.RegionID = Region(x.region)
tt.setUndefinedScript(Script(x.script))
goodScript = goodScript && tt.ScriptID == Script(x.script)
count++
}
}
// A single match is unambiguous: use its region and script.
if count == 1 {
return tt, nil
}
// Even if we fail to find a unique Region, we might have
// an unambiguous script.
if goodScript {
t.ScriptID = tt.ScriptID
}
}
}
}
} else {
// Search matches for und-script.
if t.ScriptID != 0 {
x := likelyScript[t.ScriptID]
if x.region != 0 {
t.setUndefinedRegion(Region(x.region))
t.setUndefinedLang(Language(x.lang))
return t, nil
}
}
// Search matches for und-region. If und-script-region exists, it would
// have been found earlier.
if t.RegionID != 0 {
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
// The region is a group: apply the group's likely values. This branch
// does not return, so the language-only rule below still runs.
x := likelyRegionGroup[i]
if x.region != 0 {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
t.RegionID = Region(x.region)
}
} else {
x := likelyRegion[t.RegionID]
if x.flags&isList != 0 {
// Use the first entry of the region's list.
x = likelyRegionList[x.lang]
}
// NOTE(review): this compares the whole flags value with scriptInFrom
// instead of masking with &; confirm that this is intended.
if x.script != 0 && x.flags != scriptInFrom {
t.setUndefinedLang(Language(x.lang))
t.setUndefinedScript(Script(x.script))
return t, nil
}
}
}
}
// Search matches for lang.
if t.LangID < langNoIndexOffset {
x := likelyLang[t.LangID]
if x.flags&isList != 0 {
// Use the first entry of the language's list.
x = likelyLangList[x.region]
}
if x.region != 0 {
t.setUndefinedScript(Script(x.script))
t.setUndefinedRegion(Region(x.region))
}
specializeRegion(&t)
if t.LangID == 0 {
t.LangID = _en // default language
}
return t, nil
}
return t, ErrMissingLikelyTagsData
}
// setTagsFrom overwrites the language, script and region of t with those of
// id. No other field of t is modified.
func (t *Tag) setTagsFrom(id Tag) {
	t.LangID, t.ScriptID, t.RegionID = id.LangID, id.ScriptID, id.RegionID
}
// minimize removes the region or script subtags from t such that
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
// The reduction itself is done by minimizeTags; on success the string form
// of the result is rebuilt before it is returned.
func (t Tag) minimize() (Tag, error) {
	reduced, err := minimizeTags(t)
	if err != nil {
		return reduced, err
	}
	reduced.RemakeString()
	return reduced, nil
}
// minimizeTags mimics the behavior of the ICU 51 C implementation.
//
// A tag equal to Und is returned as is. Otherwise t is expanded with addTags
// and three reduced candidates are tried in order: language only,
// language+region, language+script. The first candidate whose own expansion
// equals the expansion of t replaces t's language, script and region. If no
// candidate qualifies, t is returned unchanged. An error from expanding t is
// returned together with the unmodified t.
func minimizeTags(t Tag) (Tag, error) {
	if t.equalTags(Und) {
		return t, nil
	}
	full, err := addTags(t)
	if err != nil {
		return t, err
	}
	candidates := [...]Tag{
		{LangID: t.LangID},
		{LangID: t.LangID, RegionID: t.RegionID},
		{LangID: t.LangID, ScriptID: t.ScriptID},
	}
	for i := range candidates {
		expanded, expandErr := addTags(candidates[i])
		if expandErr != nil || !full.equalTags(expanded) {
			continue
		}
		t.setTagsFrom(candidates[i])
		break
	}
	return t, nil
}
@@ -0,0 +1,161 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"flag"
"testing"
)
// verbose is the -verbose test flag (default false). Per its usage string,
// setting it requests printing of the matchers' internal tables.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
// TestAddLikelySubtags parses each input, expands it with addLikelySubtags
// and compares the string form of the result with that of the parsed
// expected tag.
func TestAddLikelySubtags(t *testing.T) {
	cases := []struct{ in, out string }{
		{"aa", "aa-Latn-ET"},
		{"aa-Latn", "aa-Latn-ET"},
		{"aa-Arab", "aa-Arab-ET"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"kk", "kk-Cyrl-KZ"},
		{"kk-CN", "kk-Arab-CN"},
		{"cmn", "cmn"},
		{"zh-AU", "zh-Hant-AU"},
		{"zh-VN", "zh-Hant-VN"},
		{"zh-SG", "zh-Hans-SG"},
		{"zh-Hant", "zh-Hant-TW"},
		{"zh-Hani", "zh-Hani-CN"},
		{"und-Hani", "zh-Hani-CN"},
		{"und", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"und-CW", "pap-Latn-CW"},
		{"und-YT", "fr-Latn-YT"},
		{"und-Arab", "ar-Arab-EG"},
		{"und-AM", "hy-Armn-AM"},
		{"und-TW", "zh-Hant-TW"},
		{"und-002", "en-Latn-NG"},
		{"und-Latn-002", "en-Latn-NG"},
		{"en-Latn-002", "en-Latn-NG"},
		{"en-002", "en-Latn-NG"},
		{"en-001", "en-Latn-US"},
		{"und-003", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"Latn-001", "en-Latn-US"},
		{"en-001", "en-Latn-US"},
		{"es-419", "es-Latn-419"},
		{"he-145", "he-Hebr-IL"},
		{"ky-145", "ky-Latn-TR"},
		{"kk", "kk-Cyrl-KZ"},
		// Don't specialize duplicate and ambiguous matches.
		{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
		{"ku-145", "ku-Latn-TR"},  // Matches IQ, TR, and LB, but kk -> TR.
		{"und-Arab-CC", "ms-Arab-CC"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"und-Hans-CC", "zh-Hans-CC"},
		{"und-CC", "en-Latn-CC"},
		{"sr", "sr-Cyrl-RS"},
		{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
		// We would like addLikelySubtags to generate the same results if the
		// input only changes by adding tags that would otherwise have been
		// added by the expansion.
		// In other words:
		//     und-AA -> xx-Scrp-AA   implies und-Scrp-AA -> xx-Scrp-AA
		//     und-AA -> xx-Scrp-AA   implies xx-AA -> xx-Scrp-AA
		//     und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
		//     und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
		//     xx -> xx-Scrp-AA       implies xx-Scrp -> xx-Scrp-AA
		//     xx -> xx-Scrp-AA       implies xx-AA -> xx-Scrp-AA
		//
		// The algorithm specified in
		//   https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
		// Section C.10, does not handle the first case. For example,
		// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but
		// there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ
		// would expand to en-Latn-BJ, violating the aforementioned principle.
		// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
		// if a rule of the form und-AA -> xx-Scrp-AA is defined.
		// Note that as of version 23, CLDR has some explicitly specified
		// entries that do not conform to these rules. The implementation
		// will not correct these explicit inconsistencies. A later version of
		// CLDR is supposed to fix this.
		{"und-Latn-BJ", "fr-Latn-BJ"},
		{"und-Bugi-ID", "bug-Bugi-ID"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "ar-Arab-AA"},
		{"und-Afak-RE", "fr-Afak-RE"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"abp-Arab-GB", "abp-Arab-GB"},
		// script has preference over region
		{"und-Arab-NL", "ar-Arab-NL"},
		{"zza", "zza-Latn-TR"},
		// preserve variants and extensions
		{"de-1901", "de-Latn-DE-1901"},
		{"de-x-abc", "de-Latn-DE-x-abc"},
		{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tc := range cases {
		parsed, _ := Parse(tc.in)
		want, _ := Parse(tc.out)
		got, _ := parsed.addLikelySubtags()
		if got.String() == want.String() {
			continue
		}
		t.Errorf("%d: add(%s) was %s; want %s", i, tc.in, got, tc.out)
	}
}
// TestMinimize checks Tag.minimize against the expected reduced form, and
// checks that expanding the minimized tag gives the same result as expanding
// the original input.
func TestMinimize(t *testing.T) {
	cases := []struct{ in, out string }{
		{"aa", "aa"},
		{"aa-Latn", "aa"},
		{"aa-Latn-ET", "aa"},
		{"aa-ET", "aa"},
		{"aa-Arab", "aa-Arab"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"aa-Arab-ET", "aa-Arab"},
		{"und", "und"},
		{"und-Latn", "und"},
		{"und-Latn-US", "und"},
		{"en-Latn-US", "en"},
		{"cmn", "cmn"},
		{"cmn-Hans", "cmn-Hans"},
		{"cmn-Hant", "cmn-Hant"},
		{"zh-AU", "zh-AU"},
		{"zh-VN", "zh-VN"},
		{"zh-SG", "zh-SG"},
		{"zh-Hant", "zh-Hant"},
		{"zh-Hant-TW", "zh-TW"},
		{"zh-Hans", "zh"},
		{"zh-Hani", "zh-Hani"},
		{"und-Hans", "und-Hans"},
		{"und-Hani", "und-Hani"},

		{"und-CW", "und-CW"},
		{"und-YT", "und-YT"},
		{"und-Arab", "und-Arab"},
		{"und-AM", "und-AM"},
		{"und-Arab-CC", "und-Arab-CC"},
		{"und-CC", "und-CC"},
		{"und-Latn-BJ", "und-BJ"},
		{"und-Bugi-ID", "und-Bugi"},
		{"bug-Bugi-ID", "bug-Bugi"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "und-Arab-AA"},
		// preserve variants and extensions
		{"de-Latn-1901", "de-1901"},
		{"de-Latn-x-abc", "de-x-abc"},
		{"de-DE-1901-x-abc", "de-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tc := range cases {
		parsed, _ := Parse(tc.in)
		want, _ := Parse(tc.out)
		reduced, _ := parsed.minimize()
		if reduced.String() != want.String() {
			t.Errorf("%d: min(%s) was %s; want %s", i, tc.in, reduced, tc.out)
		}
		// Minimizing must not change what the tag expands to.
		reExpanded, _ := reduced.addLikelySubtags()
		direct, _ := parsed.addLikelySubtags()
		if direct.String() != reExpanded.String() {
			t.Errorf("%d: max(min(%s)) = %s; want %s", i, tc.in, reExpanded, direct)
		}
	}
}

Some files were not shown because too many files have changed in this diff Show More