whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,187 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"fmt"
+	"sort"
+
+	"golang.org/x/text/internal/language"
+)
+
+// The Coverage interface is used to define the level of coverage of an
+// internationalization service. Note that not all types are supported by all
+// services. As lists may be generated on the fly, it is recommended that users
+// of a Coverage cache the results.
+//
+// A method may return nil for a type the service does not list; the Supported
+// value in this package, for example, returns nil from Tags.
+type Coverage interface {
+	// Tags returns the list of supported tags.
+	Tags() []Tag
+
+	// BaseLanguages returns the list of supported base languages.
+	BaseLanguages() []Base
+
+	// Scripts returns the list of supported scripts.
+	Scripts() []Script
+
+	// Regions returns the list of supported regions.
+	Regions() []Region
+}
+
+var (
+	// Supported defines a Coverage that lists all supported subtags, that is,
+	// base languages, scripts and regions. Tags always returns nil.
+	Supported Coverage = allSubtags{}
+)
+
+// TODO:
+// - Support Variants, numbering systems.
+// - CLDR coverage levels.
+// - Set of common tags defined in this package.
+
+// allSubtags is the Coverage implementation behind Supported. It carries no
+// state; its methods build their results on each call.
+type allSubtags struct{}
+
+// Regions lists every supported region. Region IDs form one consecutive range,
+// so the result is simply those IDs in increasing order, starting at 1. The
+// "undefined" region is not returned.
+func (s allSubtags) Regions() []Region {
+	regions := make([]Region, 0, language.NumRegions)
+	for id := 1; id <= language.NumRegions; id++ {
+		regions = append(regions, Region{language.Region(id)})
+	}
+	return regions
+}
+
+// Scripts lists every supported script. Script IDs form one consecutive range,
+// so the result is simply those IDs in increasing order, starting at 1. The
+// "undefined" script is not returned.
+func (s allSubtags) Scripts() []Script {
+	scripts := make([]Script, 0, language.NumScripts)
+	for id := 1; id <= language.NumScripts; id++ {
+		scripts = append(scripts, Script{language.Script(id)})
+	}
+	return scripts
+}
+
+// BaseLanguages returns the list of all supported base languages. The values
+// come from the internal language package and are wrapped, in the same order,
+// in this package's Base type.
+func (s allSubtags) BaseLanguages() []Base {
+	ids := language.BaseLanguages()
+	out := make([]Base, 0, len(ids))
+	for _, id := range ids {
+		out = append(out, Base{id})
+	}
+	return out
+}
+
+// Tags always returns nil: allSubtags enumerates individual subtags only and
+// provides no list of complete tags.
+func (s allSubtags) Tags() []Tag {
+	return nil
+}
+
+// coverage is used by NewCoverage which is used as a convenient way for
+// creating Coverage implementations for partially defined data. Very often a
+// package will only need to define a subset of slices. coverage provides a
+// convenient way to do this. Moreover, packages using NewCoverage, instead of
+// their own implementation, will not break if later new slice types are added.
+//
+// Each field is an optional source for the method of the corresponding type; a
+// nil field means that no list was specified for that type.
+type coverage struct {
+	tags    func() []Tag    // source for Tags
+	bases   func() []Base   // source for BaseLanguages; derived from tags if nil
+	scripts func() []Script // source for Scripts
+	regions func() []Region // source for Regions
+}
+
+// Tags returns the result of calling s.tags, or nil if no source for tags was
+// specified.
+func (s *coverage) Tags() []Tag {
+	if s.tags != nil {
+		return s.tags()
+	}
+	return nil
+}
+
+// bases implements sort.Interface and is used to sort base languages in
+// increasing order of their langID.
+type bases []Base
+
+// Len returns the number of base languages in b.
+func (b bases) Len() int {
+	return len(b)
+}
+
+// Swap exchanges the elements at positions i and j.
+func (b bases) Swap(i, j int) {
+	b[i], b[j] = b[j], b[i]
+}
+
+// Less reports whether the langID of element i is smaller than that of
+// element j.
+func (b bases) Less(i, j int) bool {
+	return b[i].langID < b[j].langID
+}
+
+// BaseLanguages returns the result from calling s.bases if it is specified.
+// Otherwise it derives the set of supported base languages from Tags: the base
+// language of every tag is collected, sorted by langID and de-duplicated. It
+// returns nil if there are no tags to derive the set from.
+func (s *coverage) BaseLanguages() []Base {
+	if s.bases != nil {
+		return s.bases()
+	}
+	tags := s.Tags()
+	if len(tags) == 0 {
+		return nil
+	}
+	all := make([]Base, len(tags))
+	for i, t := range tags {
+		all[i] = Base{language.Language(t.lang())}
+	}
+	sort.Sort(bases(all))
+	// Compact in place: equal values are adjacent after sorting, so an
+	// element is kept only if it differs from the last one kept.
+	uniq := all[:1]
+	for _, b := range all[1:] {
+		if uniq[len(uniq)-1] != b {
+			uniq = append(uniq, b)
+		}
+	}
+	return uniq
+}
+
+// Scripts returns the result of calling s.scripts, or nil if no source for
+// scripts was specified.
+func (s *coverage) Scripts() []Script {
+	if s.scripts != nil {
+		return s.scripts()
+	}
+	return nil
+}
+
+// Regions returns the result of calling s.regions, or nil if no source for
+// regions was specified.
+func (s *coverage) Regions() []Region {
+	if s.regions != nil {
+		return s.regions()
+	}
+	return nil
+}
+
+// NewCoverage returns a Coverage for the given lists. It is typically used by
+// packages providing internationalization services to define their level of
+// coverage. A list may be of type []T or func() []T, where T is either Tag,
+// Base, Script or Region. The returned Coverage derives the value for
+// BaseLanguages from Tags if no func or slice for []Base is specified. For
+// other unspecified types the returned Coverage will return nil for the
+// respective methods.
+//
+// If more than one list is given for the same T, the last one wins. Slices are
+// not copied: the returned Coverage hands out the slice that was passed in.
+// NewCoverage panics if an element of list has any other type.
+func NewCoverage(list ...interface{}) Coverage {
+	s := &coverage{}
+	for _, x := range list {
+		switch v := x.(type) {
+		case func() []Base:
+			s.bases = v
+		case func() []Script:
+			s.scripts = v
+		case func() []Region:
+			s.regions = v
+		case func() []Tag:
+			s.tags = v
+		case []Base:
+			// Plain slices are wrapped so that coverage only has to deal with
+			// funcs.
+			s.bases = func() []Base { return v }
+		case []Script:
+			s.scripts = func() []Script { return v }
+		case []Region:
+			s.regions = func() []Region { return v }
+		case []Tag:
+			s.tags = func() []Tag { return v }
+		default:
+			panic(fmt.Sprintf("language: unsupported set type %T", v))
+		}
+	}
+	return s
+}
@@ -0,0 +1,156 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"golang.org/x/text/internal/language"
+)
+
+// TestSupported checks every list method of Supported, calling each by name
+// through reflection: the number of entries must match the count on record
+// and no two entries may have the same string representation.
+func TestSupported(t *testing.T) {
+	// To prove the results are correct for a type, we test that the number of
+	// results is identical to the number of results on record, that all results
+	// are distinct and that all results are valid.
+	tests := map[string]int{
+		"BaseLanguages": language.NumLanguages,
+		"Scripts":       language.NumScripts,
+		"Regions":       language.NumRegions,
+		"Tags":          0,
+	}
+	sup := reflect.ValueOf(Supported)
+	for name, num := range tests {
+		// Each method takes no arguments and returns a single slice.
+		v := sup.MethodByName(name).Call(nil)[0]
+		if n := v.Len(); n != num {
+			t.Errorf("len(%s()) was %d; want %d", name, n, num)
+		}
+		dup := make(map[string]bool)
+		for i := 0; i < v.Len(); i++ {
+			x := v.Index(i).Interface()
+			// An invalid value will either cause a crash or result in a
+			// duplicate when passed to Sprint.
+			s := fmt.Sprint(x)
+			if dup[s] {
+				t.Errorf("%s: duplicate entry %q", name, s)
+			}
+			dup[s] = true
+		}
+		if len(dup) != v.Len() {
+			t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
+		}
+	}
+}
+
+// TestNewCoverage verifies that NewCoverage returns, for each combination of
+// lists passed either as slices or as funcs, the expected result from every
+// Coverage method. Fields left unset in a test case are expected to be nil.
+func TestNewCoverage(t *testing.T) {
+	bases := []Base{Base{0}, Base{3}, Base{7}}
+	scripts := []Script{Script{11}, Script{17}, Script{23}}
+	regions := []Region{Region{101}, Region{103}, Region{107}}
+	tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
+	// The func variants return the same slices as above.
+	fbases := func() []Base { return bases }
+	fscripts := func() []Script { return scripts }
+	fregions := func() []Region { return regions }
+	ftags := func() []Tag { return tags }
+
+	tests := []struct {
+		desc    string
+		list    []interface{}
+		bases   []Base
+		scripts []Script
+		regions []Region
+		tags    []Tag
+	}{
+		{
+			desc: "empty",
+		},
+		{
+			desc:  "bases",
+			list:  []interface{}{bases},
+			bases: bases,
+		},
+		{
+			desc:    "scripts",
+			list:    []interface{}{scripts},
+			scripts: scripts,
+		},
+		{
+			desc:    "regions",
+			list:    []interface{}{regions},
+			regions: regions,
+		},
+		{
+			// Without explicit bases, they are derived from the tags, sorted
+			// and without duplicates.
+			desc:  "bases derives from tags",
+			list:  []interface{}{tags},
+			bases: []Base{Base{_en}, Base{_pt}},
+			tags:  tags,
+		},
+		{
+			desc:  "tags and bases",
+			list:  []interface{}{tags, bases},
+			bases: bases,
+			tags:  tags,
+		},
+		{
+			desc:    "fully specified",
+			list:    []interface{}{tags, bases, scripts, regions},
+			bases:   bases,
+			scripts: scripts,
+			regions: regions,
+			tags:    tags,
+		},
+		// The remaining cases repeat the ones above using funcs. Some share a
+		// description with their slice counterpart; the index printed on
+		// failure tells them apart.
+		{
+			desc:  "bases func",
+			list:  []interface{}{fbases},
+			bases: bases,
+		},
+		{
+			desc:    "scripts func",
+			list:    []interface{}{fscripts},
+			scripts: scripts,
+		},
+		{
+			desc:    "regions func",
+			list:    []interface{}{fregions},
+			regions: regions,
+		},
+		{
+			desc:  "tags func",
+			list:  []interface{}{ftags},
+			bases: []Base{Base{_en}, Base{_pt}},
+			tags:  tags,
+		},
+		{
+			desc:  "tags and bases",
+			list:  []interface{}{ftags, fbases},
+			bases: bases,
+			tags:  tags,
+		},
+		{
+			desc:    "fully specified",
+			list:    []interface{}{ftags, fbases, fscripts, fregions},
+			bases:   bases,
+			scripts: scripts,
+			regions: regions,
+			tags:    tags,
+		},
+	}
+
+	for i, tt := range tests {
+		l := NewCoverage(tt.list...)
+		if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
+			t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
+		}
+		if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
+			t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
+		}
+		if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
+			t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
+		}
+		if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
+			t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
+		}
+	}
+}
@@ -0,0 +1,92 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package display
+
+// This file contains sets of data for specific languages. Users can use these
+// to create smaller collections of supported languages and reduce total table
+// size.
+
+// The variable names defined here correspond to those in package language.
+
+var (
+	Afrikaans            *Dictionary = &af        // af
+	Amharic              *Dictionary = &am        // am
+	Arabic               *Dictionary = &ar        // ar
+	ModernStandardArabic *Dictionary = Arabic     // ar-001
+	Azerbaijani          *Dictionary = &az        // az
+	Bulgarian            *Dictionary = &bg        // bg
+	Bengali              *Dictionary = &bn        // bn
+	Catalan              *Dictionary = &ca        // ca
+	Czech                *Dictionary = &cs        // cs
+	Danish               *Dictionary = &da        // da
+	German               *Dictionary = &de        // de
+	Greek                *Dictionary = &el        // el
+	English              *Dictionary = &en        // en
+	AmericanEnglish      *Dictionary = English    // en-US
+	BritishEnglish       *Dictionary = English    // en-GB
+	Spanish              *Dictionary = &es        // es
+	EuropeanSpanish      *Dictionary = Spanish    // es-ES
+	LatinAmericanSpanish *Dictionary = Spanish    // es-419
+	Estonian             *Dictionary = &et        // et
+	Persian              *Dictionary = &fa        // fa
+	Finnish              *Dictionary = &fi        // fi
+	Filipino             *Dictionary = &fil       // fil
+	French               *Dictionary = &fr        // fr
+	Gujarati             *Dictionary = &gu        // gu
+	Hebrew               *Dictionary = &he        // he
+	Hindi                *Dictionary = &hi        // hi
+	Croatian             *Dictionary = &hr        // hr
+	Hungarian            *Dictionary = &hu        // hu
+	Armenian             *Dictionary = &hy        // hy
+	Indonesian           *Dictionary = &id        // id
+	Icelandic            *Dictionary = &is        // is
+	Italian              *Dictionary = &it        // it
+	Japanese             *Dictionary = &ja        // ja
+	Georgian             *Dictionary = &ka        // ka
+	Kazakh               *Dictionary = &kk        // kk
+	Khmer                *Dictionary = &km        // km
+	Kannada              *Dictionary = &kn        // kn
+	Korean               *Dictionary = &ko        // ko
+	Kirghiz              *Dictionary = &ky        // ky
+	Lao                  *Dictionary = &lo        // lo
+	Lithuanian           *Dictionary = &lt        // lt
+	Latvian              *Dictionary = &lv        // lv
+	Macedonian           *Dictionary = &mk        // mk
+	Malayalam            *Dictionary = &ml        // ml
+	Mongolian            *Dictionary = &mn        // mn
+	Marathi              *Dictionary = &mr        // mr
+	Malay                *Dictionary = &ms        // ms
+	Burmese              *Dictionary = &my        // my
+	Nepali               *Dictionary = &ne        // ne
+	Dutch                *Dictionary = &nl        // nl
+	Norwegian            *Dictionary = &no        // no
+	Punjabi              *Dictionary = &pa        // pa
+	Polish               *Dictionary = &pl        // pl
+	Portuguese           *Dictionary = &pt        // pt
+	BrazilianPortuguese  *Dictionary = Portuguese // pt-BR
+	EuropeanPortuguese   *Dictionary = &ptPT      // pt-PT
+	Romanian             *Dictionary = &ro        // ro
+	Russian              *Dictionary = &ru        // ru
+	Sinhala              *Dictionary = &si        // si
+	Slovak               *Dictionary = &sk        // sk
+	Slovenian            *Dictionary = &sl        // sl
+	Albanian             *Dictionary = &sq        // sq
+	Serbian              *Dictionary = &sr        // sr
+	SerbianLatin         *Dictionary = &srLatn    // sr-Latn
+	Swedish              *Dictionary = &sv        // sv
+	Swahili              *Dictionary = &sw        // sw
+	Tamil                *Dictionary = &ta        // ta
+	Telugu               *Dictionary = &te        // te
+	Thai                 *Dictionary = &th        // th
+	Turkish              *Dictionary = &tr        // tr
+	Ukrainian            *Dictionary = &uk        // uk
+	Urdu                 *Dictionary = &ur        // ur
+	Uzbek                *Dictionary = &uz        // uz
+	Vietnamese           *Dictionary = &vi        // vi
+	Chinese              *Dictionary = &zh        // zh
+	SimplifiedChinese    *Dictionary = Chinese    // zh-Hans
+	TraditionalChinese   *Dictionary = &zhHant    // zh-Hant
+	Zulu                 *Dictionary = &zu        // zu
+)
@@ -0,0 +1,39 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package display
+
+import (
+	"fmt"
+	"testing"
+
+	"golang.org/x/text/internal/testtext"
+)
+
+// TestLinking checks that a program that only references the English
+// Dictionary links to a binary that is at least 1.5MB smaller than one that
+// uses the generic display.Tags factory. The test is skipped by getSize if the
+// binary size cannot be determined.
+func TestLinking(t *testing.T) {
+	base := getSize(t, `display.Tags(language.English).Name(language.English)`)
+	compact := getSize(t, `display.English.Languages().Name(language.English)`)
+
+	// A difference of exactly 1.5MB passes, hence ">=" in the message.
+	if d := base - compact; d < 1.5*1024*1024 {
+		t.Errorf("size(base) - size(compact) = %d - %d = %d; want >= 1.5MB", base, compact, d)
+	}
+}
+
+// getSize returns the code size reported by testtext.CodeSize for a program
+// built from the body template with main as the body of its main function. It
+// skips the calling test if the size cannot be determined.
+func getSize(t *testing.T, main string) int {
+	size, err := testtext.CodeSize(fmt.Sprintf(body, main))
+	if err != nil {
+		t.Skipf("skipping link size test; binary size could not be determined: %v", err)
+	}
+	return size
+}
+
+// body is the template of the program measured by getSize. The single %s verb
+// is replaced by the statement under test.
+const body = `package main
+import (
+	"golang.org/x/text/language"
+	"golang.org/x/text/language/display"
+)
+func main() {
+	%s
+}
+`
@@ -0,0 +1,420 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run maketables.go -output tables.go
+
+// Package display provides display names for languages, scripts and regions in
+// a requested language.
+//
+// The data is based on CLDR's localeDisplayNames. It includes the names of the
+// draft level "contributed" or "approved". The resulting tables are quite
+// large. The display package is designed so that users can reduce the linked-in
+// table sizes by cherry picking the languages one wishes to support. There is a
+// Dictionary defined for a selected set of common languages for this purpose.
+package display // import "golang.org/x/text/language/display"
+
+import (
+	"fmt"
+	"strings"
+
+	"golang.org/x/text/internal/format"
+	"golang.org/x/text/language"
+)
+
+/*
+TODO:
+All fairly low priority at the moment:
+  - Include alternative and variants as an option (using func options).
+  - Option for returning the empty string for undefined values.
+  - Support variants, currencies, time zones, option names and other data
+    provided in CLDR.
+  - Do various optimizations:
+    - Reduce size of offset tables.
+    - Consider compressing infrequently used languages and decompress on demand.
+*/
+
+// A Formatter formats a tag in the current language. It is used in conjunction
+// with the message package.
+type Formatter struct {
+	// lookup returns the name of x in the display language with index tag, or
+	// the empty string if there is none.
+	lookup func(tag int, x interface{}) string
+	// x is the value to be named.
+	x interface{}
+}
+
+// Format implements "golang.org/x/text/internal/format".Formatter.
+//
+// It writes the name of f.x in the language of state. If no name is found it
+// writes the name for language.Und followed by f.x in parentheses, or, if that
+// name is missing as well, f.x in the form "[language: x]".
+func (f Formatter) Format(state format.State, verb rune) {
+	// TODO: there are a lot of inefficiencies in this code. Fix it once
+	// language.Tag has embedded compact tags.
+	_, index, _ := matcher.Match(state.Language())
+	if name := f.lookup(index, f.x); name != "" {
+		state.Write([]byte(name))
+		return
+	}
+	// TODO: use language-specific punctuation.
+	// TODO: use codePattern instead of language?
+	unknown := f.lookup(index, language.Und)
+	if unknown == "" {
+		fmt.Fprintf(state, "[language: %v]", f.x)
+		return
+	}
+	fmt.Fprintf(state, "%v (%v)", unknown, f.x)
+}
+
+// Language returns a Formatter that renders the name for lang in the
+// current language. lang may be a language.Base or a language.Tag.
+// It renders lang in the default language if no translation for the current
+// language is supported.
+func Language(lang interface{}) Formatter {
+	return Formatter{lookup: langFunc, x: lang}
+}
+
+// Region returns a Formatter that renders the name for region in the current
+// language. region may be a language.Region or a language.Tag.
+// It renders region in the default language if no translation for the current
+// language is supported.
+func Region(region interface{}) Formatter {
+	return Formatter{lookup: regionFunc, x: region}
+}
+
+// Script returns a Formatter that renders the name for script in the current
+// language. script may be a language.Script or a language.Tag.
+// It renders script in the default language if no translation for the current
+// language is supported.
+func Script(script interface{}) Formatter {
+	return Formatter{lookup: scriptFunc, x: script}
+}
+
+// Tag returns a Formatter that renders the name for tag in the current
+// language. tag may be a language.Tag.
+// It renders tag in the default language if no translation for the current
+// language is supported.
+func Tag(tag interface{}) Formatter {
+	return Formatter{lookup: tagFunc, x: tag}
+}
+
+// A Namer is used to get the name for a given value, such as a Tag, Language,
+// Script or Region. Which types a particular Namer accepts is documented with
+// the function or method that returns it.
+type Namer interface {
+	// Name returns a display string for the given value. A Namer returns an
+	// empty string for values it does not support. A Namer may support naming
+	// an unspecified value. For example, when getting the name for a region for
+	// a tag that does not have a defined Region, it may return the name for an
+	// unknown region. It is up to the user to filter calls to Name for values
+	// for which one does not want to have a name string.
+	Name(x interface{}) string
+}
+
+var (
+	// Supported lists the languages for which names are defined. It is set
+	// during package initialization.
+	Supported language.Coverage
+
+	// Values is the set of all possible values for which names are defined.
+	// Note that not all Namer implementations will cover all the values of a
+	// given type. A Namer will return the empty string for unsupported values.
+	// It is set during package initialization.
+	Values language.Coverage
+
+	// matcher matches a requested tag against the supported languages. It is
+	// set during package initialization.
+	matcher language.Matcher
+)
+
+// init parses the supported string into numSupported language tags and uses
+// them to set up matcher and Supported. It also sets up Values.
+func init() {
+	tags := make([]language.Tag, numSupported)
+	s := supported
+	for i := range tags {
+		// Every entry in supported is followed by a '|'. The slice expression
+		// below would panic if a separator were missing.
+		p := strings.IndexByte(s, '|')
+		tags[i] = language.Raw.Make(s[:p])
+		s = s[p+1:]
+	}
+	// The index returned by matcher.Match is used throughout this file to
+	// select the tables for a display language.
+	matcher = language.NewMatcher(tags)
+	Supported = language.NewCoverage(tags)
+
+	Values = language.NewCoverage(langTagSet.Tags, supportedScripts, supportedRegions)
+}
+
+// Languages returns a Namer for naming languages. It returns nil if there is no
+// data for the given tag. The type passed to Name must be either language.Base
+// or language.Tag. Note that the result may differ between passing a tag or its
+// base language. For example, for English, passing "nl-BE" would return Flemish
+// whereas passing "nl" returns "Dutch".
+func Languages(t language.Tag) Namer {
+	_, index, conf := matcher.Match(t)
+	if conf == language.No {
+		return nil
+	}
+	return languageNamer(index)
+}
+
+// languageNamer is a Namer for languages. Its value is the index of the
+// display language in langHeaders.
+type languageNamer int
+
+// langFunc is the lookup function of the Formatter returned by Language. It
+// names x using the display language with index i.
+func langFunc(i int, x interface{}) string {
+	return nameLanguage(languageNamer(i), x)
+}
+
+// name returns the name of entry i in the language table of n, consulting the
+// parent tables via lookup. It returns "" if no name is found.
+func (n languageNamer) name(i int) string {
+	return lookup(langHeaders[:], int(n), i)
+}
+
+// Name implements the Namer interface for language names.
+func (n languageNamer) Name(x interface{}) string {
+	return nameLanguage(n, x)
+}
+
+// nonEmptyIndex walks up the parent chain, starting at index, until it finds a
+// header with non-empty data. It returns the index of that header, or -1 if
+// the chain ends before one is found.
+func nonEmptyIndex(h []header, index int) int {
+	for index != -1 {
+		if h[index].data != "" {
+			break
+		}
+		index = int(parents[index])
+	}
+	return index
+}
+
+// Scripts returns a Namer for naming scripts. It returns nil if there is no
+// data for the given tag. The type passed to Name must be either a
+// language.Script or a language.Tag. It will not attempt to infer a script for
+// tags with an unspecified script.
+func Scripts(t language.Tag) Namer {
+	_, index, conf := matcher.Match(t)
+	if conf == language.No {
+		return nil
+	}
+	// Use the nearest table in the parent chain that actually has data.
+	index = nonEmptyIndex(scriptHeaders[:], index)
+	if index == -1 {
+		return nil
+	}
+	return scriptNamer(index)
+}
+
+// scriptNamer is a Namer for scripts. Its value is the index of the display
+// language in scriptHeaders.
+type scriptNamer int
+
+// scriptFunc is the lookup function of the Formatter returned by Script. It
+// names x using the display language with index i.
+func scriptFunc(i int, x interface{}) string {
+	return nameScript(scriptNamer(i), x)
+}
+
+// name returns the name of entry i in the script table of n, consulting the
+// parent tables via lookup. It returns "" if no name is found.
+func (n scriptNamer) name(i int) string {
+	return lookup(scriptHeaders[:], int(n), i)
+}
+
+// Name implements the Namer interface for script names.
+func (n scriptNamer) Name(x interface{}) string {
+	return nameScript(n, x)
+}
+
+// Regions returns a Namer for naming regions. It returns nil if there is no
+// data for the given tag. The type passed to Name must be either a
+// language.Region or a language.Tag. It will not attempt to infer a region for
+// tags with an unspecified region.
+func Regions(t language.Tag) Namer {
+	_, index, conf := matcher.Match(t)
+	if conf == language.No {
+		return nil
+	}
+	// Use the nearest table in the parent chain that actually has data.
+	index = nonEmptyIndex(regionHeaders[:], index)
+	if index == -1 {
+		return nil
+	}
+	return regionNamer(index)
+}
+
+// regionNamer is a Namer for regions. Its value is the index of the display
+// language in regionHeaders.
+type regionNamer int
+
+// regionFunc is the lookup function of the Formatter returned by Region. It
+// names x using the display language with index i.
+func regionFunc(i int, x interface{}) string {
+	return nameRegion(regionNamer(i), x)
+}
+
+// name returns the name of entry i in the region table of n, consulting the
+// parent tables via lookup. It returns "" if no name is found.
+func (n regionNamer) name(i int) string {
+	return lookup(regionHeaders[:], int(n), i)
+}
+
+// Name implements the Namer interface for region names.
+func (n regionNamer) Name(x interface{}) string {
+	return nameRegion(n, x)
+}
+
+// Tags returns a Namer for giving a full description of a tag. The names of
+// scripts and regions that are not already implied by the language name will
+// be appended within parentheses. It returns nil if there is no data for the
+// given tag. The type passed to Name must be a tag.
+func Tags(t language.Tag) Namer {
+	_, index, conf := matcher.Match(t)
+	if conf == language.No {
+		return nil
+	}
+	return tagNamer(index)
+}
+
+// tagNamer is a Namer for complete tags. Its value is an index that is used
+// for the language, script and region tables alike.
+type tagNamer int
+
+// tagFunc is the lookup function of the Formatter returned by Tag. It names x
+// using the display language with index i.
+func tagFunc(i int, x interface{}) string {
+	return nameTag(languageNamer(i), scriptNamer(i), regionNamer(i), x)
+}
+
+// Name implements the Namer interface for tag names.
+func (n tagNamer) Name(x interface{}) string {
+	return nameTag(languageNamer(n), scriptNamer(n), regionNamer(n), x)
+}
+
+// lookup finds the name for entry want in a global table. It starts at the
+// dictionary with index dict and follows the parents chain until a non-empty
+// name is found. It returns "" if the chain ends (index -1) without a result.
+func lookup(table []header, dict, want int) string {
+	for d := dict; d != -1; d = int(parents[d]) {
+		if s := table[d].name(want); s != "" {
+			return s
+		}
+	}
+	return ""
+}
+
+// A Dictionary holds a collection of Namers for a single language. One can
+// reduce the amount of data linked in to a binary by only referencing
+// Dictionaries for the languages one needs to support instead of using the
+// generic Namer factories.
+type Dictionary struct {
+	// parent is consulted when this Dictionary has no name for a value. It is
+	// nil at the end of the chain.
+	parent *Dictionary
+	lang   header // language names
+	script header // script names
+	region header // region names
+}
+
+// Tags returns a Namer for giving a full description of a tag. The names of
+// scripts and regions that are not already implied by the language name will
+// be appended within parentheses. The type passed to Name must be a tag.
+// The returned Namer is never nil.
+func (d *Dictionary) Tags() Namer {
+	return dictTags{d}
+}
+
+// dictTags is the Namer returned by Dictionary.Tags.
+type dictTags struct {
+	d *Dictionary
+}
+
+// Name implements the Namer interface for tag names.
+func (n dictTags) Name(x interface{}) string {
+	return nameTag(dictLanguages{n.d}, dictScripts{n.d}, dictRegions{n.d}, x)
+}
+
+// Languages returns a Namer for naming languages. The type passed to Name must
+// be either language.Base or language.Tag. Note that the result may differ
+// between passing a tag or its base language. For example, for English,
+// passing "nl-BE" would return Flemish whereas passing "nl" returns "Dutch".
+// The returned Namer is never nil.
+func (d *Dictionary) Languages() Namer {
+	return dictLanguages{d}
+}
+
+// dictLanguages is the Namer returned by Dictionary.Languages.
+type dictLanguages struct {
+	d *Dictionary
+}
+
+// name returns the name of language entry i. It tries the Dictionary itself
+// first and then each parent in turn, and returns "" if none has a name.
+func (n dictLanguages) name(i int) string {
+	for d := n.d; d != nil; d = d.parent {
+		if s := d.lang.name(i); s != "" {
+			return s
+		}
+	}
+	return ""
+}
+
+// Name implements the Namer interface for language names.
+func (n dictLanguages) Name(x interface{}) string {
+	return nameLanguage(n, x)
+}
+
+// Scripts returns a Namer for naming scripts. The type passed to Name must be
+// either a language.Script or a language.Tag. It will not attempt to infer a
+// script for tags with an unspecified script. The returned Namer is never nil.
+func (d *Dictionary) Scripts() Namer {
+	return dictScripts{d}
+}
+
+// dictScripts is the Namer returned by Dictionary.Scripts.
+type dictScripts struct {
+	d *Dictionary
+}
+
+// name returns the name of script entry i. It tries the Dictionary itself
+// first and then each parent in turn, and returns "" if none has a name.
+func (n dictScripts) name(i int) string {
+	for d := n.d; d != nil; d = d.parent {
+		if s := d.script.name(i); s != "" {
+			return s
+		}
+	}
+	return ""
+}
+
+// Name implements the Namer interface for script names.
+func (n dictScripts) Name(x interface{}) string {
+	return nameScript(n, x)
+}
+
+// Regions returns a Namer for naming regions. The type passed to Name must be
+// either a language.Region or a language.Tag. It will not attempt to infer a
+// region for tags with an unspecified region. The returned Namer is never nil.
+func (d *Dictionary) Regions() Namer {
+	return dictRegions{d}
+}
+
+// dictRegions is the Namer returned by Dictionary.Regions.
+type dictRegions struct {
+	d *Dictionary
+}
+
+// name returns the name of region entry i. It tries the Dictionary itself
+// first and then each parent in turn, and returns "" if none has a name.
+func (n dictRegions) name(i int) string {
+	for d := n.d; d != nil; d = d.parent {
+		if s := d.region.name(i); s != "" {
+			return s
+		}
+	}
+	return ""
+}
+
+// Name implements the Namer interface for region names.
+func (n dictRegions) Name(x interface{}) string {
+	return nameRegion(n, x)
+}
+
+// A SelfNamer implements a Namer that returns the name of a language in that
+// same language. It provides a very compact mechanism to provide a
+// comprehensive list of languages to users in their native language.
+type SelfNamer struct {
+	// Supported defines the values supported by this Namer.
+	Supported language.Coverage
+}
+
+var (
+	// Self is a shared instance of a SelfNamer.
+	Self *SelfNamer = &self
+
+	// self backs Self. Its Supported coverage lists the tags of selfTagSet.
+	self = SelfNamer{language.NewCoverage(selfTagSet.Tags)}
+)
+
+// Name returns the name of a given language tag in the language identified by
+// this tag. It supports both the language.Base and language.Tag types. It
+// returns the empty string if selfTagSet has no entry for the tag or if the
+// script of the entry found is not the script expected for the base language.
+func (n SelfNamer) Name(x interface{}) string {
+	// The error from Compose is discarded; t is used as returned.
+	t, _ := language.All.Compose(x)
+	base, scr, reg := t.Raw()
+	baseScript := language.Script{}
+	if (scr == language.Script{} && reg != language.Region{}) {
+		// For looking up in the self dictionary, we need to select the
+		// maximized script. This is even the case if the script isn't
+		// specified.
+		s1, _ := t.Script()
+		// Only set the script if it differs from the one for the bare base
+		// language.
+		if baseScript = getScript(base); baseScript != s1 {
+			scr = s1
+		}
+	}
+
+	// index reports the entry to use along with the script and region that
+	// belong to it; an index of -1 is treated as "no entry".
+	i, scr, reg := selfTagSet.index(base, scr, reg)
+	if i == -1 {
+		return ""
+	}
+
+	// Only return the display name if the script matches the expected script.
+	if (scr != language.Script{}) {
+		// baseScript may not have been computed above.
+		if (baseScript == language.Script{}) {
+			baseScript = getScript(base)
+		}
+		if baseScript != scr {
+			return ""
+		}
+	}
+
+	return selfHeaders[0].name(i)
+}
+
+// getScript returns the maximized script for a base language. The error from
+// Compose and the second result of Script are ignored.
+func getScript(b language.Base) language.Script {
+	tag, _ := language.Raw.Compose(b)
+	scr, _ := tag.Script()
+	return scr
+}
@@ -0,0 +1,714 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package display
+
+import (
+	"fmt"
+	"reflect"
+	"strings"
+	"testing"
+	"unicode"
+
+	"golang.org/x/text/internal/testtext"
+	"golang.org/x/text/language"
+	"golang.org/x/text/message"
+)
+
+// TODO: test that tables are properly dropped by the linker for various use
+// cases.
+
+// Boundary values shared by the table-driven tests below. Judging by their
+// names they are the first and last two-letter languages, three-letter
+// languages and long tags of the data tables -- TODO confirm against the
+// generated tables.
+var (
+	firstLang2aa  = language.MustParseBase("aa")
+	lastLang2zu   = language.MustParseBase("zu")
+	firstLang3ace = language.MustParseBase("ace")
+	lastLang3zza  = language.MustParseBase("zza")
+	firstTagAr001 = language.MustParse("ar-001")
+	lastTagZhHant = language.MustParse("zh-Hant")
+)
+
+// TestValues checks, for every tag, region and script listed in Values, that
+// the English namers produce a non-empty name; English is assumed to be the
+// most comprehensive data set. It also checks that the same namers return ""
+// for values that language.Supported lists but Values does not.
+func TestValues(t *testing.T) {
+	type testcase struct {
+		kind string
+		n    Namer
+	}
+	// wantName runs one subtest per namer and fails it when the namer has no
+	// name for x.
+	wantName := func(x interface{}, cases []testcase) {
+		for _, tc := range cases {
+			testtext.Run(t, fmt.Sprintf("%s.Name(%s)", tc.kind, x), func(t *testing.T) {
+				if tc.n.Name(x) != "" {
+					return
+				}
+				// As of version 28 there is no data for az-Arab in English,
+				// although there is useful data in other languages.
+				if x.(fmt.Stringer).String() == "az-Arab" {
+					return
+				}
+				t.Errorf("supported but no result")
+			})
+		}
+	}
+	// wantEmpty aborts the test as soon as a namer returns a name for x.
+	wantEmpty := func(x interface{}, cases []testcase) {
+		for _, tc := range cases {
+			got := tc.n.Name(x)
+			if got == "" {
+				continue
+			}
+			t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", tc.kind, x, got)
+		}
+	}
+
+	// Language tags.
+	seenTags := map[language.Tag]bool{}
+	tagNamers := []testcase{
+		{"Languages(en)", Languages(language.English)},
+		{"Tags(en)", Tags(language.English)},
+		{"English.Languages()", English.Languages()},
+		{"English.Tags()", English.Tags()},
+	}
+	for _, tag := range Values.Tags() {
+		wantName(tag, tagNamers)
+		seenTags[tag] = true
+	}
+	for _, base := range language.Supported.BaseLanguages() {
+		tag, _ := language.All.Compose(base)
+		if seenTags[tag] {
+			continue
+		}
+		wantEmpty(tag, tagNamers)
+	}
+
+	// Regions.
+	seenRegions := map[language.Region]bool{}
+	regionNamers := []testcase{
+		{"Regions(en)", Regions(language.English)},
+		{"English.Regions()", English.Regions()},
+	}
+	for _, r := range Values.Regions() {
+		wantName(r, regionNamers)
+		seenRegions[r] = true
+	}
+	for _, r := range language.Supported.Regions() {
+		canonical := r.Canonicalize()
+		if seenRegions[canonical] {
+			continue
+		}
+		wantEmpty(canonical, regionNamers)
+	}
+
+	// Scripts.
+	seenScripts := map[language.Script]bool{}
+	scriptNamers := []testcase{
+		{"Scripts(en)", Scripts(language.English)},
+		{"English.Scripts()", English.Scripts()},
+	}
+	for _, s := range Values.Scripts() {
+		wantName(s, scriptNamers)
+		seenScripts[s] = true
+	}
+	for _, s := range language.Supported.Scripts() {
+		// Canonicalize the script.
+		tag, _ := language.DeprecatedScript.Compose(s)
+		_, canonical, _ := tag.Raw()
+		if seenScripts[canonical] {
+			continue
+		}
+		wantEmpty(canonical, scriptNamers)
+	}
+}
+
+// TestSupported tests that we have at least some Namers for languages that we
+// claim to support. To test the claims in the documentation, it also verifies
+// that if a Namer is returned, it will have at least some data.
+func TestSupported(t *testing.T) {
+	supportedTags := Supported.Tags()
+	if len(supportedTags) != numSupported {
+		t.Errorf("number of supported was %d; want %d", len(supportedTags), numSupported)
+	}
+
+	namerFuncs := []struct {
+		kind string
+		fn   func(language.Tag) Namer
+	}{
+		{"Tags", Tags},
+		{"Languages", Languages},
+		{"Regions", Regions},
+		{"Scripts", Scripts},
+	}
+
+	// Verify that we have at least one Namer for all tags we claim to support.
+	tags := make(map[language.Tag]bool)
+	for _, tag := range supportedTags {
+		// Test we have at least one Namer for this supported Tag.
+		found := false
+		for _, kind := range namerFuncs {
+			if defined(t, kind.kind, kind.fn(tag), tag) {
+				found = true
+			}
+		}
+		if !found {
+			t.Errorf("%s: supported, but no data available", tag)
+		}
+		if tags[tag] {
+			t.Errorf("%s: included in Supported.Tags more than once", tag)
+		}
+		tags[tag] = true
+	}
+
+	// Verify that we have no Namers for tags we don't claim to support.
+	for _, base := range language.Supported.BaseLanguages() {
+		tag, _ := language.All.Compose(base)
+		// Skip tags that are supported after matching.
+		if _, _, conf := matcher.Match(tag); conf != language.No {
+			continue
+		}
+		// Test there are no Namers for this tag.
+		for _, kind := range namerFuncs {
+			if defined(t, kind.kind, kind.fn(tag), tag) {
+				t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", kind.kind, tag)
+			}
+		}
+	}
+}
+
+// defined reports whether n is a proper Namer, which means it is non-nil and
+// yields a non-empty name for at least one of the Values of the given kind.
+// A nil n returns false silently; a non-nil n without any content is reported
+// as a test error on t, using kind and tag in the message.
+func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
+	if n == nil {
+		return false
+	}
+	hasName := func(v interface{}) bool { return n.Name(v) != "" }
+
+	switch kind {
+	case "Tags":
+		for _, v := range Values.Tags() {
+			if hasName(v) {
+				return true
+			}
+		}
+	case "Languages":
+		for _, v := range Values.BaseLanguages() {
+			if hasName(v) {
+				return true
+			}
+		}
+	case "Regions":
+		for _, v := range Values.Regions() {
+			if hasName(v) {
+				return true
+			}
+		}
+	case "Scripts":
+		for _, v := range Values.Scripts() {
+			if hasName(v) {
+				return true
+			}
+		}
+	}
+	t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
+	return false
+}
+
+// TestCoverage checks that the English namers return a non-empty and unique
+// name for every tag, script and region in Values.
+func TestCoverage(t *testing.T) {
+	en := language.English
+	tests := []struct {
+		n Namer
+		x interface{}
+	}{
+		{Languages(en), Values.Tags()},
+		{Scripts(en), Values.Scripts()},
+		{Regions(en), Values.Regions()},
+	}
+	for i, tt := range tests {
+		// seen maps each name returned so far to the value that produced it.
+		seen := make(map[string]interface{})
+
+		// tt.x is a slice of a different element type per case, so it is
+		// walked through reflection.
+		values := reflect.ValueOf(tt.x)
+		for j := 0; j < values.Len(); j++ {
+			x := values.Index(j).Interface()
+			// As of version 28 there is no data for az-Arab in English,
+			// although there is useful data in other languages.
+			if x.(fmt.Stringer).String() == "az-Arab" {
+				continue
+			}
+			name := tt.n.Name(x)
+			switch prev := seen[name]; {
+			case name == "":
+				t.Errorf("%d:%d:%s: missing content", i, j, x)
+			case prev != nil:
+				t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, name, x, prev)
+			}
+			seen[name] = x
+		}
+	}
+}
+
+// TestUpdate tests whether dictionary entries for certain languages need to be
+// updated. For some languages, some of the headers may be empty or they may be
+// identical to the parent. Each predefined Dictionary is compared against the
+// headers at the index the matcher returns for its tag, so a mismatch after a
+// table update is detected.
+func TestUpdate(t *testing.T) {
+	tests := []struct {
+		d   *Dictionary
+		tag string
+	}{
+		{ModernStandardArabic, "ar-001"},
+		{AmericanEnglish, "en-US"},
+		{EuropeanSpanish, "es-ES"},
+		{BrazilianPortuguese, "pt-BR"},
+		{SimplifiedChinese, "zh-Hans"},
+	}
+
+	for _, tt := range tests {
+		want := language.MustParse(tt.tag)
+		_, idx, _ := matcher.Match(want)
+
+		langOK := reflect.DeepEqual(tt.d.lang, langHeaders[idx])
+		if !langOK {
+			t.Errorf("%s: lang table update needed", tt.tag)
+		}
+		scriptOK := reflect.DeepEqual(tt.d.script, scriptHeaders[idx])
+		if !scriptOK {
+			t.Errorf("%s: script table update needed", tt.tag)
+		}
+		regionOK := reflect.DeepEqual(tt.d.region, regionHeaders[idx])
+		if !regionOK {
+			t.Errorf("%s: region table update needed", tt.tag)
+		}
+	}
+}
+
+// TestIndex checks tagIndex.index and tagIndex.len on hand-written indexes:
+// every subtag stored in an index must be found at its running position, len
+// must equal the number of stored subtags, and subtags that are absent must
+// yield -1.
+func TestIndex(t *testing.T) {
+	absent := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}
+	tests := []tagIndex{
+		{"", "", ""},
+		{"bb", "", ""},
+		{"", "bbb", ""},
+		{"", "", "Bbbb"},
+		{"bb", "bbb", "Bbbb"},
+		{"bbccddyy", "bbbcccdddyyy", "BbbbCcccDdddYyyy"},
+	}
+	for i, tt := range tests {
+		// Walk the subtags in storage order; want is the index each one
+		// should be reported at.
+		want := 0
+		for slot, packed := range tt {
+			width := slot + 2
+			for off := 0; off < len(packed); off += width {
+				sub := packed[off : off+width]
+				if got := tt.index(sub); got != want {
+					t.Errorf("%d:%s: index was %d; want %d", i, sub, got, want)
+				}
+				want++
+			}
+		}
+		if got := tt.len(); got != want {
+			t.Errorf("%d: len was %d; want %d", i, got, want)
+		}
+		for _, sub := range absent {
+			if got := tt.index(sub); got != -1 {
+				t.Errorf("%d:%s: index was %d; want -1", i, sub, got)
+			}
+		}
+	}
+}
+
+// TestTag checks the names produced for full language tags, both through the
+// Namer returned by Tags and through a message printer formatting a Tag
+// value. In the name column a "|" separates the expected Namer result (left)
+// from the expected formatted result (right); without a "|" both are the
+// same (see splitName).
+func TestTag(t *testing.T) {
+	tests := []struct {
+		dict string
+		tag  string
+		name string
+	}{
+		// sr is in Value.Languages(), but is not supported by agq.
+		{"agq", "sr", "|[language: sr]"},
+		{"nl", "nl", "Nederlands"},
+		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
+		// Flemish in English, though. TODO: check if this is a CLDR bug.
+		// {"nl", "nl-BE", "Vlaams"},
+		{"nl", "nl-BE", "Nederlands (België)"},
+		{"nl", "vls", "West-Vlaams"},
+		{"en", "nl-BE", "Flemish"},
+		{"en", "en", "English"},
+		{"en", "en-GB", "British English"},
+		{"en", "en-US", "American English"}, // American English in CLDR 24+
+		{"ru", "ru", "русский"},
+		{"ru", "ru-RU", "русский (Россия)"},
+		{"ru", "ru-Cyrl", "русский (кириллица)"},
+		{"en", lastLang2zu.String(), "Zulu"},
+		{"en", firstLang2aa.String(), "Afar"},
+		{"en", lastLang3zza.String(), "Zaza"},
+		{"en", firstLang3ace.String(), "Achinese"},
+		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
+		{"en", lastTagZhHant.String(), "Traditional Chinese"},
+		{"en", "aaa", "|Unknown language (aaa)"},
+		{"en", "zzj", "|Unknown language (zzj)"},
+		// If full tag doesn't match, try without script or region.
+		{"en", "aa-Hans", "Afar (Simplified Han)"},
+		{"en", "af-Arab", "Afrikaans (Arabic)"},
+		{"en", "zu-Cyrl", "Zulu (Cyrillic)"},
+		{"en", "aa-GB", "Afar (United Kingdom)"},
+		{"en", "af-NA", "Afrikaans (Namibia)"},
+		{"en", "zu-BR", "Zulu (Brazil)"},
+		// Correct inheritance and language selection.
+		{"zh", "zh-TW", "中文 (台湾)"},
+		{"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
+		{"zh-Hant", "zh-TW", "中文 (台灣)"},
+		{"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
+		// Some rather arbitrary interpretations for Serbian. This is arguably
+		// correct and consistent with the way zh-[Hant-]TW is handled. It will
+		// also give results more in line with the expectations if users
+		// explicitly use "sh".
+		{"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
+		{"sr-Latn", "sr-Latn-ME", "srpskohrvatski (Crna Gora)"},
+		// Double script and region
+		{"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.dict+"/"+tt.tag, func(t *testing.T) {
+			name, fmtName := splitName(tt.name)
+			dict := language.MustParse(tt.dict)
+			// The tag under test is parsed with language.Raw.
+			tag := language.Raw.MustParse(tt.tag)
+			d := Tags(dict)
+			if n := d.Name(tag); n != name {
+				// There are inconsistencies w.r.t. capitalization in the tests
+				// due to CLDR's update procedure which treats modern and other
+				// languages differently.
+				// See https://unicode.org/cldr/trac/ticket/8051.
+				// TODO: use language capitalization to sanitize the strings.
+				t.Errorf("Name(%s) = %q; want %q", tag, n, name)
+			}
+
+			// The same tag formatted through a printer for the dictionary
+			// language must give the formatted name.
+			p := message.NewPrinter(dict)
+			if n := p.Sprint(Tag(tag)); n != fmtName {
+				t.Errorf("Tag(%s) = %q; want %q", tag, n, fmtName)
+			}
+		})
+	}
+}
+
+// splitName splits a test expectation of the form "name|formatName". The part
+// before the first "|" is the name; the part between the first and second "|"
+// is the formatName. Without a "|" the whole string is used for both results.
+func splitName(names string) (name, formatName string) {
+	parts := strings.Split(names, "|")
+	if len(parts) == 1 {
+		return parts[0], parts[0]
+	}
+	return parts[0], parts[1]
+}
+
+// TestLanguage checks the language names produced by the Namer returned by
+// Languages and by a message printer formatting a Language value. In the
+// name column a "|" separates the expected Namer result (left) from the
+// expected formatted result (right); without a "|" both are the same (see
+// splitName).
+func TestLanguage(t *testing.T) {
+	tests := []struct {
+		dict string
+		tag  string
+		name string
+	}{
+		// sr is in Value.Languages(), but is not supported by agq.
+		{"agq", "sr", "|[language: sr]"},
+		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
+		// Flemish in English, though. TODO: this is probably incorrect.
+		// West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
+		// own language, whereas Vlaams is generally Dutch. So expect to have
+		// to change these tests back.
+		{"nl", "nl", "Nederlands"},
+		{"nl", "vls", "West-Vlaams"},
+		{"nl", "nl-BE", "Nederlands"},
+		{"en", "pt", "Portuguese"},
+		{"en", "pt-PT", "European Portuguese"},
+		{"en", "pt-BR", "Brazilian Portuguese"},
+		{"en", "en", "English"},
+		{"en", "en-GB", "British English"},
+		{"en", "en-US", "American English"}, // American English in CLDR 24+
+		{"en", lastLang2zu.String(), "Zulu"},
+		{"en", firstLang2aa.String(), "Afar"},
+		{"en", lastLang3zza.String(), "Zaza"},
+		{"en", firstLang3ace.String(), "Achinese"},
+		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
+		{"en", lastTagZhHant.String(), "Traditional Chinese"},
+		{"en", "aaa", "|Unknown language (aaa)"},
+		{"en", "zzj", "|Unknown language (zzj)"},
+		// If full tag doesn't match, try without script or region.
+		{"en", "aa-Hans", "Afar"},
+		{"en", "af-Arab", "Afrikaans"},
+		{"en", "zu-Cyrl", "Zulu"},
+		{"en", "aa-GB", "Afar"},
+		{"en", "af-NA", "Afrikaans"},
+		{"en", "zu-BR", "Zulu"},
+		{"agq", "zh-Hant", "|[language: zh-Hant]"},
+		{"en", "sh", "Serbo-Croatian"},
+		{"en", "sr-Latn", "Serbo-Croatian"},
+		{"en", "sr", "Serbian"},
+		{"en", "sr-ME", "Serbian"},
+		{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
+	}
+	for _, tt := range tests {
+		testtext.Run(t, tt.dict+"/"+tt.tag, func(t *testing.T) {
+			name, fmtName := splitName(tt.name)
+			dict := language.MustParse(tt.dict)
+			tag := language.Raw.MustParse(tt.tag)
+			p := message.NewPrinter(dict)
+			d := Languages(dict)
+			if n := d.Name(tag); n != name {
+				t.Errorf("Name(%v) = %q; want %q", tag, n, name)
+			}
+			if n := p.Sprint(Language(tag)); n != fmtName {
+				t.Errorf("Language(%v) = %q; want %q", tag, n, fmtName)
+			}
+			// Inputs of at most three characters are bare base languages, so
+			// the same expectations are also checked with a language.Base
+			// argument.
+			if len(tt.tag) <= 3 {
+				base := language.MustParseBase(tt.tag)
+				if n := d.Name(base); n != name {
+					t.Errorf("Name(%v) = %q; want %q", base, n, name)
+				}
+				if n := p.Sprint(Language(base)); n != fmtName {
+					t.Errorf("Language(%v) = %q; want %q", base, n, fmtName)
+				}
+			}
+		})
+	}
+}
+
+// TestScript checks the script names produced by the Namer returned by
+// Scripts and by a message printer formatting a Script value. An scr entry
+// that starts with an upper-case letter is parsed as a script and checked
+// both as a language.Script and as a tag composed from it; any other entry is
+// parsed as a language tag.
+func TestScript(t *testing.T) {
+	tests := []struct {
+		dict string
+		scr  string
+		name string
+	}{
+		{"nl", "Arab", "Arabisch"},
+		{"en", "Arab", "Arabic"},
+		{"en", "Zzzz", "Unknown Script"},
+		{"zh-Hant", "Hang", "韓文字"},
+		{"zh-Hant-HK", "Hang", "韓文字"},
+		{"zh", "Arab", "阿拉伯文"},
+		{"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
+		{"zh-Hant", "Arab", "阿拉伯文"},
+		{"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
+		// Canonicalized form
+		{"en", "Qaai", "Inherited"},    // deprecated script, now is Zinh
+		{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
+		{"en", "en", "Unknown Script"},
+		// Don't introduce scripts with canonicalization.
+		{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
+	}
+	for _, tt := range tests {
+		t.Run(tt.dict+"/"+tt.scr, func(t *testing.T) {
+			want, wantFmt := splitName(tt.name)
+			dict := language.MustParse(tt.dict)
+			printer := message.NewPrinter(dict)
+			namer := Scripts(dict)
+
+			var tag language.Tag
+			if first := rune(tt.scr[0]); !unicode.IsUpper(first) {
+				// A lower-case entry is a language tag, not a script.
+				tag = language.Raw.MustParse(tt.scr)
+			} else {
+				scr := language.MustParseScript(tt.scr)
+				if got := namer.Name(scr); got != want {
+					t.Errorf("Name(%v) = %q; want %q", scr, got, want)
+				}
+				if got := printer.Sprint(Script(scr)); got != wantFmt {
+					t.Errorf("Script(%v) = %q; want %q", scr, got, wantFmt)
+				}
+				tag, _ = language.Raw.Compose(scr)
+			}
+
+			if got := namer.Name(tag); got != want {
+				t.Errorf("Name(%v) = %q; want %q", tag, got, want)
+			}
+			if got := printer.Sprint(Script(tag)); got != wantFmt {
+				t.Errorf("Script(%v) = %q; want %q", tag, got, wantFmt)
+			}
+		})
+	}
+}
+
+// TestRegion checks the region names produced by the Namer returned by
+// Regions and by a message printer formatting a Region value. A reg entry
+// that starts with an upper-case letter is parsed as a region and checked
+// both as a language.Region and as a tag composed from it; any other entry is
+// parsed as a language tag.
+func TestRegion(t *testing.T) {
+	tests := []struct {
+		dict string
+		reg  string
+		name string
+	}{
+		{"nl", "NL", "Nederland"},
+		{"en", "US", "United States"},
+		{"en", "ZZ", "Unknown Region"},
+		{"en-GB", "NL", "Netherlands"},
+		// Canonical equivalents
+		{"en", "UK", "United Kingdom"},
+		// No region
+		{"en", "pt", "Unknown Region"},
+		{"en", "und", "Unknown Region"},
+		// Don't introduce regions with canonicalization.
+		{"en", "mo", "Unknown Region"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.dict+"/"+tt.reg, func(t *testing.T) {
+			dict := language.MustParse(tt.dict)
+			printer := message.NewPrinter(dict)
+			namer := Regions(dict)
+
+			var tag language.Tag
+			if first := rune(tt.reg[0]); !unicode.IsUpper(first) {
+				// A lower-case entry is a language tag, not a region.
+				tag = language.Raw.MustParse(tt.reg)
+			} else {
+				reg := language.MustParseRegion(tt.reg)
+				if got := namer.Name(reg); got != tt.name {
+					t.Errorf("Name(%v) = %q; want %q", reg, got, tt.name)
+				}
+				if got := printer.Sprint(Region(reg)); got != tt.name {
+					t.Errorf("Region(%v) = %q; want %q", reg, got, tt.name)
+				}
+				tag, _ = language.Raw.Compose(reg)
+			}
+
+			if got := namer.Name(tag); got != tt.name {
+				t.Errorf("Name(%v) = %q; want %q", tag, got, tt.name)
+			}
+			if got := printer.Sprint(Region(tag)); got != tt.name {
+				t.Errorf("Region(%v) = %q; want %q", tag, got, tt.name)
+			}
+		})
+	}
+}
+
+// TestSelf checks the names returned by the shared Self namer, i.e. the name
+// of each tag written in the tag's own language. An expected name of ""
+// means Self must not return a name for that tag.
+func TestSelf(t *testing.T) {
+	tests := []struct {
+		tag  string
+		name string
+	}{
+		{"nl", "Nederlands"},
+		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
+		// Flemish in English, though. TODO: check if this is a CLDR bug.
+		// {"nl-BE", "Vlaams"},
+		{"nl-BE", "Nederlands"},
+		{"en-GB", "British English"},
+		{lastLang2zu.String(), "isiZulu"},
+		{firstLang2aa.String(), ""},  // not defined
+		{lastLang3zza.String(), ""},  // not defined
+		{firstLang3ace.String(), ""}, // not defined
+		{firstTagAr001.String(), "العربية الرسمية الحديثة"},
+		{"ar", "العربية"},
+		{lastTagZhHant.String(), "繁體中文"},
+		{"aaa", ""},
+		{"zzj", ""},
+		// Drop entries that are not in the requested script, even if there is
+		// an entry for the language.
+		{"aa-Hans", ""},
+		{"af-Arab", ""},
+		{"zu-Cyrl", ""},
+		// Append the country name in the language of the matching language.
+		{"af-NA", "Afrikaans"},
+		{"zh", "中文"},
+		// zh-TW should match zh-Hant instead of zh!
+		{"zh-TW", "繁體中文"},
+		{"zh-Hant", "繁體中文"},
+		{"zh-Hans", "简体中文"},
+		{"zh-Hant-TW", "繁體中文"},
+		{"zh-Hans-TW", "简体中文"},
+		// Take the entry for sr which has the matching script.
+		// TODO: Capitalization changed as of CLDR 26, but change seems
+		// arbitrary. Revisit capitalization with revision 27. See
+		// https://unicode.org/cldr/trac/ticket/8051.
+		{"sr", "српски"},
+		// TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
+		// Croatian. This is an artifact of the current algorithm, which is the
+		// way it is to have the preferred behavior for other languages such as
+		// Chinese. We can hardwire this case in the table generator or package
+		// code, but we first check if CLDR can be updated.
+		// {"sr-ME", "Srpski"}, // Is Srpskohrvatski
+		{"sr-Latn-ME", "srpskohrvatski"},
+		{"sr-Cyrl-ME", "српски"},
+		{"sr-NL", "српски"},
+		// NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
+		// dominant script. We do not have data for kk-Arab and we chose to not
+		// fall back in such cases.
+		{"kk-CN", ""},
+	}
+	for i, tt := range tests {
+		d := Self
+		// Tags are parsed with language.Raw before being handed to Self.
+		if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name {
+			t.Errorf("%d:%s: was %q; want %q", i, tt.tag, n, tt.name)
+		}
+	}
+}
+
+// TestEquivalence checks that several namers return the same name for the
+// tags "ro-MD" and "mo".
+func TestEquivalence(t *testing.T) {
+	testCases := []struct {
+		desc  string
+		namer Namer
+	}{
+		{"Self", Self},
+		{"Tags", Tags(language.Romanian)},
+		{"Languages", Languages(language.Romanian)},
+		{"Scripts", Scripts(language.Romanian)},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			roMD := language.Raw.MustParse("ro-MD")
+			viaRo := tc.namer.Name(roMD)
+			viaMo := tc.namer.Name(language.Raw.MustParse("mo"))
+			if viaRo == viaMo {
+				return
+			}
+			t.Errorf("%q != %q", viaRo, viaMo)
+		})
+	}
+}
+
+// TestDictionaryLang checks language names obtained from predefined
+// Dictionary values through their Tags namer and, when the tag consists of a
+// base language only, through their Languages namer as well.
+func TestDictionaryLang(t *testing.T) {
+	tests := []struct {
+		d    *Dictionary
+		tag  string
+		name string
+	}{
+		{English, "en", "English"},
+		{Portuguese, "af", "africâner"},
+		{EuropeanPortuguese, "af", "africanês"},
+		{English, "nl-BE", "Flemish"},
+	}
+	for i, tc := range tests {
+		tag := language.MustParse(tc.tag)
+		if got := tc.d.Tags().Name(tag); got != tc.name {
+			t.Errorf("%d:%v: got %s; want %s", i, tag, got, tc.name)
+		}
+		// The Languages namer is only consulted when rebuilding the tag from
+		// its base alone gives back the same tag.
+		bare, _ := language.Compose(tag.Base())
+		if bare != tag {
+			continue
+		}
+		if got := tc.d.Languages().Name(bare); got != tc.name {
+			t.Errorf("%d:%v: got %s; want %s", i, tag, got, tc.name)
+		}
+	}
+}
+
+// TestDictionaryRegion checks region names obtained from the Regions namer of
+// predefined Dictionary values.
+func TestDictionaryRegion(t *testing.T) {
+	tests := []struct {
+		d      *Dictionary
+		region string
+		name   string
+	}{
+		{English, "FR", "France"},
+		{Portuguese, "009", "Oceania"},
+		{EuropeanPortuguese, "009", "Oceânia"},
+	}
+	for i, tc := range tests {
+		region := language.MustParseRegion(tc.region)
+		got := tc.d.Regions().Name(region)
+		if got == tc.name {
+			continue
+		}
+		t.Errorf("%d:%v: got %s; want %s", i, region, got, tc.name)
+	}
+}
+
+// TestDictionaryScript checks script names obtained from the Scripts namer of
+// predefined Dictionary values.
+func TestDictionaryScript(t *testing.T) {
+	tests := []struct {
+		d      *Dictionary
+		script string
+		name   string
+	}{
+		{English, "Cyrl", "Cyrillic"},
+		{EuropeanPortuguese, "Gujr", "guzerate"},
+	}
+	for i, tc := range tests {
+		script := language.MustParseScript(tc.script)
+		got := tc.d.Scripts().Name(script)
+		if got == tc.name {
+			continue
+		}
+		t.Errorf("%d:%v: got %s; want %s", i, script, got, tc.name)
+	}
+}
@@ -0,0 +1,116 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package display_test
+
+import (
+	"fmt"
+
+	"golang.org/x/text/language"
+	"golang.org/x/text/language/display"
+	"golang.org/x/text/message"
+)
+
+// ExampleFormatter shows that display.Region and display.Language values
+// passed to a message printer are rendered in the language of that printer.
+func ExampleFormatter() {
+	// Register a Dutch translation for the format string used below.
+	message.SetString(language.Dutch, "In %v people speak %v.", "In %v spreekt men %v.")
+
+	fr := language.French
+	region, _ := fr.Region()
+	// Print the same message once with an English and once with a Dutch
+	// printer.
+	for _, tag := range []string{"en", "nl"} {
+		p := message.NewPrinter(language.Make(tag))
+
+		p.Printf("In %v people speak %v.", display.Region(region), display.Language(fr))
+		p.Println()
+	}
+
+	// Output:
+	// In France people speak French.
+	// In Frankrijk spreekt men Frans.
+}
+
+// ExampleNamer prints, for a list of tags, the English language name next to
+// the name display.Self returns for the same tag.
+func ExampleNamer() {
+	supported := []string{
+		"en-US", "en-GB", "ja", "zh", "zh-Hans", "zh-Hant", "pt", "pt-PT", "ko", "ar", "el", "ru", "uk", "pa",
+	}
+
+	// en names languages in English.
+	en := display.English.Languages()
+
+	for _, s := range supported {
+		t := language.MustParse(s)
+		fmt.Printf("%-20s (%s)\n", en.Name(t), display.Self.Name(t))
+	}
+
+	// Output:
+	// American English     (American English)
+	// British English      (British English)
+	// Japanese             (日本語)
+	// Chinese              (中文)
+	// Simplified Chinese   (简体中文)
+	// Traditional Chinese  (繁體中文)
+	// Portuguese           (português)
+	// European Portuguese  (português europeu)
+	// Korean               (한국어)
+	// Arabic               (العربية)
+	// Greek                (Ελληνικά)
+	// Russian              (русский)
+	// Ukrainian            (українська)
+	// Punjabi              (ਪੰਜਾਬੀ)
+}
+
+// ExampleTags prints the English names of Dutch tags that carry different
+// combinations of script and region subtags.
+func ExampleTags() {
+	n := display.Tags(language.English)
+	fmt.Println(n.Name(language.Make("nl")))
+	fmt.Println(n.Name(language.Make("nl-BE")))
+	fmt.Println(n.Name(language.Make("nl-CW")))
+	fmt.Println(n.Name(language.Make("nl-Arab")))
+	fmt.Println(n.Name(language.Make("nl-Cyrl-RU")))
+
+	// Output:
+	// Dutch
+	// Flemish
+	// Dutch (Curaçao)
+	// Dutch (Arabic)
+	// Dutch (Cyrillic, Russia)
+}
+
+// ExampleDictionary shows how to reduce the amount of data linked into your
+// binary by only using the predefined Dictionary variables of the languages you
+// wish to support.
+func ExampleDictionary() {
+	// tags and dicts are parallel: dicts[i] is the Dictionary for tags[i].
+	tags := []language.Tag{
+		language.English,
+		language.German,
+		language.Japanese,
+		language.Russian,
+	}
+	dicts := []*display.Dictionary{
+		display.English,
+		display.German,
+		display.Japanese,
+		display.Russian,
+	}
+
+	m := language.NewMatcher(tags)
+
+	// getDict returns the Dictionary at the index the matcher picks for t, or
+	// nil when the matcher reports no confidence in the match.
+	getDict := func(t language.Tag) *display.Dictionary {
+		_, i, confidence := m.Match(t)
+		// Skip this check if you want to support a fall-back language, which
+		// will be the first one passed to NewMatcher.
+		if confidence == language.No {
+			return nil
+		}
+		return dicts[i]
+	}
+
+	// The matcher will match Swiss German to German.
+	n := getDict(language.Make("gsw")).Languages()
+	fmt.Println(n.Name(language.German))
+	fmt.Println(n.Name(language.Make("de-CH")))
+	fmt.Println(n.Name(language.Make("gsw")))
+
+	// Output:
+	// Deutsch
+	// Schweizer Hochdeutsch
+	// Schweizerdeutsch
+}
@@ -0,0 +1,253 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package display
+
+// This file contains common lookup code that is shared between the various
+// implementations of Namer and Dictionaries.
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"golang.org/x/text/language"
+)
+
+// namer is the internal source of strings used by the lookup functions in
+// this file (nameLanguage, nameScript, nameRegion and nameTag).
+type namer interface {
+	// name gets the string for the given index. It should walk the
+	// inheritance chain if a value is not present in the base index.
+	name(idx int) string
+}
+
+// nameLanguage returns the language name n holds for x. The tag composed
+// from x is looked up in langTagSet; when n has no string for the resulting
+// index the lookup is repeated with the tag's parent. It returns "" once the
+// parent chain reaches language.Und.
+func nameLanguage(n namer, x interface{}) string {
+	// The Compose error is ignored: lookup proceeds with whatever tag results.
+	tag, _ := language.All.Compose(x)
+	for {
+		idx, _, _ := langTagSet.index(tag.Raw())
+		name := n.name(idx)
+		if name != "" {
+			return name
+		}
+		tag = tag.Parent()
+		if tag == language.Und {
+			return ""
+		}
+	}
+}
+
+// nameScript returns the string n holds for the script of the tag composed
+// from x with language.DeprecatedScript. The script is located in scriptIndex
+// by its string form.
+func nameScript(n namer, x interface{}) string {
+	// The Compose error is ignored: lookup proceeds with whatever tag results.
+	tag, _ := language.DeprecatedScript.Compose(x)
+	_, script, _ := tag.Raw()
+	idx := scriptIndex.index(script.String())
+	return n.name(idx)
+}
+
+// nameRegion returns the string n holds for the region of the tag composed
+// from x with language.DeprecatedRegion. The region is located in regionIndex
+// by its string form.
+func nameRegion(n namer, x interface{}) string {
+	// The Compose error is ignored: lookup proceeds with whatever tag results.
+	tag, _ := language.DeprecatedRegion.Compose(x)
+	_, _, region := tag.Raw()
+	idx := regionIndex.index(region.String())
+	return n.name(idx)
+}
+
+// nameTag returns a display name for the language.Tag x, or "" when x is not
+// a language.Tag or no name can be found. The language part comes from langN.
+// A script or region that the matched langTagSet entry does not cover is
+// appended in parentheses using scrN and regN, as "name (script, region)",
+// "name (script)" or "name (region)". If langN has no string for the matched
+// entry, the lookup is retried with the parent tag, and from then on the raw
+// script and region of the canonicalized tag are the ones appended.
+func nameTag(langN, scrN, regN namer, x interface{}) string {
+	t, isTag := x.(language.Tag)
+	if !isTag {
+		return ""
+	}
+	// Canonicalize with every rule except script suppression; on error the
+	// tag is used as given.
+	const form = language.All &^ language.SuppressScript
+	if canonical, err := form.Canonicalize(t); err == nil {
+		t = canonical
+	}
+	_, rawScript, rawRegion := t.Raw()
+	idx, scr, reg := langTagSet.index(t.Raw())
+	for idx != -1 {
+		lang := langN.name(idx)
+		if lang == "" {
+			// No name at this level: restore the raw subtags and retry with
+			// the parent tag.
+			scr, reg = rawScript, rawRegion
+			t = t.Parent()
+			if t == language.Und {
+				return ""
+			}
+			idx, _, _ = langTagSet.index(t.Raw())
+			continue
+		}
+
+		var scriptName, regionName string
+		if scr != (language.Script{}) {
+			scriptName = scrN.name(scriptIndex.index(scr.String()))
+		}
+		if reg != (language.Region{}) {
+			regionName = regN.name(regionIndex.index(reg.String()))
+		}
+		// TODO: use patterns in CLDR or at least confirm they are the
+		// same for all languages.
+		switch {
+		case scriptName != "" && regionName != "":
+			return fmt.Sprintf("%s (%s, %s)", lang, scriptName, regionName)
+		case scriptName != "" || regionName != "":
+			return fmt.Sprintf("%s (%s%s)", lang, scriptName, regionName)
+		}
+		return lang
+	}
+	return ""
+}
+
+// header contains the data and indexes for a single namer.
+// data contains a series of strings concatenated into one. index contains the
+// offsets for a string in data. For example, consider a header that defines
+// strings for the languages de, el, en, fi, and nl:
+//
+//	header{
+//		data: "GermanGreekEnglishDutch",
+//		index: []uint16{0, 6, 11, 18, 18, 23},
+//	}
+//
+// For a language with index i, the string is defined by
+// data[index[i]:index[i+1]]. So the number of elements in index is always one
+// greater than the number of languages for which header defines a value.
+// A string for a language may be empty, which means the name is undefined. In
+// the above example, the name for fi (Finnish) is undefined.
+type header struct {
+	data  string   // all strings concatenated
+	index []uint16 // offsets into data; entry i+1 ends string i
+}
+
+// name looks up the name for a tag in the dictionary, given its index. It
+// returns "" when i does not identify an entry of h.
+func (h *header) name(i int) string {
+	if i < 0 || i >= len(h.index)-1 {
+		return ""
+	}
+	start, end := h.index[i], h.index[i+1]
+	return h.data[start:end]
+}
+
+// tagSet is used to find the index of a language in a set of tags.
+type tagSet struct {
+	// single indexes tags that consist of one subtag only.
+	single tagIndex
+	// long lists tags made of a base language followed by a script or a
+	// region; index searches it with a binary search, so it is presumably
+	// sorted -- TODO confirm against the table generator.
+	long []string
+}
+
+var (
+	// langTagSet is used for indexing the language strings of the
+	// dictionaries; it is backed by langIndex and langTagsLong.
+	langTagSet = tagSet{
+		single: langIndex,
+		long:   langTagsLong,
+	}
+
+	// selfTagSet is used for indexing the language strings in their own
+	// language.
+	selfTagSet = tagSet{
+		single: selfIndex,
+		long:   selfTagsLong,
+	}
+
+	// zzzz and zz are the script "Zzzz" and the region "ZZ"; tagSet.index
+	// treats them the same as an absent script or region.
+	zzzz = language.MustParseScript("Zzzz")
+	zz   = language.MustParseRegion("ZZ")
+)
+
+// index returns the index of the tag for the given base, script and region or
+// its parent if the tag is not available. If the match is for a parent entry,
+// the excess script and region are returned. Entries of ts.long are numbered
+// after those of ts.single. The returned index is -1 when neither a long
+// entry nor the base language itself is present.
+func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
+	name := base.String()
+	found := -1
+	if scr != (language.Script{}) || reg != (language.Region{}) {
+		// The script zzzz and the region zz count as absent.
+		if scr == zzzz {
+			scr = language.Script{}
+		}
+		if reg == zz {
+			reg = language.Region{}
+		}
+
+		// All entries have either a script or a region and not both.
+		wantScript, wantRegion := scr.String(), reg.String()
+		first := sort.SearchStrings(ts.long, name)
+		for i := first; i < len(ts.long) && strings.HasPrefix(ts.long[i], name); i++ {
+			// Compare what follows the base language and its separator.
+			suffix := ts.long[i][len(name)+1:]
+			if suffix != wantScript && suffix != wantRegion {
+				continue
+			}
+			// The matched subtag is consumed; the other one is the excess.
+			if suffix == wantScript {
+				scr = language.Script{}
+			} else {
+				reg = language.Region{}
+			}
+			found = i + ts.single.len()
+			break
+		}
+	}
+	if found == -1 {
+		found = ts.single.index(name)
+	}
+	return found, scr, reg
+}
+
+// Tags returns all tags of the set, parsed with language.Raw: first the
+// entries of ts.single in the order its keys method yields them, then the
+// entries of ts.long.
+func (ts *tagSet) Tags() []language.Tag {
+	all := make([]language.Tag, 0, ts.single.len()+len(ts.long))
+	add := func(s string) {
+		all = append(all, language.Raw.MustParse(s))
+	}
+	ts.single.keys(add)
+	for _, s := range ts.long {
+		add(s)
+	}
+	return all
+}
+
+// supportedScripts returns the scripts listed in scriptIndex, in the order
+// its keys method yields them.
+func supportedScripts() []language.Script {
+	scripts := make([]language.Script, 0, scriptIndex.len())
+	collect := func(key string) {
+		scripts = append(scripts, language.MustParseScript(key))
+	}
+	scriptIndex.keys(collect)
+	return scripts
+}
+
+// supportedRegions returns the regions listed in regionIndex, in the order
+// its keys method yields them.
+func supportedRegions() []language.Region {
+	regions := make([]language.Region, 0, regionIndex.len())
+	collect := func(key string) {
+		regions = append(regions, language.MustParseRegion(key))
+	}
+	regionIndex.keys(collect)
+	return regions
+}
+
+// tagIndex holds concatenated lists of subtags of length 2 to 4, one string
+// for each length, which can be used in combination with binary search to get
+// the index associated with a tag. Element 0 holds the 2-byte subtags,
+// element 1 the 3-byte subtags and element 2 the 4-byte subtags; indexes are
+// assigned consecutively across the three strings.
+// For example, a tagIndex{
+//
+//	"arenesfrruzh",  // 6 2-byte tags.
+//	"barwae",        // 2 3-byte tags.
+//	"",
+//
+// }
+// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
+// "wae" had an index of 7.
+type tagIndex [3]string
+
+// index returns the index of subtag s, or -1 if s is not 2 to 4 bytes long or
+// is not contained in t. The string holding subtags of len(s) bytes is
+// searched with a binary search; the result is then offset by the number of
+// shorter subtags in t.
+func (t *tagIndex) index(s string) int {
+	width := len(s)
+	if width < 2 || width > 4 {
+		return -1
+	}
+	packed := t[width-2]
+	count := len(packed) / width
+	pos := sort.Search(count, func(k int) bool {
+		return packed[k*width:(k+1)*width] >= s
+	})
+	if pos == count || packed[pos*width:(pos+1)*width] != s {
+		return -1
+	}
+	// Add the number of tags for smaller sizes.
+	for w := 2; w < width; w++ {
+		pos += len(t[w-2]) / w
+	}
+	return pos
+}
+
+// len returns the number of tags that are contained in the tagIndex: for each
+// of the three strings, its length divided by the size of the subtags it
+// holds.
+func (t *tagIndex) len() (n int) {
+	for width := 2; width <= 4; width++ {
+		n += len(t[width-2]) / width
+	}
+	return n
+}
+
+// keys calls f for each tag, visiting the 2-byte tags first, then the 3-byte
+// tags and finally the 4-byte tags, each group in stored order.
+func (t *tagIndex) keys(f func(key string)) {
+	for slot, rest := range *t {
+		width := slot + 2
+		for rest != "" {
+			f(rest[:width])
+			rest = rest[width:]
+		}
+	}
+}
@@ -0,0 +1,602 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// Generator for display name tables.
+
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"log"
+	"reflect"
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/language"
+	"golang.org/x/text/unicode/cldr"
+)
+
+// Command-line flags that control which data is read and how the tables are
+// generated.
+var (
+	test = flag.Bool("test", false,
+		"test existing tables; can be used to compare web data with package data.")
+	outputFile = flag.String("output", "tables.go", "output file")
+
+	stats = flag.Bool("stats", false, "prints statistics to stderr")
+
+	// short selects the "short" alternative of a name instead of the
+	// "stand-alone" one when filtering the data.
+	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
+	// draft is the minimal draft level passed to the draft filter.
+	draft = flag.String("draft",
+		"contributed",
+		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
+	pkg = flag.String("package",
+		"display",
+		"the name of the package in which the generated file is to be included")
+
+	// tags restricts the locales and language tags that are processed. An
+	// empty set places no restriction.
+	tags = newTagSet("tags",
+		[]language.Tag{},
+		"space-separated list of tags to include or empty for all")
+	// dict is the set of tags for which a Dictionary is generated. It
+	// defaults to the list returned by dictTags.
+	dict = newTagSet("dict",
+		dictTags(),
+		"space-separated list or tags for which to include a Dictionary. "+
+			`"" means the common list from go.text/language.`)
+)
+
+// dictTags returns the default list of tags for which a Dictionary is
+// generated, parsed from a fixed space-separated list.
+func dictTags() (tag []language.Tag) {
+	// TODO: replace with language.Common.Tags() once supported.
+	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
+		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
+		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
+		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
+		"zh zh-Hans zh-Hant zu"
+
+	names := strings.Split(str, " ")
+	tag = make([]language.Tag, 0, len(names))
+	for i := range names {
+		tag = append(tag, language.MustParse(names[i]))
+	}
+	return tag
+}
+
+// main decodes the localeDisplayNames section of the CLDR core archive and
+// generates the display name tables. The result is written to the file named
+// by the -output flag, in the package named by the -package flag.
+func main() {
+	gen.Init()
+
+	// Read the CLDR zip file.
+	r := gen.OpenCLDRCoreZip()
+	defer r.Close()
+
+	// Only the directories and the section that hold display names are needed.
+	d := &cldr.Decoder{}
+	d.SetDirFilter("main", "supplemental")
+	d.SetSectionFilter("localeDisplayNames")
+	data, err := d.DecodeZip(r)
+	if err != nil {
+		log.Fatalf("DecodeZip: %v", err)
+	}
+
+	w := gen.NewCodeWriter()
+	// Honor the -package flag instead of a hard-coded package name. The flag
+	// defaults to "display", so the default output is unchanged.
+	defer w.WriteGoFile(*outputFile, *pkg)
+
+	gen.WriteCLDRVersion(w)
+
+	b := builder{
+		w:     w,
+		data:  data,
+		group: make(map[string]*group),
+	}
+	b.generate()
+}
+
+// tagForm is the canonicalization type used by this generator whenever it
+// parses a tag from a flag or from CLDR data.
+const tagForm = language.All
+
+// tagSet is used to parse command line flags of tags. It implements the
+// flag.Value interface. An empty tagSet is treated by contains as matching
+// every tag.
+type tagSet map[language.Tag]bool
+
+// newTagSet creates a tagSet that initially contains tags and registers it
+// with the flag package under the given name and usage string.
+func newTagSet(name string, tags []language.Tag, usage string) tagSet {
+	set := make(tagSet)
+	for i := range tags {
+		set[tags[i]] = true
+	}
+	flag.Var(set, name, usage)
+	return set
+}
+
+// String implements the String method of the flag.Value interface. It returns
+// the tags of the set in sorted order, separated by single spaces.
+func (f tagSet) String() string {
+	names := make([]string, 0, len(f))
+	for t := range f {
+		names = append(names, t.String())
+	}
+	sort.Strings(names)
+	return strings.Join(names, " ")
+}
+
+// Set implements Set from the flag.Value interface. It adds every tag of the
+// space-separated list s to the set, skipping empty fields, and returns the
+// first parse error it encounters.
+func (f tagSet) Set(s string) error {
+	for _, field := range strings.Split(s, " ") {
+		if field == "" {
+			continue
+		}
+		tag, err := tagForm.Parse(field)
+		if err != nil {
+			return err
+		}
+		f[tag] = true
+	}
+	return nil
+}
+
+// contains reports whether t is selected by the set. An empty set selects
+// every tag.
+func (f tagSet) contains(t language.Tag) bool {
+	return len(f) == 0 || f[t]
+}
+
+// builder is used to create all tables with display name information.
+type builder struct {
+	// w receives the generated code.
+	w *gen.CodeWriter
+
+	// data is the decoded CLDR data from which the tables are built.
+	data *cldr.CLDR
+
+	// fromLocs is reset by setData; its other uses are not visible here.
+	fromLocs []string
+
+	// destination tags for the current locale.
+	toTags     []string
+	toTagIndex map[string]int
+
+	// list of supported tags
+	supported []language.Tag
+
+	// key-value pairs per group
+	group map[string]*group
+
+	// statistics
+	sizeIndex int // total size of all indexes of headers
+	sizeData  int // total size of all data of headers
+	totalSize int
+}
+
+// group collects the key-value pairs of one kind of display name (such as
+// "lang", "script" or "region") for all locales.
+type group struct {
+	// Maps from a given language to the Namer data for this language.
+	lang    map[language.Tag]keyValues
+	// headers holds one entry per supported tag; it is filled by writeGroup.
+	headers []header
+
+	// toTags is the sorted, de-duplicated list of keys used across all
+	// locales of the group; it is filled by writeGroup.
+	toTags        []string
+	// NOTE(review): threeStart and fourPlusStart are not referenced in the
+	// visible code; confirm whether they are still needed.
+	threeStart    int
+	fourPlusStart int
+}
+
+// set records name as the value of key typ for tag t, creating the key-value
+// map for t on first use. A non-empty value that is already present is kept.
+func (g *group) set(t language.Tag, typ, name string) {
+	values := g.lang[t]
+	if values == nil {
+		values = keyValues{}
+		g.lang[t] = values
+	}
+	if values[typ] != "" {
+		// Never overwrite an existing value.
+		return
+	}
+	values[typ] = name
+}
+
+// keyValues maps a key (the string form of a tag, script or region) to its
+// display name.
+type keyValues map[string]string
+
+// header holds the generated data of one supported tag within a group.
+type header struct {
+	tag   language.Tag
+	// data is the concatenation of the display names, in key order.
+	data  string
+	// index[i] is the offset in data at which the name for key i starts. The
+	// name ends at index[i+1].
+	index []uint16
+}
+
+// versionInfo is the format string for the deprecated Version constant in the
+// generated file.
+var versionInfo = `// Version is deprecated. Use CLDRVersion.
+const Version = %#v
+
+`
+
+// self is the tag under which the names of languages in their own language
+// are stored.
+var self = language.MustParse("mul")
+
+// generate builds and writes all tables. It filters the CLDR data, collects
+// the "lang", "script" and "region" groups, writes the parent and group
+// tables, the list of supported tags and the dictionaries, and finally builds
+// and writes the "self" group.
+func (b *builder) generate() {
+	fmt.Fprintf(b.w, versionInfo, cldr.Version)
+
+	b.filter()
+	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
+		if ldn.Languages != nil {
+			for _, v := range ldn.Languages.Language {
+				lang := v.Type
+				if lang == "root" {
+					// We prefer the data from "und"
+					// TODO: allow both the data for root and und somehow.
+					continue
+				}
+				tag := tagForm.MustParse(lang)
+				if tags.contains(tag) {
+					g.set(loc, tag.String(), v.Data())
+				}
+			}
+		}
+	})
+	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
+		if ldn.Scripts != nil {
+			for _, v := range ldn.Scripts.Script {
+				code := language.MustParseScript(v.Type)
+				if code.IsPrivateUse() { // Qaaa..Qabx
+					// TODO: data currently appears to be very meager.
+					// Reconsider if we have data for English.
+					if loc == language.English {
+						log.Fatal("Consider including data for private use scripts.")
+					}
+					continue
+				}
+				g.set(loc, code.String(), v.Data())
+			}
+		}
+	})
+	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
+		if ldn.Territories != nil {
+			for _, v := range ldn.Territories.Territory {
+				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
+			}
+		}
+	})
+
+	b.makeSupported()
+
+	b.writeParents()
+
+	b.writeGroup("lang")
+	b.writeGroup("script")
+	b.writeGroup("region")
+
+	// The supported tags are written as a single "|"-terminated string.
+	b.w.WriteConst("numSupported", len(b.supported))
+	buf := bytes.Buffer{}
+	for _, tag := range b.supported {
+		fmt.Fprint(&buf, tag.String(), "|")
+	}
+	b.w.WriteConst("supported", buf.String())
+
+	b.writeDictionaries()
+
+	// The "self" group has a single header, for the pseudo-locale self.
+	b.supported = []language.Tag{self}
+
+	// Compute the names of locales in their own language. Some of these names
+	// may be specified in their parent locales. We iterate the maximum depth
+	// of the parent three times to match successive parents of tags until a
+	// possible match is found.
+	// As set does not overwrite a value, names found in an earlier pass take
+	// precedence over names found in a later pass.
+	for i := 0; i < 4; i++ {
+		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
+			parent := tag
+			// Note that b shadows the builder within this statement.
+			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
+				parent, _ = language.Raw.Compose(b)
+			}
+			if ldn.Languages != nil {
+				for _, v := range ldn.Languages.Language {
+					key := tagForm.MustParse(v.Type)
+					saved := key
+					if key == parent {
+						g.set(self, tag.String(), v.Data())
+					}
+					// Walk i levels up the parent chain of key.
+					for k := 0; k < i; k++ {
+						key = key.Parent()
+					}
+					if key == tag {
+						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
+					}
+				}
+			}
+		})
+	}
+
+	b.writeGroup("self")
+}
+
+// setData resets the per-group state of the builder and calls f for every
+// locale that has display name data and is selected by the tags flag. The
+// group called name is created on first use and passed to f.
+func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
+	b.sizeIndex, b.sizeData = 0, 0
+	b.toTags, b.fromLocs = nil, nil
+	b.toTagIndex = make(map[string]int)
+
+	g := b.group[name]
+	if g == nil {
+		g = &group{lang: make(map[language.Tag]keyValues)}
+		b.group[name] = g
+	}
+	for _, loc := range b.data.Locales() {
+		// We use RawLDML instead of LDML as we are managing our own inheritance
+		// in this implementation.
+		ldn := b.data.RawLDML(loc).LocaleDisplayNames
+
+		tag, err := tagForm.Parse(loc)
+		switch {
+		case err != nil && ldn != nil:
+			log.Fatalf("setData: %v", err)
+		case err != nil:
+			// We do not support the POSIX variant (it is not a supported
+			// BCP 47 variant). This locale also doesn't happen to contain any
+			// data, so an unparsable locale without data is skipped.
+		case ldn != nil && tags.contains(tag):
+			f(g, tag, ldn)
+		}
+	}
+}
+
+// filter reduces the language, script and territory names of every locale to
+// one alternative per group and to the elements accepted by the draft filter
+// for the level given by the -draft flag. A list that ends up empty is set to
+// nil. It terminates the program if the -draft flag has an invalid value.
+func (b *builder) filter() {
+	// The draft level and the preferred alternative are the same for every
+	// slice, so compute them once. This also rejects an invalid -draft value
+	// up front, even when there is no data to filter.
+	d, err := cldr.ParseDraft(*draft)
+	if err != nil {
+		log.Fatalf("filter: %v", err)
+	}
+	alt := "stand-alone"
+	if *short {
+		alt = "short"
+	}
+	filter := func(s *cldr.Slice) {
+		// Prefer the chosen alternative, then elements without an "alt".
+		s.SelectOnePerGroup("alt", []string{alt, ""})
+		s.SelectDraft(d)
+	}
+	for _, loc := range b.data.Locales() {
+		if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
+			if ldn.Languages != nil {
+				s := cldr.MakeSlice(&ldn.Languages.Language)
+				if filter(&s); len(ldn.Languages.Language) == 0 {
+					ldn.Languages = nil
+				}
+			}
+			if ldn.Scripts != nil {
+				s := cldr.MakeSlice(&ldn.Scripts.Script)
+				if filter(&s); len(ldn.Scripts.Script) == 0 {
+					ldn.Scripts = nil
+				}
+			}
+			if ldn.Territories != nil {
+				s := cldr.MakeSlice(&ldn.Territories.Territory)
+				if filter(&s); len(ldn.Territories.Territory) == 0 {
+					ldn.Territories = nil
+				}
+			}
+		}
+	}
+}
+
+// makeSupported appends the locales of all groups to b.supported and reduces
+// the list to its sorted, unique tags.
+func (b *builder) makeSupported() {
+	// Gather the tags across groups; duplicates are removed below.
+	for _, g := range b.group {
+		for t := range g.lang {
+			b.supported = append(b.supported, t)
+		}
+	}
+	n := unique(tagsSorter(b.supported))
+	b.supported = b.supported[:n]
+}
+
+// tagsSorter implements sort.Interface, ordering tags by their string form.
+type tagsSorter []language.Tag
+
+func (a tagsSorter) Len() int { return len(a) }
+
+func (a tagsSorter) Swap(i, j int) { a[j], a[i] = a[i], a[j] }
+
+func (a tagsSorter) Less(i, j int) bool {
+	x, y := a[i].String(), a[j].String()
+	return x < y
+}
+
+// writeGroup computes the sorted list of keys of the named group, builds one
+// header (concatenated names plus offsets) per supported tag, and writes the
+// resulting table. It terminates the program if the names of a single tag do
+// not fit in the 16-bit offsets of the index.
+func (b *builder) writeGroup(name string) {
+	g := b.group[name]
+
+	for _, kv := range g.lang {
+		for t := range kv {
+			g.toTags = append(g.toTags, t)
+		}
+	}
+	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
+
+	// Allocate header per supported value.
+	g.headers = make([]header, len(b.supported))
+	for i, sup := range b.supported {
+		kv, ok := g.lang[sup]
+		if !ok {
+			// No data for this tag: keep an empty header.
+			g.headers[i].tag = sup
+			continue
+		}
+		data := []byte{}
+		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
+		for j, t := range g.toTags {
+			index[j] = uint16(len(data))
+			data = append(data, kv[t]...)
+		}
+		// Offsets are stored as uint16. Fail loudly rather than silently
+		// truncating them, which would corrupt the generated table.
+		if len(data) > 0xFFFF {
+			log.Fatalf("writeGroup: %d bytes of %s data for %v do not fit in a uint16 index", len(data), name, sup)
+		}
+		index = append(index, uint16(len(data)))
+
+		// Trim the tail of the index.
+		// TODO: indexes can be reduced in size quite a bit more.
+		n := len(index)
+		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
+		}
+		index = index[:n]
+
+		// Workaround for a bug in CLDR 26.
+		// See https://unicode.org/cldr/trac/ticket/8042.
+		if cldr.Version == "26" && sup.String() == "hsb" {
+			data = bytes.Replace(data, []byte{'"'}, nil, 1)
+		}
+		g.headers[i] = header{sup, string(data), index}
+	}
+	g.writeTable(b.w, name)
+}
+
+// tagsBySize implements sort.Interface. Strings of at most 4 bytes are
+// ordered by length first; strings of equal length, and strings that are both
+// longer than 4 bytes, are ordered alphabetically.
+type tagsBySize []string
+
+func (l tagsBySize) Len() int      { return len(l) }
+func (l tagsBySize) Swap(i, j int) { l[j], l[i] = l[i], l[j] }
+func (l tagsBySize) Less(i, j int) bool {
+	x, y := l[i], l[j]
+	// Alphabetic order applies to entries of the same size and to pairs of
+	// multi-subtag entries. Otherwise the shorter entry sorts first.
+	if len(x) == len(y) || (len(x) > 4 && len(y) > 4) {
+		return x < y
+	}
+	return len(x) < len(y)
+}
+
+// parentIndices returns slice a of len(tags) where tags[a[i]] is the closest
+// ancestor of tags[i] that is contained in tags, or where a[i] is -1 if no
+// ancestor other than language.Und is in tags.
+func parentIndices(tags []language.Tag) []int16 {
+	pos := make(map[language.Tag]int16, len(tags))
+	for i, t := range tags {
+		pos[t] = int16(i)
+	}
+
+	// Construct default parents.
+	parents := make([]int16, len(tags))
+	for i := range tags {
+		parents[i] = -1
+		for p := tags[i].Parent(); p != language.Und; p = p.Parent() {
+			j, found := pos[p]
+			if !found {
+				continue
+			}
+			parents[i] = j
+			break
+		}
+	}
+	return parents
+}
+
+// writeParents writes the parents variable, which holds the parent index of
+// every supported tag as computed by parentIndices.
+func (b *builder) writeParents() {
+	idx := parentIndices(b.supported)
+	fmt.Fprint(b.w, "var parents = ")
+	b.w.WriteArray(idx)
+}
+
+// writeKeys writes keys to a special index used by the display package: a
+// tagIndex named <name>Index with the concatenated keys of 2, 3 and 4 bytes
+// and, if any keys remain, a slice named <name>TagsLong with the rest.
+// keys are assumed to be sorted by length.
+func writeKeys(w *gen.CodeWriter, name string, keys []string) {
+	w.Size += int(3 * reflect.TypeOf("").Size())
+	w.WriteComment("Number of keys: %d", len(keys))
+	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
+	for width := 2; width <= 4; width++ {
+		// Take the leading run of keys that have the current width.
+		n := 0
+		for n < len(keys) && len(keys[n]) == width {
+			n++
+		}
+		w.WriteString(strings.Join(keys[:n], ""))
+		fmt.Fprint(w, ",\n")
+		keys = keys[n:]
+	}
+	fmt.Fprint(w, "\t}\n")
+	if len(keys) > 0 {
+		w.Size += int(reflect.TypeOf([]string{}).Size())
+		fmt.Fprintf(w, "\t%sTagsLong = ", name)
+		w.WriteSlice(keys)
+	}
+	fmt.Fprint(w, ")\n\n")
+}
+
+// identifier creates an identifier from the given tag by removing all hyphens
+// from its string form.
+func identifier(t language.Tag) string {
+	name := t.String()
+	return strings.ReplaceAll(name, "-", "")
+}
+
+// writeEntry writes the element for h in the headers table of a group. A tag
+// selected by the dict flag refers to its separately written Str and Idx
+// values, a header without data becomes an empty element, and any other
+// header is written inline.
+func (h *header) writeEntry(w *gen.CodeWriter, name string) {
+	switch {
+	case len(dict) > 0 && dict.contains(h.tag):
+		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
+		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
+		fmt.Fprintln(w, "\t},")
+	case len(h.data) == 0:
+		fmt.Fprintln(w, "\t\t{}, //", h.tag)
+	default:
+		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
+		w.WriteString(h.data)
+		fmt.Fprintln(w, ",")
+		w.WriteSlice(h.index)
+		fmt.Fprintln(w, ",\n\t},")
+	}
+}
+
+// writeSingle writes the data of the given header as a separate Str constant
+// and Idx variable, but only for tags selected by a non-empty dict flag. The
+// size for this data was already accounted for in writeEntry.
+func (h *header) writeSingle(w *gen.CodeWriter, name string) {
+	if len(dict) == 0 || !dict.contains(h.tag) {
+		return
+	}
+	prefix := identifier(h.tag) + name
+	w.WriteConst(prefix+"Str", h.data)
+
+	// Note that we create a slice instead of an array. If we use an array
+	// we need to refer to it as a[:] in other tables, which will cause the
+	// array to always be included by the linker. See Issue 7651.
+	w.WriteVar(prefix+"Idx", h.index)
+}
+
+// writeTable writes the tables of a single Namer: the key index, the headers
+// array with one element per header of g, the separately stored dictionary
+// data, and a comment with the total size accounted for in w.Size.
+func (g *group) writeTable(w *gen.CodeWriter, name string) {
+	start := w.Size
+	writeKeys(w, name, g.toTags)
+	// Take the element size from the header type rather than from
+	// g.headers[0], which would panic if there are no headers.
+	w.Size += len(g.headers) * int(reflect.TypeOf(header{}).Size())
+
+	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
+
+	title := strings.Title(name)
+	for _, h := range g.headers {
+		h.writeEntry(w, title)
+	}
+	fmt.Fprintln(w, "}\n")
+
+	for _, h := range g.headers {
+		h.writeSingle(w, title)
+	}
+	n := w.Size - start
+	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
+}
+
+// writeDictionaries writes a Dictionary variable for every supported tag that
+// is selected by the dict flag. Each Dictionary refers to the Dictionary of
+// its parent tag (or nil) and to its Lang, Script and Region data. The size
+// of the entries is added to b.w.Size.
+func (b *builder) writeDictionaries() {
+	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
+	fmt.Fprintln(b.w, "var (")
+	parents := parentIndices(b.supported)
+
+	for i, t := range b.supported {
+		if !dict.contains(t) {
+			continue
+		}
+		ident := identifier(t)
+		fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
+		if p := parents[i]; p != -1 {
+			fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
+		} else {
+			fmt.Fprintln(b.w, "\t\tnil,")
+		}
+		for _, kind := range [...]string{"Lang", "Script", "Region"} {
+			fmt.Fprintf(b.w, "\t\theader{%[1]s%[2]sStr, %[1]s%[2]sIdx},\n", ident, kind)
+		}
+		fmt.Fprintln(b.w, "\t}")
+	}
+	fmt.Fprintln(b.w, ")")
+
+	// An entry consists of three headers (a string and a slice each) and a
+	// pointer to the parent.
+	var idx []uint16
+	perEntry := 3 * (reflect.TypeOf("").Size() + reflect.TypeOf(idx).Size())
+	perEntry += reflect.TypeOf(&idx).Size()
+	n := int(perEntry) * len(dict)
+	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
+
+	b.w.Size += n
+}
+
+// unique sorts the given list and moves one representative of each distinct
+// value to the front by swapping it into place. It returns k, the number of
+// unique values; the first k elements are then sorted and distinct, while the
+// remaining elements are in unspecified order.
+func unique(a sort.Interface) int {
+	n := a.Len()
+	if n == 0 {
+		return 0
+	}
+	sort.Sort(a)
+	last := 0 // position of the most recently kept unique element
+	for i := 1; i < n; i++ {
+		if !a.Less(last, i) {
+			// Element i equals the last kept element.
+			continue
+		}
+		last++
+		if last != i {
+			a.Swap(last, i)
+		}
+	}
+	return last + 1
+}
@@ -0,0 +1,98 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package language implements BCP 47 language tags and related functionality.
+//
+// The most important function of package language is to match a list of
+// user-preferred languages to a list of supported languages.
+// It relieves the developer of dealing with the complexity of this process
+// and provides the user with the best experience
+// (see https://blog.golang.org/matchlang).
+//
+// # Matching preferred against supported languages
+//
+// A Matcher for an application that supports English, Australian English,
+// Danish, and standard Mandarin can be created as follows:
+//
+//	var matcher = language.NewMatcher([]language.Tag{
+//	    language.English,   // The first language is used as fallback.
+//	    language.MustParse("en-AU"),
+//	    language.Danish,
+//	    language.Chinese,
+//	})
+//
+// This list of supported languages is typically implied by the languages for
+// which there exists translations of the user interface.
+//
+// User-preferred languages usually come as a comma-separated list of BCP 47
+// language tags.
+// The MatchStrings function finds the best match for such strings:
+//
+//	func handler(w http.ResponseWriter, r *http.Request) {
+//	    lang, _ := r.Cookie("lang")
+//	    accept := r.Header.Get("Accept-Language")
+//	    tag, _ := language.MatchStrings(matcher, lang.String(), accept)
+//
+//	    // tag should now be used for the initialization of any
+//	    // locale-specific service.
+//	}
+//
+// The Matcher's Match method can be used to match Tags directly.
+//
+// Matchers are aware of the intricacies of equivalence between languages, such
+// as deprecated subtags, legacy tags, macro languages, mutual
+// intelligibility between scripts and languages, and transparently passing
+// BCP 47 user configuration.
+// For instance, it will know that a reader of Bokmål Danish can read Norwegian
+// and will know that Cantonese ("yue") is a good match for "zh-HK".
+//
+// # Using match results
+//
+// To guarantee a consistent user experience to the user it is important to
+// use the same language tag for the selection of any locale-specific services.
+// For example, it is utterly confusing to substitute spelled-out numbers
+// or dates in one language in text of another language.
+// More subtly confusing is using the wrong sorting order or casing
+// algorithm for a certain language.
+//
+// All the packages in x/text that provide locale-specific services
+// (e.g. collate, cases) should be initialized with the tag that was
+// obtained at the start of an interaction with the user.
+//
+// Note that the Tag returned by Match and MatchStrings may differ from any
+// of the supported languages, as it may contain carried over settings from
+// the user tags.
+// This may be inconvenient when your application has some additional
+// locale-specific data for your supported languages.
+// Match and MatchStrings both return the index of the matched supported tag
+// to simplify associating such data with the matched tag.
+//
+// # Canonicalization
+//
+// If one uses the Matcher to compare languages one does not need to
+// worry about canonicalization.
+//
+// The meaning of a Tag varies per application. The language package
+// therefore delays canonicalization and preserves information as much
+// as possible. The Matcher, however, will always take into account that
+// two different tags may represent the same language.
+//
+// By default, only legacy and deprecated tags are converted into their
+// canonical equivalent. All other information is preserved. This approach makes
+// the confidence scores more accurate and allows matchers to distinguish
+// between variants that are otherwise lost.
+//
+// As a consequence, two tags that should be treated as identical according to
+// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
+// Matcher handles such distinctions, though, and is aware of the
+// equivalence relations. The CanonType type can be used to alter the
+// canonicalization form.
+//
+// # References
+//
+// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
+package language // import "golang.org/x/text/language"
+
+// TODO: explanation on how to match languages for your own locale-specific
+// service.
@@ -0,0 +1,411 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language_test
+
+import (
+	"fmt"
+	"net/http"
+
+	"golang.org/x/text/language"
+)
+
+// ExampleCanonType shows how the Default, BCP47, Macro and All
+// canonicalization types turn the same identifiers into different tags.
+func ExampleCanonType() {
+	// p prints the tag made from id under each canonicalization type.
+	p := func(id string) {
+		fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
+		fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
+		fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
+		fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
+	}
+	p("en-Latn")
+	p("sh")
+	p("zh-cmn")
+	p("bjd")
+	p("iw-Latn-fonipa-u-cu-usd")
+	// Output:
+	// Default(en-Latn) -> en-Latn
+	// BCP47(en-Latn) -> en
+	// Macro(en-Latn) -> en-Latn
+	// All(en-Latn) -> en
+	// Default(sh) -> sr-Latn
+	// BCP47(sh) -> sh
+	// Macro(sh) -> sh
+	// All(sh) -> sr-Latn
+	// Default(zh-cmn) -> cmn
+	// BCP47(zh-cmn) -> cmn
+	// Macro(zh-cmn) -> zh
+	// All(zh-cmn) -> zh
+	// Default(bjd) -> drl
+	// BCP47(bjd) -> drl
+	// Macro(bjd) -> bjd
+	// All(bjd) -> drl
+	// Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+	// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+	// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
+	// All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
+}
+
+// ExampleTag_Base prints the base language and the confidence that Base
+// reports for tags that do not specify a language.
+func ExampleTag_Base() {
+	fmt.Println(language.Make("und").Base())
+	fmt.Println(language.Make("und-US").Base())
+	fmt.Println(language.Make("und-NL").Base())
+	fmt.Println(language.Make("und-419").Base()) // Latin America
+	fmt.Println(language.Make("und-ZZ").Base())
+	// Output:
+	// en Low
+	// en High
+	// nl High
+	// es Low
+	// en Low
+}
+
+// ExampleTag_Script prints the script and confidence reported by Script and
+// shows how the confidence tells whether a script was explicitly specified.
+func ExampleTag_Script() {
+	en := language.Make("en")
+	sr := language.Make("sr")
+	sr_Latn := language.Make("sr_Latn")
+	fmt.Println(en.Script())
+	fmt.Println(sr.Script())
+	// Was a script explicitly specified?
+	_, c := sr.Script()
+	fmt.Println(c == language.Exact)
+	_, c = sr_Latn.Script()
+	fmt.Println(c == language.Exact)
+	// Output:
+	// Latn High
+	// Cyrl Low
+	// false
+	// true
+}
+
+// ExampleTag_Region prints the region and confidence reported by Region for
+// tags that do not specify a region.
+func ExampleTag_Region() {
+	ru := language.Make("ru")
+	en := language.Make("en")
+	fmt.Println(ru.Region())
+	fmt.Println(en.Region())
+	// Output:
+	// RU Low
+	// US Low
+}
+
+// ExampleRegion_TLD prints the result of TLD for several regions, first as
+// parsed and then after calling Canonicalize.
+func ExampleRegion_TLD() {
+	us := language.MustParseRegion("US")
+	gb := language.MustParseRegion("GB")
+	uk := language.MustParseRegion("UK")
+	bu := language.MustParseRegion("BU")
+
+	fmt.Println(us.TLD())
+	fmt.Println(gb.TLD())
+	fmt.Println(uk.TLD())
+	fmt.Println(bu.TLD())
+
+	fmt.Println(us.Canonicalize().TLD())
+	fmt.Println(gb.Canonicalize().TLD())
+	fmt.Println(uk.Canonicalize().TLD())
+	fmt.Println(bu.Canonicalize().TLD())
+	// Output:
+	// US <nil>
+	// UK <nil>
+	// UK <nil>
+	// ZZ language: region is not a valid ccTLD
+	// US <nil>
+	// UK <nil>
+	// UK <nil>
+	// MM <nil>
+}
+
+// ExampleCompose shows how Compose builds tags from bases, regions, tags,
+// variants and extensions, and how it passes on an error argument.
+func ExampleCompose() {
+	nl, _ := language.ParseBase("nl")
+	us, _ := language.ParseRegion("US")
+	de := language.Make("de-1901-u-co-phonebk")
+	jp := language.Make("ja-JP")
+	fi := language.Make("fi-x-ing")
+
+	u, _ := language.ParseExtension("u-nu-arabic")
+	x, _ := language.ParseExtension("x-piglatin")
+
+	// Combine a base language and region.
+	fmt.Println(language.Compose(nl, us))
+	// Combine a base language and extension.
+	fmt.Println(language.Compose(nl, x))
+	// Replace the region.
+	fmt.Println(language.Compose(jp, us))
+	// Combine several tags.
+	fmt.Println(language.Compose(us, nl, u))
+
+	// Replace the base language of a tag.
+	fmt.Println(language.Compose(de, nl))
+	fmt.Println(language.Compose(de, nl, u))
+	// Remove the base language.
+	fmt.Println(language.Compose(de, language.Base{}))
+	// Remove all variants.
+	fmt.Println(language.Compose(de, []language.Variant{}))
+	// Remove all extensions.
+	fmt.Println(language.Compose(de, []language.Extension{}))
+	fmt.Println(language.Compose(fi, []language.Extension{}))
+	// Remove all variants and extensions.
+	fmt.Println(language.Compose(de.Raw()))
+
+	// An error is gobbled or returned if non-nil.
+	fmt.Println(language.Compose(language.ParseRegion("ZA")))
+	fmt.Println(language.Compose(language.ParseRegion("HH")))
+
+	// Compose uses the same Default canonicalization as Make.
+	fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
+
+	// Call compose on a different CanonType for different results.
+	fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))
+
+	// Output:
+	// nl-US <nil>
+	// nl-x-piglatin <nil>
+	// ja-US <nil>
+	// nl-US-u-nu-arabic <nil>
+	// nl-1901-u-co-phonebk <nil>
+	// nl-1901-u-co-phonebk-nu-arabic <nil>
+	// und-1901-u-co-phonebk <nil>
+	// de-u-co-phonebk <nil>
+	// de-1901 <nil>
+	// fi <nil>
+	// de <nil>
+	// und-ZA <nil>
+	// und language: subtag "HH" is well-formed but unknown
+	// en-Latn-GB <nil>
+	// en-GB <nil>
+}
+
+// ExampleParse_errors shows how to distinguish the errors returned by Parse:
+// a language.ValueError identifies the offending subtag, while any other
+// error is treated as a syntax error.
+func ExampleParse_errors() {
+	for _, s := range []string{"Foo", "Bar", "Foobar"} {
+		_, err := language.Parse(s)
+		if err != nil {
+			if inv, ok := err.(language.ValueError); ok {
+				fmt.Println(inv.Subtag())
+			} else {
+				fmt.Println(s)
+			}
+		}
+	}
+	for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
+		_, err := language.Parse(s)
+		switch e := err.(type) {
+		case language.ValueError:
+			fmt.Printf("%s: culprit %q\n", s, e.Subtag())
+		case nil:
+			// No error.
+		default:
+			// A syntax error.
+			fmt.Printf("%s: ill-formed\n", s)
+		}
+	}
+	// Output:
+	// foo
+	// Foobar
+	// aa-Uuuu: culprit "Uuuu"
+	// AC: culprit "ac"
+	// ac-u: ill-formed
+}
+
+// ExampleTag_Parent prints the parent that Parent reports for several tags.
+func ExampleTag_Parent() {
+	// p prints the parent of the tag made from the given string.
+	p := func(tag string) {
+		fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
+	}
+	p("zh-CN")
+
+	// Australian English inherits from World English.
+	p("en-AU")
+
+	// If the tag has a different maximized script from its parent, a tag with
+	// this maximized script is inserted. This allows different language tags
+	// which have the same base language and script in common to inherit from
+	// a common set of settings.
+	p("zh-HK")
+
+	// If the maximized script of the parent is not identical, CLDR will skip
+	// inheriting from it, as it means there will not be many entries in common
+	// and inheriting from it is nonsensical.
+	p("zh-Hant")
+
+	// The parent of a tag with variants and extensions is the tag with all
+	// variants and extensions removed.
+	p("de-1994-u-co-phonebk")
+
+	// Remove default script.
+	p("de-Latn-LU")
+
+	// Output:
+	// parent(zh-CN): zh
+	// parent(en-AU): en-001
+	// parent(zh-HK): zh-Hant
+	// parent(zh-Hant): und
+	// parent(de-1994-u-co-phonebk): de
+	// parent(de-Latn-LU): de
+}
+
+// ExampleMatcher gives some examples of getting the best match of a set of
+// tags to any of the tags of a given set.
+func ExampleMatcher() {
+	// This is the set of tags from which we want to pick the best match. These
+	// can be, for example, the supported languages for some package.
+	tags := []language.Tag{
+		language.English,             // en
+		language.BritishEnglish,      // en-GB
+		language.French,              // fr
+		language.Afrikaans,           // af
+		language.BrazilianPortuguese, // pt-BR
+		language.EuropeanPortuguese,  // pt-PT
+		language.SimplifiedChinese,   // zh-Hans
+		language.Raw.Make("iw-IL"),   // Hebrew from Israel
+		language.Raw.Make("iw"),      // Hebrew
+		language.Raw.Make("he"),      // Hebrew
+	}
+	m := language.NewMatcher(tags)
+
+	// A simple match.
+	fmt.Println(m.Match(language.Make("fr")))
+
+	// Australian English is closer to British English than American English.
+	// The resulting match is "en-GB-u-rg-auzzzz". The first language listed,
+	// "en-GB", is the matched language. Next is the region override prefix
+	// "-u-rg-", the region override "au", and the region override suffix "zzzz".
+	// The region override is for things like currency, dates, and measurement
+	// systems.
+	fmt.Println(m.Match(language.Make("en-AU")))
+
+	// Default to the first tag passed to the Matcher if there is no match.
+	fmt.Println(m.Match(language.Make("ar")))
+
+	// Get the default tag.
+	fmt.Println(m.Match())
+
+	fmt.Println("----")
+
+	// We match SimplifiedChinese, but with Low confidence.
+	fmt.Println(m.Match(language.TraditionalChinese))
+
+	// British English is closer to Australian English than Traditional Chinese
+	// to Simplified Chinese.
+	fmt.Println(m.Match(language.TraditionalChinese, language.Make("en-AU")))
+
+	fmt.Println("----")
+
+	// In case multiple variants of a language are available, the most spoken
+	// variant is typically returned.
+	fmt.Println(m.Match(language.Portuguese))
+
+	// Pick the first value passed to Match in case of a tie.
+	fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
+	fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))
+
+	fmt.Println("----")
+
+	// If a Matcher is initialized with a language and its deprecated version,
+	// it will distinguish between them.
+	fmt.Println(m.Match(language.Raw.Make("iw")))
+
+	// However, for non-exact matches, it will treat deprecated versions as
+	// equivalent and consider other factors first.
+	fmt.Println(m.Match(language.Raw.Make("he-IL")))
+
+	fmt.Println("----")
+
+	// User settings passed to the Unicode extension are ignored for matching
+	// and preserved in the returned tag.
+	fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))
+
+	// Even if the matching language is different.
+	fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))
+
+	// If there is no matching language, the options of the first preferred tag are used.
+	fmt.Println(m.Match(language.Make("de-u-co-phonebk")))
+
+	// Output:
+	// fr 2 Exact
+	// en-GB-u-rg-auzzzz 1 High
+	// en 0 No
+	// en 0 No
+	// ----
+	// zh-Hans 6 Low
+	// en-GB-u-rg-auzzzz 1 High
+	// ----
+	// pt-BR 4 Exact
+	// fr-u-rg-bezzzz 2 High
+	// af-u-rg-nazzzz 3 High
+	// ----
+	// iw-IL 7 Exact
+	// he-u-rg-ilzzzz 9 Exact
+	// ----
+	// fr-u-cu-frf 2 Exact
+	// fr-u-cu-frf 2 High
+	// en-u-co-phonebk 0 No
+}
+
+// ExampleMatchStrings registers an HTTP handler that picks the language for a
+// request by matching the "lang" cookie and the Accept-Language header
+// against the supported languages.
+func ExampleMatchStrings() {
+	// languages supported by this service:
+	matcher := language.NewMatcher([]language.Tag{
+		language.English, language.Dutch, language.German,
+	})
+
+	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		lang, _ := r.Cookie("lang")
+		tag, _ := language.MatchStrings(matcher, lang.String(), r.Header.Get("Accept-Language"))
+
+		fmt.Println("User language:", tag)
+	})
+}
+
+// ExampleComprehends prints the confidence that Comprehends reports for
+// several pairs of tags.
+func ExampleComprehends() {
+	// Various levels of comprehensibility.
+	fmt.Println(language.Comprehends(language.English, language.English))
+	fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))
+
+	// An explicit Und results in no match.
+	fmt.Println(language.Comprehends(language.English, language.Und))
+
+	fmt.Println("----")
+
+	// There is usually no mutual comprehensibility between different scripts.
+	fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))
+
+	// One exception is for Traditional versus Simplified Chinese, albeit with
+	// a low confidence.
+	fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))
+
+	fmt.Println("----")
+
+	// A Swiss German speaker will often understand High German.
+	fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))
+
+	// The converse is not generally the case.
+	fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))
+
+	// Output:
+	// Exact
+	// High
+	// No
+	// ----
+	// No
+	// Low
+	// ----
+	// High
+	// No
+}
+
+// ExampleTag_values shows that the values returned by Raw, as well as Tags
+// themselves, can be compared using '=='.
+func ExampleTag_values() {
+	us := language.MustParseRegion("US")
+	en := language.MustParseBase("en")
+
+	lang, _, region := language.AmericanEnglish.Raw()
+	fmt.Println(lang == en, region == us)
+
+	lang, _, region = language.BritishEnglish.Raw()
+	fmt.Println(lang == en, region == us)
+
+	// Tags can be compared for exact equivalence using '=='.
+	en_us, _ := language.Compose(en, us)
+	fmt.Println(en_us == language.AmericanEnglish)
+
+	// Output:
+	// true true
+	// true false
+	// true
+}
@@ -0,0 +1,307 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// Language tag table generator.
+// Data read from the web.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"sort"
+	"strconv"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/language"
+	"golang.org/x/text/unicode/cldr"
+)
+
+// Command-line flags of the table generator.
+var (
+	// test selects comparison of existing tables instead of generation
+	// (see the flag's usage string).
+	test = flag.Bool("test",
+		false,
+		"test existing tables; can be used to compare web data with package data.")
+	// outputFile names the Go file the generated tables are written to.
+	outputFile = flag.String("output",
+		"tables.go",
+		"output file for generated tables")
+)
+
+// main generates the language matching tables: it loads the CLDR data,
+// writes the subtag constants and the match data, and finally emits the
+// result as a Go file in package "language".
+func main() {
+	gen.Init()
+
+	w := gen.NewCodeWriter()
+	// Honor the -output flag instead of a hard-coded name. Its default is
+	// "tables.go", so behavior is unchanged when the flag is not given. The
+	// argument is evaluated here, after gen.Init has run.
+	defer w.WriteGoFile(*outputFile, "language")
+
+	b := newBuilder(w)
+	gen.WriteCLDRVersion(w)
+
+	b.writeConstants()
+	b.writeMatchData()
+}
+
+// builder holds the output writer and the decoded CLDR data used while
+// generating the tables.
+type builder struct {
+	w    *gen.CodeWriter       // destination for generated code
+	hw   io.Writer             // MultiWriter for w and w.Hash
+	data *cldr.CLDR            // decoded CLDR core data
+	supp *cldr.SupplementalData // supplemental data taken from data
+}
+
+// langIndex returns the numeric ID of the base language s as a uint16.
+// language.MustParseBase is used, so s must be a valid base language.
+func (b *builder) langIndex(s string) uint16 {
+	base := language.MustParseBase(s)
+	return uint16(base)
+}
+
+// regionIndex returns the numeric ID of region s as an int.
+// language.MustParseRegion is used, so s must be a valid region.
+func (b *builder) regionIndex(s string) int {
+	region := language.MustParseRegion(s)
+	return int(region)
+}
+
+// scriptIndex returns the numeric ID of script s as an int.
+// language.MustParseScript is used, so s must be a valid script.
+func (b *builder) scriptIndex(s string) int {
+	script := language.MustParseScript(s)
+	return int(script)
+}
+
+func newBuilder(w *gen.CodeWriter) *builder {
+	r := gen.OpenCLDRCoreZip()
+	defer r.Close()
+	d := &cldr.Decoder{}
+	data, err := d.DecodeZip(r)
+	if err != nil {
+		log.Fatal(err)
+	}
+	b := builder{
+		w:    w,
+		hw:   io.MultiWriter(w, w.Hash),
+		data: data,
+		supp: data.Supplemental(),
+	}
+	return &b
+}
+
+// writeConsts emits one constant block to b.w. For every v in values it
+// writes a constant named _v whose value is f(v).
+func (b *builder) writeConsts(f func(string) int, values ...string) {
+	fmt.Fprintln(b.w, "const (")
+	for i := range values {
+		name := values[i]
+		fmt.Fprintf(b.w, "\t_%s = %v\n", name, f(name))
+	}
+	fmt.Fprintln(b.w, ")")
+}
+
+// TODO: region inclusion data will probably not be used in future matchers.
+
+// langConsts lists the base languages for which writeConstants emits a
+// constant named after the subtag with a leading underscore (e.g. _de).
+var langConsts = []string{
+	"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
+}
+
+// scriptConsts lists the scripts for which writeConstants emits a constant
+// (e.g. _Latn).
+var scriptConsts = []string{
+	"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
+	"Zzzz",
+}
+
+// regionConsts lists the regions for which writeConstants emits a constant
+// (e.g. _US).
+var regionConsts = []string{
+	"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
+	"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
+}
+
+// writeConstants emits three constant blocks, in this order: languages,
+// regions and scripts, using the names in langConsts, regionConsts and
+// scriptConsts.
+func (b *builder) writeConstants() {
+	// langIndex returns uint16; adapt it to the func(string) int signature.
+	langAsInt := func(s string) int {
+		return int(b.langIndex(s))
+	}
+	b.writeConsts(langAsInt, langConsts...)
+	b.writeConsts(b.regionIndex, regionConsts...)
+	b.writeConsts(b.scriptIndex, scriptConsts...)
+}
+
+// mutualIntelligibility is one entry of the generated matchLang table: a
+// pair of base languages taken from a CLDR languageMatch element.
+type mutualIntelligibility struct {
+	want, have uint16 // language IDs of the desired and supported language
+	distance   uint8  // distance as given by the CLDR entry
+	oneway     bool   // true if the CLDR entry is marked oneway
+}
+
+// scriptIntelligibility is one entry of the generated matchScript table: a
+// language-script pair taken from a CLDR languageMatch element. Entries that
+// are not oneway in CLDR are written twice, once per direction.
+type scriptIntelligibility struct {
+	wantLang, haveLang     uint16 // language IDs of desired and supported
+	wantScript, haveScript uint8  // script IDs of desired and supported
+	distance               uint8  // distance as given by the CLDR entry
+	// Always oneway
+}
+
+// regionIntelligibility is one entry of the generated matchRegion table,
+// built from language-script-region CLDR languageMatch elements whose
+// desired and supported values are equal.
+type regionIntelligibility struct {
+	lang     uint16 // compact language id
+	script   uint8  // 0 means any
+	group    uint8  // 0 means any; if bit 7 is set it means inverse
+	distance uint8  // distance as given by the CLDR entry
+	// Always twoway.
+}
+
+// writeMatchData writes tables with languages and scripts for which there is
+// mutual intelligibility. The data is based on CLDR's languageMatching data.
+// Note that we use a different algorithm than the one defined by CLDR and that
+// we slightly modify the data. For example, we convert scores to confidence levels.
+// We also drop all region-related data as we use a different algorithm to
+// determine region equivalence.
+//
+// It emits, in order: regionToGroups, paradigmLocales, the three
+// intelligibility types, and the matchLang, matchScript and matchRegion
+// tables, each sorted stably by increasing distance. Malformed or unsupported
+// input terminates the generator via log.Fatalf.
+func (b *builder) writeMatchData() {
+	lm := b.supp.LanguageMatching.LanguageMatches
+	cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
+
+	// Collect, per containing region, the regions it directly contains.
+	regionHierarchy := map[string][]string{}
+	for _, g := range b.supp.TerritoryContainment.Group {
+		regions := strings.Split(g.Contains, " ")
+		regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
+	}
+	// Regions start at 1, so the slice must be one larger than the number of
+	// regions.
+	regionToGroups := make([]uint8, language.NumRegions+1)
+
+	// Each match variable gets one bit (bits 0-6) in regionToGroups; bit 7 is
+	// reserved to mark an inverted group in regionIntelligibility.group.
+	idToIndex := map[string]uint8{}
+	for i, mv := range lm[0].MatchVariable {
+		if i > 6 {
+			log.Fatalf("Too many groups: %d", i)
+		}
+		idToIndex[mv.Id] = uint8(i + 1)
+		// TODO: also handle '-'
+		for _, r := range strings.Split(mv.Value, "+") {
+			// Worklist traversal: mark r and everything it transitively
+			// contains as members of group i.
+			todo := []string{r}
+			for k := 0; k < len(todo); k++ {
+				r := todo[k]
+				regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
+				todo = append(todo, regionHierarchy[r]...)
+			}
+		}
+	}
+	b.w.WriteVar("regionToGroups", regionToGroups)
+
+	// maps language id to in- and out-of-group region.
+	paradigmLocales := [][3]uint16{}
+	locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
+	// The locales are consumed in pairs.
+	for i := 0; i < len(locales); i += 2 {
+		x := [3]uint16{}
+		for j := 0; j < 2; j++ {
+			pc := strings.SplitN(locales[i+j], "-", 2)
+			x[0] = b.langIndex(pc[0])
+			if len(pc) == 2 {
+				x[1+j] = uint16(b.regionIndex(pc[1]))
+			}
+		}
+		paradigmLocales = append(paradigmLocales, x)
+	}
+	b.w.WriteVar("paradigmLocales", paradigmLocales)
+
+	b.w.WriteType(mutualIntelligibility{})
+	b.w.WriteType(scriptIntelligibility{})
+	b.w.WriteType(regionIntelligibility{})
+
+	matchLang := []mutualIntelligibility{}
+	matchScript := []scriptIntelligibility{}
+	matchRegion := []regionIntelligibility{}
+	// Convert the languageMatch entries in lists keyed by desired language.
+	for _, m := range lm[0].LanguageMatch {
+		// Different versions of CLDR use different separators.
+		desired := strings.Replace(m.Desired, "-", "_", -1)
+		supported := strings.Replace(m.Supported, "-", "_", -1)
+		d := strings.Split(desired, "_")
+		s := strings.Split(supported, "_")
+		if len(d) != len(s) {
+			log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+		}
+		// A distance that does not parse must not silently become 0, which
+		// would be written to the tables as the closest possible match.
+		distance, err := strconv.ParseInt(m.Distance, 10, 8)
+		if err != nil {
+			log.Fatalf("invalid distance %q: desired=%q; supported=%q: %v", m.Distance, desired, supported, err)
+		}
+		switch len(d) {
+		case 2:
+			if desired == supported && desired == "*_*" {
+				continue
+			}
+			// language-script pair.
+			matchScript = append(matchScript, scriptIntelligibility{
+				wantLang:   uint16(b.langIndex(d[0])),
+				haveLang:   uint16(b.langIndex(s[0])),
+				wantScript: uint8(b.scriptIndex(d[1])),
+				haveScript: uint8(b.scriptIndex(s[1])),
+				distance:   uint8(distance),
+			})
+			// Script entries are always oneway, so a two-way CLDR entry is
+			// stored a second time with want and have swapped.
+			if m.Oneway != "true" {
+				matchScript = append(matchScript, scriptIntelligibility{
+					wantLang:   uint16(b.langIndex(s[0])),
+					haveLang:   uint16(b.langIndex(d[0])),
+					wantScript: uint8(b.scriptIndex(s[1])),
+					haveScript: uint8(b.scriptIndex(d[1])),
+					distance:   uint8(distance),
+				})
+			}
+		case 1:
+			if desired == supported && desired == "*" {
+				continue
+			}
+			if distance == 1 {
+				// nb == no is already handled by macro mapping. Check there
+				// really is only this case.
+				if d[0] != "no" || s[0] != "nb" {
+					log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
+				}
+				continue
+			}
+			// TODO: consider dropping oneway field and just doubling the entry.
+			matchLang = append(matchLang, mutualIntelligibility{
+				want:     uint16(b.langIndex(d[0])),
+				have:     uint16(b.langIndex(s[0])),
+				distance: uint8(distance),
+				oneway:   m.Oneway == "true",
+			})
+		case 3:
+			if desired == supported && desired == "*_*_*" {
+				continue
+			}
+			if desired != supported {
+				// This is now supported by CLDR, but only one case, which
+				// should already be covered by paradigm locales. For instance,
+				// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
+				// testdata/CLDRLocaleMatcherTest.txt tests this.
+				if supported != "en_*_GB" {
+					log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+				}
+				continue
+			}
+			ri := regionIntelligibility{
+				lang:     b.langIndex(d[0]),
+				distance: uint8(distance),
+			}
+			if d[1] != "*" {
+				ri.script = uint8(b.scriptIndex(d[1]))
+			}
+			switch {
+			case d[2] == "*":
+				ri.group = 0x80 // not contained in anything
+			case strings.HasPrefix(d[2], "$!"):
+				// "$!name" is the inverse of group "$name": set bit 7 and
+				// fall through to look up the group index.
+				ri.group = 0x80
+				d[2] = "$" + d[2][len("$!"):]
+				fallthrough
+			case strings.HasPrefix(d[2], "$"):
+				ri.group |= idToIndex[d[2]]
+			}
+			matchRegion = append(matchRegion, ri)
+		default:
+			log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
+		}
+	}
+	sort.SliceStable(matchLang, func(i, j int) bool {
+		return matchLang[i].distance < matchLang[j].distance
+	})
+	b.w.WriteComment(`
+		matchLang holds pairs of langIDs of base languages that are typically
+		mutually intelligible. Each pair is associated with a confidence and
+		whether the intelligibility goes one or both ways.`)
+	b.w.WriteVar("matchLang", matchLang)
+
+	b.w.WriteComment(`
+		matchScript holds pairs of scriptIDs where readers of one script
+		can typically also read the other. Each is associated with a confidence.`)
+	sort.SliceStable(matchScript, func(i, j int) bool {
+		return matchScript[i].distance < matchScript[j].distance
+	})
+	b.w.WriteVar("matchScript", matchScript)
+
+	sort.SliceStable(matchRegion, func(i, j int) bool {
+		return matchRegion[i].distance < matchRegion[j].distance
+	})
+	b.w.WriteVar("matchRegion", matchRegion)
+}
@@ -0,0 +1,48 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language_test
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+
+	"golang.org/x/text/language"
+)
+
+// matcher is a language.Matcher configured for all supported languages:
+// British English, Norwegian and German.
+var matcher = language.NewMatcher([]language.Tag{
+	language.BritishEnglish,
+	language.Norwegian,
+	language.German,
+})
+
+// handler is an http.HandlerFunc. It parses the request's Accept-Language
+// header, matches the result against matcher, and prints the chosen tag
+// together with the parsed tags, their weights and any parse error.
+func handler(w http.ResponseWriter, r *http.Request) {
+	header := r.Header.Get("Accept-Language")
+	prefs, weights, err := language.ParseAcceptLanguage(header)
+	// We ignore the error: the default language will be selected when prefs
+	// is nil.
+	best, _, _ := matcher.Match(prefs...)
+	fmt.Printf("%17v (t: %6v; q: %3v; err: %v)\n", best, prefs, weights, err)
+}
+
+// ExampleParseAcceptLanguage feeds several Accept-Language header values to
+// handler and shows the tag selected for each of them.
+func ExampleParseAcceptLanguage() {
+	for _, al := range []string{
+		"nn;q=0.3, en-us;q=0.8, en,",
+		"gsw, en;q=0.7, en-US;q=0.8",
+		"gsw, nl, da",
+		"invalid",
+	} {
+		// Create dummy request with Accept-Language set and pass it to handler.
+		r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
+		r.Header.Set("Accept-Language", al)
+		handler(nil, r)
+	}
+
+	// Output:
+	//             en-GB (t: [    en  en-US     nn]; q: [  1 0.8 0.3]; err: <nil>)
+	// en-GB-u-rg-uszzzz (t: [   gsw  en-US     en]; q: [  1 0.8 0.7]; err: <nil>)
+	//                de (t: [   gsw     nl     da]; q: [  1   1   1]; err: <nil>)
+	//             en-GB (t: []; q: []; err: language: tag is not well-formed)
+}
@@ -0,0 +1,605 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go -output tables.go
+
+package language
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+	"strings"
+
+	"golang.org/x/text/internal/language"
+	"golang.org/x/text/internal/language/compact"
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed.
+//
+// Tag is defined in terms of compact.Tag, so values can be converted between
+// the two types directly.
+type Tag compact.Tag
+
+// makeTag converts an internal language.Tag into a Tag by way of
+// compact.Make.
+func makeTag(t language.Tag) (tag Tag) {
+	tag = Tag(compact.Make(t))
+	return tag
+}
+
+// tag returns the internal language.Tag that t represents.
+func (t *Tag) tag() language.Tag {
+	ct := (*compact.Tag)(t)
+	return ct.Tag()
+}
+
+// isCompact reports the result of IsCompact on the underlying compact.Tag.
+func (t *Tag) isCompact() bool {
+	ct := (*compact.Tag)(t)
+	return ct.IsCompact()
+}
+
+// TODO: improve performance.
+
+// lang returns the language ID stored in t, without inference.
+func (t *Tag) lang() language.Language {
+	return t.tag().LangID
+}
+
+// region returns the region ID stored in t, without inference.
+func (t *Tag) region() language.Region {
+	return t.tag().RegionID
+}
+
+// script returns the script ID stored in t, without inference.
+func (t *Tag) script() language.Script {
+	return t.tag().ScriptID
+}
+
+// Make is a convenience wrapper for Parse that omits the error. It uses the
+// Default canonicalization. In case of an error, a sensible default is
+// returned.
+func Make(s string) Tag {
+	tag, _ := Default.Parse(s)
+	return tag
+}
+
+// Make is a convenience wrapper for c.Parse that omits the error.
+// In case of an error, a sensible default is returned.
+func (c CanonType) Make(s string) Tag {
+	// The parse error is dropped by design; callers that need it use Parse.
+	tag, _ := c.Parse(s)
+	return tag
+}
+
+// Raw returns the raw base language, script and region, without making an
+// attempt to infer their values.
+func (t Tag) Raw() (b Base, s Script, r Region) {
+	raw := t.tag()
+	b = Base{raw.LangID}
+	s = Script{raw.ScriptID}
+	r = Region{raw.RegionID}
+	return b, s, r
+}
+
+// IsRoot returns true if t is equal to language "und".
+func (t Tag) IsRoot() bool {
+	ct := compact.Tag(t)
+	return ct.IsRoot()
+}
+
+// CanonType can be used to enable or disable various types of canonicalization.
+// The individual values are bit flags and may be combined with '|'.
+type CanonType int
+
+const (
+	// Replace deprecated base languages with their preferred replacements.
+	DeprecatedBase CanonType = 1 << iota
+	// Replace deprecated scripts with their preferred replacements.
+	DeprecatedScript
+	// Replace deprecated regions with their preferred replacements.
+	DeprecatedRegion
+	// Remove redundant scripts.
+	SuppressScript
+	// Normalize legacy encodings. This includes legacy languages defined in
+	// CLDR as well as bibliographic codes defined in ISO-639.
+	Legacy
+	// Map the dominant language of a macro language group to the macro language
+	// subtag. For example cmn -> zh.
+	Macro
+	// The CLDR flag should be used if full compatibility with CLDR is required.
+	// There are a few cases where language.Tag may differ from CLDR. To follow all
+	// of CLDR's suggestions, use All|CLDR.
+	CLDR
+
+	// Raw can be used to Compose or Parse without Canonicalization.
+	Raw CanonType = 0
+
+	// Replace all deprecated tags with their preferred replacements.
+	Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
+
+	// All canonicalizations recommended by BCP 47.
+	BCP47 = Deprecated | SuppressScript
+
+	// All canonicalizations.
+	All = BCP47 | Legacy | Macro
+
+	// Default is the canonicalization used by Parse, Make and Compose. To
+	// preserve as much information as possible, canonicalizations that remove
+	// potentially valuable information are not included. The Matcher is
+	// designed to recognize similar tags that would be the same if
+	// they were canonicalized using All.
+	Default = Deprecated | Legacy
+
+	// canonLang is the set of flags that affect the base language; if none
+	// of them is set, canonicalize skips the language step entirely.
+	canonLang = DeprecatedBase | Legacy | Macro
+
+	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
+)
+
+// canonicalize returns the canonicalized equivalent of the tag and
+// whether there was any change.
+//
+// The steps are applied in this order, each only if the corresponding flag is
+// set in c: script suppression, base language replacement, deprecated script
+// replacement, and deprecated region replacement. For c == Raw the tag is
+// returned unchanged.
+func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
+	if c == Raw {
+		return t, false
+	}
+	changed := false
+	if c&SuppressScript != 0 {
+		if t.LangID.SuppressScript() == t.ScriptID {
+			t.ScriptID = 0
+			changed = true
+		}
+	}
+	if c&canonLang != 0 {
+		// The loop only repeats after a Deprecated replacement (see the
+		// continue below); every other path ends at the break.
+		for {
+			if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
+				switch aliasType {
+				case language.Legacy:
+					if c&Legacy != 0 {
+						// "sh" without a script gets Latn before the
+						// language is replaced.
+						if t.LangID == _sh && t.ScriptID == 0 {
+							t.ScriptID = _Latn
+						}
+						t.LangID = l
+						changed = true
+					}
+				case language.Macro:
+					if c&Macro != 0 {
+						// We deviate here from CLDR. The mapping "nb" -> "no"
+						// qualifies as a typical Macro language mapping.  However,
+						// for legacy reasons, CLDR maps "no", the macro language
+						// code for Norwegian, to the dominant variant "nb". This
+						// change is currently under consideration for CLDR as well.
+						// See https://unicode.org/cldr/trac/ticket/2698 and also
+						// https://unicode.org/cldr/trac/ticket/1790 for some of the
+						// practical implications. TODO: this check could be removed
+						// if CLDR adopts this change.
+						if c&CLDR == 0 || t.LangID != _nb {
+							changed = true
+							t.LangID = l
+						}
+					}
+				case language.Deprecated:
+					if c&DeprecatedBase != 0 {
+						// "mo" without a region gets MD before the language
+						// is replaced.
+						if t.LangID == _mo && t.RegionID == 0 {
+							t.RegionID = _MD
+						}
+						t.LangID = l
+						changed = true
+						// Other canonicalization types may still apply.
+						continue
+					}
+				}
+			} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
+				// With both Legacy and CLDR set, "no" is mapped to "nb".
+				t.LangID = _nb
+				changed = true
+			}
+			break
+		}
+	}
+	if c&DeprecatedScript != 0 {
+		if t.ScriptID == _Qaai {
+			changed = true
+			t.ScriptID = _Zinh
+		}
+	}
+	if c&DeprecatedRegion != 0 {
+		if r := t.RegionID.Canonicalize(); r != t.RegionID {
+			changed = true
+			t.RegionID = r
+		}
+	}
+	return t, changed
+}
+
+// Canonicalize returns the canonicalized equivalent of the tag. If
+// canonicalization changes nothing, t itself is returned. The returned error
+// is always nil.
+func (c CanonType) Canonicalize(t Tag) (Tag, error) {
+	// Fast path: a compact tag that canonicalization leaves untouched is
+	// returned as is.
+	if t.isCompact() {
+		_, changed := canonicalize(c, compact.Tag(t).Tag())
+		if !changed {
+			return t, nil
+		}
+	}
+	// It is unlikely that one will canonicalize a tag after matching. So do
+	// a slow but simple approach here.
+	canon, changed := canonicalize(c, t.tag())
+	if !changed {
+		return t, nil
+	}
+	canon.RemakeString()
+	return makeTag(canon), nil
+}
+
+// Confidence indicates the level of certainty for a given return value.
+// For example, Serbian may be written in Cyrillic or Latin script.
+// The confidence level indicates whether a value was explicitly specified,
+// whether it is typically the only possible value, or whether there is
+// an ambiguity.
+//
+// The constants are declared in order of increasing confidence, starting
+// at 0 for No.
+type Confidence int
+
+const (
+	No    Confidence = iota // full confidence that there was no match
+	Low                     // most likely value picked out of a set of alternatives
+	High                    // value is generally assumed to be the correct match
+	Exact                   // exact match or explicitly specified value
+)
+
+// confName holds the names of the Confidence values, indexed by value.
+var confName = []string{
+	"No",
+	"Low",
+	"High",
+	"Exact",
+}
+
+// String returns the name of c. It indexes confName directly, so a value
+// outside the declared constants causes an index-out-of-range panic.
+func (c Confidence) String() string {
+	name := confName[c]
+	return name
+}
+
+// String returns the canonical string representation of the language tag.
+func (t Tag) String() string {
+	full := t.tag()
+	return full.String()
+}
+
+// MarshalText implements encoding.TextMarshaler.
+func (t Tag) MarshalText() (text []byte, err error) {
+	full := t.tag()
+	text, err = full.MarshalText()
+	return text, err
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler. The receiver is
+// overwritten with the parse result even when an error is returned.
+func (t *Tag) UnmarshalText(text []byte) error {
+	var parsed language.Tag
+	parseErr := parsed.UnmarshalText(text)
+	*t = makeTag(parsed)
+	return parseErr
+}
+
+// Base returns the base language of the language tag. If the base language is
+// unspecified, an attempt will be made to infer it from the context.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+//
+// An explicitly specified base is returned with Exact. An inferred base is
+// returned with Low when the tag has no script and its region is not a
+// country, and with High otherwise. If nothing can be inferred, the zero Base
+// is returned with No.
+func (t Tag) Base() (Base, Confidence) {
+	if explicit := t.lang(); explicit != 0 {
+		return Base{explicit}, Exact
+	}
+	full := t.tag()
+	maxed, err := full.Maximize()
+	if err != nil || maxed.LangID == 0 {
+		return Base{0}, No
+	}
+	if full.ScriptID == 0 && !full.RegionID.IsCountry() {
+		return Base{maxed.LangID}, Low
+	}
+	return Base{maxed.LangID}, High
+}
+
+// Script infers the script for the language tag. If it was not explicitly given, it will infer
+// a most likely candidate.
+// If more than one script is commonly used for a language, the most likely one
+// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
+// for Serbian.
+// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
+// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
+// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
+// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
+// unknown value in CLDR.  (Zzzz, Exact) is returned if Zzzz was explicitly specified.
+// Note that an inferred script is never guaranteed to be the correct one. Latin is
+// almost exclusively used for Afrikaans, but Arabic has been used for some texts
+// in the past.  Also, the script that is commonly used may change over time.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+func (t Tag) Script() (Script, Confidence) {
+	if explicit := t.script(); explicit != 0 {
+		return Script{explicit}, Exact
+	}
+	full := t.tag()
+	best, conf := language.Script(_Zzzz), No
+	if suppressed := full.LangID.SuppressScript(); suppressed != 0 {
+		// Note: it is not always the case that a language with a suppress
+		// script value is only written in one script (e.g. kk, ms, pa).
+		if full.RegionID == 0 {
+			return Script{suppressed}, High
+		}
+		best, conf = suppressed, High
+	}
+	// Maximize the tag as is; if that fails, retry once after
+	// canonicalizing deprecated and macro subtags.
+	maxed, err := full.Maximize()
+	if err != nil {
+		full, _ = canonicalize(Deprecated|Macro, full)
+		maxed, err = full.Maximize()
+	}
+	// A likely script that differs from the current candidate replaces it,
+	// but only with low confidence.
+	if err == nil && maxed.ScriptID != best {
+		best, conf = maxed.ScriptID, Low
+	}
+	return Script{best}, conf
+}
+
+// Region returns the region for the language tag. If it was not explicitly given, it will
+// infer a most likely candidate from the context.
+// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
+//
+// An explicit region is returned with Exact, an inferred one with Low. If no
+// region can be inferred, (ZZ, No) is returned.
+func (t Tag) Region() (Region, Confidence) {
+	if explicit := t.region(); explicit != 0 {
+		return Region{explicit}, Exact
+	}
+	full := t.tag()
+	// Maximize the tag as is; if that fails, retry once after
+	// canonicalizing deprecated and macro subtags.
+	maxed, err := full.Maximize()
+	if err != nil {
+		full, _ = canonicalize(Deprecated|Macro, full)
+		maxed, err = full.Maximize()
+	}
+	if err == nil {
+		return Region{maxed.RegionID}, Low // TODO: differentiate between high and low.
+	}
+	return Region{_ZZ}, No // TODO: return world instead of undetermined?
+}
+
+// Variants returns the variants specified explicitly for this language tag.
+// or nil if no variant was specified.
+func (t Tag) Variants() []Variant {
+	if !compact.Tag(t).MayHaveVariants() {
+		return nil
+	}
+	v := []Variant{}
+	x, str := "", t.tag().Variants()
+	for str != "" {
+		x, str = nextToken(str)
+		v = append(v, Variant{x})
+	}
+	return v
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+//
+// Parent returns a tag for a less specific language that is mutually
+// intelligible or Und if there is no such language. This may not be the same as
+// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
+// is "zh-Hant", and the parent of "zh-Hant" is "und".
+func (t Tag) Parent() Tag {
+	parent := compact.Tag(t).Parent()
+	return Tag(parent)
+}
+
+// nextToken returns token t and the rest of the string. The first byte of s
+// is skipped; t runs up to the next "-", and tail starts at that "-" (or is
+// empty if there is none). s must not be empty.
+func nextToken(s string) (t, tail string) {
+	rest := s[1:]
+	end := strings.IndexByte(rest, '-')
+	if end < 0 {
+		return rest, ""
+	}
+	return rest[:end], rest[end:]
+}
+
+// Extension is a single BCP 47 extension.
+type Extension struct {
+	s string // string form of the extension, including the type tag
+}
+
+// String returns the string representation of the extension, including the
+// type tag.
+func (e Extension) String() string {
+	repr := e.s
+	return repr
+}
+
+// ParseExtension parses s as an extension and returns it on success.
+func ParseExtension(s string) (e Extension, err error) {
+	raw, err := language.ParseExtension(s)
+	return Extension{s: raw}, err
+}
+
+// Type returns the one-byte extension type of e. It returns 0 for the zero
+// extension.
+func (e Extension) Type() byte {
+	if len(e.s) > 0 {
+		return e.s[0]
+	}
+	return 0
+}
+
+// Tokens returns the list of tokens of e.
+func (e Extension) Tokens() []string {
+	return strings.Split(e.s, "-")
+}
+
+// Extension returns the extension of type x for tag t. It will return
+// false for ok if t does not have the requested extension. The returned
+// extension will be invalid in this case.
+func (t Tag) Extension(x byte) (ext Extension, ok bool) {
+	if compact.Tag(t).MayHaveExtensions() {
+		raw, found := t.tag().Extension(x)
+		return Extension{raw}, found
+	}
+	return Extension{}, false
+}
+
+// Extensions returns all extensions of t, or nil if t cannot have
+// extensions.
+func (t Tag) Extensions() []Extension {
+	if !compact.Tag(t).MayHaveExtensions() {
+		return nil
+	}
+	raw := t.tag().Extensions()
+	out := []Extension{}
+	for i := range raw {
+		out = append(out, Extension{raw[i]})
+	}
+	return out
+}
+
+// TypeForKey returns the type associated with the given key, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// TypeForKey will traverse the inheritance chain to get the correct value.
+//
+// If there are multiple types associated with a key, only the first will be
+// returned. If there is no type associated with a key, it returns the empty
+// string.
+func (t Tag) TypeForKey(key string) string {
+	// For a tag that cannot have extensions, only the "rg" and "va" keys are
+	// looked up; every other key has no type.
+	if !compact.Tag(t).MayHaveExtensions() && key != "rg" && key != "va" {
+		return ""
+	}
+	full := t.tag()
+	return full.TypeForKey(key)
+}
+
+// SetTypeForKey returns a new Tag with the key set to type, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// An empty value removes an existing pair with the same key.
+func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
+	updated, err := t.tag().SetTypeForKey(key, value)
+	// The tag is converted and returned together with err, whatever err is.
+	return makeTag(updated), err
+}
+
+// NumCompactTags is the number of compact tags. The maximum tag is
+// NumCompactTags-1. It mirrors compact.NumCompactTags.
+const NumCompactTags = compact.NumCompactTags
+
+// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
+// for which data exists in the text repository. The index will change over time
+// and should not be stored in persistent storage. If t does not match a compact
+// index, exact will be false and the compact index will be returned for the
+// first match after repeatedly taking the Parent of t.
+func CompactIndex(t Tag) (index int, exact bool) {
+	id, isExact := compact.LanguageID(compact.Tag(t))
+	return int(id), isExact
+}
+
+// root is the zero value of the internal language.Tag.
+var root = language.Tag{}
+
+// Base is an ISO 639 language code, used for encoding the base language
+// of a language tag.
+type Base struct {
+	langID language.Language // internal language ID
+}
+
+// ParseBase parses a 2- or 3-letter ISO 639 code.
+// It returns a ValueError if s is a well-formed but unknown language identifier
+// or another error if another error occurred.
+func ParseBase(s string) (Base, error) {
+	id, err := language.ParseBase(s)
+	return Base{langID: id}, err
+}
+
+// String returns the BCP 47 representation of the base language.
+func (b Base) String() string {
+	id := b.langID
+	return id.String()
+}
+
+// ISO3 returns the ISO 639-3 language code.
+func (b Base) ISO3() string {
+	id := b.langID
+	return id.ISO3()
+}
+
+// IsPrivateUse reports whether this language code is reserved for private use.
+func (b Base) IsPrivateUse() bool {
+	id := b.langID
+	return id.IsPrivateUse()
+}
+
+// Script is a 4-letter ISO 15924 code for representing scripts.
+// It is idiomatically represented in title case.
+type Script struct {
+	scriptID language.Script // internal script ID
+}
+
+// ParseScript parses a 4-letter ISO 15924 code.
+// It returns a ValueError if s is a well-formed but unknown script identifier
+// or another error if another error occurred.
+func ParseScript(s string) (Script, error) {
+	id, err := language.ParseScript(s)
+	return Script{scriptID: id}, err
+}
+
+// String returns the script code in title case.
+// It returns "Zzzz" for an unspecified script.
+func (s Script) String() string {
+	id := s.scriptID
+	return id.String()
+}
+
+// IsPrivateUse reports whether this script code is reserved for private use.
+func (s Script) IsPrivateUse() bool {
+	id := s.scriptID
+	return id.IsPrivateUse()
+}
+
+// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
+type Region struct {
+	regionID language.Region // internal region ID
+}
+
+// EncodeM49 returns the Region for the given UN M.49 code.
+// It returns an error if r is not a valid code.
+func EncodeM49(r int) (Region, error) {
+	id, err := language.EncodeM49(r)
+	return Region{regionID: id}, err
+}
+
+// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
+// It returns a ValueError if s is a well-formed but unknown region identifier
+// or another error if another error occurred.
+func ParseRegion(s string) (Region, error) {
+	id, err := language.ParseRegion(s)
+	return Region{regionID: id}, err
+}
+
+// String returns the BCP 47 representation for the region.
+// It returns "ZZ" for an unspecified region.
+func (r Region) String() string {
+	id := r.regionID
+	return id.String()
+}
+
+// ISO3 returns the 3-letter ISO code of r.
+// Note that not all regions have a 3-letter ISO code.
+// In such cases this method returns "ZZZ".
+func (r Region) ISO3() string {
+	id := r.regionID
+	return id.ISO3()
+}
+
+// M49 returns the UN M.49 encoding of r, or 0 if this encoding
+// is not defined for r.
+func (r Region) M49() int {
+	id := r.regionID
+	return id.M49()
+}
+
+// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
+// may include private-use tags that are assigned by CLDR and used in this
+// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
+func (r Region) IsPrivateUse() bool {
+	id := r.regionID
+	return id.IsPrivateUse()
+}
+
+// IsCountry returns whether this region is a country or autonomous area. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsCountry() bool {
+	id := r.regionID
+	return id.IsCountry()
+}
+
+// IsGroup returns whether this region defines a collection of regions. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsGroup() bool {
+	id := r.regionID
+	return id.IsGroup()
+}
+
+// Contains returns whether Region c is contained by Region r. It returns true
+// if c == r.
+func (r Region) Contains(c Region) bool {
+	outer, inner := r.regionID, c.regionID
+	return outer.Contains(inner)
+}
+
+// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
+// In all other cases it returns either the region itself or an error.
+//
+// This method may return an error for a region for which there exists a
+// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
+// region will already be canonicalized if it was obtained from a Tag that was
+// obtained using any of the default methods.
+func (r Region) TLD() (Region, error) {
+	id, err := r.regionID.TLD()
+	return Region{regionID: id}, err
+}
+
+// Canonicalize returns the region or a possible replacement if the region is
+// deprecated. It will not return a replacement for deprecated regions that
+// are split into multiple regions.
+func (r Region) Canonicalize() Region {
+	canon := r.regionID.Canonicalize()
+	return Region{regionID: canon}
+}
+
+// Variant represents a registered variant of a language as defined by BCP 47.
+type Variant struct {
+	variant string // string form of the variant
+}
+
+// ParseVariant parses and returns a Variant. An error is returned if s is not
+// a valid variant.
+func ParseVariant(s string) (Variant, error) {
+	parsed, err := language.ParseVariant(s)
+	return Variant{variant: parsed.String()}, err
+}
+
+// String returns the string representation of the variant.
+func (v Variant) String() string {
+	repr := v.variant
+	return repr
+}
@@ -0,0 +1,788 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestTagSize(t *testing.T) {
+	id := Tag{}
+	typ := reflect.TypeOf(id)
+	if typ.Size() > 24 {
+		t.Errorf("size of Tag was %d; want 24", typ.Size())
+	}
+}
+
+// TestIsRoot checks that the zero Tag is the root and that, for every entry
+// returned by parseTests, IsRoot is true exactly when the expected language is
+// "und" and the expected script, region and extension are all empty.
+func TestIsRoot(t *testing.T) {
+	if zero := (Tag{}); !zero.IsRoot() {
+		t.Errorf("unspecified should be root.")
+	}
+	for idx, tc := range parseTests() {
+		// Parse errors are ignored; only the resulting tag is inspected.
+		tag, _ := Parse(tc.in)
+		wantRoot := tc.lang == "und" && tc.script == "" && tc.region == "" && tc.ext == ""
+		if got := tag.IsRoot(); got != wantRoot {
+			t.Errorf("%d: was %v; want %v", idx, got, wantRoot)
+		}
+	}
+}
+
+// TestEquality checks, for every input returned by parseTests, that a Tag
+// compares equal (using ==) both to the Tag obtained by re-parsing its string
+// form and to the Tag obtained by passing it through Compose.
+func TestEquality(t *testing.T) {
+	for i, tt := range parseTests() {
+		s := tt.in
+		tag := Make(s)
+		t1 := Make(tag.String())
+		if tag != t1 {
+			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v", i, s, t1, tag)
+		}
+		// The error from Compose is ignored: only the resulting Tag is compared.
+		t2, _ := Compose(tag)
+		if tag != t2 {
+			t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
+		}
+	}
+}
+
+// TestString checks that String reproduces the input unchanged for each
+// listed tag after it has been passed through Make.
+func TestString(t *testing.T) {
+	inputs := []string{
+		"no-u-rg-dkzzzz",
+	}
+	for idx, want := range inputs {
+		tag := Make(want)
+		got := tag.String()
+		if got == want {
+			continue
+		}
+		t.Errorf("%d:%s: got %s: want %s (%#v)", idx, want, got, want, tag)
+	}
+}
+
+// TestMarshal checks that each listed tag survives an UnmarshalText followed
+// by a MarshalText unchanged, and that neither step reports an error.
+func TestMarshal(t *testing.T) {
+	inputs := []string{
+		// TODO: these values will change with each CLDR update. This issue
+		// will be solved if we decide to fix the indexes.
+		"und",
+		"ca-ES-valencia",
+		"ca-ES-valencia-u-va-posix",
+		"ca-ES-valencia-u-co-phonebk",
+		"ca-ES-valencia-u-co-phonebk-va-posix",
+		"x-klingon",
+		"en-US",
+		"en-US-u-va-posix",
+		"en",
+		"en-u-co-phonebk",
+		"en-001",
+		"sh",
+
+		"en-GB-u-rg-uszzzz",
+		"en-GB-u-rg-uszzzz-va-posix",
+		"en-GB-u-co-phonebk-rg-uszzzz",
+		// Invalid tags should also roundtrip.
+		"en-GB-u-co-phonebk-rg-uszz",
+	}
+	for _, in := range inputs {
+		var tag Tag
+		if err := tag.UnmarshalText([]byte(in)); err != nil {
+			t.Errorf("UnmarshalText(%q): unexpected error: %v", in, err)
+		}
+		text, err := tag.MarshalText()
+		if err != nil {
+			t.Errorf("MarshalText(%q): unexpected error: %v", in, err)
+		}
+		if got := string(text); got != in {
+			t.Errorf("%s: got %q; want %q", in, got, in)
+		}
+	}
+}
+
+// TestBase checks the base language and the confidence reported by Tag.Base
+// for a set of parsed tags.
+func TestBase(t *testing.T) {
+	tests := []struct {
+		loc, lang string
+		conf      Confidence
+	}{
+		{"und", "en", Low},
+		{"x-abc", "und", No},
+		{"en", "en", Exact},
+		{"und-Cyrl", "ru", High},
+		// If a region is not included, the official language should be English.
+		{"und-US", "en", High},
+		// TODO: not-explicitly listed scripts should probably be und, No
+		// Modify addTags to return info on how the match was derived.
+		// {"und-Aghb", "und", No},
+	}
+	for idx, tc := range tests {
+		// Parse errors are ignored; only the resulting tag is inspected.
+		tag, _ := Parse(tc.loc)
+		base, conf := tag.Base()
+		if got := base.String(); got != tc.lang {
+			t.Errorf("%d: language was %s; want %s", idx, base, tc.lang)
+		}
+		if conf != tc.conf {
+			t.Errorf("%d: confidence was %d; want %d", idx, conf, tc.conf)
+		}
+	}
+}
+
+// TestParseBase checks the Base and the success status returned by ParseBase,
+// and that the Base equals the base of the Raw-made tag for the expected
+// output string.
+func TestParseBase(t *testing.T) {
+	tests := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"en", "en", true},
+		{"EN", "en", true},
+		{"nld", "nl", true},
+		{"dut", "dut", true},  // bibliographic
+		{"aaj", "und", false}, // unknown
+		{"qaa", "qaa", true},
+		{"a", "und", false},
+		{"", "und", false},
+		{"aaaa", "und", false},
+	}
+	for idx, tc := range tests {
+		base, err := ParseBase(tc.in)
+		if ok := err == nil; base.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", idx, tc.in, base, ok, tc.out, tc.ok)
+		}
+		if want, _, _ := Raw.Make(tc.out).Raw(); base != want {
+			t.Errorf("%d:%s: tag was %s; want %s", idx, tc.in, base, want)
+		}
+	}
+}
+
+// TestScript checks the script and the confidence reported by Tag.Script for
+// a set of parsed tags.
+func TestScript(t *testing.T) {
+	tests := []struct {
+		loc, scr string
+		conf     Confidence
+	}{
+		{"und", "Latn", Low},
+		{"en-Latn", "Latn", Exact},
+		{"en", "Latn", High},
+		{"sr", "Cyrl", Low},
+		{"kk", "Cyrl", High},
+		{"kk-CN", "Arab", Low},
+		{"cmn", "Hans", Low},
+		{"ru", "Cyrl", High},
+		{"ru-RU", "Cyrl", High},
+		{"yue", "Hant", Low},
+		{"x-abc", "Zzzz", Low},
+		{"und-zyyy", "Zyyy", Exact},
+	}
+	for idx, tc := range tests {
+		// Parse errors are ignored; only the resulting tag is inspected.
+		tag, _ := Parse(tc.loc)
+		script, conf := tag.Script()
+		if got := script.String(); got != tc.scr {
+			t.Errorf("%d:%s: script was %s; want %s", idx, tc.loc, script, tc.scr)
+		}
+		if conf != tc.conf {
+			t.Errorf("%d:%s: confidence was %d; want %d", idx, tc.loc, conf, tc.conf)
+		}
+	}
+}
+
+// TestParseScript checks the Script and the success status returned by
+// ParseScript. For successful parses it also checks that the Script equals the
+// script of the Raw-made tag "und-<out>".
+func TestParseScript(t *testing.T) {
+	tests := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"Latn", "Latn", true},
+		{"zzzz", "Zzzz", true},
+		{"zyyy", "Zyyy", true},
+		{"Latm", "Zzzz", false},
+		{"Zzz", "Zzzz", false},
+		{"", "Zzzz", false},
+		{"Zzzxx", "Zzzz", false},
+	}
+	for idx, tc := range tests {
+		script, err := ParseScript(tc.in)
+		ok := err == nil
+		if script.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", idx, tc.in, script, ok, tc.out, tc.ok)
+		}
+		if !ok {
+			continue
+		}
+		if _, want, _ := Raw.Make("und-" + tc.out).Raw(); script != want {
+			t.Errorf("%d:%s: tag was %s; want %s", idx, tc.in, script, want)
+		}
+	}
+}
+
+// TestRegion checks the region and the confidence reported by Tag.Region for
+// a set of tags parsed with Raw.Parse.
+func TestRegion(t *testing.T) {
+	tests := []struct {
+		loc, reg string
+		conf     Confidence
+	}{
+		{"und", "US", Low},
+		{"en", "US", Low},
+		{"zh-Hant", "TW", Low},
+		{"en-US", "US", Exact},
+		{"cmn", "CN", Low},
+		{"ru", "RU", Low},
+		{"yue", "HK", Low},
+		{"x-abc", "ZZ", Low},
+	}
+	for idx, tc := range tests {
+		// Parse errors are ignored; only the resulting tag is inspected.
+		tag, _ := Raw.Parse(tc.loc)
+		region, conf := tag.Region()
+		if got := region.String(); got != tc.reg {
+			t.Errorf("%d:%s: region was %s; want %s", idx, tc.loc, region, tc.reg)
+		}
+		if conf != tc.conf {
+			t.Errorf("%d:%s: confidence was %d; want %d", idx, tc.loc, conf, tc.conf)
+		}
+	}
+}
+
+// TestEncodeM49 checks EncodeM49 against a few known codes and then verifies,
+// for every code from 1 through 1000, that a successful encoding never yields
+// a region whose M49 value is 0.
+func TestEncodeM49(t *testing.T) {
+	tests := []struct {
+		m49  int
+		code string
+		ok   bool
+	}{
+		{1, "001", true},
+		{840, "US", true},
+		{899, "ZZ", false},
+	}
+	for idx, tc := range tests {
+		region, err := EncodeM49(tc.m49)
+		ok := err == nil
+		if region.String() != tc.code || ok != tc.ok {
+			t.Errorf("%d:%d: was %s, %v; want %s, %v", idx, tc.m49, region, ok, tc.code, tc.ok)
+		}
+	}
+	for code := 1; code <= 1000; code++ {
+		region, err := EncodeM49(code)
+		if err != nil {
+			continue
+		}
+		if region.M49() == 0 {
+			t.Errorf("%d has no error, but maps to undefined region", code)
+		}
+	}
+}
+
+// TestParseRegion checks the Region and the success status returned by
+// ParseRegion. For successful parses it also checks that the Region equals the
+// region of the Raw-made tag "und-<out>".
+func TestParseRegion(t *testing.T) {
+	tests := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"001", "001", true},
+		{"840", "US", true},
+		{"899", "ZZ", false},
+		{"USA", "US", true},
+		{"US", "US", true},
+		{"BC", "ZZ", false},
+		{"C", "ZZ", false},
+		{"CCCC", "ZZ", false},
+		{"01", "ZZ", false},
+	}
+	for idx, tc := range tests {
+		region, err := ParseRegion(tc.in)
+		ok := err == nil
+		if region.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", idx, tc.in, region, ok, tc.out, tc.ok)
+		}
+		if !ok {
+			continue
+		}
+		if _, _, want := Raw.Make("und-" + tc.out).Raw(); region != want {
+			t.Errorf("%d:%s: tag was %s; want %s", idx, tc.in, region, want)
+		}
+	}
+}
+
+// TestIsCountry checks Region.IsCountry for a set of region codes. Codes that
+// fail to parse are still tested, using whatever Region ParseRegion returns.
+func TestIsCountry(t *testing.T) {
+	tests := []struct {
+		reg     string
+		country bool
+	}{
+		{"US", true},
+		{"001", false},
+		{"958", false},
+		{"419", false},
+		{"203", true},
+		{"020", true},
+		{"900", false},
+		{"999", false},
+		{"QO", false},
+		{"EU", false},
+		{"AA", false},
+		{"XK", true},
+	}
+	for idx, tc := range tests {
+		region, _ := ParseRegion(tc.reg)
+		if got := region.IsCountry(); got != tc.country {
+			t.Errorf("%d: IsCountry(%s) was %v; want %v", idx, tc.reg, got, tc.country)
+		}
+	}
+}
+
+// TestIsGroup checks Region.IsGroup for a set of region codes. Codes that
+// fail to parse are still tested, using whatever Region ParseRegion returns.
+func TestIsGroup(t *testing.T) {
+	tests := []struct {
+		reg   string
+		group bool
+	}{
+		{"US", false},
+		{"001", true},
+		{"958", false},
+		{"419", true},
+		{"203", false},
+		{"020", false},
+		{"900", false},
+		{"999", false},
+		{"QO", true},
+		{"EU", true},
+		{"AA", false},
+		{"XK", false},
+	}
+	for idx, tc := range tests {
+		region, _ := ParseRegion(tc.reg)
+		if got := region.IsGroup(); got != tc.group {
+			t.Errorf("%d: IsGroup(%s) was %v; want %v", idx, tc.reg, got, tc.group)
+		}
+	}
+}
+
+// TestContains checks Region.Contains for self, direct, indirect and absent
+// containment between pairs of regions.
+func TestContains(t *testing.T) {
+	tests := []struct {
+		enclosing, contained string
+		contains             bool
+	}{
+		// A region contains itself.
+		{"US", "US", true},
+		{"001", "001", true},
+
+		// Direct containment.
+		{"001", "002", true},
+		{"039", "XK", true},
+		{"150", "XK", true},
+		{"EU", "AT", true},
+		{"QO", "AQ", true},
+
+		// Indirect containment.
+		{"001", "US", true},
+		{"001", "419", true},
+		{"001", "013", true},
+
+		// No containment.
+		{"US", "001", false},
+		{"155", "EU", false},
+	}
+	for idx, tc := range tests {
+		outer := MustParseRegion(tc.enclosing)
+		inner := MustParseRegion(tc.contained)
+		got := outer.Contains(inner)
+		if got == tc.contains {
+			continue
+		}
+		t.Errorf("%d: %s.Contains(%s) was %v; want %v", idx, tc.enclosing, tc.contained, got, tc.contains)
+	}
+}
+
+// TestRegionCanonicalize checks that Region.Canonicalize maps each input
+// region to the expected region.
+func TestRegionCanonicalize(t *testing.T) {
+	tests := []struct{ in, out string }{
+		{"UK", "GB"},
+		{"TP", "TL"},
+		{"QU", "EU"},
+		{"SU", "SU"},
+		{"VD", "VN"},
+		{"DD", "DE"},
+	}
+	for idx, tc := range tests {
+		region := MustParseRegion(tc.in)
+		want := MustParseRegion(tc.out)
+		got := region.Canonicalize()
+		if got != want {
+			t.Errorf("%d: got %v; want %v", idx, got, want)
+		}
+	}
+}
+
+// TestRegionTLD checks the region and the success status returned by
+// Region.TLD. An expected output of "ZZ" stands for the zero Region.
+func TestRegionTLD(t *testing.T) {
+	tests := []struct {
+		in, out string
+		ok      bool
+	}{
+		{"EH", "EH", true},
+		{"FR", "FR", true},
+		{"TL", "TL", true},
+
+		// In ccTLD before in ISO.
+		{"GG", "GG", true},
+
+		// Non-standard assignment of ccTLD to ISO code.
+		{"GB", "UK", true},
+
+		// Exceptionally reserved in ISO and valid ccTLD.
+		{"UK", "UK", true},
+		{"AC", "AC", true},
+		{"EU", "EU", true},
+		{"SU", "SU", true},
+
+		// Exceptionally reserved in ISO and invalid ccTLD.
+		{"CP", "ZZ", false},
+		{"DG", "ZZ", false},
+		{"EA", "ZZ", false},
+		{"FX", "ZZ", false},
+		{"IC", "ZZ", false},
+		{"TA", "ZZ", false},
+
+		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
+		// it is still being phased out.
+		{"AN", "AN", true},
+		{"TP", "TP", true},
+
+		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
+		// Defined in package language as it has a mapping in CLDR.
+		{"BU", "ZZ", false},
+		{"CS", "ZZ", false},
+		{"NT", "ZZ", false},
+		{"YU", "ZZ", false},
+		{"ZR", "ZZ", false},
+		// Not defined in package: SF.
+
+		// Indeterminately reserved in ISO.
+		// Defined in package language as it has a legacy mapping in CLDR.
+		{"DY", "ZZ", false},
+		{"RH", "ZZ", false},
+		{"VD", "ZZ", false},
+		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
+		// RN, RP, WG, WL, WV, and YV.
+
+		// Not assigned in ISO, but legacy definitions in CLDR.
+		{"DD", "ZZ", false},
+		{"YD", "ZZ", false},
+
+		// Normal mappings but somewhat special status in ccTLD.
+		{"BL", "BL", true},
+		{"MF", "MF", true},
+		{"BV", "BV", true},
+		{"SJ", "SJ", true},
+
+		// Have values when normalized, but not as is.
+		{"QU", "ZZ", false},
+
+		// ISO Private Use.
+		{"AA", "ZZ", false},
+		{"QM", "ZZ", false},
+		{"QO", "ZZ", false},
+		{"XA", "ZZ", false},
+		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
+	}
+	for _, tc := range tests {
+		// Entries with an empty input are skipped.
+		if tc.in == "" {
+			continue
+		}
+
+		region := MustParseRegion(tc.in)
+		// "ZZ" leaves want as the zero Region.
+		var want Region
+		if tc.out != "ZZ" {
+			want = MustParseRegion(tc.out)
+		}
+		got, err := region.TLD()
+		if ok := err == nil; ok != tc.ok {
+			t.Errorf("error(%v): got %v; want %v", region, ok, tc.ok)
+		}
+		if got != want {
+			t.Errorf("TLD(%v): got %v; want %v", region, got, want)
+		}
+	}
+}
+
+// TestCanonicalize checks the string form of tags after canonicalization with
+// various CanonType options, and then verifies that canonicalizing with All is
+// idempotent for every supported base language.
+func TestCanonicalize(t *testing.T) {
+	// TODO: do a full test using CLDR data in a separate regression test.
+	tests := []struct {
+		in, out string
+		option  CanonType
+	}{
+		{"en-Latn", "en", SuppressScript},
+		{"sr-Cyrl", "sr-Cyrl", SuppressScript},
+		{"sh", "sr-Latn", Legacy},
+		{"sh-HR", "sr-Latn-HR", Legacy},
+		{"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
+		{"tl", "fil", Legacy},
+		{"no", "no", Legacy},
+		{"no", "nb", Legacy | CLDR},
+		{"cmn", "cmn", Legacy},
+		{"cmn", "zh", Macro},
+		{"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
+		{"yue", "yue", Macro},
+		{"nb", "no", Macro},
+		{"nb", "nb", Macro | CLDR},
+		{"no", "no", Macro},
+		{"no", "no", Macro | CLDR},
+		{"iw", "he", DeprecatedBase},
+		{"iw", "he", Deprecated | CLDR},
+		{"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
+		{"alb", "sq", Legacy},       // bibliographic
+		{"dut", "nl", Legacy},       // bibliographic
+		// As of CLDR 25, mo is no longer considered a legacy mapping.
+		{"mo", "mo", Legacy | CLDR},
+		{"und-AN", "und-AN", Deprecated},
+		{"und-YD", "und-YE", DeprecatedRegion},
+		{"und-YD", "und-YD", DeprecatedBase},
+		{"und-Qaai", "und-Zinh", DeprecatedScript},
+		{"und-Qaai", "und-Qaai", DeprecatedBase},
+		{"drh", "mn", All}, // drh -> khk -> mn
+
+		{"en-GB-u-rg-uszzzz", "en-GB-u-rg-uszzzz", Raw},
+		{"en-GB-u-rg-USZZZZ", "en-GB-u-rg-uszzzz", Raw},
+		// TODO: use different exact values for language and regional tag?
+		{"en-GB-u-rg-uszzzz-va-posix", "en-GB-u-rg-uszzzz-va-posix", Raw},
+		{"en-GB-u-rg-uszzzz-co-phonebk", "en-GB-u-co-phonebk-rg-uszzzz", Raw},
+		// Invalid region specifications are left as is.
+		{"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw},
+		{"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw},
+		{"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw},
+
+		// CVE-2020-28851
+		// invalid key-value pair of -u- extension.
+		{"ES-u-000-00", "es-u-000-00", Raw},
+		{"ES-u-000-00-v-00", "es-u-000-00-v-00", Raw},
+		// reordered and unknown extension.
+		{"ES-v-00-u-000-00", "es-u-000-00-v-00", Raw},
+	}
+	for idx, tc := range tests {
+		// Errors are ignored on purpose: several inputs are invalid and only
+		// the resulting string form is compared.
+		tag, _ := Raw.Parse(tc.in)
+		tag, _ = tc.option.Canonicalize(tag)
+		if got := tag.String(); got != tc.out {
+			t.Errorf("%d:%s: was %s; want %s", idx, tc.in, got, tc.out)
+		}
+	}
+	// Test idempotence: canonicalizing a second time must not change the tag.
+	for _, base := range Supported.BaseLanguages() {
+		tag, _ := Raw.Compose(base)
+		once, _ := All.Canonicalize(tag)
+		twice, _ := All.Canonicalize(once)
+		if once != twice {
+			t.Errorf("idem(%s): got %s; want %s", tag, once, twice)
+		}
+	}
+}
+
+// TestTypeForKey checks the value returned by Tag.TypeForKey for a given key
+// on tags built with Make. An empty expected value means no type is expected.
+func TestTypeForKey(t *testing.T) {
+	tests := []struct{ key, in, out string }{
+		{"co", "en", ""},
+		{"co", "en-u-abc", ""},
+		{"co", "en-u-co-phonebk", "phonebk"},
+		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
+		{"co", "x-foo-u-co-phonebk", ""},
+		{"va", "en-US-u-va-posix", "posix"},
+		{"rg", "en-u-rg-gbzzzz", "gbzzzz"},
+		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
+		{"kc", "cmn-u-co-stroke", ""},
+		{"rg", "cmn-u-rg", ""},
+		{"rg", "cmn-u-rg-co-stroke", ""},
+		{"co", "cmn-u-rg-co-stroke", "stroke"},
+		{"co", "cmn-u-co-rg-gbzzzz", ""},
+		{"rg", "cmn-u-co-rg-gbzzzz", "gbzzzz"},
+		{"rg", "cmn-u-rg-gbzzzz-nlzzzz", "gbzzzz"},
+	}
+	for _, tc := range tests {
+		tag := Make(tc.in)
+		got := tag.TypeForKey(tc.key)
+		if got != tc.out {
+			t.Errorf("%q[%q]: was %q; want %q", tc.in, tc.key, got, tc.out)
+		}
+	}
+}
+
+// TestParent checks that Tag.Parent returns the expected parent for a set of
+// tags. Both the input and the expected parent are parsed with Raw.MustParse.
+func TestParent(t *testing.T) {
+	tests := []struct{ in, out string }{
+		// Strip variants and extensions first
+		{"de-u-co-phonebk", "de"},
+		{"de-1994", "de"},
+		{"de-Latn-1994", "de"}, // remove superfluous script.
+
+		// Ensure the canonical Tag for an entry is in the chain for base-script
+		// pairs.
+		{"zh-Hans", "zh"},
+
+		// Skip the script if it is the maximized version. CLDR files for the
+		// skipped tag are always empty.
+		{"zh-Hans-TW", "zh"},
+		{"zh-Hans-CN", "zh"},
+
+		// Insert the script if the maximized script is not the same as the
+		// maximized script of the base language.
+		{"zh-TW", "zh-Hant"},
+		{"zh-HK", "zh-Hant"},
+		{"zh-Hant-TW", "zh-Hant"},
+		{"zh-Hant-HK", "zh-Hant"},
+
+		// Non-default script skips to und.
+		// CLDR
+		{"az-Cyrl", "und"},
+		{"bs-Cyrl", "und"},
+		{"en-Dsrt", "und"},
+		{"ha-Arab", "und"},
+		{"mn-Mong", "und"},
+		{"pa-Arab", "und"},
+		{"shi-Latn", "und"},
+		{"sr-Latn", "und"},
+		{"uz-Arab", "und"},
+		{"uz-Cyrl", "und"},
+		{"vai-Latn", "und"},
+		{"zh-Hant", "und"},
+		// extra
+		{"nl-Cyrl", "und"},
+
+		// World English inherits from en-001.
+		{"en-150", "en-001"},
+		{"en-AU", "en-001"},
+		{"en-BE", "en-001"},
+		{"en-GG", "en-001"},
+		{"en-GI", "en-001"},
+		{"en-HK", "en-001"},
+		{"en-IE", "en-001"},
+		{"en-IM", "en-001"},
+		{"en-IN", "en-001"},
+		{"en-JE", "en-001"},
+		{"en-MT", "en-001"},
+		{"en-NZ", "en-001"},
+		{"en-PK", "en-001"},
+		{"en-SG", "en-001"},
+
+		// Spanish in Latin-American countries has es-419 as parent.
+		{"es-AR", "es-419"},
+		{"es-BO", "es-419"},
+		{"es-CL", "es-419"},
+		{"es-CO", "es-419"},
+		{"es-CR", "es-419"},
+		{"es-CU", "es-419"},
+		{"es-DO", "es-419"},
+		{"es-EC", "es-419"},
+		{"es-GT", "es-419"},
+		{"es-HN", "es-419"},
+		{"es-MX", "es-419"},
+		{"es-NI", "es-419"},
+		{"es-PA", "es-419"},
+		{"es-PE", "es-419"},
+		{"es-PR", "es-419"},
+		{"es-PY", "es-419"},
+		{"es-SV", "es-419"},
+		{"es-US", "es-419"},
+		{"es-UY", "es-419"},
+		{"es-VE", "es-419"},
+		// exceptions (according to CLDR)
+		{"es-CW", "es"},
+
+		// Inherit from pt-PT, instead of pt for these countries.
+		{"pt-AO", "pt-PT"},
+		{"pt-CV", "pt-PT"},
+		{"pt-GW", "pt-PT"},
+		{"pt-MO", "pt-PT"},
+		{"pt-MZ", "pt-PT"},
+		{"pt-ST", "pt-PT"},
+		{"pt-TL", "pt-PT"},
+
+		{"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
+		{"en-GB-u-rg-uszzzz", "en-GB"},
+		{"en-US-u-va-posix", "en-US"},
+
+		// Difference between language and regional tag.
+		{"ca-ES-valencia", "ca-ES"},
+		{"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"},
+		{"en-US-u-va-variant", "en-US"},
+		{"en-u-va-variant", "en"},
+		{"en-u-rg-gbzzzz", "en"},
+		{"en-US-u-rg-gbzzzz", "en-US"},
+		{"nl-US-u-rg-gbzzzz", "nl-US"},
+	}
+	for _, tc := range tests {
+		tag := Raw.MustParse(tc.in)
+		want := Raw.MustParse(tc.out)
+		if got := tag.Parent(); want != got {
+			t.Errorf("%s: was %v; want %v", tc.in, got, want)
+		}
+	}
+}
+
+// Input corpora for the Parse benchmarks, grouped by how much work the tags
+// are expected to require.
+var (
+	// Tags that produce no error and need no changes.
+	benchBasic = []string{
+		"en",
+		"en-Latn",
+		"en-GB",
+		"za",
+		"zh-Hant",
+		"zh",
+		"zh-HK",
+		"ar-MK",
+		"en-CA",
+		"fr-CA",
+		"fr-CH",
+		"fr",
+		"lv",
+		"he-IT",
+		"tlh",
+		"ja",
+		"ja-Jpan",
+		"ja-Jpan-JP",
+		"de-1996",
+		"de-CH",
+		"sr",
+		"sr-Latn",
+	}
+	// Tags with extensions; no changes required.
+	benchExt = []string{
+		"x-a-b-c-d",
+		"x-aa-bbbb-cccccccc-d",
+		"en-x_cc-b-bbb-a-aaa",
+		"en-c_cc-b-bbb-a-aaa-x-x",
+		"en-u-co-phonebk",
+		"en-Cyrl-u-co-phonebk",
+		"en-US-u-co-phonebk-cu-xau",
+		"en-nedix-u-co-phonebk",
+		"en-t-t0-abcd",
+		"en-t-nl-latn",
+		"en-t-t0-abcd-x-a",
+		"en_t_pt_MLt",
+		"en-t-fr-est",
+	}
+	// Tags that require a change, but no memory allocation.
+	benchSimpleChange = []string{
+		"EN",
+		"i-klingon",
+		"en-latn",
+		"zh-cmn-Hans-CN",
+		"iw-NL",
+	}
+	// Tags that require both a change and a memory allocation.
+	benchChangeAlloc = []string{
+		"en-c_cc-b-bbb-a-aaa",
+		"en-u-cu-xua-co-phonebk",
+		"en-u-cu-xua-co-phonebk-a-cd",
+		"en-u-def-abc-cu-xua-co-phonebk",
+		"en-t-en-Cyrl-NL-1994",
+		"en-t-en-Cyrl-NL-1994-t0-abc-def",
+	}
+	// Tags that result in errors.
+	benchErr = []string{
+		// IllFormed
+		"x_A.-B-C_D",
+		"en-u-cu-co-phonebk",
+		"en-u-cu-xau-co",
+		"en-t-nl-abcd",
+		// Invalid
+		"xx",
+		"nl-Uuuu",
+		"nl-QB",
+	}
+	// benchChange holds every tag that requires a change; benchAll is the
+	// concatenation of all corpora above.
+	benchChange = append(benchSimpleChange, benchChangeAlloc...)
+	benchAll    = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
+)
+
+// doParse is the shared benchmark body: it calls Parse once per benchmark
+// iteration, cycling through the entries of tag. The results of Parse are
+// discarded.
+func doParse(b *testing.B, tag []string) {
+	for iter := 0; iter < b.N; iter++ {
+		// Pick one tag per iteration (modulo the corpus size) rather than
+		// looping over the whole corpus, so that ns/op stays somewhat
+		// meaningful.
+		next := tag[iter%len(tag)]
+		Parse(next)
+	}
+}
+
+// BenchmarkParse runs the Parse benchmark over the combined benchAll corpus.
+func BenchmarkParse(b *testing.B) {
+	doParse(b, benchAll)
+}
+
+// BenchmarkParseBasic runs the Parse benchmark over the benchBasic corpus.
+func BenchmarkParseBasic(b *testing.B) {
+	doParse(b, benchBasic)
+}
+
+// BenchmarkParseError runs the Parse benchmark over the benchErr corpus.
+func BenchmarkParseError(b *testing.B) {
+	doParse(b, benchErr)
+}
+
+// BenchmarkParseSimpleChange runs the Parse benchmark over the
+// benchSimpleChange corpus.
+func BenchmarkParseSimpleChange(b *testing.B) {
+	doParse(b, benchSimpleChange)
+}
+
+// BenchmarkParseChangeAlloc runs the Parse benchmark over the
+// benchChangeAlloc corpus.
+func BenchmarkParseChangeAlloc(b *testing.B) {
+	doParse(b, benchChangeAlloc)
+}
@@ -0,0 +1,281 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"testing"
+)
+
+// TestRegionID checks the string form of regions returned by ParseRegion.
+// Inputs starting with '_' are negative cases: the remainder of the string
+// must fail to parse.
+func TestRegionID(t *testing.T) {
+	tests := []struct {
+		in, out string
+	}{
+		{"_  ", ""},
+		{"_000", ""},
+		{"419", "419"},
+		{"AA", "AA"},
+		{"ATF", "TF"},
+		{"HV", "HV"},
+		{"CT", "CT"},
+		{"DY", "DY"},
+		{"IC", "IC"},
+		{"FQ", "FQ"},
+		{"JT", "JT"},
+		{"ZZ", "ZZ"},
+		{"EU", "EU"},
+		{"QO", "QO"},
+		{"FX", "FX"},
+	}
+	for idx, tc := range tests {
+		if tc.in[0] == '_' {
+			// Negative case: strip the marker and expect a parse error.
+			code := tc.in[1:]
+			if _, err := ParseRegion(code); err == nil {
+				t.Errorf("%d:err(%s): found nil; want error", idx, code)
+			}
+			continue
+		}
+		region, _ := ParseRegion(tc.in)
+		if got := region.String(); got != tc.out {
+			t.Errorf("%d:%s: found %q; want %q", idx, tc.in, got, tc.out)
+		}
+		if len(tc.in) != 2 {
+			continue
+		}
+		// Two-letter inputs are parsed and compared a second time; this
+		// repeats the check above through the same ParseRegion call.
+		region, _ = ParseRegion(tc.in)
+		if got := region.String(); got != tc.out {
+			t.Errorf("%d:getISO2(%s): found %q; want %q", idx, tc.in, got, tc.out)
+		}
+	}
+}
+
+// TestRegionISO3 checks Region.ISO3 for a set of regions and that parsing the
+// resulting three-letter code yields the expected region. The expected region
+// is given by the to field, or by from when to is empty.
+func TestRegionISO3(t *testing.T) {
+	tests := []struct {
+		from, iso3, to string
+	}{
+		{"  ", "ZZZ", "ZZ"},
+		{"000", "ZZZ", "ZZ"},
+		{"AA", "AAA", ""},
+		{"CT", "CTE", ""},
+		{"DY", "DHY", ""},
+		{"EU", "QUU", ""},
+		{"HV", "HVO", ""},
+		{"IC", "ZZZ", "ZZ"},
+		{"JT", "JTN", ""},
+		{"PZ", "PCZ", ""},
+		{"QU", "QUU", "EU"},
+		{"QO", "QOO", ""},
+		{"YD", "YMD", ""},
+		{"FQ", "ATF", "TF"},
+		{"TF", "ATF", ""},
+		{"FX", "FXX", ""},
+		{"ZZ", "ZZZ", ""},
+		{"419", "ZZZ", "ZZ"},
+	}
+	for _, tc := range tests {
+		// Parse errors are ignored; some inputs are deliberately invalid.
+		region, _ := ParseRegion(tc.from)
+		if got := region.ISO3(); got != tc.iso3 {
+			t.Errorf("iso3(%q): found %q; want %q", tc.from, got, tc.iso3)
+		}
+		if tc.iso3 == "" {
+			continue
+		}
+		// An empty to field means the code maps back to the original region.
+		wantCode := tc.to
+		if wantCode == "" {
+			wantCode = tc.from
+		}
+		want, _ := ParseRegion(wantCode)
+		if got, _ := ParseRegion(tc.iso3); got != want {
+			t.Errorf("%s: found %q; want %q", tc.iso3, got, wantCode)
+		}
+	}
+}
+
+// TestRegionM49 checks the conversion between UN M.49 codes and regions in
+// both directions: EncodeM49 (code to region) and Region.M49 (region to code).
+func TestRegionM49(t *testing.T) {
+	// An empty id means EncodeM49 is expected to return an error.
+	fromTests := []struct {
+		m49 int
+		id  string
+	}{
+		{0, ""},
+		{-1, ""},
+		{1000, ""},
+		{10000, ""},
+
+		{001, "001"},
+		{104, "MM"},
+		{180, "CD"},
+		{230, "ET"},
+		{231, "ET"},
+		{249, "FX"},
+		{250, "FR"},
+		{276, "DE"},
+		{278, "DD"},
+		{280, "DE"},
+		{419, "419"},
+		{626, "TL"},
+		{736, "SD"},
+		{840, "US"},
+		{854, "BF"},
+		{891, "CS"},
+		{899, ""},
+		{958, "AA"},
+		{966, "QT"},
+		{967, "EU"},
+		{999, "ZZ"},
+	}
+	for _, tt := range fromTests {
+		id, err := EncodeM49(tt.m49)
+		// have is the observed error status, want the expected one, so the
+		// failure message reports them under the right labels.
+		if have, want := err != nil, tt.id == ""; have != want {
+			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
+			continue
+		}
+		r, _ := ParseRegion(tt.id)
+		if r != id {
+			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
+		}
+	}
+
+	toTests := []struct {
+		m49 int
+		id  string
+	}{
+		{0, "000"},
+		{0, "IC"}, // Some codes don't have an ID
+
+		{001, "001"},
+		{104, "MM"},
+		{104, "BU"},
+		{180, "CD"},
+		{180, "ZR"},
+		{231, "ET"},
+		{250, "FR"},
+		{249, "FX"},
+		{276, "DE"},
+		{278, "DD"},
+		{419, "419"},
+		{626, "TL"},
+		{626, "TP"},
+		{729, "SD"},
+		{826, "GB"},
+		{840, "US"},
+		{854, "BF"},
+		{891, "YU"},
+		{891, "CS"},
+		{958, "AA"},
+		{966, "QT"},
+		{967, "EU"},
+		{967, "QU"},
+		{999, "ZZ"},
+		// For codes that don't have an M49 code use the replacement value,
+		// if available.
+		{854, "HV"}, // maps to Burkina Faso
+	}
+	for _, tt := range toTests {
+		// Parse errors are ignored; "000" is deliberately not a valid region.
+		r, _ := ParseRegion(tt.id)
+		if r.M49() != tt.m49 {
+			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
+		}
+	}
+}
+
+// TestRegionDeprecation checks that Region.Canonicalize maps each input
+// region to its expected replacement. When input and expected output parse to
+// the same region, it only checks that the result is not "ZZ".
+func TestRegionDeprecation(t *testing.T) {
+	tests := []struct{ in, out string }{
+		{"BU", "MM"},
+		{"BUR", "MM"},
+		{"CT", "KI"},
+		{"DD", "DE"},
+		{"DDR", "DE"},
+		{"DY", "BJ"},
+		{"FX", "FR"},
+		{"HV", "BF"},
+		{"JT", "UM"},
+		{"MI", "UM"},
+		{"NH", "VU"},
+		{"NQ", "AQ"},
+		{"PU", "UM"},
+		{"PZ", "PA"},
+		{"QU", "EU"},
+		{"RH", "ZW"},
+		{"TP", "TL"},
+		{"UK", "GB"},
+		{"VD", "VN"},
+		{"WK", "UM"},
+		{"YD", "YE"},
+		{"NL", "NL"},
+	}
+	for _, tc := range tests {
+		source, _ := ParseRegion(tc.in)
+		want, _ := ParseRegion(tc.out)
+		got := source.Canonicalize()
+		if want == source {
+			// No replacement expected: the result must not be "ZZ".
+			if got.String() == "ZZ" {
+				t.Errorf("%s: was %q; want %q", tc.in, got, tc.in)
+			}
+		} else if got != want {
+			t.Errorf("%s: was %q; want %q", tc.in, got, tc.out)
+		}
+
+	}
+}
+
+// TestIsPrivateUse checks the IsPrivateUse method of base languages, regions
+// and scripts, in that order, each against its own table.
+func TestIsPrivateUse(t *testing.T) {
+	type test struct {
+		s       string
+		private bool
+	}
+
+	baseTests := []test{
+		{"en", false},
+		{"und", false},
+		{"pzn", false},
+		{"qaa", true},
+		{"qtz", true},
+		{"qua", false},
+	}
+	for idx, tc := range baseTests {
+		base, _ := ParseBase(tc.s)
+		if got := base.IsPrivateUse(); got != tc.private {
+			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", idx, tc.s, got, tc.private)
+		}
+	}
+
+	regionTests := []test{
+		{"001", false},
+		{"419", false},
+		{"899", false},
+		{"900", false},
+		{"957", false},
+		{"958", true},
+		{"AA", true},
+		{"AC", false},
+		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
+		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
+		{"QO", true},  // CLDR grouping, User-assigned in ISO.
+		{"QA", false},
+		{"QM", true},
+		{"QZ", true},
+		{"XA", true},
+		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
+		{"XZ", true},
+		{"ZW", false},
+		{"ZZ", true},
+	}
+	for idx, tc := range regionTests {
+		region, _ := ParseRegion(tc.s)
+		if got := region.IsPrivateUse(); got != tc.private {
+			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", idx, tc.s, got, tc.private)
+		}
+	}
+
+	scriptTests := []test{
+		{"Latn", false},
+		{"Laaa", false}, // invalid
+		{"Qaaa", true},
+		{"Qabx", true},
+		{"Qaby", false},
+		{"Zyyy", false},
+		{"Zzzz", false},
+	}
+	for idx, tc := range scriptTests {
+		script, _ := ParseScript(tc.s)
+		if got := script.IsPrivateUse(); got != tc.private {
+			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", idx, tc.s, got, tc.private)
+		}
+	}
+}
@@ -0,0 +1,735 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"errors"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// A MatchOption configures a Matcher. It is a function that is applied to the
+// matcher while it is being constructed by NewMatcher.
+type MatchOption func(*matcher)
+
+// PreferSameScript returns an option that controls the fallback used when no
+// match is found: if enabled, the first preferred tag that shares its script
+// with a supported tag selects that supported tag. The default is currently
+// true, but this may change in the future.
+func PreferSameScript(preferSame bool) MatchOption {
+	apply := func(m *matcher) {
+		m.preferSameScript = preferSame
+	}
+	return apply
+}
+
+// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
+// There doesn't seem to be too much need for multiple types.
+// Making it a concrete type allows MatchStrings to be a method, which will
+// improve its discoverability.
+
+// MatchStrings parses the given strings one by one and matches each against
+// the languages of m, stopping at the first string that yields a match. Every
+// string may be an Accept-Language header as handled by ParseAcceptLanguage;
+// strings that fail to parse are skipped. If no string matches, the result of
+// calling m.Match without arguments (the default language) is returned.
+func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
+	for _, header := range lang {
+		desired, _, err := ParseAcceptLanguage(header)
+		if err != nil {
+			continue
+		}
+		matched, i, conf := m.Match(desired...)
+		if conf == No {
+			continue
+		}
+		return matched, i
+	}
+	tag, index, _ = m.Match()
+	return tag, index
+}
+
+// Matcher is the interface that wraps the Match method.
+//
+// Match returns the best match for any of the given tags, along with
+// a unique index associated with the returned tag and a confidence
+// score. The tags are given in order of preference.
+type Matcher interface {
+	Match(t ...Tag) (tag Tag, index int, c Confidence)
+}
+
+// Comprehends reports the confidence with which a speaker of the given
+// language can be expected to comprehend the written form of the alternative
+// language. It matches speaker against a Matcher that supports only
+// alternative.
+func Comprehends(speaker, alternative Tag) Confidence {
+	m := NewMatcher([]Tag{alternative})
+	_, _, conf := m.Match(speaker)
+	return conf
+}
+
+// NewMatcher returns a Matcher that matches an ordered list of preferred tags
+// against a list of supported tags based on written intelligibility, closeness
+// of dialect, equivalence of subtags and various other rules. It is initialized
+// with the list of supported tags. The first element is used as the default
+// value in case no match is found.
+//
+// Its Match method matches the first of the given Tags to reach a certain
+// confidence threshold. The tags passed to Match should therefore be specified
+// in order of preference. Extensions are ignored for matching.
+//
+// The index returned by the Match method corresponds to the index of the
+// matched tag in t, but is augmented with the Unicode extension ('u') of the
+// corresponding preferred tag. This allows user locale options to be passed
+// transparently.
+//
+// The given options are applied to the matcher before the supported tags are
+// indexed.
+func NewMatcher(t []Tag, options ...MatchOption) Matcher {
+	return newMatcher(t, options)
+}
+
+// Match implements the Matcher interface. It asks getBest for the best
+// supported tag for the desired tags in want and returns that tag, its index
+// and the confidence of the match. If getBest finds nothing, the default tag
+// is used, unless preferSameScript is set and a supported tag has the same
+// maximized script as the script of one of the desired tags. The region and
+// the extensions of the desired tag reported by getBest are then carried over
+// into the result.
+func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
+	var tt language.Tag
+	// w is the desired tag that getBest associates with the result.
+	match, w, c := m.getBest(want...)
+	if match != nil {
+		tt, index = match.tag, match.index
+	} else {
+		// TODO: this should be an option
+		tt = m.default_.tag
+		if m.preferSameScript {
+			// Fall back to the first supported tag whose maximized script
+			// equals the script of a desired tag, in order of preference.
+		outer:
+			for _, w := range want {
+				script, _ := w.Script()
+				if script.scriptID == 0 {
+					// Don't do anything if there is no script, such as with
+					// private subtags.
+					continue
+				}
+				for i, h := range m.supported {
+					if script.scriptID == h.maxScript {
+						tt, index = h.tag, i
+						break outer
+					}
+				}
+			}
+		}
+		// TODO: select first language tag based on script.
+	}
+	// The desired tag specifies a region that differs from the result's.
+	if w.RegionID != tt.RegionID && w.RegionID != 0 {
+		if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
+			// The result's region contains the desired region: use the more
+			// specific desired region in the result.
+			tt.RegionID = w.RegionID
+			tt.RemakeString()
+		} else if r := w.RegionID.String(); len(r) == 2 {
+			// Otherwise record a two-letter desired region in the "rg" key of
+			// the result, lower-cased and suffixed with "zzzz".
+			// TODO: also filter macro and deprecated.
+			tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
+		}
+	}
+	// Copy options from the user-provided tag into the result tag. This is hard
+	// to do after the fact, so we do it here.
+	// TODO: add in alternative variants to -u-va-.
+	// TODO: add preferred region to -u-rg-.
+	if e := w.Extensions(); len(e) > 0 {
+		b := language.Builder{}
+		b.SetTag(tt)
+		for _, e := range e {
+			b.AddExt(e)
+		}
+		tt = b.Make()
+	}
+	return makeTag(tt), index, c
+}
+
+// ErrMissingLikelyTagsData indicates no information was available
+// to compute likely values of missing tags.
+// NOTE(review): not referenced by the matching code in this file; presumably
+// returned by other parts of the package - confirm.
+var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
+
+// func (t *Tag) setTagsFrom(id Tag) {
+// 	t.LangID = id.LangID
+// 	t.ScriptID = id.ScriptID
+// 	t.RegionID = id.RegionID
+// }
+
+// Tag Matching
+// CLDR defines an algorithm for finding the best match between two sets of language
+// tags. The basic algorithm defines how to score a possible match and then find
+// the match with the best score
+// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
+// Using scoring has several disadvantages. The scoring obfuscates the importance of
+// the various factors considered, making the algorithm harder to understand. Using
+// scoring also requires the full score to be computed for each pair of tags.
+//
+// We will use a different algorithm which aims to have the following properties:
+// - clarity on the precedence of the various selection factors, and
+// - improved performance by allowing early termination of a comparison.
+//
+// Matching algorithm (overview)
+// Input:
+//   - supported: a set of supported tags
+//   - default:   the default tag to return in case there is no match
+//   - desired:   list of desired tags, ordered by preference, starting with
+//                the most-preferred.
+//
+// Algorithm:
+//   1) Set the best match to the lowest confidence level
+//   2) For each tag in "desired":
+//     a) For each tag in "supported":
+//        1) compute the match between the two tags.
+//        2) if the match is better than the previous best match, replace it
+//           with the new match. (see next section)
+//     b) if the current best match is Exact and pin is true the result will be
+//        frozen to the language found thus far, although better matches may
+//        still be found for the same language.
+//   3) If the best match so far is below a certain threshold, return "default".
+//
+// Ranking:
+// We use two phases to determine whether one pair of tags are a better match
+// than another pair of tags. First, we determine a rough confidence level. If the
+// levels are different, the one with the highest confidence wins.
+// Second, if the rough confidence levels are identical, we use a set of tie-breaker
+// rules.
+//
+// The confidence level of matching a pair of tags is determined by finding the
+// lowest confidence level of any matches of the corresponding subtags (the
+// result is deemed as good as its weakest link).
+// We define the following levels:
+//   Exact    - An exact match of a subtag, before adding likely subtags.
+//   MaxExact - An exact match of a subtag, after adding likely subtags.
+//              [See Note 2].
+//   High     - High level of mutual intelligibility between different subtag
+//              variants.
+//   Low      - Low level of mutual intelligibility between different subtag
+//              variants.
+//   No       - No mutual intelligibility.
+//
+// The following levels can occur for each type of subtag:
+//   Base:    Exact, MaxExact, High, Low, No
+//   Script:  Exact, MaxExact [see Note 3], Low, No
+//   Region:  Exact, MaxExact, High
+//   Variant: Exact, High
+//   Private: Exact, No
+//
+// Any result with a confidence level of Low or higher is deemed a possible match.
+// Once a desired tag matches any of the supported tags with a level of MaxExact
+// or higher, the next desired tag is not considered (see Step 2.b).
+// Note that CLDR provides languageMatching data that defines close equivalence
+// classes for base languages, scripts and regions.
+//
+// Tie-breaking
+// If we get the same confidence level for two matches, we apply a sequence of
+// tie-breaking rules. The first that succeeds defines the result. The rules are
+// applied in the following order.
+//   1) Original language was defined and was identical.
+//   2) Original region was defined and was identical.
+//   3) Distance between two maximized regions was the smallest.
+//   4) Original script was defined and was identical.
+//   5) Distance from want tag to have tag using the parent relation [see Note 5.]
+// If there is still no winner after these rules are applied, the first match
+// found wins.
+//
+// Notes:
+// [2] In practice, as matching of Exact is done in a separate phase from
+//     matching the other levels, we reuse the Exact level to mean MaxExact in
+//     the second phase. As a consequence, we only need the levels defined by
+//     the Confidence type. The MaxExact confidence level is mapped to High in
+//     the public API.
+// [3] We do not differentiate between maximized script values that were derived
+//     from suppressScript versus most likely tag data. We determined that in
+//     ranking the two, one ranks just after the other. Moreover, the two cannot
+//     occur concurrently. As a consequence, they are identical for practical
+//     purposes.
+// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
+//     the MaxExact level to allow iw vs he to still be a closer match than
+//     en-AU vs en-US, for example.
+// [5] In CLDR a locale inherits fields that are unspecified for this locale
+//     from its parent. Therefore, if a locale is a parent of another locale,
+//     it is a strong measure for closeness, especially when no other tie
+//     breaker rule applies. One could also argue it is inconsistent, for
+//     example, when pt-AO matches pt (which CLDR equates with pt-BR), even
+//     though its parent is pt-PT according to the inheritance rules.
+//
+// Implementation Details:
+// There are several performance considerations worth pointing out. Most notably,
+// we preprocess as much as possible (within reason) at the time of creation of a
+// matcher. This includes:
+//   - creating a per-language map, which includes data for the raw base language
+//     and its canonicalized variant (if applicable),
+//   - expanding entries for the equivalence classes defined in CLDR's
+//     languageMatch data.
+// The per-language map ensures that typically only a very small number of tags
+// need to be considered. The pre-expansion of canonicalized subtags and
+// equivalence classes reduces the amount of map lookups that need to be done at
+// runtime.
+
+// matcher keeps a set of supported language tags, indexed by language.
+type matcher struct {
+	// default_ is the entry whose tag Match falls back to when nothing
+	// matches. newMatcher takes it from the first supported tag, or uses an
+	// empty haveTag when no tags are supported.
+	default_         *haveTag
+	// supported holds one entry per supported tag, in the order given to
+	// newMatcher.
+	supported        []*haveTag
+	// index maps a language to the candidate entries for that language.
+	index            map[language.Language]*matchHeader
+	// NOTE(review): passSettings is not read or written in this file - confirm
+	// whether it is still needed.
+	passSettings     bool
+	// preferSameScript enables the same-script fallback in Match. newMatcher
+	// sets it to true before applying options.
+	preferSameScript bool
+}
+
+// matchHeader has the lists of tags for exact matches and matches based on
+// maximized and canonicalized tags for a given language.
+type matchHeader struct {
+	// haveTags lists the candidate entries, in insertion order. Entries refer
+	// to each other by index into this slice (see haveTag.nextMax).
+	haveTags []*haveTag
+	// original is set once addIfNew has been called with exact == true. The
+	// equivalence expansion in newMatcher only copies from headers for which
+	// it is set.
+	original bool
+}
+
+// haveTag holds a supported Tag and its maximized script and region. The maximized
+// or canonicalized language is not stored as it is not needed during matching.
+type haveTag struct {
+	// tag is the supported tag as passed to makeHaveTag, before
+	// canonicalization and maximization.
+	tag language.Tag
+
+	// index of this tag in the original list of supported tags.
+	index int
+
+	// conf is the maximum confidence that can result from matching this haveTag.
+	// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
+	conf Confidence
+
+	// Maximized region and script.
+	maxRegion language.Region
+	maxScript language.Script
+
+	// altScript may be checked as an alternative match to maxScript. If altScript
+	// matches, the confidence level for this match is Low. Theoretically there
+	// could be multiple alternative scripts. This does not occur in practice.
+	altScript language.Script
+
+	// nextMax is the index of the next haveTag with the same maximized tags.
+	// A value of 0 marks the end of the chain.
+	nextMax uint16
+}
+
+// makeHaveTag builds the haveTag for the supported tag at the given index and
+// also returns the language of the canonicalized and maximized tag. A tag
+// without language, script and region is used as is.
+func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
+	maximized := tag
+	undefined := tag.LangID == 0 && tag.RegionID == 0 && tag.ScriptID == 0
+	if !undefined {
+		maximized, _ = canonicalize(All, maximized)
+		maximized, _ = maximized.Maximize()
+		maximized.RemakeString()
+	}
+	have := haveTag{
+		tag:       tag,
+		index:     index,
+		conf:      Exact,
+		maxRegion: maximized.RegionID,
+		maxScript: maximized.ScriptID,
+		altScript: altScript(maximized.LangID, maximized.ScriptID),
+		nextMax:   0,
+	}
+	return have, maximized.LangID
+}
+
+// altScript returns an alternative script that may match the given script with
+// a low confidence, or 0 if there is none. At the moment, the langMatch data
+// allows for at most one script to map to another and we rely on this to keep
+// the code simple.
+func altScript(l language.Language, s language.Script) language.Script {
+	for _, entry := range matchScript {
+		if language.Script(entry.haveScript) != s {
+			continue
+		}
+		// TODO: also match cases where language is not the same.
+		if language.Language(entry.wantLang) == l || language.Language(entry.haveLang) == l {
+			return language.Script(entry.wantScript)
+		}
+	}
+	return 0
+}
+
+// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
+// Tags that have the same maximized values are linked by index.
+// If exact is true, the header is marked as original, even when n itself
+// turns out to be a duplicate and is not added.
+func (h *matchHeader) addIfNew(n haveTag, exact bool) {
+	h.original = h.original || exact
+	// Don't add new exact matches.
+	for _, v := range h.haveTags {
+		if equalsRest(v.tag, n.tag) {
+			return
+		}
+	}
+	// Allow duplicate maximized tags, but create a linked list to allow quickly
+	// comparing the equivalents and bail out.
+	for i, v := range h.haveTags {
+		if v.maxScript == n.maxScript &&
+			v.maxRegion == n.maxRegion &&
+			v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
+			// Follow the chain to its last element (nextMax == 0) and link
+			// that element to the position at which n is appended below.
+			for h.haveTags[i].nextMax != 0 {
+				i = int(h.haveTags[i].nextMax)
+			}
+			h.haveTags[i].nextMax = uint16(len(h.haveTags))
+			break
+		}
+	}
+	// n is a by-value parameter, so the stored pointer refers to a copy owned
+	// by this header.
+	h.haveTags = append(h.haveTags, &n)
+}
+
+// header looks up the matchHeader registered for language l, registering a
+// fresh, empty one first if none exists yet.
+func (m *matcher) header(l language.Language) *matchHeader {
+	h := m.index[l]
+	if h == nil {
+		h = &matchHeader{}
+		m.index[l] = h
+	}
+	return h
+}
+
+// toConf converts a distance d to a confidence level: distances up to and
+// including 10 map to High, distances below 30 to Low and anything else to No.
+func toConf(d uint8) Confidence {
+	switch {
+	case d <= 10:
+		return High
+	case d < 30:
+		return Low
+	default:
+		return No
+	}
+}
+
+// newMatcher builds an index for the given supported tags and returns it as
+// a matcher. It also expands the index by considering various equivalence classes
+// for a given tag.
+//
+// The options are applied before any tag is indexed. Without supported tags
+// the matcher only has an empty default entry.
+func newMatcher(supported []Tag, options []MatchOption) *matcher {
+	m := &matcher{
+		index:            make(map[language.Language]*matchHeader),
+		preferSameScript: true,
+	}
+	for _, o := range options {
+		o(m)
+	}
+	if len(supported) == 0 {
+		m.default_ = &haveTag{}
+		return m
+	}
+	// Add supported languages to the index. Add exact matches first to give
+	// them precedence.
+	for i, tag := range supported {
+		tt := tag.tag()
+		pair, _ := makeHaveTag(tt, i)
+		m.header(tt.LangID).addIfNew(pair, true)
+		m.supported = append(m.supported, &pair)
+	}
+	// The default is the first entry indexed under the language of the first
+	// supported tag.
+	m.default_ = m.header(supported[0].lang()).haveTags[0]
+	// Keep these in two different loops to support the case that two equivalent
+	// languages are distinguished, such as iw and he.
+	for i, tag := range supported {
+		tt := tag.tag()
+		pair, max := makeHaveTag(tt, i)
+		// Also index the tag under its canonicalized and maximized language
+		// when that differs from the language as written.
+		if max != tt.LangID {
+			m.header(max).addIfNew(pair, true)
+		}
+	}
+
+	// update is used to add indexes in the map for equivalent languages.
+	// update will only add entries to original indexes, thus not computing any
+	// transitive relations.
+	update := func(want, have uint16, conf Confidence) {
+		if hh := m.index[language.Language(have)]; hh != nil {
+			if !hh.original {
+				return
+			}
+			hw := m.header(language.Language(want))
+			for _, ht := range hh.haveTags {
+				// Work on a copy so that the entry indexed under have is left
+				// untouched.
+				v := *ht
+				// Cap the attainable confidence at that of the equivalence.
+				if conf < v.conf {
+					v.conf = conf
+				}
+				v.nextMax = 0 // this value needs to be recomputed
+				if v.altScript != 0 {
+					v.altScript = altScript(language.Language(want), v.maxScript)
+				}
+				hw.addIfNew(v, conf == Exact && hh.original)
+			}
+		}
+	}
+
+	// Add entries for languages with mutual intelligibility as defined by CLDR's
+	// languageMatch data.
+	for _, ml := range matchLang {
+		update(ml.want, ml.have, toConf(ml.distance))
+		if !ml.oneway {
+			update(ml.have, ml.want, toConf(ml.distance))
+		}
+	}
+
+	// Add entries for possible canonicalizations. This is an optimization to
+	// ensure that only one map lookup needs to be done at runtime per desired tag.
+	// First we match deprecated equivalents. If they are perfect equivalents
+	// (their canonicalization simply substitutes a different language code, but
+	// nothing else), the match confidence is Exact, otherwise it is High.
+	for i, lm := range language.AliasMap {
+		// If deprecated codes match and there is no fiddling with the script
+		// or region, we consider it an exact match.
+		conf := Exact
+		// Macro aliases are only added in the From -> To direction, always
+		// with Exact confidence.
+		if language.AliasTypes[i] != language.Macro {
+			if !isExactEquivalent(language.Language(lm.From)) {
+				conf = High
+			}
+			update(lm.To, lm.From, conf)
+		}
+		update(lm.From, lm.To, conf)
+	}
+	return m
+}
+
+// getBest gets the best matching tag in m for any of the given tags, taking into
+// account the order of preference of the given tags.
+//
+// It returns the matching entry, the desired tag that produced the match and
+// the confidence. If nothing matches, got is nil, c is No and orig is the
+// first desired tag, or the zero Tag if want is empty.
+func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
+	best := bestMatch{}
+	for i, ww := range want {
+		w := ww.tag()
+		var max language.Tag
+		// Check for exact match first.
+		h := m.index[w.LangID]
+		if w.LangID != 0 {
+			if h == nil {
+				continue
+			}
+			// Base language is defined.
+			max, _ = canonicalize(Legacy|Deprecated|Macro, w)
+			// A region that is added through canonicalization is stronger than
+			// a maximized region: set it in the original (e.g. mo -> ro-MD).
+			if w.RegionID != max.RegionID {
+				w.RegionID = max.RegionID
+			}
+			// TODO: should we do the same for scripts?
+			// See test case: en, sr, nl ; sh ; sr
+			max, _ = max.Maximize()
+		} else {
+			// Base language is not defined.
+			if h != nil {
+				for i := range h.haveTags {
+					have := h.haveTags[i]
+					if equalsRest(have.tag, w) {
+						return have, w, Exact
+					}
+				}
+			}
+			if w.ScriptID == 0 && w.RegionID == 0 {
+				// We skip all tags matching und for approximate matching, including
+				// private tags.
+				continue
+			}
+			// Derive a language from the script and/or region and continue
+			// with the entries indexed under that language.
+			max, _ = w.Maximize()
+			if h = m.index[max.LangID]; h == nil {
+				continue
+			}
+		}
+		// pin stays true only if no later desired tag has the same base
+		// language as w.
+		pin := true
+		for _, t := range want[i+1:] {
+			if w.LangID == t.lang() {
+				pin = false
+				break
+			}
+		}
+		// Check for match based on maximized tag.
+		for i := range h.haveTags {
+			have := h.haveTags[i]
+			best.update(have, w, max.ScriptID, max.RegionID, pin)
+			if best.conf == Exact {
+				// Also consider the entries chained to have through nextMax
+				// before returning the best match.
+				for have.nextMax != 0 {
+					have = h.haveTags[have.nextMax]
+					best.update(have, w, max.ScriptID, max.RegionID, pin)
+				}
+				return best.have, best.want, best.conf
+			}
+		}
+	}
+	if best.conf <= No {
+		if len(want) != 0 {
+			return nil, want[0].tag(), No
+		}
+		return nil, language.Tag{}, No
+	}
+	return best.have, best.want, best.conf
+}
+
+// bestMatch accumulates the best match so far.
+type bestMatch struct {
+	// have and want are the supported entry and desired tag of the best match.
+	have            *haveTag
+	want            language.Tag
+	// conf is the confidence of the best match.
+	conf            Confidence
+	// pinnedRegion is the maximized region of the desired tag of the best
+	// match.
+	pinnedRegion    language.Region
+	// pinLanguage, once set, makes update ignore desired tags with a language
+	// other than that of want.
+	pinLanguage     bool
+	// sameRegionGroup is the second result of regionGroupDist as computed for
+	// the best match.
+	sameRegionGroup bool
+	// Cached results from applying tie-breaking rules.
+	origLang     bool
+	origReg      bool
+	paradigmReg  bool
+	regGroupDist uint8
+	origScript   bool
+}
+
+// update updates the existing best match if the new pair is considered to be a
+// better match. To determine if the given pair is a better match, it first
+// computes the rough confidence level. If this surpasses the current match, it
+// will replace it and update the tie-breaker rule cache. If there is a tie, it
+// proceeds with applying a series of tie-breaker rules. If there is no
+// conclusive winner after applying the tie-breaker rules, it leaves the current
+// match as the preferred match.
+//
+// If pin is true and have and tag are a strong match, it will henceforth only
+// consider matches for this language. This corresponds to the idea that most
+// users have a strong preference for the first defined language. A user can
+// still prefer a second language over a dialect of the preferred language by
+// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
+// be false.
+//
+// have is the supported entry, tag the desired tag, and maxScript and
+// maxRegion the maximized script and region of the desired tag.
+func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
+	// Bail if the maximum attainable confidence is below that of the current best match.
+	c := have.conf
+	if c < m.conf {
+		return
+	}
+	// Don't change the language once we already have found an exact match.
+	if m.pinLanguage && tag.LangID != m.want.LangID {
+		return
+	}
+	// Pin the region group if we are comparing tags for the same language.
+	if tag.LangID == m.want.LangID && m.sameRegionGroup {
+		_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
+		if !sameGroup {
+			return
+		}
+	}
+	if c == Exact && have.maxScript == maxScript {
+		// If there is another language and then another entry of this language,
+		// don't pin anything, otherwise pin the language.
+		m.pinLanguage = pin
+	}
+	if equalsRest(have.tag, tag) {
+		// Script, region and variants are identical as written: c is kept at
+		// the maximum confidence of have.
+	} else if have.maxScript != maxScript {
+		// There is usually very little comprehension between different scripts.
+		// In a few cases there may still be Low comprehension. This possibility
+		// is pre-computed and stored in have.altScript.
+		if Low < m.conf || have.altScript != maxScript {
+			return
+		}
+		c = Low
+	} else if have.maxRegion != maxRegion {
+		if High < c {
+			// There is usually a small difference between languages across regions.
+			c = High
+		}
+	}
+
+	// We store the results of the computations of the tie-breaker rules along
+	// with the best match. There is no need to do the checks once we determine
+	// we have a winner, but we do still need to do the tie-breaker computations.
+	// We use "beaten" to keep track if we still need to do the checks.
+	beaten := false // true if the new pair defeats the current one.
+	if c != m.conf {
+		if c < m.conf {
+			return
+		}
+		beaten = true
+	}
+
+	// Tie-breaker rules:
+	// We prefer if the pre-maximized language was specified and identical.
+	origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
+	if !beaten && m.origLang != origLang {
+		if m.origLang {
+			return
+		}
+		beaten = true
+	}
+
+	// We prefer if the pre-maximized region was specified and identical.
+	origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
+	if !beaten && m.origReg != origReg {
+		if m.origReg {
+			return
+		}
+		beaten = true
+	}
+
+	// Next we prefer the smaller region group distance.
+	regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
+	if !beaten && m.regGroupDist != regGroupDist {
+		if regGroupDist > m.regGroupDist {
+			return
+		}
+		beaten = true
+	}
+
+	// Next we prefer a supported region that is a paradigm locale for the
+	// desired language.
+	paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
+	if !beaten && m.paradigmReg != paradigmReg {
+		if !paradigmReg {
+			return
+		}
+		beaten = true
+	}
+
+	// Next we prefer if the pre-maximized script was specified and identical.
+	origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
+	if !beaten && m.origScript != origScript {
+		if m.origScript {
+			return
+		}
+		beaten = true
+	}
+
+	// Update m to the newly found best match.
+	if beaten {
+		m.have = have
+		m.want = tag
+		m.conf = c
+		m.pinnedRegion = maxRegion
+		m.sameRegionGroup = sameGroup
+		m.origLang = origLang
+		m.origReg = origReg
+		m.paradigmReg = paradigmReg
+		m.origScript = origScript
+		m.regGroupDist = regGroupDist
+	}
+}
+
+// isParadigmLocale reports whether paradigmLocales has an entry for lang that
+// lists r as one of its two regions.
+func isParadigmLocale(lang language.Language, r language.Region) bool {
+	for _, entry := range paradigmLocales {
+		if language.Language(entry[0]) != lang {
+			continue
+		}
+		first, second := language.Region(entry[1]), language.Region(entry[2])
+		if r == first || r == second {
+			return true
+		}
+	}
+	return false
+}
+
+// regionGroupDist computes the distance between two regions based on their
+// CLDR grouping.
+//
+// It scans matchRegion for the first rule for lang (and script, if the rule
+// names one) that applies to both regions and returns the distance of that
+// rule. same reports whether that distance equals the default distance. If no
+// rule applies, it returns the default distance and true.
+func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
+	const defaultDistance = 4
+
+	// Group memberships of a and b, shifted left by one bit to line up with
+	// the group masks computed below.
+	// NOTE(review): presumably rule groups are numbered from 1 - confirm
+	// against the table generator.
+	aGroup := uint(regionToGroups[a]) << 1
+	bGroup := uint(regionToGroups[b]) << 1
+	for _, ri := range matchRegion {
+		if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
+			// The low 7 bits of ri.group select the group; the high bit
+			// selects the negated form of the rule.
+			group := uint(1 << (ri.group &^ 0x80))
+			if 0x80&ri.group == 0 {
+				if aGroup&bGroup&group != 0 { // Both regions are in the group.
+					return ri.distance, ri.distance == defaultDistance
+				}
+			} else {
+				if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
+					return ri.distance, ri.distance == defaultDistance
+				}
+			}
+		}
+	}
+	return defaultDistance, true
+}
+
+// equalsRest reports whether a and b agree on everything but the language:
+// script, region and the variant or private use part.
+func equalsRest(a, b language.Tag) bool {
+	// TODO: don't include extensions in this comparison. To do this efficiently,
+	// though, we should handle private tags separately.
+	if a.ScriptID != b.ScriptID || a.RegionID != b.RegionID {
+		return false
+	}
+	return a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
+}
+
+// isExactEquivalent reports whether canonicalizing language l leaves the
+// script and region of a tag untouched, that is, whether l is absent from
+// notEquivalent.
+func isExactEquivalent(l language.Language) bool {
+	for i := range notEquivalent {
+		if notEquivalent[i] == l {
+			return false
+		}
+	}
+	return true
+}
+
+// notEquivalent lists the alias languages whose canonicalization sets a script
+// or a region. It is filled in by init and consulted by isExactEquivalent.
+var notEquivalent []language.Language
+
+// init fills notEquivalent and completes the paradigmLocales table.
+func init() {
+	// Collect every alias source language whose canonical form carries a
+	// script or a region.
+	for _, alias := range language.AliasMap {
+		from := language.Language(alias.From)
+		canon, _ := canonicalize(All, language.Tag{LangID: from})
+		if canon.ScriptID == 0 && canon.RegionID == 0 {
+			continue
+		}
+		notEquivalent = append(notEquivalent, from)
+	}
+	// Replace undefined regions of paradigm locales by the region obtained
+	// from maximizing the bare language.
+	for i, entry := range paradigmLocales {
+		maximized, _ := language.Tag{LangID: language.Language(entry[0])}.Maximize()
+		region := uint16(maximized.RegionID)
+		if entry[1] == 0 {
+			paradigmLocales[i][1] = region
+		}
+		if entry[2] == 0 {
+			paradigmLocales[i][2] = region
+		}
+	}
+}
@@ -0,0 +1,384 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"golang.org/x/text/internal/testtext"
+	"golang.org/x/text/internal/ucd"
+)
+
+// verbose is the -verbose test flag.
+// NOTE(review): the flag is not read anywhere in the visible part of this
+// file - confirm it is still used.
+var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
+
+// TestCompliance runs the matcher against every data file found under the
+// testdata directory. Each record parsed by ucd.Parse provides, in order: the
+// supported tags, the desired tags, the expected matching supported tag and,
+// optionally, the expected combined result tag. Records whose name is listed
+// in skip are ignored.
+func TestCompliance(t *testing.T) {
+	err := filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
+		if err != nil {
+			// info may be nil when the walk itself failed (for example when
+			// testdata is missing), so report the error instead of
+			// dereferencing info.
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+		r, err := os.Open(file)
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Release the file handle once this file has been processed.
+		defer r.Close()
+		ucd.Parse(r, func(p *ucd.Parser) {
+			name := strings.ReplaceAll(path.Join(p.String(0), p.String(1)), " ", "")
+			if skip[name] {
+				return
+			}
+			t.Run(info.Name()+"/"+short(name), func(t *testing.T) {
+				supported := makeTagList(p.String(0))
+				desired := makeTagList(p.String(1))
+				gotCombined, index, conf := NewMatcher(supported).Match(desired...)
+
+				gotMatch := supported[index]
+				wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
+				if gotMatch != wantMatch {
+					t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
+				}
+				if tag := strings.TrimSpace(p.String(3)); tag != "" {
+					wantCombined := Raw.MustParse(tag)
+					if gotCombined != wantCombined {
+						t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
+					}
+				}
+			})
+		})
+		return nil
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+// short shortens s for use in a test name. Strings of up to 50 bytes are
+// returned unchanged. Longer strings are cut before byte 50, backing up to the
+// start of a rune so that no UTF-8 sequence is split, and "…" is appended.
+func short(s string) string {
+	const limit = 50
+	if len(s) <= limit {
+		return s
+	}
+	// Step back from the limit until a byte that starts a rune is found,
+	// looking at no more than utf8.UTFMax-1 bytes.
+	back := 1
+	for back < utf8.UTFMax && !utf8.RuneStart(s[limit-back]) {
+		back++
+	}
+	return s[:limit-back] + "…"
+}
+
+// skip lists the compliance test cases that TestCompliance does not run. The
+// key is the name computed in TestCompliance: the supported and desired lists
+// joined with "/" and stripped of spaces.
+var skip = map[string]bool{
+	// TODO: bugs
+	// Honor the wildcard match. This may only be useful to select non-exact
+	// stuff.
+	"mul,af/nl": true, // match: got "af"; want "mul"
+
+	// TODO: include other extensions.
+	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
+	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,
+
+	// Inconsistencies with Mark Davis' implementation where it is not clear
+	// which is better.
+
+	// Inconsistencies in combined. I think the Go approach is more appropriate.
+	// We could use -u-rg- as alternative.
+	"und,fr/fr-BE-fonipa":              true, // combined: got "fr"; want "fr-BE-fonipa"
+	"und,fr-CA/fr-BE-fonipa":           true, // combined: got "fr-CA"; want "fr-BE-fonipa"
+	"und,fr-fonupa/fr-BE-fonipa":       true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
+	"und,no/nn-BE-fonipa":              true, // combined: got "no"; want "no-BE-fonipa"
+	"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
+
+	// The initial number is a threshold. As we don't use scoring, we will not
+	// implement this.
+	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
+	// match: got "und"; want "fr-Cyrl-CA-fonupa"
+	// combined: got "und"; want "fr-Cyrl-BE-fonipa"
+
+	// Other interesting cases to test:
+	// - Should same language or same script have the preference if there is
+	//   usually no understanding of the other script?
+	// - More specific region in desired may replace enclosing supported.
+}
+
+// makeTagList splits s on commas and turns every element, with surrounding
+// white space removed, into a Tag using mk.
+func makeTagList(s string) (tags []Tag) {
+	parts := strings.Split(s, ",")
+	for i := range parts {
+		tags = append(tags, mk(strings.TrimSpace(parts[i])))
+	}
+	return tags
+}
+
+// TestMatchStrings checks the tag and index returned by MatchStrings. The
+// supported tags of a test case are a comma-separated list; the desired
+// strings are separated by "|" and passed to MatchStrings as separate
+// arguments.
+func TestMatchStrings(t *testing.T) {
+	testCases := []struct {
+		supported string
+		desired   string // strings separated by |
+		tag       string
+		index     int
+	}{{
+		supported: "en",
+		desired:   "",
+		tag:       "en",
+		index:     0,
+	}, {
+		supported: "en",
+		desired:   "nl",
+		tag:       "en",
+		index:     0,
+	}, {
+		supported: "en,nl",
+		desired:   "nl",
+		tag:       "nl",
+		index:     1,
+	}, {
+		supported: "en,nl",
+		desired:   "nl|en",
+		tag:       "nl",
+		index:     1,
+	}, {
+		supported: "en-GB,nl",
+		desired:   "en ; q=0.1,nl",
+		tag:       "nl",
+		index:     1,
+	}, {
+		supported: "en-GB,nl",
+		desired:   "en;q=0.005 | dk; q=0.1,nl ",
+		tag:       "en-GB",
+		index:     0,
+	}, {
+		// do not match faulty tags with und
+		supported: "en,und",
+		desired:   "|en",
+		tag:       "en",
+		index:     0,
+	}}
+	for _, tc := range testCases {
+		t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
+			m := NewMatcher(makeTagList(tc.supported))
+			tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
+			if tag.String() != tc.tag || index != tc.index {
+				t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
+			}
+		})
+	}
+}
+
+// TestRegionGroups checks the distance reported by regionGroupDist for pairs
+// of tags that have the same script.
+func TestRegionGroups(t *testing.T) {
+	testCases := []struct {
+		a, b     string
+		distance uint8
+	}{
+		{"zh-TW", "zh-HK", 5},
+		{"zh-MO", "zh-HK", 4},
+		{"es-ES", "es-AR", 5},
+		{"es-ES", "es", 4},
+		{"es-419", "es-MX", 4},
+		{"es-AR", "es-MX", 4},
+		{"es-ES", "es-MX", 5},
+		{"es-PT", "es-MX", 5},
+	}
+	for _, tc := range testCases {
+		a := MustParse(tc.a)
+		aScript, _ := a.Script()
+		b := MustParse(tc.b)
+		bScript, _ := b.Script()
+
+		if aScript != bScript {
+			t.Errorf("scripts differ: %q vs %q", aScript, bScript)
+			continue
+		}
+		d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
+		if d != tc.distance {
+			// The distances are small integers: print them with %d. With %q
+			// they would be rendered as escaped character literals.
+			t.Errorf("regionGroupDist(%q, %q) = %d; want %d", tc.a, tc.b, d, tc.distance)
+		}
+	}
+}
+
+// TestIsParadigmLocale checks isParadigmLocale against the language and region
+// of a number of locales.
+func TestIsParadigmLocale(t *testing.T) {
+	expectations := map[string]bool{
+		"en-US":  true,
+		"en-GB":  true,
+		"en-VI":  false,
+		"es-GB":  false,
+		"es-ES":  true,
+		"es-419": true,
+	}
+	for locale, want := range expectations {
+		parsed := Make(locale)
+		tag := parsed.tag()
+		if got := isParadigmLocale(tag.LangID, tag.RegionID); got != want {
+			t.Errorf("isPL(%q) = %v; want %v", locale, got, want)
+		}
+	}
+}
+
+// Implementation of String methods for various types for debugging purposes.
+
+func (m *matcher) String() string {
+	w := &bytes.Buffer{}
+	fmt.Fprintln(w, "Default:", m.default_)
+	for tag, h := range m.index {
+		fmt.Fprintf(w, "  %s: %v\n", tag, h)
+	}
+	return w.String()
+}
+
+func (h *matchHeader) String() string {
+	w := &bytes.Buffer{}
+	fmt.Fprint(w, "haveTag: ")
+	for _, h := range h.haveTags {
+		fmt.Fprintf(w, "%v, ", h)
+	}
+	return w.String()
+}
+
+// String formats t as tag:index:conf:maxRegion-maxScript|altScript, for
+// debugging.
+func (t haveTag) String() string {
+	return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
+}
+
+// TestIssue43834 checks that matching a desired tag with region ZZ against a
+// matcher that only supports English yields index 0.
+func TestIssue43834(t *testing.T) {
+	// ZZ is the largest region code and should not cause overflow.
+	desired, _, err := ParseAcceptLanguage("en-ZZ")
+	if err != nil {
+		t.Error(err)
+	}
+	m := NewMatcher([]Tag{English})
+	if _, index, _ := m.Match(desired...); index != 0 {
+		t.Errorf("got %v; want 0", index)
+	}
+}
+
+// TestBestMatchAlloc checks that Match does not allocate when it is called
+// with an existing slice of tags.
+func TestBestMatchAlloc(t *testing.T) {
+	// NOTE(review): makeTagList splits on commas, so the whole string
+	// "en sr nl" is passed to mk as a single tag - confirm whether "en,sr,nl"
+	// was intended.
+	m := NewMatcher(makeTagList("en sr nl"))
+	// Go allocates when creating a list of tags from a single tag!
+	list := []Tag{English}
+	avg := testtext.AllocsPerRun(100, func() {
+		m.Match(list...)
+	})
+	if avg > 0 {
+		t.Errorf("got %f; want 0", avg)
+	}
+}
+
+// benchHave is the list of supported tags from which the benchmarks below
+// build their matcher.
+var benchHave = []Tag{
+	mk("en"),
+	mk("en-GB"),
+	mk("za"),
+	mk("zh-Hant"),
+	mk("zh-Hans-CN"),
+	mk("zh"),
+	mk("zh-HK"),
+	mk("ar-MK"),
+	mk("en-CA"),
+	mk("fr-CA"),
+	mk("fr-US"),
+	mk("fr-CH"),
+	mk("fr"),
+	mk("lt"),
+	mk("lv"),
+	mk("iw"),
+	mk("iw-NL"),
+	mk("he"),
+	mk("he-IT"),
+	mk("tlh"),
+	mk("ja"),
+	mk("ja-Jpan"),
+	mk("ja-Jpan-JP"),
+	mk("de"),
+	mk("de-CH"),
+	mk("de-AT"),
+	mk("de-DE"),
+	mk("sr"),
+	mk("sr-Latn"),
+	mk("sr-Cyrl"),
+	mk("sr-ME"),
+}
+
+// benchWant holds the lists of desired tags that BenchmarkMatch matches
+// against benchHave, one list per call to getBest.
+var benchWant = [][]Tag{
+	{mk("en")},
+	{mk("en-AU"), mk("de-HK"), mk("nl"), mk("fy"), mk("lv")},
+	{mk("en-AU"), mk("de-HK"), mk("nl"), mk("fy")},
+	{mk("ja-Hant"), mk("da-HK"), mk("nl"), mk("zh-TW")},
+	{mk("ja-Hant"), mk("da-HK"), mk("nl"), mk("hr")},
+}
+
+// BenchmarkMatch measures getBest for every list of desired tags in benchWant
+// against a matcher built from benchHave.
+func BenchmarkMatch(b *testing.B) {
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		for _, want := range benchWant {
+			m.getBest(want...)
+		}
+	}
+}
+
+// BenchmarkMatchExact measures getBest for the desired tag "en", which is
+// listed literally in benchHave.
+func BenchmarkMatchExact(b *testing.B) {
+	want := mk("en")
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want)
+	}
+}
+
+// BenchmarkMatchAltLanguagePresent measures getBest for the desired tag "hr",
+// which is not listed in benchHave.
+// NOTE(review): presumably "hr" is served by a related supported language
+// through the language match data - confirm.
+func BenchmarkMatchAltLanguagePresent(b *testing.B) {
+	want := mk("hr")
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want)
+	}
+}
+
+// BenchmarkMatchAltLanguageNotPresent measures getBest for the desired tag
+// "nn", which is not listed in benchHave.
+// NOTE(review): presumably no supported language is related to "nn" in the
+// language match data - confirm.
+func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
+	want := mk("nn")
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want)
+	}
+}
+
+// BenchmarkMatchAltScriptPresent measures getBest for the desired tag
+// "zh-Hant-CN"; benchHave lists several zh tags, among them "zh-Hant".
+func BenchmarkMatchAltScriptPresent(b *testing.B) {
+	want := mk("zh-Hant-CN")
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want)
+	}
+}
+
+// BenchmarkMatchAltScriptNotPresent measures getBest for the desired tag
+// "fr-Cyrl"; none of the fr tags in benchHave specifies a script.
+func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
+	want := mk("fr-Cyrl")
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want)
+	}
+}
+
+// BenchmarkMatchLimitedExact measures getBest for the desired tags "he-NL" and
+// "iw-NL"; benchHave lists "iw", "iw-NL", "he" and "he-IT".
+func BenchmarkMatchLimitedExact(b *testing.B) {
+	want := []Tag{mk("he-NL"), mk("iw-NL")}
+	m := newMatcher(benchHave, nil)
+	for i := 0; i < b.N; i++ {
+		m.getBest(want...)
+	}
+}
@@ -0,0 +1,256 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"errors"
+	"sort"
+	"strconv"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// A ValueError is the error returned by the parsing functions when the
+// input is well-formed but one of its subtags is not recognized as a
+// valid value. Subtag identifies the subtag in question.
+type ValueError interface {
+	error
+
+	// Subtag returns the subtag for which the error occurred.
+	Subtag() string
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns a
+// ValueError; the Tag returned in this case is merely stripped of the unknown
+// value and all other values are preserved. It accepts tags in the BCP 47
+// format and the extensions to this standard defined in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// Parse is shorthand for Default.Parse, which canonicalizes the resulting tag.
+func Parse(s string) (t Tag, err error) {
+	return Default.Parse(s)
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// The resulting tag is canonicalized using the canonicalization type c.
+func (c CanonType) Parse(s string) (t Tag, err error) {
+	defer func() {
+		if recover() != nil {
+			t = Tag{}
+			err = language.ErrSyntax
+		}
+	}()
+
+	tt, err := language.Parse(s)
+	if err != nil {
+		return makeTag(tt), err
+	}
+	tt, changed := canonicalize(c, tt)
+	if changed {
+		tt.RemakeString()
+	}
+	return makeTag(tt), err
+}
+
+// Compose creates a Tag from individual parts, which may be of type Tag, Base,
+// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
+// Base, Script or Region or slice of type Variant or Extension is passed more
+// than once, the latter will overwrite the former. Variants and Extensions are
+// accumulated, but if two extensions of the same type are passed, the latter
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. If one or more
+// errors are encountered, one of the errors is returned. Compose is shorthand
+// for Default.Compose, which canonicalizes the result using the Default CanonType.
+func Compose(part ...interface{}) (t Tag, err error) {
+	return Default.Compose(part...)
+}
+
+// Compose creates a Tag from individual parts, which may be of type Tag, Base,
+// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
+// Base, Script or Region or slice of type Variant or Extension is passed more
+// than once, the latter will overwrite the former. Variants and Extensions are
+// accumulated, but if two extensions of the same type are passed, the latter
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. The resulting
+// tag is returned after canonicalizing using CanonType c. If one or more errors
+// are encountered, one of the errors is returned.
+func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
+	defer func() {
+		if recover() != nil {
+			t = Tag{}
+			err = language.ErrSyntax
+		}
+	}()
+
+	var b language.Builder
+	if err = update(&b, part...); err != nil {
+		return und, err
+	}
+	b.Tag, _ = canonicalize(c, b.Tag)
+	return makeTag(b.Make()), err
+}
+
+var errInvalidArgument = errors.New("invalid Extension or Variant") // set by update for an empty Variant or Extension
+
+// update applies every part to b in argument order and returns the last error
+// met: an error part, or errInvalidArgument for an empty Variant or Extension.
+func update(b *language.Builder, part ...interface{}) (err error) {
+	for _, p := range part {
+		switch val := p.(type) { // parts of any other type are ignored
+		case Tag:
+			b.SetTag(val.tag())
+		case Base:
+			b.Tag.LangID = val.langID
+		case Script:
+			b.Tag.ScriptID = val.scriptID
+		case Region:
+			b.Tag.RegionID = val.regionID
+		case Variant:
+			if val.variant != "" {
+				b.AddVariant(val.variant)
+			} else {
+				err = errInvalidArgument
+			}
+		case Extension:
+			if val.s != "" {
+				b.SetExt(val.s)
+			} else {
+				err = errInvalidArgument
+			}
+		case []Variant:
+			b.ClearVariants()
+			for _, elem := range val {
+				b.AddVariant(elem.variant)
+			}
+		case []Extension:
+			b.ClearExtensions()
+			for _, elem := range val {
+				b.SetExt(elem.s)
+			}
+		// TODO: support parsing of raw strings based on morphology or just extensions?
+		case error:
+			err = val // val is never nil here: a nil interface matches no case
+		}
+	}
+	return err
+}
+
+var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") // the weight of an entry did not parse as a float
+var errTagListTooLarge = errors.New("tag list exceeds max length")       // the input holds more than 1000 dashes
+
+// ParseAcceptLanguage parses the contents of an Accept-Language header as
+// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
+// a list of corresponding quality weights. It is more permissive than RFC 2616:
+// empty list entries are skipped and a few non-standard names are accepted.
+// The Tags will be sorted by highest weight first and then by first occurrence.
+// Tags with a weight of zero or less are dropped. If a language or weight cannot
+// be parsed, or s holds more than 1000 dashes, nil slices and an error result.
+func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			tag, q = nil, nil
+			err = language.ErrSyntax
+		}
+	}()
+
+	if strings.Count(s, "-") > 1000 {
+		return nil, nil, errTagListTooLarge
+	}
+
+	for s != "" {
+		var item string
+		if item, s = split(s, ','); item == "" {
+			continue
+		}
+		name, weight := split(item, ';')
+
+		// Scan the language, falling back on acceptFallback for names that
+		// Parse rejects.
+		t, perr := Parse(name)
+		if perr != nil {
+			id, known := acceptFallback[name]
+			if !known {
+				return nil, nil, perr
+			}
+			t = makeTag(language.Tag{LangID: id})
+		}
+
+		// Scan the optional weight; an absent weight counts as 1.
+		quality := 1.0
+		if weight != "" {
+			// consume returns the empty string when a token could not be
+			// consumed, resulting in an error for ParseFloat.
+			value, werr := strconv.ParseFloat(consume(consume(weight, 'q'), '='), 32)
+			if werr != nil {
+				return nil, nil, errInvalidWeight
+			}
+			// Drop tags with a quality weight of 0 (or less).
+			if value <= 0 {
+				continue
+			}
+			quality = value
+		}
+
+		tag = append(tag, t)
+		q = append(q, float32(quality))
+	}
+	// The stable sort keeps the order of first occurrence among equal weights.
+	sort.Stable(&tagSort{tag, q})
+	return tag, q, nil
+}
+
+// consume strips the leading token c from s and returns what follows, trimmed
+// of surrounding white space. It returns the empty string if s lacks the token.
+func consume(s string, c byte) string {
+	if len(s) > 0 && s[0] == c {
+		return strings.TrimSpace(s[1:])
+	}
+	return ""
+}
+
+func split(s string, c byte) (head, tail string) { // cuts s at the first c; both parts are returned trimmed
+	if i := strings.IndexByte(s, c); i >= 0 {
+		s, tail = s[:i], strings.TrimSpace(s[i+1:])
+	}
+	return strings.TrimSpace(s), tail
+}
+
+// acceptFallback maps a small number of non-standard names that occur in
+// Accept-Language (with reasonable frequency) to the language they stand for.
+var acceptFallback = map[string]language.Language{
+	"english": _en,
+	"deutsch": _de,
+	"italian": _it,
+	"french":  _fr,
+	"*":       _mul, // defined in the spec to match all languages.
+}
+
+// tagSort sorts tags by descending weight, keeping tags and weights paired.
+type tagSort struct {
+	tag []Tag
+	q   []float32
+}
+
+// Len reports the number of weights.
+func (ts *tagSort) Len() int { return len(ts.q) }
+
+// Less reports whether entry i carries a higher weight than entry j.
+func (ts *tagSort) Less(i, j int) bool { return ts.q[j] < ts.q[i] }
+
+// Swap exchanges entries i and j in both slices.
+func (ts *tagSort) Swap(i, j int) {
+	ts.tag[i], ts.tag[j] = ts.tag[j], ts.tag[i]
+	ts.q[i], ts.q[j] = ts.q[j], ts.q[i]
+}
@@ -0,0 +1,409 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"strings"
+	"testing"
+
+	"golang.org/x/text/internal/language"
+)
+
+// equalTags reports whether t and a share language, script and region; nothing else is compared.
+func (t Tag) equalTags(a Tag) bool {
+	same := t.lang() == a.lang()
+	same = same && t.script() == a.script()
+	return same && t.region() == a.region()
+}
+
+var errSyntax = language.ErrSyntax // shorthand used in the TestErrors table
+
+type parseTest struct {
+	i                    int      // the index of this test
+	in                   string   // the string handed to the parser
+	lang, script, region string   // expected base language, script and region
+	variants, ext        string   // expected variants and extensions, each joined by "-"
+	extList              []string // only used when more than one extension is present
+	invalid              bool     // presumably: the input is expected to yield an error (TODO confirm)
+	rewrite              bool     // special rewrite not handled by parseTag
+	changed              bool     // string needed to be reformatted
+}
+
+func parseTests() []parseTest { // returns the parse test table with i, ext and extList filled in
+	tests := []parseTest{
+		{in: "root", lang: "und"},
+		{in: "und", lang: "und"},
+		{in: "en", lang: "en"},
+
+		{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
+		{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
+		{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
+
+		{in: "xy", lang: "und", invalid: true},
+		{in: "en-ZY", lang: "en", invalid: true},
+		{in: "gsw", lang: "gsw"},
+		{in: "sr_Latn", lang: "sr", script: "Latn"},
+		{in: "af-Arab", lang: "af", script: "Arab"},
+		{in: "nl-BE", lang: "nl", region: "BE"},
+		{in: "es-419", lang: "es", region: "419"},
+		{in: "und-001", lang: "und", region: "001"},
+		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
+		// Variants
+		{in: "de-1901", lang: "de", variants: "1901"},
+		// Accept with unsuppressed script.
+		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
+		// Specialized.
+		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
+		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
+		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
+		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
+		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
+		// Maximum number of variants while adhering to prefix rules.
+		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
+
+		// Sorting.
+		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
+		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
+
+		// Duplicate variants are removed, but this is not an error.
+		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
+
+		// Variants that do not have correct prefixes. We still accept these.
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
+		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+
+		// Invalid variant.
+		{in: "de-1902", lang: "de", variants: "", invalid: true},
+
+		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
+		// private use and extensions
+		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
+		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
+		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
+		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
+		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
+		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
+		{in: "en-v-c", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
+		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
+		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
+		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
+		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
+		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
+		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
+		{in: "en-u-c", lang: "en", ext: "", invalid: true},
+		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
+		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
+		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
+		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
+		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
+		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
+		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
+		// Invalid "u" extension. Drop invalid parts.
+		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true},
+		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true},
+		// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
+		// TODO: Consider eliminating duplicates and returning an error.
+		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
+		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
+		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
+		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
+		// Not necessary to have changed here.
+		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
+		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
+		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
+		{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
+		{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
+		{in: "fr-est", lang: "et", changed: true},
+		{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
+		{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
+		// invalid
+		{in: "", lang: "und", invalid: true},
+		{in: "-", lang: "und", invalid: true},
+		{in: "x", lang: "und", invalid: true},
+		{in: "x-", lang: "und", invalid: true},
+		{in: "x--", lang: "und", invalid: true},
+		{in: "a-a-b-c-d", lang: "und", invalid: true},
+		{in: "en-", lang: "en", invalid: true},
+		{in: "enne-", lang: "und", invalid: true},
+		{in: "en.", lang: "und", invalid: true},
+		{in: "en.-latn", lang: "und", invalid: true},
+		{in: "en.-en", lang: "en", invalid: true},
+		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
+		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
+		// TODO: check key-value validity
+		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
+		{in: "en-t-abcd", lang: "en", invalid: true},
+		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
+		// rewrites (more tests in TestGrandfathered)
+		{in: "zh-min-nan", lang: "nan"},
+		{in: "zh-yue", lang: "yue"},
+		{in: "zh-xiang", lang: "hsn", rewrite: true},
+		{in: "zh-guoyu", lang: "cmn", rewrite: true},
+		{in: "iw", lang: "iw"},
+		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
+		{in: "i-klingon", lang: "tlh", rewrite: true},
+	}
+	for i, tt := range tests {
+		tests[i].i = i
+		if tt.extList != nil { // extList given: derive ext by joining its entries
+			tests[i].ext = strings.Join(tt.extList, "-")
+		}
+		if tt.ext != "" && tt.extList == nil { // only ext given: mirror it into extList
+			tests[i].extList = []string{tt.ext}
+		}
+	}
+	return tests
+}
+
+// partChecks calls f for each parseTests entry and, unless f asks to skip, checks the Tag it returns.
+func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
+	for idx, tc := range parseTests() {
+		got, skip := f(&tc)
+		if skip {
+			continue
+		}
+		if want, _ := language.ParseBase(tc.lang); want != got.lang() {
+			t.Errorf("%d: lang was %q; want %q", idx, got.lang(), want)
+		}
+		if want, _ := language.ParseScript(tc.script); want != got.script() {
+			t.Errorf("%d: script was %q; want %q", idx, got.script(), want)
+		}
+		if want, _ := language.ParseRegion(tc.region); want != got.region() {
+			t.Errorf("%d: region was %q; want %q", idx, got.region(), want)
+		}
+		variants := got.tag().Variants()
+		if variants != "" {
+			variants = variants[1:] // drop the first byte before comparing
+		}
+		if variants != tc.variants {
+			t.Errorf("%d: variants was %q; want %q", idx, variants, tc.variants)
+		}
+		if exts := strings.Join(got.tag().Extensions(), "-"); exts != tc.ext {
+			t.Errorf("%d: extensions were %q; want %q", idx, exts, tc.ext)
+		}
+	}
+}
+
+func TestParse(t *testing.T) { // checks Raw.Parse against every parseTests entry
+	partChecks(t, func(tc *parseTest) (Tag, bool) {
+		parsed, _ := Raw.Parse(tc.in)
+		return parsed, false
+	})
+}
+
+// TestErrors checks which error Parse reports for invalid and ill-formed input.
+func TestErrors(t *testing.T) {
+	mkInvalid := func(s string) error { return language.NewValueError([]byte(s)) }
+	tests := []struct {
+		in  string
+		out error
+	}{
+		// invalid subtags.
+		{"ac", mkInvalid("ac")},
+		{"AC", mkInvalid("ac")},
+		{"aa-Uuuu", mkInvalid("Uuuu")},
+		{"aa-AB", mkInvalid("AB")},
+		// ill-formed wins over invalid.
+		{"ac-u", errSyntax},
+		{"ac-u-ca", mkInvalid("ac")},
+		{"ac-u-ca-co-pinyin", mkInvalid("ac")},
+		{"noob", errSyntax},
+	}
+	for _, tc := range tests {
+		// The errors are compared with !=, so both the dynamic type and the
+		// value of the error must match.
+		if _, err := Parse(tc.in); err != tc.out {
+			t.Errorf("%s: was %q; want %q", tc.in, err, tc.out)
+		}
+	}
+}
+
+// TestCompose1 composes each parseTests entry, passing variants and extensions as slices.
+func TestCompose1(t *testing.T) {
+	partChecks(t, func(tc *parseTest) (Tag, bool) {
+		base, _ := ParseBase(tc.lang)
+		script, _ := ParseScript(tc.script)
+		region, _ := ParseRegion(tc.region)
+		names := strings.Split(tc.variants, "-")
+		variants := make([]Variant, len(names))
+		for i, name := range names {
+			variants[i], _ = ParseVariant(name)
+		}
+		exts := make([]Extension, len(tc.extList))
+		for i, raw := range tc.extList {
+			exts[i], _ = ParseExtension(raw)
+		}
+		composed, _ := Raw.Compose(base, script, region, variants, exts)
+		return composed, false
+	})
+}
+
+func TestCompose2(t *testing.T) { // composes from single parts, with base, script and region passed twice
+	partChecks(t, func(tc *parseTest) (Tag, bool) {
+		base, _ := ParseBase(tc.lang)
+		script, _ := ParseScript(tc.script)
+		region, _ := ParseRegion(tc.region)
+		parts := []interface{}{base, script, region, script, region, base}
+		for _, name := range strings.Split(tc.variants, "-") {
+			if name != "" {
+				variant, _ := ParseVariant(name)
+				parts = append(parts, variant)
+			}
+		}
+		for _, raw := range tc.extList {
+			ext, _ := ParseExtension(raw)
+			parts = append(parts, ext)
+		}
+		composed, _ := Raw.Compose(parts...)
+		return composed, false
+	})
+}
+
+func TestCompose3(t *testing.T) { // parses each input with Raw.Parse, then passes the Tag through Raw.Compose
+	partChecks(t, func(tc *parseTest) (Tag, bool) {
+		parsed, _ := Raw.Parse(tc.in)
+		composed, _ := Raw.Compose(parsed)
+		return composed, false
+	})
+}
+
+// mk builds a Tag from s with Raw.Make; the tests and
+// benchmarks use it to write tag literals compactly.
+func mk(s string) Tag { return Raw.Make(s) }
+
+// TestParseAcceptLanguage checks the tags, weights and error status that
+// ParseAcceptLanguage returns for a table of header values.
+func TestParseAcceptLanguage(t *testing.T) {
+	type res struct {
+		t Tag
+		q float32
+	}
+	en := []res{{mk("en"), 1.0}}
+	tests := []struct {
+		out []res
+		in  string
+		ok  bool
+	}{
+		{en, "en", true},
+		{en, "   en", true},
+		{en, "en   ", true},
+		{en, "  en  ", true},
+		{en, "en,", true},
+		{en, ",en", true},
+		{en, ",,,en,,,", true},
+		{en, ",en;q=1", true},
+
+		// We allow an empty input, contrary to spec.
+		{nil, "", true},
+		{[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
+
+		// errors
+		{nil, ";", false},
+		{nil, "$", false},
+		{nil, "e;", false},
+		{nil, "x;", false},
+		{nil, "x", false},
+		{nil, "ac", false}, // non-existing language
+		{nil, "aa;q", false},
+		{nil, "aa;q=", false},
+		{nil, "aa;q=.", false},
+		{nil, "00-t-0o", false},
+
+		// odd fallbacks
+		{
+			[]res{{mk("en"), 0.1}},
+			" english ;q=.1",
+			true,
+		},
+		{
+			[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
+			" italian, deutsch, french",
+			true,
+		},
+
+		// lists
+		{[]res{{mk("en"), 0.1}}, "en;q=.1", true},
+		{[]res{{mk("mul"), 1.0}}, "*", true},
+		{
+			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
+			"en,de",
+			true,
+		},
+		{
+			[]res{{mk("en"), 1.0}, {mk("de"), .5}},
+			"en,de;q=0.5",
+			true,
+		},
+		{
+			[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
+			"  en ;   q    =   0.5    ,  , de;q=0.8",
+			true,
+		},
+		{
+			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
+			"en,de,fr,i-klingon",
+			true,
+		},
+		// sorting
+		{
+			[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
+			"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
+			true,
+		},
+		// dropping
+		{
+			[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
+			"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
+			true,
+		},
+	}
+	for i, tt := range tests {
+		tags, qs, e := ParseAcceptLanguage(tt.in)
+		if e == nil != tt.ok {
+			t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
+		}
+		// Guard the indexing below: a surplus tag would panic on tt.out[j]
+		// and a missing tag would otherwise go unnoticed.
+		if len(tags) != len(tt.out) {
+			t.Errorf("%d:%s: got %d tags; want %d", i, tt.in, len(tags), len(tt.out))
+			continue
+		}
+		for j, tag := range tags {
+			if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
+				t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
+				break
+			}
+		}
+	}
+}
+
+// TestParseAcceptLanguageTooBig checks the size limit: input with 999 dashes
+// gets past it (failing with ErrSyntax), input with 1001 dashes is rejected.
+func TestParseAcceptLanguageTooBig(t *testing.T) {
+	in := strings.Repeat("en-x-a-", 333)
+	if _, _, err := ParseAcceptLanguage(in); err != language.ErrSyntax {
+		t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax)
+	}
+	in += "en-x-a"
+	if _, _, err := ParseAcceptLanguage(in); err != errTagListTooLarge {
+		t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge)
+	}
+}
@@ -0,0 +1,298 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package language
+
+// CLDRVersion is the CLDR version from which the tables in this package are derived.
+const CLDRVersion = "32"
+
+const (
+	_de  = 269
+	_en  = 313
+	_fr  = 350
+	_it  = 505
+	_mo  = 784
+	_no  = 879
+	_nb  = 839
+	_pt  = 960
+	_sh  = 1031
+	_mul = 806
+	_und = 0
+)
+const (
+	_001 = 1
+	_419 = 31
+	_BR  = 65
+	_CA  = 73
+	_ES  = 111
+	_GB  = 124
+	_MD  = 189
+	_PT  = 239
+	_UK  = 307
+	_US  = 310
+	_ZZ  = 358
+	_XA  = 324
+	_XC  = 326
+	_XK  = 334
+)
+const (
+	_Latn = 91
+	_Hani = 57
+	_Hans = 59
+	_Hant = 60
+	_Qaaa = 149
+	_Qaai = 157
+	_Qabx = 198
+	_Zinh = 255
+	_Zyyy = 260
+	_Zzzz = 261
+)
+
+var regionToGroups = []uint8{ // 359 elements
+	// Entry 0 - 3F
+	0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
+	// Entry 40 - 7F
+	0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
+	0x08, 0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04,
+	// Entry 80 - BF
+	0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00,
+	0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04,
+	// Entry C0 - FF
+	0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+	0x01, 0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	// Entry 100 - 13F
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+	0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00,
+	0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04,
+	0x00, 0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00,
+	// Entry 140 - 17F
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+} // Size: 383 bytes
+
+var paradigmLocales = [][3]uint16{ // 3 elements
+	0: [3]uint16{0x139, 0x0, 0x7c},
+	1: [3]uint16{0x13e, 0x0, 0x1f},
+	2: [3]uint16{0x3c0, 0x41, 0xef},
+} // Size: 42 bytes
+
+type mutualIntelligibility struct {
+	want     uint16
+	have     uint16
+	distance uint8
+	oneway   bool
+}
+type scriptIntelligibility struct {
+	wantLang   uint16
+	haveLang   uint16
+	wantScript uint8
+	haveScript uint8
+	distance   uint8
+}
+type regionIntelligibility struct {
+	lang     uint16
+	script   uint8
+	group    uint8
+	distance uint8
+}
+
+// matchLang holds pairs of langIDs of base languages that are typically
+// mutually intelligible. Each pair is associated with a confidence and
+// whether the intelligibility goes one or both ways.
+var matchLang = []mutualIntelligibility{ // 113 elements
+	0:   {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
+	1:   {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
+	2:   {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
+	3:   {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
+	4:   {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
+	5:   {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
+	6:   {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
+	7:   {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
+	8:   {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
+	9:   {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
+	10:  {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
+	11:  {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
+	12:  {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
+	13:  {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
+	14:  {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
+	15:  {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
+	16:  {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
+	17:  {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
+	18:  {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
+	19:  {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
+	20:  {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
+	21:  {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
+	22:  {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
+	23:  {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
+	24:  {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
+	25:  {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
+	26:  {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
+	27:  {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
+	28:  {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
+	29:  {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
+	30:  {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
+	31:  {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
+	32:  {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
+	33:  {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
+	34:  {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
+	35:  {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
+	36:  {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
+	37:  {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
+	38:  {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
+	39:  {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
+	40:  {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
+	41:  {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
+	42:  {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
+	43:  {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
+	44:  {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
+	45:  {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
+	46:  {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
+	47:  {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
+	48:  {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
+	49:  {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
+	50:  {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
+	51:  {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
+	52:  {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
+	53:  {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
+	54:  {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
+	55:  {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
+	56:  {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
+	57:  {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
+	58:  {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
+	59:  {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
+	60:  {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
+	61:  {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
+	62:  {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
+	63:  {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
+	64:  {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
+	65:  {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
+	66:  {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
+	67:  {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
+	68:  {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
+	69:  {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
+	70:  {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
+	71:  {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
+	72:  {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
+	73:  {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
+	74:  {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
+	75:  {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
+	76:  {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
+	77:  {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
+	78:  {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
+	79:  {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
+	80:  {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
+	81:  {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
+	82:  {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
+	83:  {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
+	84:  {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
+	85:  {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
+	86:  {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
+	87:  {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
+	88:  {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
+	89:  {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
+	90:  {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
+	91:  {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
+	92:  {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
+	93:  {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
+	94:  {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
+	95:  {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
+	96:  {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
+	97:  {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
+	98:  {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
+	99:  {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
+	100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
+	101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
+	102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
+	103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
+	104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
+	105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
+	106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
+	107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
+	108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
+	109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
+	110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
+	111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
+	112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
+} // Size: 702 bytes
+
+// matchScript holds pairs of scriptIDs where readers of one script
+// can typically also read the other. Each entry is scoped to a wanted and an
+// available language (wantLang, haveLang) and is associated with a distance.
+// A reverse pairing is listed as its own entry and may carry a different
+// distance (compare entries 24 and 25).
+var matchScript = []scriptIntelligibility{ // 26 elements
+	0:  {wantLang: 0x432, haveLang: 0x432, wantScript: 0x5b, haveScript: 0x20, distance: 0x5},
+	1:  {wantLang: 0x432, haveLang: 0x432, wantScript: 0x20, haveScript: 0x5b, distance: 0x5},
+	2:  {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
+	3:  {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x5b, distance: 0xa},
+	4:  {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x20, distance: 0xa},
+	5:  {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2e, haveScript: 0x5b, distance: 0xa},
+	6:  {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x5b, distance: 0xa},
+	7:  {wantLang: 0x251, haveLang: 0x139, wantScript: 0x53, haveScript: 0x5b, distance: 0xa},
+	8:  {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x58, haveScript: 0x5b, distance: 0xa},
+	9:  {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6f, haveScript: 0x5b, distance: 0xa},
+	10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x76, haveScript: 0x5b, distance: 0xa},
+	11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x22, haveScript: 0x5b, distance: 0xa},
+	12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x83, haveScript: 0x5b, distance: 0xa},
+	13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5b, distance: 0xa},
+	14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
+	15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
+	16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd6, haveScript: 0x5b, distance: 0xa},
+	17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5b, distance: 0xa},
+	18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe9, haveScript: 0x5b, distance: 0xa},
+	19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5b, distance: 0xa},
+	20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
+	21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5b, distance: 0xa},
+	22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x5b, haveScript: 0x20, distance: 0xa},
+	23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3e, haveScript: 0x5b, distance: 0xa},
+	24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3b, haveScript: 0x3c, distance: 0xf},
+	25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3c, haveScript: 0x3b, distance: 0x13},
+} // Size: 232 bytes
+
+// matchRegion lists regionIntelligibility entries. Each entry names a
+// language, a script (0x0 in most entries) and a region group, together with
+// a distance.
+// NOTE(review): every group value n also appears as n|0x80, so the high bit
+// looks like a flag rather than part of the group number — confirm against
+// the matcher code that consumes this table.
+var matchRegion = []regionIntelligibility{ // 15 elements
+	0:  {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
+	1:  {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
+	2:  {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
+	3:  {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
+	4:  {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
+	5:  {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
+	6:  {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
+	7:  {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
+	8:  {lang: 0x529, script: 0x3c, group: 0x2, distance: 0x4},
+	9:  {lang: 0x529, script: 0x3c, group: 0x82, distance: 0x4},
+	10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
+	11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
+	12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
+	13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
+	14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5},
+} // Size: 114 bytes
+
+// Total table size 1473 bytes (1KiB); checksum: 7BB90B5C
@@ -0,0 +1,145 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import "golang.org/x/text/internal/language/compact"
+
+// TODO: Various sets of commonly used tags and regions.
+
+// MustParse parses s as a BCP 47 language tag by calling Parse and returns
+// the resulting Tag. If Parse reports an error, MustParse panics with that
+// error, which makes it convenient for initializing Tag variables.
+func MustParse(s string) Tag {
+	tag, err := Parse(s)
+	if err != nil {
+		// The caller asked for a tag that must be valid; there is no
+		// error return, so surface the parse error as a panic.
+		panic(err)
+	}
+	return tag
+}
+
+// MustParse parses s as a BCP 47 language tag by calling c.Parse and returns
+// the resulting Tag. If c.Parse reports an error, MustParse panics with that
+// error, which makes it convenient for initializing Tag variables.
+func (c CanonType) MustParse(s string) Tag {
+	tag, err := c.Parse(s)
+	if err != nil {
+		// No error return is available, so the parse error becomes a panic.
+		panic(err)
+	}
+	return tag
+}
+
+// MustParseBase parses s by calling ParseBase and returns the resulting Base.
+// If ParseBase reports an error, MustParseBase panics with that error, which
+// makes it convenient for initializing Base variables.
+func MustParseBase(s string) Base {
+	base, err := ParseBase(s)
+	if err != nil {
+		// No error return is available, so the parse error becomes a panic.
+		panic(err)
+	}
+	return base
+}
+
+// MustParseScript parses s by calling ParseScript and returns the resulting
+// Script. If ParseScript reports an error, MustParseScript panics with that
+// error, which makes it convenient for initializing Script variables.
+func MustParseScript(s string) Script {
+	script, err := ParseScript(s)
+	if err != nil {
+		// No error return is available, so the parse error becomes a panic.
+		panic(err)
+	}
+	return script
+}
+
+// MustParseRegion parses s by calling ParseRegion and returns the resulting
+// Region. If ParseRegion reports an error, MustParseRegion panics with that
+// error, which makes it convenient for initializing Region variables.
+func MustParseRegion(s string) Region {
+	region, err := ParseRegion(s)
+	if err != nil {
+		// No error return is available, so the parse error becomes a panic.
+		panic(err)
+	}
+	return region
+}
+
+// Predefined Tag values.
+var (
+	// und is an unexported zero-value Tag.
+	und = Tag{}
+
+	// Und is the zero-value Tag.
+	Und Tag = Tag{}
+
+	// Each tag below is the identically named value from the internal
+	// compact package, converted to Tag.
+	Afrikaans            Tag = Tag(compact.Afrikaans)
+	Amharic              Tag = Tag(compact.Amharic)
+	Arabic               Tag = Tag(compact.Arabic)
+	ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
+	Azerbaijani          Tag = Tag(compact.Azerbaijani)
+	Bulgarian            Tag = Tag(compact.Bulgarian)
+	Bengali              Tag = Tag(compact.Bengali)
+	Catalan              Tag = Tag(compact.Catalan)
+	Czech                Tag = Tag(compact.Czech)
+	Danish               Tag = Tag(compact.Danish)
+	German               Tag = Tag(compact.German)
+	Greek                Tag = Tag(compact.Greek)
+	English              Tag = Tag(compact.English)
+	AmericanEnglish      Tag = Tag(compact.AmericanEnglish)
+	BritishEnglish       Tag = Tag(compact.BritishEnglish)
+	Spanish              Tag = Tag(compact.Spanish)
+	EuropeanSpanish      Tag = Tag(compact.EuropeanSpanish)
+	LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
+	Estonian             Tag = Tag(compact.Estonian)
+	Persian              Tag = Tag(compact.Persian)
+	Finnish              Tag = Tag(compact.Finnish)
+	Filipino             Tag = Tag(compact.Filipino)
+	French               Tag = Tag(compact.French)
+	CanadianFrench       Tag = Tag(compact.CanadianFrench)
+	Gujarati             Tag = Tag(compact.Gujarati)
+	Hebrew               Tag = Tag(compact.Hebrew)
+	Hindi                Tag = Tag(compact.Hindi)
+	Croatian             Tag = Tag(compact.Croatian)
+	Hungarian            Tag = Tag(compact.Hungarian)
+	Armenian             Tag = Tag(compact.Armenian)
+	Indonesian           Tag = Tag(compact.Indonesian)
+	Icelandic            Tag = Tag(compact.Icelandic)
+	Italian              Tag = Tag(compact.Italian)
+	Japanese             Tag = Tag(compact.Japanese)
+	Georgian             Tag = Tag(compact.Georgian)
+	Kazakh               Tag = Tag(compact.Kazakh)
+	Khmer                Tag = Tag(compact.Khmer)
+	Kannada              Tag = Tag(compact.Kannada)
+	Korean               Tag = Tag(compact.Korean)
+	Kirghiz              Tag = Tag(compact.Kirghiz)
+	Lao                  Tag = Tag(compact.Lao)
+	Lithuanian           Tag = Tag(compact.Lithuanian)
+	Latvian              Tag = Tag(compact.Latvian)
+	Macedonian           Tag = Tag(compact.Macedonian)
+	Malayalam            Tag = Tag(compact.Malayalam)
+	Mongolian            Tag = Tag(compact.Mongolian)
+	Marathi              Tag = Tag(compact.Marathi)
+	Malay                Tag = Tag(compact.Malay)
+	Burmese              Tag = Tag(compact.Burmese)
+	Nepali               Tag = Tag(compact.Nepali)
+	Dutch                Tag = Tag(compact.Dutch)
+	Norwegian            Tag = Tag(compact.Norwegian)
+	Punjabi              Tag = Tag(compact.Punjabi)
+	Polish               Tag = Tag(compact.Polish)
+	Portuguese           Tag = Tag(compact.Portuguese)
+	BrazilianPortuguese  Tag = Tag(compact.BrazilianPortuguese)
+	EuropeanPortuguese   Tag = Tag(compact.EuropeanPortuguese)
+	Romanian             Tag = Tag(compact.Romanian)
+	Russian              Tag = Tag(compact.Russian)
+	Sinhala              Tag = Tag(compact.Sinhala)
+	Slovak               Tag = Tag(compact.Slovak)
+	Slovenian            Tag = Tag(compact.Slovenian)
+	Albanian             Tag = Tag(compact.Albanian)
+	Serbian              Tag = Tag(compact.Serbian)
+	SerbianLatin         Tag = Tag(compact.SerbianLatin)
+	Swedish              Tag = Tag(compact.Swedish)
+	Swahili              Tag = Tag(compact.Swahili)
+	Tamil                Tag = Tag(compact.Tamil)
+	Telugu               Tag = Tag(compact.Telugu)
+	Thai                 Tag = Tag(compact.Thai)
+	Turkish              Tag = Tag(compact.Turkish)
+	Ukrainian            Tag = Tag(compact.Ukrainian)
+	Urdu                 Tag = Tag(compact.Urdu)
+	Uzbek                Tag = Tag(compact.Uzbek)
+	Vietnamese           Tag = Tag(compact.Vietnamese)
+	Chinese              Tag = Tag(compact.Chinese)
+	SimplifiedChinese    Tag = Tag(compact.SimplifiedChinese)
+	TraditionalChinese   Tag = Tag(compact.TraditionalChinese)
+	Zulu                 Tag = Tag(compact.Zulu)
+)
@@ -0,0 +1,389 @@
+# TODO: this file has not yet been included in the main CLDR release.
+# The intent is to verify this file against the Go implementation and then
+# correct the cases and merge in other interesting test cases.
+# See TestCLDRCompliance in match_test.go, as well as the list of exceptions
+# defined in the map skip below it, for the work in progress.
+
+# Data-driven test for the XLocaleMatcher.
+# Format
+# • Everything after "#" is a comment
+# • Arguments are separated by ";". They are:
+
+# supported ; desired ; expected
+
+# • The supported list may begin with a number that resets the threshold distance, e.g. 50, en, fr
+# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
+# The test code also supports reformatting this file, by setting the REFORMAT flag.
+
+##################################################
+# testParentLocales
+
+# es-419, es-AR, and es-MX are in a cluster; es is in a different one
+
+es-419, es-ES ; 	es-AR ; 	es-419
+es-ES, es-419 ; 	es-AR ; 	es-419
+
+es-419, es ; 	es-AR ; 	es-419
+es, es-419 ; 	es-AR ; 	es-419
+
+es-MX, es ; 	es-AR ; 	es-MX
+es, es-MX ; 	es-AR ; 	es-MX
+
+# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
+
+en-GB, en-US ; 	en-AU ; 	en-GB
+en-US, en-GB ; 	en-AU ; 	en-GB
+
+en-GB, en ; 	en-AU ; 	en-GB
+en, en-GB ; 	en-AU ; 	en-GB
+
+en-NZ, en-US ; 	en-AU ; 	en-NZ
+en-US, en-NZ ; 	en-AU ; 	en-NZ
+
+en-NZ, en ; 	en-AU ; 	en-NZ
+en, en-NZ ; 	en-AU ; 	en-NZ
+
+# pt-AO and pt-PT in one cluster; pt-BR in another
+
+pt-PT, pt-BR ; 	pt-AO ; 	pt-PT
+pt-BR, pt-PT ; 	pt-AO ; 	pt-PT
+
+pt-PT, pt ; 	pt-AO ; 	pt-PT
+pt, pt-PT ; 	pt-AO ; 	pt-PT
+
+zh-MO, zh-TW ; 	zh-HK ; 	zh-MO
+zh-TW, zh-MO ; 	zh-HK ; 	zh-MO
+
+zh-MO, zh-TW ; 	zh-HK ; 	zh-MO
+zh-TW, zh-MO ; 	zh-HK ; 	zh-MO
+
+zh-MO, zh-CN ; 	zh-HK ; 	zh-MO
+zh-CN, zh-MO ; 	zh-HK ; 	zh-MO
+
+zh-MO, zh ; 	zh-HK ; 	zh-MO
+zh, zh-MO ; 	zh-HK ; 	zh-MO
+
+##################################################
+# testChinese
+
+zh-CN, zh-TW, iw ; 	zh-Hant-TW ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh-Hant ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh-TW ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh-Hans-CN ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-CN ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh ; 	zh-CN
+
+##################################################
+# testenGB
+
+fr, en, en-GB, es-419, es-MX, es ; 	en-NZ ; 	en-GB
+fr, en, en-GB, es-419, es-MX, es ; 	es-ES ; 	es
+fr, en, en-GB, es-419, es-MX, es ; 	es-AR ; 	es-419
+fr, en, en-GB, es-419, es-MX, es ; 	es-MX ; 	es-MX
+
+##################################################
+# testFallbacks
+
+91, en, hi ; 	sa ; 	hi
+
+##################################################
+# testBasics
+
+fr, en-GB, en ; 	en-GB ; 	en-GB
+fr, en-GB, en ; 	en ; 	en
+fr, en-GB, en ; 	fr ; 	fr
+fr, en-GB, en ; 	ja ; 	fr	# return first if no match
+
+##################################################
+# testFallback
+
+# check that script fallbacks are handled right
+
+zh-CN, zh-TW, iw ; 	zh-Hant ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hans-CN ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hant-HK ; 	zh-TW
+zh-CN, zh-TW, iw ; 	he-IT ; 	iw
+
+##################################################
+# testSpecials
+
+# check that nearby languages are handled
+
+en, fil, ro, nn ; 	tl ; 	fil
+en, fil, ro, nn ; 	mo ; 	ro
+en, fil, ro, nn ; 	nb ; 	nn
+
+# make sure default works
+
+en, fil, ro, nn ; 	ja ; 	en
+
+##################################################
+# testRegionalSpecials
+
+# verify that en-AU is closer to en-GB than to en (which is en-US)
+
+en, en-GB, es, es-419 ; 	es-MX ; 	es-419
+en, en-GB, es, es-419 ; 	en-AU ; 	en-GB
+en, en-GB, es, es-419 ; 	es-ES ; 	es
+
+##################################################
+# testHK
+
+# HK and MO are closer to each other for Hant than to TW
+
+zh, zh-TW, zh-MO ; 	zh-HK ; 	zh-MO
+zh, zh-TW, zh-HK ; 	zh-MO ; 	zh-HK
+
+##################################################
+# testMatch-exact
+
+# see localeDistance.txt
+
+##################################################
+# testMatch-none
+
+# see localeDistance.txt
+
+##################################################
+# testMatch-matchOnMaximized
+
+zh, zh-Hant ; 	und-TW ; 	zh-Hant	# und-TW should be closer to zh-Hant than to zh
+en-Hant-TW, und-TW ; 	zh-Hant ; 	und-TW	# zh-Hant should be closer to und-TW than to en-Hant-TW
+en-Hant-TW, und-TW ; 	zh ; 	und-TW	# zh should be closer to und-TW than to en-Hant-TW
+
+##################################################
+# testMatchGrandfatheredCode
+
+fr, i-klingon, en-Latn-US ; 	en-GB-oed ; 	en-Latn-US
+
+##################################################
+# testGetBestMatchForList-exactMatch
+fr, en-GB, ja, es-ES, es-MX ; 	ja, de ; 	ja
+
+##################################################
+# testGetBestMatchForList-simpleVariantMatch
+fr, en-GB, ja, es-ES, es-MX ; 	de, en-US ; 	en-GB	# Intentionally avoiding a perfect-match or two candidates for variant matches.
+
+# Fallback.
+
+fr, en-GB, ja, es-ES, es-MX ; 	de, zh ; 	fr
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ; 	ja-Jpan-JP, en-AU ; 	ja	# Match for ja-Jpan-JP (maximized already)
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
+
+en, ja ; 	ja-JP, en-US ; 	ja	# Match for ja-Jpan-JP (maximized already)
+
+# Check that if the preference is maximized already, it works as well.
+
+en, ja ; 	ja-Jpan-JP, en-US ; 	ja	# Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-noMatchOnMaximized
+# Regression test for http://b/5714572 .
+# de maximizes to de-DE. Pick the exact match for the secondary language instead.
+en, de, fr, ja ; 	de-CH, fr ; 	de
+
+##################################################
+# testBestMatchForTraditionalChinese
+
+# Scenario: An application that only supports Simplified Chinese (and some other languages),
+# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
+# zh-Hans, it wouldn't make much of a difference.
+
+# The script distance (simplified vs. traditional Han) is considered small enough
+# to be an acceptable match. The regional difference is considered almost insignificant.
+
+fr, zh-Hans-CN, en-US ; 	zh-TW ; 	zh-Hans-CN
+fr, zh-Hans-CN, en-US ; 	zh-Hant ; 	zh-Hans-CN
+
+# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
+# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
+# change your call to getBestMatch to include a 2nd language preference.
+# "en" is a better match since its distance to "en-US" is closer than the distance
+# from "zh-TW" to "zh-CN" (script distance).
+
+fr, zh-Hans-CN, en-US ; 	zh-TW, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hant-CN, en, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hans, en ; 	zh-Hans-CN
+
+##################################################
+# testUndefined
+# When the undefined language doesn't match anything in the list,
+# getBestMatch returns the default, as usual.
+
+it, fr ; 	und ; 	it
+
+# When it *does* occur in the list, bestMatch returns it, as expected.
+it, und ; 	und ; 	und
+
+# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
+# tags, the undefined language would normally match English. But that would produce the
+# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
+# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
+
+# To avoid that, we change the matcher's definitions of max
+# so that max("und")="und". That produces the following, more desirable
+# results:
+
+it, en ; 	und ; 	it
+it, und ; 	en ; 	it
+
+##################################################
+# testGetBestMatch-regionDistance
+
+es-AR, es ; 	es-MX ; 	es-AR
+fr, en, en-GB ; 	en-CA ; 	en-GB
+de-AT, de-DE, de-CH ; 	de ; 	de-DE
+
+##################################################
+# testAsymmetry
+
+mul, nl ; 	af ; 	nl	# af => nl
+mul, af ; 	nl ; 	mul	# but nl !=> af
+
+##################################################
+# testGetBestMatchForList-matchOnMaximized2
+
+# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
+
+fr, en-GB, ja, es-ES, es-MX ; 	ja-JP, en-GB ; 	ja	# Match for ja-JP, with likely region subtag
+
+# Check that if the preference is maximized already, it works as well.
+
+fr, en-GB, ja, es-ES, es-MX ; 	ja-Jpan-JP, en-GB ; 	ja	# Match for ja-Jpan-JP (maximized already)
+
+##################################################
+# testGetBestMatchForList-closeEnoughMatchOnMaximized
+
+en-GB, en, de, fr, ja ; 	de-CH, fr ; 	de
+en-GB, en, de, fr, ja ; 	en-US, ar, nl, de, ja ; 	en
+
+##################################################
+# testGetBestMatchForPortuguese
+
+# pt might be supported and not pt-PT
+
+# European user who prefers Spanish over Brazilian Portuguese as a fallback.
+
+pt-PT, pt-BR, es, es-419 ; 	pt-PT, es, pt ; 	pt-PT
+pt-PT, pt, es, es-419 ; 	pt-PT, es, pt ; 	pt-PT	# pt implicit
+
+# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
+# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
+# matchers as "pt-BR" is a much more common language.
+
+pt-PT, pt-BR, es, es-419 ; 	pt, es-419, pt-PT ; 	pt-BR
+pt-PT, pt-BR, es, es-419 ; 	pt-PT, es, pt ; 	pt-PT
+pt-PT, pt, es, es-419 ; 	pt-PT, es, pt ; 	pt-PT
+pt-PT, pt, es, es-419 ; 	pt, es-419, pt-PT ; 	pt
+
+pt-BR, es, es-419 ; 	pt, es-419, pt-PT ; 	pt-BR
+
+# Code that adds the user's country can get "pt-US" for a user's language.
+# That should fall back to "pt-BR".
+
+pt-PT, pt-BR, es, es-419 ; 	pt-US, pt-PT ; 	pt-BR
+pt-PT, pt, es, es-419 ; 	pt-US, pt-PT, pt ; 	pt	# pt-BR implicit
+
+##################################################
+# testVariantWithScriptMatch 1 and 2
+
+fr, en, sv ; 	en-GB ; 	en
+fr, en, sv ; 	en-GB ; 	en
+en, sv ; 	en-GB, sv ; 	en
+
+##################################################
+# testLongLists
+
+en, sv ; 	sv ; 	sv
+af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; 	sv ; 	sv
+af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, 
fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, 
sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; 	sv ; 	sv
+
+##################################################
+# test8288
+
+it, en ; 	und ; 	it
+it, en ; 	und, en ; 	en
+
+# examples from
+# https://unicode.org/repos/cldr/tags/latest/common/bcp47/
+# https://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
+
+##################################################
+# testUnHack
+
+en-NZ, en-IT ; 	en-US ; 	en-NZ
+
+##################################################
+# testEmptySupported => null
+ ; 	en ; 	null
+
+##################################################
+# testVariantsAndExtensions
+##################################################
+# tests the .combine() method
+
+und, fr ; 	fr-BE-fonipa ; 	fr ; 	fr-BE-fonipa
+und, fr-CA ; 	fr-BE-fonipa ; 	fr-CA ; 	fr-BE-fonipa
+und, fr-fonupa ; 	fr-BE-fonipa ; 	fr-fonupa ; 	fr-BE-fonipa
+und, no ; 	nn-BE-fonipa ; 	no ; 	no-BE-fonipa
+und, en-GB-u-sd-gbsct ; 	en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; 	en-GB-u-sd-gbsct ; 	en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
+
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; 	fr-PSCRACK ; 	fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; 	fr ; 	fr-PSCRACK
+en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; 	de-CH ; 	de-PSCRACK
+
+##################################################
+# testClusters
+# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
+
+und, es, es-MA, es-MX, es-419 ; 	es-AR ; 	es-419
+und, es-MA, es, es-419, es-MX ; 	es-AR ; 	es-419
+und, es, es-MA, es-MX, es-419 ; 	es-EA ; 	es
+und, es-MA, es, es-419, es-MX ; 	es-EA ; 	es
+
+# of course, fall back to within cluster
+
+und, es, es-MA, es-MX ; 	es-AR ; 	es-MX
+und, es-MA, es, es-MX ; 	es-AR ; 	es-MX
+und, es-MA, es-MX, es-419 ; 	es-EA ; 	es-MA
+und, es-MA, es-419, es-MX ; 	es-EA ; 	es-MA
+
+# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
+
+und, en, en-GU, en-IN, en-GB ; 	en-ZA ; 	en-GB
+und, en-GU, en, en-GB, en-IN ; 	en-ZA ; 	en-GB
+und, en, en-GU, en-IN, en-GB ; 	en-VI ; 	en
+und, en-GU, en, en-GB, en-IN ; 	en-VI ; 	en
+
+# of course, fall back to within cluster
+
+und, en, en-GU, en-IN ; 	en-ZA ; 	en-IN
+und, en-GU, en, en-IN ; 	en-ZA ; 	en-IN
+und, en-GU, en-IN, en-GB ; 	en-VI ; 	en-GU
+und, en-GU, en-GB, en-IN ; 	en-VI ; 	en-GU
+
+##################################################
+# testThreshold
+@Threshold=60
+
+50, und, fr-CA-fonupa ; 	fr-BE-fonipa ; 	fr-CA-fonupa ; 	fr-BE-fonipa
+50, und, fr-Cyrl-CA-fonupa ; 	fr-BE-fonipa ; 	fr-Cyrl-CA-fonupa ; 	fr-Cyrl-BE-fonipa
+
+@Threshold=-1 # restore
+
+##################################################
+# testScriptFirst
+@DistanceOption=SCRIPT_FIRST
+@debug
+
+ru, fr ; zh, pl ; fr
+ru, fr ; zh-Cyrl, pl ; ru
+hr, en-Cyrl; sr ; en-Cyrl
+da, ru, hr; sr ; ru
@@ -0,0 +1,231 @@
+# basics
+fr, en-GB, en ; 	en-GB ; 	en-GB
+fr, en-GB, en ; 	en-US ; 	en
+fr, en-GB, en ; 	fr-FR ; 	fr
+fr, en-GB, en ; 	ja-JP ; 	fr
+
+# script fallbacks
+zh-CN, zh-TW, iw ; 	zh-Hant ; 	zh-TW
+zh-CN, zh-TW, iw ; 	zh ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hans-CN ; 	zh-CN
+zh-CN, zh-TW, iw ; 	zh-Hant-HK ; 	zh-TW
+zh-CN, zh-TW, iw ; 	he-IT ; 	iw ; iw-u-rg-itzzzz
+
+# language-specific script fallbacks 1
+en, sr, nl ; 	sr-Latn ; 	sr
+en, sr, nl ; 	sh ; 	sr   # different script, but seems okay and is as CLDR suggests
+en, sr, nl ; 	hr ; 	en
+en, sr, nl ; 	bs ; 	en
+en, sr, nl ; 	nl-Cyrl ; 	sr
+
+# language-specific script fallbacks 2
+en, sh ; 	sr ; 	sh
+en, sh ; 	sr-Cyrl ; 	sh
+en, sh ; 	hr ; 	sh
+
+# don't match hr to sr-Latn
+en, sr-Latn ; 	hr ; 	en
+
+# both deprecated and not
+fil, tl, iw, he ; 	he-IT ; 	he
+fil, tl, iw, he ; 	he ; 	he
+fil, tl, iw, he ; 	iw ; 	iw
+fil, tl, iw, he ; 	fil-IT ; 	fil
+fil, tl, iw, he ; 	fil ; 	fil
+fil, tl, iw, he ; 	tl ; 	tl
+
+# nearby languages
+en, fil, ro, nn ; 	tl ; 	fil
+en, fil, ro, nn ; 	mo ; 	ro
+en, fil, ro, nn ; 	nb ; 	nn
+en, fil, ro, nn ; 	ja ; 	en
+
+# nearby languages: Nynorsk to Bokmål
+en, nb ; 	nn ; 	nb
+
+# nearby languages: Danish does not match nn
+en, nn ; 	da ; 	en
+
+# nearby languages: Danish matches no
+en, no ; 	da ; 	no
+
+# nearby languages: Danish matches nb
+en, nb ; 	da ; 	nb
+
+# prefer matching languages over language variants.
+nn, en-GB ; 	no, en-US ; 	en-GB
+nn, en-GB ; 	nb, en-US ; 	en-GB
+
+# deprecated version is closer than same language with other differences
+nl, he, en-GB ; 	iw, en-US ; 	he
+
+# macro equivalent is closer than same language with other differences
+nl, zh, en-GB, no ; 	cmn, en-US ; 	zh
+nl, zh, en-GB, no ; 	nb, en-US ; 	no
+
+# legacy equivalent is closer than same language with other differences
+nl, fil, en-GB ; 	tl, en-US ; 	fil
+
+# distinguish near equivalents
+en, ro, mo, ro-MD ; 	ro ; 	ro
+en, ro, mo, ro-MD ; 	mo ; 	mo
+en, ro, mo, ro-MD ; 	ro-MD ; 	ro-MD
+
+# maximization of legacy
+sr-Cyrl, sr-Latn, ro, ro-MD ; 	sh ; 	sr-Latn
+sr-Cyrl, sr-Latn, ro, ro-MD ; 	mo ; 	ro-MD
+
+# empty
+ ; 	fr ; 	und
+ ; 	en ; 	und
+
+# private use subtags
+fr, en-GB, x-bork, es-ES, es-419 ; 	x-piglatin ; 	fr
+fr, en-GB, x-bork, es-ES, es-419 ; 	x-bork ; 	x-bork
+
+# grandfathered codes
+fr, i-klingon, en-Latn-US ; 	en-GB-oed ; 	en-Latn-US
+fr, i-klingon, en-Latn-US ; 	i-klingon ; 	tlh
+
+
+# simple variant match
+fr, en-GB, ja, es-ES, es-MX ; 	de, en-US ; 	en-GB
+fr, en-GB, ja, es-ES, es-MX ; 	de, zh ; 	fr
+
+# best match for traditional Chinese
+fr, zh-Hans-CN, en-US ; 	zh-TW ; 	zh-Hans-CN
+fr, zh-Hans-CN, en-US ; 	zh-Hant ; 	zh-Hans-CN
+fr, zh-Hans-CN, en-US ; 	zh-TW, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hant-CN, en ; 	en-US
+fr, zh-Hans-CN, en-US ; 	zh-Hans, en ; 	zh-Hans-CN
+
+# more specific script should win in case regions are identical
+af, af-Latn, af-Arab ; 	af ; 	af
+af, af-Latn, af-Arab ; 	af-ZA ; 	af
+af, af-Latn, af-Arab ; 	af-Latn-ZA ; 	af-Latn
+af, af-Latn, af-Arab ; 	af-Latn ; 	af-Latn
+
+# more specific region should win
+nl, nl-NL, nl-BE ; 	nl ; 	nl
+nl, nl-NL, nl-BE ; 	nl-Latn ; 	nl
+nl, nl-NL, nl-BE ; 	nl-Latn-NL ; 	nl-NL
+nl, nl-NL, nl-BE ; 	nl-NL ; 	nl-NL
+
+# region may replace matched if matched is enclosing
+es-419,es ; 	es-MX ; 	es-419 ; es-MX
+es-419,es ; 	es-SG ; 	es
+
+# more specific region wins over more specific script
+nl, nl-Latn, nl-NL, nl-BE ; 	nl ; 	nl
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-Latn ; 	nl-Latn
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-NL ; 	nl-NL
+nl, nl-Latn, nl-NL, nl-BE ; 	nl-Latn-NL ; 	nl-NL
+
+# region distance Portuguese
+pt, pt-PT ; 	pt-ES ; 	pt-PT
+
+# if no preferred locale specified, pick top language, not regional
+en, fr, fr-CA, fr-CH ; 	fr-US ; 	fr  ; fr-u-rg-uszzzz
+
+# region distance German
+de-AT, de-DE, de-CH ; 	de ; 	de-DE
+
+# en-AU is closer to en-GB than to en (which is en-US)
+en, en-GB, es-ES, es-419 ; 	en-AU ; 	en-GB
+en, en-GB, es-ES, es-419 ; 	es-MX ; 	es-419 ; es-MX
+en, en-GB, es-ES, es-419 ; 	es-PT ; 	es-ES
+
+# undefined
+it, fr ; 	und ; 	it
+
+# und does not match en
+it, en ; 	und ; 	it
+
+# undefined in priority list
+it, und ; 	und ; 	und
+it, und ; 	en ; 	it
+
+# undefined
+it, fr, zh ; 	und-FR ; 	fr
+it, fr, zh ; 	und-CN ; 	zh
+it, fr, zh ; 	und-Hans ; 	zh
+it, fr, zh ; 	und-Hant ; 	zh
+it, fr, zh ; 	und-Latn ; 	it
+
+# match on maximized tag
+fr, en-GB, ja, es-ES, es-MX ; 	ja-JP, en-GB ; 	ja
+fr, en-GB, ja, es-ES, es-MX ; 	ja-Jpan-JP, en-GB ; 	ja
+
+# pick best maximized tag
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-Jpan, ru ; 	ja
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-JP, ru ; 	ja-JP
+ja, ja-Jpan-US, ja-JP, en, ru ; 	ja-US, ru ; 	ja-Jpan-US
+
+# termination: pick best maximized match
+ja, ja-Jpan, ja-JP, en, ru ; 	ja-Jpan-JP, ru ; 	ja-JP
+ja, ja-Jpan, ja-JP, en, ru ; 	ja-Jpan, ru ; 	ja-Jpan
+
+# same language over exact, but distinguish when user is explicit
+fr, en-GB, ja, es-ES, es-MX ; 	ja, de ; 	ja
+en, de, fr, ja ; 	de-CH, fr ; 	de # TODO: ; de-u-rg-CH
+en-GB, nl ; 	en, nl ; en-GB
+en-GB, nl ; 	en, nl, en-GB ; nl
+
+# parent relation preserved
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-150 ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-AU ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-BE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-GG ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-GI ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-HK ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IM ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-IN ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-JE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-MT ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-NZ ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-PK ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-SG ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-DE ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	en-MT ; 	en-GB
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-AR ; 	es-419 ; es-AR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-BO ; 	es-419 ; es-BO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CL ; 	es-419 ; es-CL
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CO ; 	es-419 ; es-CO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CR ; 	es-419 ; es-CR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-CU ; 	es-419 ; es-CU
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-DO ; 	es-419 ; es-DO
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-EC ; 	es-419 ; es-EC
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-GT ; 	es-419 ; es-GT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-HN ; 	es-419 ; es-HN
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-MX ; 	es-419 ; es-MX
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-NI ; 	es-419 ; es-NI
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PA ; 	es-419 ; es-PA
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PE ; 	es-419 ; es-PE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PR ; 	es-419 ; es-PR
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PT ; 	es
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-PY ; 	es-419 ; es-PY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-SV ; 	es-419 ; es-SV
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-US ; 	es-419
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-UY ; 	es-419 ; es-UY
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	es-VE ; 	es-419 ; es-VE
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-AO ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-CV ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-GW ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-MO ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-MZ ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-ST ; 	pt-PT
+en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK ; 	pt-TL ; 	pt-PT
+
+# preserve extensions
+en, de, sl-nedis ; 	de-FR-u-co-phonebk ; 	de ; de-u-co-phonebk-rg-frzzzz
+en, de, sl-nedis ; 	sl-nedis-u-cu-eur ; 	sl-nedis ; sl-nedis-u-cu-eur
+en, de, sl-nedis ; 	sl-u-cu-eur ; 	sl-nedis ; sl-nedis-u-cu-eur
+en, de, sl-nedis ; 	sl-HR-nedis-u-cu-eur ; 	sl-nedis ; sl-nedis-u-cu-eur-rg-hrzzzz
+en, de, sl-nedis ; 	de-t-m0-iso-i0-pinyin ; 	de ; de-t-m0-iso-i0-pinyin
+
+und, nl ; 	nl-BE-fonipa ; 	nl ; 	nl-u-rg-bezzzz
+und, nl-CA ;	nl-BE-fonipa ; 	nl-CA ; 	nl-CA-u-rg-bezzzz
+und, nl-fonupa ; 	nl-BE-fonipa ; 	nl-fonupa ; 	nl-fonupa-u-rg-bezzzz
+und, no ; 	nn-DK-fonipa ; 	no ; 	no-u-rg-dkzzzz
+und, en-GB-u-sd-usca ; 	en-US-fonipa-u-nu-Arab-ca-buddhist-sd-usdc-t-m0-iso-i0-pinyin ; 	en-GB-u-sd-usca ; 	en-GB-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-Arab-rg-uszzzz-sd-usca