whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,16 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package language
+
+// This file contains code common to the maketables.go and the package code.
+
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8
+
+const (
+	Deprecated AliasType = iota
+	Macro
+	Legacy
+
+	AliasTypeUnknown AliasType = -1
+)
@@ -0,0 +1,29 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// CompactCoreInfo is a compact integer with the three core tags encoded.
+type CompactCoreInfo uint32
+
+// GetCompactCore packs the language, script, and region IDs of t into a single
+// CompactCoreInfo that is guaranteed to be unique for different language,
+// region, and script values. It returns (0, false) when the language ID of t
+// is greater than langNoIndexOffset.
+func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
+	if t.LangID > langNoIndexOffset {
+		return 0, false
+	}
+	// From high to low bits: language (shifted by 20), script (shifted by 12),
+	// region (unshifted).
+	lang := CompactCoreInfo(t.LangID) << (8 + 12)
+	script := CompactCoreInfo(t.ScriptID) << 12
+	region := CompactCoreInfo(t.RegionID)
+	return lang | script | region, true
+}
+
+// Tag generates a tag from c.
+func (c CompactCoreInfo) Tag() Tag {
+	return Tag{
+		LangID:   Language(c >> 20),
+		RegionID: Region(c & 0x3ff),
+		ScriptID: Script(c>>12) & 0xff,
+	}
+}
@@ -0,0 +1,61 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package compact defines a compact representation of language tags.
+//
+// Common language tags (at least all for which locale information is defined
+// in CLDR) are assigned a unique index. Each Tag is associated with such an
+// ID for selecting language-related resources (such as translations) as well
+// as one for selecting regional defaults (currency, number formatting, etc.)
+//
+// We may want to export this functionality at some point, but for now it is
+// only available for use within x/text.
+package compact // import "golang.org/x/text/internal/language/compact"
+
+import (
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// ID is an integer identifying a single tag.
+type ID uint16
+
+// getCoreIndex looks up the compact core encoding of t in the coreTags table
+// using binary search. It returns the position of the matching entry as an ID,
+// or (0, false) if t has no compact core encoding or the encoding is not
+// present in the table.
+func getCoreIndex(t language.Tag) (id ID, ok bool) {
+	key, ok := language.GetCompactCore(t)
+	if !ok {
+		return 0, false
+	}
+	// Find the first entry that is not smaller than key.
+	atOrAfter := func(i int) bool { return key <= coreTags[i] }
+	pos := sort.Search(len(coreTags), atOrAfter)
+	if pos < len(coreTags) && coreTags[pos] == key {
+		return ID(pos), true
+	}
+	return 0, false
+}
+
+// Parent returns the ID of the parent or the root ID if id is already the root.
+//
+// The result is read directly from the parents table, so id must be a valid
+// index into that table.
+func (id ID) Parent() ID {
+	return parents[id]
+}
+
+// Tag converts id to an internal language Tag. IDs below len(coreTags) are
+// decoded from the coreTags table; all other IDs are looked up in specialTags
+// after subtracting len(coreTags).
+func (id ID) Tag() language.Tag {
+	idx, numCore := int(id), len(coreTags)
+	if idx < numCore {
+		return coreTags[idx].Tag()
+	}
+	return specialTags[idx-numCore]
+}
+
+// specialTags holds the parsed form of each space-separated entry of
+// specialTagsStr, in the same order. It is filled in by init.
+var specialTags []language.Tag
+
+func init() {
+	fields := strings.Split(specialTagsStr, " ")
+	specialTags = make([]language.Tag, 0, len(fields))
+	for _, field := range fields {
+		specialTags = append(specialTags, language.MustParse(field))
+	}
+}
@@ -0,0 +1,64 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+// Language tag table generator.
+// Data read from the web.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/unicode/cldr"
+)
+
+// Command-line flags of the generator.
+var (
+	// NOTE(review): test is not read anywhere in the visible part of this
+	// generator — confirm whether it is still needed.
+	test = flag.Bool("test",
+		false,
+		"test existing tables; can be used to compare web data with package data.")
+	// outputFile is the name of the generated file.
+	outputFile = flag.String("output",
+		"tables.go",
+		"output file for generated tables")
+)
+
+// main generates the tables of the compact package: it writes the import of
+// the internal language package, the CLDR version, and the compact index, and
+// stores the result in the file named by the -output flag (default
+// "tables.go").
+func main() {
+	gen.Init()
+
+	w := gen.NewCodeWriter()
+	// Honor the -output flag instead of a hard-coded file name. The flag's
+	// default is "tables.go", so running without the flag behaves as before.
+	// The argument is evaluated here, after gen.Init — presumably where flags
+	// are parsed; confirm against the gen package.
+	defer w.WriteGoFile(*outputFile, "compact")
+
+	fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
+
+	b := newBuilder(w)
+	gen.WriteCLDRVersion(w)
+
+	b.writeCompactIndex()
+}
+
+// builder bundles the code writer with the decoded CLDR data used to generate
+// the tables.
+type builder struct {
+	// w receives the generated code.
+	w    *gen.CodeWriter
+	// data is the decoded CLDR archive.
+	data *cldr.CLDR
+	// supp is the supplemental data obtained from data.
+	supp *cldr.SupplementalData
+}
+
+// newBuilder decodes the CLDR core archive and returns a builder that writes
+// to w. If the archive cannot be decoded the error is reported with log.Fatal.
+func newBuilder(w *gen.CodeWriter) *builder {
+	archive := gen.OpenCLDRCoreZip()
+	defer archive.Close()
+
+	dec := &cldr.Decoder{}
+	decoded, err := dec.DecodeZip(archive)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return &builder{
+		w:    w,
+		data: decoded,
+		supp: decoded.Supplemental(),
+	}
+}
@@ -0,0 +1,113 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+// This file generates derivative tables based on the language package itself.
+
+import (
+	"fmt"
+	"log"
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// Compact indices:
+// Note -va-X variants only apply to localization variants.
+// BCP variants only ever apply to language.
+// The only ambiguity between tags is with regions.
+
+// writeCompactIndex emits the compact index: the NumCompactTags constant, one
+// ID constant per collected tag, the coreTags table, and the specialTagsStr
+// constant. Tags are collected from the CLDR locales and from the locales
+// listed in the plural rules.
+func (b *builder) writeCompactIndex() {
+	// Collect all language tags for which we have any data in CLDR.
+	m := map[language.Tag]bool{}
+	for _, lang := range b.data.Locales() {
+		// We include all locales unconditionally to be consistent with en_US.
+		// We want en_US, even though it has no data associated with it.
+
+		// TODO: put any of the languages for which no data exists at the end
+		// of the index. This allows all components based on ICU to use that
+		// as the cutoff point.
+		// if x := data.RawLDML(lang); false ||
+		// 	x.LocaleDisplayNames != nil ||
+		// 	x.Characters != nil ||
+		// 	x.Delimiters != nil ||
+		// 	x.Measurement != nil ||
+		// 	x.Dates != nil ||
+		// 	x.Numbers != nil ||
+		// 	x.Units != nil ||
+		// 	x.ListPatterns != nil ||
+		// 	x.Collations != nil ||
+		// 	x.Segmentations != nil ||
+		// 	x.Rbnf != nil ||
+		// 	x.Annotations != nil ||
+		// 	x.Metadata != nil {
+
+		// TODO: support POSIX natively, albeit non-standard.
+		// Until then, the first "_POSIX" in the locale name is rewritten to
+		// the "-u-va-posix" extension before the tag is made.
+		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
+		m[tag] = true
+		// }
+	}
+
+	// TODO: plural rules are also defined for the deprecated tags:
+	//    iw mo sh tl
+	// Consider removing these as compact tags.
+
+	// Include locales for plural rules, which uses a different structure.
+	for _, plurals := range b.supp.Plurals {
+		for _, rules := range plurals.PluralRules {
+			for _, lang := range strings.Split(rules.Locales, " ") {
+				m[language.Make(lang)] = true
+			}
+		}
+	}
+
+	var coreTags []language.CompactCoreInfo
+	var special []string
+
+	// Split the collected tags: tags without variants and extensions are
+	// stored in their compact core encoding, all others as strings. The only
+	// extension accepted is "u-va-posix"; anything else aborts generation.
+	for t := range m {
+		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
+			log.Fatalf("Unexpected extension %v in %v", x, t)
+		}
+		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
+			cci, ok := language.GetCompactCore(t)
+			if !ok {
+				log.Fatalf("Locale for non-basic language %q", t)
+			}
+			coreTags = append(coreTags, cci)
+		} else {
+			special = append(special, t.String())
+		}
+	}
+
+	w := b.w
+
+	// Sort both lists so that the emitted IDs do not depend on map iteration
+	// order.
+	sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
+	sort.Strings(special)
+
+	w.WriteComment(`
+	NumCompactTags is the number of common tags. The maximum tag is
+	NumCompactTags-1.`)
+	w.WriteConst("NumCompactTags", len(m))
+
+	// Core tags get the IDs 0..len(coreTags)-1; special tags follow directly
+	// after them.
+	fmt.Fprintln(w, "const (")
+	for i, t := range coreTags {
+		fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
+	}
+	for i, t := range special {
+		fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
+	}
+	fmt.Fprintln(w, ")")
+
+	w.WriteVar("coreTags", coreTags)
+
+	w.WriteConst("specialTagsStr", strings.Join(special, " "))
+}
+
+// ident derives the name of the ID constant for the tag string s: every
+// hyphen is removed and the suffix "Index" is appended.
+func ident(s string) string {
+	return strings.ReplaceAll(s, "-", "") + "Index"
+}
@@ -0,0 +1,54 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+import (
+	"log"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/language"
+	"golang.org/x/text/internal/language/compact"
+	"golang.org/x/text/unicode/cldr"
+)
+
+// main generates parents.go for the compact package. For every CLDR locale
+// that has an exact compact index it records the compact index of the closest
+// ancestor that also has an exact compact index, or 0 (und) if there is none.
+func main() {
+	archive := gen.OpenCLDRCoreZip()
+	defer archive.Close()
+
+	dec := &cldr.Decoder{}
+	data, err := dec.DecodeZip(archive)
+	if err != nil {
+		log.Fatalf("DecodeZip: %v", err)
+	}
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("parents.go", "compact")
+
+	// nearestAncestor walks up the parent chain of tag until it reaches und
+	// and returns the first exact compact index found on the way.
+	nearestAncestor := func(tag language.Tag) compact.ID {
+		for p := tag.Parent(); p != language.Und; p = p.Parent() {
+			if x, ok := compact.FromTag(p); ok {
+				return x
+			}
+		}
+		return compact.ID(0) // und
+	}
+
+	// Create parents table.
+	type ID uint16
+	parents := make([]ID, compact.NumCompactTags)
+	for _, loc := range data.Locales() {
+		tag := language.MustParse(loc)
+		if index, ok := compact.FromTag(tag); ok {
+			parents[index] = ID(nearestAncestor(tag))
+		}
+	}
+
+	w.WriteComment(`
+	parents maps a compact index of a tag to the compact index of the parent of
+	this tag.`)
+	w.WriteVar("parents", parents)
+}
@@ -0,0 +1,38 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package compact
+
+import (
+	"testing"
+
+	"golang.org/x/text/internal/language"
+)
+
+// TestParents checks, for a fixed set of tags, that the parents table maps the
+// compact index of the tag to the compact index of the expected parent.
+func TestParents(t *testing.T) {
+	testCases := []struct {
+		tag, parent string
+	}{
+		{"af", "und"},
+		{"en", "und"},
+		{"en-001", "en"},
+		{"en-AU", "en-001"},
+		{"en-US", "en"},
+		{"en-US-u-va-posix", "en-US"},
+		{"ca-ES-valencia", "ca-ES"},
+	}
+	for _, tc := range testCases {
+		tag, ok := LanguageID(Make(language.MustParse(tc.tag)))
+		if !ok {
+			// The message previously said "flag"; the value is a language tag.
+			t.Fatalf("Could not get index of tag %s", tc.tag)
+		}
+		want, ok := LanguageID(Make(language.MustParse(tc.parent)))
+		if !ok {
+			t.Fatalf("Could not get index of parent %s of tag %s", tc.parent, tc.tag)
+		}
+		if got := parents[tag]; got != want {
+			t.Errorf("Parent[%s] = %d; want %d (%s)", tc.tag, got, want, tc.parent)
+		}
+	}
+}
@@ -0,0 +1,260 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_index.go -output tables.go
+//go:generate go run gen_parents.go
+
+package compact
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed.
+type Tag struct {
+	// NOTE: exported tags will become part of the public API.
+
+	// language is the compact ID reported by LanguageID.
+	language ID
+	// locale is the compact ID reported by RegionalID. Make sets it to a
+	// value different from language only for a valid "rg" region override.
+	locale   ID
+	// full holds the complete tag when Make could not represent it exactly
+	// with the two IDs; it is nil otherwise.
+	full     fullTag // always a language.Tag for now.
+}
+
+const _und = 0
+
+// fullTag is the set of methods Tag needs from the value stored in its full
+// field.
+type fullTag interface {
+	IsRoot() bool
+	Parent() language.Tag
+}
+
+// Make a compact Tag from a fully specified internal language Tag.
+//
+// If t carries an "rg" type of the form "<region>zzzz" with a parsable
+// two-letter region, the language ID is computed from t without the "rg" key
+// and the locale ID from that tag with its region replaced by the "rg" region.
+// Otherwise both IDs are the ID of t itself. The original tag is kept in the
+// full field whenever a lookup was not exact.
+func Make(t language.Tag) (tag Tag) {
+	if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
+		if r, err := language.ParseRegion(region[:2]); err == nil {
+			// Remember the unmodified tag; t is altered below.
+			tFull := t
+			t, _ = t.SetTypeForKey("rg", "")
+			// TODO: should we not consider "va" for the language tag?
+			var exact1, exact2 bool
+			tag.language, exact1 = FromTag(t)
+			t.RegionID = r
+			tag.locale, exact2 = FromTag(t)
+			if !exact1 || !exact2 {
+				tag.full = tFull
+			}
+			return tag
+		}
+	}
+	// No usable region override: language and locale share the same ID.
+	lang, ok := FromTag(t)
+	tag.language = lang
+	tag.locale = lang
+	if !ok {
+		tag.full = t
+	}
+	return tag
+}
+
+// Tag returns an internal language Tag version of this tag. If the full tag
+// was retained it is returned as is. Otherwise the tag is rebuilt from the
+// language ID, with an "rg" type derived from the region of the locale ID
+// when the two IDs differ.
+func (t Tag) Tag() language.Tag {
+	if t.full != nil {
+		return t.full.(language.Tag)
+	}
+	tag := t.language.Tag()
+	if t.language == t.locale {
+		return tag
+	}
+	region := t.locale.Tag().RegionID.String()
+	tag, _ = tag.SetTypeForKey("rg", strings.ToLower(region)+"zzzz")
+	return tag
+}
+
+// IsCompact reports whether this tag is fully defined in terms of ID, that is,
+// whether no full tag had to be retained alongside the compact IDs.
+//
+// It uses a value receiver like every other method of Tag; the method remains
+// callable on both Tag and *Tag values.
+func (t Tag) IsCompact() bool {
+	return t.full == nil
+}
+
+// MayHaveVariants reports whether a tag may have variants. A false result
+// guarantees that the tag has no variants.
+func (t Tag) MayHaveVariants() bool {
+	if t.full != nil {
+		return true
+	}
+	return int(t.language) >= len(coreTags)
+}
+
+// MayHaveExtensions reports whether a tag may have extensions. A false result
+// guarantees that the tag has none.
+func (t Tag) MayHaveExtensions() bool {
+	switch {
+	case t.full != nil:
+		return true
+	case int(t.language) >= len(coreTags):
+		return true
+	default:
+		return t.language != t.locale
+	}
+}
+
+// IsRoot reports whether t is equal to language "und". When a full tag is
+// retained the answer is delegated to it.
+func (t Tag) IsRoot() bool {
+	if t.full == nil {
+		return t.language == _und
+	}
+	return t.full.IsRoot()
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+func (t Tag) Parent() Tag {
+	switch {
+	case t.full != nil:
+		return Make(t.full.Parent())
+	case t.language != t.locale:
+		// Simulate stripping -u-rg-xxxxxx
+		return Tag{language: t.language, locale: t.language}
+	}
+	// TODO: use parent lookup table once cycle from internal package is
+	// removed. Probably by internalizing the table and declaring this fast
+	// enough.
+	// lang := compactID(internal.Parent(uint16(t.language)))
+	parent, _ := FromTag(t.language.Tag().Parent())
+	return Tag{language: parent, locale: parent}
+}
+
+// nextToken returns token t and the rest of the string. The first byte of s is
+// skipped; t runs up to, but does not include, the next "-", and tail starts
+// at that "-". If there is no further "-", tail is empty.
+func nextToken(s string) (t, tail string) {
+	rest := s[1:]
+	if i := strings.IndexByte(rest, '-'); i >= 0 {
+		return rest[:i], rest[i:]
+	}
+	return rest, ""
+}
+
+// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
+// for which data exists in the text repository. The index will change over
+// time and should not be stored in persistent storage. If t does not match a
+// compact index, exact will be false and the compact index will be returned
+// for the first match after repeatedly taking the Parent of t.
+func LanguageID(t Tag) (id ID, exact bool) {
+	return t.language, t.full == nil
+}
+
+// RegionalID returns the ID for the regional variant of this tag. This index is
+// used to indicate region-specific overrides, such as default currency, default
+// calendar and week data, default time cycle, and default measurement system
+// and unit preferences.
+//
+// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
+// settings for currency, number formatting, etc. The CompactIndex for this tag
+// will be that for en-GB, while the RegionalID will be the one corresponding to
+// en-US.
+//
+// exact is true if t retains no full tag, that is, if t is fully described by
+// its compact IDs.
+func RegionalID(t Tag) (id ID, exact bool) {
+	return t.locale, t.full == nil
+}
+
+// LanguageTag returns t stripped of regional variant indicators.
+//
+// At the moment this means it is stripped of a regional and variant subtag "rg"
+// and "va" in the "u" extension.
+func (t Tag) LanguageTag() Tag {
+	if t.full == nil {
+		return Tag{language: t.language, locale: t.language}
+	}
+	tt := t.Tag()
+	// SetTypeForKey hands back the modified tag, as its other uses in this
+	// file show, so the result has to be assigned for the key to be removed.
+	// Previously the results were discarded and nothing was stripped. On
+	// error the tag is left as it was.
+	for _, key := range [...]string{"rg", "va"} {
+		if stripped, err := tt.SetTypeForKey(key, ""); err == nil {
+			tt = stripped
+		}
+	}
+	return Make(tt)
+}
+
+// RegionalTag returns the regional variant of the tag.
+//
+// At the moment this means that the region is set from the regional subtag
+// "rg" in the "u" extension.
+//
+// NOTE(review): when t retains a full tag, the builder assembled below is
+// never turned into a result and t itself is returned unchanged. This looks
+// unintended; confirm the desired result before relying on this path.
+func (t Tag) RegionalTag() Tag {
+	rt := Tag{language: t.locale, locale: t.locale}
+	if t.full == nil {
+		return rt
+	}
+	// Start from the tag of the locale ID and copy over the variants and
+	// extensions of the full tag.
+	b := language.Builder{}
+	tag := t.Tag()
+	// tag, _ = tag.SetTypeForKey("rg", "")
+	b.SetTag(t.locale.Tag())
+	if v := tag.Variants(); v != "" {
+		for _, v := range strings.Split(v, "-") {
+			b.AddVariant(v)
+		}
+	}
+	for _, e := range tag.Extensions() {
+		b.AddExt(e)
+	}
+	return t
+}
+
+// FromTag reports the closest matching ID for an internal language Tag.
+//
+// exact is true only if the returned ID represents t without loss. Otherwise
+// the ID of the closest match is returned: first t reduced to the parts the
+// index knows about, then, for a tag with a region but no script, its
+// maximized form, and finally the ancestors of t. If nothing matches, the
+// result is (0, false).
+func FromTag(t language.Tag) (id ID, exact bool) {
+	// TODO: perhaps give more frequent tags a lower index.
+	// TODO: we could make the indexes stable. This will exclude some
+	//       possibilities for optimization, so don't do this quite yet.
+	exact = true
+
+	// b, s, and r are used below as the language, script, and region IDs.
+	b, s, r := t.Raw()
+	if t.HasString() {
+		if t.IsPrivateUse() {
+			// We have no entries for user-defined tags.
+			return 0, false
+		}
+		// hasExtra records whether t, after the reduction below, still has a
+		// variant or a "va" type that must be looked up in specialTags.
+		hasExtra := false
+		if t.HasVariants() {
+			if t.HasExtensions() {
+				// Rebuild the tag from its core IDs and variants only, which
+				// drops the extensions; the match is no longer exact.
+				build := language.Builder{}
+				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
+				build.AddVariant(t.Variants())
+				exact = false
+				t = build.Make()
+			}
+			hasExtra = true
+		} else if _, ok := t.Extension('u'); ok {
+			// TODO: va may mean something else. Consider not considering it.
+			// Strip all but the 'va' entry.
+			old := t
+			variant := t.TypeForKey("va")
+			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
+			if variant != "" {
+				t, _ = t.SetTypeForKey("va", variant)
+				hasExtra = true
+			}
+			// The match is exact only if stripping changed nothing.
+			exact = old == t
+		} else {
+			exact = false
+		}
+		if hasExtra {
+			// We have some variants.
+			for i, s := range specialTags {
+				if s == t {
+					return ID(i + len(coreTags)), exact
+				}
+			}
+			exact = false
+		}
+	}
+	if x, ok := getCoreIndex(t); ok {
+		return x, exact
+	}
+	exact = false
+	if r != 0 && s == 0 {
+		// Deal with cases where an extra script is inserted for the region.
+		// The maximized tag is local to this block; the outer t is unchanged.
+		t, _ := t.Maximize()
+		if x, ok := getCoreIndex(t); ok {
+			return x, exact
+		}
+	}
+	for t = t.Parent(); t != root; t = t.Parent() {
+		// No variants specified: just compare core components.
+		// The key has the form lllssrrr, where l, s, and r are nibbles for
+		// respectively the langID, scriptID, and regionID.
+		if x, ok := getCoreIndex(t); ok {
+			return x, exact
+		}
+	}
+	return 0, exact
+}
+
+var root = language.Tag{}
@@ -0,0 +1,236 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package compact
+
+import (
+	"reflect"
+	"testing"
+
+	"golang.org/x/text/internal/language"
+)
+
+// mustParse parses s as an internal language tag and converts it to a compact
+// Tag. It panics with the parse error if s cannot be parsed.
+func mustParse(s string) Tag {
+	parsed, err := language.Parse(s)
+	if err == nil {
+		return Make(parsed)
+	}
+	panic(err)
+}
+
+// TestTagSize guards against Tag growing beyond 24 bytes.
+func TestTagSize(t *testing.T) {
+	if size := reflect.TypeOf(Tag{}).Size(); size > 24 {
+		t.Errorf("size of Tag was %d; want 24", size)
+	}
+}
+
+// TestNoPublic verifies that Tag does not have exported fields.
+func TestNoPublic(t *testing.T) {
+	noExportedField(t, reflect.TypeOf(Tag{}))
+}
+
+// noExportedField reports a test error for every exported field of the struct
+// type typ and descends into the types of embedded fields.
+func noExportedField(t *testing.T, typ reflect.Type) {
+	for i, n := 0, typ.NumField(); i < n; i++ {
+		field := typ.Field(i)
+		// PkgPath is empty for exported fields.
+		if exported := field.PkgPath == ""; exported {
+			t.Errorf("Tag may not have exported fields, but has field %q", field.Name)
+		}
+		if field.Anonymous {
+			noExportedField(t, field.Type)
+		}
+	}
+}
+
+// TestEquality checks that a tag built from each parse test input is equal to
+// the tag obtained by formatting it as a string and parsing it again.
+func TestEquality(t *testing.T) {
+	for i, tc := range parseTests() {
+		want := mk(tc.in)
+		got := mustParse(want.Tag().String())
+		if want == got {
+			continue
+		}
+		t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, tc.in, got, want)
+	}
+}
+
+// compactTest is a test case for the compact ID lookups.
+type compactTest struct {
+	tag   string // input tag
+	index ID     // expected ID
+	ok    bool   // expected value of the exact result
+}
+
+// compactTests are the cases shared by TestLanguageID and TestRegionalID.
+var compactTests = []compactTest{
+	// TODO: these values will change with each CLDR update. This issue
+	// will be solved if we decide to fix the indexes.
+	{"und", undIndex, true},
+	{"ca-ES-valencia", caESvalenciaIndex, true},
+	{"ca-ES-valencia-u-va-posix", caESvalenciaIndex, false},
+	{"ca-ES-valencia-u-co-phonebk", caESvalenciaIndex, false},
+	{"ca-ES-valencia-u-co-phonebk-va-posix", caESvalenciaIndex, false},
+	{"x-klingon", 0, false},
+	{"en-US", enUSIndex, true},
+	{"en-US-u-va-posix", enUSuvaposixIndex, true},
+	{"en", enIndex, true},
+	{"en-u-co-phonebk", enIndex, false},
+	{"en-001", en001Index, true},
+	{"zh-Hant-HK", zhHantHKIndex, true},
+	{"zh-HK", zhHantHKIndex, false}, // maximized to zh-Hant-HK
+	{"nl-Beng", 0, false},           // parent skips script
+	{"nl-NO", nlIndex, false},       // region is ignored
+	{"nl-Latn-NO", nlIndex, false},
+	{"nl-Latn-NO-u-co-phonebk", nlIndex, false},
+	{"nl-Latn-NO-valencia", nlIndex, false},
+	{"nl-Latn-NO-oxendict", nlIndex, false},
+	{"sh", shIndex, true}, // From plural rules.
+}
+
+// TestLanguageID checks the ID and exactness reported by LanguageID for the
+// shared compact tests plus cases with an "rg" region override.
+func TestLanguageID(t *testing.T) {
+	extra := []compactTest{
+		{"en-GB", enGBIndex, true},
+		{"en-GB-u-rg-uszzzz", enGBIndex, true},
+		{"en-GB-u-rg-USZZZZ", enGBIndex, true},
+		{"en-GB-u-rg-uszzzz-va-posix", enGBIndex, false},
+		{"en-GB-u-co-phonebk-rg-uszzzz", enGBIndex, false},
+		// Invalid region specifications are ignored.
+		{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
+		{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
+	}
+	for _, tc := range append(compactTests, extra...) {
+		got, exact := LanguageID(mustParse(tc.tag))
+		if got == tc.index && exact == tc.ok {
+			continue
+		}
+		t.Errorf("%s: got %d, %v; want %d %v", tc.tag, got, exact, tc.index, tc.ok)
+	}
+}
+
+// TestRegionalID checks the ID and exactness reported by RegionalID for the
+// shared compact tests plus cases with an "rg" region override.
+func TestRegionalID(t *testing.T) {
+	extra := []compactTest{
+		{"en-GB", enGBIndex, true},
+		{"en-GB-u-rg-uszzzz", enUSIndex, true},
+		{"en-GB-u-rg-USZZZZ", enUSIndex, true},
+		// TODO: use different exact values for language and regional tag?
+		{"en-GB-u-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
+		{"en-GB-u-co-phonebk-rg-uszzzz-va-posix", enUSuvaposixIndex, false},
+		{"en-GB-u-co-phonebk-rg-uszzzz", enUSIndex, false},
+		// Invalid region specifications are ignored.
+		{"en-GB-u-rg-usz-va-posix", enGBIndex, false},
+		{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
+	}
+	for _, tc := range append(compactTests, extra...) {
+		got, exact := RegionalID(mustParse(tc.tag))
+		if got == tc.index && exact == tc.ok {
+			continue
+		}
+		t.Errorf("%s: got %d, %v; want %d %v", tc.tag, got, exact, tc.index, tc.ok)
+	}
+}
+
+// TestParent checks that Tag.Parent of each input tag equals the tag parsed
+// from the expected parent string.
+func TestParent(t *testing.T) {
+	tests := []struct{ in, out string }{
+		// Strip variants and extensions first
+		{"de-u-co-phonebk", "de"},
+		{"de-1994", "de"},
+		{"de-Latn-1994", "de"}, // remove superfluous script.
+
+		// Ensure the canonical Tag for an entry is in the chain for base-script
+		// pairs.
+		{"zh-Hans", "zh"},
+
+		// Skip the script if it is the maximized version. CLDR files for the
+		// skipped tag are always empty.
+		{"zh-Hans-TW", "zh"},
+		{"zh-Hans-CN", "zh"},
+
+		// Insert the script if the maximized script is not the same as the
+		// maximized script of the base language.
+		{"zh-TW", "zh-Hant"},
+		{"zh-HK", "zh-Hant"},
+		{"zh-Hant-TW", "zh-Hant"},
+		{"zh-Hant-HK", "zh-Hant"},
+
+		// Non-default script skips to und.
+		// CLDR
+		{"az-Cyrl", "und"},
+		{"bs-Cyrl", "und"},
+		{"en-Dsrt", "und"},
+		{"ha-Arab", "und"},
+		{"mn-Mong", "und"},
+		{"pa-Arab", "und"},
+		{"shi-Latn", "und"},
+		{"sr-Latn", "und"},
+		{"uz-Arab", "und"},
+		{"uz-Cyrl", "und"},
+		{"vai-Latn", "und"},
+		{"zh-Hant", "und"},
+		// extra
+		{"nl-Cyrl", "und"},
+
+		// World english inherits from en-001.
+		{"en-150", "en-001"},
+		{"en-AU", "en-001"},
+		{"en-BE", "en-001"},
+		{"en-GG", "en-001"},
+		{"en-GI", "en-001"},
+		{"en-HK", "en-001"},
+		{"en-IE", "en-001"},
+		{"en-IM", "en-001"},
+		{"en-IN", "en-001"},
+		{"en-JE", "en-001"},
+		{"en-MT", "en-001"},
+		{"en-NZ", "en-001"},
+		{"en-PK", "en-001"},
+		{"en-SG", "en-001"},
+
+		// Spanish in Latin-American countries have es-419 as parent.
+		{"es-AR", "es-419"},
+		{"es-BO", "es-419"},
+		{"es-CL", "es-419"},
+		{"es-CO", "es-419"},
+		{"es-CR", "es-419"},
+		{"es-CU", "es-419"},
+		{"es-DO", "es-419"},
+		{"es-EC", "es-419"},
+		{"es-GT", "es-419"},
+		{"es-HN", "es-419"},
+		{"es-MX", "es-419"},
+		{"es-NI", "es-419"},
+		{"es-PA", "es-419"},
+		{"es-PE", "es-419"},
+		{"es-PR", "es-419"},
+		{"es-PY", "es-419"},
+		{"es-SV", "es-419"},
+		{"es-US", "es-419"},
+		{"es-UY", "es-419"},
+		{"es-VE", "es-419"},
+		// exceptions (according to CLDR)
+		{"es-CW", "es"},
+
+		// Inherit from pt-PT, instead of pt for these countries.
+		{"pt-AO", "pt-PT"},
+		{"pt-CV", "pt-PT"},
+		{"pt-GW", "pt-PT"},
+		{"pt-MO", "pt-PT"},
+		{"pt-MZ", "pt-PT"},
+		{"pt-ST", "pt-PT"},
+		{"pt-TL", "pt-PT"},
+
+		{"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
+		{"en-GB-u-rg-uszzzz", "en-GB"},
+		{"en-US-u-va-posix", "en-US"},
+
+		// Difference between language and regional tag.
+		{"ca-ES-valencia", "ca-ES"},
+		{"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"}, // t.full != nil
+		{"en-US-u-va-variant", "en-US"},
+		{"en-u-va-variant", "en"}, // t.full != nil
+		{"en-u-rg-gbzzzz", "en"},
+		{"en-US-u-rg-gbzzzz", "en-US"},
+		{"nl-US-u-rg-gbzzzz", "nl-US"}, // t.full != nil
+	}
+	// Compare the compact Tag values directly.
+	for _, tt := range tests {
+		tag := mustParse(tt.in)
+		if p := mustParse(tt.out); p != tag.Parent() {
+			t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
+		}
+	}
+}
@@ -0,0 +1,120 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package compact
+
+// parents maps a compact index of a tag to the compact index of the parent of
+// this tag.
+var parents = []ID{ // 775 elements
+	// Entry 0 - 3F
+	0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
+	0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
+	0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
+	0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
+	0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
+	// Entry 40 - 7F
+	0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
+	0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
+	0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
+	0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
+	0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
+	0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
+	0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
+	0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
+	// Entry 80 - BF
+	0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
+	0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
+	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
+	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
+	// Entry C0 - FF
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
+	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
+	0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
+	0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
+	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
+	// Entry 100 - 13F
+	0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
+	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
+	0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
+	0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
+	0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	// Entry 140 - 17F
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
+	0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
+	0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
+	0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
+	0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
+	0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
+	// Entry 180 - 1BF
+	0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
+	0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
+	0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
+	0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
+	0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
+	0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
+	0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
+	0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
+	// Entry 1C0 - 1FF
+	0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
+	0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
+	0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
+	0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
+	0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
+	0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
+	0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
+	0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
+	// Entry 200 - 23F
+	0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
+	0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
+	0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
+	0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
+	0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
+	0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
+	0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
+	// Entry 240 - 27F
+	0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
+	0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
+	0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
+	0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
+	0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
+	0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
+	0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
+	0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
+	// Entry 280 - 2BF
+	0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
+	0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
+	0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
+	0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
+	0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
+	0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
+	0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
+	0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
+	// Entry 2C0 - 2FF
+	0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
+	0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
+	0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
+	0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
+	0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
+	0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
+	0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
+	0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
+	// Entry 300 - 33F
+	0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
+} // Size: 1574 bytes
+
+// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
@@ -0,0 +1,201 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package compact
+
+import (
+	"strings"
+	"testing"
+
+	"golang.org/x/text/internal/language"
+)
+
+// errSyntax is a package-local alias for language.ErrSyntax.
+var errSyntax = language.ErrSyntax
+
+// parseTest describes one entry of the table returned by parseTests: an input
+// string together with the parts expected after parsing it.
+type parseTest struct {
+	i                    int // the index of this test
+	// in is the tag string under test.
+	in                   string
+	// lang, script, region, variants and ext are the expected parts; partChecks
+	// compares them against the tag produced for in.
+	lang, script, region string
+	variants, ext        string
+	extList              []string // only used when more than one extension is present
+	invalid              bool
+	rewrite              bool // special rewrite not handled by parseTag
+	changed              bool // string needed to be reformatted
+}
+
+// parseTests returns the table of tag strings and their expected parts. After
+// building the table it records each entry's index in i and fills in whichever
+// of ext and extList was left unset from the other.
+func parseTests() []parseTest {
+	tests := []parseTest{
+		{in: "root", lang: "und"},
+		{in: "und", lang: "und"},
+		{in: "en", lang: "en"},
+
+		{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
+		{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
+		{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
+
+		{in: "xy", lang: "und", invalid: true},
+		{in: "en-ZY", lang: "en", invalid: true},
+		{in: "gsw", lang: "gsw"},
+		{in: "sr_Latn", lang: "sr", script: "Latn"},
+		{in: "af-Arab", lang: "af", script: "Arab"},
+		{in: "nl-BE", lang: "nl", region: "BE"},
+		{in: "es-419", lang: "es", region: "419"},
+		{in: "und-001", lang: "und", region: "001"},
+		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
+		// Variants
+		{in: "de-1901", lang: "de", variants: "1901"},
+		// Accept with unsuppressed script.
+		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
+		// Specialized.
+		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
+		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
+		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
+		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
+		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
+		// Maximum number of variants while adhering to prefix rules.
+		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
+
+		// Sorting.
+		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
+		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
+
+		// Duplicate variants are removed; this is not an error.
+		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
+
+		// Variants that do not have correct prefixes. We still accept these.
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
+		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+
+		// Invalid variant.
+		{in: "de-1902", lang: "de", variants: "", invalid: true},
+
+		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
+		// private use and extensions
+		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
+		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
+		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
+		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
+		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
+		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
+		{in: "en-v-c", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
+		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
+		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
+		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
+		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
+		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
+		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
+		{in: "en-u-c", lang: "en", ext: "", invalid: true},
+		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
+		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
+		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
+		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
+		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
+		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
+		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
+		// Invalid "u" extension. Drop invalid parts.
+		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
+		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
+		// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
+		// TODO: Consider eliminating duplicates and returning an error.
+		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
+		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
+		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
+		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
+		// Not necessary to have changed here.
+		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
+		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
+		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
+		{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
+		{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
+		{in: "fr-est", lang: "et", changed: true},
+		{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
+		{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
+		// invalid
+		{in: "", lang: "und", invalid: true},
+		{in: "-", lang: "und", invalid: true},
+		{in: "x", lang: "und", invalid: true},
+		{in: "x-", lang: "und", invalid: true},
+		{in: "x--", lang: "und", invalid: true},
+		{in: "a-a-b-c-d", lang: "und", invalid: true},
+		{in: "en-", lang: "en", invalid: true},
+		{in: "enne-", lang: "und", invalid: true},
+		{in: "en.", lang: "und", invalid: true},
+		{in: "en.-latn", lang: "und", invalid: true},
+		{in: "en.-en", lang: "en", invalid: true},
+		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
+		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
+		// TODO: check key-value validity
+		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
+		{in: "en-t-abcd", lang: "en", invalid: true},
+		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
+		// rewrites (more tests in TestGrandfathered)
+		{in: "zh-min-nan", lang: "nan"},
+		{in: "zh-yue", lang: "yue"},
+		{in: "zh-xiang", lang: "hsn", rewrite: true},
+		{in: "zh-guoyu", lang: "cmn", rewrite: true},
+		{in: "iw", lang: "iw"},
+		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
+		{in: "i-klingon", lang: "tlh", rewrite: true},
+	}
+	// Normalize the table: store each entry's index, derive ext from extList
+	// (joined with "-") when a list was given, and wrap a lone ext in a
+	// one-element extList otherwise.
+	for i, tt := range tests {
+		tests[i].i = i
+		if tt.extList != nil {
+			tests[i].ext = strings.Join(tt.extList, "-")
+		}
+		if tt.ext != "" && tt.extList == nil {
+			tests[i].extList = []string{tt.ext}
+		}
+	}
+	return tests
+}
+
+// partChecks invokes f for every entry of parseTests and, unless f asks to
+// skip the entry, verifies the language, script, region, variants and
+// extensions of the returned tag against the entry's expectations.
+func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
+	tests := parseTests()
+	for i := range tests {
+		tt := &tests[i]
+		tag, skip := f(tt)
+		if skip {
+			continue
+		}
+		got := tag.Tag()
+
+		wantLang, _ := language.ParseBase(tt.lang)
+		if wantLang != got.LangID {
+			t.Errorf("%d: lang was %q; want %q", i, got.LangID, wantLang)
+		}
+		wantScript, _ := language.ParseScript(tt.script)
+		if wantScript != got.ScriptID {
+			t.Errorf("%d: script was %q; want %q", i, got.ScriptID, wantScript)
+		}
+		wantRegion, _ := language.ParseRegion(tt.region)
+		if wantRegion != got.RegionID {
+			t.Errorf("%d: region was %q; want %q", i, got.RegionID, wantRegion)
+		}
+
+		variants := got.Variants()
+		if variants != "" {
+			// Skip the first byte so the value is comparable to tt.variants.
+			variants = variants[1:]
+		}
+		if variants != tt.variants {
+			t.Errorf("%d: variants was %q; want %q", i, variants, tt.variants)
+		}
+
+		exts := strings.Join(got.Extensions(), "-")
+		if exts != tt.ext {
+			t.Errorf("%d: extensions were %q; want %q", i, exts, tt.ext)
+		}
+	}
+}
+
+// mk parses s with language.Parse, ignoring any error, and converts the
+// result into a compact Tag with Make.
+func mk(s string) Tag {
+	parsed, _ := language.Parse(s)
+	compactTag := Make(parsed)
+	return compactTag
+}
@@ -0,0 +1,91 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package compact
+
+// Predefined compact tags. Every named language below sets its language and
+// locale fields to the same index constant.
+var (
+	// und is the unexported zero Tag.
+	und = Tag{}
+
+	// Und is the exported zero Tag.
+	Und Tag = Tag{}
+
+	Afrikaans            Tag = Tag{language: afIndex, locale: afIndex}
+	Amharic              Tag = Tag{language: amIndex, locale: amIndex}
+	Arabic               Tag = Tag{language: arIndex, locale: arIndex}
+	ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
+	Azerbaijani          Tag = Tag{language: azIndex, locale: azIndex}
+	Bulgarian            Tag = Tag{language: bgIndex, locale: bgIndex}
+	Bengali              Tag = Tag{language: bnIndex, locale: bnIndex}
+	Catalan              Tag = Tag{language: caIndex, locale: caIndex}
+	Czech                Tag = Tag{language: csIndex, locale: csIndex}
+	Danish               Tag = Tag{language: daIndex, locale: daIndex}
+	German               Tag = Tag{language: deIndex, locale: deIndex}
+	Greek                Tag = Tag{language: elIndex, locale: elIndex}
+	English              Tag = Tag{language: enIndex, locale: enIndex}
+	AmericanEnglish      Tag = Tag{language: enUSIndex, locale: enUSIndex}
+	BritishEnglish       Tag = Tag{language: enGBIndex, locale: enGBIndex}
+	Spanish              Tag = Tag{language: esIndex, locale: esIndex}
+	EuropeanSpanish      Tag = Tag{language: esESIndex, locale: esESIndex}
+	LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
+	Estonian             Tag = Tag{language: etIndex, locale: etIndex}
+	Persian              Tag = Tag{language: faIndex, locale: faIndex}
+	Finnish              Tag = Tag{language: fiIndex, locale: fiIndex}
+	Filipino             Tag = Tag{language: filIndex, locale: filIndex}
+	French               Tag = Tag{language: frIndex, locale: frIndex}
+	CanadianFrench       Tag = Tag{language: frCAIndex, locale: frCAIndex}
+	Gujarati             Tag = Tag{language: guIndex, locale: guIndex}
+	Hebrew               Tag = Tag{language: heIndex, locale: heIndex}
+	Hindi                Tag = Tag{language: hiIndex, locale: hiIndex}
+	Croatian             Tag = Tag{language: hrIndex, locale: hrIndex}
+	Hungarian            Tag = Tag{language: huIndex, locale: huIndex}
+	Armenian             Tag = Tag{language: hyIndex, locale: hyIndex}
+	Indonesian           Tag = Tag{language: idIndex, locale: idIndex}
+	Icelandic            Tag = Tag{language: isIndex, locale: isIndex}
+	Italian              Tag = Tag{language: itIndex, locale: itIndex}
+	Japanese             Tag = Tag{language: jaIndex, locale: jaIndex}
+	Georgian             Tag = Tag{language: kaIndex, locale: kaIndex}
+	Kazakh               Tag = Tag{language: kkIndex, locale: kkIndex}
+	Khmer                Tag = Tag{language: kmIndex, locale: kmIndex}
+	Kannada              Tag = Tag{language: knIndex, locale: knIndex}
+	Korean               Tag = Tag{language: koIndex, locale: koIndex}
+	Kirghiz              Tag = Tag{language: kyIndex, locale: kyIndex}
+	Lao                  Tag = Tag{language: loIndex, locale: loIndex}
+	Lithuanian           Tag = Tag{language: ltIndex, locale: ltIndex}
+	Latvian              Tag = Tag{language: lvIndex, locale: lvIndex}
+	Macedonian           Tag = Tag{language: mkIndex, locale: mkIndex}
+	Malayalam            Tag = Tag{language: mlIndex, locale: mlIndex}
+	Mongolian            Tag = Tag{language: mnIndex, locale: mnIndex}
+	Marathi              Tag = Tag{language: mrIndex, locale: mrIndex}
+	Malay                Tag = Tag{language: msIndex, locale: msIndex}
+	Burmese              Tag = Tag{language: myIndex, locale: myIndex}
+	Nepali               Tag = Tag{language: neIndex, locale: neIndex}
+	Dutch                Tag = Tag{language: nlIndex, locale: nlIndex}
+	Norwegian            Tag = Tag{language: noIndex, locale: noIndex}
+	Punjabi              Tag = Tag{language: paIndex, locale: paIndex}
+	Polish               Tag = Tag{language: plIndex, locale: plIndex}
+	Portuguese           Tag = Tag{language: ptIndex, locale: ptIndex}
+	BrazilianPortuguese  Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
+	EuropeanPortuguese   Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
+	Romanian             Tag = Tag{language: roIndex, locale: roIndex}
+	Russian              Tag = Tag{language: ruIndex, locale: ruIndex}
+	Sinhala              Tag = Tag{language: siIndex, locale: siIndex}
+	Slovak               Tag = Tag{language: skIndex, locale: skIndex}
+	Slovenian            Tag = Tag{language: slIndex, locale: slIndex}
+	Albanian             Tag = Tag{language: sqIndex, locale: sqIndex}
+	Serbian              Tag = Tag{language: srIndex, locale: srIndex}
+	SerbianLatin         Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
+	Swedish              Tag = Tag{language: svIndex, locale: svIndex}
+	Swahili              Tag = Tag{language: swIndex, locale: swIndex}
+	Tamil                Tag = Tag{language: taIndex, locale: taIndex}
+	Telugu               Tag = Tag{language: teIndex, locale: teIndex}
+	Thai                 Tag = Tag{language: thIndex, locale: thIndex}
+	Turkish              Tag = Tag{language: trIndex, locale: trIndex}
+	Ukrainian            Tag = Tag{language: ukIndex, locale: ukIndex}
+	Urdu                 Tag = Tag{language: urIndex, locale: urIndex}
+	Uzbek                Tag = Tag{language: uzIndex, locale: uzIndex}
+	Vietnamese           Tag = Tag{language: viIndex, locale: viIndex}
+	Chinese              Tag = Tag{language: zhIndex, locale: zhIndex}
+	SimplifiedChinese    Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
+	TraditionalChinese   Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
+	Zulu                 Tag = Tag{language: zuIndex, locale: zuIndex}
+)
@@ -0,0 +1,167 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"sort"
+	"strings"
+)
+
+// A Builder allows constructing a Tag from individual components.
+// Its main user is Compose in the top-level language package.
+type Builder struct {
+	// Tag is the starting point for Make. SetTag overwrites its LangID,
+	// RegionID and ScriptID fields.
+	Tag Tag
+
+	private    string // the x extension
+	// variants holds the variant subtags; Make sorts them and writes each one
+	// preceded by '-'.
+	variants   []string
+	// extensions holds the extensions other than the x extension; Make sorts
+	// them and writes each one preceded by '-'.
+	extensions []string
+}
+
+// Make returns a new Tag from the current settings.
+//
+// When variants or extensions are present, they are sorted in place, written
+// after the core subtags (with the private-use extension last) and the
+// resulting string is re-parsed to produce the Tag. When only a private-use
+// extension is set, it becomes the tag string and RemakeString rebuilds it.
+//
+// Make may be called repeatedly: the private-use extension is appended to a
+// local copy of the extension list, so it is never accumulated in the Builder.
+func (b *Builder) Make() Tag {
+	t := b.Tag
+
+	if len(b.extensions) > 0 || len(b.variants) > 0 {
+		sort.Sort(sortVariants(b.variants))
+		sort.Strings(b.extensions)
+
+		exts := b.extensions
+		if b.private != "" {
+			// Clip the capacity so append always copies; appending straight
+			// onto b.extensions would duplicate the private-use extension on
+			// every subsequent call to Make.
+			exts = append(exts[:len(exts):len(exts)], b.private)
+		}
+		n := maxCoreSize + tokenLen(b.variants...) + tokenLen(exts...)
+		buf := make([]byte, n)
+		p := t.genCoreBytes(buf)
+		t.pVariant = byte(p)
+		p += appendTokens(buf[p:], b.variants...)
+		t.pExt = uint16(p)
+		p += appendTokens(buf[p:], exts...)
+		t.str = string(buf[:p])
+		// We may not always need to remake the string, but when or when not
+		// to do so is rather tricky.
+		scan := makeScanner(buf[:p])
+		t, _ = parse(&scan, "")
+		return t
+	}
+
+	if b.private != "" {
+		t.str = b.private
+		t.RemakeString()
+	}
+	return t
+}
+
+// SetTag replaces the builder's language, script, region, variants and
+// extensions with those of t. Any previously set values are discarded.
+func (b *Builder) SetTag(t Tag) {
+	b.Tag.LangID, b.Tag.ScriptID, b.Tag.RegionID = t.LangID, t.ScriptID, t.RegionID
+
+	// TODO: optimize
+	b.variants = b.variants[:0]
+	if vs := t.Variants(); vs != "" {
+		// vs[1:] skips the first byte before splitting on '-'.
+		b.variants = append(b.variants, strings.Split(vs[1:], "-")...)
+	}
+
+	b.private = ""
+	b.extensions = b.extensions[:0]
+	for _, ext := range t.Extensions() {
+		b.AddExt(ext)
+	}
+}
+
+// AddExt adds extension e to the tag. e must be a valid extension as returned
+// by Tag.Extension. If an extension of the same type already exists, e is
+// discarded, except for a -u extension, where the contents of e are appended
+// to the existing one.
+func (b *Builder) AddExt(e string) {
+	kind := e[0]
+	if kind == 'x' {
+		// Only the first private-use extension is kept.
+		if b.private == "" {
+			b.private = e
+		}
+		return
+	}
+	for i := range b.extensions {
+		if b.extensions[i][0] != kind {
+			continue
+		}
+		if kind == 'u' {
+			b.extensions[i] += e[1:]
+		}
+		return
+	}
+	b.extensions = append(b.extensions, e)
+}
+
+// SetExt sets the extension e on the tag. e must be a valid extension as
+// returned by Tag.Extension. An existing extension of the same type is
+// overwritten, except for a -u extension, where the contents of e are placed
+// in front of the existing ones.
+func (b *Builder) SetExt(e string) {
+	kind := e[0]
+	if kind == 'x' {
+		b.private = e
+		return
+	}
+	for i, cur := range b.extensions {
+		if cur[0] != kind {
+			continue
+		}
+		replacement := e
+		if kind == 'u' {
+			// Keep what was already there, after the new content.
+			replacement = e + cur[1:]
+		}
+		b.extensions[i] = replacement
+		return
+	}
+	b.extensions = append(b.extensions, e)
+}
+
+// AddVariant adds any number of variants. Empty strings are ignored.
+func (b *Builder) AddVariant(v ...string) {
+	for _, variant := range v {
+		if variant == "" {
+			continue
+		}
+		b.variants = append(b.variants, variant)
+	}
+}
+
+// ClearVariants removes any variants previously added, including those
+// copied from a Tag in SetTag. The variant slice is truncated to length zero,
+// so its backing array is reused by later additions.
+func (b *Builder) ClearVariants() {
+	b.variants = b.variants[:0]
+}
+
+// ClearExtensions drops every extension previously added, including the
+// private-use extension and anything copied from a Tag in SetTag.
+func (b *Builder) ClearExtensions() {
+	b.extensions, b.private = b.extensions[:0], ""
+}
+
+// tokenLen returns the number of bytes needed to write all tokens when each
+// one is preceded by a single separator byte.
+func tokenLen(token ...string) (n int) {
+	n = len(token) // one separator per token
+	for i := range token {
+		n += len(token[i])
+	}
+	return n
+}
+
+// appendTokens writes each token into b, preceded by '-', starting at offset
+// zero, and returns the total number of bytes the tokens occupy. b must be
+// large enough to hold them.
+func appendTokens(b []byte, token ...string) int {
+	pos := 0
+	for i := range token {
+		b[pos] = '-'
+		pos++
+		copy(b[pos:], token[i])
+		pos += len(token[i])
+	}
+	return pos
+}
+
+// sortVariants implements sort.Interface for a list of variants, ordering
+// them by their value in variantIndex.
+type sortVariants []string
+
+// Len returns the number of variants.
+func (s sortVariants) Len() int { return len(s) }
+
+// Swap exchanges the variants at positions i and j.
+func (s sortVariants) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+// Less reports whether the variant at i has a smaller variantIndex value than
+// the one at j.
+func (s sortVariants) Less(i, j int) bool {
+	left, right := variantIndex[s[i]], variantIndex[s[j]]
+	return left < right
+}
@@ -0,0 +1,67 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"strings"
+	"testing"
+)
+
+// parseBase returns the zero Language for an empty string and otherwise the
+// result of MustParseBase.
+func parseBase(s string) Language {
+	if s != "" {
+		return MustParseBase(s)
+	}
+	return 0
+}
+
+// parseScript returns the zero Script for an empty string and otherwise the
+// result of MustParseScript.
+func parseScript(s string) Script {
+	if s != "" {
+		return MustParseScript(s)
+	}
+	return 0
+}
+
+// parseRegion returns the zero Region for an empty string and otherwise the
+// result of MustParseRegion.
+func parseRegion(s string) Region {
+	if s != "" {
+		return MustParseRegion(s)
+	}
+	return 0
+}
+
+func TestBuilder(t *testing.T) {
+	partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
+		tag := Make(tt.in)
+		b := Builder{}
+		b.SetTag(Tag{
+			LangID:   parseBase(tt.lang),
+			ScriptID: parseScript(tt.script),
+			RegionID: parseRegion(tt.region),
+		})
+		if tt.variants != "" {
+			b.AddVariant(strings.Split(tt.variants, "-")...)
+		}
+		for _, e := range tag.Extensions() {
+			b.AddExt(e)
+		}
+		got := b.Make()
+		if got != tag {
+			t.Errorf("%s: got %v; want %v", tt.in, got, tag)
+		}
+		return got, false
+	})
+}
+
+func TestSetTag(t *testing.T) {
+	partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
+		tag := Make(tt.in)
+		b := Builder{}
+		b.SetTag(tag)
+		got := b.Make()
+		if got != tag {
+			t.Errorf("%s: got %v; want %v", tt.in, got, tag)
+		}
+		return got, false
+	})
+}
@@ -0,0 +1,28 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// BaseLanguages returns the list of all supported base languages. It generates
+// the list by traversing the internal structures: first every ID below
+// langNoIndexOffset except nonCanonicalUnd, then one ID per set bit of
+// langNoIndex, counting upwards from langNoIndexOffset.
+func BaseLanguages() []Language {
+	langs := make([]Language, 0, NumLanguages)
+	for id := 0; id < langNoIndexOffset; id++ {
+		// "und" is already included for the value 0.
+		if id == nonCanonicalUnd {
+			continue
+		}
+		langs = append(langs, Language(id))
+	}
+	for pos, bits := range langNoIndex {
+		// Bit k of entry pos (least significant bit first) stands for ID
+		// langNoIndexOffset + pos*8 + k.
+		first := langNoIndexOffset + pos*8
+		for k := 0; k < 8; k++ {
+			if bits&1 == 1 {
+				langs = append(langs, Language(first+k))
+			}
+			bits >>= 1
+		}
+	}
+	return langs
+}
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+// This file contains code common to the maketables.go and the package code.
+
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8
+
+// Alias type values. Deprecated, Macro and Legacy are numbered 0, 1 and 2 via
+// iota; AliasTypeUnknown is set explicitly to -1.
+const (
+	Deprecated AliasType = iota
+	Macro
+	Legacy
+
+	AliasTypeUnknown AliasType = -1
+)
@@ -0,0 +1,627 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_common.go -output tables.go
+
+package language // import "golang.org/x/text/internal/language"
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
+
+// Buffer sizes used when generating tag strings.
+const (
+	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
+	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
+	maxCoreSize = 12
+
+	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
+	// is large enough to hold at least 99% of the BCP 47 tags.
+	max99thPercentileSize = 32
+
+	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
+	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
+	maxSimpleUExtensionSize = 14
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed. The zero value of Tag is Und.
+type Tag struct {
+	// TODO: the following fields have the form TagTypeID. This name is chosen
+	// to allow refactoring the public package without conflicting with its
+	// Base, Script, and Region methods. Once the transition is fully completed
+	// the ID can be stripped from the name.
+
+	// LangID, RegionID and ScriptID are the raw base language, region and
+	// script of the tag, as returned by Raw.
+	LangID   Language
+	RegionID Region
+	// TODO: we will soon run out of positions for ScriptID. Idea: instead of
+	// storing lang, region, and ScriptID codes, store only the compact index and
+	// have a lookup table from this code to its expansion. This greatly speeds
+	// up table lookup, speed up common variant cases.
+	// This will also immediately free up 3 extra bytes. Also, the pVariant
+	// field can now be moved to the lookup table, as the compact index uniquely
+	// determines the offset of a possible variant.
+	ScriptID Script
+	pVariant byte   // offset in str, includes preceding '-'
+	pExt     uint16 // offset of first extension, includes preceding '-'
+
+	// str is the string representation of the Tag. It will only be used if the
+	// tag has variants or extensions.
+	str string
+}
+
+// Make parses s with Parse and returns the resulting Tag, discarding the
+// error. When parsing fails, whatever Tag Parse produced is returned as a
+// sensible default.
+func Make(s string) Tag {
+	tag, _ := Parse(s)
+	return tag
+}
+
+// Raw returns the base language, script and region exactly as stored in the
+// tag, without making an attempt to infer their values.
+// TODO: consider removing
+func (t Tag) Raw() (b Language, s Script, r Region) {
+	b, s, r = t.LangID, t.ScriptID, t.RegionID
+	return b, s, r
+}
+
+// equalTags reports whether t and a have the same language, script and region
+// subtags. Variants and extensions are not compared.
+func (t Tag) equalTags(a Tag) bool {
+	if t.LangID != a.LangID {
+		return false
+	}
+	if t.ScriptID != a.ScriptID {
+		return false
+	}
+	return t.RegionID == a.RegionID
+}
+
+// IsRoot returns true if t is equal to language "und": nothing is stored in
+// the tag string at or after the variant offset, and the language, script and
+// region all match Und.
+func (t Tag) IsRoot() bool {
+	hasTail := int(t.pVariant) < len(t.str)
+	return !hasTail && t.equalTags(Und)
+}
+
+// IsPrivateUse reports whether the Tag consists solely of a private use tag,
+// that is, it has a string representation whose variant offset is zero.
+func (t Tag) IsPrivateUse() bool {
+	return t.pVariant == 0 && t.str != ""
+}
+
+// RemakeString is used to update t.str in case lang, script or region changed.
+// It is assumed that pExt and pVariant still point to the start of the
+// respective parts.
+//
+// Tags without a string representation are left untouched.
+func (t *Tag) RemakeString() {
+	if t.str == "" {
+		return
+	}
+	// extra is everything from the variant offset onwards, without the '-'
+	// that precedes it when the offset is non-zero.
+	extra := t.str[t.pVariant:]
+	if t.pVariant > 0 {
+		extra = extra[1:]
+	}
+	// With no language, script or region and a remainder starting with "x-",
+	// the string is reduced to that remainder and both offsets are reset.
+	if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
+		t.str = extra
+		t.pVariant = 0
+		t.pExt = 0
+		return
+	}
+	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
+	b := buf[:t.genCoreBytes(buf[:])]
+	if extra != "" {
+		// diff is how far the remainder moves because the core part changed
+		// length; both offsets are shifted by the same amount.
+		diff := len(b) - int(t.pVariant)
+		b = append(b, '-')
+		b = append(b, extra...)
+		t.pVariant = uint8(int(t.pVariant) + diff)
+		t.pExt = uint16(int(t.pExt) + diff)
+	} else {
+		// Nothing follows the core part: both offsets point at its end.
+		t.pVariant = uint8(len(b))
+		t.pExt = uint16(len(b))
+	}
+	t.str = string(b)
+}
+
+// genCoreBytes writes the base language, followed by "-" plus the script and
+// "-" plus the region when those are set, to buf and returns the number of
+// bytes written. It will never write more than maxCoreSize bytes.
+func (t *Tag) genCoreBytes(buf []byte) int {
+	written := t.LangID.StringToBuf(buf)
+	// appendPart writes a dash followed by s at the current position.
+	appendPart := func(s string) {
+		written += copy(buf[written:], "-")
+		written += copy(buf[written:], s)
+	}
+	if t.ScriptID != 0 {
+		appendPart(t.ScriptID.String())
+	}
+	if t.RegionID != 0 {
+		appendPart(t.RegionID.String())
+	}
+	return written
+}
+
+// String returns the canonical string representation of the language tag.
+func (t Tag) String() string {
+	if t.str != "" {
+		return t.str
+	}
+	if t.ScriptID == 0 && t.RegionID == 0 {
+		return t.LangID.String()
+	}
+	buf := [maxCoreSize]byte{}
+	return string(buf[:t.genCoreBytes(buf[:])])
+}
+
+// MarshalText implements encoding.TextMarshaler.
+func (t Tag) MarshalText() (text []byte, err error) {
+	if t.str != "" {
+		text = append(text, t.str...)
+	} else if t.ScriptID == 0 && t.RegionID == 0 {
+		text = append(text, t.LangID.String()...)
+	} else {
+		buf := [maxCoreSize]byte{}
+		text = buf[:t.genCoreBytes(buf[:])]
+	}
+	return text, nil
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler.
+func (t *Tag) UnmarshalText(text []byte) error {
+	tag, err := Parse(string(text))
+	*t = tag
+	return err
+}
+
+// Variants returns the part of the tag holding all variants, including the
+// preceding '-', or the empty string if there are no variants defined.
+func (t Tag) Variants() string {
+	if t.pVariant != 0 {
+		return t.str[t.pVariant:t.pExt]
+	}
+	return ""
+}
+
+// VariantOrPrivateUseTags returns variants or private use tags: the part of
+// the tag string from the variant offset up to the extension offset, or up to
+// the end of the string when the extension offset is zero.
+func (t Tag) VariantOrPrivateUseTags() string {
+	end := len(t.str)
+	if t.pExt > 0 {
+		end = int(t.pExt)
+	}
+	return t.str[t.pVariant:end]
+}
+
+// HasString reports whether this tag defines more than just the raw
+// components, i.e. whether it carries a string representation.
+func (t Tag) HasString() bool {
+	return len(t.str) > 0
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+//
+// A tag with a string representation first has it stripped. Otherwise the
+// parent is looked up in the parents table for language-region tags,
+// or reduced towards the bare language and finally Und.
+func (t Tag) Parent() Tag {
+	if t.str != "" {
+		// Strip the variants and extensions.
+		b, s, r := t.Raw()
+		t = Tag{LangID: b, ScriptID: s, RegionID: r}
+		// For a language-script pair, drop the script as well when it is the
+		// one addTags reports for the bare language.
+		// NOTE(review): addTags is defined elsewhere; presumably it fills in
+		// the most likely script and region — confirm.
+		if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID == t.ScriptID {
+				return Tag{LangID: t.LangID}
+			}
+		}
+		return t
+	}
+	if t.LangID != 0 {
+		if t.RegionID != 0 {
+			// Use the explicit script, or the one addTags yields for t.
+			maxScript := t.ScriptID
+			if maxScript == 0 {
+				max, _ := addTags(t)
+				maxScript = max.ScriptID
+			}
+
+			// Look for a parents entry with this language and script whose
+			// fromRegion list contains the tag's region.
+			for i := range parents {
+				if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
+					for _, r := range parents[i].fromRegion {
+						if Region(r) == t.RegionID {
+							return Tag{
+								LangID:   t.LangID,
+								ScriptID: Script(parents[i].script),
+								RegionID: Region(parents[i].toRegion),
+							}
+						}
+					}
+				}
+			}
+
+			// Strip the script if it is the default one.
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID != maxScript {
+				return Tag{LangID: t.LangID, ScriptID: maxScript}
+			}
+			return Tag{LangID: t.LangID}
+		} else if t.ScriptID != 0 {
+			// The parent for an base-script pair with a non-default script is
+			// "und" instead of the base language.
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID != t.ScriptID {
+				return Und
+			}
+			return Tag{LangID: t.LangID}
+		}
+	}
+	return Und
+}
+
+// ParseExtension parses s as an extension and returns it on success.
+func ParseExtension(s string) (ext string, err error) {
+	defer func() {
+		if recover() != nil {
+			ext = ""
+			err = ErrSyntax
+		}
+	}()
+
+	scan := makeScannerString(s)
+	var end int
+	if n := len(scan.token); n != 1 {
+		return "", ErrSyntax
+	}
+	scan.toLower(0, len(scan.b))
+	end = parseExtension(&scan)
+	if end != len(s) {
+		return "", ErrSyntax
+	}
+	return string(scan.b), nil
+}
+
+// HasVariants reports whether t has variants, i.e. whether the extension
+// offset lies beyond the variant offset.
+func (t Tag) HasVariants() bool {
+	return t.pExt > uint16(t.pVariant)
+}
+
+// HasExtensions reports whether t has extensions, i.e. whether the tag string
+// continues past the extension offset.
+func (t Tag) HasExtensions() bool {
+	return len(t.str) > int(t.pExt)
+}
+
+// Extension returns the extension of type x for tag t. It will return
+// false for ok if t does not have the requested extension. The returned
+// extension will be invalid in this case.
+func (t Tag) Extension(x byte) (ext string, ok bool) {
+	pos := int(t.pExt)
+	for pos < len(t.str)-1 {
+		var cur string
+		pos, cur = getExtension(t.str, pos)
+		if cur[0] == x {
+			return cur, true
+		}
+	}
+	return "", false
+}
+
+// Extensions returns all extensions of t in the order they appear in the tag
+// string. The result is an empty, non-nil slice when there are none.
+func (t Tag) Extensions() []string {
+	exts := make([]string, 0)
+	pos := int(t.pExt)
+	for pos < len(t.str)-1 {
+		var cur string
+		pos, cur = getExtension(t.str, pos)
+		exts = append(exts, cur)
+	}
+	return exts
+}
+
+// TypeForKey returns the type associated with the given key, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// TypeForKey will traverse the inheritance chain to get the correct value.
+//
+// If there are multiple types associated with a key, only the first will be
+// returned. If there is no type associated with a key, it returns the empty
+// string.
+func (t Tag) TypeForKey(key string) string {
+	_, start, end, _ := t.findTypeForKey(key)
+	if end == start {
+		return ""
+	}
+	value := t.str[start:end]
+	// Keep only the part before the first dash.
+	if dash := strings.IndexByte(value, '-'); dash >= 0 {
+		return value[:dash]
+	}
+	return value
+}
+
+// Errors returned by SetTypeForKey.
+var (
+	// errPrivateUse is returned when the receiver is a private use tag.
+	errPrivateUse       = errors.New("cannot set a key on a private use tag")
+	// errInvalidArguments is returned when the key is not two bytes long or
+	// when a non-empty value is shorter than 3 or longer than 8 bytes.
+	errInvalidArguments = errors.New("invalid key or type")
+)
+
+// SetTypeForKey returns a new Tag with the key set to type, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// An empty value removes an existing pair with the same key.
+//
+// It returns errPrivateUse if t is a private use tag, errInvalidArguments if
+// key is not two bytes long or a non-empty value is not 3 to 8 bytes long,
+// and the scanner's error if the generated "u-key-value" string does not
+// parse as an extension.
+func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
+	if t.IsPrivateUse() {
+		return t, errPrivateUse
+	}
+	if len(key) != 2 {
+		return t, errInvalidArguments
+	}
+
+	// Remove the setting if value is "".
+	if value == "" {
+		start, sep, end, _ := t.findTypeForKey(key)
+		// findTypeForKey returns start == sep when the key is absent, in
+		// which case there is nothing to remove.
+		if start != sep {
+			// Remove a possible empty extension.
+			switch {
+			case t.str[start-2] != '-': // has previous elements.
+			case end == len(t.str), // end of string
+				end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
+				// The key was the only element of the -u extension, so
+				// the "-u" in front of it is removed as well.
+				start -= 2
+			}
+			if start == int(t.pVariant) && end == len(t.str) {
+				// The removed span runs from pVariant to the end of the
+				// string, so the string and both offsets are cleared.
+				t.str = ""
+				t.pVariant, t.pExt = 0, 0
+			} else {
+				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
+			}
+		}
+		return t, nil
+	}
+
+	if len(value) < 3 || len(value) > 8 {
+		return t, errInvalidArguments
+	}
+
+	var (
+		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
+		uStart int // start of the -u extension.
+	)
+
+	// Generate the tag string if needed.
+	if t.str == "" {
+		uStart = t.genCoreBytes(buf[:])
+		buf[uStart] = '-'
+		uStart++
+	}
+
+	// Create new key-type pair and parse it to verify.
+	b := buf[uStart:]
+	copy(b, "u-")
+	copy(b[2:], key)
+	b[4] = '-'
+	b = b[:5+copy(b[5:], value)]
+	scan := makeScanner(b)
+	if parseExtensions(&scan); scan.err != nil {
+		return t, scan.err
+	}
+
+	// Assemble the replacement string.
+	if t.str == "" {
+		// buf holds the core bytes, a hyphen and the new extension; both
+		// offsets point at that hyphen.
+		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
+		t.str = string(buf[:uStart+len(b)])
+	} else {
+		s := t.str
+		start, sep, end, hasExt := t.findTypeForKey(key)
+		if start == sep {
+			// Key not present: insert the pair at the reported position,
+			// dropping the "u-" prefix if a -u extension already exists.
+			if hasExt {
+				b = b[2:]
+			}
+			t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
+		} else {
+			// Key present: keep "-key" (3 bytes from start) and replace
+			// whatever followed it up to end with the new value.
+			t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
+		}
+	}
+	return t, nil
+}
+
+// findTypeForKey returns the start and end position for the type corresponding
+// to key or the point at which to insert the key-value pair if the type
+// wasn't found. The hasExt return value reports whether an -u extension was present.
+// Note: the extensions are typically very small and are likely to contain
+// only one key-type pair.
+//
+// When the key is found, start is the index of the hyphen preceding the key
+// and t.str[sep:end] is its type (sep == end if the key has no type). When
+// the key is not found, start, sep and end are all equal to the insertion
+// point.
+func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
+	p := int(t.pExt)
+	// Nothing to search for an invalid key or when pExt is 0 or already at
+	// the end of the string.
+	if len(key) != 2 || p == len(t.str) || p == 0 {
+		return p, p, p, false
+	}
+	s := t.str
+
+	// Find the correct extension.
+	for p++; s[p] != 'u'; p++ {
+		if s[p] > 'u' {
+			// This extension sorts after 'u': step back to the hyphen in
+			// front of it and report that position as the insertion point.
+			p--
+			return p, p, p, false
+		}
+		if p = nextExtension(s, p); p == len(s) {
+			return len(s), len(s), len(s), false
+		}
+	}
+	// Proceed to the hyphen following the extension name.
+	p++
+
+	// curKey is the key currently being processed.
+	curKey := ""
+
+	// Iterate over keys until we get the end of a section.
+	for {
+		// end marks the hyphen in front of the subtag scanned next; p is
+		// advanced to the hyphen after it (or to the end of s).
+		end = p
+		for p++; p < len(s) && s[p] != '-'; p++ {
+		}
+		// n is the length of the subtag between end and p.
+		n := p - end - 1
+		if n <= 2 && curKey == key {
+			// A subtag of at most two bytes ends the type of the matched
+			// key. Move sep past the hyphen if the type is non-empty.
+			if sep < end {
+				sep++
+			}
+			return start, sep, end, true
+		}
+		switch n {
+		case 0, // invalid string
+			1: // next extension
+			return end, end, end, true
+		case 2:
+			// next key
+			curKey = s[end+1 : p]
+			if curKey > key {
+				// A greater key was reached, so key belongs in front of it.
+				return end, end, end, true
+			}
+			start = end
+			sep = p
+		}
+	}
+}
+
+// ParseBase parses a 2- or 3-letter ISO 639 code.
+// It returns ErrSyntax if s is not 2 or 3 bytes long or if the lookup
+// panics; otherwise it returns whatever getLangID reports for s.
+func ParseBase(s string) (l Language, err error) {
+	// Turn any panic raised during the lookup into a syntax error.
+	defer func() {
+		if r := recover(); r != nil {
+			l, err = 0, ErrSyntax
+		}
+	}()
+
+	if len(s) < 2 || len(s) > 3 {
+		return 0, ErrSyntax
+	}
+	// Copy into a scratch buffer so the lookup works on a byte slice.
+	var scratch [3]byte
+	n := copy(scratch[:], s)
+	return getLangID(scratch[:n])
+}
+
+// ParseScript parses a 4-letter ISO 15924 code.
+// It returns ErrSyntax if s is not 4 bytes long or if the lookup panics;
+// otherwise it returns whatever getScriptID reports for s.
+func ParseScript(s string) (scr Script, err error) {
+	// Turn any panic raised during the lookup into a syntax error.
+	defer func() {
+		if r := recover(); r != nil {
+			scr, err = 0, ErrSyntax
+		}
+	}()
+
+	const scriptLen = 4
+	if len(s) != scriptLen {
+		return 0, ErrSyntax
+	}
+	// Copy into a scratch buffer so the lookup works on a byte slice.
+	var scratch [scriptLen]byte
+	n := copy(scratch[:], s)
+	return getScriptID(script, scratch[:n])
+}
+
+// EncodeM49 returns the Region for the given UN M.49 code.
+// The lookup is delegated to getRegionM49, whose error is passed through
+// when r is not a valid code.
+func EncodeM49(r int) (Region, error) {
+	region, err := getRegionM49(r)
+	return region, err
+}
+
+// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
+// It returns ErrSyntax if s is not 2 or 3 bytes long or if the lookup
+// panics; otherwise it returns whatever getRegionID reports for s.
+func ParseRegion(s string) (r Region, err error) {
+	// Turn any panic raised during the lookup into a syntax error.
+	defer func() {
+		if rec := recover(); rec != nil {
+			r, err = 0, ErrSyntax
+		}
+	}()
+
+	if len(s) < 2 || len(s) > 3 {
+		return 0, ErrSyntax
+	}
+	// Copy into a scratch buffer so the lookup works on a byte slice.
+	var scratch [3]byte
+	n := copy(scratch[:], s)
+	return getRegionID(scratch[:n])
+}
+
+// IsCountry returns whether this region is a country or autonomous area. This
+// includes non-standard definitions from CLDR.
+//
+// The zero Region and groups are never countries. Private use regions are
+// not countries either, with the single exception of XK.
+func (r Region) IsCountry() bool {
+	switch {
+	case r == 0, r.IsGroup():
+		return false
+	case r.IsPrivateUse():
+		return r == _XK
+	}
+	return true
+}
+
+// IsGroup returns whether this region defines a collection of regions. This
+// includes non-standard definitions from CLDR.
+//
+// The zero Region is never a group. Any other region is a group when its
+// regionInclusion entry is a valid index into regionContainment.
+func (r Region) IsGroup() bool {
+	return r != 0 && int(regionInclusion[r]) < len(regionContainment)
+}
+
+// Contains returns whether Region c is contained by Region r. It returns true
+// if c == r.
+func (r Region) Contains(c Region) bool {
+	if c == r {
+		return true
+	}
+	// Only regions whose inclusion index denotes a group can contain others.
+	outer := regionInclusion[r]
+	if outer >= nRegionGroups {
+		return false
+	}
+	mask := regionContainment[outer]
+
+	inner := regionInclusion[c]
+	bits := regionInclusionBits[inner]
+
+	// If the contained region is itself a group, it must strictly be a
+	// subset: none of its bits may fall outside the mask.
+	if inner < nRegionGroups {
+		return bits&^mask == 0
+	}
+	// A contained country may belong to multiple disjoint groups. Matching
+	// any of these indicates containment.
+	return bits&mask != 0
+}
+
+// errNoTLD is returned by Region.TLD when the region's type does not have the
+// ccTLD bit set.
+var errNoTLD = errors.New("language: region is not a valid ccTLD")
+
+// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
+// In all other cases it returns either the region itself or an error.
+//
+// This method may return an error for a region for which there exists a
+// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
+// region will already be canonicalized if it was obtained from a Tag that was
+// obtained using any of the default methods.
+func (r Region) TLD() (Region, error) {
+	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
+	// difference between ISO 3166-1 and IANA ccTLD.
+	tld := r
+	if tld == _GB {
+		tld = _UK
+	}
+	if tld.typ()&ccTLD != 0 {
+		return tld, nil
+	}
+	return 0, errNoTLD
+}
+
+// Canonicalize returns the region or a possible replacement if the region is
+// deprecated. It will not return a replacement for deprecated regions that
+// are split into multiple regions.
+//
+// The replacement is looked up with normRegion; a zero result means there is
+// none and r is returned as is.
+func (r Region) Canonicalize() Region {
+	cr := normRegion(r)
+	if cr == 0 {
+		return r
+	}
+	return cr
+}
+
+// Variant represents a registered variant of a language as defined by BCP 47.
+type Variant struct {
+	// ID is the value stored for the variant in variantIndex, as set by
+	// ParseVariant.
+	ID  uint8
+	// str is the lowercased variant string, as set by ParseVariant and
+	// returned by String.
+	str string
+}
+
+// ParseVariant parses and returns a Variant. The input is lowercased and
+// looked up in variantIndex. A value error is returned if it is not found
+// there, and ErrSyntax if the lookup panics.
+func ParseVariant(s string) (v Variant, err error) {
+	// Turn any panic raised during the lookup into a syntax error.
+	defer func() {
+		if r := recover(); r != nil {
+			v, err = Variant{}, ErrSyntax
+		}
+	}()
+
+	lower := strings.ToLower(s)
+	id, found := variantIndex[lower]
+	if !found {
+		return Variant{}, NewValueError([]byte(lower))
+	}
+	return Variant{id, lower}, nil
+}
+
+// String returns the string representation of the variant, which is the
+// value of its str field.
+func (v Variant) String() string {
+	return v.str
+}
@@ -0,0 +1,746 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"reflect"
+	"testing"
+
+	"golang.org/x/text/internal/testtext"
+)
+
+func TestTagSize(t *testing.T) {
+	id := Tag{}
+	typ := reflect.TypeOf(id)
+	if typ.Size() > 32 {
+		t.Errorf("size of Tag was %d; want <= 32", typ.Size())
+	}
+}
+
+// TestIsRoot checks that the zero Tag is the root and that a parsed tag is
+// the root exactly when its expected language is "und" and its expected
+// script, region and extension are all empty.
+func TestIsRoot(t *testing.T) {
+	if zero := (Tag{}); !zero.IsRoot() {
+		t.Errorf("unspecified should be root.")
+	}
+	for i, tc := range parseTests() {
+		tag, _ := Parse(tc.in)
+		want := tc.lang == "und" && tc.script == "" && tc.region == "" && tc.ext == ""
+		if got := tag.IsRoot(); got != want {
+			t.Errorf("%d: was %v; want %v", i, got, want)
+		}
+	}
+}
+
+// TestEquality checks that a tag made from an input string compares equal to
+// the tag made from its own String output.
+func TestEquality(t *testing.T) {
+	for i, tt := range parseTests() {
+		s := tt.in
+		tag := Make(s)
+		t1 := Make(tag.String())
+		if tag != t1 {
+			// The message used to end in an unmatched ")".
+			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v", i, s, t1, tag)
+		}
+	}
+}
+
+// TestMakeString parses each "in" tag, overwrites its tags with those of the
+// parsed "out" tag and checks that RemakeString yields "out". The string is
+// remade twice per entry, and the number of allocations is bounded.
+func TestMakeString(t *testing.T) {
+	cases := []struct{ in, out string }{
+		{"und", "und"},
+		{"und", "und-CW"},
+		{"nl", "nl-NL"},
+		{"de-1901", "nl-1901"},
+		{"de-1901", "de-Arab-1901"},
+		{"x-a-b", "de-Arab-x-a-b"},
+		{"x-a-b", "x-a-b"},
+	}
+	for i, tc := range cases {
+		tag, _ := Parse(tc.in)
+		src, _ := Parse(tc.out)
+		tag.setTagsFrom(src)
+		for round := 0; round < 2; round++ {
+			tag.RemakeString()
+			if tag.String() != tc.out {
+				t.Errorf("%d:%d: found %s; want %s", i, round, tag.String(), tc.out)
+			}
+		}
+		// The bytes to string conversion as used in remakeString
+		// occasionally measures as more than one alloc, breaking this test.
+		// To alleviate this we set the number of runs to more than 1.
+		allocs := testtext.AllocsPerRun(8, tag.RemakeString)
+		if allocs > 1 {
+			t.Errorf("%d: # allocs got %.1f; want <= 1", i, allocs)
+		}
+	}
+}
+
+// TestMarshal checks that UnmarshalText followed by MarshalText reproduces
+// each input string.
+func TestMarshal(t *testing.T) {
+	// TODO: these values will change with each CLDR update. This issue
+	// will be solved if we decide to fix the indexes.
+	inputs := []string{
+		"und",
+		"ca-ES-valencia",
+		"ca-ES-valencia-u-va-posix",
+		"ca-ES-valencia-u-co-phonebk",
+		"ca-ES-valencia-u-co-phonebk-va-posix",
+		"x-klingon",
+		"en-US",
+		"en-US-u-va-posix",
+		"en",
+		"en-u-co-phonebk",
+		"en-001",
+		"sh",
+	}
+	for _, want := range inputs {
+		var tag Tag
+		if err := tag.UnmarshalText([]byte(want)); err != nil {
+			t.Errorf("UnmarshalText(%q): unexpected error: %v", want, err)
+		}
+		text, err := tag.MarshalText()
+		if err != nil {
+			t.Errorf("MarshalText(%q): unexpected error: %v", want, err)
+		}
+		if got := string(text); got != want {
+			t.Errorf("%s: got %q; want %q", want, got, want)
+		}
+	}
+}
+
+// TestParseBase checks the string form and error status of ParseBase and
+// that its result equals the language of the tag made from the expected
+// output.
+func TestParseBase(t *testing.T) {
+	cases := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"en", "en", true},
+		{"EN", "en", true},
+		{"nld", "nl", true},
+		{"dut", "dut", true},  // bibliographic
+		{"aaj", "und", false}, // unknown
+		{"qaa", "qaa", true},
+		{"a", "und", false},
+		{"", "und", false},
+		{"aaaa", "und", false},
+	}
+	for i, tc := range cases {
+		got, err := ParseBase(tc.in)
+		ok := err == nil
+		if got.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, ok, tc.out, tc.ok)
+		}
+		want, _, _ := Make(tc.out).Raw()
+		if got != want {
+			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
+		}
+	}
+}
+
+// TestParseScript checks the string form and error status of ParseScript
+// and, on success, that its result equals the script of the tag made from
+// "und-" plus the expected output.
+func TestParseScript(t *testing.T) {
+	cases := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"Latn", "Latn", true},
+		{"zzzz", "Zzzz", true},
+		{"zyyy", "Zyyy", true},
+		{"Latm", "Zzzz", false},
+		{"Zzz", "Zzzz", false},
+		{"", "Zzzz", false},
+		{"Zzzxx", "Zzzz", false},
+	}
+	for i, tc := range cases {
+		got, err := ParseScript(tc.in)
+		ok := err == nil
+		if got.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, ok, tc.out, tc.ok)
+		}
+		if !ok {
+			continue
+		}
+		if _, want, _ := Make("und-" + tc.out).Raw(); got != want {
+			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
+		}
+	}
+}
+
+// TestEncodeM49 checks EncodeM49 against a few known codes and verifies
+// that no code from 1 to 1000 is accepted without error while mapping to a
+// region whose M49 value is 0.
+func TestEncodeM49(t *testing.T) {
+	cases := []struct {
+		m49  int
+		code string
+		ok   bool
+	}{
+		{1, "001", true},
+		{840, "US", true},
+		{899, "ZZ", false},
+	}
+	for i, tc := range cases {
+		r, err := EncodeM49(tc.m49)
+		if ok := err == nil; r.String() != tc.code || ok != tc.ok {
+			t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tc.m49, r, ok, tc.code, tc.ok)
+		}
+	}
+	for code := 1; code <= 1000; code++ {
+		r, err := EncodeM49(code)
+		if err != nil {
+			continue
+		}
+		if r.M49() == 0 {
+			t.Errorf("%d has no error, but maps to undefined region", code)
+		}
+	}
+}
+
+// TestParseRegion checks the string form and error status of ParseRegion
+// and, on success, that its result equals the region of the tag made from
+// "und-" plus the expected output.
+func TestParseRegion(t *testing.T) {
+	cases := []struct {
+		in  string
+		out string
+		ok  bool
+	}{
+		{"001", "001", true},
+		{"840", "US", true},
+		{"899", "ZZ", false},
+		{"USA", "US", true},
+		{"US", "US", true},
+		{"BC", "ZZ", false},
+		{"C", "ZZ", false},
+		{"CCCC", "ZZ", false},
+		{"01", "ZZ", false},
+	}
+	for i, tc := range cases {
+		got, err := ParseRegion(tc.in)
+		ok := err == nil
+		if got.String() != tc.out || ok != tc.ok {
+			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tc.in, got, ok, tc.out, tc.ok)
+		}
+		if !ok {
+			continue
+		}
+		if _, _, want := Make("und-" + tc.out).Raw(); got != want {
+			t.Errorf("%d:%s: tag was %s; want %s", i, tc.in, got, want)
+		}
+	}
+}
+
+// TestIsCountry checks Region.IsCountry for a table of region codes. Lookup
+// errors from getRegionID are deliberately ignored.
+func TestIsCountry(t *testing.T) {
+	cases := []struct {
+		reg     string
+		country bool
+	}{
+		{"US", true},
+		{"001", false},
+		{"958", false},
+		{"419", false},
+		{"203", true},
+		{"020", true},
+		{"900", false},
+		{"999", false},
+		{"QO", false},
+		{"EU", false},
+		{"AA", false},
+		{"XK", true},
+	}
+	for i, tc := range cases {
+		region, _ := getRegionID([]byte(tc.reg))
+		if got := region.IsCountry(); got != tc.country {
+			t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tc.reg, got, tc.country)
+		}
+	}
+}
+
+// TestIsGroup checks Region.IsGroup for a table of region codes. Lookup
+// errors from getRegionID are deliberately ignored.
+func TestIsGroup(t *testing.T) {
+	cases := []struct {
+		reg   string
+		group bool
+	}{
+		{"US", false},
+		{"001", true},
+		{"958", false},
+		{"419", true},
+		{"203", false},
+		{"020", false},
+		{"900", false},
+		{"999", false},
+		{"QO", true},
+		{"EU", true},
+		{"AA", false},
+		{"XK", false},
+	}
+	for i, tc := range cases {
+		region, _ := getRegionID([]byte(tc.reg))
+		if got := region.IsGroup(); got != tc.group {
+			t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tc.reg, got, tc.group)
+		}
+	}
+}
+
+// TestContains checks Region.Contains for pairs of enclosing and contained
+// region codes.
+func TestContains(t *testing.T) {
+	cases := []struct {
+		enclosing, contained string
+		contains             bool
+	}{
+		// A region contains itself.
+		{"US", "US", true},
+		{"001", "001", true},
+
+		// Direct containment.
+		{"001", "002", true},
+		{"039", "XK", true},
+		{"150", "XK", true},
+		{"EU", "AT", true},
+		{"QO", "AQ", true},
+
+		// Indirect containment.
+		{"001", "US", true},
+		{"001", "419", true},
+		{"001", "013", true},
+
+		// No containment.
+		{"US", "001", false},
+		{"155", "EU", false},
+	}
+	for i, tc := range cases {
+		outer, _ := getRegionID([]byte(tc.enclosing))
+		inner, _ := getRegionID([]byte(tc.contained))
+		got := outer.Contains(inner)
+		if got != tc.contains {
+			t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tc.enclosing, tc.contained, got, tc.contains)
+		}
+	}
+}
+
+// TestRegionCanonicalize checks that canonicalizing each "in" region yields
+// the corresponding "out" region.
+func TestRegionCanonicalize(t *testing.T) {
+	cases := []struct{ in, out string }{
+		{"UK", "GB"},
+		{"TP", "TL"},
+		{"QU", "EU"},
+		{"SU", "SU"},
+		{"VD", "VN"},
+		{"DD", "DE"},
+	}
+	for i, tc := range cases {
+		region := MustParseRegion(tc.in)
+		want := MustParseRegion(tc.out)
+		got := region.Canonicalize()
+		if got != want {
+			t.Errorf("%d: got %v; want %v", i, got, want)
+		}
+	}
+}
+
+// TestRegionTLD checks Region.TLD against a table of region codes. Each entry
+// lists the input region, the expected result and whether TLD is expected to
+// succeed. An expected result of "ZZ" stands for the zero Region.
+func TestRegionTLD(t *testing.T) {
+	for _, tt := range []struct {
+		in, out string
+		ok      bool
+	}{
+		{"EH", "EH", true},
+		{"FR", "FR", true},
+		{"TL", "TL", true},
+
+		// In ccTLD before in ISO.
+		{"GG", "GG", true},
+
+		// Non-standard assignment of ccTLD to ISO code.
+		{"GB", "UK", true},
+
+		// Exceptionally reserved in ISO and valid ccTLD.
+		{"UK", "UK", true},
+		{"AC", "AC", true},
+		{"EU", "EU", true},
+		{"SU", "SU", true},
+
+		// Exceptionally reserved in ISO and invalid ccTLD.
+		{"CP", "ZZ", false},
+		{"DG", "ZZ", false},
+		{"EA", "ZZ", false},
+		{"FX", "ZZ", false},
+		{"IC", "ZZ", false},
+		{"TA", "ZZ", false},
+
+		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
+		// it is still being phased out.
+		{"AN", "AN", true},
+		{"TP", "TP", true},
+
+		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
+		// Defined in package language as it has a mapping in CLDR.
+		{"BU", "ZZ", false},
+		{"CS", "ZZ", false},
+		{"NT", "ZZ", false},
+		{"YU", "ZZ", false},
+		{"ZR", "ZZ", false},
+		// Not defined in package: SF.
+
+		// Indeterminately reserved in ISO.
+		// Defined in package language as it has a legacy mapping in CLDR.
+		{"DY", "ZZ", false},
+		{"RH", "ZZ", false},
+		{"VD", "ZZ", false},
+		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
+		// RN, RP, WG, WL, WV, and YV.
+
+		// Not assigned in ISO, but legacy definitions in CLDR.
+		{"DD", "ZZ", false},
+		{"YD", "ZZ", false},
+
+		// Normal mappings but somewhat special status in ccTLD.
+		{"BL", "BL", true},
+		{"MF", "MF", true},
+		{"BV", "BV", true},
+		{"SJ", "SJ", true},
+
+		// Have values when normalized, but not as is.
+		{"QU", "ZZ", false},
+
+		// ISO Private Use.
+		{"AA", "ZZ", false},
+		{"QM", "ZZ", false},
+		{"QO", "ZZ", false},
+		{"XA", "ZZ", false},
+		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
+	} {
+		// Skip entries without an input region.
+		if tt.in == "" {
+			continue
+		}
+
+		r := MustParseRegion(tt.in)
+		// want stays the zero Region when the expected output is "ZZ".
+		var want Region
+		if tt.out != "ZZ" {
+			want = MustParseRegion(tt.out)
+		}
+		tld, err := r.TLD()
+		if got := err == nil; got != tt.ok {
+			t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
+		}
+		if tld != want {
+			t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
+		}
+	}
+}
+
+// TestTypeForKey checks the type TypeForKey reports for a key on a set of
+// tags, including tags where the key is absent.
+func TestTypeForKey(t *testing.T) {
+	cases := []struct{ key, in, out string }{
+		{"co", "en", ""},
+		{"co", "en-u-abc", ""},
+		{"co", "en-u-co-phonebk", "phonebk"},
+		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
+		{"co", "x-foo-u-co-phonebk", ""},
+		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
+		{"kc", "cmn-u-co-stroke", ""},
+	}
+	for _, tc := range cases {
+		got := Make(tc.in).TypeForKey(tc.key)
+		if got != tc.out {
+			t.Errorf("%q[%q]: was %q; want %q", tc.in, tc.key, got, tc.out)
+		}
+	}
+}
+
+// TestSetTypeForKey checks SetTypeForKey on a table of key, value and input
+// tag combinations. For each entry it compares the resulting tag string and
+// the error status with the expectation and, on success, checks that
+// TypeForKey reports the value that was set. For tags whose string form is
+// at most three bytes long the call is repeated on a copy with an unset
+// string representation.
+func TestSetTypeForKey(t *testing.T) {
+	tests := []struct {
+		key, value, in, out string
+		err                 bool
+	}{
+		// replace existing value
+		{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
+		{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
+		{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
+		{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
+		{"co", "pinyin", "en-u-co-x-x", "en-u-co-pinyin-x-x", false},
+		{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
+		{"nu", "arabic", "en-u-co-phonebk-nu", "en-u-co-phonebk-nu-arabic", false},
+		// add to existing -u extension
+		{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
+		{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
+		{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
+		{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
+		{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
+		// remove pair
+		{"co", "", "en-u-co-phonebk", "en", false},
+		{"co", "", "en-u-co", "en", false},
+		{"co", "", "en-u-co-v", "en", false},
+		{"co", "", "en-u-co-v-", "en", false},
+		{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
+		{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
+		{"co", "", "en-u-co-nu-arabic", "en-u-nu-arabic", false},
+		{"co", "", "en", "en", false},
+		// add -u extension
+		{"co", "pinyin", "en", "en-u-co-pinyin", false},
+		{"co", "pinyin", "und", "und-u-co-pinyin", false},
+		{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
+		{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
+		{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
+		{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
+		{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
+		// error on invalid values
+		{"co", "pinyinxxx", "en", "en", true},
+		{"co", "piny.n", "en", "en", true},
+		{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
+		{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
+		{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
+		{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
+		{"col", "pinyin", "en", "en", true},
+		{"co", "cu", "en", "en", true},
+		// error when setting on a private use tag
+		{"co", "phonebook", "x-foo", "x-foo", true},
+	}
+	for i, tt := range tests {
+		tag := Make(tt.in)
+		// Only the first failing check of the chain is reported per entry.
+		if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
+			t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
+		} else if (err != nil) != tt.err {
+			t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
+		} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
+			t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
+		}
+		if len(tag.String()) <= 3 {
+			// Simulate a tag for which the string has not been set.
+			tag.str, tag.pExt, tag.pVariant = "", 0, 0
+			if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
+				if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
+					t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
+				}
+			}
+		}
+	}
+}
+
+// TestFindKeyAndType checks the positions returned by findTypeForKey. When a
+// non-empty type is reported (sep != end), the type is compared with the
+// expectation. Otherwise hasExt and the prefix of the input up to start are
+// compared.
+func TestFindKeyAndType(t *testing.T) {
+	// out is either the matched type in case of a match or the original
+	// string up till the insertion point.
+	tests := []struct {
+		key     string
+		hasExt  bool
+		in, out string
+	}{
+		// Don't search past a private use extension.
+		{"co", false, "en-x-foo-u-co-pinyin", "en"},
+		{"co", false, "x-foo-u-co-pinyin", ""},
+		{"co", false, "en-s-fff-x-foo", "en-s-fff"},
+		// Insertion points in absence of -u extension.
+		{"cu", false, "en", ""}, // t.str is ""
+		{"cu", false, "en-v-va", "en"},
+		{"cu", false, "en-a-va", "en-a-va"},
+		{"cu", false, "en-a-va-v-va", "en-a-va"},
+		{"cu", false, "en-x-a", "en"},
+		// Tags with the -u extension.
+		{"nu", true, "en-u-cu-nu", "en-u-cu"},
+		{"cu", true, "en-u-cu-nu", "en-u"},
+		{"co", true, "en-u-co-standard", "standard"},
+		{"co", true, "yue-u-co-pinyin", "pinyin"},
+		{"co", true, "en-u-co-abc", "abc"},
+		{"co", true, "en-u-co-abc-def", "abc-def"},
+		{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
+		{"co", true, "en-u-co-standard-nu-arab", "standard"},
+		{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
+		// Insertion points.
+		{"cu", true, "en-u-co-standard", "en-u-co-standard"},
+		{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
+		{"cu", true, "en-u-co-abc", "en-u-co-abc"},
+		{"cu", true, "en-u-nu-arabic", "en-u"},
+		{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
+	}
+	for i, tt := range tests {
+		start, sep, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
+		if sep != end {
+			// A type was found: compare it with the expected type.
+			res := tt.in[sep:end]
+			if res != tt.out {
+				t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
+			}
+		} else {
+			// No type was found: check hasExt and the insertion point.
+			if hasExt != tt.hasExt {
+				t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
+				continue
+			}
+			if tt.in[:start] != tt.out {
+				t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
+			}
+		}
+	}
+}
+
+// TestParent checks that Parent of each parsed "in" tag equals the parsed
+// "out" tag.
+func TestParent(t *testing.T) {
+	tests := []struct{ in, out string }{
+		// Strip variants and extensions first
+		{"de-u-co-phonebk", "de"},
+		{"de-1994", "de"},
+		{"de-Latn-1994", "de"}, // remove superfluous script.
+
+		// Ensure the canonical Tag for an entry is in the chain for base-script
+		// pairs.
+		{"zh-Hans", "zh"},
+
+		// Skip the script if it is the maximized version. CLDR files for the
+		// skipped tag are always empty.
+		{"zh-Hans-TW", "zh"},
+		{"zh-Hans-CN", "zh"},
+
+		// Insert the script if the maximized script is not the same as the
+		// maximized script of the base language.
+		{"zh-TW", "zh-Hant"},
+		{"zh-HK", "zh-Hant"},
+		{"zh-Hant-TW", "zh-Hant"},
+		{"zh-Hant-HK", "zh-Hant"},
+
+		// Non-default script skips to und.
+		// CLDR
+		{"az-Cyrl", "und"},
+		{"bs-Cyrl", "und"},
+		{"en-Dsrt", "und"},
+		{"ha-Arab", "und"},
+		{"mn-Mong", "und"},
+		{"pa-Arab", "und"},
+		{"shi-Latn", "und"},
+		{"sr-Latn", "und"},
+		{"uz-Arab", "und"},
+		{"uz-Cyrl", "und"},
+		{"vai-Latn", "und"},
+		{"zh-Hant", "und"},
+		// extra
+		{"nl-Cyrl", "und"},
+
+		// World english inherits from en-001.
+		{"en-150", "en-001"},
+		{"en-AU", "en-001"},
+		{"en-BE", "en-001"},
+		{"en-GG", "en-001"},
+		{"en-GI", "en-001"},
+		{"en-HK", "en-001"},
+		{"en-IE", "en-001"},
+		{"en-IM", "en-001"},
+		{"en-IN", "en-001"},
+		{"en-JE", "en-001"},
+		{"en-MT", "en-001"},
+		{"en-NZ", "en-001"},
+		{"en-PK", "en-001"},
+		{"en-SG", "en-001"},
+
+		// Spanish in Latin-American countries have es-419 as parent.
+		{"es-AR", "es-419"},
+		{"es-BO", "es-419"},
+		{"es-CL", "es-419"},
+		{"es-CO", "es-419"},
+		{"es-CR", "es-419"},
+		{"es-CU", "es-419"},
+		{"es-DO", "es-419"},
+		{"es-EC", "es-419"},
+		{"es-GT", "es-419"},
+		{"es-HN", "es-419"},
+		{"es-MX", "es-419"},
+		{"es-NI", "es-419"},
+		{"es-PA", "es-419"},
+		{"es-PE", "es-419"},
+		{"es-PR", "es-419"},
+		{"es-PY", "es-419"},
+		{"es-SV", "es-419"},
+		{"es-US", "es-419"},
+		{"es-UY", "es-419"},
+		{"es-VE", "es-419"},
+		// exceptions (according to CLDR)
+		{"es-CW", "es"},
+
+		// Inherit from pt-PT, instead of pt for these countries.
+		{"pt-AO", "pt-PT"},
+		{"pt-CV", "pt-PT"},
+		{"pt-GW", "pt-PT"},
+		{"pt-MO", "pt-PT"},
+		{"pt-MZ", "pt-PT"},
+		{"pt-ST", "pt-PT"},
+		{"pt-TL", "pt-PT"},
+	}
+	for _, tt := range tests {
+		tag := MustParse(tt.in)
+		if p := MustParse(tt.out); p != tag.Parent() {
+			t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
+		}
+	}
+}
+
+// Input sets for the parsing benchmarks, grouped by the kind of work parsing
+// them is expected to involve.
+var (
+	// Tags without error that don't need to be changed.
+	benchBasic = []string{
+		"en",
+		"en-Latn",
+		"en-GB",
+		"za",
+		"zh-Hant",
+		"zh",
+		"zh-HK",
+		"ar-MK",
+		"en-CA",
+		"fr-CA",
+		"fr-CH",
+		"fr",
+		"lv",
+		"he-IT",
+		"tlh",
+		"ja",
+		"ja-Jpan",
+		"ja-Jpan-JP",
+		"de-1996",
+		"de-CH",
+		"sr",
+		"sr-Latn",
+	}
+	// Tags with extensions, no changes required.
+	benchExt = []string{
+		"x-a-b-c-d",
+		"x-aa-bbbb-cccccccc-d",
+		"en-x_cc-b-bbb-a-aaa",
+		"en-c_cc-b-bbb-a-aaa-x-x",
+		"en-u-co-phonebk",
+		"en-Cyrl-u-co-phonebk",
+		"en-US-u-co-phonebk-cu-xau",
+		"en-nedix-u-co-phonebk",
+		"en-t-t0-abcd",
+		"en-t-nl-latn",
+		"en-t-t0-abcd-x-a",
+		"en_t_pt_MLt",
+		"en-t-fr-est",
+	}
+	// Change, but no memory allocation required.
+	benchSimpleChange = []string{
+		"EN",
+		"i-klingon",
+		"en-latn",
+		"zh-cmn-Hans-CN",
+		"iw-NL",
+	}
+	// Change and memory allocation required.
+	benchChangeAlloc = []string{
+		"en-c_cc-b-bbb-a-aaa",
+		"en-u-cu-xua-co-phonebk",
+		"en-u-cu-xua-co-phonebk-a-cd",
+		"en-u-def-abc-cu-xua-co-phonebk",
+		"en-t-en-Cyrl-NL-1994",
+		"en-t-en-Cyrl-NL-1994-t0-abc-def",
+	}
+	// Tags that result in errors.
+	benchErr = []string{
+		// IllFormed
+		"x_A.-B-C_D",
+		"en-u-cu-co-phonebk",
+		"en-u-cu-xau-co",
+		"en-t-nl-abcd",
+		// Invalid
+		"xx",
+		"nl-Uuuu",
+		"nl-QB",
+	}
+	// benchChange combines both sets of tags that require a change.
+	benchChange = append(benchSimpleChange, benchChangeAlloc...)
+	// benchAll is the concatenation of all sets above.
+	benchAll    = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
+)
+
+// doParse is the shared benchmark body: it parses one tag per iteration,
+// cycling through the given tags. The result of Parse is discarded.
+func doParse(b *testing.B, tag []string) {
+	// Cycle through the tags with a modulo rather than parsing all of them
+	// per iteration, so that ns/op stays somewhat meaningful.
+	count := len(tag)
+	for iter := 0; iter < b.N; iter++ {
+		Parse(tag[iter%count])
+	}
+}
+
+// BenchmarkParse measures Parse over all benchmark tags (benchAll).
+func BenchmarkParse(b *testing.B) {
+	doParse(b, benchAll)
+}
+
+// BenchmarkParseBasic measures Parse over the tags in benchBasic.
+func BenchmarkParseBasic(b *testing.B) {
+	doParse(b, benchBasic)
+}
+
+// BenchmarkParseError measures Parse over the tags in benchErr.
+func BenchmarkParseError(b *testing.B) {
+	doParse(b, benchErr)
+}
+
+// BenchmarkParseSimpleChange measures Parse over the tags in
+// benchSimpleChange.
+func BenchmarkParseSimpleChange(b *testing.B) {
+	doParse(b, benchSimpleChange)
+}
+
+// BenchmarkParseChangeAlloc measures Parse over the tags in
+// benchChangeAlloc.
+func BenchmarkParseChangeAlloc(b *testing.B) {
+	doParse(b, benchChangeAlloc)
+}
@@ -0,0 +1,412 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+	"strconv"
+
+	"golang.org/x/text/internal/tag"
+)
+
+// findIndex looks key up in idx after normalizing its case in place to match
+// form. It returns ErrSyntax if the case of key cannot be fixed to form, and
+// the error built by NewValueError(key) if key is not present in idx.
+func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
+	if ok := tag.FixCase(form, key); !ok {
+		return 0, ErrSyntax
+	}
+	switch pos := idx.Index(key); pos {
+	case -1:
+		return 0, NewValueError(key)
+	default:
+		return pos, nil
+	}
+}
+
+// searchUint returns the smallest index i for which imap[i] >= key, or
+// len(imap) if there is no such element. It relies on sort.Search, so imap is
+// expected to be sorted in ascending order.
+func searchUint(imap []uint16, key uint16) int {
+	atLeastKey := func(i int) bool { return key <= imap[i] }
+	return sort.Search(len(imap), atLeastKey)
+}
+
+// Language is a compact numeric identifier for a language subtag. The zero
+// value stands for the undetermined language, which String renders as "und".
+type Language uint16
+
+// getLangID resolves the language subtag s to its Language ID. A subtag of
+// length two is looked up as a 2-letter ISO code via getLangISO2; any other
+// length is handed to getLangISO3. The error is non-nil if s is malformed or
+// unknown.
+func getLangID(s []byte) (Language, error) {
+	switch len(s) {
+	case 2:
+		return getLangISO2(s)
+	default:
+		return getLangISO3(s)
+	}
+}
+
+// TODO language normalization as well as the AliasMaps could be moved to the
+// higher level package, but it is a bit tricky to separate the generation.
+
+// Canonicalize returns the language that id is aliased to in AliasMap,
+// together with the type of that alias. If id has no alias it returns id
+// itself and AliasTypeUnknown.
+func (id Language) Canonicalize() (Language, AliasType) {
+	canonical, kind := normLang(id)
+	return canonical, kind
+}
+
+// normLang returns the language that id is aliased to in AliasMap, along with
+// the matching entry of AliasTypes. If id is not the source of any alias it
+// is returned unchanged with AliasTypeUnknown.
+func normLang(id Language) (Language, AliasType) {
+	from := uint16(id)
+	// Binary search for the first alias whose From is not below id.
+	pos := sort.Search(len(AliasMap), func(i int) bool {
+		return from <= AliasMap[i].From
+	})
+	if pos == len(AliasMap) || AliasMap[pos].From != from {
+		return id, AliasTypeUnknown
+	}
+	return Language(AliasMap[pos].To), AliasTypes[pos]
+}
+
+// getLangISO2 returns the Language for the given 2-letter ISO language code.
+// It returns ErrSyntax if s cannot be case-normalized to a 2-letter code and
+// the error built by NewValueError(s) if the code is unknown.
+func getLangISO2(s []byte) (Language, error) {
+	if !tag.FixCase("zz", s) {
+		return 0, ErrSyntax
+	}
+	pos := lang.Index(s)
+	// An entry whose fourth byte is zero holds a 3-letter code, so it does
+	// not count as a match for a 2-letter lookup.
+	if pos == -1 || lang.Elem(pos)[3] == 0 {
+		return 0, NewValueError(s)
+	}
+	return Language(pos), nil
+}
+
+// base is the number of letters in 'a'..'z'; it is the radix used by
+// strToInt and intToStr.
+const base = 'z' - 'a' + 1
+
+// strToInt interprets s as a number in radix base, where 'a' is the digit 0
+// and the first byte is the most significant digit.
+func strToInt(s []byte) uint {
+	var total uint
+	for _, c := range s {
+		total = total*base + uint(c-'a')
+	}
+	return total
+}
+
+// intToStr converts v back to the ASCII string originally passed to strToInt,
+// filling s from its last byte to its first. len(s) must match the number of
+// characters of that original string.
+func intToStr(v uint, s []byte) {
+	for pos := len(s); pos > 0; pos-- {
+		s[pos-1] = 'a' + byte(v%base)
+		v /= base
+	}
+}
+
+// getLangISO3 returns the Language for the given 3-letter ISO language code.
+// It returns ErrSyntax if s cannot be case-normalized to a 3-letter code and
+// the error built by NewValueError(s) if the code is unknown. The lookup
+// tries, in order: canonical 3-letter entries of lang, the altLangISO3
+// table, the langNoIndex bit set, and finally non-canonical 3-letter forms
+// of languages that also have a 2-letter code.
+func getLangISO3(s []byte) (Language, error) {
+	if !tag.FixCase("und", s) {
+		return 0, ErrSyntax
+	}
+
+	// First try to match canonical 3-letter entries.
+	prefix := s[:2]
+	for i := lang.Index(prefix); i != -1; i = lang.Next(prefix, i) {
+		e := lang.Elem(i)
+		if e[3] != 0 || e[2] != s[2] {
+			continue
+		}
+		// We treat "und" as special and always translate it to "unspecified".
+		// Note that ZZ and Zzzz are private use and are not treated as
+		// unspecified by default.
+		if id := Language(i); id != nonCanonicalUnd {
+			return id, nil
+		}
+		return 0, nil
+	}
+
+	if i := altLangISO3.Index(s); i != -1 {
+		alt := altLangISO3.Elem(i)[3]
+		return Language(altLangIndex[alt]), nil
+	}
+
+	// Languages without an index entry are recorded as one bit each in
+	// langNoIndex, addressed by the radix-base value of the code.
+	n := strToInt(s)
+	if langNoIndex[n/8]&(1<<(n%8)) != 0 {
+		return Language(n) + langNoIndexOffset, nil
+	}
+
+	// Check for non-canonical uses of ISO3.
+	first := s[:1]
+	for i := lang.Index(first); i != -1; i = lang.Next(first, i) {
+		if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
+			return Language(i), nil
+		}
+	}
+	return 0, NewValueError(s)
+}
+
+// StringToBuf writes the string form of id to b and returns the number of
+// bytes written.  cap(b) must be >= 3.
+func (id Language) StringToBuf(b []byte) int {
+	switch {
+	case id >= langNoIndexOffset:
+		// Languages without an index entry are decoded arithmetically.
+		intToStr(uint(id)-langNoIndexOffset, b[:3])
+		return 3
+	case id == 0:
+		return copy(b, "und")
+	}
+	entry := lang[id<<2:]
+	// A zero fourth byte marks a 3-letter code; otherwise only the first two
+	// bytes form the code.
+	width := 2
+	if entry[3] == 0 {
+		width = 3
+	}
+	return copy(b, entry[:width])
+}
+
+// String returns the BCP 47 representation of the langID.
+// Use b as variable name, instead of id, to ensure the variable
+// used is consistent with that of Base in which this type is embedded.
+func (b Language) String() string {
+	if b == 0 {
+		return "und"
+	} else if b >= langNoIndexOffset {
+		b -= langNoIndexOffset
+		buf := [3]byte{}
+		intToStr(uint(b), buf[:])
+		return string(buf[:])
+	}
+	l := lang.Elem(int(b))
+	if l[3] == 0 {
+		return l[:3]
+	}
+	return l[:2]
+}
+
+// ISO3 returns the ISO 639-3 language code.
+func (b Language) ISO3() string {
+	if b == 0 || b >= langNoIndexOffset {
+		return b.String()
+	}
+	l := lang.Elem(int(b))
+	switch {
+	case l[3] == 0:
+		// The entry itself is a 3-letter code.
+		return l[:3]
+	case l[2] == 0:
+		// The fourth byte indexes the alternative ISO3 table.
+		return altLangISO3.Elem(int(l[3]))[:3]
+	default:
+		// This allocation will only happen for 3-letter ISO codes
+		// that are non-canonical BCP 47 language identifiers.
+		return l[0:1] + l[2:4]
+	}
+}
+
+// IsPrivateUse reports whether this language code is reserved for private
+// use, i.e. lies in the inclusive range langPrivateStart..langPrivateEnd.
+func (b Language) IsPrivateUse() bool {
+	if b < langPrivateStart {
+		return false
+	}
+	return b <= langPrivateEnd
+}
+
+// SuppressScript returns the script marked as SuppressScript in the IANA
+// language tag repository, or 0 if there is no such script.
+func (b Language) SuppressScript() Script {
+	// Languages at or above langNoIndexOffset have no suppressScript entry.
+	if b >= langNoIndexOffset {
+		return 0
+	}
+	return Script(suppressScript[b])
+}
+
+// Region is a compact numeric identifier for a region subtag. The zero value
+// stands for the unspecified region, which String renders as "ZZ".
+type Region uint16
+
+// getRegionID returns the Region for the region subtag s. A 3-byte subtag
+// starting with a letter is looked up as a 3-letter ISO code, a 3-byte
+// subtag that parses as a decimal number is looked up as a UN M.49 code, and
+// everything else is treated as a 2-letter ISO code.
+func getRegionID(s []byte) (Region, error) {
+	if len(s) != 3 {
+		return getRegionISO2(s)
+	}
+	if isAlpha(s[0]) {
+		return getRegionISO3(s)
+	}
+	code, err := strconv.ParseUint(string(s), 10, 10)
+	if err != nil {
+		// Not numeric either: let the 2-letter lookup produce the error.
+		return getRegionISO2(s)
+	}
+	return getRegionM49(int(code))
+}
+
+// getRegionISO2 returns the Region for the given 2-letter ISO country code,
+// or the error reported by findIndex if the code is malformed or unknown.
+func getRegionISO2(s []byte) (Region, error) {
+	pos, err := findIndex(regionISO, s, "ZZ")
+	if err == nil {
+		return Region(pos) + isoRegionOffset, nil
+	}
+	return 0, err
+}
+
+// getRegionISO3 returns the Region for the given 3-letter ISO country code.
+// It returns ErrSyntax if s cannot be case-normalized to a 3-letter code and
+// the error built by NewValueError(s) if the code is unknown.
+func getRegionISO3(s []byte) (Region, error) {
+	if !tag.FixCase("ZZZ", s) {
+		return 0, ErrSyntax
+	}
+	// Scan the entries sharing the first letter and compare the remaining
+	// two letters with the third and fourth byte of each entry.
+	first := s[:1]
+	for i := regionISO.Index(first); i != -1; i = regionISO.Next(first, i) {
+		e := regionISO.Elem(i)
+		if e[2] == s[1] && e[3] == s[2] {
+			return Region(i) + isoRegionOffset, nil
+		}
+	}
+	// Fall back to the table of alternative codes, stored as consecutive
+	// 3-byte entries with their region IDs in altRegionIDs.
+	for n := 0; 3*n < len(altRegionISO3); n++ {
+		if tag.Compare(altRegionISO3[3*n:3*n+3], s) == 0 {
+			return Region(altRegionIDs[n]), nil
+		}
+	}
+	return 0, NewValueError(s)
+}
+
+// getRegionM49 returns the Region for the UN M.49 numeric code n. If n is
+// outside 1..999 or has no entry in fromM49, it returns a ValueError whose
+// value holds the decimal representation of n, truncated to the size of the
+// error's value buffer.
+func getRegionM49(n int) (Region, error) {
+	if 0 < n && n <= 999 {
+		const (
+			searchBits = 7
+			regionBits = 9
+			regionMask = 1<<regionBits - 1
+		)
+		// The top bits of n select a bucket of fromM49 via m49Index; within
+		// the bucket each entry packs the low bits of the code above the
+		// region ID.
+		idx := n >> searchBits
+		buf := fromM49[m49Index[idx]:m49Index[idx+1]]
+		val := uint16(n) << regionBits // we rely on bits shifting out
+		i := sort.Search(len(buf), func(i int) bool {
+			return buf[i] >= val
+		})
+		if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
+			return Region(r & regionMask), nil
+		}
+	}
+	var e ValueError
+	// Format n into e.v. The buffer must start out empty (e.v[:0]): a buffer
+	// created over the full e.v[:] treats those bytes as existing content and
+	// appends past them into a fresh allocation, leaving e.v untouched.
+	out := bytes.NewBuffer(e.v[:0])
+	fmt.Fprint(out, n)
+	// If the text did not fit, the buffer was reallocated; copy back as much
+	// as e.v can hold. When it did fit this is a harmless self-copy.
+	copy(e.v[:], out.Bytes())
+	return 0, e
+}
+
+// normRegion returns the replacement region recorded for r in regionOldMap,
+// or 0 if r has no entry there (i.e. is not deprecated).
+// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
+// TODO: consider mapping split up regions to new most populous one (like CLDR).
+func normRegion(r Region) Region {
+	from := uint16(r)
+	// Binary search for the first entry whose From is not below r.
+	pos := sort.Search(len(regionOldMap), func(i int) bool {
+		return from <= regionOldMap[i].From
+	})
+	if pos == len(regionOldMap) || regionOldMap[pos].From != from {
+		return 0
+	}
+	return Region(regionOldMap[pos].To)
+}
+
+// Bit flags describing a region. Region.typ returns a combination of them.
+const (
+	// iso3166UserAssigned is the flag tested by Region.IsPrivateUse.
+	iso3166UserAssigned = 1 << iota
+	// NOTE(review): presumably set for regions that have a country-code
+	// top-level domain — confirm against the table generator.
+	ccTLD
+	// NOTE(review): presumably set for regions that are valid BCP 47 region
+	// subtags — confirm against the table generator.
+	bcp47Region
+)
+
+// typ returns the entry of regionTypes for r. Callers test it against the
+// iso3166UserAssigned, ccTLD and bcp47Region flags.
+func (r Region) typ() byte {
+	return regionTypes[r]
+}
+
+// String returns the BCP 47 representation for the region.
+// It returns "ZZ" for an unspecified region. Regions below isoRegionOffset
+// are rendered as their zero-padded 3-digit M.49 code.
+func (r Region) String() string {
+	switch {
+	case r >= isoRegionOffset:
+		return regionISO.Elem(int(r - isoRegionOffset))[:2]
+	case r == 0:
+		return "ZZ"
+	default:
+		return fmt.Sprintf("%03d", r.M49())
+	}
+}
+
+// ISO3 returns the 3-letter ISO code of r.
+// Note that not all regions have a 3-letter ISO code.
+// In such cases this method returns "ZZZ".
+func (r Region) ISO3() string {
+	if r < isoRegionOffset {
+		return "ZZZ"
+	}
+	reg := regionISO.Elem(int(r - isoRegionOffset))
+	if reg[2] == 0 {
+		// The fourth byte is an offset into the alternative ISO3 table.
+		return altRegionISO3[reg[3]:][:3]
+	}
+	if reg[2] == ' ' {
+		// A space marks a region without a 3-letter code.
+		return "ZZZ"
+	}
+	// Otherwise the code is the first letter plus the third and fourth byte.
+	return reg[0:1] + reg[2:4]
+}
+
+// M49 returns the UN M.49 encoding of r, or 0 if this encoding
+// is not defined for r. The value is read directly from the m49 table,
+// indexed by r.
+func (r Region) M49() int {
+	return int(m49[r])
+}
+
+// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
+// may include private-use tags that are assigned by CLDR and used in this
+// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
+func (r Region) IsPrivateUse() bool {
+	userAssigned := r.typ() & iso3166UserAssigned
+	return userAssigned != 0
+}
+
+// Script is a compact numeric identifier for a script subtag. The zero value
+// stands for the unspecified script, which String renders as "Zzzz".
+type Script uint16
+
+// getScriptID returns the script ID for s as found in idx. The case of s is
+// normalized to the form [A-Z][a-z]{3} by findIndex, whose error is passed
+// through; on error the returned Script is 0.
+func getScriptID(idx tag.Index, s []byte) (Script, error) {
+	pos, err := findIndex(idx, s, "Zzzz")
+	id := Script(pos)
+	return id, err
+}
+
+// String returns the script code in title case.
+// It returns "Zzzz" for an unspecified script.
+func (s Script) String() string {
+	if s != 0 {
+		return script.Elem(int(s))
+	}
+	return "Zzzz"
+}
+
+// IsPrivateUse reports whether this script code is reserved for private use,
+// i.e. lies in the inclusive range Qaaa..Qabx.
+func (s Script) IsPrivateUse() bool {
+	if s < _Qaaa {
+		return false
+	}
+	return s <= _Qabx
+}
+
+const (
+	// maxAltTaglen is the length of the longest key in grandfatheredMap.
+	maxAltTaglen = len("en-US-POSIX")
+	// maxLen is the size of the fixed-length byte arrays used as keys of
+	// grandfatheredMap.
+	maxLen = maxAltTaglen
+)
+
+var (
+	// grandfatheredMap holds a mapping from legacy and grandfathered tags to
+	// their base language or index to more elaborate tag. Keys are lower-case
+	// and zero-padded to maxLen bytes. A non-negative value is a language ID;
+	// a negative value v selects entry -v-1 of altTags (see altTagIndex).
+	grandfatheredMap = map[[maxLen]byte]int16{
+		[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
+		[maxLen]byte{'i', '-', 'a', 'm', 'i'}:                          _ami, // i-ami
+		[maxLen]byte{'i', '-', 'b', 'n', 'n'}:                          _bnn, // i-bnn
+		[maxLen]byte{'i', '-', 'h', 'a', 'k'}:                          _hak, // i-hak
+		[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}:      _tlh, // i-klingon
+		[maxLen]byte{'i', '-', 'l', 'u', 'x'}:                          _lb,  // i-lux
+		[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}:           _nv,  // i-navajo
+		[maxLen]byte{'i', '-', 'p', 'w', 'n'}:                          _pwn, // i-pwn
+		[maxLen]byte{'i', '-', 't', 'a', 'o'}:                          _tao, // i-tao
+		[maxLen]byte{'i', '-', 't', 'a', 'y'}:                          _tay, // i-tay
+		[maxLen]byte{'i', '-', 't', 's', 'u'}:                          _tsu, // i-tsu
+		[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}:                     _nb,  // no-bok
+		[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}:                     _nn,  // no-nyn
+		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}:      _sfb, // sgn-BE-FR
+		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}:      _vgt, // sgn-BE-NL
+		[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}:      _sgg, // sgn-CH-DE
+		[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}:           _cmn, // zh-guoyu
+		[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}:           _hak, // zh-hakka
+		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
+		[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}:           _hsn, // zh-xiang
+
+		// Grandfathered tags with no modern replacement will be converted as
+		// follows:
+		[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
+		[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}:           -2, // en-GB-oed
+		[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}:           -3, // i-default
+		[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}:      -4, // i-enochian
+		[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}:                     -5, // i-mingo
+		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}:                          -6, // zh-min
+
+		// CLDR-specific tag.
+		[maxLen]byte{'r', 'o', 'o', 't'}:                                    0,  // root
+		[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX
+	}
+
+	// altTagIndex holds the boundaries of the replacement tags in altTags:
+	// entry i spans altTags[altTagIndex[i]:altTagIndex[i+1]].
+	altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
+
+	// altTags is the concatenation of the replacement tags for the
+	// grandfathered tags that map to a negative value in grandfatheredMap.
+	altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
+)
+
+// grandfathered looks s up in grandfatheredMap. On a hit it returns the
+// corresponding tag and true: a negative map value selects a replacement tag
+// from altTags, which is parsed with Make; a non-negative value is used
+// directly as the language ID of an otherwise empty tag. On a miss it
+// returns the zero Tag and false.
+func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
+	v, found := grandfatheredMap[s]
+	switch {
+	case !found:
+		return t, false
+	case v < 0:
+		start, end := altTagIndex[-v-1], altTagIndex[-v]
+		return Make(altTags[start:end]), true
+	}
+	t.LangID = Language(v)
+	return t, true
+}
@@ -0,0 +1,457 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"testing"
+
+	"golang.org/x/text/internal/tag"
+)
+
+// b returns a fresh byte slice holding the bytes of s. It is shorthand for
+// the lookup tests, whose functions take []byte.
+func b(s string) []byte {
+	out := make([]byte, len(s))
+	copy(out, s)
+	return out
+}
+
+// TestLangID checks getLangID, getLangISO2, getLangISO3, normLang,
+// Language.String and Language.ISO3 against a table of language subtags.
+func TestLangID(t *testing.T) {
+	// id is the input subtag, bcp47 and iso3 are the expected String and ISO3
+	// forms, norm is the expected result of normLang (empty means unchanged)
+	// and err is the error expected from getLangID.
+	tests := []struct {
+		id, bcp47, iso3, norm string
+		err                   error
+	}{
+		{id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "  ", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "   ", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "    ", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))},
+		{id: "und", bcp47: "und", iso3: "und"},
+		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
+		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
+		{id: "es", bcp47: "es", iso3: "spa"},
+		{id: "spa", bcp47: "es", iso3: "spa"},
+		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
+		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
+		{id: "ar", bcp47: "ar", iso3: "ara"},
+		{id: "kw", bcp47: "kw", iso3: "cor"},
+		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
+		{id: "ar", bcp47: "ar", iso3: "ara"},
+		{id: "kur", bcp47: "ku", iso3: "kur"},
+		{id: "nl", bcp47: "nl", iso3: "nld"},
+		{id: "NL", bcp47: "nl", iso3: "nld"},
+		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
+		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
+		{id: "und", bcp47: "und", iso3: "und"},
+		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
+		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
+		{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
+		{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
+		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
+	}
+	for i, tt := range tests {
+		// want is the ID obtained for the input; all other lookups below are
+		// expected to agree with it.
+		want, err := getLangID(b(tt.id))
+		if err != tt.err {
+			t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
+		}
+		if err != nil {
+			continue
+		}
+		if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
+			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
+		}
+		// iso3 values longer than three bytes (e.g. "yid-") skip the reverse
+		// lookup; only their first three bytes are compared further below.
+		if len(tt.iso3) == 3 {
+			if id, _ := getLangISO3(b(tt.iso3)); want != id {
+				t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
+			}
+			if id, _ := getLangID(b(tt.iso3)); want != id {
+				t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
+			}
+		}
+		norm := want
+		if tt.norm != "" {
+			norm, _ = getLangID(b(tt.norm))
+		}
+		id, _ := normLang(want)
+		if id != norm {
+			t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
+		}
+		if id := want.String(); tt.bcp47 != id {
+			t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
+		}
+		if id := want.ISO3(); tt.iso3[:3] != id {
+			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
+		}
+	}
+}
+
+// TestGrandfathered checks that Make maps each legacy or grandfathered tag
+// in to the same Tag that MustParse yields for the replacement out.
+func TestGrandfathered(t *testing.T) {
+	for _, tt := range []struct{ in, out string }{
+		{"art-lojban", "jbo"},
+		{"i-ami", "ami"},
+		{"i-bnn", "bnn"},
+		{"i-hak", "hak"},
+		{"i-klingon", "tlh"},
+		{"i-lux", "lb"},
+		{"i-navajo", "nv"},
+		{"i-pwn", "pwn"},
+		{"i-tao", "tao"},
+		{"i-tay", "tay"},
+		{"i-tsu", "tsu"},
+		{"no-bok", "nb"},
+		{"no-nyn", "nn"},
+		{"sgn-BE-FR", "sfb"},
+		{"sgn-BE-NL", "vgt"},
+		{"sgn-CH-DE", "sgg"},
+		{"sgn-ch-de", "sgg"},
+		{"zh-guoyu", "cmn"},
+		{"zh-hakka", "hak"},
+		{"zh-min-nan", "nan"},
+		{"zh-xiang", "hsn"},
+
+		// Grandfathered tags with no modern replacement will be converted as follows:
+		{"cel-gaulish", "xtg-x-cel-gaulish"},
+		{"en-GB-oed", "en-GB-oxendict"},
+		{"en-gb-oed", "en-GB-oxendict"},
+		{"i-default", "en-x-i-default"},
+		{"i-enochian", "und-x-i-enochian"},
+		{"i-mingo", "see-x-i-mingo"},
+		{"zh-min", "nan-x-zh-min"},
+
+		// CLDR-specific tags, in several case and separator variants.
+		{"root", "und"},
+		{"en_US_POSIX", "en-US-u-va-posix"},
+		{"en_us_posix", "en-US-u-va-posix"},
+		{"en-us-posix", "en-US-u-va-posix"},
+	} {
+		got := Make(tt.in)
+		want := MustParse(tt.out)
+		if got != want {
+			t.Errorf("%s: got %q; want %q", tt.in, got, want)
+		}
+	}
+}
+
+// TestRegionID checks getRegionID and getRegionISO2 against the string form
+// of the region they return. An input starting with '_' marks a case where
+// the remainder of the string must be rejected with an error.
+func TestRegionID(t *testing.T) {
+	tests := []struct {
+		in, out string
+	}{
+		{"_  ", ""},
+		{"_000", ""},
+		{"419", "419"},
+		{"AA", "AA"},
+		{"ATF", "TF"},
+		{"HV", "HV"},
+		{"CT", "CT"},
+		{"DY", "DY"},
+		{"IC", "IC"},
+		{"FQ", "FQ"},
+		{"JT", "JT"},
+		{"ZZ", "ZZ"},
+		{"EU", "EU"},
+		{"QO", "QO"},
+		{"FX", "FX"},
+	}
+	for i, tt := range tests {
+		if tt.in[0] == '_' {
+			// Error case: strip the marker and expect a failure.
+			id := tt.in[1:]
+			if _, err := getRegionID(b(id)); err == nil {
+				t.Errorf("%d:err(%s): found nil; want error", i, id)
+			}
+			continue
+		}
+		want, _ := getRegionID(b(tt.in))
+		if s := want.String(); s != tt.out {
+			t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out)
+		}
+		if len(tt.in) == 2 {
+			want, _ := getRegionISO2(b(tt.in))
+			if s := want.String(); s != tt.out {
+				t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out)
+			}
+		}
+	}
+}
+
+// TestRegionType checks that Region.typ returns the expected combination of
+// the iso3166UserAssigned, ccTLD and bcp47Region flags for a set of regions.
+func TestRegionType(t *testing.T) {
+	for _, tt := range []struct {
+		r string
+		t byte
+	}{
+		{"NL", bcp47Region | ccTLD},
+		{"EU", bcp47Region | ccTLD}, // exceptionally reserved
+		{"AN", bcp47Region | ccTLD}, // transitionally reserved
+
+		{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
+		{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47
+
+		{"XA", iso3166UserAssigned | bcp47Region},
+		{"ZZ", iso3166UserAssigned | bcp47Region},
+		{"AA", iso3166UserAssigned | bcp47Region},
+		{"QO", iso3166UserAssigned | bcp47Region},
+		{"QM", iso3166UserAssigned | bcp47Region},
+		{"XK", iso3166UserAssigned | bcp47Region},
+
+		{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
+	} {
+		r := MustParseRegion(tt.r)
+		if tp := r.typ(); tp != tt.t {
+			t.Errorf("Type(%s): got %x; want %x", tt.r, tp, tt.t)
+		}
+	}
+}
+
+// TestRegionISO3 checks Region.ISO3 for the region parsed from from, and
+// that getRegionISO3 maps the 3-letter code back to the region named by to
+// (or by from when to is empty).
+func TestRegionISO3(t *testing.T) {
+	tests := []struct {
+		from, iso3, to string
+	}{
+		{"  ", "ZZZ", "ZZ"},
+		{"000", "ZZZ", "ZZ"},
+		{"AA", "AAA", ""},
+		{"CT", "CTE", ""},
+		{"DY", "DHY", ""},
+		{"EU", "QUU", ""},
+		{"HV", "HVO", ""},
+		{"IC", "ZZZ", "ZZ"},
+		{"JT", "JTN", ""},
+		{"PZ", "PCZ", ""},
+		{"QU", "QUU", "EU"},
+		{"QO", "QOO", ""},
+		{"YD", "YMD", ""},
+		{"FQ", "ATF", "TF"},
+		{"TF", "ATF", ""},
+		{"FX", "FXX", ""},
+		{"ZZ", "ZZZ", ""},
+		{"419", "ZZZ", "ZZ"},
+	}
+	for _, tt := range tests {
+		r, _ := getRegionID(b(tt.from))
+		if s := r.ISO3(); s != tt.iso3 {
+			t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
+		}
+		if tt.iso3 == "" {
+			continue
+		}
+		// Reverse direction: the ISO3 code must resolve to the expected
+		// region.
+		want := tt.to
+		if tt.to == "" {
+			want = tt.from
+		}
+		r, _ = getRegionID(b(want))
+		if id, _ := getRegionISO3(b(tt.iso3)); id != r {
+			t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
+		}
+	}
+}
+
+// TestRegionM49 checks both directions of the UN M.49 mapping: fromTests
+// verifies that getRegionM49 maps a numeric code to the expected region (an
+// empty id means an error is expected), and toTests verifies that Region.M49
+// returns the expected numeric code for a region.
+func TestRegionM49(t *testing.T) {
+	fromTests := []struct {
+		m49 int
+		id  string
+	}{
+		{0, ""},
+		{-1, ""},
+		{1000, ""},
+		{10000, ""},
+
+		{001, "001"},
+		{104, "MM"},
+		{180, "CD"},
+		{230, "ET"},
+		{231, "ET"},
+		{249, "FX"},
+		{250, "FR"},
+		{276, "DE"},
+		{278, "DD"},
+		{280, "DE"},
+		{419, "419"},
+		{626, "TL"},
+		{736, "SD"},
+		{840, "US"},
+		{854, "BF"},
+		{891, "CS"},
+		{899, ""},
+		{958, "AA"},
+		{966, "QT"},
+		{967, "EU"},
+		{999, "ZZ"},
+	}
+	for _, tt := range fromTests {
+		id, err := getRegionM49(tt.m49)
+		// have is whether an error was actually returned; want is whether
+		// the table expects one.
+		if have, want := err != nil, tt.id == ""; have != want {
+			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
+			continue
+		}
+		r, _ := getRegionID(b(tt.id))
+		if r != id {
+			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
+		}
+	}
+
+	toTests := []struct {
+		m49 int
+		id  string
+	}{
+		{0, "000"},
+		{0, "IC"}, // Some codes don't have an ID
+
+		{001, "001"},
+		{104, "MM"},
+		{104, "BU"},
+		{180, "CD"},
+		{180, "ZR"},
+		{231, "ET"},
+		{250, "FR"},
+		{249, "FX"},
+		{276, "DE"},
+		{278, "DD"},
+		{419, "419"},
+		{626, "TL"},
+		{626, "TP"},
+		{729, "SD"},
+		{826, "GB"},
+		{840, "US"},
+		{854, "BF"},
+		{891, "YU"},
+		{891, "CS"},
+		{958, "AA"},
+		{966, "QT"},
+		{967, "EU"},
+		{967, "QU"},
+		{999, "ZZ"},
+		// For codes that don't have an M49 code use the replacement value,
+		// if available.
+		{854, "HV"}, // maps to Burkina Faso
+	}
+	for _, tt := range toTests {
+		r, _ := getRegionID(b(tt.id))
+		if r.M49() != tt.m49 {
+			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
+		}
+	}
+}
+
+// TestRegionDeprecation checks that normRegion maps each deprecated region
+// in to its replacement out, and returns 0 for a region that maps to itself.
+func TestRegionDeprecation(t *testing.T) {
+	tests := []struct{ in, out string }{
+		{"BU", "MM"},
+		{"BUR", "MM"},
+		{"CT", "KI"},
+		{"DD", "DE"},
+		{"DDR", "DE"},
+		{"DY", "BJ"},
+		{"FX", "FR"},
+		{"HV", "BF"},
+		{"JT", "UM"},
+		{"MI", "UM"},
+		{"NH", "VU"},
+		{"NQ", "AQ"},
+		{"PU", "UM"},
+		{"PZ", "PA"},
+		{"QU", "EU"},
+		{"RH", "ZW"},
+		{"TP", "TL"},
+		{"UK", "GB"},
+		{"VD", "VN"},
+		{"WK", "UM"},
+		{"YD", "YE"},
+		{"NL", "NL"},
+	}
+	for _, tt := range tests {
+		rIn, _ := getRegionID([]byte(tt.in))
+		rOut, _ := getRegionISO2([]byte(tt.out))
+		r := normRegion(rIn)
+		// A region that is not deprecated must normalize to 0.
+		if rOut == rIn && r != 0 {
+			t.Errorf("%s: was %q; want %q", tt.in, r, tt.in)
+		}
+		if rOut != rIn && r != rOut {
+			t.Errorf("%s: was %q; want %q", tt.in, r, tt.out)
+		}
+
+	}
+}
+
+// TestGetScriptID checks getScriptID against a small hand-built index: known
+// scripts must yield their position in the index regardless of input case,
+// and every input that yields 0 must also yield an error.
+func TestGetScriptID(t *testing.T) {
+	idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
+	tests := []struct {
+		in  string
+		out Script
+	}{
+		{"    ", 0},
+		{"      ", 0},
+		{"  ", 0},
+		{"", 0},
+		{"Aaaa", 0},
+		{"Bbbb", 1},
+		{"Dddd", 2},
+		{"dddd", 2},
+		{"dDDD", 2},
+		{"Eeee", 3},
+		{"Zzzz", 4},
+	}
+	for i, tt := range tests {
+		if id, err := getScriptID(idx, b(tt.in)); id != tt.out {
+			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
+		} else if id == 0 && err == nil {
+			t.Errorf("%d:%s: no error; expected one", i, tt.in)
+		}
+	}
+}
+
+// TestIsPrivateUse checks the IsPrivateUse methods of Language, Region and
+// Script, in that order, each against its own table of subtags.
+func TestIsPrivateUse(t *testing.T) {
+	type test struct {
+		s       string
+		private bool
+	}
+	// Languages.
+	tests := []test{
+		{"en", false},
+		{"und", false},
+		{"pzn", false},
+		{"qaa", true},
+		{"qtz", true},
+		{"qua", false},
+	}
+	for i, tt := range tests {
+		x, _ := getLangID([]byte(tt.s))
+		if b := x.IsPrivateUse(); b != tt.private {
+			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
+		}
+	}
+	// Regions.
+	tests = []test{
+		{"001", false},
+		{"419", false},
+		{"899", false},
+		{"900", false},
+		{"957", false},
+		{"958", true},
+		{"AA", true},
+		{"AC", false},
+		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
+		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
+		{"QO", true},  // CLDR grouping, User-assigned in ISO.
+		{"QA", false},
+		{"QM", true},
+		{"QZ", true},
+		{"XA", true},
+		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
+		{"XZ", true},
+		{"ZW", false},
+		{"ZZ", true},
+	}
+	for i, tt := range tests {
+		x, _ := getRegionID([]byte(tt.s))
+		if b := x.IsPrivateUse(); b != tt.private {
+			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
+		}
+	}
+	// Scripts.
+	tests = []test{
+		{"Latn", false},
+		{"Laaa", false}, // invalid
+		{"Qaaa", true},
+		{"Qabx", true},
+		{"Qaby", false},
+		{"Zyyy", false},
+		{"Zzzz", false},
+	}
+	for i, tt := range tests {
+		x, _ := getScriptID(script, []byte(tt.s))
+		if b := x.IsPrivateUse(); b != tt.private {
+			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
+		}
+	}
+}
@@ -0,0 +1,226 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import "errors"
+
+// scriptRegionFlags is a set of flag bits.
+// NOTE(review): presumably the type of the flags field of the likely-subtag
+// table entries read by addTags — confirm against the generated tables.
+type scriptRegionFlags uint8
+
+// Flag bits tested on the flags field of likely-subtag table entries.
+const (
+	// isList marks an entry that refers to a range of a list table rather
+	// than holding the values itself.
+	isList = 1 << iota
+	// scriptInFrom marks an entry for which the script was defined in the
+	// tag being matched.
+	scriptInFrom
+	// NOTE(review): presumably the region counterpart of scriptInFrom —
+	// confirm against the table generator.
+	regionInFrom
+)
+
+// setUndefinedLang sets the language of t to id, unless t already has a
+// language.
+func (t *Tag) setUndefinedLang(id Language) {
+	if t.LangID != 0 {
+		return
+	}
+	t.LangID = id
+}
+
+// setUndefinedScript sets the script of t to id, unless t already has a
+// script.
+func (t *Tag) setUndefinedScript(id Script) {
+	if t.ScriptID != 0 {
+		return
+	}
+	t.ScriptID = id
+}
+
+// setUndefinedRegion sets the region of t to id if t has no region yet or if
+// its current region contains id. Otherwise t is left unchanged.
+func (t *Tag) setUndefinedRegion(id Region) {
+	if t.RegionID != 0 && !t.RegionID.Contains(id) {
+		return
+	}
+	t.RegionID = id
+}
+
+// ErrMissingLikelyTagsData indicates no information was available
+// to compute likely values of missing tags. It is the error returned by
+// addTags when none of its lookups apply to the given tag.
+var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
+
+// addLikelySubtags sets subtags to their most likely value, given the locale.
+// In most cases this means setting fields for unknown values, but in some
+// cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
+// if the given locale cannot be expanded. If expansion fails or changes
+// nothing, t itself is returned.
+func (t Tag) addLikelySubtags() (Tag, error) {
+	expanded, err := addTags(t)
+	switch {
+	case err != nil:
+		return t, err
+	case expanded.equalTags(t):
+		return t, nil
+	}
+	// The subtags changed, so the string form has to be rebuilt.
+	expanded.RemakeString()
+	return expanded, nil
+}
+
+// specializeRegion attempts to specialize a group region. It reports whether
+// the region of t is a group; the region is only replaced when the language
+// and script of t match the likely entry for that group.
+func specializeRegion(t *Tag) bool {
+	group := regionInclusion[t.RegionID]
+	if group >= nRegionGroups {
+		return false
+	}
+	likely := likelyRegionGroup[group]
+	if Language(likely.lang) == t.LangID && Script(likely.script) == t.ScriptID {
+		t.RegionID = Region(likely.region)
+	}
+	return true
+}
+
+// Maximize returns a new tag with missing tags filled in. The result and
+// error are those of addTags.
+func (t Tag) Maximize() (Tag, error) {
+	full, err := addTags(t)
+	return full, err
+}
+
+// addTags fills in the language, script and region of t that are missing,
+// using the likely-subtag tables. Private use tags are returned unchanged.
+// The lookups are tried from the most specific to the least specific:
+// und-script-region, lang-script and lang-region, und-script, und-region,
+// and finally the language alone. It returns ErrMissingLikelyTagsData if
+// none of them applies.
+func addTags(t Tag) (Tag, error) {
+	// We leave private use identifiers alone.
+	if t.IsPrivateUse() {
+		return t, nil
+	}
+	if t.ScriptID != 0 && t.RegionID != 0 {
+		if t.LangID != 0 {
+			// already fully specified
+			specializeRegion(&t)
+			return t, nil
+		}
+		// Search matches for und-script-region. Note that for these cases
+		// region will never be a group so there is no need to check for this.
+		list := likelyRegion[t.RegionID : t.RegionID+1]
+		if x := list[0]; x.flags&isList != 0 {
+			// For list entries, lang holds the start offset into
+			// likelyRegionList and script the number of entries.
+			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
+		}
+		for _, x := range list {
+			// Deviating from the spec. See match_test.go for details.
+			if Script(x.script) == t.ScriptID {
+				t.setUndefinedLang(Language(x.lang))
+				return t, nil
+			}
+		}
+	}
+	if t.LangID != 0 {
+		// Search matches for lang-script and lang-region, where lang != und.
+		if t.LangID < langNoIndexOffset {
+			x := likelyLang[t.LangID]
+			if x.flags&isList != 0 {
+				// For list entries, region holds the start offset into
+				// likelyLangList and script the number of entries.
+				list := likelyLangList[x.region : x.region+uint16(x.script)]
+				if t.ScriptID != 0 {
+					for _, x := range list {
+						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
+							t.setUndefinedRegion(Region(x.region))
+							return t, nil
+						}
+					}
+				} else if t.RegionID != 0 {
+					count := 0
+					goodScript := true
+					tt := t
+					for _, x := range list {
+						// We visit all entries for which the script was not
+						// defined, including the ones where the region was not
+						// defined. This allows for proper disambiguation within
+						// regions.
+						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
+							tt.RegionID = Region(x.region)
+							tt.setUndefinedScript(Script(x.script))
+							goodScript = goodScript && tt.ScriptID == Script(x.script)
+							count++
+						}
+					}
+					if count == 1 {
+						return tt, nil
+					}
+					// Even if we fail to find a unique Region, we might have
+					// an unambiguous script.
+					if goodScript {
+						t.ScriptID = tt.ScriptID
+					}
+				}
+			}
+		}
+	} else {
+		// Search matches for und-script.
+		if t.ScriptID != 0 {
+			x := likelyScript[t.ScriptID]
+			if x.region != 0 {
+				t.setUndefinedRegion(Region(x.region))
+				t.setUndefinedLang(Language(x.lang))
+				return t, nil
+			}
+		}
+		// Search matches for und-region. If und-script-region exists, it would
+		// have been found earlier.
+		if t.RegionID != 0 {
+			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
+				x := likelyRegionGroup[i]
+				if x.region != 0 {
+					t.setUndefinedLang(Language(x.lang))
+					t.setUndefinedScript(Script(x.script))
+					t.RegionID = Region(x.region)
+				}
+			} else {
+				x := likelyRegion[t.RegionID]
+				if x.flags&isList != 0 {
+					// Use the first entry of the list.
+					x = likelyRegionList[x.lang]
+				}
+				if x.script != 0 && x.flags != scriptInFrom {
+					t.setUndefinedLang(Language(x.lang))
+					t.setUndefinedScript(Script(x.script))
+					return t, nil
+				}
+			}
+		}
+	}
+
+	// Search matches for lang.
+	if t.LangID < langNoIndexOffset {
+		x := likelyLang[t.LangID]
+		if x.flags&isList != 0 {
+			// Use the first entry of the list.
+			x = likelyLangList[x.region]
+		}
+		if x.region != 0 {
+			t.setUndefinedScript(Script(x.script))
+			t.setUndefinedRegion(Region(x.region))
+		}
+		specializeRegion(&t)
+		if t.LangID == 0 {
+			t.LangID = _en // default language
+		}
+		return t, nil
+	}
+	return t, ErrMissingLikelyTagsData
+}
+
+// setTagsFrom copies the language, script and region of id into t. Other
+// fields of t are left untouched.
+func (t *Tag) setTagsFrom(id Tag) {
+	t.LangID, t.ScriptID, t.RegionID = id.LangID, id.ScriptID, id.RegionID
+}
+
+// minimize removes the region or script subtags from t such that
+// t.addLikelySubtags() == t.minimize().addLikelySubtags().
+// On success the string form of the result is rebuilt.
+func (t Tag) minimize() (Tag, error) {
+	reduced, err := minimizeTags(t)
+	if err == nil {
+		reduced.RemakeString()
+	}
+	return reduced, err
+}
+
+// minimizeTags mimics the behavior of the ICU 51 C implementation. It looks
+// for the shortest of lang, lang-region and lang-script that expands, via
+// addTags, to the same tag as t does, and copies its subtags into t. If none
+// of them does, or t is already Und, t is returned unchanged.
+func minimizeTags(t Tag) (Tag, error) {
+	if t.equalTags(Und) {
+		return t, nil
+	}
+	full, err := addTags(t)
+	if err != nil {
+		return t, err
+	}
+	// Candidates in order of preference, shortest first.
+	candidates := [...]Tag{
+		{LangID: t.LangID},
+		{LangID: t.LangID, RegionID: t.RegionID},
+		{LangID: t.LangID, ScriptID: t.ScriptID},
+	}
+	for i := range candidates {
+		cand := candidates[i]
+		expanded, err := addTags(cand)
+		if err != nil || !full.equalTags(expanded) {
+			continue
+		}
+		t.setTagsFrom(cand)
+		break
+	}
+	return t, nil
+}
@@ -0,0 +1,161 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"flag"
+	"testing"
+)
+
+var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
+
+func TestAddLikelySubtags(t *testing.T) {
+	tests := []struct{ in, out string }{
+		{"aa", "aa-Latn-ET"},
+		{"aa-Latn", "aa-Latn-ET"},
+		{"aa-Arab", "aa-Arab-ET"},
+		{"aa-Arab-ER", "aa-Arab-ER"},
+		{"kk", "kk-Cyrl-KZ"},
+		{"kk-CN", "kk-Arab-CN"},
+		{"cmn", "cmn"},
+		{"zh-AU", "zh-Hant-AU"},
+		{"zh-VN", "zh-Hant-VN"},
+		{"zh-SG", "zh-Hans-SG"},
+		{"zh-Hant", "zh-Hant-TW"},
+		{"zh-Hani", "zh-Hani-CN"},
+		{"und-Hani", "zh-Hani-CN"},
+		{"und", "en-Latn-US"},
+		{"und-GB", "en-Latn-GB"},
+		{"und-CW", "pap-Latn-CW"},
+		{"und-YT", "fr-Latn-YT"},
+		{"und-Arab", "ar-Arab-EG"},
+		{"und-AM", "hy-Armn-AM"},
+		{"und-TW", "zh-Hant-TW"},
+		{"und-002", "en-Latn-NG"},
+		{"und-Latn-002", "en-Latn-NG"},
+		{"en-Latn-002", "en-Latn-NG"},
+		{"en-002", "en-Latn-NG"},
+		{"en-001", "en-Latn-US"},
+		{"und-003", "en-Latn-US"},
+		{"und-GB", "en-Latn-GB"},
+		{"Latn-001", "en-Latn-US"},
+		{"en-001", "en-Latn-US"},
+		{"es-419", "es-Latn-419"},
+		{"he-145", "he-Hebr-IL"},
+		{"ky-145", "ky-Latn-TR"},
+		{"kk", "kk-Cyrl-KZ"},
+		// Don't specialize duplicate and ambiguous matches.
+		{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
+		{"ku-145", "ku-Latn-TR"},  // Matches IQ, TR, and LB, but ku -> TR.
+		{"und-Arab-CC", "ms-Arab-CC"},
+		{"und-Arab-GB", "ks-Arab-GB"},
+		{"und-Hans-CC", "zh-Hans-CC"},
+		{"und-CC", "en-Latn-CC"},
+		{"sr", "sr-Cyrl-RS"},
+		{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
+		// We would like addLikelySubtags to generate the same results if the input
+		// only changes by adding tags that would otherwise have been added
+		// by the expansion.
+		// In other words:
+		//     und-AA -> xx-Scrp-AA   implies und-Scrp-AA -> xx-Scrp-AA
+		//     und-AA -> xx-Scrp-AA   implies xx-AA -> xx-Scrp-AA
+		//     und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
+		//     und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
+		//     xx -> xx-Scrp-AA       implies xx-Scrp -> xx-Scrp-AA
+		//     xx -> xx-Scrp-AA       implies xx-AA -> xx-Scrp-AA
+		//
+		// The algorithm specified in
+		//   https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
+		// Section C.10, does not handle the first case. For example,
+		// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but
+		// there is no rule for und-Latn-BJ.  According to spec, und-Latn-BJ
+		// would expand to en-Latn-BJ, violating the aforementioned principle.
+		// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
+		// if a rule of the form und-AA -> xx-Scrp-AA is defined.
+		// Note that as of version 23, CLDR has some explicitly specified
+		// entries that do not conform to these rules. The implementation
+		// will not correct these explicit inconsistencies. A later version of CLDR
+		// is supposed to fix this.
+		{"und-Latn-BJ", "fr-Latn-BJ"},
+		{"und-Bugi-ID", "bug-Bugi-ID"},
+		// regions, scripts and languages without definitions
+		{"und-Arab-AA", "ar-Arab-AA"},
+		{"und-Afak-RE", "fr-Afak-RE"},
+		{"und-Arab-GB", "ks-Arab-GB"},
+		{"abp-Arab-GB", "abp-Arab-GB"},
+		// script has preference over region
+		{"und-Arab-NL", "ar-Arab-NL"},
+		{"zza", "zza-Latn-TR"},
+		// preserve variants and extensions
+		{"de-1901", "de-Latn-DE-1901"},
+		{"de-x-abc", "de-Latn-DE-x-abc"},
+		{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
+		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
+	}
+	for i, tt := range tests {
+		in, _ := Parse(tt.in) // parse errors are ignored; only the resulting strings are compared
+		out, _ := Parse(tt.out)
+		in, _ = in.addLikelySubtags()
+		if in.String() != out.String() {
+			t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
+		}
+	}
+}
+func TestMinimize(t *testing.T) {
+	tests := []struct{ in, out string }{
+		{"aa", "aa"},
+		{"aa-Latn", "aa"},
+		{"aa-Latn-ET", "aa"},
+		{"aa-ET", "aa"},
+		{"aa-Arab", "aa-Arab"},
+		{"aa-Arab-ER", "aa-Arab-ER"},
+		{"aa-Arab-ET", "aa-Arab"},
+		{"und", "und"},
+		{"und-Latn", "und"},
+		{"und-Latn-US", "und"},
+		{"en-Latn-US", "en"},
+		{"cmn", "cmn"},
+		{"cmn-Hans", "cmn-Hans"},
+		{"cmn-Hant", "cmn-Hant"},
+		{"zh-AU", "zh-AU"},
+		{"zh-VN", "zh-VN"},
+		{"zh-SG", "zh-SG"},
+		{"zh-Hant", "zh-Hant"},
+		{"zh-Hant-TW", "zh-TW"},
+		{"zh-Hans", "zh"},
+		{"zh-Hani", "zh-Hani"},
+		{"und-Hans", "und-Hans"},
+		{"und-Hani", "und-Hani"},
+
+		{"und-CW", "und-CW"},
+		{"und-YT", "und-YT"},
+		{"und-Arab", "und-Arab"},
+		{"und-AM", "und-AM"},
+		{"und-Arab-CC", "und-Arab-CC"},
+		{"und-CC", "und-CC"},
+		{"und-Latn-BJ", "und-BJ"},
+		{"und-Bugi-ID", "und-Bugi"},
+		{"bug-Bugi-ID", "bug-Bugi"},
+		// regions, scripts and languages without definitions
+		{"und-Arab-AA", "und-Arab-AA"},
+		// preserve variants and extensions
+		{"de-Latn-1901", "de-1901"},
+		{"de-Latn-x-abc", "de-x-abc"},
+		{"de-DE-1901-x-abc", "de-1901-x-abc"},
+		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
+	}
+	for i, tt := range tests {
+		in, _ := Parse(tt.in) // parse errors are ignored; only the resulting strings are compared
+		out, _ := Parse(tt.out)
+		min, _ := in.minimize()
+		if min.String() != out.String() {
+			t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
+		}
+		max, _ := min.addLikelySubtags() // round trip: expand the minimized tag again
+		if x, _ := in.addLikelySubtags(); x.String() != max.String() { // must equal the expansion of the input
+			t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
+		}
+	}
+}
@@ -0,0 +1,608 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"sort"
+
+	"golang.org/x/text/internal/tag"
+)
+
+// isAlpha returns true if the byte is not a digit.
+// b must be an ASCII letter or digit.
+func isAlpha(b byte) bool {
+	return b > '9'
+}
+
+// isAlphaNum reports whether s consists solely of ASCII letters and digits.
+// An empty slice yields true.
+func isAlphaNum(s []byte) bool {
+	for i := 0; i < len(s); i++ {
+		switch b := s[i]; {
+		case 'a' <= b && b <= 'z':
+		case 'A' <= b && b <= 'Z':
+		case '0' <= b && b <= '9':
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// ErrSyntax is returned by any of the parsing functions when the
+// input is not well-formed, according to BCP 47.
+// TODO: return the position at which the syntax error occurred?
+var ErrSyntax = errors.New("language: tag is not well-formed")
+
+// ErrDuplicateKey is returned when a tag contains the same key twice with
+// different values in the -u section.
+var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
+
+// ValueError is returned by any of the parsing functions when the
+// input is well-formed but the respective subtag is not recognized
+// as a valid value.
+type ValueError struct {
+	v [8]byte // offending subtag, zero-padded on the right
+}
+
+// NewValueError creates a new ValueError for the given subtag. At most the
+// first 8 bytes of tag are retained.
+func NewValueError(tag []byte) ValueError {
+	e := ValueError{}
+	copy(e.v[:], tag)
+	return e
+}
+
+// tag returns the stored subtag without its zero padding.
+func (e ValueError) tag() []byte {
+	if n := bytes.IndexByte(e.v[:], 0); n >= 0 {
+		return e.v[:n]
+	}
+	return e.v[:]
+}
+
+// Error implements the error interface.
+func (e ValueError) Error() string {
+	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
+}
+
+// Subtag returns the subtag for which the error occurred.
+func (e ValueError) Subtag() string {
+	return string(e.tag())
+}
+
+// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
+type scanner struct {
+	b     []byte                      // input buffer; edited in place while scanning
+	bytes [max99thPercentileSize]byte // inline storage backing b for short string inputs
+	token []byte                      // current token; nil when scan found none
+	start int                         // start position of the current token
+	end   int                         // end position of the current token
+	next  int                         // next point for scan
+	err   error                       // recorded error; see setError for precedence
+	done  bool                        // set once scan has run out of input
+}
+
+func makeScannerString(s string) scanner {
+	scan := scanner{}
+	if len(s) <= len(scan.bytes) {
+		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
+	} else {
+		scan.b = []byte(s)
+	}
+	scan.init()
+	return scan
+}
+
+// makeScanner returns a scanner using b as the input buffer.
+// b is not copied and may be modified by the scanner routines.
+func makeScanner(b []byte) scanner {
+	scan := scanner{b: b}
+	scan.init()
+	return scan
+}
+
+// init rewrites every '_' in the buffer to '-' and then scans the first
+// token.
+func (s *scanner) init() {
+	for i := range s.b {
+		if s.b[i] == '_' {
+			s.b[i] = '-'
+		}
+	}
+	s.scan()
+}
+
+// restToLower converts the string between start and end to lower case.
+func (s *scanner) toLower(start, end int) {
+	for i := start; i < end; i++ {
+		c := s.b[i]
+		if 'A' <= c && c <= 'Z' {
+			s.b[i] += 'a' - 'A'
+		}
+	}
+}
+
+// setError records e as the scanner's error. An error already recorded is
+// kept, with one exception: ErrSyntax replaces a recorded error that is not
+// itself ErrSyntax.
+func (s *scanner) setError(e error) {
+	switch {
+	case s.err == nil:
+		s.err = e
+	case e == ErrSyntax && s.err != ErrSyntax:
+		s.err = e
+	}
+}
+
+// resizeRange shrinks or grows the buffer at position oldStart such that
+// a new string of size newSize can fit between oldStart and oldEnd.
+// Sets the scan point to after the resized range.
+func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
+	s.start = oldStart
+	if end := oldStart + newSize; end != oldEnd {
+		diff := end - oldEnd // positive when growing, negative when shrinking
+		var b []byte
+		if n := len(s.b) + diff; n > cap(s.b) {
+			b = make([]byte, n) // capacity exhausted: continue in a new buffer
+			copy(b, s.b[:oldStart])
+		} else {
+			b = s.b[:n]
+		}
+		copy(b[end:], s.b[oldEnd:]) // move the tail so that it follows the resized range
+		s.b = b
+		s.next = end + (s.next - s.end) // keep next at its previous distance from the token end
+		s.end = end
+	}
+}
+
+// replace substitutes repl for the current token, resizing the token's
+// range in the buffer first when the lengths differ.
+func (s *scanner) replace(repl string) {
+	from, to := s.start, s.end
+	s.resizeRange(from, to, len(repl))
+	copy(s.b[s.start:], repl)
+}
+
+// gobble records e via setError and removes the current token from the input.
+// Caller must call scan after calling gobble.
+func (s *scanner) gobble(e error) {
+	s.setError(e)
+	if s.start == 0 {
+		s.b = s.b[:+copy(s.b, s.b[s.next:])] // first token: drop everything before the next scan point
+		s.end = 0
+	} else {
+		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])] // drop the token and the byte preceding it
+		s.end = s.start - 1
+	}
+	s.next = s.start // resume scanning where the removed token began
+}
+
+// deleteRange removes the given range from s.b before the current token.
+// The current token's positions and the scan point are shifted left by the
+// number of bytes removed.
+func (s *scanner) deleteRange(start, end int) {
+	kept := copy(s.b[start:], s.b[end:])
+	s.b = s.b[:start+kept]
+	removed := end - start
+	s.start -= removed
+	s.end -= removed
+	s.next -= removed
+}
+
+// scan parses the next token of a BCP 47 string.  Tokens that are larger
+// than 8 characters or include non-alphanumeric characters result in an error
+// and are gobbled and removed from the output.
+// It returns the end position of the last token consumed.
+func (s *scanner) scan() (end int) {
+	end = s.end // end of the token that was current on entry
+	s.token = nil
+	for s.start = s.next; s.next < len(s.b); {
+		i := bytes.IndexByte(s.b[s.next:], '-')
+		if i == -1 { // no further separator: the token runs to the end of the buffer
+			s.end = len(s.b)
+			s.next = len(s.b)
+			i = s.end - s.start
+		} else {
+			s.end = s.next + i
+			s.next = s.end + 1
+		}
+		token := s.b[s.start:s.end]
+		if i < 1 || i > 8 || !isAlphaNum(token) { // empty, too long or not alphanumeric
+			s.gobble(ErrSyntax)
+			continue
+		}
+		s.token = token
+		return end
+	}
+	if n := len(s.b); n > 0 && s.b[n-1] == '-' { // a trailing separator is an error and is trimmed
+		s.setError(ErrSyntax)
+		s.b = s.b[:len(s.b)-1]
+	}
+	s.done = true // input exhausted; s.token stays nil
+	return end
+}
+
+// acceptMinSize scans ahead for as long as the tokens found have at least
+// min bytes. It returns the end position of the last such token, or the end
+// of the token that was current on entry if the first scan already fails
+// the size test.
+func (s *scanner) acceptMinSize(min int) (end int) {
+	end = s.end
+	for s.scan(); len(s.token) >= min; s.scan() {
+		end = s.end
+	}
+	return end
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+func Parse(s string) (t Tag, err error) {
+	// TODO: consider supporting old-style locale key-value pairs.
+	if s == "" {
+		return Und, ErrSyntax
+	}
+	defer func() {
+		if recover() != nil { // a panic raised while parsing is reported as Und with ErrSyntax
+			t = Und
+			err = ErrSyntax
+			return
+		}
+	}()
+	if len(s) <= maxAltTaglen { // short inputs are first looked up via grandfathered
+		b := [maxAltTaglen]byte{}
+		for i, c := range s {
+			// Generating invalid UTF-8 is okay as it won't match.
+			if 'A' <= c && c <= 'Z' {
+				c += 'a' - 'A'
+			} else if c == '_' {
+				c = '-'
+			}
+			b[i] = byte(c)
+		}
+		if t, ok := grandfathered(b); ok {
+			return t, nil
+		}
+	}
+	scan := makeScannerString(s)
+	return parse(&scan, s)
+}
+
+// parse builds a Tag from the tokens delivered by scan, whose first token
+// has already been scanned. s is the original input; it is reused as the
+// string form of the Tag when, cut to the parsed length, tag.Compare reports
+// it equal to the normalized buffer. The returned error is the one recorded
+// in scan, except for the two early ErrSyntax returns.
+func parse(scan *scanner, s string) (t Tag, err error) {
+	t = Und
+	var end int
+	switch n := len(scan.token); {
+	case n <= 1:
+		// Lower-case first so that the private-use marker is matched
+		// regardless of the case used in the input.
+		scan.toLower(0, len(scan.b))
+		if n == 0 || scan.token[0] != 'x' {
+			return t, ErrSyntax
+		}
+		end = parseExtensions(scan)
+	case n >= 4:
+		return Und, ErrSyntax
+	default: // the usual case
+		t, end = parseTag(scan, true)
+		switch {
+		case len(scan.token) == 1:
+			t.pExt = uint16(end)
+			end = parseExtensions(scan)
+		case end < len(scan.b):
+			// Unparsed input remains: flag it and cut it off.
+			scan.setError(ErrSyntax)
+			scan.b = scan.b[:end]
+		}
+	}
+	if int(t.pVariant) >= len(scan.b) {
+		// Nothing beyond the core subtags: no string form is stored.
+		t.pVariant, t.pExt = 0, 0
+		return t, scan.err
+	}
+	if end < len(s) {
+		s = s[:end]
+	}
+	if len(s) == 0 || tag.Compare(s, scan.b) != 0 {
+		t.str = string(scan.b)
+	} else {
+		t.str = s
+	}
+	return t, scan.err
+}
+
+// parseTag parses language, script, region and variants.
+// It returns a Tag and the end position in the input that was parsed.
+// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
+func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
+	var e error
+	// TODO: set an error if an unknown lang, script or region is encountered.
+	t.LangID, e = getLangID(scan.token)
+	scan.setError(e)
+	scan.replace(t.LangID.String()) // store the language in the form returned by String
+	langStart := scan.start
+	end = scan.scan()
+	for len(scan.token) == 3 && isAlpha(scan.token[0]) { // three-letter alphabetic tokens are treated as extlangs
+		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
+		// to a tag of the form <extlang>.
+		if doNorm {
+			lang, e := getLangID(scan.token)
+			if lang != 0 {
+				t.LangID = lang
+				langStr := lang.String()
+				copy(scan.b[langStart:], langStr) // overwrite the primary language in place
+				scan.b[langStart+len(langStr)] = '-'
+				scan.start = langStart + len(langStr) + 1
+			}
+			scan.gobble(e) // remove the extlang token itself
+		}
+		end = scan.scan()
+	}
+	if len(scan.token) == 4 && isAlpha(scan.token[0]) { // four-letter alphabetic token: script
+		t.ScriptID, e = getScriptID(script, scan.token)
+		if t.ScriptID == 0 {
+			scan.gobble(e)
+		}
+		end = scan.scan()
+	}
+	if n := len(scan.token); n >= 2 && n <= 3 { // two or three characters: region
+		t.RegionID, e = getRegionID(scan.token)
+		if t.RegionID == 0 {
+			scan.gobble(e)
+		} else {
+			scan.replace(t.RegionID.String())
+		}
+		end = scan.scan()
+	}
+	scan.toLower(scan.start, len(scan.b)) // everything from the current token on is lower-cased
+	t.pVariant = byte(end)
+	end = parseVariants(scan, end, t)
+	t.pExt = uint16(end)
+	return t, end
+}
+
+var separator = []byte{'-'}
+
+// parseVariants scans tokens as long as each token is a valid variant string.
+// Duplicate variants are removed. The parameter t is not used by the body.
+func parseVariants(scan *scanner, end int, t Tag) int {
+	start := scan.start
+	varIDBuf := [4]uint8{}
+	variantBuf := [4][]byte{}
+	varID := varIDBuf[:0]
+	variant := variantBuf[:0]
+	last := -1
+	needSort := false
+	for ; len(scan.token) >= 4; scan.scan() {
+		// TODO: measure the impact of needing this conversion and redesign
+		// the data structure if there is an issue.
+		v, ok := variantIndex[string(scan.token)]
+		if !ok {
+			// unknown variant
+			// TODO: allow user-defined variants?
+			scan.gobble(NewValueError(scan.token))
+			continue
+		}
+		varID = append(varID, v)
+		variant = append(variant, scan.token)
+		if !needSort {
+			if last < int(v) {
+				last = int(v)
+			} else {
+				needSort = true // index did not increase: out of order or duplicate
+				// There are no legal combinations of more than 7 variants
+				// (and this is by no means a useful sequence).
+				const maxVariants = 8
+				if len(varID) > maxVariants {
+					break
+				}
+			}
+		}
+		end = scan.end
+	}
+	if needSort {
+		sort.Sort(variantsSort{varID, variant})
+		k, l := 0, -1 // k: write position; l: index of the last variant kept
+		for i, v := range varID {
+			w := int(v)
+			if l == w {
+				// Remove duplicates.
+				continue
+			}
+			varID[k] = varID[i]
+			variant[k] = variant[i]
+			k++
+			l = w
+		}
+		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
+			end = start - 1
+		} else {
+			scan.resizeRange(start, end, len(str)) // fit the sorted, deduplicated variants into the buffer
+			copy(scan.b[scan.start:], str)
+			end = scan.end
+		}
+	}
+	return end
+}
+
+// variantsSort orders variant subtags by their index while keeping the
+// parallel slices of indices and subtag bytes in step.
+type variantsSort struct {
+	i []uint8  // variant indices; the sort key
+	v [][]byte // variant subtags, parallel to i
+}
+
+// Len returns the number of variants.
+func (vs variantsSort) Len() int { return len(vs.i) }
+
+// Swap exchanges both the indices and the subtags at positions a and b.
+func (vs variantsSort) Swap(a, b int) {
+	vs.i[a], vs.i[b] = vs.i[b], vs.i[a]
+	vs.v[a], vs.v[b] = vs.v[b], vs.v[a]
+}
+
+// Less orders by variant index.
+func (vs variantsSort) Less(a, b int) bool { return vs.i[a] < vs.i[b] }
+
+// bytesSort orders byte slices by their first n bytes only. Every element
+// must hold at least as many bytes as are inspected.
+type bytesSort struct {
+	b [][]byte
+	n int // first n bytes to compare
+}
+
+// Len returns the number of slices.
+func (b bytesSort) Len() int { return len(b.b) }
+
+// Swap exchanges the slices at positions i and j.
+func (b bytesSort) Swap(i, j int) {
+	tmp := b.b[i]
+	b.b[i] = b.b[j]
+	b.b[j] = tmp
+}
+
+// Less compares the first n bytes of the slices at positions i and j;
+// slices that agree on all of them are reported as not less.
+func (b bytesSort) Less(i, j int) bool {
+	x, y := b.b[i], b.b[j]
+	for k := 0; k < b.n; k++ {
+		if x[k] != y[k] {
+			return x[k] < y[k]
+		}
+	}
+	return false
+}
+
+// parseExtensions parses and normalizes the extensions in the buffer.
+// It returns the last position of scan.b that is part of any extension.
+// It also trims scan.b to remove excess parts accordingly.
+func parseExtensions(scan *scanner) int {
+	start := scan.start
+	exts := [][]byte{}
+	private := []byte{}
+	end := scan.end
+	for len(scan.token) == 1 { // each single-character token starts an extension
+		extStart := scan.start
+		ext := scan.token[0]
+		end = parseExtension(scan)
+		extension := scan.b[extStart:end]
+		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) { // no body, or a body too short for a non-private extension
+			scan.setError(ErrSyntax)
+			end = extStart
+			continue
+		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) { // first extension is private use or the scanner reached the end: keep as is
+			scan.b = scan.b[:end]
+			return end
+		} else if ext == 'x' {
+			private = extension // private use ends the list and is appended after sorting
+			break
+		}
+		exts = append(exts, extension)
+	}
+	sort.Sort(bytesSort{exts, 1}) // order the extensions by their first byte
+	if len(private) > 0 {
+		exts = append(exts, private)
+	}
+	scan.b = scan.b[:start]
+	if len(exts) > 0 {
+		scan.b = append(scan.b, bytes.Join(exts, separator)...)
+	} else if start > 0 {
+		// Strip trailing '-'.
+		scan.b = scan.b[:start-1]
+	}
+	return end
+}
+
+// parseExtension parses a single extension and returns the position of
+// the extension end. The current token must be the one-byte extension name.
+func parseExtension(scan *scanner) int {
+	start, end := scan.start, scan.end
+	switch scan.token[0] {
+	case 'u': // https://www.ietf.org/rfc/rfc6067.txt
+		attrStart := end
+		scan.scan()
+		for last := []byte{}; len(scan.token) > 2; scan.scan() {
+			if bytes.Compare(scan.token, last) != -1 { // NOTE(review): last starts empty, so the first attribute always satisfies this
+				// Attributes are unsorted. Start over from scratch.
+				p := attrStart + 1
+				scan.next = p
+				attrs := [][]byte{}
+				for scan.scan(); len(scan.token) > 2; scan.scan() {
+					attrs = append(attrs, scan.token)
+					end = scan.end
+				}
+				sort.Sort(bytesSort{attrs, 3}) // only the first three bytes of each attribute are compared
+				copy(scan.b[p:], bytes.Join(attrs, separator))
+				break
+			}
+			last = scan.token
+			end = scan.end
+		}
+		// Scan key-type sequences. A key is of length 2 and may be followed
+		// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
+		var last, key []byte
+		for attrEnd := end; len(scan.token) == 2; last = key {
+			key = scan.token
+			end = scan.end
+			for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
+				end = scan.end
+			}
+			// TODO: check key value validity
+			if bytes.Compare(key, last) != 1 || scan.err != nil {
+				// We have an invalid key or the keys are not sorted.
+				// Start scanning keys from scratch and reorder.
+				p := attrEnd + 1
+				scan.next = p
+				keys := [][]byte{}
+				for scan.scan(); len(scan.token) == 2; {
+					keyStart := scan.start
+					end = scan.end
+					for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
+						end = scan.end
+					}
+					keys = append(keys, scan.b[keyStart:end]) // the key together with its type subtags
+				}
+				sort.Stable(bytesSort{keys, 2}) // order by the two-byte key, keeping input order among equal keys
+				if n := len(keys); n > 0 {
+					k := 0
+					for i := 1; i < n; i++ {
+						if !bytes.Equal(keys[k][:2], keys[i][:2]) {
+							k++
+							keys[k] = keys[i]
+						} else if !bytes.Equal(keys[k], keys[i]) { // same key, different value: first one is kept
+							scan.setError(ErrDuplicateKey)
+						}
+					}
+					keys = keys[:k+1]
+				}
+				reordered := bytes.Join(keys, separator)
+				if e := p + len(reordered); e < end { // duplicates were dropped: close the gap
+					scan.deleteRange(e, end)
+					end = e
+				}
+				copy(scan.b[p:], reordered)
+				break
+			}
+		}
+	case 't': // https://www.ietf.org/rfc/rfc6497.txt
+		scan.scan()
+		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { // a language subtag follows: parse it as an embedded tag
+			_, end = parseTag(scan, false)
+			scan.toLower(start, end)
+		}
+		for len(scan.token) == 2 && !isAlpha(scan.token[1]) { // two-byte tokens ending in a digit, each followed by tokens of 3+ bytes
+			end = scan.acceptMinSize(3)
+		}
+	case 'x':
+		end = scan.acceptMinSize(1)
+	default:
+		end = scan.acceptMinSize(2)
+	}
+	return end
+}
+
+// getExtension returns the extension of s that starts at position p,
+// together with the position at which it ends. p may point at the '-' that
+// precedes the extension. An extension starting with 'x' always runs to the
+// end of s; any other one ends where nextExtension finds the next extension.
+func getExtension(s string, p int) (end int, ext string) {
+	if s[p] == '-' {
+		p++
+	}
+	if s[p] != 'x' {
+		end = nextExtension(s, p)
+		return end, s[p:end]
+	}
+	return len(s), s[p:]
+}
+
+// nextExtension finds the next extension within the string, searching
+// for the -<char>- pattern from position p. It returns the position of the
+// leading '-' of that pattern, or len(s) if there is none.
+// In the vast majority of cases, language tags will have at most
+// one extension and extensions tend to be small.
+func nextExtension(s string, p int) int {
+	limit := len(s) - 3
+	for p < limit {
+		if s[p] != '-' {
+			p++
+			continue
+		}
+		if s[p+2] == '-' {
+			return p
+		}
+		p += 3
+	}
+	return len(s)
+}
@@ -0,0 +1,371 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"golang.org/x/text/internal/tag"
+)
+
+type scanTest struct {
+	ok  bool     // true if scanning does not result in an error
+	in  string   // input handed to the scanner
+	tok []string // the expected tokens
+}
+
+var tests = []scanTest{
+	{true, "", []string{}},
+	{true, "1", []string{"1"}},
+	{true, "en", []string{"en"}},
+	{true, "root", []string{"root"}},
+	{true, "maxchars", []string{"maxchars"}}, // 8 characters: the longest token scan accepts
+	{false, "bad/", []string{}},
+	{false, "morethan8", []string{}}, // 9 characters: too long
+	{false, "-", []string{}},
+	{false, "----", []string{}},
+	{false, "_", []string{}},
+	{true, "en-US", []string{"en", "US"}},
+	{true, "en_US", []string{"en", "US"}}, // '_' is accepted as a separator
+	{false, "en-US-", []string{"en", "US"}},
+	{false, "en-US--", []string{"en", "US"}},
+	{false, "en-US---", []string{"en", "US"}},
+	{false, "en--US", []string{"en", "US"}},
+	{false, "-en-US", []string{"en", "US"}},
+	{false, "-en--US-", []string{"en", "US"}},
+	{false, "-en--US-", []string{"en", "US"}},
+	{false, "en-.-US", []string{"en", "US"}},
+	{false, ".-en--US-.", []string{"en", "US"}},
+	{false, "en-u.-US", []string{"en", "US"}},
+	{true, "en-u1-US", []string{"en", "u1", "US"}},
+	{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
+	{false, "moreThan8-moreThan8-e", []string{"e"}},
+}
+
+// TestScan feeds each entry of tests to a scanner and checks the tokens it
+// yields, the buffer left behind, and whether an error was recorded.
+func TestScan(t *testing.T) {
+	for i, tt := range tests {
+		scan := makeScannerString(tt.in)
+		for j := 0; !scan.done; j++ {
+			if j < len(tt.tok) && tag.Compare(tt.tok[j], scan.token) != 0 {
+				t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
+				break
+			}
+			if j >= len(tt.tok) {
+				t.Errorf("%d: extra token %q", i, scan.token)
+			}
+			scan.scan()
+		}
+		want := strings.Join(tt.tok, "-")
+		got := bytes.Replace(scan.b, b("_"), b("-"), -1)
+		if tag.Compare(want, got) != 0 {
+			t.Errorf("%d: input: found %q; want %q", i, scan.b, want)
+		}
+		if ok := scan.err == nil; ok != tt.ok {
+			t.Errorf("%d: ok: found %v; want %v", i, ok, tt.ok)
+		}
+	}
+}
+
+// TestAcceptMinSize checks, for every entry of tests and every minimum size
+// from 1 to 8, that acceptMinSize stops at the end of the leading run of
+// expected tokens that are at least that long.
+func TestAcceptMinSize(t *testing.T) {
+	for i, tt := range tests {
+		for sz := 1; sz <= 8; sz++ {
+			scan := makeScannerString(tt.in)
+			scan.end, scan.next = 0, 0
+			end := scan.acceptMinSize(sz)
+			// Sum the lengths of the successive tokens with a minimum
+			// size, counting one separator between neighbours.
+			want := 0
+			for k, tok := range tt.tok {
+				if len(tok) < sz {
+					break
+				}
+				if k > 0 {
+					want++
+				}
+				want += len(tok)
+			}
+			if end != want {
+				t.Errorf("%d:%d: found len %d; want %d", i, sz, end, want)
+			}
+		}
+	}
+}
+
+type parseTest struct {
+	i                    int      // the index of this test
+	in                   string   // input handed to the parser or scanner
+	lang, script, region string   // expected core subtags
+	variants, ext        string   // expected variants and extension part
+	extList              []string // only used when more than one extension is present
+	invalid              bool     // presumably: an error is expected for this input; confirm against the checks
+	rewrite              bool     // special rewrite not handled by parseTag
+	changed              bool     // string needed to be reformatted
+}
+
+func parseTests() []parseTest {
+	tests := []parseTest{
+		{in: "root", lang: "und"},
+		{in: "und", lang: "und"},
+		{in: "en", lang: "en"},
+		{in: "xy", lang: "und", invalid: true},
+		{in: "en-ZY", lang: "en", invalid: true},
+		{in: "gsw", lang: "gsw"},
+		{in: "sr_Latn", lang: "sr", script: "Latn"},
+		{in: "af-Arab", lang: "af", script: "Arab"},
+		{in: "nl-BE", lang: "nl", region: "BE"},
+		{in: "es-419", lang: "es", region: "419"},
+		{in: "und-001", lang: "und", region: "001"},
+		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
+		// Variants
+		{in: "de-1901", lang: "de", variants: "1901"},
+		// Accept with unsuppressed script.
+		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
+		// Specialized.
+		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
+		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
+		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
+		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
+		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
+		// Maximum number of variants while adhering to prefix rules.
+		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
+
+		// Sorting.
+		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
+		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
+
+		// Duplicate variants are removed, but this is not an error.
+		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
+
+		// Variants that do not have correct prefixes. We still accept these.
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
+		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
+		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
+
+		// Invalid variant.
+		{in: "de-1902", lang: "de", variants: "", invalid: true},
+
+		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
+		// private use and extensions
+		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
+		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
+		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
+		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
+		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
+		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
+		{in: "en-v-c", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
+		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
+		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
+		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
+		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
+		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
+		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
+		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
+		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
+		{in: "en-u-c", lang: "en", ext: "", invalid: true},
+		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
+		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
+		{in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
+		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
+		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
+		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
+		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
+		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
+		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
+		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
+		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, changed: true},
+		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, changed: true},
+		// LDML spec is not specific about it, but remove duplicates and return an error if the values differ.
+		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
+		// No change as the result is a substring of the original!
+		{in: "en-US-u-cu-xau-cu-eur", lang: "en", region: "US", ext: "u-cu-xau", invalid: true, changed: false},
+		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
+		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
+		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
+		// Not necessary to have changed here.
+		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
+		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
+		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
+		{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
+		{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
+		{in: "fr-est", lang: "et", changed: false},
+		{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false},
+		// The same input here is used in both TestParse and TestParseExtensions.
+		// changed should be true for this input in TestParse but changed should be false for this input in TestParseExtensions
+		// because the entire input has been reformatted but the extension part hasn't.
+		// {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
+		// invalid
+		{in: "", lang: "und", invalid: true},
+		{in: "-", lang: "und", invalid: true},
+		{in: "x", lang: "und", invalid: true},
+		{in: "x-", lang: "und", invalid: true},
+		{in: "x--", lang: "und", invalid: true},
+		{in: "a-a-b-c-d", lang: "und", invalid: true},
+		{in: "en-", lang: "en", invalid: true},
+		{in: "enne-", lang: "und", invalid: true},
+		{in: "en.", lang: "und", invalid: true},
+		{in: "en.-latn", lang: "und", invalid: true},
+		{in: "en.-en", lang: "en", invalid: true},
+		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
+		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
+		// TODO: check key-value validity
+		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
+		{in: "en-t-abcd", lang: "en", invalid: true},
+		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
+		// rewrites (more tests in TestGrandfathered)
+		{in: "zh-min-nan", lang: "nan"},
+		{in: "zh-yue", lang: "yue"},
+		{in: "zh-xiang", lang: "hsn", rewrite: true},
+		{in: "zh-guoyu", lang: "cmn", rewrite: true},
+		{in: "iw", lang: "iw"},
+		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
+		{in: "i-klingon", lang: "tlh", rewrite: true},
+	}
+	for i, tt := range tests {
+		tests[i].i = i // record the position of each entry in the table
+		if tt.extList != nil {
+			tests[i].ext = strings.Join(tt.extList, "-") // derive ext from extList
+		}
+		if tt.ext != "" && tt.extList == nil {
+			tests[i].extList = []string{tt.ext} // derive extList from ext
+		}
+	}
+	return tests
+}
+
+// TestParseExtensions runs parseExtensions on every parseTests entry that
+// expects extensions and is not a rewrite case, then checks the resulting
+// extension string and whether it differs from the input.
+func TestParseExtensions(t *testing.T) {
+	for idx, tc := range parseTests() {
+		if tc.rewrite || tc.ext == "" {
+			continue
+		}
+		sc := makeScannerString(tc.in)
+		// Unless the second byte is '-', move the scanner to the position
+		// reported by nextExtension before parsing.
+		if len(sc.b) > 1 && sc.b[1] != '-' {
+			pos := nextExtension(string(sc.b), 0)
+			sc.end, sc.next = pos, pos+1
+			sc.scan()
+		}
+		// Remember where the extensions begin; parsing may rewrite the buffer.
+		from := sc.start
+		sc.toLower(from, len(sc.b))
+		parseExtensions(&sc)
+		got := string(sc.b[from:])
+		if got != tc.ext {
+			t.Errorf("%d(%s): ext was %v; want %v", idx, tc.in, got, tc.ext)
+		}
+		// The input counts as changed when the parsed extensions are no
+		// longer a prefix of the corresponding part of the input.
+		changed := !strings.HasPrefix(tc.in[from:], got)
+		if changed != tc.changed {
+			t.Errorf("%d(%s): changed was %v; want %v", idx, tc.in, changed, tc.changed)
+		}
+	}
+}
+
+// partChecks runs one subtest per parseTests entry. For each entry it calls f
+// to obtain a Tag (or a request to skip the entry) and compares the tag's
+// language, script, region, variants and extensions with the expectations
+// stored in the entry. f receives a pointer to the entry, so any fields it
+// modifies are the ones used for the comparisons.
+func partChecks(t *testing.T, f func(*testing.T, *parseTest) (Tag, bool)) {
+	for i, tt := range parseTests() {
+		t.Run(tt.in, func(t *testing.T) {
+			got, skip := f(t, &tt)
+			if skip {
+				return
+			}
+			wantLang, _ := getLangID(b(tt.lang))
+			if wantLang != got.LangID {
+				t.Errorf("%d: lang was %q; want %q", i, got.LangID, wantLang)
+			}
+			wantScript, _ := getScriptID(script, b(tt.script))
+			if wantScript != got.ScriptID {
+				t.Errorf("%d: script was %q; want %q", i, got.ScriptID, wantScript)
+			}
+			wantRegion, _ := getRegionID(b(tt.region))
+			if wantRegion != got.RegionID {
+				t.Errorf("%d: region was %q; want %q", i, got.RegionID, wantRegion)
+			}
+			// Without a string form there are no variants or extensions to check.
+			if got.str == "" {
+				return
+			}
+			varStart, extPos := int(got.pVariant), int(got.pExt)
+			// Step over the separator when the variant section is non-empty.
+			if varStart < extPos {
+				varStart++
+			}
+			if s, g := got.str[varStart:extPos], tt.variants; s != g {
+				t.Errorf("%d: variants was %q; want %q", i, s, g)
+			}
+			extStart := extPos
+			// Step over the separator when extensions follow a non-empty prefix.
+			if extStart > 0 && extStart < len(got.str) {
+				extStart++
+			}
+			if s, g := got.str[extStart:], tt.ext; s != g {
+				t.Errorf("%d: extensions were %q; want %q", i, s, g)
+			}
+		})
+	}
+}
+
+// TestParseTag feeds the parseTests table through parseTag and lets
+// partChecks verify the resulting parts. Inputs starting with "x-" and
+// rewrite cases are skipped. The extension expectations of each entry are
+// cleared because the tag's string is cut at the end offset parseTag returns.
+func TestParseTag(t *testing.T) {
+	check := func(t *testing.T, tc *parseTest) (Tag, bool) {
+		if tc.rewrite || strings.HasPrefix(tc.in, "x-") {
+			return Tag{}, true
+		}
+		sc := makeScannerString(tc.in)
+		tag, n := parseTag(&sc, true)
+		tag.str = string(sc.b[:n])
+		tc.ext, tc.extList = "", []string{}
+		return tag, false
+	}
+	partChecks(t, check)
+}
+
+// TestParse runs Parse on every parseTests entry. In addition to the per-part
+// comparisons done by partChecks, it verifies that the parsed tag survives a
+// String/Parse round trip and that the entry's ext, changed and invalid
+// expectations hold.
+func TestParse(t *testing.T) {
+	partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
+		id, err := Parse(tt.in)
+		// Recover the extension part of the tag's string form: a string
+		// starting with "x-" is taken whole; otherwise the extensions are
+		// whatever follows the separator at position pExt.
+		ext := ""
+		if id.str != "" {
+			if strings.HasPrefix(id.str, "x-") {
+				ext = id.str
+			} else if int(id.pExt) < len(id.str) && id.pExt > 0 {
+				ext = id.str[id.pExt+1:]
+			}
+		}
+		// Reparsing the string form must yield the same string form. The
+		// reparsed value is what we got; the original is what we want.
+		want := id.String()
+		if tag, _ := Parse(want); tag.String() != want {
+			t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, tag.String(), want)
+		}
+		if ext != tt.ext {
+			t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext)
+		}
+		// The input counts as changed only when the tag carries a string
+		// form and that string is not a prefix of the input.
+		changed := id.str != "" && !strings.HasPrefix(tt.in, id.str)
+		if changed != tt.changed {
+			t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed)
+		}
+		if (err != nil) != tt.invalid {
+			t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err)
+		}
+		return id, false
+	})
+}
+
+func TestErrors(t *testing.T) {
+	mkInvalid := func(s string) error {
+		return NewValueError([]byte(s))
+	}
+	tests := []struct {
+		in  string
+		out error
+	}{
+		// invalid subtags.
+		{"ac", mkInvalid("ac")},
+		{"AC", mkInvalid("ac")},
+		{"aa-Uuuu", mkInvalid("Uuuu")},
+		{"aa-AB", mkInvalid("AB")},
+		// ill-formed wins over invalid.
+		{"ac-u", ErrSyntax},
+		{"ac-u-ca", mkInvalid("ac")},
+		{"ac-u-ca-co-pinyin", mkInvalid("ac")},
+		{"noob", ErrSyntax},
+	}
+	for _, tt := range tests {
+		_, err := Parse(tt.in)
+		if err != tt.out {
+			t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
+		}
+	}
+}
@@ -0,0 +1,48 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// MustParse calls Parse on the BCP 47 tag s and returns the resulting Tag.
+// It panics with the error from Parse if s cannot be parsed, which simplifies
+// initialization of Tag values.
+func MustParse(s string) Tag {
+	tag, parseErr := Parse(s)
+	if parseErr != nil {
+		panic(parseErr)
+	}
+	return tag
+}
+
+// MustParseBase calls ParseBase on s and returns the resulting Language.
+// It panics with the error from ParseBase if s cannot be parsed, which
+// simplifies initialization of Language values.
+func MustParseBase(s string) Language {
+	lang, parseErr := ParseBase(s)
+	if parseErr != nil {
+		panic(parseErr)
+	}
+	return lang
+}
+
+// MustParseScript calls ParseScript on s and returns the resulting Script.
+// It panics with the error from ParseScript if s cannot be parsed, which
+// simplifies initialization of Script values.
+func MustParseScript(s string) Script {
+	script, parseErr := ParseScript(s)
+	if parseErr != nil {
+		panic(parseErr)
+	}
+	return script
+}
+
+// MustParseRegion calls ParseRegion on s and returns the resulting Region.
+// It panics with the error from ParseRegion if s cannot be parsed, which
+// simplifies initialization of Region values.
+func MustParseRegion(s string) Region {
+	region, parseErr := ParseRegion(s)
+	if parseErr != nil {
+		panic(parseErr)
+	}
+	return region
+}
+
+// Und is the root language. It is the zero value of Tag.
+var Und = Tag{}