// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

package main

import (
	"log"
	"os"
	"strconv"
	"strings"
	"text/template"
	"unicode"
)

const (
	// path is the name of the file that writeUnicode creates. It is a
	// relative path, so the file lands in the current working directory.
	path = "unicode.go"

	// unicodeTmplText is the text/template source for the generated file.
	// It reads two keys from its data: "withPunc" and "withoutPunc", each of
	// which is inserted verbatim as the value of a Go constant.
	unicodeTmplText = `// Generated by unicodegen

package xurls

const allowedUcsChar = {{.withPunc}}

const allowedUcsCharMinusPunc = {{.withoutPunc}}
`
)

// tmpl is the parsed form of unicodeTmplText. Parsing happens once at
// package initialization and panics if the template text is malformed.
// NOTE(review): the template is named "tlds", which looks like a leftover
// from another generator — confirm before renaming.
var tmpl = template.Must(template.New("tlds").Parse(unicodeTmplText))

// visit calls fn once for every code point described by rt. The 16-bit
// ranges are walked first, then the 32-bit ranges; within a range, code
// points are produced from Lo up to and including Hi in steps of Stride.
func visit(rt *unicode.RangeTable, fn func(rune)) {
	// emit walks a single inclusive range with the given step.
	emit := func(lo, hi, stride rune) {
		for cp := lo; cp <= hi; cp += stride {
			fn(cp)
		}
	}

	for _, narrow := range rt.R16 {
		emit(rune(narrow.Lo), rune(narrow.Hi), rune(narrow.Stride))
	}
	for _, wide := range rt.R32 {
		emit(rune(wide.Lo), rune(wide.Hi), rune(wide.Stride))
	}
}

// removeRune deletes the code point cp from ranges and returns the updated
// slice.
//
// ranges must hold inclusive [lo, hi] code point ranges in ascending,
// non-overlapping order. Depending on where cp falls, the range containing
// it is dropped (single-element range), shrunk by one at either end, or split
// in two. If no range contains cp, ranges is returned unchanged.
//
// The backing array of ranges is reused and modified in place, so callers
// must continue with the returned slice only.
//
// The linear search is inefficient, but acceptable because this code runs
// only at build time.
func removeRune(ranges [][2]rune, cp rune) [][2]rune {
	for i, r := range ranges {
		lo, hi := r[0], r[1]
		if hi < cp {
			// This range precedes cp; a later one may still contain it.
			continue
		}
		if cp < lo {
			// Ranges are ascending, so every remaining range follows cp.
			return ranges
		}

		// lo <= cp <= hi: cp is in this range and must be removed from it.
		switch {
		case lo == hi:
			// Remove this single-element range.
			return append(ranges[:i], ranges[i+1:]...)
		case cp == lo:
			// Remove the first element of this range.
			ranges[i][0] = cp + 1
		case cp == hi:
			// Remove the last element of this range.
			ranges[i][1] = cp - 1
		default:
			// Split this range: grow by one slot, shift the tail right,
			// then write the two halves into positions i and i+1.
			ranges = append(ranges, [2]rune{})
			copy(ranges[i+2:], ranges[i+1:])
			ranges[i] = [2]rune{lo, cp - 1}
			ranges[i+1] = [2]rune{cp + 1, hi}
		}
		return ranges
	}
	return ranges
}

// characterClassContents renders ranges as the contents of a regular
// expression character class (the part between the square brackets).
// A single-element range is written as one rune; any other range is written
// as "lo-hi".
//
// regexp.QuoteMeta is not necessary as long as every bound is non-ASCII,
// because all metacharacters are ASCII.
// cf. https://golang.org/s/re2syntax and
// https://cs.opensource.google/go/go/+/refs/tags/go1.17.6:src/regexp/regexp.go;l=721
func characterClassContents(ranges [][2]rune) string {
	var b strings.Builder
	for _, r := range ranges {
		b.WriteRune(r[0])
		if r[0] != r[1] {
			b.WriteRune('-')
			b.WriteRune(r[1])
		}
	}
	// Return the string rather than the Builder itself: a non-zero
	// strings.Builder must not be copied.
	return b.String()
}

// writeUnicode generates the file named by path from tmpl.
//
// It starts from the code point ranges allowed by RFC 3987, removes every
// code point in unicode.Z (separators) to obtain the "withPunc" class, then
// additionally removes every code point in unicode.Po (the "other
// punctuation" category) to obtain the "withoutPunc" class. Both classes are
// passed to the template as Go-quoted string literals.
//
// It returns any error from creating, writing, or closing the output file.
func writeUnicode() error {
	// rfc3987Ranges contains the ranges of valid code points specified by RFC 3987.
	rfc3987Ranges := [][2]rune{
		{0xA0, 0xD7FF},
		{0xF900, 0xFDCF},
		{0xFDF0, 0xFFEF},
		{0x10000, 0x1FFFD},
		{0x20000, 0x2FFFD},
		{0x30000, 0x3FFFD},
		{0x40000, 0x4FFFD},
		{0x50000, 0x5FFFD},
		{0x60000, 0x6FFFD},
		{0x70000, 0x7FFFD},
		{0x80000, 0x8FFFD},
		{0x90000, 0x9FFFD},
		{0xA0000, 0xAFFFD},
		{0xB0000, 0xBFFFD},
		{0xC0000, 0xCFFFD},
		{0xD0000, 0xDFFFD},
		{0xE1000, 0xEFFFD},
	}

	// sepFreeRanges excludes separators from rfc3987Ranges. It is built on a
	// copy because removeRune modifies its argument in place.
	sepFreeRanges := append([][2]rune{}, rfc3987Ranges...)
	visit(unicode.Z, func(cp rune) {
		sepFreeRanges = removeRune(sepFreeRanges, cp)
	})

	// puncFreeRanges excludes punctuation from sepFreeRanges, again working
	// on a copy so that sepFreeRanges stays intact.
	puncFreeRanges := append([][2]rune{}, sepFreeRanges...)
	visit(unicode.Po, func(cp rune) {
		puncFreeRanges = removeRune(puncFreeRanges, cp)
	})

	// Build the corresponding regular expression character class contents.
	data := map[string]string{
		"withPunc":    strconv.Quote(characterClassContents(sepFreeRanges)),
		"withoutPunc": strconv.Quote(characterClassContents(puncFreeRanges)),
	}

	// Write to file.
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	if err := tmpl.Execute(f, data); err != nil {
		// The write already failed; report that error and close on a
		// best-effort basis only.
		_ = f.Close()
		return err
	}
	// A failed Close can mean the data never reached the file, so it must
	// be reported rather than dropped by a bare defer.
	return f.Close()
}

// main logs which file is being generated, runs writeUnicode, and terminates
// the program via log.Fatalf if generation fails.
func main() {
	log.Printf("Generating %s...", path)

	err := writeUnicode()
	if err == nil {
		return
	}
	log.Fatalf("Could not write path: %v", err)
}