whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,2 @@
+# To prevent CRLF breakages on Windows for fragile files, like testdata.
+* -text
@@ -0,0 +1 @@
+github: mvdan
@@ -0,0 +1,23 @@
+on: [push, pull_request]
+name: Test
+jobs:
+  test:
+    strategy:
+      matrix:
+        go-version: [1.19.x, 1.20.x]
+        os: [ubuntu-latest, macos-11, windows-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+    - uses: actions/setup-go@v3
+      with:
+        go-version: ${{ matrix.go-version }}
+    - uses: actions/checkout@v3
+    - run: go test ./...
+    - run: go test -race ./...
+
+    # Static checks from this point forward. Only run on one Go version and on
+    # Linux, since it's the fastest platform, and the tools behave the same.
+    - if: matrix.os == 'ubuntu-latest' && matrix.go-version == '1.20.x'
+      run: diff <(echo -n) <(gofmt -s -d .)
+    - if: matrix.os == 'ubuntu-latest' && matrix.go-version == '1.20.x'
+      run: go vet ./...
@@ -0,0 +1,3 @@
+cmd/xurls/xurls
+generate/tldsgen/tldsgen
+generate/regexgen/regexgen
@@ -0,0 +1,27 @@
+Copyright (c) 2015, Daniel Martí. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,37 @@
+# xurls
+
+[![Go Reference](https://pkg.go.dev/badge/mvdan.cc/xurls/v2.svg)](https://pkg.go.dev/mvdan.cc/xurls/v2)
+
+Extract urls from text using regular expressions. Requires Go 1.19 or later.
+
+```go
+import "mvdan.cc/xurls/v2"
+
+func main() {
+	rxRelaxed := xurls.Relaxed()
+	rxRelaxed.FindString("Do gophers live in golang.org?")  // "golang.org"
+	rxRelaxed.FindString("This string does not have a URL") // ""
+
+	rxStrict := xurls.Strict()
+	rxStrict.FindAllString("must have scheme: http://foo.com/.", -1) // []string{"http://foo.com/"}
+	rxStrict.FindAllString("no scheme, no match: foo.com", -1)       // []string{}
+}
+```
+
+Since the API is centered around [regexp.Regexp](https://golang.org/pkg/regexp/#Regexp),
+many other methods are available, such as finding the [byte indexes](https://golang.org/pkg/regexp/#Regexp.FindAllIndex)
+for all matches.
+
+The regular expressions are compiled when the API is first called.
+Any subsequent calls will use the same regular expression pointers.
+
+#### cmd/xurls
+
+To install the tool globally:
+
+	go install mvdan.cc/xurls/v2/cmd/xurls@latest
+
+```shell
+$ echo "Do gophers live in http://golang.org?" | xurls
+http://golang.org
+```
@@ -0,0 +1,293 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"os"
+	"regexp"
+	"runtime/debug"
+	"strings"
+	"sync/atomic"
+	"time"
+
+	"golang.org/x/mod/module"
+
+	"mvdan.cc/xurls/v2"
+)
+
+// Command-line flags. init installs a custom flag.Usage, so the per-flag
+// usage strings are left empty here.
+var (
+	// matching is the -m flag: a regexp which url schemes must match.
+	matching = flag.String("m", "", "")
+	// relaxed is the -r flag: also match urls without a scheme.
+	relaxed  = flag.Bool("r", false, "")
+	// fix is the -fix flag; it is registered in init and normalized in main1
+	// to "" (disabled), "auto" or "all".
+	fix      boolString
+	// version is the -version flag: print the version and exit.
+	version  = flag.Bool("version", false, "")
+)
+
+// boolString is a flag value that can be given bare, like a boolean flag,
+// or with an explicit string value such as "-fix=all".
+type boolString string
+
+// Set stores the raw flag value as-is; it never fails.
+func (s *boolString) Set(val string) error {
+	*s = boolString(val)
+	return nil
+}
+
+// Get returns the current value as a plain string.
+func (s *boolString) Get() any { return s.String() }
+
+// String returns the current value.
+func (s *boolString) String() string { return string(*s) }
+
+// IsBoolFlag marks the flag as usable without an explicit value.
+func (*boolString) IsBoolFlag() bool { return true }
+
+func init() {
+	flag.Var(&fix, "fix", "")
+	flag.Usage = func() {
+		fmt.Fprint(os.Stderr, `
+Usage: xurls [-h] [files]
+
+xurls extracts urls from text using regular expressions.
+If no files are given, it reads from standard input.
+
+   -m <regexp>   only match urls whose scheme matches a regexp
+                    example: 'https?://|mailto:'
+   -r            also match urls without a scheme (relaxed)
+   -version      print version and exit
+
+When the -fix or -fix=auto flag is used, xurls instead attempts to replace
+any urls which result in a permanent redirect (301 or 308).
+It also fails if any urls fail to load, so that they may be removed or replaced.
+To replace urls which result in temporary redirect as well, use -fix=all.
+`[1:])
+	}
+}
+
+// scanPath scans a single input for urls matching re.
+//
+// path names the file to read; "-" means standard input.
+//
+// Without -fix, every match is printed to standard output, one per line.
+//
+// With -fix, each http or https url is requested to detect redirects and
+// broken links, and lines are rewritten with the redirect targets. For
+// standard input the rewritten lines go to standard output. For a named file
+// they are buffered, and the file is overwritten (and its path printed) only
+// if at least one url was replaced.
+//
+// It returns an error if the input cannot be opened or read, if the fixed
+// file cannot be written, or if any broken urls were found.
+func scanPath(re *regexp.Regexp, path string) error {
+	in := os.Stdin
+	out := io.Writer(os.Stdout)
+	var outBuf *bytes.Buffer
+	if path != "-" {
+		var err error
+		in, err = os.Open(path)
+		if err != nil {
+			return err
+		}
+		defer in.Close()
+		if fix != "" {
+			// Buffer the fixed contents so that the file is only
+			// rewritten at the end, and only if something changed.
+			outBuf = new(bytes.Buffer)
+			out = outBuf
+		}
+	}
+
+	// A maximum of 32 parallel requests.
+	maxWeight := int64(32)
+	seq := newSequencer(maxWeight, out, os.Stderr)
+
+	userAgent := fmt.Sprintf("mvdan.cc/xurls %s", readVersion())
+	scanner := bufio.NewScanner(in)
+
+	// Doesn't need to be part of reporterState as order doesn't matter.
+	var atomicFixedCount uint32
+
+	for scanner.Scan() {
+		line := scanner.Text() + "\n"
+		matches := re.FindAllStringIndex(line, -1)
+		if fix == "" {
+			for _, pair := range matches {
+				match := line[pair[0]:pair[1]]
+				fmt.Printf("%s\n", match)
+			}
+			continue
+		}
+		// Each url on the line costs one unit of weight, capped so that a
+		// line with very many urls can still be scheduled.
+		weight := int64(len(matches))
+		if weight > maxWeight {
+			weight = maxWeight
+		}
+		seq.Add(weight, func(r *reporter) error {
+			offsetWithinLine := 0
+			for _, pair := range matches {
+				// The indexes are based on the original line.
+				pair[0] += offsetWithinLine
+				pair[1] += offsetWithinLine
+				match := line[pair[0]:pair[1]]
+				origURL, err := url.Parse(match)
+				if err != nil {
+					r.appendBroken(match, err.Error())
+					continue
+				}
+				fixed := origURL.String()
+				switch origURL.Scheme {
+				case "http", "https":
+					// See if the URL redirects somewhere.
+					client := &http.Client{
+						Timeout: 10 * time.Second,
+						CheckRedirect: func(req *http.Request, via []*http.Request) error {
+							if len(via) >= 10 {
+								return errors.New("stopped after 10 redirects")
+							}
+							switch req.Response.StatusCode {
+							case http.StatusMovedPermanently, http.StatusPermanentRedirect:
+								// "auto" and "all" fix permanent redirects.
+							case http.StatusFound, http.StatusSeeOther, http.StatusTemporaryRedirect:
+								// Only "all" fixes temporary redirects.
+								if fix != "all" {
+									return http.ErrUseLastResponse
+								}
+							default:
+								// Any other redirects are ignored.
+								return http.ErrUseLastResponse
+							}
+							// Inherit the fragment if empty.
+							if req.URL.Fragment == "" {
+								req.URL.Fragment = origURL.Fragment
+							}
+							fixed = req.URL.String()
+							return nil
+						},
+					}
+					// Try the cheaper HEAD first; servers which reject it
+					// with 405 get a second attempt using GET.
+					method := http.MethodHead
+				retry:
+					req, err := http.NewRequest(method, fixed, nil)
+					if err != nil {
+						r.appendBroken(match, err.Error())
+						continue
+					}
+					req.Header.Set("User-Agent", userAgent)
+					resp, err := client.Do(req)
+					if err != nil {
+						r.appendBroken(match, err.Error())
+						continue
+					}
+					if code := resp.StatusCode; code >= 400 {
+						if code == http.StatusMethodNotAllowed {
+							method = http.MethodGet
+							resp.Body.Close()
+							goto retry
+						}
+						r.appendBroken(match, fmt.Sprintf("%d %s", code, http.StatusText(code)))
+					}
+					resp.Body.Close()
+				}
+				if fixed != match {
+					// Replace the url, and update offsetWithinLine.
+					newLine := line[:pair[0]] + fixed + line[pair[1]:]
+					offsetWithinLine += len(newLine) - len(line)
+					line = newLine
+					atomic.AddUint32(&atomicFixedCount, 1)
+				}
+			}
+			io.WriteString(r, line) // add the fixed line to outBuf
+			return nil
+		})
+	}
+	// Wait for every queued task before inspecting any results.
+	state := seq.finalState()
+	// Scan reports a read failure (such as an overlong line) only by ending
+	// the loop, so Err must be checked here rather than inside the loop body.
+	// Checking before the file is rewritten also avoids replacing it with
+	// truncated contents.
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+	if state.exitCode != 0 {
+		panic("we aren't using sequencer for any errors")
+	}
+	// Note that all goroutines have stopped at this point.
+	if atomicFixedCount > 0 && path != "-" {
+		in.Close()
+		// Overwrite the file, if we weren't reading stdin. Report its
+		// path too.
+		fmt.Println(path)
+		if err := ioutil.WriteFile(path, outBuf.Bytes(), 0o666); err != nil {
+			return err
+		}
+	}
+	if len(state.brokenURLs) > 0 {
+		var s strings.Builder
+		fmt.Fprintf(&s, "found %d broken urls in %q:\n", len(state.brokenURLs), path)
+		for _, broken := range state.brokenURLs {
+			fmt.Fprintf(&s, "  * %s - %s\n", broken.url, broken.reason)
+		}
+		return errors.New(s.String())
+	}
+	return nil
+}
+
+// main runs the tool and exits with the status code returned by main1.
+func main() {
+	code := main1()
+	os.Exit(code)
+}
+
+// main1 is the real entry point: it parses the flags, picks the regular
+// expression to use, and scans each argument (or standard input when there
+// are none). It returns the process exit code: 0 on success, 1 on any error.
+func main1() int {
+	flag.Parse()
+	if *version {
+		fmt.Println(readVersion())
+		return 0
+	}
+	if *relaxed && *matching != "" {
+		fmt.Fprintln(os.Stderr, "-r and -m at the same time don't make much sense")
+		return 1
+	}
+	switch fix {
+	case "": // disabled by default
+	case "false": // disabled via -fix=false; normalize
+		fix = ""
+	case "auto", "all": // enabled via -fix=auto, -fix=all, etc
+	case "true": // enabled via -fix; normalize
+		fix = "auto"
+	default:
+		// Reject typos such as -fix=al rather than silently treating
+		// them as "auto".
+		fmt.Fprintf(os.Stderr, "invalid -fix value %q; use -fix, -fix=auto or -fix=all\n", string(fix))
+		return 1
+	}
+	var re *regexp.Regexp
+	switch {
+	case *relaxed:
+		re = xurls.Relaxed()
+	case *matching != "":
+		var err error
+		if re, err = xurls.StrictMatchingScheme(*matching); err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return 1
+		}
+	default:
+		re = xurls.Strict()
+	}
+	args := flag.Args()
+	if len(args) == 0 {
+		// No files given; "-" makes scanPath read standard input.
+		args = []string{"-"}
+	}
+	for _, path := range args {
+		if err := scanPath(re, path); err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return 1
+		}
+	}
+	return 0
+}
+
+// Borrowed from https://github.com/burrowers/garble.
+
+// readVersion returns the version of the main module as recorded in the
+// binary's build information, or "unknown" when that is unavailable.
+//
+// For "(devel)" builds which carry a VCS revision, a pseudo-version is built
+// from the revision (shortened to 12 characters) and the VCS timestamp.
+func readVersion() string {
+	info, ok := debug.ReadBuildInfo()
+	if !ok {
+		return "unknown"
+	}
+	mod := &info.Main
+	if replacement := mod.Replace; replacement != nil {
+		mod = replacement
+	}
+	if mod.Version != "(devel)" {
+		return mod.Version
+	}
+
+	// Until https://github.com/golang/go/issues/50603 is implemented,
+	// manually construct something like a pseudo-version.
+	// TODO: remove when this code is dead, hopefully in Go 1.20.
+	var (
+		revision string
+		stamp    time.Time
+	)
+	for _, s := range info.Settings {
+		if s.Key == "vcs.time" {
+			// If the format is invalid, we'll print a zero timestamp.
+			stamp, _ = time.Parse(time.RFC3339Nano, s.Value)
+			continue
+		}
+		if s.Key != "vcs.revision" {
+			continue
+		}
+		revision = s.Value
+		if len(revision) > 12 {
+			revision = revision[:12]
+		}
+	}
+	if revision == "" {
+		return mod.Version
+	}
+	mod.Version = module.PseudoVersion("", "", stamp, revision)
+	return mod.Version
+}
@@ -0,0 +1,125 @@
+// Copyright (c) 2019, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/rogpeppe/go-internal/testscript"
+)
+
+// TestMain registers main1 under the name "xurls" so that test scripts can
+// run the tool via "exec xurls".
+func TestMain(m *testing.M) {
+	commands := map[string]func() int{
+		"xurls": main1,
+	}
+	os.Exit(testscript.RunMain(m, commands))
+}
+
+// TestScript runs the scripts under testdata/script. Each script gets its own
+// local HTTP server, reachable through $SERVER, and an extra "expand" command.
+func TestScript(t *testing.T) {
+	t.Parallel()
+
+	// setup starts the HTTP server for one script and arranges its shutdown.
+	setup := func(env *testscript.Env) error {
+		mux := http.NewServeMux()
+
+		// route registers handler under pattern, flagging any request whose
+		// HTTP method differs from want.
+		route := func(want, pattern string, handler http.HandlerFunc) {
+			mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
+				if got := r.Method; got != want {
+					t.Errorf("expected all requests to be %q, got %q", want, got)
+				}
+				handler(w, r)
+			})
+		}
+		// Handler factories for the three kinds of endpoints below.
+		plainOK := func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(200)
+		}
+		redirectTo := func(target string, code int) http.HandlerFunc {
+			return func(w http.ResponseWriter, r *http.Request) {
+				http.Redirect(w, r, target, code)
+			}
+		}
+		failWith := func(code int) http.HandlerFunc {
+			return func(w http.ResponseWriter, r *http.Request) {
+				http.Error(w, "", code)
+			}
+		}
+
+		route("HEAD", "/plain-head", plainOK)
+		route("HEAD", "/redir-1", redirectTo("/plain-head", http.StatusMovedPermanently))
+		route("HEAD", "/redir-2", redirectTo("/redir-1", http.StatusMovedPermanently))
+
+		route("HEAD", "/redir-longer", redirectTo("/redir-longtarget", http.StatusMovedPermanently))
+		route("HEAD", "/redir-longtarget", plainOK)
+		route("HEAD", "/redir-fragment", redirectTo("/plain-head#bar", http.StatusMovedPermanently))
+
+		route("HEAD", "/redir-301", redirectTo("/plain-head", 301))
+		route("HEAD", "/redir-302", redirectTo("/plain-head", 302))
+		route("HEAD", "/redir-303", redirectTo("/plain-head", 303))
+		route("HEAD", "/redir-307", redirectTo("/plain-head", 307))
+		route("HEAD", "/redir-308", redirectTo("/plain-head", 308))
+
+		route("HEAD", "/404", failWith(404))
+		route("HEAD", "/500", failWith(500))
+
+		route("GET", "/plain-get", func(w http.ResponseWriter, r *http.Request) {
+			fmt.Fprintf(w, "plaintext")
+		})
+		// This endpoint accepts both methods: anything but GET gets a 405.
+		mux.HandleFunc("/get-only", func(w http.ResponseWriter, r *http.Request) {
+			if r.Method != "GET" {
+				http.Error(w, "", 405)
+				return
+			}
+			http.Redirect(w, r, "/plain-get", 301)
+		})
+
+		ln, err := net.Listen("tcp", ":0")
+		if err != nil {
+			return err
+		}
+		server := &http.Server{Handler: mux}
+		go server.Serve(ln)
+		env.Vars = append(env.Vars, "SERVER=http://"+ln.Addr().String())
+		env.Defer(func() {
+			if err := server.Shutdown(context.TODO()); err != nil {
+				t.Fatal(err)
+			}
+		})
+		return nil
+	}
+
+	// expand rewrites each named file in place, replacing $VAR and ${VAR}
+	// references with values from the script's environment.
+	expand := func(ts *testscript.TestScript, neg bool, args []string) {
+		if neg {
+			ts.Fatalf("unsupported: ! expand")
+		}
+		if len(args) == 0 {
+			ts.Fatalf("usage: expand file...")
+		}
+		for _, name := range args {
+			expanded := os.Expand(ts.ReadFile(name), ts.Getenv)
+			err := ioutil.WriteFile(ts.MkAbs(name), []byte(expanded), 0o666)
+			ts.Check(err)
+		}
+	}
+
+	testscript.Run(t, testscript.Params{
+		Dir:                 filepath.Join("testdata", "script"),
+		RequireExplicitExec: true,
+		Setup:               setup,
+		Cmds: map[string]func(ts *testscript.TestScript, neg bool, args []string){
+			"expand": expand,
+		},
+	})
+}
@@ -0,0 +1,156 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The code below is borrowed from Go's cmd/gofmt as of 1.18beta1.
+// We tweaked it slightly to add the "broken URLs" result.
+
+package main
+
+import (
+	"context"
+	"go/scanner"
+	"io"
+
+	"golang.org/x/sync/semaphore"
+)
+
+// A sequencer performs concurrent tasks that may write output, but emits that
+// output in a deterministic order.
+type sequencer struct {
+	// maxWeight is the capacity of sem; Add clamps task weights to it.
+	maxWeight int64
+	sem       *semaphore.Weighted   // weighted by the cost passed to Add (scanPath uses the number of urls on a line)
+	prev      <-chan *reporterState // 1-buffered
+}
+
+// newSequencer builds a sequencer which runs tasks concurrently up to a total
+// weight of maxWeight, sending task output to out and reported errors to err.
+func newSequencer(maxWeight int64, out, err io.Writer) *sequencer {
+	// Seed the chain with the initial state so that the first task can
+	// obtain it without waiting.
+	first := make(chan *reporterState, 1)
+	first <- &reporterState{out: out, err: err}
+
+	return &sequencer{
+		maxWeight: maxWeight,
+		sem:       semaphore.NewWeighted(maxWeight),
+		prev:      first,
+	}
+}
+
+// Add blocks until the sequencer has enough weight to spare, then adds f as a
+// task to be executed concurrently.
+//
+// If the weight is either negative or larger than the sequencer's maximum
+// weight, Add blocks until all other tasks have completed, then the task
+// executes exclusively (blocking all other calls to Add until it completes).
+//
+// f may run concurrently in a goroutine, but its output to the passed-in
+// reporter will be sequential relative to the other tasks in the sequencer.
+//
+// If f invokes a method on the reporter, execution of that method may block
+// until the previous task has finished. (To maximize concurrency, f should
+// avoid invoking the reporter until it has finished any parallelizable work.)
+//
+// If f returns a non-nil error, that error will be reported after f's output
+// (if any) and will cause a nonzero final exit code.
+func (s *sequencer) Add(weight int64, f func(*reporter) error) {
+	if weight < 0 || weight > s.maxWeight {
+		weight = s.maxWeight
+	}
+	// NOTE(review): with context.TODO() this error branch presumably never
+	// triggers in practice; confirm against the semaphore documentation.
+	if err := s.sem.Acquire(context.TODO(), weight); err != nil {
+		// Change the task from "execute f" to "report err".
+		weight = 0
+		f = func(*reporter) error { return err }
+	}
+
+	// Chain this task behind the previous one: its reporter receives the
+	// state from the current tail, and its own channel becomes the new tail
+	// which the next call to Add will wait on.
+	r := &reporter{prev: s.prev}
+	next := make(chan *reporterState, 1)
+	s.prev = next
+
+	// Start f in parallel: it can run until it invokes a method on r, at which
+	// point it will block until the previous task releases the output state.
+	go func() {
+		if err := f(r); err != nil {
+			r.Report(err)
+		}
+		next <- r.getState() // Release the next task.
+		// The weight is given back only after the state has been handed on.
+		s.sem.Release(weight)
+	}()
+}
+
+// GetExitCode blocks until every task added so far has finished, and then
+// returns the exit code of the sequence, suitable for passing to os.Exit.
+func (s *sequencer) GetExitCode() int {
+	result := make(chan int, 1)
+	// A zero-weight task queued last observes the state left by all the
+	// tasks before it.
+	s.Add(0, func(r *reporter) error {
+		code := r.ExitCode()
+		result <- code
+		return nil
+	})
+	return <-result
+}
+
+// finalState blocks until every task added so far has finished, and then
+// returns a copy of the resulting reporter state.
+func (s *sequencer) finalState() reporterState {
+	result := make(chan reporterState, 1)
+	// A zero-weight task queued last observes the state left by all the
+	// tasks before it.
+	s.Add(0, func(r *reporter) error {
+		st := r.getState()
+		result <- *st
+		return nil
+	})
+	return <-result
+}
+
+// A reporter reports output, warnings, and errors.
+//
+// It obtains the shared reporterState lazily: see getState.
+type reporter struct {
+	// prev delivers the state once the previous task has released it.
+	prev  <-chan *reporterState
+	// state is nil until getState has received from prev.
+	state *reporterState
+}
+
+// reporterState carries the state of a reporter instance.
+//
+// Only one reporter at a time may have access to a reporterState.
+type reporterState struct {
+	// out receives task output and err receives reported errors.
+	out, err io.Writer
+	// exitCode is set to a nonzero value by Report.
+	exitCode int
+
+	// brokenURLs accumulates the entries added via appendBroken.
+	brokenURLs []brokenURL
+}
+
+// brokenURL records a url which could not be checked, and why.
+type brokenURL struct {
+	url    string
+	reason string
+}
+
+// getState returns the reporter state for manipulation. The first call blocks
+// until all prior reporters have finished with the state; later calls return
+// the state already received.
+func (r *reporter) getState() *reporterState {
+	if r.state != nil {
+		return r.state
+	}
+	r.state = <-r.prev
+	return r.state
+}
+
+// Write sends p to the reporter's output stream.
+//
+// A write error is handed back to the caller and has no effect on the
+// reporter's exit code.
+func (r *reporter) Write(p []byte) (int, error) {
+	st := r.getState()
+	return st.out.Write(p)
+}
+
+// appendBroken records url as broken, along with the reason why.
+func (r *reporter) appendBroken(url, reason string) {
+	st := r.getState()
+	entry := brokenURL{url: url, reason: reason}
+	st.brokenURLs = append(st.brokenURLs, entry)
+}
+
+// Report prints a non-nil error to the reporter's error stream and sets the
+// exit code to a nonzero value. It panics if err is nil.
+func (r *reporter) Report(err error) {
+	if err != nil {
+		st := r.getState()
+		scanner.PrintError(st.err, err)
+		st.exitCode = 2
+		return
+	}
+	panic("Report with nil error")
+}
+
+// ExitCode returns the exit code recorded in the reporter state, waiting for
+// prior reporters to finish with the state if necessary.
+func (r *reporter) ExitCode() int {
+	st := r.getState()
+	return st.exitCode
+}
@@ -0,0 +1,33 @@
+stdin input
+exec xurls
+stdout 'https://foo.com'
+! stdout 'bar.com'
+! stdout 'custom://some-data'
+! stderr .
+
+! exec xurls missing
+! stdout .
+stderr 'open missing'
+
+exec xurls input
+stdout 'https://foo.com'
+! stdout 'bar.com'
+! stdout 'custom://some-data'
+! stderr .
+
+exec xurls -r input
+stdout 'https://foo.com'
+stdout 'bar.com'
+! stdout 'custom://some-data'
+! stderr .
+
+exec xurls -m 'custom://' input
+! stdout 'https://foo.com'
+! stdout 'bar.com'
+stdout 'custom://some-data'
+! stderr .
+
+-- input --
+First, a link with a scheme, https://foo.com.
+Then, one without a scheme, like bar.com.
+Also, a link with a custom scheme, custom://some-data.
@@ -0,0 +1,120 @@
+expand nothing
+cp nothing nothing.orig
+
+expand redirects
+expand redirects.golden-auto
+expand redirects.golden-all
+cp redirects redirects.orig
+
+expand broken
+expand broken.golden
+cp broken broken.orig
+
+exec xurls -fix nothing
+! stdout .
+! stderr .
+cmp nothing nothing.orig
+
+stdin redirects
+exec xurls -fix
+cmp stdout redirects.golden-auto
+cmp redirects redirects.orig
+! stderr .
+
+exec xurls -fix redirects
+stdout '^redirects$'
+! stderr .
+cmp redirects redirects.golden-auto
+cp redirects.orig redirects
+
+exec xurls -fix=auto redirects
+cmp redirects redirects.golden-auto
+cp redirects.orig redirects
+
+exec xurls -fix=all redirects
+cmp redirects redirects.golden-all
+cp redirects.orig redirects
+
+! exec xurls -fix broken
+stdout -count=1 '^broken$'
+stderr -count=1 '5 broken urls'
+stderr -count=2 '/404 - 404 Not Found'
+stderr -count=2 '/500 - 500 Internal Server Error'
+stderr -count=1 'totallydoesnotexist.localhost/ - Head .* dial tcp'
+cmp broken broken.golden
+
+-- nothing --
+No redirect: ${SERVER}/plain-head
+-- redirects --
+No redirect: ${SERVER}/plain-head
+One redirect: ${SERVER}/redir-1
+Two redirects: ${SERVER}/redir-2
+Redirect inherits fragment: ${SERVER}/redir-1#foo
+Redirect replaces fragment: ${SERVER}/redir-fragment#foo
+
+Three links in one line: ${SERVER}/redir-1 + ${SERVER}//redir-1 + ${SERVER}///redir-1
+
+Redirect to a longer path ${SERVER}/redir-longer with trailing text
+
+Permanent redirect codes:
+* ${SERVER}/redir-301
+* ${SERVER}/redir-308
+
+Temporary redirect codes:
+* ${SERVER}/redir-302
+* ${SERVER}/redir-303
+* ${SERVER}/redir-307
+
+Only GET allowed, HEAD fails: ${SERVER}/get-only
+-- redirects.golden-auto --
+No redirect: ${SERVER}/plain-head
+One redirect: ${SERVER}/plain-head
+Two redirects: ${SERVER}/plain-head
+Redirect inherits fragment: ${SERVER}/plain-head#foo
+Redirect replaces fragment: ${SERVER}/plain-head#bar
+
+Three links in one line: ${SERVER}/plain-head + ${SERVER}/plain-head + ${SERVER}/plain-head
+
+Redirect to a longer path ${SERVER}/redir-longtarget with trailing text
+
+Permanent redirect codes:
+* ${SERVER}/plain-head
+* ${SERVER}/plain-head
+
+Temporary redirect codes:
+* ${SERVER}/redir-302
+* ${SERVER}/redir-303
+* ${SERVER}/redir-307
+
+Only GET allowed, HEAD fails: ${SERVER}/plain-get
+-- redirects.golden-all --
+No redirect: ${SERVER}/plain-head
+One redirect: ${SERVER}/plain-head
+Two redirects: ${SERVER}/plain-head
+Redirect inherits fragment: ${SERVER}/plain-head#foo
+Redirect replaces fragment: ${SERVER}/plain-head#bar
+
+Three links in one line: ${SERVER}/plain-head + ${SERVER}/plain-head + ${SERVER}/plain-head
+
+Redirect to a longer path ${SERVER}/redir-longtarget with trailing text
+
+Permanent redirect codes:
+* ${SERVER}/plain-head
+* ${SERVER}/plain-head
+
+Temporary redirect codes:
+* ${SERVER}/plain-head
+* ${SERVER}/plain-head
+* ${SERVER}/plain-head
+
+Only GET allowed, HEAD fails: ${SERVER}/plain-get
+-- broken --
+One redirect: ${SERVER}/redir-1
+404 errors: ${SERVER}/404 ${SERVER}/404
+500 errors: ${SERVER}/500 ${SERVER}/500
+Dial error: http://totallydoesnotexist.localhost/
+-- broken.golden --
+One redirect: ${SERVER}/plain-head
+404 errors: ${SERVER}/404 ${SERVER}/404
+500 errors: ${SERVER}/500 ${SERVER}/500
+Dial error: http://totallydoesnotexist.localhost/
@@ -0,0 +1,11 @@
+exec xurls -h
+! stderr 'flag provided but not defined'
+stderr 'Usage: xurls'
+! stderr 'help requested' # don't duplicate usage output
+! stderr '-test\.' # don't show the test binary's usage func
+
+! exec xurls -r -m="whatever"
+stderr 'at the same time'
+
+! exec xurls -m="bad(regexp"
+stderr 'missing closing \)'
@@ -0,0 +1,5 @@
+# Note that "go test" does not embed vcs information by default.
+# We copied the code from another project which is tested,
+# so there's no need to fully test the VCS aspect.
+exec xurls -version
+stdout '\(devel\)'
@@ -0,0 +1,19 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package xurls_test
+
+import (
+	"fmt"
+
+	"mvdan.cc/xurls/v2"
+)
+
+// Example finds the first url in one sentence and all urls in another, using
+// the relaxed regular expression, which also matches urls without a scheme.
+func Example() {
+	relaxed := xurls.Relaxed()
+
+	first := relaxed.FindString("Do gophers live in http://golang.org?")
+	fmt.Println(first)
+
+	all := relaxed.FindAllString("foo.com is http://foo.com/.", -1)
+	fmt.Println(all)
+	// Output:
+	// http://golang.org
+	// [foo.com http://foo.com/]
+}
@@ -0,0 +1,74 @@
+// Copyright (c) 2017, Shreyas Khare <skhare@rapid7.com>
+// See LICENSE for licensing information
+
+package main
+
+import (
+	"encoding/csv"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"strings"
+	"text/template"
+)
+
+// path is the file which this generator creates, relative to the working
+// directory.
+const path = "schemes.go"
+
+// schemesTmpl renders the generated source file: a Schemes slice with one
+// backquoted string literal per element of the .Schemes field.
+var schemesTmpl = template.Must(template.New("schemes").Parse(`// Generated by schemesgen
+
+package xurls
+
+// Schemes is a sorted list of all IANA assigned schemes.
+//
+// Source: https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv
+var Schemes = []string{
+{{range $scheme := .Schemes}}` + "\t`" + `{{$scheme}}` + "`" + `,
+{{end}}}
+`))
+
+func schemeList() []string {
+	resp, err := http.Get("https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+	r := csv.NewReader(resp.Body)
+	r.Read() // ignore headers
+	schemes := make([]string, 0)
+	for {
+		record, err := r.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			log.Fatal(err)
+		}
+		if strings.Contains(record[0], "OBSOLETE") {
+			continue // skip obsolete schemes; note the scheme column is abused
+		}
+		schemes = append(schemes, record[0])
+	}
+	return schemes
+}
+
+// writeSchemes creates the file at path and renders schemesTmpl into it with
+// the given schemes. It returns any error from creating the file or executing
+// the template.
+func writeSchemes(schemes []string) error {
+	out, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer out.Close()
+
+	data := struct {
+		Schemes []string
+	}{Schemes: schemes}
+	return schemesTmpl.Execute(out, data)
+}
+
+// main fetches the list of schemes and writes the generated file, exiting
+// with a failure status if the file cannot be written.
+func main() {
+	list := schemeList()
+	log.Printf("Generating %s...", path)
+	err := writeSchemes(list)
+	if err == nil {
+		return
+	}
+	log.Fatalf("Could not write path: %v", err)
+}
@@ -0,0 +1,111 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package main
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+	"text/template"
+)
+
+// path is the file which this generator creates, relative to the working
+// directory.
+const path = "tlds.go"
+
+// tldsTmpl renders the generated source file: a comment listing every entry
+// of the .URLs field as a source, followed by a TLDs slice with one
+// backquoted string literal per element of the .TLDs field.
+var tldsTmpl = template.Must(template.New("tlds").Parse(`// Generated by tldsgen
+
+package xurls
+
+// TLDs is a sorted list of all public top-level domains.
+//
+// Sources:{{range $_, $url := .URLs}}
+//   - {{$url}}{{end}}
+var TLDs = []string{
+{{range $_, $tld := .TLDs}}` + "\t`" + `{{$tld}}` + "`" + `,
+{{end}}}
+`))
+
+// cleanTld lowercases tld. It returns the empty string instead when the
+// lowercased name starts with "xn--".
+func cleanTld(tld string) string {
+	if lower := strings.ToLower(tld); !strings.HasPrefix(lower, "xn--") {
+		return lower
+	}
+	return ""
+}
+
+// fetchFromURL downloads url and scans it line by line. For every line, the
+// text matched by the regular expression pat is cleaned with cleanTld and,
+// if non-empty, added to tldSet. wg.Done is called when the function ends.
+//
+// Any failure to fetch or read the data panics.
+//
+// NOTE: tldSet is written without any locking, so a map must not be shared
+// between calls which run concurrently.
+func fetchFromURL(wg *sync.WaitGroup, url, pat string, tldSet map[string]bool) {
+	defer wg.Done()
+	log.Printf("Fetching %s", url)
+	resp, err := http.Get(url)
+	switch {
+	case err != nil:
+		panic(fmt.Errorf("%s: %s", url, err))
+	case resp.StatusCode >= 400:
+		panic(fmt.Errorf("%s: %s", url, errors.New(resp.Status)))
+	}
+	defer resp.Body.Close()
+
+	re := regexp.MustCompile(pat)
+	lines := bufio.NewScanner(resp.Body)
+	for lines.Scan() {
+		if tld := cleanTld(re.FindString(lines.Text())); tld != "" {
+			tldSet[tld] = true
+		}
+	}
+	if err := lines.Err(); err != nil {
+		panic(fmt.Errorf("%s: %s", url, err))
+	}
+}
+
+// tldList fetches the known top-level domains from all sources concurrently.
+// It returns the deduplicated names in sorted order, together with the list
+// of source urls in the order they were requested.
+func tldList() ([]string, []string) {
+	var (
+		urls []string
+		sets []map[string]bool
+		wg   sync.WaitGroup
+	)
+	fromURL := func(url, pat string) {
+		// Give every fetch its own set: the fetches run concurrently, and
+		// writing to one shared map from several goroutines is a data race.
+		set := make(map[string]bool)
+		urls = append(urls, url)
+		sets = append(sets, set)
+		wg.Add(1)
+		go fetchFromURL(&wg, url, pat, set)
+	}
+	fromURL("https://data.iana.org/TLD/tlds-alpha-by-domain.txt", `^[^#]+$`)
+	fromURL("https://publicsuffix.org/list/effective_tld_names.dat", `^[^/.]+$`)
+	wg.Wait()
+
+	// All fetches are done, so the sets can be merged without locking.
+	tldSet := make(map[string]bool)
+	for _, set := range sets {
+		for tld := range set {
+			tldSet[tld] = true
+		}
+	}
+
+	tlds := make([]string, 0, len(tldSet))
+	for tld := range tldSet {
+		tlds = append(tlds, tld)
+	}
+
+	sort.Strings(tlds)
+	return tlds, urls
+}
+
+// writeTlds creates the file at path and renders tldsTmpl into it with the
+// given top-level domains and source urls. It returns any error from creating
+// the file or executing the template.
+func writeTlds(tlds, urls []string) error {
+	f, err := os.Create(path)
+	if err != nil {
+		// Report the failure through the error result, as the signature
+		// promises, rather than panicking.
+		return err
+	}
+	defer f.Close()
+	return tldsTmpl.Execute(f, struct {
+		TLDs []string
+		URLs []string
+	}{
+		TLDs: tlds,
+		URLs: urls,
+	})
+}
+
+// main fetches the list of top-level domains and writes the generated file,
+// exiting with a failure status if the file cannot be written.
+func main() {
+	tlds, urls := tldList()
+	log.Printf("Generating %s...", path)
+	// A failed write must not pass silently, or a stale or partial file
+	// would look like a successful run.
+	if err := writeTlds(tlds, urls); err != nil {
+		log.Fatalf("Could not write path: %v", err)
+	}
+}
@@ -0,0 +1,152 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package main
+
+import (
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"text/template"
+	"unicode"
+)
+
+// path is the file which this generator creates, relative to the working
+// directory.
+const path = "unicode.go"
+
+// tmpl renders the generated source file, which declares two constants from
+// the "withPunc" and "withoutPunc" values it is executed with.
+//
+// NOTE(review): the template is named "tlds", which looks like a leftover
+// from tldsgen; the name does not appear in the rendered output.
+var tmpl = template.Must(template.New("tlds").Parse(`// Generated by unicodegen
+
+package xurls
+
+const allowedUcsChar = {{.withPunc}}
+
+const allowedUcsCharMinusPunc = {{.withoutPunc}}
+`))
+
+// visit calls fn for every code point in rt: first those of the 16-bit
+// ranges, then those of the 32-bit ranges, each range walked from Lo to Hi
+// in steps of Stride.
+func visit(rt *unicode.RangeTable, fn func(rune)) {
+	walk := func(lo, hi, stride rune) {
+		for cp := lo; cp <= hi; cp += stride {
+			fn(cp)
+		}
+	}
+	for _, r16 := range rt.R16 {
+		walk(rune(r16.Lo), rune(r16.Hi), rune(r16.Stride))
+	}
+	for _, r32 := range rt.R32 {
+		walk(rune(r32.Lo), rune(r32.Hi), rune(r32.Stride))
+	}
+}
+
+// writeUnicode generates the file at path. It starts from the code point
+// ranges allowed by RFC 3987, removes the separators (unicode.Z) and then
+// also the punctuation (unicode.Po), and writes both results as quoted
+// regular expression character class contents.
+//
+// It returns any error from creating the file or executing the template.
+func writeUnicode() error {
+	// rfc3987Ranges contains the ranges of valid code points specified by RFC 3987.
+	rfc3987Ranges := [][2]rune{
+		{0xA0, 0xD7FF},
+		{0xF900, 0xFDCF},
+		{0xFDF0, 0xFFEF},
+		{0x10000, 0x1FFFD},
+		{0x20000, 0x2FFFD},
+		{0x30000, 0x3FFFD},
+		{0x40000, 0x4FFFD},
+		{0x50000, 0x5FFFD},
+		{0x60000, 0x6FFFD},
+		{0x70000, 0x7FFFD},
+		{0x80000, 0x8FFFD},
+		{0x90000, 0x9FFFD},
+		{0xA0000, 0xAFFFD},
+		{0xB0000, 0xBFFFD},
+		{0xC0000, 0xCFFFD},
+		{0xD0000, 0xDFFFD},
+		{0xE1000, 0xEFFFD},
+	}
+
+	// removeRune takes inclusive code point ranges in ascending order and
+	// returns equivalent ranges which exclude cp, by dropping, shrinking or
+	// splitting the range which contains it. The input slice may be modified.
+	// The search is linear, which is fine for code that only runs when
+	// generating.
+	removeRune := func(ranges [][2]rune, cp rune) [][2]rune {
+		for i, r := range ranges {
+			lo, hi := r[0], r[1]
+			if hi < cp {
+				// This range lies entirely before cp; keep looking.
+				continue
+			}
+			if cp < lo {
+				// Every remaining range lies after cp; nothing to remove.
+				return ranges
+			}
+
+			// cp is inside this range. Work out what remains of it.
+			var remaining [][2]rune
+			switch {
+			case lo == hi:
+				// A single-element range disappears altogether.
+			case cp == lo:
+				remaining = [][2]rune{{lo + 1, hi}}
+			case cp == hi:
+				remaining = [][2]rune{{lo, hi - 1}}
+			default:
+				remaining = [][2]rune{{lo, cp - 1}, {cp + 1, hi}}
+			}
+			tail := append(remaining, ranges[i+1:]...)
+			return append(ranges[0:i], tail...)
+		}
+		return ranges
+	}
+
+	// withoutSeps is rfc3987Ranges minus the separators.
+	withoutSeps := append([][2]rune{}, rfc3987Ranges...)
+	visit(unicode.Z, func(cp rune) {
+		withoutSeps = removeRune(withoutSeps, cp)
+	})
+
+	// withoutPunc is withoutSeps minus the punctuation.
+	withoutPunc := append([][2]rune{}, withoutSeps...)
+	visit(unicode.Po, func(cp rune) {
+		withoutPunc = removeRune(withoutPunc, cp)
+	})
+
+	// classContents builds the contents of a regular expression character
+	// class matching exactly the given ranges.
+	classContents := func(ranges [][2]rune) string {
+		var b strings.Builder
+		for _, r := range ranges {
+			// regexp.QuoteMeta is not necessary because all metacharacters are ASCII.
+			// cf. https://golang.org/s/re2syntax and
+			// https://cs.opensource.google/go/go/+/refs/tags/go1.17.6:src/regexp/regexp.go;l=721
+			b.WriteRune(r[0])
+			if r[0] != r[1] {
+				b.WriteRune('-')
+				b.WriteRune(r[1])
+			}
+		}
+		return b.String()
+	}
+	values := map[string]string{
+		"withPunc":    strconv.Quote(classContents(withoutSeps)),
+		"withoutPunc": strconv.Quote(classContents(withoutPunc)),
+	}
+
+	// Write to file.
+	out, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer out.Close()
+	return tmpl.Execute(out, values)
+}
+
+// main writes the generated file, exiting with a failure status if that
+// cannot be done.
+func main() {
+	log.Printf("Generating %s...", path)
+	err := writeUnicode()
+	if err == nil {
+		return
+	}
+	log.Fatalf("Could not write path: %v", err)
+}
@@ -0,0 +1,9 @@
+module mvdan.cc/xurls/v2
+
+go 1.19
+
+require (
+	github.com/rogpeppe/go-internal v1.10.0
+	golang.org/x/mod v0.10.0
+	golang.org/x/sync v0.1.0
+)
@@ -0,0 +1,6 @@
+github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
+github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
+golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
+golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -0,0 +1,375 @@
+// Generated by schemesgen
+
+package xurls
+
+// Schemes is a sorted list of all IANA assigned schemes.
+//
+// Source: https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv
+var Schemes = []string{
+	`aaa`,
+	`aaas`,
+	`about`,
+	`acap`,
+	`acct`,
+	`acd`,
+	`acr`,
+	`adiumxtra`,
+	`adt`,
+	`afp`,
+	`afs`,
+	`aim`,
+	`amss`,
+	`android`,
+	`appdata`,
+	`apt`,
+	`ar`,
+	`ark`,
+	`attachment`,
+	`aw`,
+	`barion`,
+	`bb`,
+	`beshare`,
+	`bitcoin`,
+	`bitcoincash`,
+	`blob`,
+	`bolo`,
+	`browserext`,
+	`cabal`,
+	`calculator`,
+	`callto`,
+	`cap`,
+	`cast`,
+	`casts`,
+	`chrome`,
+	`chrome-extension`,
+	`cid`,
+	`coap`,
+	`coap+tcp`,
+	`coap+ws`,
+	`coaps`,
+	`coaps+tcp`,
+	`coaps+ws`,
+	`com-eventbrite-attendee`,
+	`content`,
+	`content-type`,
+	`crid`,
+	`cstr`,
+	`cvs`,
+	`dab`,
+	`dat`,
+	`data`,
+	`dav`,
+	`diaspora`,
+	`dict`,
+	`did`,
+	`dis`,
+	`dlna-playcontainer`,
+	`dlna-playsingle`,
+	`dns`,
+	`dntp`,
+	`doi`,
+	`dpp`,
+	`drm`,
+	`drop`,
+	`dtmi`,
+	`dtn`,
+	`dvb`,
+	`dvx`,
+	`dweb`,
+	`ed2k`,
+	`eid`,
+	`elsi`,
+	`embedded`,
+	`ens`,
+	`ethereum`,
+	`example`,
+	`facetime`,
+	`fax`,
+	`feed`,
+	`feedready`,
+	`fido`,
+	`file`,
+	`filesystem`,
+	`finger`,
+	`first-run-pen-experience`,
+	`fish`,
+	`fm`,
+	`ftp`,
+	`fuchsia-pkg`,
+	`geo`,
+	`gg`,
+	`git`,
+	`gitoid`,
+	`gizmoproject`,
+	`go`,
+	`gopher`,
+	`graph`,
+	`grd`,
+	`gtalk`,
+	`h323`,
+	`ham`,
+	`hcap`,
+	`hcp`,
+	`http`,
+	`https`,
+	`hxxp`,
+	`hxxps`,
+	`hydrazone`,
+	`hyper`,
+	`iax`,
+	`icap`,
+	`icon`,
+	`im`,
+	`imap`,
+	`info`,
+	`iotdisco`,
+	`ipfs`,
+	`ipn`,
+	`ipns`,
+	`ipp`,
+	`ipps`,
+	`irc`,
+	`irc6`,
+	`ircs`,
+	`iris`,
+	`iris.beep`,
+	`iris.lwz`,
+	`iris.xpc`,
+	`iris.xpcs`,
+	`isostore`,
+	`itms`,
+	`jabber`,
+	`jar`,
+	`jms`,
+	`keyparc`,
+	`lastfm`,
+	`lbry`,
+	`ldap`,
+	`ldaps`,
+	`leaptofrogans`,
+	`lorawan`,
+	`lpa`,
+	`lvlt`,
+	`magnet`,
+	`mailserver`,
+	`mailto`,
+	`maps`,
+	`market`,
+	`matrix`,
+	`message`,
+	`microsoft.windows.camera`,
+	`microsoft.windows.camera.multipicker`,
+	`microsoft.windows.camera.picker`,
+	`mid`,
+	`mms`,
+	`modem`,
+	`mongodb`,
+	`moz`,
+	`ms-access`,
+	`ms-appinstaller`,
+	`ms-browser-extension`,
+	`ms-calculator`,
+	`ms-drive-to`,
+	`ms-enrollment`,
+	`ms-excel`,
+	`ms-eyecontrolspeech`,
+	`ms-gamebarservices`,
+	`ms-gamingoverlay`,
+	`ms-getoffice`,
+	`ms-help`,
+	`ms-infopath`,
+	`ms-inputapp`,
+	`ms-lockscreencomponent-config`,
+	`ms-media-stream-id`,
+	`ms-meetnow`,
+	`ms-mixedrealitycapture`,
+	`ms-mobileplans`,
+	`ms-newsandinterests`,
+	`ms-officeapp`,
+	`ms-people`,
+	`ms-project`,
+	`ms-powerpoint`,
+	`ms-publisher`,
+	`ms-remotedesktop-launch`,
+	`ms-restoretabcompanion`,
+	`ms-screenclip`,
+	`ms-screensketch`,
+	`ms-search`,
+	`ms-search-repair`,
+	`ms-secondary-screen-controller`,
+	`ms-secondary-screen-setup`,
+	`ms-settings`,
+	`ms-settings-airplanemode`,
+	`ms-settings-bluetooth`,
+	`ms-settings-camera`,
+	`ms-settings-cellular`,
+	`ms-settings-cloudstorage`,
+	`ms-settings-connectabledevices`,
+	`ms-settings-displays-topology`,
+	`ms-settings-emailandaccounts`,
+	`ms-settings-language`,
+	`ms-settings-location`,
+	`ms-settings-lock`,
+	`ms-settings-nfctransactions`,
+	`ms-settings-notifications`,
+	`ms-settings-power`,
+	`ms-settings-privacy`,
+	`ms-settings-proximity`,
+	`ms-settings-screenrotation`,
+	`ms-settings-wifi`,
+	`ms-settings-workplace`,
+	`ms-spd`,
+	`ms-stickers`,
+	`ms-sttoverlay`,
+	`ms-transit-to`,
+	`ms-useractivityset`,
+	`ms-virtualtouchpad`,
+	`ms-visio`,
+	`ms-walk-to`,
+	`ms-whiteboard`,
+	`ms-whiteboard-cmd`,
+	`ms-word`,
+	`msnim`,
+	`msrp`,
+	`msrps`,
+	`mss`,
+	`mt`,
+	`mtqp`,
+	`mumble`,
+	`mupdate`,
+	`mvn`,
+	`news`,
+	`nfs`,
+	`ni`,
+	`nih`,
+	`nntp`,
+	`notes`,
+	`num`,
+	`ocf`,
+	`oid`,
+	`onenote`,
+	`onenote-cmd`,
+	`opaquelocktoken`,
+	`openpgp4fpr`,
+	`otpauth`,
+	`p1`,
+	`pack`,
+	`palm`,
+	`paparazzi`,
+	`payment`,
+	`payto`,
+	`pkcs11`,
+	`platform`,
+	`pop`,
+	`pres`,
+	`prospero`,
+	`proxy`,
+	`pwid`,
+	`psyc`,
+	`pttp`,
+	`qb`,
+	`query`,
+	`quic-transport`,
+	`redis`,
+	`rediss`,
+	`reload`,
+	`res`,
+	`resource`,
+	`rmi`,
+	`rsync`,
+	`rtmfp`,
+	`rtmp`,
+	`rtsp`,
+	`rtsps`,
+	`rtspu`,
+	`sarif`,
+	`secondlife`,
+	`secret-token`,
+	`service`,
+	`session`,
+	`sftp`,
+	`sgn`,
+	`shc`,
+	`sieve`,
+	`simpleledger`,
+	`simplex`,
+	`sip`,
+	`sips`,
+	`skype`,
+	`smb`,
+	`smp`,
+	`sms`,
+	`smtp`,
+	`snews`,
+	`snmp`,
+	`soap.beep`,
+	`soap.beeps`,
+	`soldat`,
+	`spiffe`,
+	`spotify`,
+	`ssb`,
+	`ssh`,
+	`starknet`,
+	`steam`,
+	`stun`,
+	`stuns`,
+	`submit`,
+	`svn`,
+	`swh`,
+	`swid`,
+	`swidpath`,
+	`tag`,
+	`taler`,
+	`teamspeak`,
+	`tel`,
+	`teliaeid`,
+	`telnet`,
+	`tftp`,
+	`things`,
+	`thismessage`,
+	`tip`,
+	`tn3270`,
+	`tool`,
+	`turn`,
+	`turns`,
+	`tv`,
+	`udp`,
+	`unreal`,
+	`upt`,
+	`urn`,
+	`ut2004`,
+	`uuid-in-package`,
+	`v-event`,
+	`vemmi`,
+	`ventrilo`,
+	`ves`,
+	`videotex`,
+	`vnc`,
+	`view-source`,
+	`vscode`,
+	`vscode-insiders`,
+	`vsls`,
+	`w3`,
+	`wais`,
+	`web3`,
+	`wcr`,
+	`webcal`,
+	`web+ap`,
+	`wifi`,
+	`wpid`,
+	`ws`,
+	`wss`,
+	`wtai`,
+	`wyciwyg`,
+	`xcon`,
+	`xcon-userid`,
+	`xfire`,
+	`xmlrpc.beep`,
+	`xmlrpc.beeps`,
+	`xmpp`,
+	`xri`,
+	`ymsgr`,
+	`z39.50`,
+	`z39.50r`,
+	`z39.50s`,
+}
@@ -0,0 +1,24 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package xurls
+
+// PseudoTLDs is a sorted list of some widely used unofficial TLDs.
+//
+// The relaxed expression appends these to the ASCII TLD alternatives, so they
+// are matched case-insensitively and must be followed by a word boundary.
+//
+// Sources:
+//   - https://en.wikipedia.org/wiki/Pseudo-top-level_domain
+//   - https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains
+//   - https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00
+//   - https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml
+var PseudoTLDs = []string{
+	`bit`,       // Namecoin
+	`example`,   // Example domain
+	`exit`,      // Tor exit node
+	`gnu`,       // GNS by public key
+	`i2p`,       // I2P network
+	`invalid`,   // Invalid domain
+	`local`,     // Local network
+	`localhost`, // Local network
+	`test`,      // Test domain
+	`zkey`,      // GNS domain name
+}
@@ -0,0 +1,7 @@
+// Generated by unicodegen
+
+package xurls
+
+const allowedUcsChar = "¡-ᙿᚁ-\u1fff\u200b-‧\u202a-\u202e‰-⁞\u2060-\u2fff、-\ud7ff豈-\ufdcfﷰ-\uffef𐀀-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd"
+
+const allowedUcsCharMinusPunc = "¢-¦¨-µ¸-¾À-ͽͿ-ΆΈ-ՙՠ-ֈ֊-ֿׁ-ׂׄ-ׇׅ-ײ\u05f5-؈؋؎-ؚ\u061c-\u061dؠ-٩ٮ-ۓە-ۿ\u070e-߶ߺ-\u082f\u083f-\u085d\u085f-ॣ०-९ॱ-ৼ৾-ੵ\u0a77-૯૱-\u0c76౸-ಃಅ-ෳ\u0df5-๎๐-๙\u0e5c-༃༓༕-྄྆-࿏࿕-࿘\u0fdb-၉ၐ-ჺჼ-፟፩-᙭ᙯ-ᙿᚁ-ᛪᛮ-᜴\u1737-៓ៗ៛-\u17ff᠆᠋-\u1943᥆-\u1a1dᨠ-\u1a9fᪧ\u1aae-᭙᭡-\u1bfbᰀ-\u1c3a᱀-ᱽᲀ-Ჿ\u1cc8-᳔᳒-\u1fff\u200b-―‘-‟\u202a-\u202e‹-›‿-⁀⁄-⁆⁒⁔\u2060-\u2cf8⳽ⴀ-ⵯ\u2d71-ⷿ⸂-⸅⸉-⸊⸌-⸍⸗⸚⸜-⸝⸠-⸩ⸯ⸺-⸻⹀⹂⹐-⹑\u2e53-\u2fff〄-〼〾-ヺー-ꓽꔀ-ꘌꘐ-꙲ꙴ-꙽ꙿ-꛱\ua6f8-ꡳ\ua878-\ua8cd꣐-ꣷꣻꣽ-꤭ꤰ-\ua95eꥠ-꧀\ua9ce-\ua9ddꧠ-\uaa5bꩠ-ꫝꫠ-ꫯꫲ-ꯪ꯬-\ud7ff豈-\ufdcfﷰ-️︗-︘\ufe1a-︯︱-﹄﹇-﹈﹍-﹏\ufe53﹘-﹞﹢-\ufe67﹩\ufe6c-\uff00＄（-）＋－０-９＜-＞Ａ-［］-｠｢-｣ｦ-\uffef𐀀-\U000100ff\U00010103-\U0001039e𐎠-𐏏𐏑-\U0001056e\U00010570-\U00010856𐡘-\U0001091e𐤠-\U0001093e\U00010940-\U00010a4f\U00010a59-𐩾𐪀-𐫯\U00010af7-\U00010b38𐭀-\U00010b98\U00010b9d-𐽔\U00010f5a-𑁆\U0001104e-𑂺\U000110bd\U000110c2-𑄿𑅄-𑅳𑅶-𑇄𑇉-𑇌𑇎-𑇚𑇜\U000111e0-𑈷𑈾-𑊨\U000112aa-𑑊𑑐-𑑙\U0001145c𑑞-𑓅𑓇-𑗀𑗘-𑙀𑙄-\U0001165f\U0001166d-𑜻𑜿-𑠺\U0001183c-𑥃\U00011947-𑧡𑧣-𑨾𑩇-𑪙𑪝\U00011aa3-𑱀\U00011c46-\U00011c6f𑱲-𑻶\U00011ef9-\U00011ffe𒀀-\U0001246f\U00012475-\U00016a6d\U00016a70-𖫴\U00016af6-𖬶𖬼-𖭃𖭅-𖺖\U00016e9b-𖿡𖿣-𛲞\U0001bca0-𝪆\U0001da8c-\U0001e95d\U0001e960-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd"
@@ -0,0 +1,200 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+// Package xurls extracts urls from plain text using regular expressions.
+package xurls
+
+import (
+	"regexp"
+	"strings"
+	"sync"
+	"unicode/utf8"
+)
+
+//go:generate go run ./generate/tldsgen
+//go:generate go run ./generate/schemesgen
+//go:generate go run ./generate/unicodegen
+
+const (
+	// pathCont is based on https://www.rfc-editor.org/rfc/rfc3987#section-2.2
+	// but does not match separators anywhere or most punctuation in final position,
+	// to avoid creating asymmetries like
+	// `Did you know that **<a href="...">https://example.com/**</a> is reserved for documentation?`
+	// from `Did you know that **https://example.com/** is reserved for documentation?`.
+	unreservedChar      = `a-zA-Z0-9\-._~`
+	endUnreservedChar   = `a-zA-Z0-9\-_~`
+	midSubDelimChar     = `!$&'*+,;=`
+	endSubDelimChar     = `$&+=`
+	midIPathSegmentChar = unreservedChar + `%` + midSubDelimChar + `:@` + allowedUcsChar
+	endIPathSegmentChar = endUnreservedChar + `%` + endSubDelimChar + allowedUcsCharMinusPunc
+	iPrivateChar        = `\x{E000}-\x{F8FF}\x{F0000}-\x{FFFFD}\x{100000}-\x{10FFFD}`
+	midIChar            = `/?#\\` + midIPathSegmentChar + iPrivateChar
+	endIChar            = `/#` + endIPathSegmentChar + iPrivateChar
+	wellParen           = `\((?:[` + midIChar + `]|\([` + midIChar + `]*\))*\)`
+	wellBrack           = `\[(?:[` + midIChar + `]|\[[` + midIChar + `]*\])*\]`
+	wellBrace           = `\{(?:[` + midIChar + `]|\{[` + midIChar + `]*\})*\}`
+	wellAll             = wellParen + `|` + wellBrack + `|` + wellBrace
+	pathCont            = `(?:[` + midIChar + `]*(?:` + wellAll + `|[` + endIChar + `]))+`
+
+	letter    = `\p{L}`
+	mark      = `\p{M}`
+	number    = `\p{N}`
+	iriChar   = letter + mark + number
+	iri       = `[` + iriChar + `](?:[` + iriChar + `\-]*[` + iriChar + `])?`
+	subdomain = `(?:` + iri + `\.)+`
+	octet     = `(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
+	ipv4Addr  = octet + `\.` + octet + `\.` + octet + `\.` + octet
+
+	// ipv6Addr is based on https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
+	// with a specific alternative for each valid count of leading 16-bit hexadecimal "chomps"
+	// that have not been replaced with a `::` elision.
+	h4                 = `[0-9a-fA-F]{1,4}`
+	ipv6AddrMinusEmpty = `(?:` +
+		// 7 colon-terminated chomps, followed by a final chomp or the rest of an elision.
+		`(?:` + h4 + `:){7}(?:` + h4 + `|:)|` +
+		// 6 chomps, followed by an IPv4 address or elision with final chomp or final elision.
+		`(?:` + h4 + `:){6}(?:` + ipv4Addr + `|:` + h4 + `|:)|` +
+		// 5 chomps, followed by an elision with optional IPv4 or up to 2 final chomps.
+		`(?:` + h4 + `:){5}(?::` + ipv4Addr + `|(?::` + h4 + `){1,2}|:)|` +
+		// 4 chomps, followed by an elision with optional IPv4 (optionally preceded by a chomp) or
+		// up to 3 final chomps.
+		`(?:` + h4 + `:){4}(?:(?::` + h4 + `){0,1}:` + ipv4Addr + `|(?::` + h4 + `){1,3}|:)|` +
+		// 3 chomps, followed by an elision with optional IPv4 (preceded by up to 2 chomps) or
+		// up to 4 final chomps.
+		`(?:` + h4 + `:){3}(?:(?::` + h4 + `){0,2}:` + ipv4Addr + `|(?::` + h4 + `){1,4}|:)|` +
+		// 2 chomps, followed by an elision with optional IPv4 (preceded by up to 3 chomps) or
+		// up to 5 final chomps.
+		`(?:` + h4 + `:){2}(?:(?::` + h4 + `){0,3}:` + ipv4Addr + `|(?::` + h4 + `){1,5}|:)|` +
+		// 1 chomp, followed by an elision with optional IPv4 (preceded by up to 4 chomps) or
+		// up to 6 final chomps.
+		`(?:` + h4 + `:){1}(?:(?::` + h4 + `){0,4}:` + ipv4Addr + `|(?::` + h4 + `){1,6}|:)|` +
+		// elision, followed by optional IPv4 (preceded by up to 5 chomps) or
+		// up to 7 final chomps.
+		// `:` is an intentionally omitted alternative, to avoid matching `::`.
+		`:(?:(?::` + h4 + `){0,5}:` + ipv4Addr + `|(?::` + h4 + `){1,7})` +
+		`)`
+	ipv6Addr         = `(?:` + ipv6AddrMinusEmpty + `|::)`
+	ipAddrMinusEmpty = `(?:` + ipv6AddrMinusEmpty + `|\b` + ipv4Addr + `\b)`
+	port             = `(?::[0-9]*)?`
+)
+
+// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
+// scheme, and not just the known ones.
+//
+// A scheme followed by "://" must begin with an ASCII letter and may continue
+// with letters, digits, ".", "-" and "+", following the scheme grammar in
+// https://www.rfc-editor.org/rfc/rfc3986#section-3.1. Digits are included so
+// that schemes such as h323, ed2k or web3 are matched in full rather than
+// from their last run of letters. Schemes followed by a bare ":" are limited
+// to the entries of SchemesNoAuthority.
+var AnyScheme = `(?:[a-zA-Z][a-zA-Z0-9.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
+
+// SchemesNoAuthority is a sorted list of some well-known url schemes that are
+// followed by ":" instead of "://". The list includes both officially
+// registered and unofficial schemes.
+//
+// It feeds the ":" alternative of AnyScheme and of the expressions behind
+// Strict and Relaxed.
+var SchemesNoAuthority = []string{
+	`bitcoin`, // Bitcoin
+	`cid`,     // Content-ID
+	`file`,    // Files
+	`magnet`,  // Torrent magnets
+	`mailto`,  // Mail
+	`mid`,     // Message-ID
+	`sms`,     // SMS
+	`tel`,     // Telephone
+	`xmpp`,    // XMPP
+}
+
+// SchemesUnofficial is a sorted list of some well-known url schemes which
+// aren't officially registered just yet. They tend to correspond to software.
+//
+// The expressions behind Strict and Relaxed accept these, followed by "://",
+// in addition to the entries of Schemes.
+//
+// Mostly collected from https://en.wikipedia.org/wiki/List_of_URI_schemes#Unofficial_but_common_URI_schemes.
+var SchemesUnofficial = []string{
+	`gemini`,        // gemini
+	`jdbc`,          // Java database Connectivity
+	`moz-extension`, // Firefox extension
+	`postgres`,      // PostgreSQL (short form)
+	`postgresql`,    // PostgreSQL
+	`slack`,         // Slack
+	`zoommtg`,       // Zoom (desktop)
+	`zoomus`,        // Zoom (mobile)
+}
+
+// The regular expressions are compiled when the API is first called.
+// Any subsequent calls will use the same regular expression pointers.
+//
+// We do not need to make a copy of them for each API call,
+// as Copy is now only useful if one copy calls Longest but not another,
+// and we always call Longest after compiling the regular expression.
+var (
+	// strictRe is the value returned by Strict; strictInit guards its
+	// one-time compilation.
+	strictRe   *regexp.Regexp
+	strictInit sync.Once
+
+	// relaxedRe is the value returned by Relaxed; relaxedInit guards its
+	// one-time compilation.
+	relaxedRe   *regexp.Regexp
+	relaxedInit sync.Once
+)
+
+func anyOf(strs ...string) string {
+	var b strings.Builder
+	b.WriteString("(?:")
+	for i, s := range strs {
+		if i != 0 {
+			b.WriteByte('|')
+		}
+		b.WriteString(regexp.QuoteMeta(s))
+	}
+	b.WriteByte(')')
+	return b.String()
+}
+
+// strictExp returns the source of the strict expression: a case-insensitive
+// scheme prefix followed by pathCont. The prefix is either an entry of
+// Schemes or SchemesUnofficial followed by "://", or an entry of
+// SchemesNoAuthority followed by ":".
+func strictExp() string {
+	withAuthority := `(?:` + anyOf(Schemes...) + `|` + anyOf(SchemesUnofficial...) + `)://`
+	noAuthority := anyOf(SchemesNoAuthority...) + `:`
+	return `(?:(?i)` + withAuthority + `|` + noAuthority + `)` + pathCont
+}
+
+// relaxedExp returns the source of the relaxed expression. It is an
+// alternation of, in order: the strict expression, a scheme-less web URL
+// (host, optional port, optional path), an email address, and a bare
+// non-empty IPv6 address.
+func relaxedExp() string {
+	// Split TLDs at the first entry that starts with a non-ASCII byte.
+	// NOTE(review): this assumes TLDs lists all ASCII entries before any
+	// non-ASCII ones - confirm against the generator.
+	//
+	// Default to treating every entry as ASCII, so that a list without any
+	// non-ASCII entry is not silently dropped. The three-index slices cap the
+	// capacity so the append below can never write into TLDs' backing array.
+	asciiTLDs := TLDs[:len(TLDs):len(TLDs)]
+	var unicodeTLDs []string
+	for i, tld := range TLDs {
+		if tld[0] >= utf8.RuneSelf {
+			asciiTLDs = TLDs[:i:i]
+			unicodeTLDs = TLDs[i:]
+			break
+		}
+	}
+	punycode := `xn--[a-z0-9-]+`
+
+	// Use \b to make sure ASCII TLDs are immediately followed by a word break.
+	// We can't do that with unicode TLDs, as they don't see following
+	// whitespace as a word break.
+	tlds := `(?:(?i)` + punycode + `|` + anyOf(append(asciiTLDs, PseudoTLDs...)...) + `\b`
+	if len(unicodeTLDs) > 0 {
+		// Only add the alternative when it is non-empty: anyOf() with no
+		// arguments is "(?:)", which would match the empty string and accept
+		// any subdomain with nothing after its final dot.
+		tlds += `|` + anyOf(unicodeTLDs...)
+	}
+	tlds += `)`
+	domain := subdomain + tlds
+
+	hostName := `(?:` + domain + `|\[` + ipv6Addr + `\]|\b` + ipv4Addr + `\b)`
+	webURL := hostName + port + `(?:/` + pathCont + `|/)?`
+	email := `[a-zA-Z0-9._%\-+]+@` + domain
+	return strictExp() + `|` + webURL + `|` + email + `|` + ipv6AddrMinusEmpty
+}
+
+// Strict returns a regexp that matches any URL starting with a known scheme:
+// an entry of Schemes or SchemesUnofficial followed by "://", or an entry of
+// SchemesNoAuthority followed by ":".
+//
+// The regexp is compiled on the first call, set to leftmost-longest matching,
+// and shared by all later calls.
+func Strict() *regexp.Regexp {
+	strictInit.Do(func() {
+		re := regexp.MustCompile(strictExp())
+		re.Longest()
+		strictRe = re
+	})
+	return strictRe
+}
+
+// Relaxed returns a regexp that matches everything Strict matches, plus URLs
+// without a scheme, email addresses, and bare IPv6 addresses.
+//
+// The regexp is compiled on the first call, set to leftmost-longest matching,
+// and shared by all later calls.
+func Relaxed() *regexp.Regexp {
+	relaxedInit.Do(func() {
+		re := regexp.MustCompile(relaxedExp())
+		re.Longest()
+		relaxedRe = re
+	})
+	return relaxedRe
+}
+
+// StrictMatchingScheme returns a regexp like the one from Strict, except that
+// the scheme prefix must match exp instead of the built-in scheme lists. See
+// AnyScheme for a ready-made expression.
+//
+// exp is matched case-insensitively; case sensitivity is switched back on for
+// the rest of the URL. A new regexp is compiled on every call, and an error
+// is returned if the combined expression does not compile.
+func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
+	re, err := regexp.Compile(`(?i)(?:` + exp + `)(?-i)` + pathCont)
+	if err != nil {
+		return nil, err
+	}
+	re.Longest()
+	return re, nil
+}
@@ -0,0 +1,469 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package xurls
+
+import (
+	"fmt"
+	"regexp"
+	"sync"
+	"testing"
+)
+
+// testCase pairs an input text with the match expected from it.
+//
+// want is interpreted by wantStr: a string is the exact expected match, true
+// means the whole input is expected to match, and any other value (such as
+// nil or false) means no match is expected.
+type testCase struct {
+	in   string
+	want interface{}
+}
+
+func wantStr(in string, want interface{}) string {
+	switch x := want.(type) {
+	case string:
+		return x
+	case bool:
+		if x {
+			return in
+		}
+	}
+	return ""
+}
+
+// doTest runs each case as its own subtest named "<name>/<index>". Every
+// input is searched twice, once bare and once wrapped in newlines, and the
+// first match reported by re must equal the expectation given by wantStr.
+func doTest(t *testing.T, name string, re *regexp.Regexp, cases []testCase) {
+	wrappers := []string{"", "\n"}
+	for i, c := range cases {
+		subtest := fmt.Sprintf("%s/%03d", name, i)
+		t.Run(subtest, func(t *testing.T) {
+			want := wantStr(c.in, c.want)
+			for _, w := range wrappers {
+				input := w + c.in + w
+				if got := re.FindString(input); got != want {
+					t.Errorf(`FindString(%q) got %q, want %q`, input, got, want)
+				}
+			}
+		})
+	}
+}
+
+var constantTestCases = []testCase{
+	{``, nil},
+	{` `, nil},
+	{`:`, nil},
+	{`::`, nil},
+	{`:::`, nil},
+	{`::::`, nil},
+	{`.`, nil},
+	{`..`, nil},
+	{`...`, nil},
+	{`1.1`, nil},
+	{`.1.`, nil},
+	{`1.1.1`, nil},
+	{`1:1`, nil},
+	{`:1:`, nil},
+	{`1:1:1`, nil},
+	{`://`, nil},
+	{`foo`, nil},
+	{`foo:`, nil},
+	{`mailto:`, nil},
+	{`foo://`, nil},
+	{`http://`, nil},
+	{`http:// foo`, nil},
+	{`http:// foo`, nil},
+	{`:foo`, nil},
+	{`://foo`, nil},
+	{`foorandom:bar`, nil},
+	{`foo.randombar`, nil},
+	{`zzz.`, nil},
+	{`.zzz`, nil},
+	{`zzz.zzz`, nil},
+	{`/some/path`, nil},
+	{`rel/path`, nil},
+	{`localhost`, nil},
+	{`com`, nil},
+	{`.com`, nil},
+	{`com.`, nil},
+	{`http`, nil},
+
+	{`http://foo`, true},
+	{`http://FOO`, true},
+	{`http://FAÀ`, true},
+	{`https://localhost`, true},
+	{`mailto:foo`, true},
+	{`MAILTO:foo`, true},
+	{`sms:123`, true},
+	{`xmpp:foo@bar`, true},
+	{`bitcoin:Addr23?amount=1&message=foo`, true},
+	{`cid:foo-32x32.v2_fe0f1423.png`, true},
+	{`mid:960830.1639@XIson.com`, true},
+	{`http://foo.com`, true},
+	{`http://foo.co.uk`, true},
+	{`http://foo.random`, true},
+	{` http://foo.com/bar `, `http://foo.com/bar`},
+	{` http://foo.com/bar more`, `http://foo.com/bar`},
+	{`<http://foo.com/bar>`, `http://foo.com/bar`},
+	{`<http://foo.com/bar>more`, `http://foo.com/bar`},
+	{`.http://foo.com/bar.`, `http://foo.com/bar`},
+	{`.http://foo.com/bar.more`, `http://foo.com/bar.more`},
+	{`,http://foo.com/bar,`, `http://foo.com/bar`},
+	{`,http://foo.com/bar,more`, `http://foo.com/bar,more`},
+	{`*http://foo.com/bar*`, `http://foo.com/bar`},
+	{`*http://foo.com/bar*more`, `http://foo.com/bar*more`},
+	{`_http://foo.com/bar_`, `http://foo.com/bar_`},
+	{`_http://foo.com/bar_more`, `http://foo.com/bar_more`},
+	{`(http://foo.com/bar)`, `http://foo.com/bar`},
+	{`(http://foo.com/bar)more`, `http://foo.com/bar`},
+	{`[http://foo.com/bar]`, `http://foo.com/bar`},
+	{`[http://foo.com/bar]more`, `http://foo.com/bar`},
+	{`'http://foo.com/bar'`, `http://foo.com/bar`},
+	{`'http://foo.com/bar'more`, `http://foo.com/bar'more`},
+	{`"http://foo.com/bar"`, `http://foo.com/bar`},
+	{`"http://foo.com/bar"more`, `http://foo.com/bar`},
+	{`{"url":"http://foo.com/bar"}`, `http://foo.com/bar`},
+	{`{"before":"foo","url":"http://foo.com/bar","after":"bar"}`, `http://foo.com/bar`},
+	{`http://a.b/a0/-+_&~*%=#@.,:;'?![]()a`, true},
+	{`http://a.b/a0/$€¥`, true},
+	{`http://✪foo.bar/pa✪th©more`, true},
+	{`http://foo.bar/path/`, true},
+	{`http://foo.bar/path-`, true},
+	{`http://foo.bar/path+`, true},
+	{`http://foo.bar/path&`, true},
+	{`http://foo.bar/path~`, true},
+	{`http://foo.bar/path%`, true},
+	{`http://foo.bar/path=`, true},
+	{`http://foo.bar/path#`, true},
+	{`http://foo.bar/path.`, `http://foo.bar/path`},
+	{`http://foo.bar/path,`, `http://foo.bar/path`},
+	{`http://foo.bar/path:`, `http://foo.bar/path`},
+	{`http://foo.bar/path;`, `http://foo.bar/path`},
+	{`http://foo.bar/path'`, `http://foo.bar/path`},
+	{`http://foo.bar/path?`, `http://foo.bar/path`},
+	{`http://foo.bar/path!`, `http://foo.bar/path`},
+	{`http://foo.bar/path@`, `http://foo.bar/path`},
+	{`http://foo.bar/path|`, `http://foo.bar/path`},
+	{`http://foo.bar/path|more`, `http://foo.bar/path`},
+	{`http://foo.bar/path<`, `http://foo.bar/path`},
+	{`http://foo.bar/path<more`, `http://foo.bar/path`},
+	{`http://foo.com/path_(more)`, true},
+	{`(http://foo.com/path_(more))`, `http://foo.com/path_(more)`},
+	{`http://foo.com/path_(even)-(more)`, true},
+	{`http://foo.com/path_(even)(more)`, true},
+	{`http://foo.com/path_(even_(nested))`, true},
+	{`(http://foo.com/path_(even_(nested)))`, `http://foo.com/path_(even_(nested))`},
+	{`http://foo.com/path_[more]`, true},
+	{`[http://foo.com/path_[more]]`, `http://foo.com/path_[more]`},
+	{`http://foo.com/path_[even]-[more]`, true},
+	{`http://foo.com/path_[even][more]`, true},
+	{`http://foo.com/path_[even_[nested]]`, true},
+	{`[http://foo.com/path_[even_[nested]]]`, `http://foo.com/path_[even_[nested]]`},
+	{`http://foo.com/path_{more}`, true},
+	{`{http://foo.com/path_{more}}`, `http://foo.com/path_{more}`},
+	{`http://foo.com/path_{even}-{more}`, true},
+	{`http://foo.com/path_{even}{more}`, true},
+	{`http://foo.com/path_{even_{nested}}`, true},
+	{`{http://foo.com/path_{even_{nested}}}`, `http://foo.com/path_{even_{nested}}`},
+	{`http://foo.com/path#fragment`, true},
+	{`http://foo.com/emptyfrag#`, true},
+	{`http://foo.com/spaced%20path`, true},
+	{`http://foo.com/?p=spaced%20param`, true},
+	{`http://test.foo.com/`, true},
+	{`http://foo.com/path`, true},
+	{`http://foo.com:8080/path`, true},
+	{`http://1.1.1.1/path`, true},
+	{`http://1.1.1.1:8080/path`, true},
+	{`http://[1080::8:800:200c:417a]/path`, true},
+	{`http://[1080::8:800:200c:417a]:8080/path`, true},
+
+	// scheme://IPv6_addr is not valid per RFC 3987, but is supported anyway (for now).
+	{`http://1080::8:800:200c:417a/path`, true},
+	{`http://2001.db8:0/path`, true},
+
+	{`http://中国.中国/中国`, true},
+	{`http://中国.中国/foo中国`, true},
+	{`http://उदाहरण.परीकषा`, true},
+	{`http://xn-foo.xn--p1acf/path`, true},
+	{`what is http://foo.com?`, `http://foo.com`},
+	{`go visit http://foo.com/path.`, `http://foo.com/path`},
+	{`go visit http://foo.com/path...`, `http://foo.com/path`},
+	{`what is http://foo.com/path?`, `http://foo.com/path`},
+	{`the http://foo.com!`, `http://foo.com`},
+	{`https://test.foo.bar/path?a=b`, `https://test.foo.bar/path?a=b`},
+	{`ftp://user@foo.bar`, true},
+	{`http://foo.com/base64-bCBwbGVhcw==`, true},
+	{`http://foo.com/–`, true},
+	{`http://foo.com/🐼`, true},
+	{`https://shmibbles.me/tmp/自殺でも？.png`, true},
+	{`randomtexthttp://foo.bar/etc`, "http://foo.bar/etc"},
+	{`postgres://user:pass@host.com:5432/path?k=v#f`, true},
+	{`postgres://user:pass@host.com:5432/path?k=v#f`, true},
+	{`zoommtg://zoom.us/join?confno=1234&pwd=xxx`, true},
+	{`zoomus://zoom.us/join?confno=1234&pwd=xxx`, true},
+}
+
+func TestRegexes(t *testing.T) {
+	doTest(t, "Relaxed", Relaxed(), constantTestCases)
+	doTest(t, "Strict", Strict(), constantTestCases)
+	doTest(t, "Relaxed2", Relaxed(), []testCase{
+		{`foo.a`, nil},
+		{`foo.com`, true},
+		{`foo.com bar.com`, `foo.com`},
+		{`foo.com-foo`, `foo.com`},
+		{`foo.company`, true},
+		{`foo.comrandom`, nil},
+		{`some.guy`, nil},
+		{`foo.example`, true},
+		{`foo.i2p`, true},
+		{`foo.local`, true},
+		{`foo.onion`, true},
+		{`中国.中国`, true},
+		{`中国.中国/foo中国`, true},
+		{`test.联通`, true},
+		{`test.联通 extra`, `test.联通`},
+		{`test.xn--8y0a063a`, true},
+		{`test.xn--8y0a063a/foobar`, true},
+		{`test.xn-foo`, nil},
+		{`test.xn--`, nil},
+		{`foo.com/`, true},
+		{`1.1.1.1`, true},
+		{`10.50.23.250`, true},
+		{`121.1.1.1`, true},
+		{`255.1.1.1`, true},
+		{`300.1.1.1`, nil},
+		{`1.1.1.300`, nil},
+		{`foo@1.2.3.4`, `1.2.3.4`},
+
+		// https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
+		{`::1`, true},
+		//{`::`, true},
+		{`::ffff:0:0`, true},
+		{`64:ff9b::`, true},
+		{`64:ff9b:1::`, true},
+		{`100::`, true},
+		{`2001::`, true},
+		{`2001:1::1`, true},
+		{`2001:1::2`, true},
+		{`2001:2::`, true},
+		{`2001:3::`, true},
+		{`2001:4:112::`, true},
+		{`2001:10::`, true},
+		{`2001:20::`, true},
+		{`2001:db8::`, true},
+		{`2002::`, true},
+		{`2620:4f:8000::`, true},
+		{`fc00::`, true},
+		{`fe80::`, true},
+
+		// https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
+		{`ABCD:EF01:2345:6789:ABCD:EF01:2345:6789`, true},
+		{`2001:DB8:0:0:8:800:200C:417A`, true},
+		{`2001:DB8:0:0:8:800:200C:417A`, true}, // a unicast address
+		{`FF01:0:0:0:0:0:0:101`, true},         // a multicast address
+		{`0:0:0:0:0:0:0:1`, true},              // the loopback address
+		{`0:0:0:0:0:0:0:0`, true},              // the unspecified address
+		{`2001:DB8::8:800:200C:417A`, true},    // a unicast address
+		{`FF01::101`, true},                    // a multicast address
+		{`::1`, true},                          // the loopback address
+		//{`::`, true},                         // the unspecified address
+		{`::`, nil},
+		{`0:0:0:0:0:0:13.1.68.3`, true},
+		{`0:0:0:0:0:FFFF:129.144.52.38`, true},
+		{`::13.1.68.3`, true},
+		{`::FFFF:129.144.52.38`, true},
+
+		// https://datatracker.ietf.org/doc/html/rfc5952#section-1
+		{`2001:db8:0:0:1:0:0:1`, true},
+		{`2001:0db8:0:0:1:0:0:1`, true},
+		{`2001:db8::1:0:0:1`, true},
+		{`2001:db8::0:1:0:0:1`, true},
+		{`2001:0db8::1:0:0:1`, true},
+		{`2001:db8:0:0:1::1`, true},
+		{`2001:db8:0000:0:1::1`, true},
+		{`2001:DB8:0:0:1::1`, true},
+
+		// https://datatracker.ietf.org/doc/html/rfc5952#section-2.1
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:0001`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:001`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:01`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:1`, true},
+
+		// https://datatracker.ietf.org/doc/html/rfc5952#section-2.2
+		{`2001:db8:aaaa:bbbb:cccc:dddd::1`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:0:1`, true},
+		{`2001:db8:0:0:0::1`, true},
+		{`2001:db8:0:0::1`, true},
+		{`2001:db8:0::1`, true},
+		{`2001:db8::1`, true},
+		{`2001:db8::aaaa:0:0:1`, true},
+		{`2001:db8:0:0:aaaa::1`, true},
+
+		// https://datatracker.ietf.org/doc/html/rfc5952#section-2.3
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:aaaa`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:AAAA`, true},
+		{`2001:db8:aaaa:bbbb:cccc:dddd:eeee:AaAa`, true},
+
+		// An IP address in URI host position must be bracketed unless it is IPv4.
+		// https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
+		// TODO: Implement this restriction, ideally without matching the `http://1080` prefix.
+		//{`http://1080::8:800:200c:417a/path`, `1080::8:800:200c:417a`},
+
+		{`foo.com:8080`, true},
+		{`foo.com:8080/path`, true},
+		{`test.foo.com`, true},
+		{`test.foo.com/path`, true},
+		{`test.foo.com/path/more/`, true},
+		{`TEST.FOO.COM/PATH`, true},
+		{`TEST.FÓO.COM/PÁTH`, true},
+		{`foo.com/path_(more)`, true},
+		{`foo.com/path_(even)_(more)`, true},
+		{`foo.com/path_(more)/more`, true},
+		{`foo.com/path_(more)/end)`, `foo.com/path_(more)/end`},
+		{`www.foo.com`, true},
+		{` foo.com/bar `, `foo.com/bar`},
+		{` foo.com/bar more`, `foo.com/bar`},
+		{`<foo.com/bar>`, `foo.com/bar`},
+		{`<foo.com/bar>more`, `foo.com/bar`},
+		{`,foo.com/bar.`, `foo.com/bar`},
+		{`,foo.com/bar.more`, `foo.com/bar.more`},
+		{`,foo.com/bar,`, `foo.com/bar`},
+		{`,foo.com/bar,more`, `foo.com/bar,more`},
+		{`(foo.com/bar)`, `foo.com/bar`},
+		{`"foo.com/bar'`, `foo.com/bar`},
+		{`"foo.com/bar'more`, `foo.com/bar'more`},
+		{`"foo.com/bar"`, `foo.com/bar`},
+		{`what is foo.com?`, `foo.com`},
+		{`the foo.com!`, `foo.com`},
+
+		{`foo@bar`, nil},
+		{`foo@bar.a`, nil},
+		{`foo@bar.com`, true},
+		{`foo@sub.bar.com`, true},
+		{`foo@bar.com bar@bar.com`, `foo@bar.com`},
+		{`foo@bar.onion`, true},
+		{`foo@中国.中国`, true},
+		{`foo@test.bar.com`, true},
+		{`FOO@TEST.BAR.COM`, true},
+		{`foo@bar.com/path`, `foo@bar.com`},
+		{`foo+test@bar.com`, true},
+		{`foo+._%-@bar.com`, true},
+	})
+	doTest(t, "Strict2", Strict(), []testCase{
+		{`http:// foo.com`, nil},
+		{`foo.a`, nil},
+		{`foo.com`, nil},
+		{`foo.com/`, nil},
+		{`1.1.1.1`, nil},
+		{`3ffe:2a00:100:7031::1`, nil},
+		{`test.foo.com:8080/path`, nil},
+		{`foo@bar.com`, nil},
+
+		// An IP address in URI host position must be bracketed unless it is IPv4.
+		// https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
+		// TODO: Implement this restriction, ideally without matching the `http://1080` prefix.
+		//{`http://1080::8:800:200c:417a/path`, nil},
+	})
+}
+
+// TestStrictMatchingSchemeError checks that StrictMatchingScheme returns an
+// error exactly when the given scheme expression does not compile.
+func TestStrictMatchingSchemeError(t *testing.T) {
+	for _, c := range []struct {
+		exp     string
+		wantErr bool
+	}{
+		{`http://`, false},
+		{`https?://`, false},
+		{`http://|mailto:`, false},
+		{`http://(`, true},
+	} {
+		_, err := StrictMatchingScheme(c.exp)
+		switch {
+		case c.wantErr && err == nil:
+			t.Errorf(`StrictMatchingScheme(%q) did not error as expected`, c.exp)
+		case !c.wantErr && err != nil:
+			// Include the error itself so a failure is diagnosable from the log.
+			t.Errorf(`StrictMatchingScheme(%q) unexpectedly errored: %v`, c.exp, err)
+		}
+	}
+}
+
+// TestStrictMatchingScheme checks that a regexp built for a fixed set of
+// schemes matches those schemes case-insensitively and nothing else.
+func TestStrictMatchingScheme(t *testing.T) {
+	strictMatching, err := StrictMatchingScheme("http://|ftps?://|mailto:")
+	if err != nil {
+		// Stop here: doTest would otherwise dereference a nil regexp.
+		t.Fatalf("StrictMatchingScheme: %v", err)
+	}
+	doTest(t, "StrictMatchingScheme", strictMatching, []testCase{
+		{`foo.com`, nil},
+		{`foo@bar.com`, nil},
+		{`http://foo`, true},
+		{`Http://foo`, true},
+		{`https://foo`, nil},
+		{`ftp://foo`, true},
+		{`ftps://foo`, true},
+		{`mailto:foo`, true},
+		{`MAILTO:foo`, true},
+		{`sms:123`, nil},
+	})
+}
+
+// TestStrictMatchingSchemeAny checks the regexp built from AnyScheme against
+// known, compound and no-authority schemes.
+func TestStrictMatchingSchemeAny(t *testing.T) {
+	strictMatching, err := StrictMatchingScheme(AnyScheme)
+	if err != nil {
+		// Stop here: doTest would otherwise dereference a nil regexp.
+		t.Fatalf("StrictMatchingScheme(AnyScheme): %v", err)
+	}
+	doTest(t, "StrictMatchingScheme", strictMatching, []testCase{
+		{`http://foo`, true},
+		{`git+https://foo`, true},
+		{`randomtexthttp://foo.bar/etc`, true},
+		{`mailto:foo`, true},
+	})
+}
+
+// bench is the shared benchmark body: it finds all matches in str from
+// parallel goroutines, fetching the regexp through re on every iteration,
+// and reports allocations and throughput in bytes of str.
+func bench(b *testing.B, re func() *regexp.Regexp, str string) {
+	b.SetBytes(int64(len(str)))
+	b.ReportAllocs()
+
+	worker := func(pb *testing.PB) {
+		for pb.Next() {
+			re().FindAllString(str, -1)
+		}
+	}
+	b.RunParallel(worker)
+}
+
+const inputNone = `
+foo bar
+yaml: "as well"
+some more plaintext
+which does not contain any urls.
+`
+
+const inputMany = `
+foo bar http://foo.foo https://192.168.1.1/path
+foo.com bitcoin:address ftp://
+xmpp:foo@bar.com
+`
+
+// BenchmarkStrict_none runs the Strict regexp over inputNone.
+func BenchmarkStrict_none(b *testing.B) {
+	bench(b, Strict, inputNone)
+}
+
+// BenchmarkStrict_many runs the Strict regexp over inputMany.
+func BenchmarkStrict_many(b *testing.B) {
+	bench(b, Strict, inputMany)
+}
+
+// BenchmarkRelaxed_none runs the Relaxed regexp over inputNone.
+func BenchmarkRelaxed_none(b *testing.B) {
+	bench(b, Relaxed, inputNone)
+}
+
+// BenchmarkRelaxed_many runs the Relaxed regexp over inputMany.
+func BenchmarkRelaxed_many(b *testing.B) {
+	bench(b, Relaxed, inputMany)
+}
+
+// rxMatchingScheme caches the regexp used by the StrictMatchingScheme
+// benchmarks; rxMatchingSchemeOnce guards its one-time compilation in
+// matchingScheme.
+var (
+	rxMatchingScheme     *regexp.Regexp
+	rxMatchingSchemeOnce sync.Once
+)
+
+func matchingScheme() *regexp.Regexp {
+	rxMatchingSchemeOnce.Do(func() {
+		rx, err := StrictMatchingScheme("https?://")
+		if err != nil {
+			panic(err)
+		}
+		rxMatchingScheme = rx
+	})
+	return rxMatchingScheme
+}
+
+// BenchmarkStrictMatchingScheme_none runs the matchingScheme regexp over
+// inputNone.
+func BenchmarkStrictMatchingScheme_none(b *testing.B) {
+	bench(b, matchingScheme, inputNone)
+}
+
+// BenchmarkStrictMatchingScheme_many runs the matchingScheme regexp over
+// inputMany.
+func BenchmarkStrictMatchingScheme_many(b *testing.B) {
+	bench(b, matchingScheme, inputMany)
+}