whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,293 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package main
import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"regexp"
"runtime/debug"
"strings"
"sync/atomic"
"time"
"golang.org/x/mod/module"
"mvdan.cc/xurls/v2"
)
// Command-line flags. Their usage strings are empty because init installs a
// custom flag.Usage which documents every flag by hand.

// matching is the -m regexp restricting which url schemes are matched.
var matching = flag.String("m", "", "")

// relaxed is the -r switch, which also matches urls without a scheme.
var relaxed = flag.Bool("r", false, "")

// fix holds the raw -fix value; it is registered with the flag package in
// init and normalized in main1.
var fix boolString

// version is the -version switch, which prints the version and exits.
var version = flag.Bool("version", false, "")
// boolString is a string-valued flag which may also be given without a value,
// like a boolean flag. Since IsBoolFlag reports true, a bare "-fix" is handed
// to Set as "true" by the flag package, while "-fix=all" is handed over as
// "all".
type boolString string

// Set stores val verbatim; it never fails.
func (b *boolString) Set(val string) error {
	*b = boolString(val)
	return nil
}

// Get returns the current value as a plain string.
func (b *boolString) Get() any {
	return b.String()
}

// String returns the current value.
func (b *boolString) String() string {
	return string(*b)
}

// IsBoolFlag tells the flag package that the flag may appear without a value.
func (*boolString) IsBoolFlag() bool {
	return true
}
// init registers the -fix flag and replaces the default usage output with a
// hand-written help text printed to standard error.
func init() {
	// The text begins with a newline only so that it lines up in the source;
	// that first byte is dropped when printing.
	const usage = `
Usage: xurls [-h] [files]
xurls extracts urls from text using regular expressions.
If no files are given, it reads from standard input.
-m <regexp> only match urls whose scheme matches a regexp
example: 'https?://|mailto:'
-r also match urls without a scheme (relaxed)
-version print version and exit
When the -fix or -fix=auto flag is used, xurls instead attempts to replace
any urls which result in a permanent redirect (301 or 308).
It also fails if any urls fail to load, so that they may be removed or replaced.
To replace urls which result in temporary redirect as well, use -fix=all.
`

	flag.Var(&fix, "fix", "")
	flag.Usage = func() {
		fmt.Fprint(os.Stderr, usage[1:])
	}
}
// scanPath extracts the urls matched by re from the file at path, or from
// standard input when path is "-".
//
// With -fix disabled, every match is printed to standard output on its own
// line. With -fix enabled, each http and https url is requested to discover
// redirects: urls are rewritten to their redirect target, and urls which fail
// to load are collected as broken. If any url was rewritten and path is a real
// file, the file is overwritten and its path is printed; fixed input read from
// standard input is written to standard output instead.
//
// It returns an error if the input cannot be opened or fully read, if the
// fixed file cannot be written, or if any broken urls were found.
func scanPath(re *regexp.Regexp, path string) error {
	in := os.Stdin
	out := io.Writer(os.Stdout)
	var outBuf *bytes.Buffer
	if path != "-" {
		var err error
		in, err = os.Open(path)
		if err != nil {
			return err
		}
		if fix != "" {
			// Buffer the fixed contents; the file is only rewritten at
			// the end, and only if something actually changed.
			outBuf = new(bytes.Buffer)
			out = outBuf
		}
		defer in.Close()
	}

	// A maximum of 32 parallel requests.
	maxWeight := int64(32)
	seq := newSequencer(maxWeight, out, os.Stderr)
	userAgent := fmt.Sprintf("mvdan.cc/xurls %s", readVersion())

	scanner := bufio.NewScanner(in)

	// Doesn't need to be part of reporterState as order doesn't matter.
	var atomicFixedCount uint32

	for scanner.Scan() {
		line := scanner.Text() + "\n"
		matches := re.FindAllStringIndex(line, -1)
		if fix == "" {
			for _, pair := range matches {
				match := line[pair[0]:pair[1]]
				fmt.Printf("%s\n", match)
			}
			continue
		}
		// Each line is one task, weighted by how many urls it contains.
		weight := int64(len(matches))
		if weight > maxWeight {
			weight = maxWeight
		}
		seq.Add(weight, func(r *reporter) error {
			offsetWithinLine := 0
			for _, pair := range matches {
				// The indexes are based on the original line.
				pair[0] += offsetWithinLine
				pair[1] += offsetWithinLine
				match := line[pair[0]:pair[1]]
				origURL, err := url.Parse(match)
				if err != nil {
					r.appendBroken(match, err.Error())
					continue
				}
				fixed := origURL.String()
				switch origURL.Scheme {
				case "http", "https":
					// See if the URL redirects somewhere.
					// The client is per-url, as CheckRedirect records the
					// redirect target into this url's "fixed" variable.
					client := &http.Client{
						Timeout: 10 * time.Second,
						CheckRedirect: func(req *http.Request, via []*http.Request) error {
							if len(via) >= 10 {
								return errors.New("stopped after 10 redirects")
							}
							switch req.Response.StatusCode {
							case http.StatusMovedPermanently, http.StatusPermanentRedirect:
								// "auto" and "all" fix permanent redirects.
							case http.StatusFound, http.StatusSeeOther, http.StatusTemporaryRedirect:
								// Only "all" fixes temporary redirects.
								if fix != "all" {
									return http.ErrUseLastResponse
								}
							default:
								// Any other redirects are ignored.
								return http.ErrUseLastResponse
							}
							// Inherit the fragment if empty.
							if req.URL.Fragment == "" {
								req.URL.Fragment = origURL.Fragment
							}
							fixed = req.URL.String()
							return nil
						},
					}
					method := http.MethodHead
				retry:
					req, err := http.NewRequest(method, fixed, nil)
					if err != nil {
						r.appendBroken(match, err.Error())
						continue
					}
					req.Header.Set("User-Agent", userAgent)
					resp, err := client.Do(req)
					if err != nil {
						r.appendBroken(match, err.Error())
						continue
					}
					if code := resp.StatusCode; code >= 400 {
						// Fall back from HEAD to GET exactly once. If GET is
						// rejected with 405 as well, report the url as broken
						// rather than retrying forever.
						if code == http.StatusMethodNotAllowed && method == http.MethodHead {
							method = http.MethodGet
							resp.Body.Close()
							goto retry
						}
						r.appendBroken(match, fmt.Sprintf("%d %s", code, http.StatusText(code)))
					}
					resp.Body.Close()
				}
				if fixed != match {
					// Replace the url, and update offsetWithinLine.
					newLine := line[:pair[0]] + fixed + line[pair[1]:]
					offsetWithinLine += len(newLine) - len(line)
					line = newLine
					atomic.AddUint32(&atomicFixedCount, 1)
				}
			}
			io.WriteString(r, line) // add the fixed line to outBuf
			return nil
		})
	}

	// Wait for every queued task, so that no goroutine is still writing to
	// out by the time we inspect the results or return.
	state := seq.finalState()
	if state.exitCode != 0 {
		panic("we aren't using sequencer for any errors")
	}

	// Scan simply returns false on a read error or an over-long line, so the
	// error can only be observed once the loop has ended. Bail out before the
	// write below; otherwise a partially-read file would be overwritten with
	// truncated contents.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading %q: %w", path, err)
	}

	// Note that all goroutines have stopped at this point.
	if atomic.LoadUint32(&atomicFixedCount) > 0 && path != "-" {
		in.Close()
		// Overwrite the file, if we weren't reading stdin. Report its
		// path too.
		fmt.Println(path)
		if err := ioutil.WriteFile(path, outBuf.Bytes(), 0o666); err != nil {
			return err
		}
	}
	if len(state.brokenURLs) > 0 {
		var s strings.Builder
		fmt.Fprintf(&s, "found %d broken urls in %q:\n", len(state.brokenURLs), path)
		for _, broken := range state.brokenURLs {
			fmt.Fprintf(&s, " * %s - %s\n", broken.url, broken.reason)
		}
		return errors.New(s.String())
	}
	return nil
}
// main runs the command and exits with the status code that main1 returns.
func main() {
	code := main1()
	os.Exit(code)
}
// main1 is the real entry point: it parses the flags, picks the url regular
// expression, and scans every file argument in order, or standard input if
// there are none. It returns the process exit code: 0 on success, and 1 on a
// usage error or as soon as any path fails to scan.
func main1() int {
	flag.Parse()
	if *version {
		fmt.Println(readVersion())
		return 0
	}
	if *relaxed && *matching != "" {
		fmt.Fprintln(os.Stderr, "-r and -m at the same time don't make much sense")
		return 1
	}
	switch fix {
	case "": // disabled by default
	case "false": // disabled via -fix=false; normalize
		fix = ""
	case "auto", "all": // enabled via -fix=auto, -fix=all, etc
	case "true": // enabled via -fix; normalize
		fix = "auto"
	default:
		// Reject anything else. Otherwise a typo such as -fix=al would be
		// treated as -fix=auto and silently rewrite the given files.
		fmt.Fprintf(os.Stderr, "invalid value for -fix: %q\n", string(fix))
		return 1
	}

	var re *regexp.Regexp
	switch {
	case *relaxed:
		re = xurls.Relaxed()
	case *matching != "":
		var err error
		if re, err = xurls.StrictMatchingScheme(*matching); err != nil {
			fmt.Fprintln(os.Stderr, err)
			return 1
		}
	default:
		re = xurls.Strict()
	}

	args := flag.Args()
	if len(args) == 0 {
		// "-" makes scanPath read from standard input.
		args = []string{"-"}
	}
	for _, path := range args {
		if err := scanPath(re, path); err != nil {
			fmt.Fprintln(os.Stderr, err)
			return 1
		}
	}
	return 0
}
// readVersion returns the version of the main module as recorded in the
// binary's build information, or "unknown" if none is embedded. For "(devel)"
// builds which carry VCS settings, a pseudo-version is assembled from the
// commit time and the first 12 characters of the revision.
//
// Borrowed from https://github.com/burrowers/garble.
func readVersion() string {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		return "unknown"
	}

	mod := &info.Main
	if mod.Replace != nil {
		mod = mod.Replace
	}
	if mod.Version != "(devel)" {
		return mod.Version
	}

	// Until https://github.com/golang/go/issues/50603 is implemented,
	// manually construct something like a pseudo-version.
	// TODO: remove when this code is dead, hopefully in Go 1.20.
	var (
		commitTime time.Time
		revision   string
	)
	for _, kv := range info.Settings {
		if kv.Key == "vcs.time" {
			// If the format is invalid, we'll print a zero timestamp.
			commitTime, _ = time.Parse(time.RFC3339Nano, kv.Value)
		} else if kv.Key == "vcs.revision" {
			revision = kv.Value
			if len(revision) > 12 {
				revision = revision[:12]
			}
		}
	}
	if revision == "" {
		// No VCS information was embedded; stick with "(devel)".
		return mod.Version
	}
	mod.Version = module.PseudoVersion("", "", commitTime, revision)
	return mod.Version
}
@@ -0,0 +1,125 @@
// Copyright (c) 2019, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package main
import (
"context"
"fmt"
"io/ioutil"
"net"
"net/http"
"os"
"path/filepath"
"testing"
"github.com/rogpeppe/go-internal/testscript"
)
func TestMain(m *testing.M) {
os.Exit(testscript.RunMain(m, map[string]func() int{
"xurls": main1,
}))
}
// TestScript runs the scripts under testdata/script. Every script is given
// its own local HTTP server, whose base url is exported as the SERVER
// environment variable, and an "expand" command which substitutes environment
// variables inside the named files.
func TestScript(t *testing.T) {
	t.Parallel()

	// Constructors for the canned responses served to the scripts.
	status := func(code int) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(code)
		}
	}
	redirect := func(target string, code int) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			http.Redirect(w, r, target, code)
		}
	}
	fail := func(code int) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			http.Error(w, "", code)
		}
	}

	setup := func(env *testscript.Env) error {
		mux := http.NewServeMux()

		// handle registers handler at pattern, flagging a test error for any
		// request which does not use the expected method.
		handle := func(method, pattern string, handler http.HandlerFunc) {
			mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
				if r.Method != method {
					t.Errorf("expected all requests to be %q, got %q", method, r.Method)
				}
				handler(w, r)
			})
		}

		handle("HEAD", "/plain-head", status(200))
		handle("HEAD", "/redir-1", redirect("/plain-head", http.StatusMovedPermanently))
		handle("HEAD", "/redir-2", redirect("/redir-1", http.StatusMovedPermanently))
		handle("HEAD", "/redir-longer", redirect("/redir-longtarget", http.StatusMovedPermanently))
		handle("HEAD", "/redir-longtarget", status(200))
		handle("HEAD", "/redir-fragment", redirect("/plain-head#bar", http.StatusMovedPermanently))

		handle("HEAD", "/redir-301", redirect("/plain-head", 301))
		handle("HEAD", "/redir-302", redirect("/plain-head", 302))
		handle("HEAD", "/redir-303", redirect("/plain-head", 303))
		handle("HEAD", "/redir-307", redirect("/plain-head", 307))
		handle("HEAD", "/redir-308", redirect("/plain-head", 308))

		handle("HEAD", "/404", fail(404))
		handle("HEAD", "/500", fail(500))

		handle("GET", "/plain-get", func(w http.ResponseWriter, r *http.Request) {
			fmt.Fprintf(w, "plaintext")
		})
		// This endpoint accepts both methods on purpose: anything but GET is
		// answered with 405, and GET is redirected.
		mux.HandleFunc("/get-only", func(w http.ResponseWriter, r *http.Request) {
			if r.Method != "GET" {
				http.Error(w, "", 405)
				return
			}
			http.Redirect(w, r, "/plain-get", 301)
		})

		ln, err := net.Listen("tcp", ":0")
		if err != nil {
			return err
		}
		server := &http.Server{Handler: mux}
		go server.Serve(ln)
		env.Vars = append(env.Vars, "SERVER=http://"+ln.Addr().String())
		env.Defer(func() {
			if err := server.Shutdown(context.TODO()); err != nil {
				t.Fatal(err)
			}
		})
		return nil
	}

	// expand rewrites each named file in place, replacing $VAR and ${VAR}
	// with values from the script's environment.
	expand := func(ts *testscript.TestScript, neg bool, args []string) {
		if neg {
			ts.Fatalf("unsupported: ! expand")
		}
		if len(args) == 0 {
			ts.Fatalf("usage: expand file...")
		}
		for _, name := range args {
			expanded := os.Expand(ts.ReadFile(name), ts.Getenv)
			err := ioutil.WriteFile(ts.MkAbs(name), []byte(expanded), 0o666)
			ts.Check(err)
		}
	}

	testscript.Run(t, testscript.Params{
		Dir:                 filepath.Join("testdata", "script"),
		RequireExplicitExec: true,
		Setup:               setup,
		Cmds: map[string]func(ts *testscript.TestScript, neg bool, args []string){
			"expand": expand,
		},
	})
}
@@ -0,0 +1,156 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The code below is borrowed from Go's cmd/gofmt as of 1.18beta1.
// We tweaked it slightly to add the "broken URLs" result.
package main
import (
"context"
"go/scanner"
"io"
"golang.org/x/sync/semaphore"
)
// A sequencer performs concurrent tasks that may write output, but emits that
// output in a deterministic order.
type sequencer struct {
// maxWeight is the capacity sem was created with; Add clamps task weights
// to it.
maxWeight int64
// sem bounds the total weight of the tasks running at once. scanPath
// weights each task by the number of url matches on its line, so this
// effectively caps the number of parallel requests.
sem *semaphore.Weighted
// prev is the 1-buffered channel on which the previously added task (or
// newSequencer, for the very first task) delivers the shared reporterState.
prev <-chan *reporterState
}
// newSequencer creates a sequencer which runs tasks concurrently up to a total
// weight of maxWeight, sending the tasks' output to out and their reported
// errors to err.
func newSequencer(maxWeight int64, out, err io.Writer) *sequencer {
	// Seed the hand-off chain, so that the first task added can obtain the
	// reporter state without waiting on anything.
	first := make(chan *reporterState, 1)
	first <- &reporterState{out: out, err: err}

	seq := new(sequencer)
	seq.maxWeight = maxWeight
	seq.sem = semaphore.NewWeighted(maxWeight)
	seq.prev = first
	return seq
}
// Add waits until the sequencer can spare the given weight, and then starts f
// as a concurrent task.
//
// A weight which is negative or above the sequencer's maximum is treated as
// the maximum: Add then waits for all other tasks to complete, and the task
// runs alone, holding up any further calls to Add until it is done.
//
// f may run in its own goroutine, yet whatever it sends to the reporter is
// emitted in the order the tasks were added.
//
// Calling a method on the reporter may block until the previous task is done,
// so f should finish its parallelizable work before touching the reporter.
//
// A non-nil error returned by f is reported after f's own output, if any, and
// makes the final exit code nonzero.
func (s *sequencer) Add(weight int64, f func(*reporter) error) {
	if weight < 0 || weight > s.maxWeight {
		weight = s.maxWeight
	}
	if acquireErr := s.sem.Acquire(context.TODO(), weight); acquireErr != nil {
		// Nothing was acquired, so nothing must be released; the task turns
		// into one which merely reports the failure.
		weight = 0
		f = func(*reporter) error { return acquireErr }
	}

	// Link this task into the chain: it receives the state from the previous
	// task, and passes it on through a fresh channel of its own.
	rep := &reporter{prev: s.prev}
	handoff := make(chan *reporterState, 1)
	s.prev = handoff

	// The task runs in parallel up to the point where it first uses rep;
	// there it waits for the previous task to let go of the output state.
	go func() {
		if taskErr := f(rep); taskErr != nil {
			rep.Report(taskErr)
		}
		handoff <- rep.getState() // Unblock the next task.
		s.sem.Release(weight)
	}()
}
// GetExitCode blocks until every task added so far has completed, and returns
// the resulting exit code, ready to be passed to os.Exit.
func (s *sequencer) GetExitCode() int {
	result := make(chan int, 1)
	// A zero-weight task is sequenced after all earlier ones, so by the time
	// it reads the exit code, they have all finished.
	collect := func(r *reporter) error {
		result <- r.ExitCode()
		return nil
	}
	s.Add(0, collect)
	return <-result
}
// finalState blocks until every task added so far has completed, and returns
// a copy of the reporter state they left behind.
func (s *sequencer) finalState() reporterState {
	result := make(chan reporterState, 1)
	// A zero-weight task is sequenced after all earlier ones, so the state it
	// observes includes everything they reported.
	snapshot := func(r *reporter) error {
		result <- *r.getState()
		return nil
	}
	s.Add(0, snapshot)
	return <-result
}
// A reporter is a task's handle for emitting output, warnings, and errors.
type reporter struct {
	// prev delivers the shared state once earlier reporters are done with it.
	prev <-chan *reporterState
	// state is nil until the state has been received from prev.
	state *reporterState
}

// reporterState is the state passed along from one reporter to the next.
//
// Only one reporter at a time may have access to a reporterState.
type reporterState struct {
	out        io.Writer
	err        io.Writer
	exitCode   int
	brokenURLs []brokenURL
}

// brokenURL records a url which could not be used, and why.
type brokenURL struct {
	url, reason string
}

// getState returns the reporter state for manipulation, first waiting for any
// prior reporters to finish with it if it has not been obtained yet.
func (r *reporter) getState() *reporterState {
	if r.state != nil {
		return r.state
	}
	r.state = <-r.prev
	return r.state
}

// Write sends p to the reporter's output stream.
//
// The caller gets back any write error; the reporter's exit code is not
// affected by it.
func (r *reporter) Write(p []byte) (int, error) {
	dst := r.getState().out
	return dst.Write(p)
}

// appendBroken adds url to the list of broken urls, along with the reason.
func (r *reporter) appendBroken(url, reason string) {
	st := r.getState()
	st.brokenURLs = append(st.brokenURLs, brokenURL{url: url, reason: reason})
}

// Report prints err, which must be non-nil, to the reporter's error stream,
// and sets the exit code to a nonzero value.
func (r *reporter) Report(err error) {
	if err == nil {
		panic("Report with nil error")
	}
	state := r.getState()
	scanner.PrintError(state.err, err)
	state.exitCode = 2
}

// ExitCode returns the exit code recorded in the reporter state so far.
func (r *reporter) ExitCode() int {
	st := r.getState()
	return st.exitCode
}
@@ -0,0 +1,33 @@
stdin input
exec xurls
stdout 'https://foo.com'
! stdout 'bar.com'
! stdout 'custom://some-data'
! stderr .
! exec xurls missing
! stdout .
stderr 'open missing'
exec xurls input
stdout 'https://foo.com'
! stdout 'bar.com'
! stdout 'custom://some-data'
! stderr .
exec xurls -r input
stdout 'https://foo.com'
stdout 'bar.com'
! stdout 'custom://some-data'
! stderr .
exec xurls -m 'custom://' input
! stdout 'https://foo.com'
! stdout 'bar.com'
stdout 'custom://some-data'
! stderr .
-- input --
First, a link with a scheme, https://foo.com.
Then, one without a scheme, like bar.com.
Also, a link with a custom scheme, custom://some-data.
@@ -0,0 +1,120 @@
expand nothing
cp nothing nothing.orig
expand redirects
expand redirects.golden-auto
expand redirects.golden-all
cp redirects redirects.orig
expand broken
expand broken.golden
cp broken broken.orig
exec xurls -fix nothing
! stdout .
! stderr .
cmp nothing nothing.orig
stdin redirects
exec xurls -fix
cmp stdout redirects.golden-auto
cmp redirects redirects.orig
! stderr .
exec xurls -fix redirects
stdout '^redirects$'
! stderr .
cmp redirects redirects.golden-auto
cp redirects.orig redirects
exec xurls -fix=auto redirects
cmp redirects redirects.golden-auto
cp redirects.orig redirects
exec xurls -fix=all redirects
cmp redirects redirects.golden-all
cp redirects.orig redirects
! exec xurls -fix broken
stdout -count=1 '^broken$'
stderr -count=1 '5 broken urls'
stderr -count=2 '/404 - 404 Not Found'
stderr -count=2 '/500 - 500 Internal Server Error'
stderr -count=1 'totallydoesnotexist.localhost/ - Head .* dial tcp'
cmp broken broken.golden
-- nothing --
No redirect: ${SERVER}/plain-head
-- redirects --
No redirect: ${SERVER}/plain-head
One redirect: ${SERVER}/redir-1
Two redirects: ${SERVER}/redir-2
Redirect inherits fragment: ${SERVER}/redir-1#foo
Redirect replaces fragment: ${SERVER}/redir-fragment#foo
Three links in one line: ${SERVER}/redir-1 + ${SERVER}//redir-1 + ${SERVER}///redir-1
Redirect to a longer path ${SERVER}/redir-longer with trailing text
Permanent redirect codes:
* ${SERVER}/redir-301
* ${SERVER}/redir-308
Temporary redirect codes:
* ${SERVER}/redir-302
* ${SERVER}/redir-303
* ${SERVER}/redir-307
Only GET allowed, HEAD fails: ${SERVER}/get-only
-- redirects.golden-auto --
No redirect: ${SERVER}/plain-head
One redirect: ${SERVER}/plain-head
Two redirects: ${SERVER}/plain-head
Redirect inherits fragment: ${SERVER}/plain-head#foo
Redirect replaces fragment: ${SERVER}/plain-head#bar
Three links in one line: ${SERVER}/plain-head + ${SERVER}/plain-head + ${SERVER}/plain-head
Redirect to a longer path ${SERVER}/redir-longtarget with trailing text
Permanent redirect codes:
* ${SERVER}/plain-head
* ${SERVER}/plain-head
Temporary redirect codes:
* ${SERVER}/redir-302
* ${SERVER}/redir-303
* ${SERVER}/redir-307
Only GET allowed, HEAD fails: ${SERVER}/plain-get
-- redirects.golden-all --
No redirect: ${SERVER}/plain-head
One redirect: ${SERVER}/plain-head
Two redirects: ${SERVER}/plain-head
Redirect inherits fragment: ${SERVER}/plain-head#foo
Redirect replaces fragment: ${SERVER}/plain-head#bar
Three links in one line: ${SERVER}/plain-head + ${SERVER}/plain-head + ${SERVER}/plain-head
Redirect to a longer path ${SERVER}/redir-longtarget with trailing text
Permanent redirect codes:
* ${SERVER}/plain-head
* ${SERVER}/plain-head
Temporary redirect codes:
* ${SERVER}/plain-head
* ${SERVER}/plain-head
* ${SERVER}/plain-head
Only GET allowed, HEAD fails: ${SERVER}/plain-get
-- broken --
One redirect: ${SERVER}/redir-1
404 errors: ${SERVER}/404 ${SERVER}/404
500 errors: ${SERVER}/500 ${SERVER}/500
Dial error: http://totallydoesnotexist.localhost/
-- broken.golden --
One redirect: ${SERVER}/plain-head
404 errors: ${SERVER}/404 ${SERVER}/404
500 errors: ${SERVER}/500 ${SERVER}/500
Dial error: http://totallydoesnotexist.localhost/
@@ -0,0 +1,11 @@
exec xurls -h
! stderr 'flag provided but not defined'
stderr 'Usage: xurls'
! stderr 'help requested' # don't duplicate usage output
! stderr '-test\.' # don't show the test binary's usage func
! exec xurls -r -m="whatever"
stderr 'at the same time'
! exec xurls -m="bad(regexp"
stderr 'missing closing \)'
@@ -0,0 +1,5 @@
# Note that "go test" does not embed vcs information by default.
# We copied the code from another project which is tested,
# so there's no need to fully test the VCS aspect.
exec xurls -version
stdout '\(devel\)'