// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package xurls import ( "fmt" "regexp" "sync" "testing" ) type testCase struct { in string want interface{} } func wantStr(in string, want interface{}) string { switch x := want.(type) { case string: return x case bool: if x { return in } } return "" } func doTest(t *testing.T, name string, re *regexp.Regexp, cases []testCase) { for i, c := range cases { t.Run(fmt.Sprintf("%s/%03d", name, i), func(t *testing.T) { want := wantStr(c.in, c.want) for _, surround := range []string{"", "\n"} { in := surround + c.in + surround got := re.FindString(in) if got != want { t.Errorf(`FindString(%q) got %q, want %q`, in, got, want) } } }) } } var constantTestCases = []testCase{ {``, nil}, {` `, nil}, {`:`, nil}, {`::`, nil}, {`:::`, nil}, {`::::`, nil}, {`.`, nil}, {`..`, nil}, {`...`, nil}, {`1.1`, nil}, {`.1.`, nil}, {`1.1.1`, nil}, {`1:1`, nil}, {`:1:`, nil}, {`1:1:1`, nil}, {`://`, nil}, {`foo`, nil}, {`foo:`, nil}, {`mailto:`, nil}, {`foo://`, nil}, {`http://`, nil}, {`http:// foo`, nil}, {`http:// foo`, nil}, {`:foo`, nil}, {`://foo`, nil}, {`foorandom:bar`, nil}, {`foo.randombar`, nil}, {`zzz.`, nil}, {`.zzz`, nil}, {`zzz.zzz`, nil}, {`/some/path`, nil}, {`rel/path`, nil}, {`localhost`, nil}, {`com`, nil}, {`.com`, nil}, {`com.`, nil}, {`http`, nil}, {`http://foo`, true}, {`http://FOO`, true}, {`http://FAÀ`, true}, {`https://localhost`, true}, {`mailto:foo`, true}, {`MAILTO:foo`, true}, {`sms:123`, true}, {`xmpp:foo@bar`, true}, {`bitcoin:Addr23?amount=1&message=foo`, true}, {`cid:foo-32x32.v2_fe0f1423.png`, true}, {`mid:960830.1639@XIson.com`, true}, {`http://foo.com`, true}, {`http://foo.co.uk`, true}, {`http://foo.random`, true}, {` http://foo.com/bar `, `http://foo.com/bar`}, {` http://foo.com/bar more`, `http://foo.com/bar`}, {``, `http://foo.com/bar`}, {`more`, `http://foo.com/bar`}, {`.http://foo.com/bar.`, `http://foo.com/bar`}, {`.http://foo.com/bar.more`, `http://foo.com/bar.more`}, {`,http://foo.com/bar,`, `http://foo.com/bar`}, {`,http://foo.com/bar,more`, `http://foo.com/bar,more`}, {`*http://foo.com/bar*`, `http://foo.com/bar`}, {`*http://foo.com/bar*more`, `http://foo.com/bar*more`}, {`_http://foo.com/bar_`, `http://foo.com/bar_`}, {`_http://foo.com/bar_more`, `http://foo.com/bar_more`}, {`(http://foo.com/bar)`, `http://foo.com/bar`}, {`(http://foo.com/bar)more`, `http://foo.com/bar`}, {`[http://foo.com/bar]`, `http://foo.com/bar`}, {`[http://foo.com/bar]more`, `http://foo.com/bar`}, {`'http://foo.com/bar'`, `http://foo.com/bar`}, {`'http://foo.com/bar'more`, `http://foo.com/bar'more`}, {`"http://foo.com/bar"`, `http://foo.com/bar`}, {`"http://foo.com/bar"more`, `http://foo.com/bar`}, {`{"url":"http://foo.com/bar"}`, `http://foo.com/bar`}, {`{"before":"foo","url":"http://foo.com/bar","after":"bar"}`, `http://foo.com/bar`}, {`http://a.b/a0/-+_&~*%=#@.,:;'?![]()a`, true}, {`http://a.b/a0/$€¥`, true}, {`http://✪foo.bar/pa✪th©more`, true}, {`http://foo.bar/path/`, true}, {`http://foo.bar/path-`, true}, {`http://foo.bar/path+`, true}, {`http://foo.bar/path&`, true}, {`http://foo.bar/path~`, true}, {`http://foo.bar/path%`, true}, {`http://foo.bar/path=`, true}, {`http://foo.bar/path#`, true}, {`http://foo.bar/path.`, `http://foo.bar/path`}, {`http://foo.bar/path,`, `http://foo.bar/path`}, {`http://foo.bar/path:`, `http://foo.bar/path`}, {`http://foo.bar/path;`, `http://foo.bar/path`}, {`http://foo.bar/path'`, `http://foo.bar/path`}, {`http://foo.bar/path?`, `http://foo.bar/path`}, {`http://foo.bar/path!`, `http://foo.bar/path`}, {`http://foo.bar/path@`, `http://foo.bar/path`}, {`http://foo.bar/path|`, `http://foo.bar/path`}, {`http://foo.bar/path|more`, `http://foo.bar/path`}, {`http://foo.bar/path<`, `http://foo.bar/path`}, {`http://foo.bar/path`, `foo.com/bar`}, {`more`, `foo.com/bar`}, {`,foo.com/bar.`, `foo.com/bar`}, {`,foo.com/bar.more`, `foo.com/bar.more`}, {`,foo.com/bar,`, `foo.com/bar`}, {`,foo.com/bar,more`, `foo.com/bar,more`}, {`(foo.com/bar)`, `foo.com/bar`}, {`"foo.com/bar'`, `foo.com/bar`}, {`"foo.com/bar'more`, `foo.com/bar'more`}, {`"foo.com/bar"`, `foo.com/bar`}, {`what is foo.com?`, `foo.com`}, {`the foo.com!`, `foo.com`}, {`foo@bar`, nil}, {`foo@bar.a`, nil}, {`foo@bar.com`, true}, {`foo@sub.bar.com`, true}, {`foo@bar.com bar@bar.com`, `foo@bar.com`}, {`foo@bar.onion`, true}, {`foo@中国.中国`, true}, {`foo@test.bar.com`, true}, {`FOO@TEST.BAR.COM`, true}, {`foo@bar.com/path`, `foo@bar.com`}, {`foo+test@bar.com`, true}, {`foo+._%-@bar.com`, true}, }) doTest(t, "Strict2", Strict(), []testCase{ {`http:// foo.com`, nil}, {`foo.a`, nil}, {`foo.com`, nil}, {`foo.com/`, nil}, {`1.1.1.1`, nil}, {`3ffe:2a00:100:7031::1`, nil}, {`test.foo.com:8080/path`, nil}, {`foo@bar.com`, nil}, // An IP address in URI host position must be bracketed unless it is IPv4. // https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2 // TODO: Implement this restriction, ideally without matching the `http://1080` prefix. //{`http://1080::8:800:200c:417a/path`, nil}, }) } func TestStrictMatchingSchemeError(t *testing.T) { for _, c := range []struct { exp string wantErr bool }{ {`http://`, false}, {`https?://`, false}, {`http://|mailto:`, false}, {`http://(`, true}, } { _, err := StrictMatchingScheme(c.exp) if c.wantErr && err == nil { t.Errorf(`StrictMatchingScheme("%s") did not error as expected`, c.exp) } else if !c.wantErr && err != nil { t.Errorf(`StrictMatchingScheme("%s") unexpectedly errored`, c.exp) } } } func TestStrictMatchingScheme(t *testing.T) { strictMatching, _ := StrictMatchingScheme("http://|ftps?://|mailto:") doTest(t, "StrictMatchingScheme", strictMatching, []testCase{ {`foo.com`, nil}, {`foo@bar.com`, nil}, {`http://foo`, true}, {`Http://foo`, true}, {`https://foo`, nil}, {`ftp://foo`, true}, {`ftps://foo`, true}, {`mailto:foo`, true}, {`MAILTO:foo`, true}, {`sms:123`, nil}, }) } func TestStrictMatchingSchemeAny(t *testing.T) { strictMatching, _ := StrictMatchingScheme(AnyScheme) doTest(t, "StrictMatchingScheme", strictMatching, []testCase{ {`http://foo`, true}, {`git+https://foo`, true}, {`randomtexthttp://foo.bar/etc`, true}, {`mailto:foo`, true}, }) } func bench(b *testing.B, re func() *regexp.Regexp, str string) { b.ReportAllocs() b.SetBytes(int64(len(str))) b.RunParallel(func(pb *testing.PB) { for pb.Next() { re().FindAllString(str, -1) } }) } const inputNone = ` foo bar yaml: "as well" some more plaintext which does not contain any urls. ` const inputMany = ` foo bar http://foo.foo https://192.168.1.1/path foo.com bitcoin:address ftp:// xmpp:foo@bar.com ` func BenchmarkStrict_none(b *testing.B) { bench(b, Strict, inputNone) } func BenchmarkStrict_many(b *testing.B) { bench(b, Strict, inputMany) } func BenchmarkRelaxed_none(b *testing.B) { bench(b, Relaxed, inputNone) } func BenchmarkRelaxed_many(b *testing.B) { bench(b, Relaxed, inputMany) } var ( rxMatchingScheme *regexp.Regexp rxMatchingSchemeOnce sync.Once ) func matchingScheme() *regexp.Regexp { rxMatchingSchemeOnce.Do(func() { rx, err := StrictMatchingScheme("https?://") if err != nil { panic(err) } rxMatchingScheme = rx }) return rxMatchingScheme } func BenchmarkStrictMatchingScheme_none(b *testing.B) { bench(b, matchingScheme, inputNone) } func BenchmarkStrictMatchingScheme_many(b *testing.B) { bench(b, matchingScheme, inputMany) }