whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,13 @@
version = 1
test_patterns = ["*_test.go"]
exclude_patterns = ["vendor/*"]
[[analyzers]]
name = "go"
enabled = true
[analyzers.meta]
import_path = "github.com/derekparker/trie"
dependencies_vendored = true
@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014 Derek Parker
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,62 @@
[![GoDoc](https://godoc.org/github.com/derekparker/trie?status.svg)](https://godoc.org/github.com/derekparker/trie)
# Trie
Data structure and relevant algorithms for extremely fast prefix/fuzzy string searching.
## Usage
Create a Trie with:
```Go
t := trie.New()
```
Add Keys with:
```Go
// Add can take in meta information which can be stored with the key.
// i.e. you could store any information you would like to associate with
// this particular key.
t.Add("foobar", 1)
```
Find a key with:
```Go
node, ok := t.Find("foobar")
meta := node.Meta()
// use meta with meta.(type)
```
Remove Keys with:
```Go
t.Remove("foobar")
```
Prefix search with:
```Go
t.PrefixSearch("foo")
```
Fast test for valid prefix:
```Go
t.HasKeysWithPrefix("foo")
```
Fuzzy search with:
```Go
t.FuzzySearch("fb")
```
## Contributing
Fork this repo and run tests with:
go test
Create a feature branch, write your tests and code and submit a pull request.
## License
MIT
@@ -0,0 +1,3 @@
module github.com/derekparker/trie
go 1.19
@@ -0,0 +1,317 @@
// Package trie implements an R-Way Trie data structure.
//
// A Trie has a root Node which is the base of the tree.
// Each subsequent Node has a letter and children, which are
// nodes that have letter values associated with them.
package trie
import (
"sort"
"sync"
)
// Node is a single vertex of the trie. Interior nodes created by Trie.Add
// carry one rune of a key; every key added additionally gets a terminal
// child (val == nul, term == true) that holds the full key and its meta data.
type Node struct {
	val       rune           // rune this node represents; zero (nul) for the root made by New and for terminal nodes
	path      string         // full key on terminal nodes; Add leaves it empty on interior nodes
	term      bool           // true when this node marks the end of a stored key
	depth     int            // NewChild sets this to the parent's depth + 1; the root is 0
	meta      interface{}    // caller-supplied value stored alongside the key
	mask      uint64         // bitmask of the runes at and below this node (see maskruneslice)
	parent    *Node          // node this one hangs off; not set for the root made by New
	children  map[rune]*Node // child nodes keyed by their rune
	termCount int            // bumped by Add for nodes on a key's path; collect uses it as a capacity hint
}
// Trie owns the root Node of the tree together with the mutex that Add and
// Remove hold while they modify it.
type Trie struct {
	mu   sync.Mutex // held by Add and Remove; the read methods in this file do not take it
	root *Node      // entry point of the tree
	size int        // incremented by Add, decremented by Remove
}
// ByKeys implements sort.Interface for a slice of keys, ordering them by
// length with the shortest key first.
type ByKeys []string

// Len reports how many keys the slice holds.
func (a ByKeys) Len() int {
	return len(a)
}

// Swap exchanges the keys at positions i and j.
func (a ByKeys) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the key at i is strictly shorter than the key at j.
func (a ByKeys) Less(i, j int) bool {
	return len(a[j]) > len(a[i])
}
// nul is the rune under which a key's terminal node is stored in the
// children map of the node for the key's last rune.
const nul = 0x0
// New returns an empty Trie whose root Node is ready to receive children.
func New() *Trie {
	// depth and size rely on their zero values.
	root := &Node{children: make(map[rune]*Node)}
	return &Trie{root: root}
}
// Root returns the root node of the Trie.
func (t *Trie) Root() *Node {
	return t.root
}
// Add inserts key into the Trie together with its meta data and returns the
// terminal Node created for it. Meta data is stored as `interface{}` and
// must be type asserted by the caller.
//
// Adding a key that is already stored replaces its terminal node (and with
// it the meta data) but does not count the key a second time.
func (t *Trie) Add(key string, meta interface{}) *Node {
	t.mu.Lock()
	defer t.mu.Unlock()

	runes := []rune(key)
	node := t.root
	node.mask |= maskruneslice(runes)

	for i, r := range runes {
		// A node's mask covers its own rune plus every rune after it.
		bitmask := maskruneslice(runes[i:])
		if n, ok := node.children[r]; ok {
			node = n
			node.mask |= bitmask
		} else {
			node = node.NewChild(r, "", bitmask, nil, false)
		}
	}

	// Only a key that is not stored yet changes the counters; otherwise
	// size and termCount would drift upwards on every repeated Add.
	if old, ok := node.children[nul]; !ok || !old.term {
		t.size++
		for n := node; n != nil; n = n.parent {
			n.termCount++
		}
	}

	return node.NewChild(nul, key, 0, meta, true)
}
// Find looks up key and returns its terminal Node (whose Meta method yields
// the stored meta data) together with true. It returns nil and false when
// key is not stored, including when key is only a prefix of stored keys.
func (t *Trie) Find(key string) (*Node, bool) {
	last := findNode(t.Root(), []rune(key))
	if last == nil {
		return nil, false
	}

	// The key exists only if the node for its last rune owns a terminal child.
	if leaf, found := last.Children()[nul]; found && leaf.term {
		return leaf, true
	}
	return nil, false
}
// HasKeysWithPrefix reports whether the trie contains a path of nodes
// spelling out key, without collecting the keys below it.
func (t *Trie) HasKeysWithPrefix(key string) bool {
	return findNode(t.Root(), []rune(key)) != nil
}
// Remove deletes key from the trie. A key that is not stored is ignored,
// even when it is a prefix of stored keys, and keys that merely share a
// prefix with key are left untouched.
//
// Only the part of the branch that served key alone is pruned; term counts
// and bitmasks of the surviving nodes are then recalculated up to the root.
func (t *Trie) Remove(key string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	node := findNode(t.root, []rune(key))
	if node == nil {
		return
	}

	// findNode matches any prefix; the key itself is stored only when the
	// node for its last rune owns a terminal child.
	if leaf, ok := node.children[nul]; !ok || !leaf.term {
		return
	}
	delete(node.children, nul)
	t.size--

	// One key fewer terminates below every node on the path.
	for n := node; n != nil; n = n.parent {
		if n.termCount > 0 {
			n.termCount--
		}
	}

	// Prune nodes that existed only for this key, stopping at the first
	// node that still leads to another key, or at the root.
	for node.parent != nil && len(node.children) == 0 {
		delete(node.parent.children, node.val)
		node = node.parent
	}

	// Rebuild the masks bottom-up so that each ancestor sees the already
	// corrected mask of the child on the path.
	for n := node; n != nil; n = n.parent {
		mask := uint64(1) << uint64(n.val-'a')
		for _, c := range n.children {
			mask |= c.mask
		}
		n.mask = mask
	}
}
// Keys returns every key currently stored in the trie. When the size
// counter is zero it returns an empty, non-nil slice.
func (t *Trie) Keys() []string {
	if t.size != 0 {
		return t.PrefixSearch("")
	}
	return []string{}
}
// FuzzySearch returns the keys that contain the runes of pre in order,
// though not necessarily next to each other, sorted by length with the
// shortest key first.
//
// The receiver is a pointer so that calling the method does not copy the
// Trie and the sync.Mutex it contains.
func (t *Trie) FuzzySearch(pre string) []string {
	keys := fuzzycollect(t.Root(), []rune(pre))
	sort.Sort(ByKeys(keys))
	return keys
}
// PrefixSearch returns every key that starts with pre, or nil when no path
// in the trie spells out pre.
//
// The receiver is a pointer so that calling the method does not copy the
// Trie and the sync.Mutex it contains.
func (t *Trie) PrefixSearch(pre string) []string {
	node := findNode(t.Root(), []rune(pre))
	if node == nil {
		return nil
	}
	return collect(node)
}
// NewChild builds a node for val one level below parent, stores it in
// parent's children under val (replacing any child already there), merges
// bitmask into parent's mask and returns the new node.
func (parent *Node) NewChild(val rune, path string, bitmask uint64, meta interface{}, term bool) *Node {
	child := new(Node)
	child.val = val
	child.path = path
	child.mask = bitmask
	child.term = term
	child.meta = meta
	child.parent = parent
	child.children = make(map[rune]*Node)
	child.depth = parent.depth + 1

	parent.children[val] = child
	parent.mask |= bitmask
	return child
}
// RemoveChild detaches the child stored under r and recalculates the
// bitmask of n and of every ancestor up to the root.
func (n *Node) RemoveChild(r rune) {
	delete(n.children, r)

	// Start at n itself: if n kept its old mask, each ancestor would OR the
	// removed child's bits straight back in and nothing would be cleared.
	for nd := n; nd != nil; nd = nd.parent {
		// A node's mask is its own rune's bit plus the masks of its children.
		mask := uint64(1) << uint64(nd.val-'a')
		for _, c := range nd.children {
			mask |= c.mask
		}
		nd.mask = mask
	}
}
// Parent returns the parent of this node. The root created by New has no
// parent set, so for it the result is nil.
func (n Node) Parent() *Node {
	return n.parent
}
// Meta returns the meta information stored on this node.
func (n Node) Meta() interface{} {
	return n.meta
}
// Children returns the children of this node, keyed by rune. The returned
// map is the node's own map, not a copy.
func (n Node) Children() map[rune]*Node {
	return n.children
}
// Terminating reports whether this node marks the end of a stored key.
func (n Node) Terminating() bool {
	return n.term
}
// Val returns the rune this node represents.
func (n Node) Val() rune {
	return n.val
}
// Depth returns the depth recorded on this node; NewChild sets it to the
// parent's depth plus one.
func (n Node) Depth() int {
	return n.depth
}
// Mask returns a uint64 representing the current
// mask of this node.
func (n Node) Mask() uint64 {
	return n.mask
}
// findNode descends from node following runes one child at a time and
// returns the node reached after the last rune. It returns nil when node is
// nil or when some rune has no matching child; with no runes it returns
// node itself.
func findNode(node *Node, runes []rune) *Node {
	for _, r := range runes {
		if node == nil {
			return nil
		}
		next, ok := node.Children()[r]
		if !ok {
			return nil
		}
		node = next
	}
	return node
}
// maskruneslice folds rs into a 64-bit set with one bit per rune, bit 0
// standing for 'a'. Runes outside the 64 code points starting at 'a' shift
// past the width of the word and therefore contribute no bit.
func maskruneslice(rs []rune) uint64 {
	mask := uint64(0)
	for i := 0; i < len(rs); i++ {
		bit := uint64(1) << uint64(rs[i]-'a')
		mask |= bit
	}
	return mask
}
// collect gathers the path of every terminal node in the subtree rooted at
// node (node included) using an explicit stack. The order of the result
// follows map iteration and is therefore unspecified.
func collect(node *Node) []string {
	keys := make([]string, 0, node.termCount)

	stack := make([]*Node, 1, len(node.children)+1)
	stack[0] = node

	for len(stack) > 0 {
		// Pop the most recently pushed node.
		top := len(stack) - 1
		cur := stack[top]
		stack = stack[:top]

		for _, child := range cur.children {
			stack = append(stack, child)
		}
		if cur.term {
			keys = append(keys, cur.path)
		}
	}
	return keys
}
// potentialSubtree is one pending unit of work for fuzzycollect: a node
// still to be examined plus how much of the search pattern has been matched
// on the way to it.
type potentialSubtree struct {
	idx  int   // index into the pattern of the next rune to match
	node *Node // root of the subtree to examine
}
// fuzzycollect returns the keys below node that contain the runes of
// partial in order. An empty partial matches everything, so the whole
// subtree is collected. Subtrees whose mask lacks a rune that is still to
// be matched are skipped without being visited.
func fuzzycollect(node *Node, partial []rune) []string {
	if len(partial) == 0 {
		return collect(node)
	}

	var keys []string
	stack := []potentialSubtree{{node: node, idx: 0}}

	for len(stack) > 0 {
		top := len(stack) - 1
		cur := stack[top]
		stack = stack[:top]

		// Every remaining pattern rune must be present at or below this node.
		want := maskruneslice(partial[cur.idx:])
		if cur.node.mask&want != want {
			continue
		}

		next := cur.idx
		if cur.node.val == partial[next] {
			next++
			if next == len(partial) {
				// Pattern exhausted: every key below this node matches.
				keys = append(keys, collect(cur.node)...)
				continue
			}
		}

		for _, child := range cur.node.children {
			stack = append(stack, potentialSubtree{node: child, idx: next})
		}
	}
	return keys
}
@@ -0,0 +1,434 @@
package trie
import (
"bufio"
"log"
"os"
"sort"
"testing"
)
// addFromFile adds every line of the file at path to t as a key with nil
// meta data. Failing to open or to read the file ends the process through
// log.Fatal.
func addFromFile(t *Trie, path string) {
	file, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	reader := bufio.NewScanner(file)
	for reader.Scan() {
		t.Add(reader.Text(), nil)
	}
	// Report the scanner's own error; err from os.Open is nil at this point.
	if err := reader.Err(); err != nil {
		log.Fatal(err)
	}
}
// TestTrieAdd checks that Add returns a node carrying the supplied meta data.
func TestTrieAdd(t *testing.T) {
	tr := New()

	node := tr.Add("foo", 1)
	if got := node.Meta().(int); got != 1 {
		t.Errorf("Expected 1, got: %d", got)
	}
}
// TestTrieFind checks that a stored key is found along with its meta data.
func TestTrieFind(t *testing.T) {
	tr := New()
	tr.Add("foo", 1)

	node, found := tr.Find("foo")
	if !found {
		t.Fatal("Could not find node")
	}
	if got := node.Meta().(int); got != 1 {
		t.Errorf("Expected 1, got: %d", got)
	}
}
// TestTrieFindMissingWithSubtree checks that Find rejects a string that is
// only a shared prefix of stored keys.
func TestTrieFindMissingWithSubtree(t *testing.T) {
	tr := New()
	for _, key := range []string{"fooish", "foobar"} {
		tr.Add(key, 1)
	}

	node, found := tr.Find("foo")
	if found {
		t.Errorf("Expected ok to be false")
	}
	if node != nil {
		t.Errorf("Expected nil, got: %v", node)
	}
}
// TestTrieHasKeysWithPrefix checks prefix detection for a full key, a
// shared prefix and a string that leaves the stored paths.
func TestTrieHasKeysWithPrefix(t *testing.T) {
	tr := New()
	tr.Add("fooish", 1)
	tr.Add("foobar", 1)

	type prefixCase struct {
		key      string
		expected bool
	}
	cases := []prefixCase{
		{key: "foobar", expected: true},
		{key: "foo", expected: true},
		{key: "fool", expected: false},
	}

	for _, tc := range cases {
		if got := tr.HasKeysWithPrefix(tc.key); got != tc.expected {
			t.Errorf("HasKeysWithPrefix(\"%s\"): expected result to be %t", tc.key, tc.expected)
		}
	}
}
// TestTrieFindMissing checks that Find on an empty trie reports a miss.
func TestTrieFindMissing(t *testing.T) {
	node, found := New().Find("foo")
	if found {
		t.Errorf("Expected ok to be false")
	}
	if node != nil {
		t.Errorf("Expected nil, got: %v", node)
	}
}
// TestRemove removes one of three keys sharing a prefix and checks that
// both Keys and FuzzySearch afterwards report only the other two.
func TestRemove(t *testing.T) {
	tr := New()
	for _, key := range []string{"football", "foostar", "foosball"} {
		tr.Add(key, nil)
	}

	tr.Remove("foosball")

	remaining := tr.Keys()
	if len(remaining) != 2 {
		t.Errorf("Expected 2 keys got %d", len(remaining))
	}
	for _, k := range remaining {
		if !(k == "football" || k == "foostar") {
			t.Errorf("key was: %s", k)
		}
	}

	fuzzy := tr.FuzzySearch("foo")
	if len(fuzzy) != 2 {
		t.Errorf("Expected 2 keys got %d", len(fuzzy))
	}
	for _, k := range fuzzy {
		if !(k == "football" || k == "foostar") {
			t.Errorf("Expected football got: %#v", k)
		}
	}
}
// TestRemoveRoot removes the only key of a trie and checks that the key is
// gone and that the trie still accepts new keys afterwards.
func TestRemoveRoot(t *testing.T) {
	tr := New()
	tr.Add("root", nil)
	tr.Remove("root")

	if _, found := tr.Find("root"); found {
		t.Error("Expected 0 keys")
	}

	// Try to write some data after the trie was purged
	tr.Add("root", nil)
	if _, found := tr.Find("root"); !found {
		t.Error("Expected 1 keys")
	}
}
// TestTrieKeys checks that Keys returns exactly the keys that were added,
// for tries holding two, one and zero keys.
func TestTrieKeys(t *testing.T) {
	tableTests := []struct {
		name         string
		expectedKeys []string // listed in sorted order; compared against the sorted result
	}{
		{"Two", []string{"bar", "foo"}},
		{"One", []string{"foo"}},
		{"Empty", []string{}},
	}

	for _, test := range tableTests {
		t.Run(test.name, func(t *testing.T) {
			trie := New()
			for _, key := range test.expectedKeys {
				trie.Add(key, nil)
			}

			keys := trie.Keys()
			// Stop on a length mismatch: indexing expectedKeys below would
			// panic if more keys came back than were expected.
			if len(keys) != len(test.expectedKeys) {
				t.Fatalf("Expected %v keys, got %d, keys were: %v", len(test.expectedKeys), len(keys), keys)
			}

			sort.Strings(keys)
			for i, key := range keys {
				if key != test.expectedKeys[i] {
					t.Errorf("Expected %#v, got %#v", test.expectedKeys[i], key)
				}
			}
		})
	}
}
// TestPrefixSearch checks PrefixSearch against a broad prefix, a prefix
// that selects a single key and a prefix that matches nothing, and finally
// makes sure an unrelated prefix does not panic.
func TestPrefixSearch(t *testing.T) {
	trie := New()
	expected := []string{
		"foo",
		"foosball",
		"football",
		"foreboding",
		"forementioned",
		"foretold",
		"foreverandeverandeverandever",
		"forbidden",
	}

	// Turn any panic raised by the searches below into a test failure.
	defer func() {
		r := recover()
		if r != nil {
			t.Error(r)
		}
	}()

	trie.Add("bar", nil)
	for _, key := range expected {
		trie.Add(key, nil)
	}

	tests := []struct {
		pre      string
		expected []string
		length   int
	}{
		{"fo", expected, len(expected)},
		{"foosbal", []string{"foosball"}, 1},
		{"abc", []string{}, 0},
	}

	for _, test := range tests {
		actual := trie.PrefixSearch(test.pre)
		sort.Strings(actual)
		sort.Strings(test.expected)

		if len(actual) != test.length {
			t.Errorf("Expected len(actual) to == %d for pre %s", test.length, test.pre)
			// Comparing element by element could index past test.expected
			// and cut the remaining cases short; move on instead.
			continue
		}
		for i, key := range actual {
			if key != test.expected[i] {
				t.Errorf("Expected %v got: %v", test.expected[i], key)
			}
		}
	}

	trie.PrefixSearch("fsfsdfasdf")
}
// TestPrefixSearchEmpty checks that an empty trie yields no keys for the
// empty prefix.
func TestPrefixSearchEmpty(t *testing.T) {
	found := New().PrefixSearch("")
	if n := len(found); n != 0 {
		t.Errorf("Expected 0 keys from empty trie, got: %d", n)
	}
}
// TestFuzzySearch checks how many keys FuzzySearch returns for a range of
// patterns, including the empty pattern and one that matches nothing.
func TestFuzzySearch(t *testing.T) {
	tr := New()
	for _, key := range []string{
		"foosball",
		"football",
		"bmerica",
		"ked",
		"kedlock",
		"frosty",
		"bfrza",
		"foo/bart/baz.go",
	} {
		tr.Add(key, nil)
	}

	type fuzzyCase struct {
		partial string
		length  int
	}
	cases := []fuzzyCase{
		{"fsb", 1},
		{"footbal", 1},
		{"football", 1},
		{"fs", 2},
		{"oos", 1},
		{"kl", 1},
		{"ft", 3},
		{"fy", 1},
		{"fz", 2},
		{"a", 5},
		{"", 8},
		{"zzz", 0},
	}

	for _, tc := range cases {
		t.Run(tc.partial, func(t *testing.T) {
			got := tr.FuzzySearch(tc.partial)
			if len(got) == tc.length {
				return
			}
			t.Errorf("Expected len(actual) to == %d, was %d for %s actual was %#v",
				tc.length, len(got), tc.partial, got)
		})
	}
}
// TestFuzzySearchEmpty checks that an empty trie yields no keys for the
// empty pattern.
func TestFuzzySearchEmpty(t *testing.T) {
	found := New().FuzzySearch("")
	if n := len(found); n != 0 {
		t.Errorf("Expected 0 keys from empty trie, got: %d", n)
	}
}
// TestFuzzySearchSorting checks that FuzzySearch returns its matches with
// the shorter key first.
func TestFuzzySearchSorting(t *testing.T) {
	tr := New()
	for _, key := range []string{
		"foosball",
		"football",
		"bmerica",
		"ked",
		"kedlock",
		"frosty",
		"bfrza",
		"foo/bart/baz.go",
	} {
		tr.Add(key, nil)
	}

	want := []string{"bfrza", "foo/bart/baz.go"}
	got := tr.FuzzySearch("fz")
	if len(got) != len(want) {
		t.Fatalf("expected len %d got %d", len(want), len(got))
	}

	for i := range want {
		if got[i] != want[i] {
			t.Errorf("Expected %s got %s", want[i], got[i])
		}
	}
}
// BenchmarkTieKeys measures Keys on a small, fixed set of keys.
func BenchmarkTieKeys(b *testing.B) {
	tr := New()
	for _, key := range []string{"bar", "foo", "baz", "bur", "zum", "burzum", "bark", "barcelona", "football", "foosball", "footlocker"} {
		tr.Add(key, nil)
	}

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		tr.Keys()
	}
}
// BenchmarkPrefixSearch measures PrefixSearch("fo") on a trie loaded from
// the system word list.
func BenchmarkPrefixSearch(b *testing.B) {
	tr := New()
	addFromFile(tr, "/usr/share/dict/words")

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		_ = tr.PrefixSearch("fo")
	}
}
// BenchmarkFuzzySearch measures FuzzySearch("fs") on a trie loaded from the
// system word list.
func BenchmarkFuzzySearch(b *testing.B) {
	tr := New()
	addFromFile(tr, "/usr/share/dict/words")

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		_ = tr.FuzzySearch("fs")
	}
}
func BenchmarkBuildTree(b *testing.B) {
for i := 0; i < b.N; i++ {
trie := New()
addFromFile(trie, "/usr/share/dict/words")
}
}
// TestSupportChinese checks that PrefixSearch works for keys made of
// multi-byte runes.
func TestSupportChinese(t *testing.T) {
	trie := New()
	expected := []string{"苹果 沂水县", "苹果", "大蒜", "大豆"}
	for _, key := range expected {
		trie.Add(key, nil)
	}

	tests := []struct {
		pre      string
		expected []string
		length   int
	}{
		{"苹", expected[:2], len(expected[:2])},
		{"大", expected[2:], len(expected[2:])},
		{"大蒜", []string{"大蒜"}, 1},
	}

	for _, test := range tests {
		actual := trie.PrefixSearch(test.pre)
		sort.Strings(actual)
		sort.Strings(test.expected)

		if len(actual) != test.length {
			t.Errorf("Expected len(actual) to == %d for pre %s", test.length, test.pre)
			// Comparing element by element could index past test.expected
			// and panic; move on to the next case instead.
			continue
		}
		for i, key := range actual {
			if key != test.expected[i] {
				t.Errorf("Expected %v got: %v", test.expected[i], key)
			}
		}
	}
}
// BenchmarkAdd measures filling a fresh trie with every word of the system
// word list. The words are read once, before the timer starts.
func BenchmarkAdd(b *testing.B) {
	f, err := os.Open("/usr/share/dict/words")
	if err != nil {
		b.Fatal("couldn't open bag of words")
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	var words []string
	for scanner.Scan() {
		word := scanner.Text()
		words = append(words, word)
	}
	// A read error would silently shrink the workload and skew the numbers.
	if err := scanner.Err(); err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		trie := New()
		for k := range words {
			trie.Add(words[k], nil)
		}
	}
}
// BenchmarkAddRemove measures adding a handful of keys that share prefixes
// to a fresh trie and then removing them again in the same order.
func BenchmarkAddRemove(b *testing.B) {
	words := []string{"AAAA1", "AAAA2", "ABAA1", "AABA1", "ABAA2"}

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		tr := New()
		for _, w := range words {
			tr.Add(w, nil)
		}
		for _, w := range words {
			tr.Remove(w)
		}
	}
}