Skip to content

Commit 0f0c8dc

Browse files
committed
Fixed fuzzy search to work with cosine similarity and some other heuristics
1 parent 6e56fc7 commit 0f0c8dc

File tree

4 files changed

+84
-38
lines changed

4 files changed

+84
-38
lines changed

go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ require (
2727
github.com/hashicorp/hcl v1.0.0 // indirect
2828
github.com/inconshreveable/mousetrap v1.1.0 // indirect
2929
github.com/jmespath/go-jmespath v0.4.0 // indirect
30-
github.com/lithammer/fuzzysearch v1.1.8 // indirect
3130
github.com/magiconair/properties v1.8.7 // indirect
3231
github.com/mattn/go-runewidth v0.0.15 // indirect
3332
github.com/mitchellh/mapstructure v1.5.0 // indirect

go.sum

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
6868
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
6969
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
7070
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
71-
github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
72-
github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
7371
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
7472
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
7573
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
@@ -133,56 +131,28 @@ github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8
133131
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
134132
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=
135133
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
136-
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
137134
go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=
138135
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
139136
go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
140137
go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
141-
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
142-
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
143138
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
144139
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
145-
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
146-
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
147-
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
148-
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
149-
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
150-
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
151-
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
152-
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
153-
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
154-
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
155140
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
156141
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
157142
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
158143
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
159144
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
160145
golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
161-
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
162-
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
163146
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
164-
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
165147
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
166148
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
167-
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
168149
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
169150
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
170151
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
171-
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
172152
golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw=
173153
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
174-
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
175-
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
176-
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
177-
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
178-
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
179154
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
180155
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
181-
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
182-
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
183-
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
184-
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
185-
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
186156
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
187157
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
188158
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

pkg/cosine/cosine.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package cosine
2+
3+
import (
4+
"math"
5+
"sort"
6+
"strings"
7+
)
8+
9+
// result is one scored document produced by Search.
type result struct {
10+
// Text is the document string exactly as it was passed to Search.
Text string
11+
// Similarity is the cosine similarity between the character n-gram vectors
// of Text and the query, plus 0.25 when SubString is true (so it can be
// greater than 1).
Similarity float64
12+
// SubString reports whether the query was found as a substring of Text.
SubString bool
13+
}
14+
15+
// charNGrams builds a bag-of-n-grams count vector for text.
//
// The text is normalized first: every space becomes an underscore and the
// result is lowercased, so comparisons are case-insensitive and word
// boundaries take part in the grams. Each window of n consecutive characters
// is then counted.
//
// Windows are measured in runes, not bytes, so a multi-byte UTF-8 character is
// never cut into invalid fragments.
//
// The returned map is never nil. It is empty when n is not positive (a
// negative n used to panic on the slice expression) or when the normalized
// text holds fewer than n characters.
func charNGrams(text string, n int) map[string]float64 {
	counts := make(map[string]float64)
	if n <= 0 {
		return counts
	}
	runes := []rune(strings.ToLower(strings.ReplaceAll(text, " ", "_")))
	for i := 0; i+n <= len(runes); i++ {
		counts[string(runes[i:i+n])]++
	}
	return counts
}
24+
25+
// similarity returns the cosine similarity of two sparse vectors stored as
// maps from term to weight: dot(a, b) / (|a| * |b|).
//
// A key missing from a map counts as weight 0. The result is 0 when either
// vector has zero magnitude, which includes nil and empty maps. For the
// non-negative counts produced by charNGrams the result lies in [0, 1], up to
// floating-point rounding.
func similarity(a, b map[string]float64) float64 {
	var magA, magB float64
	for _, v := range a {
		magA += v * v
	}
	for _, v := range b {
		magB += v * v
	}
	// A zero vector has no direction; bail out before doing any lookups.
	if magA == 0 || magB == 0 {
		return 0
	}

	// Only keys present in both maps contribute to the dot product, so walk
	// the smaller map and probe the larger one.
	small, large := a, b
	if len(b) < len(a) {
		small, large = b, a
	}
	var dotProduct float64
	for key, v := range small {
		dotProduct += v * large[key]
	}
	return dotProduct / (math.Sqrt(magA) * math.Sqrt(magB))
}
41+
42+
func Search(docs []string, query string, ngramSize int) []result {
43+
vector := charNGrams(query, ngramSize)
44+
results := []result{}
45+
for _, doc := range docs {
46+
isSubString := strings.Index(doc, query) != -1
47+
result := result{
48+
Text: doc,
49+
Similarity: similarity(charNGrams(doc, ngramSize), vector),
50+
SubString: isSubString,
51+
}
52+
if isSubString {
53+
result.Similarity += 0.25
54+
}
55+
results = append(results, result)
56+
}
57+
sort.Slice(results, func(i, j int) bool {
58+
return results[i].Similarity > results[j].Similarity
59+
})
60+
return results
61+
}

sdk/picker/picker.go

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
package picker
22

33
import (
4-
"sort"
54
"strings"
65

76
"atomicgo.dev/keyboard"
87
"atomicgo.dev/keyboard/keys"
9-
"github.com/lithammer/fuzzysearch/fuzzy"
8+
109
"github.com/null93/aws-knox/pkg/ansi"
1110
"github.com/null93/aws-knox/pkg/color"
11+
"github.com/null93/aws-knox/pkg/cosine"
1212
. "github.com/null93/aws-knox/sdk/style"
1313
)
1414

@@ -32,6 +32,7 @@ type picker struct {
3232
// option is one entry in the picker's option list.
type option struct {
3333
// Columns holds the entry's cells. filter joins them with a single space
// (lowercased) to build the text that is searched, and render prints each
// cell padded to its column width.
Columns []string
3434
// Value is an arbitrary payload carried with the entry.
// NOTE(review): presumably what the picker returns on selection; not
// visible in this hunk, confirm against the caller.
Value interface{}
35+
// Debug is optional diagnostic text. When non-empty, render appends it to
// the row as " DEBUG: <text> ".
Debug string
3536
}
3637

3738
type action struct {
@@ -131,15 +132,27 @@ func (p *picker) filter() {
131132
p.filtered = append(p.filtered, &p.options[i])
132133
continue
133134
}
134-
fullValue := strings.Join(option.Columns, " ")
135+
fullValue := strings.ToLower(strings.Join(option.Columns, " "))
135136
fullValues = append(fullValues, fullValue)
136137
optionsMap[fullValue] = &p.options[i]
137138
}
138139
if p.term != "" {
139-
ranks := fuzzy.RankFindFold(p.term, fullValues)
140-
sort.Sort(ranks)
141-
for _, rank := range ranks {
142-
p.filtered = append(p.filtered, optionsMap[rank.Target])
140+
ngramSize := 3
141+
results := cosine.Search(fullValues, strings.ToLower(p.term), ngramSize)
142+
averageSimilarity := 0.0
143+
minimumSimilarity := 0.15
144+
for _, result := range results {
145+
averageSimilarity += result.Similarity
146+
}
147+
if len(results) > 0 {
148+
averageSimilarity /= float64(len(results))
149+
}
150+
for _, result := range results {
151+
foundOption := optionsMap[result.Text]
152+
// foundOption.Debug = fmt.Sprintf("%t - %f", result.SubString, result.Similarity)
153+
if (result.Similarity >= averageSimilarity && result.Similarity > minimumSimilarity) || len(p.term) < ngramSize {
154+
p.filtered = append(p.filtered, foundOption)
155+
}
143156
}
144157
}
145158
} else {
@@ -196,6 +209,9 @@ func (p *picker) render() {
196209
for i, col := range option.Columns {
197210
rowStyle.Printf(" %-*s ", p.longestCols[i], col)
198211
}
212+
if option.Debug != "" {
213+
rowStyle.Printf("%s", darkGray(" DEBUG: "+option.Debug+" "))
214+
}
199215
rowStyle.Printfln("")
200216
}
201217
if p.windowEnd < len(p.filtered) {

0 commit comments

Comments
 (0)