Skip to content

Commit ac13fb7

Browse files
authored
perf(benchmarks): improves benchmarks (#26)
1 parent 7a82bcb commit ac13fb7

File tree

9 files changed

+142
-38
lines changed

9 files changed

+142
-38
lines changed

.eslintignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ jest.config.js
99
*.sh
1010
*.png
1111
*.html
12-
*.svg
12+
*.svg
13+
get-imdb-dataset.mjs

.eslintrc.cjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module.exports = {
99
'plugin:@typescript-eslint/recommended',
1010
],
1111
rules: {
12-
'@typescript-eslint/no-non-null-assertion': 0
12+
'@typescript-eslint/no-non-null-assertion': 0,
13+
'no-async-promise-executor': 0,
1314
}
1415
};

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
dist
66
*.csv
7+
*.tsv
78

89
### Intellij ###
910
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider

packages/benchmarks/benchmark.js

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import fs from "fs";
21
/*global console*/
32

4-
import { parse } from "csv-parse";
3+
import fs from "fs";
4+
import readline from "readline";
55
import { Lyra } from "@nearform/lyra";
66

77
const db = new Lyra({
@@ -14,30 +14,32 @@ const db = new Lyra({
1414

1515
function populateDB() {
1616
console.log("Populating the database...");
17-
return new Promise((resolve) => {
18-
fs.createReadStream("./dataset/title.csv")
19-
.pipe(parse({ delimiter: ";", from_line: 2 }))
20-
.on("data", (row) => {
21-
const [, type, title, , , , , , category] = row;
22-
23-
db.insert({
24-
type,
25-
title,
26-
category,
27-
});
28-
})
29-
.on("end", () => {
30-
console.log("Database ready");
31-
resolve(1);
17+
return new Promise(async (resolve) => {
18+
const fileStream = fs.createReadStream("./dataset/title.tsv");
19+
const rl = readline.createInterface({
20+
input: fileStream,
21+
crlfDelay: Infinity,
22+
});
23+
24+
for await (const row of rl) {
25+
const [, type, title, , , , , , category] = row.split("\t");
26+
27+
db.insert({
28+
type,
29+
title,
30+
category,
3231
});
32+
}
33+
34+
resolve(1);
3335
});
3436
}
3537

3638
async function main() {
3739
await populateDB();
3840

3941
console.log("--------------------------------");
40-
console.log("Results after 100.000 iterations");
42+
console.log("Results after 1000 iterations");
4143
console.log("--------------------------------");
4244

4345
const searchOnAllIndices = await searchBenchmark(db, {
@@ -48,6 +50,24 @@ async function main() {
4850
`Searching "believe" through 1M entries in all indices: ${searchOnAllIndices}`
4951
);
5052

53+
const exactSearchOnAllIndices = await searchBenchmark(db, {
54+
term: "believe",
55+
properties: "*",
56+
exact: true,
57+
});
58+
console.log(
59+
`Exact search for "believe" through 1M entries in all indices: ${exactSearchOnAllIndices}`
60+
);
61+
62+
const typoTolerantSearch = await searchBenchmark(db, {
63+
term: "belve",
64+
properties: "*",
65+
tolerance: 2,
66+
});
67+
console.log(
68+
`Typo-tolerant search for "belve" through 1M entries in all indices: ${typoTolerantSearch}`
69+
);
70+
5171
const searchOnSpecificIndex = await searchBenchmark(db, {
5272
term: "believe",
5373
properties: ["title"],
@@ -67,6 +87,7 @@ async function main() {
6787
const searchOnSpecificIndex3 = await searchBenchmark(db, {
6888
term: "musical",
6989
properties: ["category"],
90+
exact: true,
7091
});
7192
console.log(
7293
`Searching "musical" through 1M entries in the "category" index: ${searchOnSpecificIndex3}`
@@ -82,7 +103,7 @@ async function main() {
82103
}
83104

84105
async function searchBenchmark(db, query) {
85-
const results = Array.from({ length: 100_000 });
106+
const results = Array.from({ length: 1000 });
86107

87108
for (let i = 0; i < results.length; i++) {
88109
const { elapsed } = await db.search(query);

packages/benchmarks/engines/exact-search.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import cronometro from "cronometro";
2+
import { Lyra } from "@nearform/lyra";
3+
import lines from "../dataset/divinaCommedia.json" assert { type: "json" };
4+
5+
const db = new Lyra({
6+
schema: {
7+
id: "string",
8+
txt: "string",
9+
},
10+
});
11+
12+
for (const line of lines) {
13+
await db.insert(line);
14+
}
15+
16+
const testCases = {
17+
['Lyra exact search. Searching "comandamento" in Divina Commedia, "txt" index']() {
18+
return db.search({
19+
term: "comandamento",
20+
properties: ["txt"],
21+
exact: true,
22+
});
23+
},
24+
['Lyra exact search. Searching "incominciai" in Divina Commedia all indexes']() {
25+
return db.search({ term: "incominciai", exact: true });
26+
},
27+
};
28+
29+
cronometro(testCases);
30+
31+
export default testCases;

packages/benchmarks/engines/prefix-search-movies.js

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import fs from "fs";
44
import cronometro from "cronometro";
5-
import { parse } from "csv-parse";
5+
import readline from "readline";
66
import { Lyra } from "@nearform/lyra";
77

88
const db = new Lyra({
@@ -15,22 +15,24 @@ const db = new Lyra({
1515

1616
function populateDB() {
1717
console.log("Populating the database...");
18-
return new Promise((resolve) => {
19-
fs.createReadStream("./dataset/title.csv")
20-
.pipe(parse({ delimiter: ";", from_line: 2 }))
21-
.on("data", (row) => {
22-
const [, type, title, , , , , , category] = row;
18+
return new Promise(async (resolve) => {
19+
const fileStream = fs.createReadStream("./dataset/title.tsv");
20+
const rl = readline.createInterface({
21+
input: fileStream,
22+
crlfDelay: Infinity,
23+
});
24+
25+
for await (const row of rl) {
26+
const [, type, title, , , , , , category] = row.split("\t");
2327

24-
db.insert({
25-
type,
26-
title,
27-
category,
28-
});
29-
})
30-
.on("end", () => {
31-
console.log("Database ready");
32-
resolve(1);
28+
db.insert({
29+
type,
30+
title,
31+
category,
3332
});
33+
}
34+
35+
resolve(1);
3436
});
3537
}
3638


packages/benchmarks/engines/typo-tolerance.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import cronometro from "cronometro";
2+
import { Lyra } from "@nearform/lyra";
3+
import lines from "../dataset/divinaCommedia.json" assert { type: "json" };
4+
5+
const db = new Lyra({
6+
schema: {
7+
id: "string",
8+
txt: "string",
9+
},
10+
});
11+
12+
for (const line of lines) {
13+
await db.insert(line);
14+
}
15+
16+
const testCases = {
17+
['Lyra typo-tolerant search. Searching "confonderi" in Divina Commedia, "txt" index']() {
18+
return db.search({
19+
term: "confonderi",
20+
properties: ["txt"],
21+
exact: true,
22+
tolerance: 1,
23+
});
24+
},
25+
['Lyra typo-tolerant search. Searching "confondersi" in Divina Commedia all indexes']() {
26+
return db.search({ term: "confondersi", exact: true, tolerance: 2 });
27+
},
28+
};
29+
30+
cronometro(testCases);
31+
32+
export default testCases;

packages/benchmarks/get-imdb-dataset.mjs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env zx
2+
3+
const datasetName = `title.basics.tsv`;
4+
const dataset = `https://datasets.imdbws.com/${datasetName}`;
5+
const datasetOutputDir = `./dataset`;
6+
const datasetNewName = `title.tsv`
7+
8+
await $`wget ${dataset} -O ${datasetOutputDir}/${datasetName}.gz`;
9+
await $`gunzip ${datasetOutputDir}/${datasetName}`;
10+
await $`head -1000000 ${datasetOutputDir}/${datasetName} >> ${datasetOutputDir}/${datasetNewName}`
11+
await $`rm ${datasetOutputDir}/${datasetName}.gz`

packages/benchmarks/index.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import cronometro from "cronometro";
22
import prefixSearch from "./engines/prefix-search.js";
3-
import prefixSearchMovies from "./engines/prefix-search-movies.js";
3+
import exactSearch from "./engines/exact-search.js";
44
import indexing from "./engines/indexing.js";
5+
import typoTolerance from "./engines/typo-tolerance.js";
6+
import prefixSearchMovies from "./engines/prefix-search-movies.js";
57

6-
cronometro(prefixSearch);
78
cronometro(indexing);
9+
cronometro(prefixSearch);
10+
cronometro(exactSearch);
11+
cronometro(typoTolerance);
812
cronometro(prefixSearchMovies);

0 commit comments

Comments
 (0)