Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
154 commits
Select commit Hold shift + click to select a range
5e983ac
Sketch custom XML parser
nikeee Jun 20, 2025
f8313d5
Add api sketch
nikeee Jun 20, 2025
9cdcddb
Add poc code gen
nikeee Jun 21, 2025
c0c1d48
Better output
nikeee Jun 21, 2025
54d302f
Shorter output
nikeee Jun 21, 2025
9a6ef35
Use ability for casing
nikeee Jun 21, 2025
983d8c9
Add types for parser spec
nikeee Jun 21, 2025
4643f66
Use new parser
nikeee Jun 21, 2025
88cec8a
Buffer is toString'd internally
nikeee Jun 21, 2025
99f3614
Add generated parser
nikeee Jun 21, 2025
6674316
Add types to emitter functions
nikeee Jun 21, 2025
f9e59af
Add emit helper
nikeee Jun 21, 2025
eff35b0
Add parsing stuff
nikeee Jun 21, 2025
d1e6f34
Support optionals and default empty arrays
nikeee Jun 21, 2025
2eb752c
Mark parts default empty
nikeee Jun 21, 2025
72763c1
Move parser generator
nikeee Jun 21, 2025
e63a022
Add skip preamble
nikeee Jun 21, 2025
315682c
Add missing checksum fields
nikeee Jun 21, 2025
f535220
Better error message
nikeee Jun 21, 2025
7d229e1
Use new parser
nikeee Jun 21, 2025
61e2f1f
Support empty string tags
nikeee Jun 21, 2025
c467c2c
Update benchmark
nikeee Jun 21, 2025
fabd5eb
Resolve linter issues
nikeee Jun 21, 2025
c10639a
Update deps
nikeee Jun 23, 2025
452a043
Use tsx for benchmark execution due to const enums
nikeee Jun 23, 2025
4e43ff4
Migrate charCode to const enum for perf
nikeee Jun 23, 2025
ec6313f
Add return values
nikeee Jun 23, 2025
002f95d
Migrate tokenKind to an enum
nikeee Jun 23, 2025
ebe9af7
Make tokenKind const and inline vars for perf
nikeee Jun 23, 2025
d8ed31f
Add benchmark
nikeee Jun 23, 2025
499cfa0
Mark baselines
nikeee Jun 23, 2025
fc6ce63
Simpler lint
nikeee Jun 23, 2025
ecacd0c
Add type annotations
nikeee Jun 23, 2025
f9910b1
Simplify using scanTextNode
nikeee Jun 23, 2025
43d27ac
Implement scanQuotedString
nikeee Jun 23, 2025
b9a51f7
Add scanIdentifier
nikeee Jun 23, 2025
b2e7ca6
Use switch
nikeee Jun 23, 2025
2d203f4
Remove default param
nikeee Jun 23, 2025
e6beb6d
Add scanPreamble
nikeee Jun 23, 2025
d42f8d5
Update deps
nikeee Jun 23, 2025
2e011bf
Merge branch 'main' into custom-xml-parser
nikeee Jun 23, 2025
ad36fc9
Properly scan preamble
nikeee Jun 23, 2025
b5e09fb
Skip optional preamble
nikeee Jun 23, 2025
1ade459
Clean up benchmarks
nikeee Jun 23, 2025
d9860f2
Add xml benchmark
nikeee Jun 23, 2025
57f82b4
do-while(true) -> while(true)
nikeee Jun 23, 2025
d3014e3
Add comparison with s3mini in xml parser benchmarks
nikeee Jun 23, 2025
7333ae4
Support default value
nikeee Jun 23, 2025
dbc486c
Move logic to emitResultAssignment
nikeee Jun 23, 2025
39b43c4
Add bar plot
nikeee Jun 23, 2025
15ee3d1
Add defaultValue for strings
nikeee Jun 23, 2025
9f58844
Support default values for integers and dates
nikeee Jun 23, 2025
d80ea79
Prime scanner in root function
nikeee Jun 23, 2025
7f3921a
Prefix runtime with namespace import
nikeee Jun 23, 2025
a1e10cb
Add split between static and runtime parser
nikeee Jun 23, 2025
1c06eed
Add usage note
nikeee Jun 23, 2025
166d10a
Use schema-based runtime-generated parser for ListParts
nikeee Jun 23, 2025
9c5ba34
de-duplicate parse spec
nikeee Jun 23, 2025
64f1f5e
Remove obsolete getter
nikeee Jun 23, 2025
ff6d9bd
Use new parser for ListObjectsV2
nikeee Jun 23, 2025
9b592d5
Use new parser for CreateMultipartUploadResult
nikeee Jun 23, 2025
deb42f4
Fix self-closing value bug
nikeee Jun 23, 2025
588804a
Use new parser for more functions
nikeee Jun 23, 2025
e0fd0e7
Add new return types
nikeee Jun 23, 2025
e07da64
Merge branch 'main' into custom-xml-parser
nikeee Jun 23, 2025
08d577e
Rename method
nikeee Jun 23, 2025
e5ddf64
Fix isIdentifierPart
nikeee Jun 23, 2025
5b9fb17
Add parseCompleteMultipartUploadResult
nikeee Jun 23, 2025
4e4058f
Fix text nodes starting with =
nikeee Jun 23, 2025
c919d02
Add parseDeleteResult
nikeee Jun 23, 2025
8e257a8
Move common code to getObjectPropertyChecks
nikeee Jun 23, 2025
a6eaab6
Merge branch 'main' into custom-xml-parser
nikeee Jun 24, 2025
6fe318d
Execute more tests
nikeee Jun 24, 2025
88d2d42
Add support for encoded values
nikeee Jun 24, 2025
7d42c1c
Discover more tests
nikeee Jun 24, 2025
f08511e
Remove debug logging
nikeee Jun 24, 2025
3127bfb
Add xml parser tests
nikeee Jun 24, 2025
c94d3d4
Add explainer
nikeee Jun 24, 2025
8bb2113
Merge branch 'main' into custom-xml-parser
nikeee Jun 26, 2025
80413a4
Merge branch 'main' into custom-xml-parser
nikeee Jun 29, 2025
78d0e58
Merge branch 'cors-policy' into custom-xml-parser
nikeee Jun 29, 2025
1584fb3
Add response parser
nikeee Jun 29, 2025
df0464d
Add tests for PutBucketCors + GetBucketCors
nikeee Jun 29, 2025
f154c11
Merge branch 'main' into custom-xml-parser
nikeee Jun 29, 2025
365356d
Merge branch 'main' into custom-xml-parser
nikeee Jun 29, 2025
dfa5cb0
Re-add test
nikeee Jun 29, 2025
17e5f20
Add deleteBucketCors to test
nikeee Jun 29, 2025
d2e063e
Update deps
nikeee Jun 30, 2025
bb422ed
Merge branch 'main' into custom-xml-parser
nikeee Jun 30, 2025
0bad9e0
Fix globbing tests
nikeee Jun 30, 2025
ac87088
Add parser class
nikeee Jun 30, 2025
650b2a7
Add primitive parsers
nikeee Jun 30, 2025
1b3b3ef
Add parseOpeningTag
nikeee Jul 1, 2025
d29befe
Use new parser infra
nikeee Jul 1, 2025
739d9d5
Fix token consume
nikeee Jul 1, 2025
45967c5
Fix optional preamble
nikeee Jul 1, 2025
5ab68c8
Add more tests
nikeee Jul 1, 2025
3baa728
Use different test case
nikeee Jul 1, 2025
df2d9dc
Add launch config to debug parser
nikeee Jul 1, 2025
790f6cb
Remove unused isArray
nikeee Jul 1, 2025
e54732a
Add missing token consumption
nikeee Jul 1, 2025
28a37fe
Node.js 20 doesn't properly support quoted globbing :(
nikeee Jul 1, 2025
7b6c6de
Merge branch 'main' into custom-xml-parser
nikeee Jul 1, 2025
d39ba4e
Comment out static file benchmark
nikeee Jul 1, 2025
dbb064c
Re-use string parsing
nikeee Jul 3, 2025
48eeb85
Inline function call
nikeee Jul 3, 2025
59b24b4
Use different loop
nikeee Jul 3, 2025
cbe1bde
Reduce number of allocations
nikeee Jul 3, 2025
3a32e88
Less token kinds and skip more stuff in scanner
nikeee Jul 3, 2025
f9b2a94
Simplify text scanning
nikeee Jul 3, 2025
9d1cd99
Remove empty method
nikeee Jul 3, 2025
a3320ad
Don't return a value
nikeee Jul 3, 2025
1bfb6b2
Move parser to inherited class
nikeee Jul 3, 2025
4c883a1
Inline var
nikeee Jul 3, 2025
abfc34a
Merge branch 'main' into custom-xml-parser
nikeee Jul 7, 2025
f66c1a6
Fix var
nikeee Jul 7, 2025
cdb1453
Merge branch 'main' into custom-xml-parser
nikeee Jul 7, 2025
f603158
Merge branch 'main' into custom-xml-parser
nikeee Jul 7, 2025
676f862
Merge branch 'main' into custom-xml-parser
nikeee Jul 7, 2025
816f8c1
Merge branch 'main' into custom-xml-parser
nikeee Jul 8, 2025
5e86b2d
Add test for leading slash
nikeee Jul 8, 2025
3d3a73c
Better error message
nikeee Jul 8, 2025
ace23fa
Adjust optionality for garage compat
nikeee Jul 8, 2025
971921b
Fix text node detection in scanner
nikeee Jul 8, 2025
f620cfe
Fix tests for garage
nikeee Jul 8, 2025
2c5e14b
Merge branch 'main' into custom-xml-parser
nikeee Jul 9, 2025
108831b
Add parser features for ceph
nikeee Jul 9, 2025
3d9b523
Merge branch 'main' into custom-xml-parser
nikeee Jul 22, 2025
1f20fe7
Merge branch 'main' into custom-xml-parser
nikeee Jul 23, 2025
592cd50
Merge branch 'main' into custom-xml-parser
nikeee Jul 24, 2025
3ecd751
Merge branch 'main' into custom-xml-parser
nikeee Aug 12, 2025
3f83db9
Merge branch 'main' into custom-xml-parser
nikeee Aug 15, 2025
05ad1d0
Use indexOf for perf improvements
nikeee Aug 15, 2025
dc2e55f
Use indexOf for perf improvements in quotes
nikeee Aug 15, 2025
02fd6b6
Use indexOf to skip preamble
nikeee Aug 15, 2025
4f00863
Fix start index
nikeee Aug 15, 2025
107058f
Update deps
nikeee Aug 19, 2025
917df3e
Merge branch 'main' into custom-xml-parser
nikeee Aug 20, 2025
67db608
Merge branch 'main' into custom-xml-parser
nikeee Aug 28, 2025
663d29b
Merge branch 'main' into custom-xml-parser
nikeee Sep 3, 2025
58cc8f3
Simplify preamble skip
nikeee Sep 3, 2025
97487c5
Merge branch 'main' into custom-xml-parser
nikeee Sep 10, 2025
339ff2d
Merge branch 'main' into custom-xml-parser
nikeee Nov 3, 2025
2d8230e
Add checksum bag
nikeee Nov 3, 2025
59fedfd
Add parseCopyObjectResult
nikeee Nov 3, 2025
02e7799
Use parser
nikeee Nov 3, 2025
c0190ce
Update deps
nikeee Sep 11, 2025
f379f12
Merge branch 'main' into custom-xml-parser
nikeee Nov 10, 2025
3473b40
Update lockfile
nikeee Nov 10, 2025
502989f
Merge branch 'main' into custom-xml-parser
nikeee Nov 10, 2025
a40de4a
Merge branch 'main' into custom-xml-parser
nikeee Nov 10, 2025
d5bb044
Merge branch 'main' into custom-xml-parser
nikeee Nov 10, 2025
816eb0a
Merge branch 'main' into custom-xml-parser
nikeee Nov 10, 2025
9d1b2c7
Merge branch 'main' into custom-xml-parser
nikeee Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// VS Code debug configuration: launches whichever file is currently open in the editor via `tsx`.
{
"version": "0.2.0",
"configurations": [
{
// Label of this configuration
"name": "tsx",
"type": "node",
"request": "launch",
// Debug current file in VSCode
"program": "${file}",
// Start the program with the `tsx` executable instead of plain `node`
// (presumably so TypeScript sources can be debugged without a build step; confirm)
"runtimeExecutable": "tsx",
// Run in the integrated terminal and never open the internal debug console
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
// Files to exclude from debugger (e.g. call stack)
"skipFiles": [
// Node.js internal core modules
"<node_internals>/**",
// Ignore all dependencies (optional)
"${workspaceFolder}/node_modules/**",
]
}
]
}
124 changes: 50 additions & 74 deletions bench/benchmark-operations.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import { createHash } from "node:crypto";

import { summary, group, bench, run, do_not_optimize } from "mitata";
import { XMLParser } from "fast-xml-parser";

/**
* @module Case study whether to use URLSearchParams or manual string concat for simple search params.
* @module Case study whether to use URLSearchParams or manual string concat for simple search params and some other micro benchmarks to determine how we should do things.
*
* benchmarks marked with `.baseline(true)` are the ones that mark methods that we use in the code.
*/

summary(() => {
group(() => {
group("building search params", () => {
function buildSearchParamsURLSP(
amzCredential,
date,
Expand Down Expand Up @@ -154,10 +155,10 @@ summary(() => {
undefined,
);
}
});
}).baseline(true);
});

group(() => {
group("building search params v2", () => {
const options = {
prefix: "/",
maxKeys: 100,
Expand Down Expand Up @@ -186,20 +187,19 @@ summary(() => {
let s = "list-type=2";

if (options.prefix) {
// biome-ignore lint/style/useTemplate: <explanation>
// biome-ignore lint/style/useTemplate: this is what we're benchmarking
s += "&prefix=" + encodeURIComponent(options.prefix);
}
if (options.startAfter) {
// biome-ignore lint/style/useTemplate: <explanation>
// biome-ignore lint/style/useTemplate: this is what we're benchmarking
s += "&start-after=" + encodeURIComponent(options.startAfter);
}
if (options.maxKeys) {
// biome-ignore lint/style/useTemplate: <explanation>
// biome-ignore lint/style/useTemplate: this is what we're benchmarking
s += "&max-keys=" + options.maxKeys; // no encoding needed, since it's a number
}
if (options.continuationToken) {
s +=
// biome-ignore lint/style/useTemplate: <explanation>
"&continuation-token=" +
encodeURIComponent(options.continuationToken);
}
Expand All @@ -222,10 +222,12 @@ summary(() => {
s += `&continuation-token=${encodeURIComponent(options.continuationToken)}`;
}
const _ = s;
}).gc("once");
})
.gc("once")
.baseline(true);
});

group(() => {
group("computing sha256 of parts", () => {
function signUpdate(method, path, query, host) {
return createHash("sha256")
.update(method)
Expand Down Expand Up @@ -267,8 +269,9 @@ summary(() => {
"localhost:1337",
);
}
});
bench("update calls", () => {
}).baseline(true);

bench("consecutive update calls", () => {
for (let i = 0; i < 1000; ++i) {
signUpdate(
"GET",
Expand All @@ -292,7 +295,7 @@ summary(() => {
});
});

group(() => {
group("joining strings", () => {
const headers = [
["host"].sort(),
["host", "x-amz-date"].sort(),
Expand All @@ -319,6 +322,7 @@ summary(() => {
return h.join(";");
}

/** @param {string[]} h */
function concat(h) {
let res = h.length > 0 ? h[0] : "";
for (let i = 1; i < h.length; ++i) {
Expand All @@ -329,18 +333,18 @@ summary(() => {

bench("string concat join", () => {
for (let i = 0; i < headers.length; ++i) {
const x = concat(headers[i]);
const _ = concat(headers[i]);
}
});
}).baseline(true);

bench("array string join", () => {
for (let i = 0; i < headers.length; ++i) {
const x = join(headers[i]);
const _ = join(headers[i]);
}
});
});

group(() => {
group("substring vs check if string is empty before append", () => {
// Which is faster: always adding a & and calling substring(1), or checking on every append whether a preceding & is needed?

bench("substring", () => {
Expand All @@ -358,9 +362,10 @@ summary(() => {

a += "&uploadId=12323456432";

const q = a.substring(1);
const _ = a.substring(1);
}
});
}).baseline(true);

bench("conditional", () => {
for (let i = 0; i < 1000; ++i) {
let a = "";
Expand Down Expand Up @@ -418,60 +423,31 @@ summary(() => {
// What is faster, passing an empty object as a default or accepting undefined and use safe-navigation?
// -> This is probably hard to benchmark and the results are pretty close -> we don't care

group(() => {
bench("allocation", () => {
for (let i = 0; i < 1000; ++i) {
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam({ a: true }));
do_not_optimize(fnWithDefaultParam({ a: true, b: true }));
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam());
}
});
bench("conditional", () => {
for (let i = 0; i < 1000; ++i) {
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam({ a: true }));
do_not_optimize(fnWithOptionalParam({ a: true, b: true }));
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam());
}
});
});

group(() => {
// Do we want to pass a buffer to our XML parser? Undici offers a buffer directly, which could
// improve throughput due to an encoding step getting skipped

const s = `<ListPartsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Bucket>test-bucket</Bucket><Key>583ea250-5016-48e5-8b26-b3ce0d9e5822/foo-key-9000</Key><UploadId>tWA7cuzMIElE_sIi8weNVQJdxXnxZI9mhRT3hi9Xuaeqv4DjyteO64y_o4SuJP_E0Uf-D4Mzqeno7eWIakTtmlgabUjQ3uko2TE9Qv5BpztLPVqqJKEQnhulwkgLzcOs</UploadId><PartNumberMarker>0</PartNumberMarker><NextPartNumberMarker>3</NextPartNumberMarker><MaxParts>1000</MaxParts><IsTruncated>false</IsTruncated><Part><ETag>"4715e35cf900ae14837e3c098e87d522"</ETag><LastModified>2025-06-20T13:58:01.000Z</LastModified><PartNumber>1</PartNumber><Size>6291456</Size></Part><Part><ETag>"ce1b200f8c97447474929b722ed93b00"</ETag><LastModified>2025-06-20T13:58:02.000Z</LastModified><PartNumber>2</PartNumber><Size>6291456</Size></Part><Part><ETag>"3bc3be0b850eacf461ec036374616058"</ETag><LastModified>2025-06-20T13:58:02.000Z</LastModified><PartNumber>3</PartNumber><Size>1048576</Size></Part><Initiator><DisplayName>webfile</DisplayName><ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a</ID></Initiator><Owner><DisplayName>webfile</DisplayName><ID>75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a</ID></Owner><StorageClass>STANDARD</StorageClass></ListPartsResult>`;
const b = Buffer.from(s, "ascii");

// -> buffer and string perform basically the same
// maybe we should use a buffer via undici, because undici could skip the string decoding

const xmlParser = new XMLParser({
ignoreAttributes: true,
isArray: (_, jPath) =>
jPath === "ListMultipartUploadsResult.Upload" ||
jPath === "ListBucketResult.Contents" ||
jPath === "ListPartsResult.Part" ||
jPath === "DeleteResult.Deleted" ||
jPath === "DeleteResult.Error",
});

bench("parse string with fxp", () => {
for (let i = 0; i < 10000; ++i) {
xmlParser.parse(s);
}
});
bench("parse buffer with fxp", () => {
for (let i = 0; i < 10000; ++i) {
xmlParser.parse(b);
}
});
});
// Micro benchmark group: a parameter defaulted to `{}` vs. an optional parameter
// accessed through safe navigation. Both benches issue the same six calls per
// iteration (four without an argument, two with an object literal), 1000 times.
// NOTE(review): fnWithDefaultParam / fnWithOptionalParam are defined outside this
// span; their bodies are assumed to match the bench labels. Confirm.
group(
"empty object as default param vs undefined param + safe navigation",
() => {
// Variant that is marked as the baseline of this group.
bench("allocation (param = {})", () => {
for (let i = 0; i < 1000; ++i) {
// Each result is passed to do_not_optimize (presumably so the call is not eliminated as dead code; confirm).
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam({ a: true }));
do_not_optimize(fnWithDefaultParam({ a: true, b: true }));
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam());
do_not_optimize(fnWithDefaultParam());
}
}).baseline(true);
// Same call mix as above, against the optional-parameter variant.
bench("conditional (safe navigation, param?.value)", () => {
for (let i = 0; i < 1000; ++i) {
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam({ a: true }));
do_not_optimize(fnWithOptionalParam({ a: true, b: true }));
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam());
do_not_optimize(fnWithOptionalParam());
}
});
},
);
});
});

Expand Down
4 changes: 1 addition & 3 deletions bench/xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { summary, group, bench, run, barplot } from "mitata";
import { XMLParser } from "fast-xml-parser";

import * as s3mini from "./s3mini-xml.ts";
// import { parseListPartsResult as runtimeGeneratedParser } from "../src/parsers.ts";
import { parseListPartsResult as runtimeGeneratedParser } from "../src/parsers.ts";

summary(() => {
barplot(() => {
Expand Down Expand Up @@ -41,13 +41,11 @@ summary(() => {
});
*/

/*
bench("custom parser (runtime-generated)", () => {
for (let i = 0; i < 10000; ++i) {
runtimeGeneratedParser(s);
}
}).baseline(true);
*/

bench("xml parser of s3mini", () => {
for (let i = 0; i < 10000; ++i) {
Expand Down
Loading
Loading