From 32d1204e5ac46f54f4dc76d239c6b51cd188ecab Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 11 Mar 2024 13:57:09 -0400 Subject: [PATCH 1/7] feat(NODE-5958): add BSON iterating API --- .eslintrc.json | 1 + src/bson.ts | 2 +- src/parser/on_demand/index.ts | 9 + src/parser/on_demand/parse_to_elements.ts | 5 +- src/parser/on_demand/parse_to_structure.ts | 138 ++++++ .../on_demand/parse_to_structure.test.ts | 421 ++++++++++++++++++ test/node/release.test.ts | 1 + test/node/tools/utils.js | 8 +- 8 files changed, 577 insertions(+), 8 deletions(-) create mode 100644 src/parser/on_demand/parse_to_structure.ts create mode 100644 test/node/parser/on_demand/parse_to_structure.test.ts diff --git a/.eslintrc.json b/.eslintrc.json index 3e482551..9b707d61 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -64,6 +64,7 @@ "@typescript-eslint/no-unsafe-return": "off", "@typescript-eslint/no-unsafe-argument": "off", "@typescript-eslint/no-unsafe-call": "off", + "@typescript-eslint/no-unsafe-enum-comparison": "off", "@typescript-eslint/consistent-type-imports": [ "error", { diff --git a/src/bson.ts b/src/bson.ts index 5475e08e..7bd83a98 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -54,7 +54,7 @@ export { BSONValue } from './bson_value'; export { BSONError, BSONVersionError, BSONRuntimeError } from './error'; export { BSONType } from './constants'; export { EJSON } from './extended_json'; -export { onDemand } from './parser/on_demand/index'; +export { onDemand, type OnDemand } from './parser/on_demand/index'; /** @public */ export interface Document { diff --git a/src/parser/on_demand/index.ts b/src/parser/on_demand/index.ts index bd08cdb8..9a2c45a2 100644 --- a/src/parser/on_demand/index.ts +++ b/src/parser/on_demand/index.ts @@ -1,5 +1,6 @@ import { type BSONError, BSONOffsetError } from '../../error'; import { type BSONElement, parseToElements } from './parse_to_elements'; +import { type BSONReviver, type Container, parseToStructure } from './parse_to_structure'; /** * 
@experimental * @public @@ -12,6 +13,13 @@ export type OnDemand = { isBSONError(value: unknown): value is BSONError; }; parseToElements: (this: void, bytes: Uint8Array, startOffset?: number) => Iterable; + parseToStructure: >( + this: void, + bytes: Uint8Array, + offset?: number, + root?: Container, + reviver?: BSONReviver + ) => TRoot; }; /** @@ -21,6 +29,7 @@ export type OnDemand = { const onDemand: OnDemand = Object.create(null); onDemand.parseToElements = parseToElements; +onDemand.parseToStructure = parseToStructure; onDemand.BSONOffsetError = BSONOffsetError; Object.freeze(onDemand); diff --git a/src/parser/on_demand/parse_to_elements.ts b/src/parser/on_demand/parse_to_elements.ts index bc3c107d..15e48179 100644 --- a/src/parser/on_demand/parse_to_elements.ts +++ b/src/parser/on_demand/parse_to_elements.ts @@ -1,4 +1,3 @@ -/* eslint-disable @typescript-eslint/no-unsafe-enum-comparison */ import { BSONOffsetError } from '../../error'; /** @@ -45,8 +44,8 @@ export type BSONElement = [ length: number ]; -/** Parses a int32 little-endian at offset, throws if it is negative */ -function getSize(source: Uint8Array, offset: number): number { +/** @internal Parses a int32 little-endian at offset, throws if it is negative */ +export function getSize(source: Uint8Array, offset: number): number { if (source[offset + 3] > 127) { throw new BSONOffsetError('BSON size cannot be negative', offset); } diff --git a/src/parser/on_demand/parse_to_structure.ts b/src/parser/on_demand/parse_to_structure.ts new file mode 100644 index 00000000..196e174f --- /dev/null +++ b/src/parser/on_demand/parse_to_structure.ts @@ -0,0 +1,138 @@ +import { type Code } from '../../code'; +import { type BSONElement, getSize, parseToElements as p } from './parse_to_elements'; + +/** @internal TODO */ +const DEFAULT_REVIVER = () => null; + +/** @internal */ +function parseToElements(...args: Parameters): BSONElement[] { + const res = p(...args); + return Array.isArray(res) ? 
res : [...res]; +} + +/** + * @internal + * BSONElement offsets + */ +const enum e { + type = 0, + nameOffset = 1, + nameLength = 2, + offset = 3, + length = 4 +} + +/** + * @internal + * Embedded bson types + */ +const enum t { + object = 3, + array = 4, + javascriptWithScope = 15 +} + +/** @internal */ +type ParseContext = { + elementOffset: number; + elements: BSONElement[]; + container: Container; + previous: ParseContext | null; +}; + +/** + * @experimental + * @public + * A union of the possible containers for BSON elements. + * + * Depending on kind, a reviver can accurately assign a value to a name on the container. + */ +export type Container = + | { + dest: Record; + kind: 'object'; + } + | { + dest: Map; + kind: 'map'; + } + | { + dest: Array; + kind: 'array'; + } + | { + dest: Code; + kind: 'code'; + } + | { + kind: 'custom'; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [key: string]: any; + }; + +/** + * @experimental + * @public + */ +export type BSONReviver = ( + bytes: Uint8Array, + container: Container, + element: BSONElement +) => Container | null; + +/** + * @experimental + * @public + */ +export function parseToStructure>( + bytes: Uint8Array, + startOffset?: number, + root?: Container, + reviver?: BSONReviver +): TRoot { + root ??= { + kind: 'object', + dest: Object.create(null) + }; + + reviver ??= DEFAULT_REVIVER; + + let ctx: ParseContext | null = { + elementOffset: 0, + elements: parseToElements(bytes, startOffset), + container: root, + previous: null + }; + + embedded: while (ctx !== null) { + for ( + let it: BSONElement | undefined = ctx.elements[ctx.elementOffset++]; + it != null; + it = ctx.elements[ctx.elementOffset++] + ) { + const maybeNewContainer = reviver(bytes, ctx.container, it); + const isEmbeddedType = + it[e.type] === t.object || it[e.type] === t.array || it[e.type] === t.javascriptWithScope; + const iterateEmbedded = maybeNewContainer != null && isEmbeddedType; + + if (iterateEmbedded) { + const 
docOffset: number = + it[e.type] !== t.javascriptWithScope + ? it[e.offset] + : it[e.offset] + getSize(bytes, it[e.offset] + 4) + 4 + 4; // value offset + codeSize + value int + code int + + ctx = { + elementOffset: 0, + elements: parseToElements(bytes, docOffset), + container: maybeNewContainer, + previous: ctx + }; + + continue embedded; + } + } + ctx = ctx.previous; + } + + return root.dest as unknown as TRoot; +} diff --git a/test/node/parser/on_demand/parse_to_structure.test.ts b/test/node/parser/on_demand/parse_to_structure.test.ts new file mode 100644 index 00000000..0dfdc51b --- /dev/null +++ b/test/node/parser/on_demand/parse_to_structure.test.ts @@ -0,0 +1,421 @@ +import * as sinon from 'sinon'; +import { expect } from 'chai'; +import { Code, onDemand } from '../../../register-bson'; +import { ByteUtils } from '../../../../src/utils/byte_utils'; + +import { bufferFromHexArray, stringToUTF8HexBytes, int32LEToHex } from '../../tools/utils'; + +const parseToStructure = onDemand.parseToStructure; + +const enum e { + type = 0, + nameOffset = 1, + nameLength = 2, + offset = 3, + length = 4 +} + +describe('parseToStructure()', () => { + context('when called with an empty document sequence', () => { + it('returns an object with no properties', () => { + const res = parseToStructure(new Uint8Array([5, 0, 0, 0, 0])); + expect(res).to.deep.equal(Object.create(null)); + }); + + it('returns an object with a null prototype', () => { + const res = parseToStructure(new Uint8Array([5, 0, 0, 0, 0])); + expect(Object.getPrototypeOf(res)).to.be.null; + }); + + it('never calls reviver', () => { + const spy = sinon.spy(); + parseToStructure(new Uint8Array([5, 0, 0, 0, 0]), undefined, undefined, spy); + expect(spy).to.not.have.been.called; + }); + + it('returns given root container', () => { + const dest = new Map(); + const res = parseToStructure(new Uint8Array([5, 0, 0, 0, 0]), undefined, { + kind: 'map', + dest + }); + // instance eq check + expect(res).to.equal(dest); + }); 
+ }); + + context('when called with a single element sequence', () => { + const bsonBytes = bufferFromHexArray([ + '10', // int32 type + '6100', // 'a' key with key null terminator + '01000000' // little endian int32 + ]); + + it('calls the reviver with the same instance of the input bytes', () => { + const spy = sinon.spy(); + parseToStructure(bsonBytes, undefined, undefined, spy); + expect(spy).to.have.been.calledWith(sinon.match.same(bsonBytes)); + }); + + it('calls the reviver with default object container', () => { + const spy = sinon.spy(); + parseToStructure(bsonBytes, undefined, undefined, spy); + expect(spy).to.have.been.calledWith( + sinon.match.any, + sinon.match({ kind: 'object', dest: {} }) + ); + }); + + it('calls the reviver with the int element', () => { + const spy = sinon.spy(); + parseToStructure(bsonBytes, undefined, undefined, spy); + expect(spy).to.have.been.calledWith( + sinon.match.any, + sinon.match.any, + sinon.match( + Object.values({ + type: 0x10, // int + nameOffset: 5, + nameLength: 1, + offset: 7, + length: 4 + }) + ) + ); + }); + }); + + context(`when given a bson document`, () => { + const common = { nameOffset: 5, nameLength: 1, offset: 7 }; + const regexp = [ + Buffer.from('abc').toString('hex'), + '00', + Buffer.from('imx').toString('hex'), + '00' + ].join(''); + const code_w_scope = [ + int32LEToHex(13 + 5 + 4), // code is 13, document is 5, 4 for leading int + stringToUTF8HexBytes('() => {}'), + int32LEToHex(5), + '00' + ].join(''); + const tableTest = [ + { + name: 'double', + input: ['01', '6100', '0100000000000000'], + output: { type: 1, length: 8 } + }, + { + name: 'string', + input: ['02', '6100', stringToUTF8HexBytes('hello')], + output: { type: 2, length: 'hello'.length + 4 + 1 } // 4 for the size, 1 for the null + }, + { + name: 'empty object', + input: ['03', '6100', int32LEToHex(5), '00'], + output: { type: 3, length: 5 } + }, + { + name: 'empty array', + input: ['04', '6100', int32LEToHex(5), '00'], + output: { type: 
4, length: 5 } + }, + { + name: 'binary', + input: ['05', '6100', int32LEToHex(5), '23', '00'], + output: { type: 5, length: 10 } + }, + { + name: 'undefined', + input: ['06', '6100'], + output: { type: 6, length: 0 } + }, + { + name: 'objectId', + input: ['07', '6100', '00'.repeat(12)], + output: { type: 7, length: 12 } + }, + { + name: 'boolean', + input: ['08', '6100', '45'], + output: { type: 8, length: 1 } + }, + { + name: 'date', + input: ['09', '6100', '00'.repeat(8)], + output: { type: 9, length: 8 } + }, + { + name: 'null', + input: ['0A', '6100'], + output: { type: 10, length: 0 } + }, + { + name: 'regexp', + input: ['0B', '6100', regexp], + output: { type: 11, length: 8 } + }, + { + name: 'dbpointer', + input: ['0C', '6100', stringToUTF8HexBytes('db.coll'), '00'.repeat(12)], + output: { type: 12, length: 'db.coll'.length + 4 + 1 + 12 } + }, + { + name: 'code', + input: ['0D', '6100', stringToUTF8HexBytes('() => {}')], + output: { type: 13, length: '() => {}'.length + 4 + 1 } + }, + { + name: 'symbol', + input: ['0E', '6100', stringToUTF8HexBytes('symbol')], + output: { type: 14, length: 'symbol'.length + 4 + 1 } + }, + { + name: 'empty code_w_scope', + input: ['0F', '6100', code_w_scope], + output: { type: 15, length: '() => {}'.length + 4 + 1 + 5 + 4 } + }, + { + name: 'int', + input: ['10', '6100', int32LEToHex(320)], + output: { type: 16, length: 4 } + }, + { + name: 'timestamp', + input: ['11', '6100', '00'.repeat(8)], + output: { type: 17, length: 8 } + }, + { + name: 'long', + input: ['12', '6100', '00'.repeat(8)], + output: { type: 18, length: 8 } + }, + { + name: 'decimal128', + input: ['13', '6100', '00'.repeat(16)], + output: { type: 19, length: 16 } + }, + { + name: 'minkey', + input: ['FF', '6100'], + output: { type: 255, length: 0 } + }, + { + name: 'maxkey', + input: ['7F', '6100'], + output: { type: 127, length: 0 } + } + ]; + + context('when reviver returns null', () => { + it('does not iterated the embedded documents', () => { + const 
embedded = bufferFromHexArray([ + '03', // object + '6200', // 'b' + bufferFromHexArray(['01', '6100', '0100000000000000']).toString('hex') + ]); + + const spy = sinon.stub().returns(null); + const res = parseToStructure(embedded, undefined, { kind: 'custom' }, spy); + expect(spy).to.have.been.calledOnceWith( + sinon.match.same(embedded), + sinon.match({ kind: 'custom' }), + sinon.match( + Object.values({ type: 3, nameOffset: 5, nameLength: 1, offset: 7, length: 16 }) + ) + ); + expect(res).to.be.undefined; + }); + }); + + for (const test of tableTest) { + context(`with one ${test.name} element`, () => { + it(`calls reviver with bytes, container, and element with type=${test.output.type} and length=${test.output.length}`, () => { + const bsonBytes = bufferFromHexArray(test.input); + const output = { ...common, ...test.output }; + const spy = sinon.spy(); + parseToStructure(bsonBytes, undefined, undefined, spy); + expect(spy).to.have.been.calledWith( + sinon.match.same(bsonBytes), + sinon.match({ kind: 'object', dest: {} }), + sinon.match([ + output.type, + output.nameOffset, + output.nameLength, + output.offset, + output.length + ]) + ); + }); + }); + } + + for (const test of tableTest) { + context(`with embedded document that contains ${test.name}`, () => { + const embedded = bufferFromHexArray([ + '03', // object + '6200', // 'b' + bufferFromHexArray(test.input).toString('hex') + ]); + + const makeReviverSpy = () => + sinon.stub().callsFake(function myReviver(bytes, container, element) { + const key = ByteUtils.toUTF8( + bytes, + element[e.nameOffset], + element[e.nameOffset] + element[e.nameLength], + true + ); + if (element[0] === 3 && key === 'b') { + // key is 'b' and element is object (top-level) + + container.dest[key] = Object.create(null); + return { + kind: 'object', + dest: container.dest[key] + }; + } + + container.dest[key] = element; + }); + + it(`calls reviver with embedded element`, () => { + const output = Object.values({ + type: test.output.type, 
+ // 4 size bytes + doc type byte + 2 'b\x00' + 4 size bytes + value type byte == 12 + nameOffset: 12, + nameLength: 1, + offset: 14, // 12 + 'a\x00' + length: test.output.length + }); + const spy = makeReviverSpy(); + const res = parseToStructure(embedded, undefined, undefined, spy); + expect(res).to.deep.equal({ b: { a: output } }); + }); + }); + + context(`with embedded array that contains ${test.name}`, () => { + const embedded = bufferFromHexArray([ + '04', // array + '6200', // 'b' + bufferFromHexArray(test.input).toString('hex') + ]); + + const makeReviverSpy = () => + sinon.stub().callsFake(function myReviver(bytes, container, element) { + if (element[0] === 4) { + const key = ByteUtils.toUTF8( + bytes, + element[e.nameOffset], + element[e.nameOffset] + element[e.nameLength], + true + ); + if (key === 'b') { + // key is 'b' and element is array (top-level) + + container.dest[key] = []; + return { + kind: 'array', + dest: container.dest[key] + }; + } + } + + // wow! no key parsing necessary! 
+ container.dest.push(element); + }); + + it(`calls reviver with embedded element`, () => { + const output = Object.values({ + type: test.output.type, + // 4 size bytes + doc type byte + 2 'b\x00' + 4 size bytes + value type byte == 12 + nameOffset: 12, + nameLength: 1, + offset: 14, // 12 + 'a\x00' + length: test.output.length + }); + const spy = makeReviverSpy(); + const res = parseToStructure(embedded, undefined, undefined, spy); + expect(res).to.deep.equal({ b: [output] }); + }); + }); + + context(`with embedded code_w_scope that contains ${test.name}`, () => { + const scope = bufferFromHexArray(test.input); + const embedded = bufferFromHexArray([ + '0F', // code_w_scope + '6200', // 'b' + int32LEToHex(13 + scope.length + 4), // code is 13, document is scope.length, 4 for leading int + stringToUTF8HexBytes('() => {}'), + scope.toString('hex') + ]); + + const makeReviverSpy = () => + sinon.stub().callsFake(function myReviver(bytes, container, element) { + const key = ByteUtils.toUTF8( + bytes, + element[e.nameOffset], + element[e.nameOffset] + element[e.nameLength], + true + ); + if (element[0] === 15 && key === 'b') { + // key is 'b' and element is code_w_scope (top-level) + const offset = element[e.offset]; + const functionStringLength = + bytes[offset + 4] | + (bytes[offset + 5] << 8) | + (bytes[offset + 6] << 16) | + (bytes[offset + 7] << 24); + const start = offset + 4 + 4; + const end = start + functionStringLength - 1; + const codeString = ByteUtils.toUTF8(bytes, start, end, true); + const code = new Code(codeString, Object.create(null)); + container.dest[key] = code; + return { + kind: 'code', + dest: container.dest[key] + }; + } + + // wow! no key parsing necessary! + container.dest.scope[key] = element; + }); + + it(`calls reviver with embedded element`, () => { + const output = Object.values({ + type: test.output.type, + /** + * 29 comes from: + * - 4 bytes for the embedded document + * - 1 type byte + * - etc... 
todo math + */ + nameOffset: 29, + nameLength: 1, + offset: 31, // 29 + 'a\x00' + length: test.output.length + }); + const spy = makeReviverSpy(); + const res = parseToStructure(embedded, undefined, undefined, spy); + expect(res).to.deep.equal({ b: new Code('() => {}', { a: output }) }); + }); + }); + } + }); + + context('when given a bson document with an array that has 100 items', () => { + it('calls the reviver 101 times, 1 for the document, 100 for the array items', () => { + // Cheating by making an array of 0 length keys + const intValue = ['10', '00', int32LEToHex(1)].join(''); + const bsonBytes = bufferFromHexArray([ + '04', // array + '6100', // 'a' key with key null terminator + bufferFromHexArray([intValue.repeat(100)]).toString('hex') + ]); + + const spy = sinon.stub().returnsArg(1); + parseToStructure(bsonBytes, undefined, undefined, spy); + expect(spy).to.have.callCount(101); + }); + }); +}); diff --git a/test/node/release.test.ts b/test/node/release.test.ts index da69230d..bfcae1a0 100644 --- a/test/node/release.test.ts +++ b/test/node/release.test.ts @@ -42,6 +42,7 @@ const REQUIRED_FILES = [ 'src/parser/utils.ts', 'src/parser/on_demand/index.ts', 'src/parser/on_demand/parse_to_elements.ts', + 'src/parser/on_demand/parse_to_structure.ts', 'src/regexp.ts', 'src/symbol.ts', 'src/timestamp.ts', diff --git a/test/node/tools/utils.js b/test/node/tools/utils.js index 2d2432ae..36d51c2a 100644 --- a/test/node/tools/utils.js +++ b/test/node/tools/utils.js @@ -166,12 +166,12 @@ exports.int32LEToHex = int32LEToHex; * @returns BSON string with byte size encoded */ const stringToUTF8HexBytes = str => { - var b = Buffer.from(str, 'utf8'); - var len = b.byteLength; - var out = Buffer.alloc(len + 4 + 1); + const b = Buffer.from(str, 'utf8'); + const len = b.byteLength; + const out = Buffer.alloc(len + 4 + 1); out.writeInt32LE(len + 1, 0); out.set(b, 4); - out[len + 1] = 0x00; + out[len + 4] = 0x00; return out.toString('hex'); }; From 
7275458465a825158b6b08ab9cd0c383bfe2c3d1 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 11 Mar 2024 17:27:05 -0400 Subject: [PATCH 2/7] docs: fix comment format --- src/parser/on_demand/parse_to_elements.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser/on_demand/parse_to_elements.ts b/src/parser/on_demand/parse_to_elements.ts index 15e48179..e0bbd0a6 100644 --- a/src/parser/on_demand/parse_to_elements.ts +++ b/src/parser/on_demand/parse_to_elements.ts @@ -44,7 +44,10 @@ export type BSONElement = [ length: number ]; -/** @internal Parses a int32 little-endian at offset, throws if it is negative */ +/** + * @internal + * Parses a int32 little-endian at offset, throws if it is negative + */ export function getSize(source: Uint8Array, offset: number): number { if (source[offset + 3] > 127) { throw new BSONOffsetError('BSON size cannot be negative', offset); From d4c557d577d5e10af1dcdfff4b73663e9bdcbcb4 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 11 Mar 2024 17:27:16 -0400 Subject: [PATCH 3/7] fix: typescript for root --- src/parser/on_demand/index.ts | 14 +++++++++----- src/parser/on_demand/parse_to_structure.ts | 19 +++++++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/parser/on_demand/index.ts b/src/parser/on_demand/index.ts index 9a2c45a2..8e26c829 100644 --- a/src/parser/on_demand/index.ts +++ b/src/parser/on_demand/index.ts @@ -13,13 +13,17 @@ export type OnDemand = { isBSONError(value: unknown): value is BSONError; }; parseToElements: (this: void, bytes: Uint8Array, startOffset?: number) => Iterable; - parseToStructure: >( - this: void, + parseToStructure: < + TRoot extends Container = { + dest: Record; + kind: 'object'; + } + >( bytes: Uint8Array, - offset?: number, - root?: Container, + startOffset?: number, + root?: TRoot, reviver?: BSONReviver - ) => TRoot; + ) => TRoot extends undefined ? 
Record : TRoot['dest']; }; /** diff --git a/src/parser/on_demand/parse_to_structure.ts b/src/parser/on_demand/parse_to_structure.ts index 196e174f..cc6792b6 100644 --- a/src/parser/on_demand/parse_to_structure.ts +++ b/src/parser/on_demand/parse_to_structure.ts @@ -67,6 +67,8 @@ export type Container = | { kind: 'custom'; // eslint-disable-next-line @typescript-eslint/no-explicit-any + dest: any; + // eslint-disable-next-line @typescript-eslint/no-explicit-any [key: string]: any; }; @@ -84,15 +86,20 @@ export type BSONReviver = ( * @experimental * @public */ -export function parseToStructure>( +export function parseToStructure< + TRoot extends Container = { + dest: Record; + kind: 'object'; + } +>( bytes: Uint8Array, startOffset?: number, - root?: Container, + providedRoot?: TRoot, reviver?: BSONReviver -): TRoot { - root ??= { +): TRoot extends undefined ? Record : TRoot['dest'] { + const root = providedRoot ?? { kind: 'object', - dest: Object.create(null) + dest: Object.create(null) as Record }; reviver ??= DEFAULT_REVIVER; @@ -134,5 +141,5 @@ export function parseToStructure>( ctx = ctx.previous; } - return root.dest as unknown as TRoot; + return root.dest; } From cd3dd41035357bae15b3c1ec80345700261fbd3e Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 11 Mar 2024 17:37:01 -0400 Subject: [PATCH 4/7] fix: enum location and parseToElementToArray --- src/parser/on_demand/parse_to_structure.ts | 48 ++++++++++------------ 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/parser/on_demand/parse_to_structure.ts b/src/parser/on_demand/parse_to_structure.ts index cc6792b6..dfc80a1b 100644 --- a/src/parser/on_demand/parse_to_structure.ts +++ b/src/parser/on_demand/parse_to_structure.ts @@ -1,37 +1,15 @@ import { type Code } from '../../code'; -import { type BSONElement, getSize, parseToElements as p } from './parse_to_elements'; +import { type BSONElement, getSize, parseToElements } from './parse_to_elements'; /** @internal TODO */ const 
DEFAULT_REVIVER = () => null; /** @internal */ -function parseToElements(...args: Parameters): BSONElement[] { - const res = p(...args); +function parseToElementsToArray(bytes: Uint8Array, offset?: number): BSONElement[] { + const res = parseToElements(bytes, offset); return Array.isArray(res) ? res : [...res]; } -/** - * @internal - * BSONElement offsets - */ -const enum e { - type = 0, - nameOffset = 1, - nameLength = 2, - offset = 3, - length = 4 -} - -/** - * @internal - * Embedded bson types - */ -const enum t { - object = 3, - array = 4, - javascriptWithScope = 15 -} - /** @internal */ type ParseContext = { elementOffset: number; @@ -106,11 +84,27 @@ export function parseToStructure< let ctx: ParseContext | null = { elementOffset: 0, - elements: parseToElements(bytes, startOffset), + elements: parseToElementsToArray(bytes, startOffset), container: root, previous: null }; + /** BSONElement offsets */ + const enum e { + type = 0, + nameOffset = 1, + nameLength = 2, + offset = 3, + length = 4 + } + + /** BSON Embedded types */ + const enum t { + object = 3, + array = 4, + javascriptWithScope = 15 + } + embedded: while (ctx !== null) { for ( let it: BSONElement | undefined = ctx.elements[ctx.elementOffset++]; @@ -130,7 +124,7 @@ export function parseToStructure< ctx = { elementOffset: 0, - elements: parseToElements(bytes, docOffset), + elements: parseToElementsToArray(bytes, docOffset), container: maybeNewContainer, previous: ctx }; From 39f1633dab7f748a813e2f5c334ebc989929fcae Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 11 Mar 2024 17:37:09 -0400 Subject: [PATCH 5/7] test: name --- test/node/parser/on_demand/parse_to_structure.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/node/parser/on_demand/parse_to_structure.test.ts b/test/node/parser/on_demand/parse_to_structure.test.ts index 0dfdc51b..86b6c4e8 100644 --- a/test/node/parser/on_demand/parse_to_structure.test.ts +++ 
b/test/node/parser/on_demand/parse_to_structure.test.ts @@ -208,7 +208,7 @@ describe('parseToStructure()', () => { ]; context('when reviver returns null', () => { - it('does not iterated the embedded documents', () => { + it('does not iterate the embedded documents', () => { const embedded = bufferFromHexArray([ '03', // object '6200', // 'b' From eeae36c0e0046e24d948fc8bfe1b78b018aa00f9 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Tue, 12 Mar 2024 10:02:26 -0400 Subject: [PATCH 6/7] cleanups, support null to default --- src/parser/on_demand/index.ts | 4 ++ src/parser/on_demand/parse_to_elements.ts | 4 +- src/parser/on_demand/parse_to_structure.ts | 46 ++++++++++++---------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/parser/on_demand/index.ts b/src/parser/on_demand/index.ts index 8e26c829..dd3094ec 100644 --- a/src/parser/on_demand/index.ts +++ b/src/parser/on_demand/index.ts @@ -24,6 +24,10 @@ export type OnDemand = { root?: TRoot, reviver?: BSONReviver ) => TRoot extends undefined ? Record : TRoot['dest']; + // Types + BSONElement: BSONElement; + Container: Container; + BSONReviver: BSONReviver; }; /** diff --git a/src/parser/on_demand/parse_to_elements.ts b/src/parser/on_demand/parse_to_elements.ts index e0bbd0a6..47ed3c5e 100644 --- a/src/parser/on_demand/parse_to_elements.ts +++ b/src/parser/on_demand/parse_to_elements.ts @@ -82,7 +82,9 @@ function findNull(bytes: Uint8Array, offset: number): number { * @public * @experimental */ -export function parseToElements(bytes: Uint8Array, startOffset = 0): Iterable { +export function parseToElements(bytes: Uint8Array, pOffset?: number | null): Iterable { + const startOffset = pOffset ?? 
0; + if (bytes.length < 5) { throw new BSONOffsetError( `Input must be at least 5 bytes, got ${bytes.length} bytes`, diff --git a/src/parser/on_demand/parse_to_structure.ts b/src/parser/on_demand/parse_to_structure.ts index dfc80a1b..157b102c 100644 --- a/src/parser/on_demand/parse_to_structure.ts +++ b/src/parser/on_demand/parse_to_structure.ts @@ -1,11 +1,15 @@ import { type Code } from '../../code'; import { type BSONElement, getSize, parseToElements } from './parse_to_elements'; -/** @internal TODO */ -const DEFAULT_REVIVER = () => null; +/** @internal */ +const DEFAULT_REVIVER: BSONReviver = ( + _bytes: Uint8Array, + _container: Container, + _element: BSONElement +) => null; /** @internal */ -function parseToElementsToArray(bytes: Uint8Array, offset?: number): BSONElement[] { +function parseToElementsToArray(bytes: Uint8Array, offset?: number | null): BSONElement[] { const res = parseToElements(bytes, offset); return Array.isArray(res) ? res : [...res]; } @@ -71,16 +75,16 @@ export function parseToStructure< } >( bytes: Uint8Array, - startOffset?: number, - providedRoot?: TRoot, - reviver?: BSONReviver + startOffset?: number | null, + pRoot?: TRoot | null, + pReviver?: BSONReviver | null ): TRoot extends undefined ? Record : TRoot['dest'] { - const root = providedRoot ?? { + const root = pRoot ?? { kind: 'object', dest: Object.create(null) as Record }; - reviver ??= DEFAULT_REVIVER; + const reviver = pReviver ?? 
DEFAULT_REVIVER; let ctx: ParseContext | null = { elementOffset: 0, @@ -89,13 +93,10 @@ export function parseToStructure< previous: null }; - /** BSONElement offsets */ + /** BSONElement offsets: type indicator and value offset */ const enum e { type = 0, - nameOffset = 1, - nameLength = 2, - offset = 3, - length = 4 + offset = 3 } /** BSON Embedded types */ @@ -111,21 +112,24 @@ export function parseToStructure< it != null; it = ctx.elements[ctx.elementOffset++] ) { - const maybeNewContainer = reviver(bytes, ctx.container, it); + const type = it[e.type]; + const offset = it[e.offset]; + + const container = reviver(bytes, ctx.container, it); const isEmbeddedType = - it[e.type] === t.object || it[e.type] === t.array || it[e.type] === t.javascriptWithScope; - const iterateEmbedded = maybeNewContainer != null && isEmbeddedType; + type === t.object || type === t.array || type === t.javascriptWithScope; - if (iterateEmbedded) { + if (container != null && isEmbeddedType) { const docOffset: number = - it[e.type] !== t.javascriptWithScope - ? it[e.offset] - : it[e.offset] + getSize(bytes, it[e.offset] + 4) + 4 + 4; // value offset + codeSize + value int + code int + type !== t.javascriptWithScope + ? 
offset + : // value offset + codeSize + value int + code int + offset + getSize(bytes, offset + 4) + 4 + 4; ctx = { elementOffset: 0, elements: parseToElementsToArray(bytes, docOffset), - container: maybeNewContainer, + container, previous: ctx }; From da05d20f20841b96689461e6df3e8e9c54ab4b47 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Wed, 13 Mar 2024 14:12:54 -0400 Subject: [PATCH 7/7] address comments --- src/parser/on_demand/parse_to_elements.ts | 53 ++++++++++++++-------- src/parser/on_demand/parse_to_structure.ts | 22 +++++---- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/parser/on_demand/parse_to_elements.ts b/src/parser/on_demand/parse_to_elements.ts index 47ed3c5e..0a778a92 100644 --- a/src/parser/on_demand/parse_to_elements.ts +++ b/src/parser/on_demand/parse_to_elements.ts @@ -8,7 +8,7 @@ import { BSONOffsetError } from '../../error'; * - `minKey` is set to 255 so unsigned comparisons succeed * - Modify with caution, double check the bundle contains literals */ -const enum t { +const enum BSONElementType { double = 1, string = 2, object = 3, @@ -82,8 +82,11 @@ function findNull(bytes: Uint8Array, offset: number): number { * @public * @experimental */ -export function parseToElements(bytes: Uint8Array, pOffset?: number | null): Iterable { - const startOffset = pOffset ?? 
0; +export function parseToElements( + bytes: Uint8Array, + startOffset: number | null = 0 +): Iterable { + startOffset ??= 0; if (bytes.length < 5) { throw new BSONOffsetError( @@ -125,37 +128,51 @@ export function parseToElements(bytes: Uint8Array, pOffset?: number | null): Ite let length: number; - if (type === t.double || type === t.long || type === t.date || type === t.timestamp) { + if ( + type === BSONElementType.double || + type === BSONElementType.long || + type === BSONElementType.date || + type === BSONElementType.timestamp + ) { length = 8; - } else if (type === t.int) { + } else if (type === BSONElementType.int) { length = 4; - } else if (type === t.objectId) { + } else if (type === BSONElementType.objectId) { length = 12; - } else if (type === t.decimal) { + } else if (type === BSONElementType.decimal) { length = 16; - } else if (type === t.bool) { + } else if (type === BSONElementType.bool) { length = 1; - } else if (type === t.null || type === t.undefined || type === t.maxKey || type === t.minKey) { + } else if ( + type === BSONElementType.null || + type === BSONElementType.undefined || + type === BSONElementType.maxKey || + type === BSONElementType.minKey + ) { length = 0; } // Needs a size calculation - else if (type === t.regex) { + else if (type === BSONElementType.regex) { length = findNull(bytes, findNull(bytes, offset) + 1) + 1 - offset; - } else if (type === t.object || type === t.array || type === t.javascriptWithScope) { + } else if ( + type === BSONElementType.object || + type === BSONElementType.array || + type === BSONElementType.javascriptWithScope + ) { length = getSize(bytes, offset); } else if ( - type === t.string || - type === t.binData || - type === t.dbPointer || - type === t.javascript || - type === t.symbol + type === BSONElementType.string || + type === BSONElementType.binData || + type === BSONElementType.dbPointer || + type === BSONElementType.javascript || + type === BSONElementType.symbol ) { length = getSize(bytes, 
offset) + 4; - if (type === t.binData) { + if (type === BSONElementType.binData) { // binary subtype length += 1; } - if (type === t.dbPointer) { + if (type === BSONElementType.dbPointer) { // dbPointer's objectId length += 12; } diff --git a/src/parser/on_demand/parse_to_structure.ts b/src/parser/on_demand/parse_to_structure.ts index 157b102c..2924c725 100644 --- a/src/parser/on_demand/parse_to_structure.ts +++ b/src/parser/on_demand/parse_to_structure.ts @@ -94,13 +94,13 @@ export function parseToStructure< }; /** BSONElement offsets: type indicator and value offset */ - const enum e { + const enum BSONElementOffset { type = 0, offset = 3 } /** BSON Embedded types */ - const enum t { + const enum BSONElementType { object = 3, array = 4, javascriptWithScope = 15 @@ -108,20 +108,22 @@ export function parseToStructure< embedded: while (ctx !== null) { for ( - let it: BSONElement | undefined = ctx.elements[ctx.elementOffset++]; - it != null; - it = ctx.elements[ctx.elementOffset++] + let bsonElement: BSONElement | undefined = ctx.elements[ctx.elementOffset++]; + bsonElement != null; + bsonElement = ctx.elements[ctx.elementOffset++] ) { - const type = it[e.type]; - const offset = it[e.offset]; + const type = bsonElement[BSONElementOffset.type]; + const offset = bsonElement[BSONElementOffset.offset]; - const container = reviver(bytes, ctx.container, it); + const container = reviver(bytes, ctx.container, bsonElement); const isEmbeddedType = - type === t.object || type === t.array || type === t.javascriptWithScope; + type === BSONElementType.object || + type === BSONElementType.array || + type === BSONElementType.javascriptWithScope; if (container != null && isEmbeddedType) { const docOffset: number = - type !== t.javascriptWithScope + type !== BSONElementType.javascriptWithScope ? offset : // value offset + codeSize + value int + code int offset + getSize(bytes, offset + 4) + 4 + 4;