diff --git a/package-lock.json b/package-lock.json index 7639d4bc479..c67dd952565 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,7 @@ "license": "Apache-2.0", "dependencies": { "@mongodb-js/saslprep": "^1.1.5", - "bson": "^6.5.0", + "bson": "^6.6.0", "mongodb-connection-string-url": "^3.0.0" }, "devDependencies": { @@ -3891,9 +3891,9 @@ } }, "node_modules/bson": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/bson/-/bson-6.5.0.tgz", - "integrity": "sha512-DXf1BTAS8vKyR90BO4x5v3rKVarmkdkzwOrnYDFdjAY694ILNDkmA3uRh1xXJEl+C1DAh8XCvAQ+Gh3kzubtpg==", + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/bson/-/bson-6.6.0.tgz", + "integrity": "sha512-BVINv2SgcMjL4oYbBuCQTpE3/VKOSxrOA8Cj/wQP7izSzlBGVomdm+TcUd0Pzy0ytLSSDweCKQ6X3f5veM5LQA==", "engines": { "node": ">=16.20.1" } diff --git a/package.json b/package.json index ac31736335d..ffb24187c31 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ }, "dependencies": { "@mongodb-js/saslprep": "^1.1.5", - "bson": "^6.5.0", + "bson": "^6.6.0", "mongodb-connection-string-url": "^3.0.0" }, "peerDependencies": { diff --git a/src/bson.ts b/src/bson.ts index 2c0b43df12a..a44f7e2519f 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -1,8 +1,10 @@ import type { DeserializeOptions, SerializeOptions } from 'bson'; +import { BSON } from 'bson'; export { Binary, BSON, + BSONError, BSONRegExp, BSONSymbol, BSONType, @@ -25,6 +27,17 @@ export { UUID } from 'bson'; +export type BSONElement = BSON.OnDemand['BSONElement']; + +/** Parses BSON bytes into element descriptors, copying the parser result into an array when it is not one already. */ export function parseToElementsToArray(bytes: Uint8Array, offset?: number): BSONElement[] { + const res = BSON.onDemand.parseToElements(bytes, offset); + return Array.isArray(res) ? 
res : [...res]; +} +export const getInt32LE = BSON.onDemand.NumberUtils.getInt32LE; +export const getFloat64LE = BSON.onDemand.NumberUtils.getFloat64LE; +export const getBigInt64LE = BSON.onDemand.NumberUtils.getBigInt64LE; +export const toUTF8 = BSON.onDemand.ByteUtils.toUTF8; + /** * BSON Serialization options. * @public diff --git a/src/cmap/wire_protocol/on_demand/document.ts b/src/cmap/wire_protocol/on_demand/document.ts new file mode 100644 index 00000000000..96115a30848 --- /dev/null +++ b/src/cmap/wire_protocol/on_demand/document.ts @@ -0,0 +1,322 @@ +import { + Binary, + BSON, + type BSONElement, + BSONError, + type BSONSerializeOptions, + BSONType, + getBigInt64LE, + getFloat64LE, + getInt32LE, + ObjectId, + parseToElementsToArray, + Timestamp, + toUTF8 +} from '../../../bson'; + +// eslint-disable-next-line no-restricted-syntax +const enum BSONElementOffset { + type = 0, + nameOffset = 1, + nameLength = 2, + offset = 3, + length = 4 +} + +export type JSTypeOf = { + [BSONType.null]: null; + [BSONType.undefined]: null; + [BSONType.double]: number; + [BSONType.int]: number; + [BSONType.long]: bigint; + [BSONType.timestamp]: Timestamp; + [BSONType.binData]: Binary; + [BSONType.bool]: boolean; + [BSONType.objectId]: ObjectId; + [BSONType.string]: string; + [BSONType.date]: Date; + [BSONType.object]: OnDemandDocument; + [BSONType.array]: OnDemandDocument; +}; + +/** @internal */ +type CachedBSONElement = { element: BSONElement; value: any | undefined }; + +/** @internal */ +export class OnDemandDocument { + /** + * Maps JS strings to elements and jsValues for speeding up subsequent lookups. + * - If `false` then name does not exist in the BSON document + * - If `CachedBSONElement` instance name exists + * - If `cache[name].value == null` jsValue has not yet been parsed + * - Null/Undefined values do not get cached because they are zero-length values. 
+ */ + private readonly cache: Record = + Object.create(null); + /** Caches the index of elements that have been named */ + private readonly indexFound: Record = Object.create(null); + + /** All bson elements in this document */ + private readonly elements: BSONElement[]; + + constructor( + /** BSON bytes, this document begins at offset */ + protected readonly bson: Uint8Array, + /** The start of the document */ + private readonly offset = 0, + /** If this is an embedded document, indicates if this was a BSON array */ + public readonly isArray = false + ) { + this.elements = parseToElementsToArray(this.bson, offset); + } + + /** Reports whether the name bytes of element equal name; the length and each byte are compared against the UTF-16 code units of name, so this only supports basic latin strings */ + private isElementName(name: string, element: BSONElement): boolean { + const nameLength = element[BSONElementOffset.nameLength]; + const nameOffset = element[BSONElementOffset.nameOffset]; + + if (name.length !== nameLength) return false; + + for (let i = 0; i < name.length; i++) { + if (this.bson[nameOffset + i] !== name.charCodeAt(i)) return false; + } + + return true; + } + + /** + * Seeks into the elements array for an element matching the given name. 
+ * + * @remarks + * Caching: + * - Caches the existence of a property making subsequent look ups for non-existent properties return immediately + * - Caches names mapped to elements to avoid reiterating the array and comparing the name again + * - Caches the index at which an element has been found to prevent rechecking against elements already determined to belong to another name + * + * @param name - a basic latin string name of a BSON element + * @returns the cache entry for the matching element, or null when no element in this document matches the name + */ + private getElement(name: string): CachedBSONElement | null { + const cachedElement = this.cache[name]; + if (cachedElement === false) return null; + + if (cachedElement != null) { + return cachedElement; + } + + for (let index = 0; index < this.elements.length; index++) { + const element = this.elements[index]; + + // skip this element if it has already been associated with a name + if (!this.indexFound[index] && this.isElementName(name, element)) { + const cachedElement = { element, value: undefined }; + this.cache[name] = cachedElement; + this.indexFound[index] = true; + return cachedElement; + } + } + + this.cache[name] = false; + return null; + } + + /** + * Translates BSON bytes into a javascript value. Checking `as` against the BSON element's type + * this method returns the small subset of BSON types that the driver needs to function. 
+ * + * @remarks + * - BSONType.null and BSONType.undefined always return null + * - If the type requested does not match this returns null + * + * @param element - The element to revive to a javascript value + * @param as - A type byte expected to be returned + */ + private toJSValue(element: BSONElement, as: T): JSTypeOf[T]; + private toJSValue(element: BSONElement, as: keyof JSTypeOf): any { + const type = element[BSONElementOffset.type]; + const offset = element[BSONElementOffset.offset]; + const length = element[BSONElementOffset.length]; + + if (as !== type) { + return null; + } + + switch (as) { + case BSONType.null: + case BSONType.undefined: + return null; + case BSONType.double: + return getFloat64LE(this.bson, offset); + case BSONType.int: + return getInt32LE(this.bson, offset); + case BSONType.long: + return getBigInt64LE(this.bson, offset); + case BSONType.bool: + return Boolean(this.bson[offset]); + case BSONType.objectId: + return new ObjectId(this.bson.subarray(offset, offset + 12)); + case BSONType.timestamp: + return new Timestamp(getBigInt64LE(this.bson, offset)); + case BSONType.string: + return toUTF8(this.bson, offset + 4, offset + length - 1, false); + case BSONType.binData: { + const totalBinarySize = getInt32LE(this.bson, offset); + const subType = this.bson[offset + 4]; + + if (subType === 2) { + const subType2BinarySize = getInt32LE(this.bson, offset + 1 + 4); + if (subType2BinarySize < 0) + throw new BSONError('Negative binary type element size found for subtype 0x02'); + if (subType2BinarySize > totalBinarySize - 4) + throw new BSONError('Binary type with subtype 0x02 contains too long binary size'); + if (subType2BinarySize < totalBinarySize - 4) + throw new BSONError('Binary type with subtype 0x02 contains too short binary size'); + return new Binary( + this.bson.subarray(offset + 1 + 4 + 4, offset + 1 + 4 + 4 + subType2BinarySize), + 2 + ); + } + + return new Binary( + this.bson.subarray(offset + 1 + 4, offset + 1 + 4 + 
totalBinarySize), + subType + ); + } + case BSONType.date: + // NOTE(review): the int64 millisecond value is narrowed with Number() before constructing the Date and out-of-range values are not checked here. + return new Date(Number(getBigInt64LE(this.bson, offset))); + + case BSONType.object: + return new OnDemandDocument(this.bson, offset); + case BSONType.array: + return new OnDemandDocument(this.bson, offset, true); + + default: + throw new BSONError(`Unsupported BSON type: ${as}`); + } + } + + /** + * Checks for the existence of an element by name. + * + * @remarks + * Uses `getElement` with the expectation that will populate caches such that a `has` call + * followed by a `getElement` call will not repeat the cost paid by the first look up. + * + * @param name - element name + */ + public has(name: string): boolean { + const cachedElement = this.cache[name]; + if (cachedElement === false) return false; + if (cachedElement != null) return true; + return this.getElement(name) != null; + } + + /** + * Turns BSON element with `name` into a javascript value. NOTE(review): once a value has been cached for this name it is returned as-is on later calls without re-checking the requested type. + * + * @typeParam T - must be one of the supported BSON types determined by `JSTypeOf` this will determine the return type of this function. 
+ * @param name - the element name + * @param as - the bson type expected + * @param required - whether or not the element is expected to exist, if true this function will throw if it is not present + */ + public get( + name: string, + as: T, + required?: false | undefined + ): JSTypeOf[T] | null; + + /** `required` will make `get` throw if name does not exist or is null/undefined */ + public get(name: string, as: T, required: true): JSTypeOf[T]; + + public get( + name: string, + as: T, + required?: boolean + ): JSTypeOf[T] | null { + const element = this.getElement(name); + if (element == null) { + if (required === true) { + throw new BSONError(`BSON element "${name}" is missing`); + } else { + return null; + } + } + + if (element.value == null) { + const value = this.toJSValue(element.element, as); + if (value == null) { + if (required === true) { + throw new BSONError(`BSON element "${name}" is missing`); + } else { + return null; + } + } + // It is important to never store null + element.value = value; + } + + return element.value; + } + + /** + * Supports returning int, double, long, and bool as javascript numbers + * + * @remarks + * **NOTE:** + * - Use this _only_ when you believe the potential precision loss of an int64 is acceptable + * - This method does not cache the converted number as Longs or booleans would be stored incorrectly (the typed values it reads through get may still be cached) + * + * @param name - element name + * @param required - throws if name does not exist or no int, double, long, or bool value could be read for it + */ + public getNumber( + name: string, + required?: Req + ): Req extends true ? number : number | null; + public getNumber(name: string, required: boolean): number | null { + const maybeBool = this.get(name, BSONType.bool); + const bool = maybeBool == null ? null : maybeBool ? 1 : 0; + + const maybeLong = this.get(name, BSONType.long); + const long = maybeLong == null ? null : Number(maybeLong); + + const result = bool ?? long ?? 
this.get(name, BSONType.int) ?? 
this.get(name, BSONType.double); + + if (required === true && result == null) { + throw new BSONError(`BSON element "${name}" is missing`); + } + + return result; + } + + /** + * Deserialize this object, DOES NOT cache result so avoid multiple invocations + * @param options - BSON deserialization options + */ + public toObject(options?: BSONSerializeOptions): Record { + return BSON.deserialize(this.bson, { + ...options, + index: this.offset, + allowObjectSmallerThanBufferSize: true + }); + } + + /** + * Iterates through the elements of a document reviving them using the `as` BSONType. Iteration throws a BSONError when this document is not an array, and each revived value is also stored in the cache under its element index. + * + * @param as - The type to revive all elements as + */ + public *valuesAs(as: T): Generator { + if (!this.isArray) { + throw new BSONError('Unexpected conversion of non-array value to array'); + } + let counter = 0; + for (const element of this.elements) { + const value = this.toJSValue(element, as); + this.cache[counter] = { element, value }; + yield value; + counter += 1; + } + } +} diff --git a/test/mongodb.ts b/test/mongodb.ts index b043818115f..35a2213da65 100644 --- a/test/mongodb.ts +++ b/test/mongodb.ts @@ -130,6 +130,7 @@ export * from '../src/cmap/metrics'; export * from '../src/cmap/stream_description'; export * from '../src/cmap/wire_protocol/compression'; export * from '../src/cmap/wire_protocol/constants'; +export * from '../src/cmap/wire_protocol/on_demand/document'; export * from '../src/cmap/wire_protocol/shared'; export * from '../src/collection'; export * from '../src/connection_string'; diff --git a/test/unit/cmap/wire_protocol/on_demand/document.test.ts b/test/unit/cmap/wire_protocol/on_demand/document.test.ts new file mode 100644 index 00000000000..100062dc3f2 --- /dev/null +++ b/test/unit/cmap/wire_protocol/on_demand/document.test.ts @@ -0,0 +1,297 @@ +import { expect } from 'chai'; + +import { + Binary, + BSON, + BSONError, + 
BSONType, + ObjectId, + OnDemandDocument, + Timestamp +} from '../../../../mongodb'; + +describe('class OnDemandDocument', () => { + 
context('when given an empty BSON sequence', () => { + it('sets exists cache to false for any key requested', () => { + const emptyDocument = BSON.serialize({}); + const doc = new OnDemandDocument(emptyDocument, 0, false); + expect(doc.has('ok')).to.be.false; + expect(doc.has('$clusterTime')).to.be.false; + expect(doc).to.have.nested.property('cache.ok', false); + expect(doc).to.have.nested.property('cache.$clusterTime', false); + }); + }); + + context('when given a BSON document with ok set to 1', () => { + it('sets exists cache to true for ok', () => { + const emptyDocument = BSON.serialize({ ok: 1 }); + const doc = new OnDemandDocument(emptyDocument, 0, false); + expect(doc.has('ok')).to.be.true; + expect(doc).to.have.nested.property('cache.ok').that.is.an('object'); + }); + + it('sets exists cache to false for any other key', () => { + const emptyDocument = BSON.serialize({ ok: 1 }); + const doc = new OnDemandDocument(emptyDocument, 0, false); + expect(doc.has('$clusterTime')).to.be.false; + expect(doc).to.have.nested.property('cache.$clusterTime', false); + }); + }); + + context('when given a BSON document with ok set to 0 and code set to 2', () => { + it('tracks element position when finding match', () => { + const emptyDocument = BSON.serialize({ ok: 0, code: 2 }); + const doc = new OnDemandDocument(emptyDocument, 0, false); + expect(doc.has('code')).to.be.true; + expect(doc).to.have.nested.property('cache.code').that.is.an('object'); + expect(doc).to.not.have.nested.property('indexFound.0'); + expect(doc).to.have.nested.property('indexFound.1', true); + }); + }); + + context('toObject()', () => { + it('returns the results of calling BSON.deserialize on the document bytes', () => { + const offsetDocument = new Uint8Array([0, 0, 0, ...BSON.serialize({ ok: 0, code: 2 })]); + const doc = new OnDemandDocument(offsetDocument, 3, false); + expect(doc.toObject()).to.deep.equal( + BSON.deserialize(offsetDocument, { index: 3, allowObjectSmallerThanBufferSize: true }) 
+ ); + }); + + it('supports BSON options', () => { + const offsetDocument = new Uint8Array([0, 0, 0, ...BSON.serialize({ ok: 0, code: 2 })]); + const doc = new OnDemandDocument(offsetDocument, 3, false); + expect(doc.toObject({ promoteValues: false })).to.deep.equal( + BSON.deserialize(offsetDocument, { + index: 3, + allowObjectSmallerThanBufferSize: true, + promoteValues: false + }) + ); + }); + }); + + context('get()', () => { + let document: OnDemandDocument; + const input = { + int: 1, + double: 1.2, + long: 2n, + timestamp: new Timestamp(2n), + binData: new Binary(Uint8Array.from([1, 2, 3]), 3), + binDataSubtype2: new Binary(Uint8Array.from([1, 2, 3]), 2), + bool: true, + objectId: new ObjectId('01'.repeat(12)), + string: 'abc', + date: new Date(0), + object: { a: 1 }, + array: [1, 2], + unsupportedType: /abc/ + }; + + beforeEach(async function () { + const bytes = BSON.serialize(input); + document = new OnDemandDocument(bytes); + }); + + it('returns null if the element does not exist', () => { + expect(document.get('blah', BSONType.bool)).to.be.null; + }); + + it('returns the javascript value matching the as parameter', () => { + expect(document.get('bool', BSONType.bool)).to.be.true; + }); + + it('returns null if the BSON value mismatches the requested type', () => { + expect(document.get('bool', BSONType.int)).to.be.null; + }); + + it('supports requesting multiple types', () => { + expect( + document.get('bool', BSONType.int) ?? + document.get('bool', BSONType.long) ?? 
+ document.get('bool', BSONType.bool) + ).to.be.true; + }); + + it('throws if required is set to true and element name does not exist', () => { + expect(() => document.get('blah!', BSONType.bool, true)).to.throw(BSONError); + expect(document).to.have.nested.property('cache.blah!', false); + }); + + it('throws if requested type is unsupported', () => { + expect(() => { + // @ts-expect-error: checking a bad BSON type + document.get('unsupportedType', BSONType.regex); + }).to.throw(BSONError, /unsupported/i); + }); + + it('caches the value', () => { + document.has('int'); + expect(document).to.have.nested.property('cache.int.value', undefined); + document.get('int', BSONType.int); + expect(document).to.have.nested.property('cache.int.value', 1); + }); + + it('supports returning null for null and undefined bson elements', () => { + const bson = Uint8Array.from([ + ...[11, 0, 0, 0], // doc size + ...[6, 97, 0], // a: undefined (6) + ...[10, 98, 0], // b: null (10) + 0 // doc term + ]); + const document = new OnDemandDocument(bson, 0, false); + expect(document.get('a', BSONType.undefined)).to.be.null; + expect(document.get('b', BSONType.null)).to.be.null; + }); + + it('supports returning int', () => { + expect(document.get('int', BSONType.int, true)).to.deep.equal(input.int); + }); + + it('supports returning double', () => { + expect(document.get('double', BSONType.double, true)).to.deep.equal(input.double); + }); + + it('supports returning long', () => { + expect(document.get('long', BSONType.long, true)).to.deep.equal(input.long); + }); + + it('supports returning timestamp', () => { + expect(document.get('timestamp', BSONType.timestamp, true)).to.deep.equal(input.timestamp); + }); + + it('supports returning binData', () => { + expect(document.get('binData', BSONType.binData, true)).to.deep.equal(input.binData); + }); + + it('supports returning binData, subtype 2', () => { + expect(document.get('binDataSubtype2', BSONType.binData, true)).to.deep.equal( + 
input.binDataSubtype2 + ); + }); + + it('supports returning bool', () => { + expect(document.get('bool', BSONType.bool, true)).to.deep.equal(input.bool); + }); + + it('supports returning objectId', () => { + expect(document.get('objectId', BSONType.objectId, true)).to.deep.equal(input.objectId); + }); + + it('supports returning string', () => { + expect(document.get('string', BSONType.string, true)).to.deep.equal(input.string); + }); + + it('supports returning date', () => { + expect(document.get('date', BSONType.date, true)).to.deep.equal(input.date); + }); + + it('supports returning object', () => { + const o = document.get('object', BSONType.object, true); + expect(o).to.be.instanceOf(OnDemandDocument); + expect(o.has('a')).to.be.true; + }); + + it('supports returning array', () => { + const a = document.get('array', BSONType.array, true); + expect(a).to.be.instanceOf(OnDemandDocument); + expect(a.has('0')).to.be.true; + expect(a.has('1')).to.be.true; + }); + }); + + context('getNumber()', () => { + let document: OnDemandDocument; + const input = { + int: 1, + long: 2n, + double: 2.3, + bool: false, + boolTrue: true, + string: 'abc' + }; + + beforeEach(async function () { + const bytes = BSON.serialize(input); + document = new OnDemandDocument(bytes); + }); + + it('throws if required is set to true and element name does not exist', () => { + expect(() => document.getNumber('blah!', true)).to.throw(BSONError); + }); + + it('throws if required is set to true and element is not numeric', () => { + // just making sure this test does not fail for the non-exist reason + expect(document.has('string')).to.be.true; + expect(() => { + document.getNumber('string', true); + }).to.throw(BSONError); + }); + + it('returns null if required is set to false and element does not exist', () => { + expect(document.getNumber('blah!', false)).to.be.null; + expect(document.getNumber('blah!')).to.be.null; + }); + + it('returns null if required is set to false and element is not 
numeric', () => { + // just making sure this test does not fail for the non-exist reason + expect(document.has('string')).to.be.true; + + expect(document.getNumber('string', false)).to.be.null; + expect(document.getNumber('string')).to.be.null; + }); + + it('supports parsing int', () => { + expect(document.getNumber('int')).to.equal(1); + }); + + it('supports parsing long', () => { + expect(document.getNumber('long')).to.equal(2); + }); + + it('supports parsing double', () => { + expect(document.getNumber('double')).to.equal(2.3); + }); + + it('supports parsing bool', () => { + expect(document.getNumber('bool')).to.equal(0); + expect(document.getNumber('boolTrue')).to.equal(1); + }); + }); + + context('*valuesAs()', () => { + let array: OnDemandDocument; + beforeEach(async function () { + const bytes = BSON.serialize( + Object.fromEntries(Array.from({ length: 10 }, () => 1).entries()) + ); + array = new OnDemandDocument(bytes, 0, true); + }); + + it('throws if document is not an array', () => { + const bytes = BSON.serialize( + Object.fromEntries(Array.from({ length: 10 }, () => 1).entries()) + ); + array = new OnDemandDocument(bytes, 0, false); + expect(() => array.valuesAs(BSONType.int).next()).to.throw(); + }); + + it('returns a generator that yields values matching the as BSONType parameter', () => { + let didRun = false; + for (const item of array.valuesAs(BSONType.int)) { + didRun = true; + expect(item).to.equal(1); + } + expect(didRun).to.be.true; + }); + + it('caches the results of array', () => { + const generator = array.valuesAs(BSONType.int); + generator.next(); + generator.next(); + expect(array).to.have.nested.property('cache.0.value', 1); + expect(array).to.have.nested.property('cache.1.value', 1); + }); + }); +});