Skip to content

fix(NODE-6735, NODE-6711): add BSON vector validation to EJSON stringification, serialization and conversion to native types #748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not Int8');
}

validateBinaryVector(this);

return new Int8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -361,6 +363,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not Float32');
}

validateBinaryVector(this);

const floatBytes = new Uint8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -387,6 +391,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not packed bit');
}

validateBinaryVector(this);

return new Uint8Array(
this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
);
Expand All @@ -409,6 +415,8 @@ export class Binary extends BSONValue {
throw new BSONError('Binary datatype field is not packed bit');
}

validateBinaryVector(this);

const byteCount = this.length() - 2;
const bitCount = byteCount * 8 - this.buffer[1];
const bits = new Int8Array(bitCount);
Expand Down Expand Up @@ -517,6 +525,12 @@ export function validateBinaryVector(vector: Binary): void {
throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
}

if (datatype === Binary.VECTOR_TYPE.Float32) {
if (size !== 0 && size - 2 !== 0 && (size - 2) % 4 !== 0) {
throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
}
}

if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
throw new BSONError(
'Invalid Vector: padding must be zero for packed bit vectors that are empty'
Expand Down
224 changes: 159 additions & 65 deletions test/node/bson_binary_vector.spec.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import * as util from 'util';
import * as fs from 'fs';
import * as path from 'path';
import { BSON, BSONError, Binary } from '../register-bson';
import { BSON, BSONError, Binary, EJSON } from '../register-bson';
import { expect } from 'chai';

const { toHex, fromHex } = BSON.onDemand.ByteUtils;

type VectorHexType = '0x03' | '0x27' | '0x10';
type VectorTest = {
description: string;
vector: (number | string)[];
vector?: number[];
valid: boolean;
dtype_hex: VectorHexType;
padding?: number;
Expand All @@ -17,15 +18,11 @@ type VectorTest = {
/**
 * Shape of one parsed spec file from `specs/bson-binary-vector`.
 */
type VectorSuite = {
  description: string;
  /** Document key the vector lives under; used as `[suite.test_key]` when building and reading documents. */
  test_key: string;
  /** Individual cases; the spec runner separates them into valid and invalid groups. */
  tests: VectorTest[];
};

/**
 * Normalizes one entry of a float vector read from a spec file.
 *
 * Numbers are returned untouched. Anything else is treated as a test-format
 * problem so that an unexpected input type fails loudly instead of being
 * silently accepted during the test run.
 *
 * NOTE(review): the string sentinels 'inf' / '-inf' are still translated below,
 * although the fixture shown for the Infinity case also has a `$numberDouble`
 * form. This view appears to interleave both sides of a diff; confirm whether
 * the sentinel branches are still wanted.
 *
 * @param f - a vector element as parsed from the spec file
 * @returns the numeric value for `f`
 * @throws Error when `f` is a string other than 'inf' or '-inf'
 */
function fixFloats(f: string | number): number {
  if (typeof f !== 'number') {
    switch (f) {
      case 'inf':
        return Infinity;
      case '-inf':
        return -Infinity;
      default:
        throw new Error(`test format error: unknown float value: ${f}`);
    }
  }

  return f;
}

Expand All @@ -49,7 +46,20 @@ function fixBits(f: number | string): number {
return f;
}

function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
/**
 * Maps a vector dtype byte (as the hex string used by the spec files) to the
 * name of the `Binary` factory helper that builds a vector of that dtype.
 *
 * @param dtype_hex - '0x10' (packed_bit), '0x03' (int8) or '0x27' (float32)
 * @returns the helper name: 'fromPackedBits', 'fromInt8Array' or 'fromFloat32Array'
 * @throws Error when `dtype_hex` is not one of the three known values
 */
function dtypeToHelper(dtype_hex: string) {
  const helperByDtype = {
    '0x10': 'fromPackedBits', // packed_bit
    '0x03': 'fromInt8Array', // int8
    '0x27': 'fromFloat32Array' // float32
  } as const;

  // Own-property check so inherited keys such as 'toString' are still rejected.
  if (!Object.prototype.hasOwnProperty.call(helperByDtype, dtype_hex)) {
    throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
  }

  return helperByDtype[dtype_hex as keyof typeof helperByDtype];
}

function make(vector: number[], dtype_hex: VectorHexType, padding?: number): Binary {
let binary: Binary;
switch (dtype_hex) {
case '0x10' /* packed_bit */:
Expand Down Expand Up @@ -87,21 +97,131 @@ const invalidTestExpectedError = new Map()
'Invalid Vector: padding must be a value between 0 and 7'
)
.set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7')
// skipped
.set('Overflow Vector PACKED_BIT', false)
.set('Underflow Vector PACKED_BIT', false)
.set('Overflow Vector INT8', false)
.set('Underflow Vector INT8', false)
.set('INT8 with float inputs', false)
// duplicate test! but also skipped.
.set('Vector with float values PACKED_BIT', false)
.set('Vector with float values PACKED_BIT', false);
.set(
'Insufficient vector data FLOAT32',
'Invalid Vector: Float32 vector must contain a multiple of 4 bytes'
)
// These are not possible given the constraints of the input types allowed:
// our helpers will throw an "unsupported_error" for these
.set('Overflow Vector PACKED_BIT', 'unsupported_error')
.set('Underflow Vector PACKED_BIT', 'unsupported_error')
.set('Overflow Vector INT8', 'unsupported_error')
.set('Underflow Vector INT8', 'unsupported_error')
.set('INT8 with float inputs', 'unsupported_error')
.set('Vector with float values PACKED_BIT', 'unsupported_error');

/**
 * Registers the mocha cases for an invalid spec test that supplies input
 * numbers (`test.vector`).
 *
 * For each output path (BSON.serialize and EJSON.stringify) the vector is built
 * with `make()` and then encoded; whichever step throws is captured.
 * - If the captured message starts with 'unsupported_error', the test only
 *   checks that 'unsupported_error' is what was expected for this case.
 * - Otherwise the captured value must be a BSONError whose message matches
 *   `expectedErrorMessage` (which is used as a regular expression source).
 *
 * @param test - the invalid spec case; `test.vector` must be present
 * @param expectedErrorMessage - expected message pattern, or the literal 'unsupported_error'
 */
function testVectorInvalidInputValues(test: VectorTest, expectedErrorMessage: string) {
  describe('when creating a BSON Vector given invalid input values', () => {
    const helperName = dtypeToHelper(test.dtype_hex);

    // The two output paths differ only in their label and the final encode call.
    const outputPaths = [
      { api: 'BSON.serialize', encode: (bin: Binary): unknown => BSON.serialize({ bin }) },
      { api: 'EJSON.stringify', encode: (bin: Binary): unknown => BSON.EJSON.stringify({ bin }) }
    ];

    for (const { api, encode } of outputPaths) {
      it(`either ${api}() or Binary.${helperName}() throws a BSONError`, function () {
        let caught: Error | undefined;
        try {
          encode(make(test.vector!, test.dtype_hex, test.padding));
        } catch (error) {
          caught = error;
        }

        const isUnsupported = caught?.message.startsWith('unsupported_error');
        if (!isUnsupported) {
          expect(caught, caught?.stack).to.be.instanceOf(BSONError);
          expect(caught?.message).to.match(new RegExp(expectedErrorMessage));
          return;
        }

        expect(
          expectedErrorMessage,
          'We expect a certain error message but got an unsupported error'
        ).to.equal('unsupported_error');
      });
    }
  });
}

/**
 * Registers the mocha cases for an invalid spec test that supplies raw bytes
 * (`test.canonical_bson`).
 *
 * Each case deserializes the hex document afresh and then checks that one
 * consumer of the resulting value throws a BSONError matching
 * `expectedErrorMessage` (used as a regular expression source):
 * re-serializing the document, the dtype-specific `to*` helper, `toBits()` for
 * packed-bit vectors only, and EJSON stringification.
 *
 * The helper calls read the Binary from the document's `vector` key.
 *
 * @param test - the invalid spec case; `test.canonical_bson` must be present
 * @param expectedErrorMessage - pattern the thrown BSONError message must match
 */
function testVectorInvalidBSONBytes(test: VectorTest, expectedErrorMessage: string) {
  describe('when creating a Binary Vector instance from invalid bytes', () => {
    // Deserialization happens outside the guarded call, so a failure to decode surfaces directly.
    const decode = () => BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex'));

    // Runs `operation`, captures whatever it throws, and asserts it is the expected BSONError.
    const expectBSONError = (operation: () => unknown): void => {
      let caught: Error | undefined;
      try {
        operation();
      } catch (error) {
        caught = error;
      }

      expect(caught, caught?.stack).to.be.instanceOf(BSONError);
      expect(caught?.message).to.match(new RegExp(expectedErrorMessage));
    };

    it(`BSON.serialize() throw a BSONError`, function () {
      const doc = decode();
      expectBSONError(() => BSON.serialize(doc));
    });

    // 'fromInt8Array' -> 'toInt8Array', etc.
    const toHelper = dtypeToHelper(test.dtype_hex).replace('from', 'to');
    it(`Binary.${toHelper}() throw a BSONError`, function () {
      const doc = decode();
      expectBSONError(() => doc.vector[toHelper]());
    });

    if (toHelper === 'toPackedBits') {
      it(`Binary.toBits() throw a BSONError`, function () {
        const doc = decode();
        expectBSONError(() => doc.vector.toBits());
      });
    }

    it(`EJSON.stringify() throw a BSONError`, function () {
      const doc = decode();
      expectBSONError(() => EJSON.stringify(doc));
    });
  });
}

describe('BSON Binary Vector spec tests', () => {
const tests: Record<string, VectorSuite> = Object.create(null);

for (const file of fs.readdirSync(path.join(__dirname, 'specs/bson-binary-vector'))) {
tests[path.basename(file, '.json')] = JSON.parse(
tests[path.basename(file, '.json')] = EJSON.parse(
fs.readFileSync(path.join(__dirname, 'specs/bson-binary-vector', file), 'utf8')
);
}
Expand All @@ -120,20 +240,22 @@ describe('BSON Binary Vector spec tests', () => {
* > MUST assert that the input float array is the same after encoding and decoding.
*/
for (const test of valid) {
it(`encode ${test.description}`, function () {
const bin = make(test.vector, test.dtype_hex, test.padding);
describe(test.description, () => {
it(`calling Binary.${dtypeToHelper(test.dtype_hex)}() with input numbers and serializing it does not throw`, function () {
const bin = make(test.vector!, test.dtype_hex, test.padding);

const buffer = BSON.serialize({ [suite.test_key]: bin });
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
});
const buffer = BSON.serialize({ [suite.test_key]: bin });
expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
});

it(`decode ${test.description}`, function () {
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
const doc = BSON.deserialize(canonical_bson);
it(`creating a Binary instance from BSON bytes does not throw`, function () {
const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
const doc = BSON.deserialize(canonical_bson);

expect(doc[suite.test_key].sub_type).to.equal(0x09);
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
expect(doc[suite.test_key].sub_type).to.equal(0x09);
expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
});
});
}
});
Expand All @@ -147,46 +269,18 @@ describe('BSON Binary Vector spec tests', () => {
for (const test of invalid) {
const expectedErrorMessage = invalidTestExpectedError.get(test.description);

it(`bson: ${test.description}`, function () {
let thrownError: Error | undefined;
try {
const bin = make(test.vector, test.dtype_hex, test.padding);
BSON.serialize({ bin });
} catch (error) {
thrownError = error;
describe(test.description, () => {
if (test.canonical_bson != null) {
testVectorInvalidBSONBytes(test, expectedErrorMessage);
}

if (thrownError?.message.startsWith('unsupported_error')) {
expect(
expectedErrorMessage,
'We expect a certain error message but got an unsupported error'
).to.be.false;
this.skip();
if (test.vector != null) {
testVectorInvalidInputValues(test, expectedErrorMessage);
}

expect(thrownError).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});

it(`extended json: ${test.description}`, function () {
let thrownError: Error | undefined;
try {
const bin = make(test.vector, test.dtype_hex, test.padding);
BSON.EJSON.stringify({ bin });
} catch (error) {
thrownError = error;
if (test.vector == null && test.canonical_bson == null) {
throw new Error('not testing anything for: ' + util.inspect(test));
}

if (thrownError?.message.startsWith('unsupported_error')) {
expect(
expectedErrorMessage,
'We expect a certain error message but got an unsupported error'
).to.be.false;
this.skip();
}

expect(thrownError).to.be.instanceOf(BSONError);
expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage));
});
}
});
Expand Down
20 changes: 17 additions & 3 deletions test/node/specs/bson-binary-vector/float32.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
{
"description": "Infinity Vector FLOAT32",
"valid": true,
"vector": ["-inf", 0.0, "inf"],
"vector": [{"$numberDouble": "-Infinity"}, 0.0, {"$numberDouble": "Infinity"} ],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
Expand All @@ -44,8 +44,22 @@
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 3
"padding": 3,
"canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000"
},
{
"description": "Insufficient vector data with 3 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1700000005766563746F7200050000000927002A2A2A00"
},
{
"description": "Insufficient vector data with 5 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00"
}
]
}

4 changes: 2 additions & 2 deletions test/node/specs/bson-binary-vector/int8.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 3
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000903037F0700"
},
{
"description": "INT8 with float inputs",
Expand All @@ -54,4 +55,3 @@
}
]
}

Loading