Skip to content

Commit 9c7804c

Browse files
fzoll, KennethEnevoldsen, and Samoed
authored
fix: add voyage quantization models (#3092)
* Adding quantization support
* Update mteb/models/voyage_models.py
  Co-authored-by: Kenneth Enevoldsen <[email protected]>
* Update mteb/model_meta.py
  Co-authored-by: Roman Solomatin <[email protected]>
* Update mteb/model_meta.py
  Co-authored-by: Roman Solomatin <[email protected]>
* Simplifying the quantization/output_dtype
* Update mteb/model_meta.py
  Co-authored-by: Kenneth Enevoldsen <[email protected]>

---------

Co-authored-by: Kenneth Enevoldsen <[email protected]>
Co-authored-by: Roman Solomatin <[email protected]>
1 parent 652ff2b commit 9c7804c

File tree

1 file changed

+88
-1
lines changed

1 file changed

+88
-1
lines changed

mteb/models/voyage_models.py

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
# synthetic data
1717
}
1818

19+
# Translates a requested output dtype name into the `output_dtype` value that
# is passed to the embedding call. Names absent from this mapping are used
# unchanged, because the lookup falls back to the requested name itself.
20+
VOYAGE_DTYPE_TRANSLATION = {
21+
"float32": "float",
22+
"bf16": "float",
23+
}
24+
1925
# Total token limits per model based on VoyageAI documentation
2026
VOYAGE_TOTAL_TOKEN_LIMITS = {
2127
"voyage-3.5-lite": 1_000_000,
@@ -95,6 +101,7 @@ def __init__(
95101
max_tpm: int = 1_000_000,
96102
max_tokens: int | None = None,
97103
model_prompts: dict[str, str] | None = None,
104+
output_dtype: str | None = None,
98105
**kwargs,
99106
) -> None:
100107
requires_package(self, "voyageai", model_name, "pip install 'mteb[voyageai]'")
@@ -106,6 +113,7 @@ def __init__(
106113
self._max_tpm = max_tpm
107114
self._max_tokens = max_tokens
108115
self.model_prompts = self.validate_task_to_prompt_name(model_prompts)
116+
self.output_dtype = output_dtype
109117

110118
def _calculate_default_batch_size(self) -> int:
111119
"""Calculate the default batch size based on total token limit and context length.
@@ -143,6 +151,10 @@ def _batched_encode(
143151
) -> np.ndarray:
144152
embeddings, index = [], 0
145153

154+
output_dtype = VOYAGE_DTYPE_TRANSLATION.get(
155+
self.output_dtype, self.output_dtype
156+
)
157+
146158
while index < len(sentences):
147159
batch, batch_tokens = [], 0
148160
while (
@@ -164,10 +176,31 @@ def _batched_encode(
164176
texts=batch,
165177
model=self._model_name,
166178
input_type=input_type,
179+
output_dtype=output_dtype,
167180
).embeddings
168181
)
169182

170-
return np.array(embeddings)
183+
embeddings_array = np.array(embeddings)
184+
185+
if output_dtype == "binary":
186+
# Unpack bit-packed embeddings: each byte contains 8 embedding values
187+
unpacked_embeddings = []
188+
for embedding in embeddings_array:
189+
# Convert bytes to bits and unpack
190+
unpacked = []
191+
for byte_val in embedding:
192+
# Extract 8 bits from each byte (LSB first)
193+
for bit_pos in range(8):
194+
bit_val = (byte_val >> bit_pos) & 1
195+
# Convert 0/1 to -1/1 for binary (signed)
196+
unpacked.append(1.0 if bit_val else -1.0)
197+
unpacked_embeddings.append(unpacked)
198+
embeddings_array = np.array(unpacked_embeddings, dtype=np.float32)
199+
elif output_dtype != "float":
200+
# Convert int8/uint8 embeddings to float32
201+
embeddings_array = embeddings_array.astype(np.float32)
202+
203+
return embeddings_array
171204

172205

173206
model_prompts = {
@@ -201,6 +234,60 @@ def _batched_encode(
201234
public_training_data=None,
202235
)
203236

237+
# Model metadata entry for voyage-3.5 with int8 output: the loader binds
# output_dtype="int8" on VoyageWrapper, and adapted_from names the base
# "voyageai/voyage-3.5" entry. All other fields are declared explicitly below.
voyage_3_5_int8 = ModelMeta(
238+
name="voyageai/voyage-3.5 (output_dtype=int8)",
239+
revision="1",
240+
release_date="2025-01-21",
241+
languages=None, # supported languages not specified
242+
loader=partial(
243+
VoyageWrapper,
244+
model_name="voyage-3.5",
245+
model_prompts=model_prompts,
246+
output_dtype="int8",
247+
),
248+
max_tokens=32000,
249+
embed_dim=1024,
250+
open_weights=False,
251+
n_parameters=None,
252+
memory_usage_mb=None,
253+
license=None,
254+
reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
255+
similarity_fn_name="cosine",
256+
framework=["API"],
257+
use_instructions=True,
258+
training_datasets=VOYAGE_TRAINING_DATA,
259+
public_training_code=None,
260+
public_training_data=None,
261+
adapted_from="voyageai/voyage-3.5",
262+
)
263+
264+
# Model metadata entry for voyage-3.5 with binary output: the loader binds
# output_dtype="binary" on VoyageWrapper, and adapted_from names the base
# "voyageai/voyage-3.5" entry. embed_dim is declared as the unpacked size
# (8 values per returned byte), not the packed byte count.
voyage_3_5_binary = ModelMeta(
265+
name="voyageai/voyage-3.5 (output_dtype=binary)",
266+
revision="1",
267+
release_date="2025-01-21",
268+
languages=None, # supported languages not specified
269+
loader=partial(
270+
VoyageWrapper,
271+
model_name="voyage-3.5",
272+
model_prompts=model_prompts,
273+
output_dtype="binary",
274+
),
275+
max_tokens=32000,
276+
embed_dim=1024, # Same as original after unpacking from bits
277+
open_weights=False,
278+
n_parameters=None,
279+
memory_usage_mb=None,
280+
license=None,
281+
reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
282+
similarity_fn_name="cosine",
283+
framework=["API"],
284+
use_instructions=True,
285+
training_datasets=VOYAGE_TRAINING_DATA,
286+
public_training_code=None,
287+
public_training_data=None,
288+
adapted_from="voyageai/voyage-3.5",
289+
)
290+
204291
voyage_large_2_instruct = ModelMeta(
205292
name="voyageai/voyage-large-2-instruct",
206293
revision="1",

0 commit comments

Comments
 (0)