Skip to content

Commit ce47c41

Browse files
authored
Merge pull request #125 from runpod-workers/up-0.6.3
update vllm
2 parents ae56b9f + c03ecc4 commit ce47c41

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
1212
python3 -m pip install --upgrade -r /requirements.txt
1313

1414
# Install vLLM and FlashInfer (vLLM is installed via pip again: the issues that required building a fork are fixed, and space optimization matters less now that caching is used)
15-
RUN python3 -m pip install vllm==0.6.2 && \
15+
RUN python3 -m pip install vllm==0.6.3 && \
1616
python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3
1717

1818
# Setup for Option 2: Building the Image with the Model included

src/engine_args.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@
8888
"typical_acceptance_sampler_posterior_alpha": float(os.getenv('TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA', 0)) or None,
8989
"qlora_adapter_name_or_path": os.getenv('QLORA_ADAPTER_NAME_OR_PATH', None),
9090
"disable_logprobs_during_spec_decoding": os.getenv('DISABLE_LOGPROBS_DURING_SPEC_DECODING', None),
91-
"otlp_traces_endpoint": os.getenv('OTLP_TRACES_ENDPOINT', None)
91+
"otlp_traces_endpoint": os.getenv('OTLP_TRACES_ENDPOINT', None),
92+
"use_v2_block_manager": os.getenv('USE_V2_BLOCK_MANAGER', 'true')
9293
}
9394

9495
def match_vllm_args(args):

0 commit comments

Comments
 (0)