Skip to content

Commit b42c222

Browse files
authored
Merge pull request #101 from coreweave/jp/testing/slim-vllm-image
feat(vllm-tensorizer): Optimize Multi-Stage Build for Slimmer Inference Image
2 parents 25af34a + b726efb commit b42c222

File tree

3 files changed

+12
-7
lines changed

3 files changed

+12
-7
lines changed

.github/configurations/vllm-tensorizer.yml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,7 @@ vllm-commit:
  2   2    - 'b6553be1bc75f046b00046a4ad7576364d03c835'
  3   3    flashinfer-commit:
  4   4    - 'v0.2.6.post1'
  5     -  base-image:
  6     -  - 'ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1'
      5 +  builder-base-image:
      6 +  - 'ghcr.io/coreweave/ml-containers/torch-extras:es-cuda-12.9.1-74755e9-nccl-cuda12.9.1-ubuntu22.04-nccl2.27.5-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1'
      7 +  final-base-image:
      8 +  - 'ghcr.io/coreweave/ml-containers/torch-extras:es-cuda-12.9.1-74755e9-base-cuda12.9.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1'

.github/workflows/vllm-tensorizer.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,4 +25,5 @@ jobs:
 25  25    build-args: |
 26  26    VLLM_COMMIT=${{ matrix.vllm-commit }}
 27  27    FLASHINFER_COMMIT=${{ matrix.flashinfer-commit }}
 28     -  BASE_IMAGE=${{ matrix.base-image }}
     28 +  BUILDER_BASE_IMAGE=${{ matrix.builder-base-image }}
     29 +  FINAL_BASE_IMAGE=${{ matrix.final-base-image }}

vllm-tensorizer/Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
  1     -  ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1"
      1 +  ARG BUILDER_BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-cuda-12.9.1-74755e9-nccl-cuda12.9.1-ubuntu22.04-nccl2.27.5-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1"
      2 +  ARG FINAL_BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-cuda-12.9.1-74755e9-base-cuda12.9.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1"
      3 +
  2   4    FROM scratch AS freezer
  3   5    WORKDIR /
  4   6    COPY --chmod=755 freeze.sh /
  5   7
  6     -  FROM ${BASE_IMAGE} AS builder-base
      8 +  FROM ${BUILDER_BASE_IMAGE} AS builder-base
  7   9
  8     -  ARG MAX_JOBS="16"
     10 +  ARG MAX_JOBS="32"
  9  11
 10  12    RUN ldconfig
 11  13

@@ -81,7 +83,7 @@ RUN --mount=type=bind,from=flashinfer-downloader,source=/git/flashinfer,target=/
 81  83    WORKDIR /wheels
 82  84
 83  85
 84     -  FROM ${BASE_IMAGE} AS base
     86 +  FROM ${FINAL_BASE_IMAGE} AS base
 85  87
 86  88    WORKDIR /workspace
 87  89

0 commit comments

Comments
 (0)