fix(build): Pin cuda-python>=12,<13 to avoid trtllm breakage (#2379)

rmccorm4 · web-flow · commit 73bcc3b040b7 · 2025-08-09T00:00:59.000+01:00
diff --git a/container/Dockerfile.tensorrt_llm b/container/Dockerfile.tensorrt_llm
@@ -501,8 +501,12 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
 # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
 # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
 # is also specified. So set the configurable index as a --extra-index-url for prioritization.
-# locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4
-RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+# NOTE: locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc4
+# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc4. This
+#       can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
+#       and we upgrade to a published pip wheel containing this change.
+RUN uv pip install "cuda-python>=12,<13" && \
+    uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
     if [ "$ARCH" = "amd64" ]; then \
         pip install "triton==3.3.1"; \
     fi; \