We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da63274 · commit ef5b9aa — Copy full SHA for ef5b9aa
vllm/config/__init__.py
@@ -617,9 +617,10 @@ def _set_cudagraph_sizes(self):
                 not self.model_config.enforce_eager:
             cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
             if len(cuda_graph_sizes) == 1:
-                batch_size_capture_list = [1, 2, 4] + [
-                    i for i in range(8, cuda_graph_sizes[0] + 1, 8)
-                ]
+                max_graph_size = cuda_graph_sizes[0]
+                batch_size_capture_list = [
+                    i for i in [1, 2, 4] if i <= max_graph_size
+                ] + [i for i in range(8, max_graph_size + 1, 8)]
             elif len(cuda_graph_sizes) > 1:
                 batch_size_capture_list = sorted(cuda_graph_sizes)
             else:
0 commit comments