
Commit dd7bfc8

try to use default block_size
Signed-off-by: lizhiyuan <[email protected]>
1 parent d865f00 commit dd7bfc8

File tree

1 file changed: +5 -1 lines changed


vllm/v1/worker/gpu_model_runner.py

Lines changed: 5 additions & 1 deletion
@@ -3695,7 +3695,8 @@ def _select_common_block_size(self, kv_manager_block_size: int,
             attn_groups: List of attention groups
 
         Returns:
-            Largest block size supported by all backends
+            Block size supported by all backends,
+            prioritizing cache_config.block_size
 
         Raises:
             ValueError: If no common block size found
@@ -3717,6 +3718,9 @@ def _select_common_block_size(self, kv_manager_block_size: int,
                 f"{sorted(supported)}. ")
             raise ValueError(error_msg)
 
+        if self.cache_config.block_size in common_supported_sizes:
+            return self.cache_config.block_size
+
         return max(common_supported_sizes)
 
     def may_reinitialize_input_batch(self,
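For readers skimming the commit, below is a minimal, self-contained sketch of the selection logic after this change. The standalone function, its signature, and the way per-backend supported sizes are gathered are illustrative assumptions, not the actual vllm code; only the intersection check, the new preference for cache_config.block_size, and the max() fallback mirror the diff.

# Sketch only: names outside the diff (function signature, how supported
# sizes are collected) are simplified assumptions, not vllm's implementation.

def select_common_block_size(cache_config_block_size: int,
                             supported_sizes_per_backend: list[set[int]]) -> int:
    """Pick a KV-cache block size that every attention backend supports.

    Prefers the configured default (cache_config.block_size) when all
    backends support it; otherwise falls back to the largest common size.
    """
    if not supported_sizes_per_backend:
        raise ValueError("No attention backends to negotiate a block size with")

    # Intersect the supported sizes across all backends.
    common_supported_sizes = set.intersection(*supported_sizes_per_backend)
    if not common_supported_sizes:
        raise ValueError(
            "No common block size found among backends: "
            f"{[sorted(s) for s in supported_sizes_per_backend]}")

    # Behaviour added by this commit: honour the configured default
    # block size whenever every backend can use it.
    if cache_config_block_size in common_supported_sizes:
        return cache_config_block_size

    # Previous behaviour: fall back to the largest commonly supported size.
    return max(common_supported_sizes)


# Example: backends support {16, 32, 64} and {16, 32}; configured default is 16.
# Before this change the helper would return 32; with it, it returns 16.
assert select_common_block_size(16, [{16, 32, 64}, {16, 32}]) == 16

Under this change, the configured block size wins whenever it is mutually supported; the max() fallback only applies when it is not.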
