Skip to content

Commit 723d460

Browse files
authored
[Bugfix] fix kv nz accuracy bug (#2988)
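When `enable_kv_nz` is true, the output of DeepSeek R1 is invalid. This commit fixes the problem by changing the `cache_mode` used in `exec_kv_prefill` from "PA_BLK_NZ" to "PA_NZ" when `enable_kv_nz` is set. - vLLM version: v0.10.2 - vLLM main: vllm-project/vllm@2b85697 Signed-off-by: realliujiaxu <[email protected]>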
1 parent 8bcc0cc commit 723d460

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

vllm_ascend/attention/mla_v1.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -789,7 +789,7 @@ def exec_kv_prefill(
789789
# npu_kv_rmsnorm_rope_cache needs [B, N, S, D]
790790
kv_no_split = kv_no_split.view(
791791
B, N, S, self.kv_lora_rank + self.qk_rope_head_dim)
792-
cache_mode = "PA_BLK_NZ" if self.enable_kv_nz else "PA"
792+
cache_mode = "PA_NZ" if self.enable_kv_nz else "PA"
793793
_, _, k_pe, k_nope = torch_npu.npu_kv_rmsnorm_rope_cache(
794794
kv_no_split,
795795
self.kv_a_layernorm.weight,

vllm_ascend/torchair/torchair_mla.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1006,7 +1006,7 @@ def exec_kv_prefill(
10061006
kv = self.kv_a_proj_with_mqa(hidden_states)[0]
10071007
# npu_kv_rmsnorm_rope_cache needs [B, N, S, D]
10081008
kv = kv.view(B, N, S, self.kv_lora_rank + self.qk_rope_head_dim)
1009-
cache_mode = "PA_BLK_NZ" if self.enable_kv_nz else "PA"
1009+
cache_mode = "PA_NZ" if self.enable_kv_nz else "PA"
10101010
_, _, k_pe, k_nope = torch_npu.npu_kv_rmsnorm_rope_cache(
10111011
kv,
10121012
self.kv_a_layernorm.weight,

0 commit comments

Comments
 (0)