vllm-project · jeejeelee · Sep 23, 2025 · Sep 22, 2025
@@ -83,8 +83,8 @@ def prepare_tensors(self, token_lora_mapping: torch.Tensor) -> None:
         Prepare kernel metadata tensors for the current forward pass.
 
         Args:
-            token_lora_tensor (torch.Tensor): Tensor containing lora indices
-            for each input token.
+            token_lora_mapping (torch.Tensor): Tensor containing LoRA indices
+                for each input token.
         """
 
         self._reset()
@@ -136,7 +136,7 @@ def meta_args(
 
         Args:
             token_nums (int): Number of input tokens in the current forward
-            pass. 
+                pass of the kernel.
         """
         return (
             self.token_lora_mapping[:token_nums],

@@ -93,7 +93,6 @@ def bgmv_shrink(
         inputs (torch.Tensor): Input tensor of shape [num_tokens, hidden_size].
         lora_b_weights (torch.Tensor): LoRA weights of shape
             [num_loras, lora_rank, hidden_size].
-        output_tensor (torch.Tensor): (Unused) output tensor (placeholder).
         lora_indices_tensor (torch.Tensor): Tensor of shape [num_tokens]
             indicating which LoRA matrix to use for each token.
         scaling (float, optional): Scalar multiplier applied to the output.