
Commit 170b270

Fixes #40262
1 parent 7dbc054 · commit 170b270

File tree: 1 file changed (+5, -0 lines)

src/transformers/modeling_utils.py

Lines changed: 5 additions & 0 deletions
@@ -2483,6 +2483,11 @@ def _flash_attn_2_can_dispatch(self, is_init_check: bool = False) -> bool:
         if not is_flash_attn_2_available():
             preface = "FlashAttention2 has been toggled on, but it cannot be used due to the following error:"
             install_message = "Please refer to the documentation of https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2 to install Flash Attention 2."
+
+            # The `flash-attn` package cannot be installed on Ascend NPU, so the following validation logic can be skipped.
+            if is_torch_npu_available():
+                logger.info("Detected FlashAttention2 on Ascend NPU.")
+                return True
 
             # The `flash-attn` package cannot be installed on Ascend NPU; ignore the related validation logic.
             if importlib.util.find_spec("flash_attn") is None and not is_torch_npu_available():
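For context, here is a minimal, self-contained sketch of the dispatch behavior this commit introduces: on Ascend NPU the check returns True early instead of requiring the `flash-attn` pip package (which cannot be installed there), while other devices still verify that the package is importable. The standalone `flash_attn_2_can_dispatch` function and the simplified `is_torch_npu_available` helper below are stand-ins for illustration only, not the actual implementations in transformers.

import importlib.util
import logging

logger = logging.getLogger(__name__)


def is_torch_npu_available() -> bool:
    # Stand-in check: treat the environment as Ascend NPU whenever the
    # `torch_npu` package is importable. The real transformers utility
    # performs a more thorough runtime check.
    return importlib.util.find_spec("torch_npu") is not None


def flash_attn_2_can_dispatch() -> bool:
    # On Ascend NPU the `flash-attn` pip package cannot be installed, so the
    # package-presence check below would always fail there; the NPU branch
    # returns early instead of raising.
    if is_torch_npu_available():
        logger.info("Detected FlashAttention2 on Ascend NPU.")
        return True

    if importlib.util.find_spec("flash_attn") is None:
        raise ImportError(
            "FlashAttention2 has been toggled on, but it cannot be used because the "
            "`flash-attn` package is not installed. Please refer to "
            "https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2 "
            "to install Flash Attention 2."
        )
    return True


if __name__ == "__main__":
    try:
        print("FlashAttention2 dispatchable:", flash_attn_2_can_dispatch())
    except ImportError as err:
        print("Cannot dispatch:", err)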
