33"""
44
55import asyncio
6+ from functools import lru_cache
67from lightrag .utils import logger , get_pinyin_sort_key
78import aiofiles
89import shutil
2728from lightrag .api .utils_api import get_combined_auth_dependency
2829from ..config import global_args
2930
30- # Check docling availability at module load time
31- DOCLING_AVAILABLE = False
32- try :
33- import docling # noqa: F401 # type: ignore[import-not-found]
34-
35- DOCLING_AVAILABLE = True
36- except ImportError :
37- if global_args .document_loading_engine == "DOCLING" :
38- logger .warning (
39- "DOCLING engine requested but 'docling' package not installed. "
40- "Falling back to standard document processing. "
41- "To use DOCLING, install with: pip install lightrag-hku[api,docling]"
42- )
31+
32+ @lru_cache (maxsize = 1 )
33+ def _is_docling_available () -> bool :
34+ """Check if docling is available (cached check).
35+
36+ This function uses lru_cache to avoid repeated import attempts.
37+ The result is cached after the first call.
38+
39+ Returns:
40+ bool: True if docling is available, False otherwise
41+ """
42+ try :
43+ import docling # noqa: F401 # type: ignore[import-not-found]
44+
45+ return True
46+ except ImportError :
47+ return False
4348
4449
4550# Function to format datetime to ISO format string with timezone information
@@ -1204,12 +1209,19 @@ async def pipeline_enqueue_file(
12041209 # Try DOCLING first if configured and available
12051210 if (
12061211 global_args .document_loading_engine == "DOCLING"
1207- and DOCLING_AVAILABLE
1212+ and _is_docling_available ()
12081213 ):
12091214 content = await asyncio .to_thread (
12101215 _convert_with_docling , file_path
12111216 )
12121217 else :
1218+ if (
1219+ global_args .document_loading_engine == "DOCLING"
1220+ and not _is_docling_available ()
1221+ ):
1222+ logger .warning (
1223+ f"DOCLING engine configured but not available for { file_path .name } . Falling back to pypdf."
1224+ )
12131225 # Use pypdf (non-blocking via to_thread)
12141226 content = await asyncio .to_thread (
12151227 _extract_pdf_pypdf ,
@@ -1238,12 +1250,19 @@ async def pipeline_enqueue_file(
12381250 # Try DOCLING first if configured and available
12391251 if (
12401252 global_args .document_loading_engine == "DOCLING"
1241- and DOCLING_AVAILABLE
1253+ and _is_docling_available ()
12421254 ):
12431255 content = await asyncio .to_thread (
12441256 _convert_with_docling , file_path
12451257 )
12461258 else :
1259+ if (
1260+ global_args .document_loading_engine == "DOCLING"
1261+ and not _is_docling_available ()
1262+ ):
1263+ logger .warning (
1264+ f"DOCLING engine configured but not available for { file_path .name } . Falling back to python-docx."
1265+ )
12471266 # Use python-docx (non-blocking via to_thread)
12481267 content = await asyncio .to_thread (_extract_docx , file )
12491268 except Exception as e :
@@ -1268,12 +1287,19 @@ async def pipeline_enqueue_file(
12681287 # Try DOCLING first if configured and available
12691288 if (
12701289 global_args .document_loading_engine == "DOCLING"
1271- and DOCLING_AVAILABLE
1290+ and _is_docling_available ()
12721291 ):
12731292 content = await asyncio .to_thread (
12741293 _convert_with_docling , file_path
12751294 )
12761295 else :
1296+ if (
1297+ global_args .document_loading_engine == "DOCLING"
1298+ and not _is_docling_available ()
1299+ ):
1300+ logger .warning (
1301+ f"DOCLING engine configured but not available for { file_path .name } . Falling back to python-pptx."
1302+ )
12771303 # Use python-pptx (non-blocking via to_thread)
12781304 content = await asyncio .to_thread (_extract_pptx , file )
12791305 except Exception as e :
@@ -1298,12 +1324,19 @@ async def pipeline_enqueue_file(
12981324 # Try DOCLING first if configured and available
12991325 if (
13001326 global_args .document_loading_engine == "DOCLING"
1301- and DOCLING_AVAILABLE
1327+ and _is_docling_available ()
13021328 ):
13031329 content = await asyncio .to_thread (
13041330 _convert_with_docling , file_path
13051331 )
13061332 else :
1333+ if (
1334+ global_args .document_loading_engine == "DOCLING"
1335+ and not _is_docling_available ()
1336+ ):
1337+ logger .warning (
1338+ f"DOCLING engine configured but not available for { file_path .name } . Falling back to openpyxl."
1339+ )
13071340 # Use openpyxl (non-blocking via to_thread)
13081341 content = await asyncio .to_thread (_extract_xlsx , file )
13091342 except Exception as e :
0 commit comments