Skip to content

Commit 63d24a6

Browse files
refactor: Move seek logic into _open_and_parse_file_with_openpyxl
Address reviewer feedback from @darynaishchenko (Comment 2555661755):

- Move the fp.seek(0) try/except block into _open_and_parse_file_with_openpyxl
- Add info-level logging for seek failures instead of a silent pass
- Remove the duplicate seek logic from the open_and_parse_file orchestration method
- Add a hasattr check to avoid AttributeError on non-file-like objects
- Simplify the orchestration method to focus purely on flow control

This centralizes fallback-specific concerns within the openpyxl path and makes the behavior easier to test and reason about.

All local checks pass:

- Unit tests pass (4 passed, 1 skipped)
- MyPy type checking passes
- Ruff format and lint pass

Co-Authored-By: unknown <>
1 parent d2f691a commit 63d24a6

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

airbyte_cdk/sources/file_based/file_types/excel_parser.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -233,6 +233,16 @@ def _open_and_parse_file_with_openpyxl(
233233
Returns:
234234
pd.DataFrame: Parsed data from the Excel file.
235235
"""
236+
# Some file-like objects are not seekable.
237+
if hasattr(fp, "seek"):
238+
try:
239+
fp.seek(0) # type: ignore [union-attr]
240+
except (AttributeError, OSError) as exc:
241+
logger.info(
242+
f"Could not rewind stream for {file_info.file_uri_for_logging}; "
243+
f"proceeding with openpyxl from current position: {exc}"
244+
)
245+
236246
with warnings.catch_warnings(record=True) as warning_records:
237247
warnings.simplefilter("always")
238248
df = pd.ExcelFile(fp, engine="openpyxl").parse() # type: ignore [arg-type, call-overload]
@@ -263,11 +273,4 @@ def open_and_parse_file(
263273
try:
264274
return self._open_and_parse_file_with_calamine(fp, logger, file_info)
265275
except ExcelCalamineParsingError:
266-
# Fallback to openpyxl
267-
try:
268-
fp.seek(0) # type: ignore [union-attr]
269-
except (AttributeError, OSError):
270-
# Some file-like objects may not be seekable; attempt openpyxl parsing anyway
271-
pass
272-
273276
return self._open_and_parse_file_with_openpyxl(fp, logger, file_info)

0 commit comments

Comments
 (0)