Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,23 @@ from pandas._libs.util cimport (

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime


cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso):
    """
    Step one resolution coarser along the chain ns -> us -> ms -> s.

    Parameters
    ----------
    creso : NPY_DATETIMEUNIT
        Resolution to step down from.

    Returns
    -------
    NPY_DATETIMEUNIT
        The next coarser unit, or NPY_FR_GENERIC when ``creso`` is not one
        of ns, us or ms, i.e. there is no coarser unit in the chain.
    """
    if creso == NPY_DATETIMEUNIT.NPY_FR_ns:
        return NPY_DATETIMEUNIT.NPY_FR_us
    if creso == NPY_DATETIMEUNIT.NPY_FR_us:
        return NPY_DATETIMEUNIT.NPY_FR_ms
    if creso == NPY_DATETIMEUNIT.NPY_FR_ms:
        return NPY_DATETIMEUNIT.NPY_FR_s
    # Every other input (seconds included) has nothing coarser to step to;
    # NPY_FR_GENERIC is the "no coarser unit" marker.
    return NPY_DATETIMEUNIT.NPY_FR_GENERIC


from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
Expand Down Expand Up @@ -301,6 +318,60 @@ cpdef array_to_datetime(
May be datetime64[creso_unit] or object dtype
tzinfo or None
"""
# Try to parse with the given resolution, falling back to coarser units if needed
cdef:
NPY_DATETIMEUNIT fallback_creso = creso
NPY_DATETIMEUNIT original_creso = creso
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC

while True:
try:
# When falling back to coarser units, don't pass unit_for_numerics
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is inaccurate.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot this is inaccurate

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit 0f8a6ef. Updated the comment to accurately explain that unit_for_numerics is set to None during fallback so it will be computed from fallback_creso, rather than incorrectly stating it's "meant for ns resolution only".

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot no. unit_for_numerics shouldn't be set to None during fallback. Just keep using whatever the input unit_for_numerics is.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit 8c99476. Now unit_for_numerics is always passed through unchanged - no longer set to None during fallback. The fallback logic now only applies during inference mode and uses recursion as requested.

# because it's meant for ns resolution only
fallback_unit_for_numerics = unit_for_numerics if fallback_creso == original_creso else None
return _array_to_datetime_impl(
values, errors, dayfirst, yearfirst, utc, fallback_creso, fallback_unit_for_numerics
)
except (OutOfBoundsDatetime, OverflowError):
# Only attempt fallback if we're in inference mode or creso is one
# of the finer resolutions (ns, us, ms)
if not infer_reso and original_creso not in (
NPY_DATETIMEUNIT.NPY_FR_ns,
NPY_DATETIMEUNIT.NPY_FR_us,
NPY_DATETIMEUNIT.NPY_FR_ms,
):
# User explicitly requested a coarse resolution, don't fall back
raise

# If we're in inference mode and haven't set a fallback yet,
# start from nanoseconds for the first retry
if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns
# Continue to retry with ns
continue

# Try the next coarser unit
fallback_creso = get_next_coarser_unit(fallback_creso)
if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# No coarser unit available, re-raise the error
raise
# Continue with coarser unit


@cython.wraparound(False)
@cython.boundscheck(False)
cdef _array_to_datetime_impl(
ndarray values, # object dtype, arbitrary ndim
str errors,
bint dayfirst,
bint yearfirst,
bint utc,
NPY_DATETIMEUNIT creso,
str unit_for_numerics,
):
"""
Internal implementation of array_to_datetime with a specific resolution.
"""
cdef:
Py_ssize_t i, n = values.size
object val
Expand Down Expand Up @@ -453,13 +524,14 @@ cpdef array_to_datetime(
if state.creso_ever_changed:
# We encountered mismatched resolutions, need to re-parse with
# the correct one.
return array_to_datetime(
return _array_to_datetime_impl(
values,
errors=errors,
yearfirst=yearfirst,
dayfirst=dayfirst,
utc=utc,
creso=state.creso,
unit_for_numerics=None,
)
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
Expand Down
65 changes: 64 additions & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
cnp.import_array()


cdef NPY_DATETIMEUNIT get_next_coarser_unit(NPY_DATETIMEUNIT creso):
    """
    Return the unit one step coarser than ``creso`` (ns -> us -> ms -> s).

    Parameters
    ----------
    creso : NPY_DATETIMEUNIT
        Resolution to step down from.

    Returns
    -------
    NPY_DATETIMEUNIT
        us for ns, ms for us, s for ms; NPY_FR_GENERIC for any other input,
        signalling that no coarser unit is available.
    """
    if creso == NPY_DATETIMEUNIT.NPY_FR_ns:
        return NPY_DATETIMEUNIT.NPY_FR_us
    if creso == NPY_DATETIMEUNIT.NPY_FR_us:
        return NPY_DATETIMEUNIT.NPY_FR_ms
    if creso == NPY_DATETIMEUNIT.NPY_FR_ms:
        return NPY_DATETIMEUNIT.NPY_FR_s
    # End of the chain: nothing coarser to offer.
    return NPY_DATETIMEUNIT.NPY_FR_GENERIC


cdef bint format_is_iso(f: str):
"""
Does format match the iso8601 set that can be handled by the C parser?
Expand Down Expand Up @@ -368,7 +384,54 @@ def array_strptime(
creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC
Set to NPY_FR_GENERIC to infer a resolution.
"""
# Try to parse with the given resolution, falling back to coarser units if needed
cdef:
NPY_DATETIMEUNIT fallback_creso = creso
NPY_DATETIMEUNIT original_creso = creso
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC

while True:
try:
return _array_strptime_impl(
values, fmt, exact, errors, utc, fallback_creso
)
except OutOfBoundsDatetime:
# Only attempt fallback if we're in inference mode or creso is one
# of the finer resolutions (ns, us, ms)
if not infer_reso and original_creso not in (
NPY_DATETIMEUNIT.NPY_FR_ns,
NPY_DATETIMEUNIT.NPY_FR_us,
NPY_DATETIMEUNIT.NPY_FR_ms,
):
# User explicitly requested a coarse resolution, don't fall back
raise

# If we're in inference mode and haven't set a fallback yet,
# start from nanoseconds for the first retry
if infer_reso and fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
fallback_creso = NPY_DATETIMEUNIT.NPY_FR_ns
# Continue to retry with ns
continue

# Try the next coarser unit
fallback_creso = get_next_coarser_unit(fallback_creso)
if fallback_creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# No coarser unit available, re-raise the error
raise
# Continue with coarser unit


cdef _array_strptime_impl(
ndarray[object] values,
str fmt,
bint exact,
str errors,
bint utc,
NPY_DATETIMEUNIT creso,
):
"""
Internal implementation of array_strptime with a specific resolution.
"""
cdef:
Py_ssize_t i, n = len(values)
npy_datetimestruct dts
Expand Down Expand Up @@ -565,7 +628,7 @@ def array_strptime(
if state.creso_ever_changed:
# We encountered mismatched resolutions, need to re-parse with
# the correct one.
return array_strptime(
return _array_strptime_impl(
values,
fmt=fmt,
exact=exact,
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,33 @@ def test_infer_with_nat_int_float_str(self, item):
assert tz2 is None
tm.assert_numpy_array_equal(result2, expected[::-1])

def test_array_to_datetime_fallback_to_us_explicit_ns(self):
    # Year 2401 is outside the nanosecond range, so an explicit ns request
    # is expected to fall back to the next coarser unit (us).
    from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

    date_strings = ["2401-09-15"]
    want = np.array(date_strings, dtype="M8[us]")

    parsed, tz_out = tslib.array_to_datetime(
        np.array(date_strings, dtype=object),
        creso=NpyDatetimeUnit.NPY_FR_ns.value,
    )

    assert tz_out is None
    assert parsed.dtype == np.dtype("M8[us]")
    tm.assert_numpy_array_equal(parsed, want)

def test_array_to_datetime_fallback_mixed_explicit_ns(self):
    # With an explicit ns request, one entry outside the nanosecond range is
    # expected to move the whole result to the coarser unit (us), including
    # the entry that would have fit in ns on its own.
    from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

    date_strings = ["2020-01-01", "2401-09-15"]
    want = np.array(date_strings, dtype="M8[us]")

    parsed, tz_out = tslib.array_to_datetime(
        np.array(date_strings, dtype=object),
        creso=NpyDatetimeUnit.NPY_FR_ns.value,
    )

    assert tz_out is None
    assert parsed.dtype == np.dtype("M8[us]")
    tm.assert_numpy_array_equal(parsed, want)


class TestArrayToDatetimeWithTZResolutionInference:
def test_array_to_datetime_with_tz_resolution(self):
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tslibs/test_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,30 @@ def test_array_strptime_str_outside_nano_range(self):
fmt2 = "%b %d, %Y"
res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer)
tm.assert_numpy_array_equal(res2, expected2)

def test_array_strptime_fallback_to_us_explicit_ns(self):
    # Year 2401 is outside the nanosecond range, so an explicit ns request
    # is expected to fall back to the next coarser unit (us).
    from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

    date_strings = ["2401-09-15"]
    want = np.array(date_strings, dtype="M8[us]")

    parsed, _ = array_strptime(
        np.array(date_strings, dtype=object),
        fmt="ISO8601",
        creso=NpyDatetimeUnit.NPY_FR_ns.value,
    )

    assert parsed.dtype == np.dtype("M8[us]")
    tm.assert_numpy_array_equal(parsed, want)

def test_array_strptime_fallback_mixed_explicit_ns(self):
    # With an explicit ns request, one entry outside the nanosecond range is
    # expected to move the whole result to the coarser unit (us), including
    # the entry that would have fit in ns on its own.
    from pandas._libs.tslibs.dtypes import NpyDatetimeUnit

    date_strings = ["2020-01-01", "2401-09-15"]
    want = np.array(date_strings, dtype="M8[us]")

    parsed, _ = array_strptime(
        np.array(date_strings, dtype=object),
        fmt="ISO8601",
        creso=NpyDatetimeUnit.NPY_FR_ns.value,
    )

    assert parsed.dtype == np.dtype("M8[us]")
    tm.assert_numpy_array_equal(parsed, want)