# NOTE(review): this is a unified git diff with six `diff --git` sections. Its
# line breaks appear to have been collapsed into spaces, so hunk boundaries have
# to be read from the `diff --git` / `@@` markers rather than from line structure.
#
# Purpose: make `Series.str.zfill` dispatch to a per-array `_str_zfill(width)`
# hook instead of building a lambda in the accessor and calling `_str_map`
# directly (GH 61485).
#
# - doc/source/whatsnew/v3.0.0.rst: adds a "Strings" entry for
#   `Series.str.zfill` raising `AttributeError` for `ArrowDtype`, and removes a
#   bullet line that contained only "-".
# - pandas/core/arrays/arrow/array.py: adds `_str_zfill`, which runs
#   `val.zfill(width)` through `self._apply_elementwise` and rewraps the result
#   with `pa.chunked_array`. The TODO marks it as a stopgap until Arrow provides
#   `pc.utf8_zfill` (apache/arrow issue 46683).
# - pandas/core/arrays/string_.py and pandas/core/strings/object_array.py: add
#   `_str_zfill` as `self._str_map(lambda x: x.zfill(width))`, the same mapping
#   the accessor performed before this patch.
# - pandas/core/strings/accessor.py: `zfill` keeps its `is_integer(width)` check
#   (raising `TypeError`) and now calls `self._data.array._str_zfill(width)`.
# - pandas/tests/strings/test_string_array.py: adds the parametrized
#   `test_string_array_zfill`, covering plain padding, "+"/"-" prefixed values,
#   `None` entries, and a width shorter than the strings.
#
# NOTE(review): the new test is parametrized only over `nullable_string_dtype`.
# Whether that fixture exercises the `ArrowDtype` path named in the whatsnew
# entry is not visible in this patch -- confirm, or add an ArrowDtype test.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 099e5bc48353a..6104baeb5b5f3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -725,8 +725,8 @@ Conversion Strings ^^^^^^^ +- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`) - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`) -- Interval ^^^^^^^^ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0b90bcea35100..ac55a42ad4a6e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2601,6 +2601,13 @@ def _str_wrap(self, width: int, **kwargs) -> Self: result = self._apply_elementwise(predicate) return type(self)(pa.chunked_array(result)) + def _str_zfill(self, width: int) -> Self: + # TODO: Replace with pc.utf8_zfill when supported by arrow + # Arrow ENH - https://github.com/apache/arrow/issues/46683 + predicate = lambda val: val.zfill(width) + result = self._apply_elementwise(predicate) + return type(self)(pa.chunked_array(result)) + @property def _dt_days(self) -> Self: return type(self)( diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8048306df91a2..0efa8550fc78d 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1077,6 +1077,9 @@ def _cmp_method(self, other, op): _arith_method = _cmp_method + def _str_zfill(self, width: int) -> Self: + return self._str_map(lambda x: x.zfill(width)) + class StringArrayNumpySemantics(StringArray): _storage = "python" diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 81f7441846589..636a8c997f2a5 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1938,8 +1938,8 @@ def zfill(self, width: int): if not is_integer(width): msg = f"width must be of integer type, not {type(width).__name__}" raise 
TypeError(msg) - f = lambda x: x.zfill(width) - result = self._data.array._str_map(f) + + result = self._data.array._str_zfill(width) return self._wrap_result(result) def slice(self, start=None, stop=None, step=None): diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 0adb7b51cf2b7..bbb73928bf6fb 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -537,3 +537,6 @@ def f(x): return empty_row return [f(val) for val in np.asarray(self)] + + def _str_zfill(self, width: int): + return self._str_map(lambda x: x.zfill(width)) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index c5414022e664b..11fc3034cf290 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -110,3 +110,19 @@ def test_string_array_extract(nullable_string_dtype): result = result.astype(object) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "values, width, expected", + [ + (["a", "ab", "abc", None], 4, ["000a", "00ab", "0abc", None]), + (["1", "-1", "+1", None], 4, ["0001", "-001", "+001", None]), + (["1234", "-1234"], 3, ["1234", "-1234"]), + ], +) +def test_string_array_zfill(nullable_string_dtype, values, width, expected): + # GH #61485 + s = Series(values, dtype=nullable_string_dtype) + result = s.str.zfill(width) + expected = Series(expected, dtype=nullable_string_dtype) + tm.assert_series_equal(result, expected)