diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 228e630f95863..58847528d2183 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -393,56 +393,6 @@ def __init__( self._dtype = self._dtype.update_dtype(dtype) self._codes = coerce_indexer_dtype(codes, dtype.categories) - @property - def categories(self): - """ - The categories of this categorical. - - Setting assigns new values to each category (effectively a rename of - each individual category). - - The assigned value has to be a list-like object. All items must be - unique and the number of items in the new categories must be the same - as the number of items in the old categories. - - Assigning to `categories` is a inplace operation! - - Raises - ------ - ValueError - If the new categories do not validate as categories or if the - number of new categories is unequal the number of old categories - - See Also - -------- - rename_categories : Rename categories. - reorder_categories : Reorder categories. - add_categories : Add new categories. - remove_categories : Remove the specified categories. - remove_unused_categories : Remove categories which are not used. - set_categories : Set the categories to the specified ones. - """ - return self.dtype.categories - - @categories.setter - def categories(self, categories): - new_dtype = CategoricalDtype(categories, ordered=self.ordered) - if self.dtype.categories is not None and len(self.dtype.categories) != len( - new_dtype.categories - ): - raise ValueError( - "new categories need to have the same number of " - "items as the old categories!" - ) - self._dtype = new_dtype - - @property - def ordered(self) -> Ordered: - """ - Whether the categories have an ordered relationship. - """ - return self.dtype.ordered - @property def dtype(self) -> CategoricalDtype: """ @@ -458,10 +408,6 @@ def _constructor(self) -> Type["Categorical"]: def _from_sequence(cls, scalars, dtype=None, copy=False): return Categorical(scalars, dtype=dtype) - def _formatter(self, boxed=False): - # Defer to CategoricalFormatter's formatter. - return None - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -640,6 +586,59 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): return cls(codes, dtype=dtype, fastpath=True) + # ------------------------------------------------------------------ + # Categories/Codes/Ordered + + @property + def categories(self): + """ + The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. 
+ """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if self.dtype.categories is not None and len(self.dtype.categories) != len( + new_dtype.categories + ): + raise ValueError( + "new categories need to have the same number of " + "items as the old categories!" + ) + self._dtype = new_dtype + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self.dtype.ordered + @property def codes(self) -> np.ndarray: """ @@ -1104,6 +1103,8 @@ def remove_unused_categories(self, inplace=False): if not inplace: return cat + # ------------------------------------------------------------------ + def map(self, mapper): """ Map categories using input correspondence (dict, Series, or function). @@ -1192,6 +1193,9 @@ def map(self, mapper): __le__ = _cat_compare_op(operator.le) __ge__ = _cat_compare_op(operator.ge) + # ------------------------------------------------------------- + # Validators; ideally these can be de-duplicated + def _validate_insert_value(self, value) -> int: code = self.categories.get_indexer([value]) if (code == -1) and not (is_scalar(value) and isna(value)): @@ -1241,6 +1245,8 @@ def _validate_fill_value(self, fill_value): ) return fill_value + # ------------------------------------------------------------- + def __array__(self, dtype=None) -> np.ndarray: """ The numpy array interface. @@ -1758,6 +1764,10 @@ def __contains__(self, key) -> bool: # ------------------------------------------------------------------ # Rendering Methods + def _formatter(self, boxed=False): + # Defer to CategoricalFormatter's formatter. + return None + def _tidy_repr(self, max_vals=10, footer=True) -> str: """ a short repr displaying only max_vals and an optional (but default @@ -1987,7 +1997,9 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: result = dict(zip(categories, _result)) return result - # reduction ops # + # ------------------------------------------------------------------ + # Reductions + def _reduce(self, name: str, skipna: bool = True, **kwargs): func = getattr(self, name, None) if func is None: @@ -2090,6 +2102,9 @@ def mode(self, dropna=True): codes = sorted(htable.mode_int64(ensure_int64(codes), dropna)) return self._constructor(values=codes, dtype=self.dtype, fastpath=True) + # ------------------------------------------------------------------ + # ExtensionArray Interface + def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are @@ -2179,6 +2194,18 @@ def equals(self, other: object) -> bool: return np.array_equal(self._codes, other_codes) return False + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.core.dtypes.concat import union_categoricals + + return union_categoricals(to_concat) + + # ------------------------------------------------------------------ + def is_dtype_equal(self, other): """ Returns True if categoricals are the same dtype @@ -2217,17 +2244,6 @@ def describe(self): return result - # Implement the ExtensionArray interface - @property - def _can_hold_na(self): - return True - - @classmethod - def _concat_same_type(self, to_concat): - from pandas.core.dtypes.concat import union_categoricals - - return union_categoricals(to_concat) - def isin(self, values) -> np.ndarray: """ Check whether `values` are contained in Categorical. 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d38f77aaceb01..7509cb35069e8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -433,11 +433,6 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.astype("object") - def _maybe_cast_indexer(self, key): - code = self.categories.get_loc(key) - code = self.codes.dtype.type(code) - return code - @doc(Index.where) def where(self, cond, other=None): # TODO: Investigate an alternative implementation with @@ -537,6 +532,14 @@ def _reindex_non_unique(self, target): return new_target, indexer, new_indexer + # -------------------------------------------------------------------- + # Indexing Methods + + def _maybe_cast_indexer(self, key): + code = self.categories.get_loc(key) + code = self.codes.dtype.type(code) + return code + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) @@ -619,6 +622,15 @@ def _convert_arr_indexer(self, keyarr): def _convert_index_indexer(self, keyarr): return self._shallow_copy(keyarr) + @doc(Index._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side, kind): + if kind == "loc": + return label + + return super()._maybe_cast_slice_bound(label, side, kind) + + # -------------------------------------------------------------------- + def take_nd(self, *args, **kwargs): """Alias for `take`""" warnings.warn( @@ -628,13 +640,6 @@ def take_nd(self, *args, **kwargs): ) return self.take(*args, **kwargs) - @doc(Index._maybe_cast_slice_bound) - def _maybe_cast_slice_bound(self, label, side, kind): - if kind == "loc": - return label - - return super()._maybe_cast_slice_bound(label, side, kind) - def map(self, mapper): """ Map values using input correspondence (a dict, Series, or function). diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3fd93a8159041..f0b80c2852bd5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -509,6 +509,9 @@ def snap(self, freq="S"): dta = DatetimeArray(snapped, dtype=self.dtype) return DatetimeIndex._simple_new(dta, name=self.name) + # -------------------------------------------------------------------- + # Indexing Methods + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): """ Calculate datetime bounds for parsed time string and its resolution. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 419ff81a2a478..3f72577c9420e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -57,7 +57,7 @@ from pandas.core.ops import get_op_result_name if TYPE_CHECKING: - from pandas import CategoricalIndex + from pandas import CategoricalIndex # noqa:F401 _VALID_CLOSED = {"left", "right", "both", "neither"} _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -515,28 +515,6 @@ def is_overlapping(self) -> bool: # GH 23309 return self._engine.is_overlapping - def _should_fallback_to_positional(self) -> bool: - # integer lookups in Series.__getitem__ are unambiguously - # positional in this case - return self.dtype.subtype.kind in ["m", "M"] - - def _maybe_cast_slice_bound(self, label, side, kind): - return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) - - @Appender(Index._convert_list_indexer.__doc__) - def _convert_list_indexer(self, keyarr): - """ - we are passed a list-like indexer. 
Return the - indexer for matching intervals. - """ - locs = self.get_indexer_for(keyarr) - - # we have missing values - if (locs == -1).any(): - raise KeyError - - return locs - def _can_reindex(self, indexer: np.ndarray) -> None: """ Check if we are allowing reindexing with this particular indexer. @@ -668,6 +646,9 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False): return sub_idx._searchsorted_monotonic(label, side) + # -------------------------------------------------------------------- + # Indexing Methods + def get_loc( self, key, method: Optional[str] = None, tolerance=None ) -> Union[int, slice, np.ndarray]: @@ -885,6 +866,30 @@ def _convert_slice_indexer(self, key: slice, kind: str): return super()._convert_slice_indexer(key, kind) + def _should_fallback_to_positional(self) -> bool: + # integer lookups in Series.__getitem__ are unambiguously + # positional in this case + return self.dtype.subtype.kind in ["m", "M"] + + def _maybe_cast_slice_bound(self, label, side, kind): + return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) + + @Appender(Index._convert_list_indexer.__doc__) + def _convert_list_indexer(self, keyarr): + """ + we are passed a list-like indexer. Return the + indexer for matching intervals. + """ + locs = self.get_indexer_for(keyarr) + + # we have missing values + if (locs == -1).any(): + raise KeyError + + return locs + + # -------------------------------------------------------------------- + @Appender(Index.where.__doc__) def where(self, cond, other=None): if other is None: @@ -1030,6 +1035,9 @@ def equals(self, other: object) -> bool: and self.closed == other.closed ) + # -------------------------------------------------------------------- + # Set Operations + @Appender(Index.intersection.__doc__) @SetopCheck(op_name="intersection") def intersection( @@ -1115,6 +1123,12 @@ def func(self, other, sort=sort): return func + union = _setop("union") + difference = _setop("difference") + symmetric_difference = _setop("symmetric_difference") + + # -------------------------------------------------------------------- + @property def is_all_dates(self) -> bool: """ @@ -1123,10 +1137,6 @@ def is_all_dates(self) -> bool: """ return False - union = _setop("union") - difference = _setop("difference") - symmetric_difference = _setop("symmetric_difference") - # TODO: arithmetic operations # GH#30817 until IntervalArray implements inequalities, get them from Index diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e49a23935efbd..9630e154ccd17 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3154,6 +3154,8 @@ def _update_indexer(idxr, indexer=indexer): return indexer._values + # -------------------------------------------------------------------- + def _reorder_indexer( self, seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index cd3f1f51a86d2..079f43cb2c66b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -97,6 +97,9 @@ def _validate_dtype(cls, dtype: Dtype) -> None: f"Incorrect `dtype` passed: expected {expected}, received {dtype}" ) + # ---------------------------------------------------------------- + # Indexing Methods + @doc(Index._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side, kind): assert kind in ["loc", "getitem", None] @@ -104,6 +107,8 @@ def _maybe_cast_slice_bound(self, label, side, kind): # we will try to coerce to integers return 
self._maybe_cast_indexer(label) + # ---------------------------------------------------------------- + @doc(Index._shallow_copy) def _shallow_copy(self, values=None, name: Label = lib.no_default): if values is not None and not self._can_hold_na and values.dtype.kind == "f": @@ -293,6 +298,9 @@ class UInt64Index(IntegerIndex): _engine_type = libindex.UInt64Engine _default_dtype = np.dtype(np.uint64) + # ---------------------------------------------------------------- + # Indexing Methods + @doc(Index._convert_arr_indexer) def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so that the values returned @@ -314,6 +322,8 @@ def _convert_index_indexer(self, keyarr): return keyarr.astype(np.uint64) return keyarr + # ---------------------------------------------------------------- + def _wrap_joined_index(self, joined, other): name = get_op_result_name(self, other) return UInt64Index(joined, name=name) @@ -385,6 +395,22 @@ def _convert_slice_indexer(self, key: slice, kind: str): # translate to locations return self.slice_indexer(key.start, key.stop, key.step, kind=kind) + @doc(Index.get_loc) + def get_loc(self, key, method=None, tolerance=None): + if is_bool(key): + # Catch this to avoid accidentally casting to 1.0 + raise KeyError(key) + + if is_float(key) and np.isnan(key): + nan_idxs = self._nan_idxs + if not len(nan_idxs): + raise KeyError(key) + elif len(nan_idxs) == 1: + return nan_idxs[0] + return nan_idxs + + return super().get_loc(key, method=method, tolerance=tolerance) + # ---------------------------------------------------------------- def _format_native_types( @@ -409,22 +435,6 @@ def __contains__(self, other: Any) -> bool: return is_float(other) and np.isnan(other) and self.hasnans - @doc(Index.get_loc) - def get_loc(self, key, method=None, tolerance=None): - if is_bool(key): - # Catch this to avoid accidentally casting to 1.0 - raise KeyError(key) - - if is_float(key) and np.isnan(key): - nan_idxs = self._nan_idxs - if not len(nan_idxs): - raise KeyError(key) - elif len(nan_idxs) == 1: - return nan_idxs[0] - return nan_idxs - - return super().get_loc(key, method=method, tolerance=tolerance) - @cache_readonly def is_unique(self) -> bool: return super().is_unique and self._nan_idxs.size < 2 diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cdb502199c6f1..5282b6f0154b4 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -433,6 +433,41 @@ def inferred_type(self) -> str: # indexing return "period" + def insert(self, loc, item): + if not isinstance(item, Period) or self.freq != item.freq: + return self.astype(object).insert(loc, item) + + i8result = np.concatenate( + (self[:loc].asi8, np.array([item.ordinal]), self[loc:].asi8) + ) + arr = type(self._data)._simple_new(i8result, dtype=self.dtype) + return type(self)._simple_new(arr, name=self.name) + + def join(self, other, how="left", level=None, return_indexers=False, sort=False): + """ + See Index.join + """ + self._assert_can_do_setop(other) + + if not isinstance(other, PeriodIndex): + return self.astype(object).join( + other, how=how, level=level, return_indexers=return_indexers, sort=sort + ) + + # _assert_can_do_setop ensures we have matching dtype + result = Int64Index.join( + self, + other, + how=how, + level=level, + return_indexers=return_indexers, + sort=sort, + ) + return result + + # ------------------------------------------------------------------------ + # Indexing Methods + @Appender(_index_shared_docs["get_indexer"] % 
_index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = ensure_index(target) @@ -607,38 +642,6 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True except KeyError as err: raise KeyError(key) from err - def insert(self, loc, item): - if not isinstance(item, Period) or self.freq != item.freq: - return self.astype(object).insert(loc, item) - - i8result = np.concatenate( - (self[:loc].asi8, np.array([item.ordinal]), self[loc:].asi8) - ) - arr = type(self._data)._simple_new(i8result, dtype=self.dtype) - return type(self)._simple_new(arr, name=self.name) - - def join(self, other, how="left", level=None, return_indexers=False, sort=False): - """ - See Index.join - """ - self._assert_can_do_setop(other) - - if not isinstance(other, PeriodIndex): - return self.astype(object).join( - other, how=how, level=level, return_indexers=return_indexers, sort=sort - ) - - # _assert_can_do_setop ensures we have matching dtype - result = Int64Index.join( - self, - other, - how=how, - level=level, - return_indexers=return_indexers, - sort=sort, - ) - return result - # ------------------------------------------------------------------------ # Set Operation Methods diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f1457a9aac62b..684691501de5c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -338,6 +338,9 @@ def __contains__(self, key: Any) -> bool: return False return key in self._range + # -------------------------------------------------------------------- + # Indexing Methods + @doc(Int64Index.get_loc) def get_loc(self, key, method=None, tolerance=None): if method is None and tolerance is None: @@ -379,6 +382,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): locs[valid] = len(self) - 1 - locs[valid] return ensure_platform_int(locs) + # -------------------------------------------------------------------- + def tolist(self): return list(self._range) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 85c8396dfd1fe..df08fda78823d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -202,6 +202,9 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ return is_timedelta64_dtype(dtype) + # ------------------------------------------------------------------- + # Indexing Methods + def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label @@ -248,6 +251,8 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): return label + # ------------------------------------------------------------------- + def is_type_compatible(self, typ) -> bool: return typ == self.inferred_type or typ == "timedelta"
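
The hunks above only relocate existing methods under section-header comments (Categories/Codes/Ordered, Indexing Methods, Set Operations, Reductions, ExtensionArray Interface); no behavior changes. As a sanity check, here is a minimal sketch, not part of the diff, exercising a few of the relocated APIs. It assumes a pandas 1.x build of this branch, where the `Categorical.categories` setter, `Float64Index.get_loc`, and `PeriodIndex.insert` shown in the hunks are all public.

# Minimal sketch: the moved methods should behave exactly as before the reorg.
import numpy as np
import pandas as pd

# Categorical.categories setter (relocated under "Categories/Codes/Ordered"):
# assigning renames categories in place; the new list must have the same length.
cat = pd.Categorical(["a", "b", "a"], categories=["a", "b"])
cat.categories = ["x", "y"]
print(cat)                  # ['x', 'y', 'x'], Categories (2, object): ['x', 'y']

# Float64Index.get_loc (relocated under "Indexing Methods"):
# a NaN key resolves to the position of the single NaN entry.
idx = pd.Float64Index([1.5, np.nan, 3.0])
print(idx.get_loc(np.nan))  # 1

# PeriodIndex.insert (kept above the new "Indexing Methods" block):
# inserting a Period with matching freq preserves the PeriodIndex dtype.
pidx = pd.period_range("2020-01", periods=3, freq="M")
print(pidx.insert(1, pd.Period("2021-06", freq="M")))

Since the patch is a pure move, output from a snippet like this should be identical before and after applying it.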