diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 4792d26d021d6..61c9c37a26812 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -533,7 +533,7 @@ Data sets do not only contain numerical data. pandas provides a wide range of fu Coming from... -------------- -Are you familiar with other software for manipulating tablular data? Learn +Are you familiar with other software for manipulating tabular data? Learn the pandas-equivalent operations compared to software you already know: .. panels:: diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 3ce54cfebf65a..68024fbd05727 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -322,7 +322,7 @@ As usual, **both sides** of the slicers are included as this is label indexing. .. warning:: You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and - for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted + for the **columns**. There are some ambiguous cases where the passed indexer could be misinterpreted   as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows. You should do this: diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index ac4a25728ba5f..ed1689f0c9f79 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -149,7 +149,7 @@ the columns except the one we specify: grouped.sum() The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do -a tranpose: +a transpose: .. ipython:: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 08618d5a6aa16..3d3a7fa6f0f33 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1491,7 +1491,7 @@ def validate_func_kwargs( Returns ------- columns : List[str] - List of user-provied keys. + List of user-provided keys. func : List[Union[str, callable[...,Any]]] List of user-provided aggfuncs diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 54d1497ad05f3..d3bdcee7a7341 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -711,13 +711,13 @@ def register_converter_cb(key) -> None: styler_max_rows = """ : int, optional The maximum number of rows that will be rendered. May still be reduced to - satsify ``max_elements``, which takes precedence. + satisfy ``max_elements``, which takes precedence. """ styler_max_columns = """ : int, optional The maximum number of columns that will be rendered. May still be reduced to - satsify ``max_elements``, which takes precedence. + satisfy ``max_elements``, which takes precedence. """ styler_precision = """ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 23bc0e6280e27..52606cd7a914e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1697,7 +1697,7 @@ def pandas_dtype(dtype) -> DtypeObj: try: with warnings.catch_warnings(): # GH#51523 - Series.astype(np.integer) doesn't show - # numpy deprication warning of np.integer + # numpy deprecation warning of np.integer # Hence enabling DeprecationWarning warnings.simplefilter("always", DeprecationWarning) npdtype = np.dtype(dtype) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 54f6d84c8dc2a..d302085275757 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -901,7 +901,7 @@ def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset: return freq_offset raise TypeError( - "PeriodDtype argument should be string or BaseOffet, " + "PeriodDtype argument should be string or BaseOffset, " f"got {type(freq).__name__}" ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e9826fe8b63a..72fd7fadd0987 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6559,7 +6559,7 @@ def infer_objects(self, copy: bool_t | None = None) -> Self: Parameters ---------- copy : bool, default True - Whether to make a copy for non-object or non-inferrable columns + Whether to make a copy for non-object or non-inferable columns or Series. Returns diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 64076e4952cde..bfe21082cc4d0 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -89,7 +89,7 @@ def get_sheet_data( file_rows_needed: int | None = None, ) -> list[list[Scalar]]: data: list[list[Scalar]] = [] - prevous_row_number = -1 + previous_row_number = -1 # When sparse=True the rows can have different lengths and empty rows are # not returned. The cells are namedtuples of row, col, value (r, c, v). for row in sheet.rows(sparse=True): @@ -99,9 +99,9 @@ def get_sheet_data( # trim trailing empty elements converted_row.pop() if converted_row: - data.extend([[]] * (row_number - prevous_row_number - 1)) + data.extend([[]] * (row_number - previous_row_number - 1)) data.append(converted_row) - prevous_row_number = row_number + previous_row_number = row_number if file_rows_needed is not None and len(data) >= file_rows_needed: break if data: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fc12a8b0722e6..0bd8769b5de60 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -740,7 +740,7 @@ def _calc_max_rows_fitted(self) -> int | None: _, height = get_terminal_size() if self.max_rows == 0: # rows available to fill with actual data - return height - self._get_number_of_auxillary_rows() + return height - self._get_number_of_auxiliary_rows() if self._is_screen_short(height): max_rows = height @@ -775,7 +775,7 @@ def _is_screen_narrow(self, max_width) -> bool: def _is_screen_short(self, max_height) -> bool: return bool(self.max_rows == 0 and len(self.frame) > max_height) - def _get_number_of_auxillary_rows(self) -> int: + def _get_number_of_auxiliary_rows(self) -> int: """Get number of rows occupied by prompt, dots and dimension info.""" dot_row = 1 prompt_row = 1 diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index bfe65f8bf3c29..de88960280102 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -86,11 +86,11 @@ def test_comparisons(self, factor): cat_rev > cat_rev_base2 # Only categories with same ordering information can be compared - cat_unorderd = cat.set_ordered(False) + cat_unordered = cat.set_ordered(False) assert not (cat > cat).any() with pytest.raises(TypeError, match=msg): - cat > cat_unorderd + cat > cat_unordered # comparison (in both directions) with Series will raise s = Series(["b", "b", "b"]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 9057d91b1960a..e862a6985160b 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -523,7 +523,7 @@ def test_freq_argument_required(self): with pytest.raises(TypeError, match=msg): PeriodDtype() - msg = "PeriodDtype argument should be string or BaseOffet, got NoneType" + msg = "PeriodDtype argument should be string or BaseOffset, got NoneType" with pytest.raises(TypeError, match=msg): # GH#51790 PeriodDtype(None) diff --git a/pandas/tests/frame/methods/test_isetitem.py b/pandas/tests/frame/methods/test_isetitem.py index e8064cbc44d5f..69f394afb6519 100644 --- a/pandas/tests/frame/methods/test_isetitem.py +++ b/pandas/tests/frame/methods/test_isetitem.py @@ -38,7 +38,7 @@ def test_isetitem_ea_df_scalar_indexer(self): ) tm.assert_frame_equal(df, expected) - def test_isetitem_dimension_missmatch(self): + def test_isetitem_dimension_mismatch(self): # GH#51701 df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) value = df.copy() diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index b581dfd8c44b0..40c8e4fa27f90 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2006,7 +2006,7 @@ def test_inplace_arithmetic_series_update(using_copy_on_write): tm.assert_frame_equal(df, expected) -def test_arithemetic_multiindex_align(): +def test_arithmetic_multiindex_align(): """ Regression test for: https://github.com/pandas-dev/pandas/issues/33765 """ diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 0b7699e46d720..b40f953cd800e 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -11,7 +11,7 @@ class TestAsArray: - def test_asarray_homogenous(self): + def test_asarray_homogeneous(self): df = DataFrame({"A": Categorical([1, 2]), "B": Categorical([1, 2])}) result = np.asarray(df) # may change from object in the future diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index a9ec726ab443e..07bcb2ccc121a 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -84,7 +84,7 @@ def test_invert_mixed(self): ) tm.assert_frame_equal(result, expected) - def test_invert_empy_not_input(self): + def test_invert_empty_not_input(self): # GH#51032 df = pd.DataFrame() result = ~df diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 3f0150a2186a9..d580b89f2f006 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -603,12 +603,12 @@ def test_filter_non_bool_raises(): def test_filter_dropna_with_empty_groups(): # GH 10780 data = Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) - groupped = data.groupby(level=0) - result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False) + grouped = data.groupby(level=0) + result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False) expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) tm.assert_series_equal(result_false, expected_false) - result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True) + result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True) expected_true = Series(index=pd.Index([], dtype=int), dtype=np.float64) tm.assert_series_equal(result_true, expected_true) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 9b36423be73dd..c06a6fcc2a037 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -320,7 +320,7 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine): def test_readjson_nrows_requires_lines(engine): # GH 33916 - # Test ValuError raised if nrows is set without setting lines in read_json + # Test ValueError raised if nrows is set without setting lines in read_json jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 1635c79de9abb..1a0a5dfe213bd 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -983,12 +983,12 @@ def test_df_axis_param_depr(): index.name = "date" df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index).T - # Deprication error when axis=1 is explicitly passed + # Deprecation error when axis=1 is explicitly passed warning_msg = "DataFrame.resample with axis=1 is deprecated." with tm.assert_produces_warning(FutureWarning, match=warning_msg): df.resample("M", axis=1) - # Deprication error when axis=0 is explicitly passed + # Deprecation error when axis=0 is explicitly passed df = df.T warning_msg = ( "The 'axis' keyword in DataFrame.resample is deprecated and " diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 44b02310eb8a7..32d789c118321 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -338,7 +338,7 @@ def test_concat_mixed_objs(self): result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected) - def test_dtype_coerceion(self): + def test_dtype_coercion(self): # 12411 df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 7e4002dc3a0cf..02244c1686cab 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -730,7 +730,7 @@ def test_constructor_fromisocalendar(self): assert isinstance(result, Timestamp) -def test_constructor_ambigous_dst(): +def test_constructor_ambiguous_dst(): # GH 24329 # Make sure that calling Timestamp constructor # on Timestamp created from ambiguous time diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 2fee395886cff..0a43db87674af 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -29,7 +29,7 @@ class TestTimestampUnaryOps: # -------------------------------------------------------------- - def test_round_divison_by_zero_raises(self): + def test_round_division_by_zero_raises(self): ts = Timestamp("2016-01-01") msg = "Division by zero in rounding" diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 91d6be01eef16..0f044ae576af8 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -576,7 +576,7 @@ def test_getitem_dataframe_raises(): ser[df > 5] -def test_getitem_assignment_series_aligment(): +def test_getitem_assignment_series_alignment(): # https://github.com/pandas-dev/pandas/issues/37427 # with getitem, when assigning with a Series, it is not first aligned ser = Series(range(10)) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index c1579dbbbc21a..ea239c753ecb5 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -189,7 +189,7 @@ def test_sort_values_ignore_index( tm.assert_series_equal(result_ser, expected) tm.assert_series_equal(ser, Series(original_list)) - def test_mergesort_decending_stability(self): + def test_mergesort_descending_stability(self): # GH 28697 s = Series([1, 2, 1, 3], ["first", "b", "second", "c"]) result = s.sort_values(ascending=False, kind="mergesort") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1ec8d990add3a..bcb1fe35eaa28 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -82,7 +82,7 @@ def test_infer_with_date_and_datetime(self): expected = Index(vals, dtype=object) tm.assert_index_equal(idx, expected) - def test_unparseable_strings_with_dt64_dtype(self): + def test_unparsable_strings_with_dt64_dtype(self): # pre-2.0 these would be silently ignored and come back with object dtype vals = ["aa"] msg = "^Unknown datetime string format, unable to parse: aa, at position 0$" diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index f7d41ed536a40..250bee02e06f4 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -146,7 +146,7 @@ def test_index_equal_values_too_far(check_exact, rtol): @pytest.mark.parametrize("check_order", [True, False]) -def test_index_equal_value_oder_mismatch(check_exact, rtol, check_order): +def test_index_equal_value_order_mismatch(check_exact, rtol, check_order): idx1 = Index([1, 2, 3]) idx2 = Index([3, 2, 1]) diff --git a/pyproject.toml b/pyproject.toml index 2aadfd7bd41ef..ac6a4a7b2a61b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -282,7 +282,7 @@ ignore = [ "B904", # Magic number "PLR2004", - # Consider `elif` instead of `else` then `if` to remove indendation level + # Consider `elif` instead of `else` then `if` to remove indentation level "PLR5501", ]