pandas-dev
diff --git a/‎.github/workflows/code-checks.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/code-checks.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 25 additions & 6 deletions b/‎.pre-commit-config.yaml
Lines changed: 25 additions & 6 deletions
diff --git a/‎ci/deps/actions-310.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/actions-310.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/deps/actions-38.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/actions-38.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/deps/actions-39.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/actions-39.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/user_guide/basics.rst
Lines changed: 2 additions & 0 deletions b/‎doc/source/user_guide/basics.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 19 additions & 14 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 19 additions & 14 deletions
diff --git a/‎doc/source/user_guide/timeseries.rst
Lines changed: 7 additions & 6 deletions b/‎doc/source/user_guide/timeseries.rst
Lines changed: 7 additions & 6 deletions
diff --git a/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 34 additions & 1 deletion b/‎doc/source/whatsnew/v2.0.0.rst
Lines changed: 34 additions & 1 deletion
diff --git a/‎pandas/_libs/tslib.pyx
Lines changed: 4 additions & 0 deletions b/‎pandas/_libs/tslib.pyx
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/_libs/tslibs/parsing.pyx
Lines changed: 23 additions & 0 deletions b/‎pandas/_libs/tslibs/parsing.pyx
Lines changed: 23 additions & 0 deletions
@@ -39,8 +39,8 @@ jobs:
       with:
         extra_args: --verbose --all-files
 
-  docstring_typing_pylint:
-    name: Docstring validation, typing, and pylint
+  docstring_typing_manual_hooks:
+    name: Docstring validation, typing, and other manual pre-commit hooks
     runs-on: ubuntu-22.04
     defaults:
       run:
 
@@ -1,7 +1,17 @@
 minimum_pre_commit_version: 2.15.0
 exclude: ^LICENSES/|\.(html|csv|svg)$
-# reserve "manual" for mypy and pyright
-default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+# reserve "manual" for relatively slow hooks which we still want to run in CI
+default_stages: [
+    commit,
+    merge-commit,
+    push,
+    prepare-commit-msg,
+    commit-msg,
+    post-checkout,
+    post-commit,
+    post-merge,
+    post-rewrite
+]
 ci:
     autofix_prs: false
 repos:
@@ -34,9 +44,11 @@ repos:
     -   id: debug-statements
     -   id: end-of-file-fixer
         exclude: \.txt$
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
+                 post-checkout, post-commit, post-merge, post-rewrite]
     -   id: trailing-whitespace
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
+                 post-checkout, post-commit, post-merge, post-rewrite]
 -   repo: https://github.com/cpplint/cpplint
     rev: 1.6.1
     hooks:
@@ -46,7 +58,13 @@ repos:
         # this particular codebase (e.g. src/headers, src/klib). However,
         # we can lint all header files since they aren't "generated" like C files are.
         exclude: ^pandas/_libs/src/(klib|headers)/
-        args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
+        args: [
+            --quiet,
+            '--extensions=c,h',
+            '--headers=h',
+            --recursive,
+            '--filter=-readability/casting,-runtime/int,-build/include_subdir'
+        ]
 -   repo: https://github.com/PyCQA/flake8
     rev: 6.0.0
     hooks:
@@ -107,6 +125,7 @@ repos:
     hooks:
     -   id: yesqa
         additional_dependencies: *flake8_dependencies
+        stages: [manual]
 -   repo: local
     hooks:
     # NOTE: we make `black` a local hook because if it's installed from
@@ -214,7 +233,6 @@ repos:
         exclude: ^pandas/tests/extension/base/base\.py
     -   id: pip-to-conda
         name: Generate pip dependency from conda
-        description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
         language: python
         entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
@@ -311,6 +329,7 @@ repos:
         files: ^pandas
         exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
         language: python
+        stages: [manual]
         additional_dependencies:
         - autotyping==22.9.0
         - libcst==0.4.7
@@ -33,7 +33,7 @@ dependencies:
   - gcsfs
   - jinja2
   - lxml
-  - matplotlib
+  - matplotlib>=3.6.1
   - numba
   - numexpr
   - openpyxl
 
@@ -33,7 +33,7 @@ dependencies:
   - gcsfs
   - jinja2
   - lxml
-  - matplotlib
+  - matplotlib>=3.6.1
   - numba
   - numexpr
   - openpyxl
 
@@ -33,7 +33,7 @@ dependencies:
   - gcsfs
   - jinja2
   - lxml
-  - matplotlib
+  - matplotlib>=3.6.1
   - numba
   - numexpr
   - openpyxl
 
@@ -2312,6 +2312,7 @@ useful if you are reading in data which is mostly of the desired dtype (e.g. num
 non-conforming elements intermixed that you want to represent as missing:
 
 .. ipython:: python
+   :okwarning:
 
     import datetime
 
@@ -2328,6 +2329,7 @@ The ``errors`` parameter has a third option of ``errors='ignore'``, which will s
 encounters any errors with the conversion to a desired data type:
 
 .. ipython:: python
+    :okwarning:
 
     import datetime
 
 
@@ -968,17 +968,7 @@ To parse the mixed-timezone values as a datetime column, pass a partially-applie
 Inferring datetime format
 +++++++++++++++++++++++++
 
-If you have ``parse_dates`` enabled for some or all of your columns, and your
-datetime strings are all formatted the same way, you may get a large speed
-up by setting ``infer_datetime_format=True``.  If set, pandas will attempt
-to guess the format of your datetime strings, and then use a faster means
-of parsing the strings.  5-10x parsing speeds have been observed.  pandas
-will fallback to the usual parsing if either the format cannot be guessed
-or the format that was guessed cannot properly parse the entire column
-of strings.  So in general, ``infer_datetime_format`` should not have any
-negative consequences if enabled.
-
-Here are some examples of datetime strings that can be guessed (All
+Here are some examples of datetime strings that can be guessed (all
 representing December 30th, 2011 at 00:00:00):
 
 * "20111230"
@@ -988,21 +978,36 @@ representing December 30th, 2011 at 00:00:00):
 * "30/Dec/2011 00:00:00"
 * "30/December/2011 00:00:00"
 
-Note that ``infer_datetime_format`` is sensitive to ``dayfirst``.  With
+Note that format inference is sensitive to ``dayfirst``.  With
 ``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With
 ``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th.
 
+If you try to parse a column of date strings, pandas will attempt to guess the format
+from the first non-NaN element, and will then parse the rest of the column with that
+format. If pandas fails to guess the format (for example if your first string is
+``'01 December US/Pacific 2000'``), then a warning will be raised and each
+row will be parsed individually by ``dateutil.parser.parse``. The safest
+way to parse dates is to explicitly set ``format=``.
+
 .. ipython:: python
 
-   # Try to infer the format for the index column
    df = pd.read_csv(
        "foo.csv",
        index_col=0,
        parse_dates=True,
-       infer_datetime_format=True,
    )
    df
 
+In the case that you have mixed datetime formats within the same column, you'll need to
+first read it in as an object dtype and then apply :func:`to_datetime` to each element.
+
+.. ipython:: python
+
+   data = io.StringIO("date\n12 Jan 2000\n2000-01-13\n")
+   df = pd.read_csv(data)
+   df['date'] = df['date'].apply(pd.to_datetime)
+   df
+
 .. ipython:: python
    :suppress:
 
 
@@ -132,6 +132,8 @@ time.
 
 .. ipython:: python
 
+   import datetime
+
    pd.Timestamp(datetime.datetime(2012, 5, 1))
    pd.Timestamp("2012-05-01")
    pd.Timestamp(2012, 5, 1)
@@ -196,26 +198,25 @@ is converted to a ``DatetimeIndex``:
 
 .. ipython:: python
 
-    pd.to_datetime(pd.Series(["Jul 31, 2009", "2010-01-10", None]))
+    pd.to_datetime(pd.Series(["Jul 31, 2009", "Jan 10, 2010", None]))
 
-    pd.to_datetime(["2005/11/23", "2010.12.31"])
+    pd.to_datetime(["2005/11/23", "2010/12/31"])
 
 If you use dates which start with the day first (i.e. European style),
 you can pass the ``dayfirst`` flag:
 
 .. ipython:: python
-   :okwarning:
+    :okwarning:
 
     pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
 
-    pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)
+    pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)
 
 .. warning::
 
    You see in the above example that ``dayfirst`` isn't strict. If a date
    can't be parsed with the day being first it will be parsed as if
-   ``dayfirst`` were False, and in the case of parsing delimited date strings
-   (e.g. ``31-12-2012``) then a warning will also be raised.
+   ``dayfirst`` were ``False`` and a warning will also be raised.
 
 If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
 ``Timestamp`` can also accept string input, but it doesn't accept string parsing
 
@@ -417,6 +417,38 @@ Optional libraries below the lowest tested version may still work, but are not c
 
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
+Datetimes are now parsed with a consistent format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the past, :func:`to_datetime` guessed the format for each element independently. This was appropriate for some cases where elements had mixed date formats - however, it would regularly cause problems when users expected a consistent format but the function would switch formats between elements. As of version 2.0.0, parsing will use a consistent format, determined by the first non-NA value (unless the user specifies a format, in which case that is used).
+
+*Old behavior*:
+
+  .. code-block:: ipython
+
+     In [1]: ser = pd.Series(['13-01-2000', '12-01-2000'])
+     In [2]: pd.to_datetime(ser)
+     Out[2]:
+     0   2000-01-13
+     1   2000-12-01
+     dtype: datetime64[ns]
+
+*New behavior*:
+
+  .. ipython:: python
+    :okwarning:
+
+     ser = pd.Series(['13-01-2000', '12-01-2000'])
+     pd.to_datetime(ser)
+
+Note that this affects :func:`read_csv` as well.
+
+If you still need to parse dates with inconsistent formats, you'll need to apply :func:`to_datetime`
+to each element individually, e.g. ::
+
+     ser = pd.Series(['13-01-2000', '12 January 2000'])
+     ser.apply(pd.to_datetime)
+
 .. _whatsnew_200.api_breaking.other:
 
 Other API changes
@@ -459,7 +491,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
--
+- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
 
 .. ---------------------------------------------------------------------------
 
@@ -834,6 +866,7 @@ I/O
 - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
 - Bug when pickling a subset of PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`)
 - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
+- Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`)
 - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
 - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
 - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
 
@@ -300,6 +300,10 @@ def array_with_unit_to_datetime(
             iresult = values.astype("i8", copy=False)
             # fill missing values by comparing to NPY_NAT
             mask = iresult == NPY_NAT
+            # Trying to convert NaN to integer results in undefined
+            # behaviour, so handle it explicitly (see GH #48705)
+            if values.dtype.kind == "f":
+                mask |= values != values
             iresult[mask] = 0
             fvalues = iresult.astype("f8") * mult
             need_to_iterate = False
 
@@ -1032,6 +1032,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
     # rebuild string, capturing any inferred padding
     dt_str = "".join(tokens)
     if parsed_datetime.strftime(guessed_format) == dt_str:
+        _maybe_warn_about_dayfirst(guessed_format, dayfirst)
         return guessed_format
     else:
         return None
@@ -1051,6 +1052,28 @@ cdef str _fill_token(token: str, padding: int):
         token_filled = f"{seconds}.{nanoseconds}"
     return token_filled
 
+cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
+    """
+    Warn if guessed datetime format doesn't respect dayfirst argument.
+
+    Parameters
+    ----------
+    format : str
+        Format string to inspect; it is searched for the ``%d`` and ``%m``
+        directives. The caller visible in this hunk,
+        ``guess_datetime_format``, passes the format it has just guessed.
+    dayfirst : bool
+        The ``dayfirst`` preference the directive order is compared against.
+
+    Notes
+    -----
+    Nothing is emitted unless both ``%d`` and ``%m`` occur in ``format``.
+    A ``UserWarning`` is issued when ``%d`` comes after ``%m`` although
+    ``dayfirst`` is true, or when ``%d`` comes before ``%m`` although
+    ``dayfirst`` is false.
+    """
+    # NOTE(review): declared ``cdef void`` with no exception clause. Whether
+    # an exception raised in here (e.g. a warning escalated to an error by the
+    # active warning filters) reaches the caller depends on the Cython
+    # version/directives in use -- confirm this is the intended behaviour.
+    cdef:
+        # ``str.find`` yields -1 when the directive does not occur at all.
+        int day_index = format.find("%d")
+        int month_index = format.find("%m")
+
+    # Only formats containing both a day and a month directive can be
+    # at odds with ``dayfirst``.
+    if (day_index != -1) and (month_index != -1):
+        # Month precedes day in the format, but day-first was requested.
+        if (day_index > month_index) and dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=True was specified. "
+                "Pass `dayfirst=False` or specify a format to silence this warning.",
+                UserWarning,
+                # presumably points the warning at the user's calling code;
+                # ``find_stack_level`` is defined elsewhere -- TODO confirm
+                stacklevel=find_stack_level(),
+            )
+        # Day precedes month in the format, but day-first was not requested.
+        # The two conditions are mutually exclusive, so at most one warning
+        # is emitted per call.
+        if (day_index < month_index) and not dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=False was specified. "
+                "Pass `dayfirst=True` or specify a format to silence this warning.",
+                UserWarning,
+                stacklevel=find_stack_level(),
+            )
+
 
 @cython.wraparound(False)
 @cython.boundscheck(False)