pandas-dev · jreback · Jun 16, 2021 · Dec 19, 2020 · Dec 19, 2020 · Dec 19, 2020
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -753,6 +753,7 @@ the end of each data line, confusing the parser. To explicitly disable the
 index column inference and discard the last column, pass ``index_col=False``:
 
 .. ipython:: python
+    :okwarning:
 
     data = "a,b,c\n4,apple,bat,\n8,orange,cow,"
     print(data)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -49,6 +49,7 @@ Other enhancements
 - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
 - Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)
 - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
+- :func:`read_csv` now raises a ``ParserWarning`` if the length of the header or the given names does not match the length of the data when ``usecols`` is not specified (:issue:`21768`)
 - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1844,6 +1844,28 @@ def _do_date_conversions(self, names, data):
 
         return names, data
 
+    def _check_data_length(self, columns: List[str], data: List[np.ndarray]):
+        """Warn when the number of data columns differs from the number of names.
+
+        No-op if ``self.index_col`` is truthy; one extra all-""/NA last column is ok.
+
+        Parameters
+        ----------
+        columns: list of column names
+        data: list of array-likes containing the data column-wise
+        """
+        if not self.index_col and len(columns) != len(data) and columns:
+            if len(columns) == len(data) - 1 and np.all(
+                (is_object_dtype(data[-1]) and data[-1] == "") | isna(data[-1])
+            ):
+                return
+            warnings.warn(
+                "Length of header or names does not match length of data. This leads "
+                "to a loss of data with index_col=False.",
+                ParserWarning,
+                stacklevel=6,
+            )
+
 
 class CParserWrapper(ParserBase):
     def __init__(self, src: FilePathOrBuffer, **kwds):
@@ -2128,6 +2150,8 @@ def read(self, nrows=None):
 
             # columns as list
             alldata = [x[1] for x in data]
+            if self.usecols is None:
+                self._check_data_length(names, alldata)
 
             data = {k: v for k, (i, v) in zip(names, data)}
 
@@ -2516,6 +2540,8 @@ def _exclude_implicit_index(self, alldata):
         if self._col_indices is not None and len(names) != len(self._col_indices):
             names = [names[i] for i in sorted(self._col_indices)]
 
+        self._check_data_length(names, alldata)
+
         return {name: alldata[i + offset] for i, name in enumerate(names)}, names
 
     # legacy

diff --git a/pandas/tests/io/parser/common/__init__.py b/pandas/tests/io/parser/common/__init__.py
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -11,7 +11,7 @@
 import pytest
 
 from pandas._libs.tslib import Timestamp
-from pandas.errors import EmptyDataError, ParserError
+from pandas.errors import EmptyDataError, ParserError, ParserWarning
 
 from pandas import DataFrame, Index, Series, compat
 import pandas._testing as tm
@@ -660,7 +660,8 @@ def test_no_header_two_extra_columns(all_parsers):
     ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
     stream = StringIO("foo,bar,baz,bam,blah")
     parser = all_parsers
-    df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
+    with tm.assert_produces_warning(ParserWarning):
+        df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
     tm.assert_frame_equal(df, ref)