From 912668ecb29b08b15f4b347104d783845cf04443 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Sun, 23 Oct 2016 16:05:13 -0400
Subject: [PATCH 1/2] API: Rename CParserError to ParserError.

Partially resolves gh-12665. CParserError remains available as an
alias of ParserError for now and will be removed in the future.
---
 doc/source/io.rst                          |  4 ++--
 doc/source/whatsnew/v0.20.0.txt            |  1 +
 pandas/io/common.py                        |  8 +++++---
 pandas/io/parsers.py                       |  4 ++--
 pandas/io/tests/parser/common.py           |  2 +-
 pandas/io/tests/parser/test_textreader.py  |  6 +++---
 pandas/io/tests/parser/test_unsupported.py | 10 +++++-----
 pandas/io/tests/test_common.py             | 12 ++++++++++++
 pandas/io/tests/test_html.py               |  4 ++--
 pandas/parser.pyx                          | 18 +++++++++++-------
 pandas/tests/frame/test_to_csv.py          |  4 ++--
 11 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index ba1bd328d2991..ee319092c6dd5 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1165,8 +1165,8 @@ too many will cause an error by default:
 
     In [28]: pd.read_csv(StringIO(data))
     ---------------------------------------------------------------------------
-    CParserError                              Traceback (most recent call last)
-    CParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
+    ParserError                              Traceback (most recent call last)
+    ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
 
 You can elect to skip bad lines:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 8819a95f27b0d..9c9d929222bdd 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -41,6 +41,7 @@ Backwards incompatible API changes
 .. _whatsnew_0200.api:
 
 
+- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv``; the old name ``CParserError`` is kept as an alias for now and will be removed in the future (:issue:`12665`)
 
 
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 127ebc4839fd3..7076d5a62b626 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -65,13 +65,15 @@ def urlopen(*args, **kwargs):
 _VALID_URLS.discard('')
 
 
-class CParserError(ValueError):
+class ParserError(ValueError):
     """
-    Exception that is thrown by the C engine when it encounters
-    a parsing error in `pd.read_csv`
+    Exception that is raised when an error is encountered in `pd.read_csv`
     """
     pass
 
+# gh-12665: Alias for now and remove later.
+CParserError = ParserError
+
 
 class DtypeWarning(Warning):
     """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 090a21632cddb..092cba093421a 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -26,7 +26,7 @@
 from pandas.io.date_converters import generic_parser
 from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg,
                               _get_handle, UnicodeReader, UTF8Recoder,
-                              BaseIterator, CParserError, EmptyDataError,
+                              BaseIterator, ParserError, EmptyDataError,
                               ParserWarning, _NA_VALUES)
 from pandas.tseries import tools
 
@@ -1141,7 +1141,7 @@ def tostr(x):
         # long
         for n in range(len(columns[0])):
             if all(['Unnamed' in tostr(c[n]) for c in columns]):
-                raise CParserError(
+                raise ParserError(
                     "Passed header=[%s] are too many rows for this "
                     "multi_index of columns"
                     % ','.join([str(x) for x in self.header])
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 397292ec6d036..4cb00c48976a4 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -50,7 +50,7 @@ def test_bad_stream_exception(self):
         # Issue 13652:
         # This test validates that both python engine
         # and C engine will raise UnicodeDecodeError instead of
-        # c engine raising CParserError and swallowing exception
+        # c engine raising ParserError and swallowing exception
         # that caused read to fail.
         handle = open(self.csv_shiftjs, "rb")
         codec = codecs.lookup("utf-8")
diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py
index 7dda9eb9d0af4..49b70fc5e8703 100644
--- a/pandas/io/tests/parser/test_textreader.py
+++ b/pandas/io/tests/parser/test_textreader.py
@@ -154,7 +154,7 @@ def test_skip_bad_lines(self):
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None)
-        self.assertRaises(parser.CParserError, reader.read)
+        self.assertRaises(parser.ParserError, reader.read)
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None,
@@ -197,7 +197,7 @@ def test_header_not_enough_lines(self):
         assert_array_dicts_equal(expected, recs)
 
         # not enough rows
-        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
                           delimiter=',', header=5, as_recarray=True)
 
     def test_header_not_enough_lines_as_recarray(self):
@@ -218,7 +218,7 @@ def test_header_not_enough_lines_as_recarray(self):
         assert_array_dicts_equal(expected, recs)
 
         # not enough rows
-        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
                           delimiter=',', header=5, as_recarray=True)
 
     def test_escapechar(self):
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
index 2fc238acd54e3..5d60c20854a83 100644
--- a/pandas/io/tests/parser/test_unsupported.py
+++ b/pandas/io/tests/parser/test_unsupported.py
@@ -15,7 +15,7 @@
 import pandas.util.testing as tm
 
 from pandas.compat import StringIO
-from pandas.io.common import CParserError
+from pandas.io.common import ParserError
 from pandas.io.parsers import read_csv, read_table
 
 
@@ -78,10 +78,10 @@ def test_c_engine(self):
 x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
         msg = 'Error tokenizing data'
 
-        with tm.assertRaisesRegexp(CParserError, msg):
-            read_table(StringIO(text), sep=r'\s+')
-        with tm.assertRaisesRegexp(CParserError, msg):
-            read_table(StringIO(text), engine='c', sep=r'\s+')
+        with tm.assertRaisesRegexp(ParserError, msg):
+            read_table(StringIO(text), sep=r'\s+')
+        with tm.assertRaisesRegexp(ParserError, msg):
+            read_table(StringIO(text), engine='c', sep=r'\s+')
 
         msg = "Only length-1 thousands markers supported"
         data = """A|B|C
diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py
index c08d235b07c9e..5a426e74a0b63 100644
--- a/pandas/io/tests/test_common.py
+++ b/pandas/io/tests/test_common.py
@@ -88,6 +88,18 @@ def test_iterator(self):
         tm.assert_frame_equal(first, expected.iloc[[0]])
         tm.assert_frame_equal(concat(it), expected.iloc[1:])
 
+    def test_error_rename(self):
+        # see gh-12665
+        try:
+            raise common.CParserError()
+        except common.ParserError:
+            pass
+
+        try:
+            raise common.ParserError()
+        except common.CParserError:
+            pass
+
 
 class TestMMapWrapper(tm.TestCase):
 
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index c202c60f5213d..f4eec864da572 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -23,7 +23,7 @@
                            is_platform_windows)
 from pandas.io.common import URLError, urlopen, file_path_to_url
 from pandas.io.html import read_html
-from pandas.parser import CParserError
+from pandas.parser import ParserError
 
 import pandas.util.testing as tm
 from pandas.util.testing import makeCustomDataframe as mkdf, network
@@ -652,7 +652,7 @@ def test_parse_dates_combine(self):
 
     def test_computer_sales_page(self):
         data = os.path.join(DATA_PATH, 'computer_sales_page.html')
-        with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are "
+        with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are "
                                    "too many rows for this multi_index "
                                    "of columns"):
             self.read_html(data, header=[0, 1])
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 93a494c176b99..81b59de5afd06 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -13,8 +13,12 @@ from cpython cimport (PyObject, PyBytes_FromString,
                       PyUnicode_Check, PyUnicode_AsUTF8String,
                       PyErr_Occurred, PyErr_Fetch)
 from cpython.ref cimport PyObject, Py_XDECREF
-from io.common import CParserError, DtypeWarning, EmptyDataError
+from io.common import ParserError, DtypeWarning, EmptyDataError
 
+# XXX: Import CParserError as alias of ParserError for
+# backwards compatibility. Ultimately, we want to remove
+# this import.
+from io.common import CParserError
 
 cdef extern from "Python.h":
     object PyUnicode_FromString(char *v)
@@ -719,7 +723,7 @@ cdef class TextReader:
                     if isinstance(msg, list):
                         msg = "[%s], len of %d," % (
                             ','.join([ str(m) for m in msg ]), len(msg))
-                    raise CParserError(
+                    raise ParserError(
                         'Passed header=%s but only %d lines in file'
                         % (msg, self.parser.lines))
 
@@ -812,7 +816,7 @@ cdef class TextReader:
             passed_count = len(header[0])
 
             # if passed_count > field_count:
-            #     raise CParserError('Column names have %d fields, '
+            #     raise ParserError('Column names have %d fields, '
             #                        'data has %d fields'
             #                        % (passed_count, field_count))
 
@@ -1004,7 +1008,7 @@ cdef class TextReader:
                 (num_cols >= self.parser.line_fields[i]) * num_cols
 
         if self.table_width - self.leading_cols > num_cols:
-            raise CParserError(
+            raise ParserError(
                 "Too many columns specified: expected %s and found %s" %
                 (self.table_width - self.leading_cols, num_cols))
 
@@ -1059,7 +1063,7 @@ cdef class TextReader:
                                              self.use_unsigned)
 
             if col_res is None:
-                raise CParserError('Unable to parse column %d' % i)
+                raise ParserError('Unable to parse column %d' % i)
 
             results[i] = col_res
 
@@ -1310,7 +1314,7 @@ def _is_file_like(obj):
     if PY3:
         import io
         if isinstance(obj, io.TextIOWrapper):
-            raise CParserError('Cannot handle open unicode files (yet)')
+            raise ParserError('Cannot handle open unicode files (yet)')
 
         # BufferedReader is a byte reader for Python 3
         file = io.BufferedReader
@@ -2015,7 +2019,7 @@ cdef raise_parser_error(object base, parser_t *parser):
     else:
         message += 'no error message set'
 
-    raise CParserError(message)
+    raise ParserError(message)
 
 
 def _concatenate_chunks(list chunks):
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index 6d09378ca864e..4d6a5bb32038d 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from pandas.compat import (lmap, range, lrange, StringIO, u)
-from pandas.parser import CParserError
+from pandas.parser import ParserError
 from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
                     date_range, read_csv, compat, to_datetime)
 import pandas as pd
@@ -589,7 +589,7 @@ def _make_frame(names=None):
 
             for i in [5, 6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
-                with assertRaisesRegexp(CParserError, msg):
+                with assertRaisesRegexp(ParserError, msg):
                     read_csv(path, tupleize_cols=False,
                              header=lrange(i), index_col=0)
 

From dceed5cc026f77ba2f11c98e80ee3198b44bf936 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 18 Nov 2016 11:14:20 +0100
Subject: [PATCH 2/2] add test for parser.CParserError

---
 pandas/io/tests/test_common.py | 6 ++++++
 pandas/parser.pyx              | 5 ++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py
index 5a426e74a0b63..3c980cae3351a 100644
--- a/pandas/io/tests/test_common.py
+++ b/pandas/io/tests/test_common.py
@@ -11,6 +11,7 @@
 from pandas.compat import is_platform_windows, StringIO
 
 from pandas import read_csv, concat
+import pandas as pd
 
 try:
     from pathlib import Path
@@ -100,6 +101,11 @@ def test_error_rename(self):
         except common.CParserError:
             pass
 
+        try:
+            raise common.ParserError()
+        except pd.parser.CParserError:
+            pass
+
 
 class TestMMapWrapper(tm.TestCase):
 
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 81b59de5afd06..9fb99637731be 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -15,9 +15,8 @@ from cpython cimport (PyObject, PyBytes_FromString,
 from cpython.ref cimport PyObject, Py_XDECREF
 from io.common import ParserError, DtypeWarning, EmptyDataError
 
-# XXX: Import CParserError as alias of ParserError for
-# backwards compatibility. Ultimately, we want to remove
-# this import.
+# Import CParserError as alias of ParserError for backwards compatibility.
+# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
 from io.common import CParserError
 
 cdef extern from "Python.h":