From 912668ecb29b08b15f4b347104d783845cf04443 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 23 Oct 2016 16:05:13 -0400 Subject: [PATCH 1/2] API: Rename CParserError to ParserError. Partially resolves gh-12665. We will remove CParserError in the future. --- doc/source/io.rst | 4 ++-- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/common.py | 8 +++++--- pandas/io/parsers.py | 4 ++-- pandas/io/tests/parser/common.py | 2 +- pandas/io/tests/parser/test_textreader.py | 6 +++--- pandas/io/tests/parser/test_unsupported.py | 10 +++++----- pandas/io/tests/test_common.py | 12 ++++++++++++ pandas/io/tests/test_html.py | 4 ++-- pandas/parser.pyx | 18 +++++++++++------- pandas/tests/frame/test_to_csv.py | 4 ++-- 11 files changed, 46 insertions(+), 27 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ba1bd328d2991..ee319092c6dd5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1165,8 +1165,8 @@ too many will cause an error by default: In [28]: pd.read_csv(StringIO(data)) --------------------------------------------------------------------------- - CParserError Traceback (most recent call last) - CParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4 + ParserError Traceback (most recent call last) + ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4 You can elect to skip bad lines: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8819a95f27b0d..9c9d929222bdd 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -41,6 +41,7 @@ Backwards incompatible API changes .. 
_whatsnew_0200.api: +- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 127ebc4839fd3..7076d5a62b626 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -65,13 +65,15 @@ def urlopen(*args, **kwargs): _VALID_URLS.discard('') -class CParserError(ValueError): +class ParserError(ValueError): """ - Exception that is thrown by the C engine when it encounters - a parsing error in `pd.read_csv` + Exception that is thrown when an error is encountered in `pd.read_csv` """ pass +# gh-12665: Alias for now and remove later. +CParserError = ParserError + class DtypeWarning(Warning): """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 090a21632cddb..092cba093421a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -26,7 +26,7 @@ from pandas.io.date_converters import generic_parser from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, CParserError, EmptyDataError, + BaseIterator, ParserError, EmptyDataError, ParserWarning, _NA_VALUES) from pandas.tseries import tools @@ -1141,7 +1141,7 @@ def tostr(x): # long for n in range(len(columns[0])): if all(['Unnamed' in tostr(c[n]) for c in columns]): - raise CParserError( + raise ParserError( "Passed header=[%s] are too many rows for this " "multi_index of columns" % ','.join([str(x) for x in self.header]) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 397292ec6d036..4cb00c48976a4 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -50,7 +50,7 @@ def test_bad_stream_exception(self): # Issue 13652: # This test validates that both python engine # and C engine will raise UnicodeDecodeError instead of - # c engine raising CParserError and swallowing exception + # c engine raising ParserError and swallowing exception # that 
caused read to fail. handle = open(self.csv_shiftjs, "rb") codec = codecs.lookup("utf-8") diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py index 7dda9eb9d0af4..49b70fc5e8703 100644 --- a/pandas/io/tests/parser/test_textreader.py +++ b/pandas/io/tests/parser/test_textreader.py @@ -154,7 +154,7 @@ def test_skip_bad_lines(self): reader = TextReader(StringIO(data), delimiter=':', header=None) - self.assertRaises(parser.CParserError, reader.read) + self.assertRaises(parser.ParserError, reader.read) reader = TextReader(StringIO(data), delimiter=':', header=None, @@ -197,7 +197,7 @@ def test_header_not_enough_lines(self): assert_array_dicts_equal(expected, recs) # not enough rows - self.assertRaises(parser.CParserError, TextReader, StringIO(data), + self.assertRaises(parser.ParserError, TextReader, StringIO(data), delimiter=',', header=5, as_recarray=True) def test_header_not_enough_lines_as_recarray(self): @@ -218,7 +218,7 @@ def test_header_not_enough_lines_as_recarray(self): assert_array_dicts_equal(expected, recs) # not enough rows - self.assertRaises(parser.CParserError, TextReader, StringIO(data), + self.assertRaises(parser.ParserError, TextReader, StringIO(data), delimiter=',', header=5, as_recarray=True) def test_escapechar(self): diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index 2fc238acd54e3..5d60c20854a83 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -15,7 +15,7 @@ import pandas.util.testing as tm from pandas.compat import StringIO -from pandas.io.common import CParserError +from pandas.io.common import ParserError from pandas.io.parsers import read_csv, read_table @@ -78,10 +78,10 @@ def test_c_engine(self): x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" msg = 'Error tokenizing data' - with tm.assertRaisesRegexp(CParserError, msg): - read_table(StringIO(text), sep=r'\s+') - with 
tm.assertRaisesRegexp(CParserError, msg): - read_table(StringIO(text), engine='c', sep=r'\s+') + with tm.assertRaisesRegexp(ParserError, msg): + read_table(StringIO(text), sep=r'\s+') + with tm.assertRaisesRegexp(ParserError, msg): + read_table(StringIO(text), engine='c', sep=r'\s+') msg = "Only length-1 thousands markers supported" data = """A|B|C diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index c08d235b07c9e..5a426e74a0b63 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -88,6 +88,18 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) + def test_error_rename(self): + # see gh-12665 + try: + raise common.CParserError() + except common.ParserError: + pass + + try: + raise common.ParserError() + except common.CParserError: + pass + class TestMMapWrapper(tm.TestCase): diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index c202c60f5213d..f4eec864da572 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -23,7 +23,7 @@ is_platform_windows) from pandas.io.common import URLError, urlopen, file_path_to_url from pandas.io.html import read_html -from pandas.parser import CParserError +from pandas.parser import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network @@ -652,7 +652,7 @@ def test_parse_dates_combine(self): def test_computer_sales_page(self): data = os.path.join(DATA_PATH, 'computer_sales_page.html') - with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are " + with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are " "too many rows for this multi_index " "of columns"): self.read_html(data, header=[0, 1]) diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 93a494c176b99..81b59de5afd06 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -13,8 +13,12 @@ from cpython cimport 
(PyObject, PyBytes_FromString, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport PyObject, Py_XDECREF -from io.common import CParserError, DtypeWarning, EmptyDataError +from io.common import ParserError, DtypeWarning, EmptyDataError +# XXX: Import CParserError as alias of ParserError for +# backwards compatibility. Ultimately, we want to remove +# this import. +from io.common import CParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) @@ -719,7 +723,7 @@ cdef class TextReader: if isinstance(msg, list): msg = "[%s], len of %d," % ( ','.join([ str(m) for m in msg ]), len(msg)) - raise CParserError( + raise ParserError( 'Passed header=%s but only %d lines in file' % (msg, self.parser.lines)) @@ -812,7 +816,7 @@ cdef class TextReader: passed_count = len(header[0]) # if passed_count > field_count: - # raise CParserError('Column names have %d fields, ' + # raise ParserError('Column names have %d fields, ' # 'data has %d fields' # % (passed_count, field_count)) @@ -1004,7 +1008,7 @@ cdef class TextReader: (num_cols >= self.parser.line_fields[i]) * num_cols if self.table_width - self.leading_cols > num_cols: - raise CParserError( + raise ParserError( "Too many columns specified: expected %s and found %s" % (self.table_width - self.leading_cols, num_cols)) @@ -1059,7 +1063,7 @@ cdef class TextReader: self.use_unsigned) if col_res is None: - raise CParserError('Unable to parse column %d' % i) + raise ParserError('Unable to parse column %d' % i) results[i] = col_res @@ -1310,7 +1314,7 @@ def _is_file_like(obj): if PY3: import io if isinstance(obj, io.TextIOWrapper): - raise CParserError('Cannot handle open unicode files (yet)') + raise ParserError('Cannot handle open unicode files (yet)') # BufferedReader is a byte reader for Python 3 file = io.BufferedReader @@ -2015,7 +2019,7 @@ cdef raise_parser_error(object base, parser_t *parser): else: message += 'no error message set' - raise CParserError(message) + 
raise ParserError(message) def _concatenate_chunks(list chunks): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 6d09378ca864e..4d6a5bb32038d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.parser import CParserError +from pandas.parser import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd @@ -589,7 +589,7 @@ def _make_frame(names=None): for i in [5, 6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) - with assertRaisesRegexp(CParserError, msg): + with assertRaisesRegexp(ParserError, msg): read_csv(path, tupleize_cols=False, header=lrange(i), index_col=0) From dceed5cc026f77ba2f11c98e80ee3198b44bf936 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 18 Nov 2016 11:14:20 +0100 Subject: [PATCH 2/2] add test for parser.CParserError --- pandas/io/tests/test_common.py | 6 ++++++ pandas/parser.pyx | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py index 5a426e74a0b63..3c980cae3351a 100644 --- a/pandas/io/tests/test_common.py +++ b/pandas/io/tests/test_common.py @@ -11,6 +11,7 @@ from pandas.compat import is_platform_windows, StringIO from pandas import read_csv, concat +import pandas as pd try: from pathlib import Path @@ -100,6 +101,11 @@ def test_error_rename(self): except common.CParserError: pass + try: + raise common.ParserError() + except pd.parser.CParserError: + pass + class TestMMapWrapper(tm.TestCase): diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 81b59de5afd06..9fb99637731be 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -15,9 +15,8 @@ from cpython cimport (PyObject, PyBytes_FromString, from cpython.ref cimport PyObject, Py_XDECREF from io.common 
import ParserError, DtypeWarning, EmptyDataError -# XXX: Import CParserError as alias of ParserError for -# backwards compatibility. Ultimately, we want to remove -# this import. +# Import CParserError as alias of ParserError for backwards compatibility. +# Ultimately, we want to remove this import. See gh-12665 and gh-14479. from io.common import CParserError cdef extern from "Python.h":