diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt
index 58dc1da214c05..409ef8bee3662 100644
--- a/doc/source/whatsnew/v0.15.2.txt
+++ b/doc/source/whatsnew/v0.15.2.txt
@@ -164,6 +164,7 @@ Bug Fixes
 - Bug in ``merge`` where ``how='left'`` and ``sort=False`` would not preserve left frame order (:issue:`7331`)
 - Fix: The font size was only set on x axis if vertical or the y axis if horizontal. (:issue:`8765`)
 - Fixed division by 0 when reading big csv files in python 3 (:issue:`8621`)
+- ``read_csv`` with ``engine='python'`` and ``usecols`` now only requires each row to contain enough fields for the selected columns, instead of all columns (:issue:`8985`)
 
 
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index b23aa017138e1..e858b4bf0ced4 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1918,9 +1918,21 @@ def _rows_to_cols(self, content):
         # Loop through rows to verify lengths are correct.
         if col_len != zip_len and self.index_col is not False:
             i = 0
+            incorrect_row = False
+            if self.usecols:
+                # Rows only need enough fields to reach the highest
+                # requested column; longer rows are accepted below.
+                # NOTE(review): assumes usecols holds integer positions;
+                # string column names would break max(...) + 1 -- confirm.
+                col_len = max(self.usecols) + 1
+
             for (i, l) in enumerate(content):
                 if len(l) != col_len:
-                    break
+                    # With usecols, surplus trailing fields are tolerated;
+                    # any other length mismatch is an error.
+                    if not (self.usecols and len(l) > col_len):
+                        incorrect_row = True
+                        break
 
             footers = 0
             if self.skip_footer:
@@ -1930,7 +1942,8 @@ def _rows_to_cols(self, content):
 
             msg = ('Expected %d fields in line %d, saw %d' %
                    (col_len, row_num + 1, zip_len))
-            raise ValueError(msg)
+            if incorrect_row:
+                raise ValueError(msg)
 
         if self.usecols:
             if self._implicit_index:
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 2f211ab0381a2..e510b95bb094e 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2090,6 +2090,23 @@ def test_parse_integers_above_fp_precision(self):
 
         self.assertTrue(np.array_equal(result['Numbers'], expected['Numbers']))
 
+    def test_usecols_minimum_fields_in_line(self):
+        # issue #8985: rows only need enough fields to cover usecols
+        data = """
+19,29,39
+19,29,39
+10,20,30,40"""
+        df = pd.read_csv(StringIO(data), engine='python',
+                         header=None, usecols=list(range(3)))
+        self.assertEqual(len(df.columns), 3)
+
+        df = pd.read_csv(StringIO(data), engine='python',
+                         header=None, usecols=list(range(2)))
+        self.assertEqual(len(df.columns), 2)
+
+        self.assertRaises(ValueError, self.read_csv, StringIO(data),
+                          engine='python', usecols=list(range(4)), header=None)
+
     def test_usecols_index_col_conflict(self):
         # Issue 4201 Test that index_col as integer reflects usecols
         data = """SecId,Time,Price,P2,P3