@@ -65,8 +65,8 @@ class ParserWarning(Warning):
65
65
a list of integers that specify row locations for a multi-index on the
66
66
columns, e.g. [0,1,3]. Intervening rows that are not specified will be
67
67
skipped (e.g. 2 in this example are skipped). Note that this parameter
68
- ignores commented lines, so header=0 denotes the first line of
69
- data rather than the first line of the file.
68
+ ignores commented lines and empty lines if ``skip_blank_lines=True``, so header=0
69
+ denotes the first line of data rather than the first line of the file.
70
70
skiprows : list-like or integer
71
71
Line numbers to skip (0-indexed) or number of lines to skip (int)
72
72
at the start of the file
@@ -110,10 +110,11 @@ class ParserWarning(Warning):
110
110
comment : str, default None
111
111
Indicates remainder of line should not be parsed. If found at the
112
112
beginning of a line, the line will be ignored altogether. This parameter
113
- must be a single character. Also, fully commented lines
114
- are ignored by the parameter `header` but not by `skiprows`. For example,
115
- if comment='#', parsing '#empty\n 1,2,3\n a,b,c' with `header=0` will
116
- result in '1,2,3' being treated as the header.
113
+ must be a single character. Like empty lines (as long as ``skip_blank_lines=True``),
114
+ fully commented lines are ignored by the parameter `header`
115
+ but not by `skiprows`. For example, if comment='#', parsing
116
+ '#empty\n 1,2,3\n a,b,c' with `header=0` will result in '1,2,3' being
117
+ treated as the header.
117
118
decimal : str, default '.'
118
119
Character to recognize as decimal point. E.g. use ',' for European data
119
120
nrows : int, default None
@@ -160,6 +161,8 @@ class ParserWarning(Warning):
160
161
infer_datetime_format : boolean, default False
161
162
If True and parse_dates is enabled for a column, attempt to infer
162
163
the datetime format to speed up the processing
164
+ skip_blank_lines : boolean, default True
165
+ If True, skip over blank lines rather than interpreting them as NaN values.
163
166
164
167
Returns
165
168
-------
@@ -288,6 +291,7 @@ def _read(filepath_or_buffer, kwds):
288
291
'mangle_dupe_cols' : True ,
289
292
'tupleize_cols' : False ,
290
293
'infer_datetime_format' : False ,
294
+ 'skip_blank_lines' : True
291
295
}
292
296
293
297
@@ -378,7 +382,8 @@ def parser_f(filepath_or_buffer,
378
382
squeeze = False ,
379
383
mangle_dupe_cols = True ,
380
384
tupleize_cols = False ,
381
- infer_datetime_format = False ):
385
+ infer_datetime_format = False ,
386
+ skip_blank_lines = True ):
382
387
383
388
# Alias sep -> delimiter.
384
389
if delimiter is None :
@@ -449,7 +454,8 @@ def parser_f(filepath_or_buffer,
449
454
buffer_lines = buffer_lines ,
450
455
mangle_dupe_cols = mangle_dupe_cols ,
451
456
tupleize_cols = tupleize_cols ,
452
- infer_datetime_format = infer_datetime_format )
457
+ infer_datetime_format = infer_datetime_format ,
458
+ skip_blank_lines = skip_blank_lines )
453
459
454
460
return _read (filepath_or_buffer , kwds )
455
461
@@ -1338,6 +1344,7 @@ def __init__(self, f, **kwds):
1338
1344
self .quoting = kwds ['quoting' ]
1339
1345
self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
1340
1346
self .usecols = kwds ['usecols' ]
1347
+ self .skip_blank_lines = kwds ['skip_blank_lines' ]
1341
1348
1342
1349
self .names_passed = kwds ['names' ] or None
1343
1350
@@ -1393,6 +1400,7 @@ def __init__(self, f, **kwds):
1393
1400
1394
1401
# needs to be cleaned/refactored
1395
1402
# multiple date column thing turning into a real spaghetti factory
1403
+
1396
1404
if not self ._has_complex_date_col :
1397
1405
(index_names ,
1398
1406
self .orig_names , self .columns ) = self ._get_index_name (self .columns )
@@ -1590,6 +1598,7 @@ def _infer_columns(self):
1590
1598
1591
1599
while self .line_pos <= hr :
1592
1600
line = self ._next_line ()
1601
+
1593
1602
unnamed_count = 0
1594
1603
this_columns = []
1595
1604
for i , c in enumerate (line ):
@@ -1727,25 +1736,35 @@ def _next_line(self):
1727
1736
line = self ._check_comments ([self .data [self .pos ]])[0 ]
1728
1737
self .pos += 1
1729
1738
# either uncommented or blank to begin with
1730
- if self ._empty (self .data [self .pos - 1 ]) or line :
1739
+ if not self .skip_blank_lines and (self ._empty (self .data [
1740
+ self .pos - 1 ]) or line ):
1731
1741
break
1742
+ elif self .skip_blank_lines :
1743
+ ret = self ._check_empty ([line ])
1744
+ if ret :
1745
+ line = ret [0 ]
1746
+ break
1732
1747
except IndexError :
1733
1748
raise StopIteration
1734
1749
else :
1735
1750
while self .pos in self .skiprows :
1736
- next (self .data )
1737
1751
self .pos += 1
1752
+ next (self .data )
1738
1753
1739
1754
while True :
1740
1755
orig_line = next (self .data )
1741
1756
line = self ._check_comments ([orig_line ])[0 ]
1742
1757
self .pos += 1
1743
- if self ._empty (orig_line ) or line :
1758
+ if not self .skip_blank_lines and ( self . _empty (orig_line ) or line ) :
1744
1759
break
1760
+ elif self .skip_blank_lines :
1761
+ ret = self ._check_empty ([line ])
1762
+ if ret :
1763
+ line = ret [0 ]
1764
+ break
1745
1765
1746
1766
self .line_pos += 1
1747
1767
self .buf .append (line )
1748
-
1749
1768
return line
1750
1769
1751
1770
def _check_comments (self , lines ):
@@ -1766,6 +1785,15 @@ def _check_comments(self, lines):
1766
1785
ret .append (rl )
1767
1786
return ret
1768
1787
1788
+ def _check_empty (self , lines ):
1789
+ ret = []
1790
+ for l in lines :
1791
+ # Remove empty lines and lines with only one whitespace value
1792
+ if len (l ) > 1 or len (l ) == 1 and (not isinstance (l [0 ],
1793
+ compat .string_types ) or l [0 ].strip ()):
1794
+ ret .append (l )
1795
+ return ret
1796
+
1769
1797
def _check_thousands (self , lines ):
1770
1798
if self .thousands is None :
1771
1799
return lines
@@ -1901,7 +1929,6 @@ def _get_lines(self, rows=None):
1901
1929
1902
1930
# already fetched some number
1903
1931
if rows is not None :
1904
-
1905
1932
# we already have the lines in the buffer
1906
1933
if len (self .buf ) >= rows :
1907
1934
new_rows , self .buf = self .buf [:rows ], self .buf [rows :]
@@ -1966,6 +1993,8 @@ def _get_lines(self, rows=None):
1966
1993
lines = lines [:- self .skip_footer ]
1967
1994
1968
1995
lines = self ._check_comments (lines )
1996
+ if self .skip_blank_lines :
1997
+ lines = self ._check_empty (lines )
1969
1998
return self ._check_thousands (lines )
1970
1999
1971
2000
0 commit comments