From d1109361431e30b378cd1497b6b52ff1fbe1bf47 Mon Sep 17 00:00:00 2001 From: rxxg <8394766+rxxg@users.noreply.github.com> Date: Wed, 5 Aug 2020 14:32:50 +0200 Subject: [PATCH] Ensure resource closure in all exceptional circumstances during construction (#35566) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/sas/sas7bdat.py | 8 ++++++-- pandas/io/sas/sas_xport.py | 6 +++++- pandas/io/sas/sasreader.py | 10 +++++----- pandas/tests/io/sas/data/corrupt.sas7bdat | Bin 0 -> 292 bytes pandas/tests/io/sas/test_sas7bdat.py | 8 ++++++++ 6 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 pandas/tests/io/sas/data/corrupt.sas7bdat diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8b28a4439e1da..bd4ee9b46db67 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -289,6 +289,7 @@ MultiIndex I/O ^^^ +- :func:`read_sas` no longer leaks resources on failure (:issue:`35566`) - Bug in :meth:`to_csv` caused a ``ValueError`` when it was called with a filename in combination with ``mode`` containing a ``b`` (:issue:`35058`) - In :meth:`read_csv` `float_precision='round_trip'` now handles `decimal` and `thousands` parameters (:issue:`35365`) - :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 76dac39d1889f..f2ee642d8fd42 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -142,8 +142,12 @@ def __init__( self._path_or_buf = open(self._path_or_buf, "rb") self.handle = self._path_or_buf - self._get_properties() - self._parse_metadata() + try: + self._get_properties() + self._parse_metadata() + except Exception: + self.close() + raise def column_data_lengths(self): """Return a numpy int64 array of the column data lengths""" diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 1a4ba544f5d59..9727ec930119b 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -264,7 +264,11 @@ def __init__( # should already be opened in binary mode in Python 3. self.filepath_or_buffer = filepath_or_buffer - self._read_header() + try: + self._read_header() + except Exception: + self.close() + raise def close(self): self.filepath_or_buffer.close() diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index ae9457a8e3147..31d1a6ad471ea 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -136,8 +136,8 @@ def read_sas( if iterator or chunksize: return reader - data = reader.read() - - if ioargs.should_close: - reader.close() - return data + try: + return reader.read() + finally: + if ioargs.should_close: + reader.close() diff --git a/pandas/tests/io/sas/data/corrupt.sas7bdat b/pandas/tests/io/sas/data/corrupt.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..2941ffe3ecdf5c72773d9727c689b90dced8ebdf GIT binary patch literal 292 zcmZQzK!8K98WT2)2%g_NiGzXjxM7ckynvvR5`(cZBa;yeTp2SXinuYOvN3}l1A_oF zBTPxKW3Ymor;n?%e^5|pK!^e^08@{Pc5w`G1nC9I$L;E@kE^@m2&2Jz6!tj4Xce&S gj10_R0SIBKXJBGr=xY{XW)dG96lQ3KBu5Gp0N4g1L;wH) literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 8c14f9de9f61c..9de6ca75fd4d9 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -217,6 +217,14 @@ def test_zero_variables(datapath): pd.read_sas(fname) +def test_corrupt_read(datapath): + # We don't really care about the exact failure, the important thing is + # that the resource should be cleaned up afterwards (BUG #35566) + fname = datapath("io", "sas", "data", "corrupt.sas7bdat") + with pytest.raises(AttributeError): + pd.read_sas(fname) + + def round_datetime_to_ms(ts): if isinstance(ts, datetime): return ts.replace(microsecond=int(round(ts.microsecond, -3) / 1000) * 1000)