diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 1cff74d41f686..d7955d7210ade 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -60,6 +60,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug where ``read_hdf``/``store.select`` modified the passed ``columns`` list when + the stored object is multi-indexed (:issue:`7212`) - Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 458a245da6bdb..4cbc7aeaa3df7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3453,6 +3453,10 @@ def get_blk_items(mgr, blocks): def process_axes(self, obj, columns=None): """ process axes filters """ + # make a copy to avoid side effects + if columns is not None: + columns = list(columns) + # make sure to include levels if we have them if columns is not None and self.is_multi_index: for n in self.levels: diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 6cfd569904097..7d9c3c051344f 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4617,6 +4617,29 @@ def test_preserve_timedeltaindex_type(self): store['df'] = df assert_frame_equal(store['df'], df) + def test_colums_multiindex_modified(self): + # BUG: 7212 + # read_hdf/store.select modified the passed columns parameter + # when the stored object is multi-indexed. + + df = DataFrame(np.random.rand(4, 5), + index=list('abcd'), + columns=list('ABCDE')) + df.index.name = 'letters' + df = df.set_index(keys='E', append=True) + + data_columns = df.index.names+df.columns.tolist() + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', + mode='a', + append=True, + data_columns=data_columns, + index=False) + cols2load = list('BCD') + cols2load_original = list(cols2load) + df_loaded = read_hdf(path, 'df', columns=cols2load) + self.assertTrue(cols2load_original == cols2load) + def _test_sort(obj): if isinstance(obj, DataFrame):