diff --git a/doc/source/release.rst b/doc/source/release.rst index ae31f1e7fc495..8fddee8954689 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -107,6 +107,7 @@ API Changes or numbering columns as needed (:issue:`2385`) - Slicing and advanced/boolean indexing operations on ``Index`` classes will no longer change type of the resulting index (:issue:`6440`). +- ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`). Experimental Features ~~~~~~~~~~~~~~~~~~~~~ @@ -196,6 +197,7 @@ Bug Fixes - Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) - Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) - Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) +- ``.names`` attribute of MultiIndexes passed to ``set_index`` is now preserved (:issue:`6459`). pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 4432e9e891e7d..7bcd30301e4e6 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -92,6 +92,49 @@ These are out-of-bounds selections .. ipython:: python i[[0,1,2]].astype(np.int_) +- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example, + the old behavior returned an Index in this case (:issue:`6459`): + + .. ipython:: python + :suppress: + + from itertools import product + tuples = list(product(('a', 'b'), ('c', 'd'))) + mi = MultiIndex.from_tuples(tuples) + df_multi = DataFrame(np.random.randn(4, 2), index=mi) + tuple_ind = pd.Index(tuples) + + .. ipython:: python + + df_multi.index + + @suppress + df_multi.index = tuple_ind + + # Old behavior, cast MultiIndex to an Index + df_multi.set_index(df_multi.index) + + @suppress + df_multi.index = mi + + # New behavior + df_multi.set_index(df_multi.index) + + This also applies when passing multiple indices to ``set_index``: + + .. 
ipython:: python + + @suppress + df_multi.index = tuple_ind + + # Old output, 2-level MultiIndex of tuples + df_multi.set_index([df_multi.index, df_multi.index]) + + @suppress + df_multi.index = mi + + # New output, 4-level MultiIndex + df_multi.set_index([df_multi.index, df_multi.index]) MultiIndexing Using Slicers diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c1037f018e02..05f7785a401f8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2240,7 +2240,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove = [] for col in keys: - if isinstance(col, Series): + if isinstance(col, MultiIndex): + # append all but the last column so we don't have to modify + # the end of this loop + for n in range(col.nlevels - 1): + arrays.append(col.get_level_values(n)) + + level = col.get_level_values(col.nlevels - 1) + names.extend(col.names) + elif isinstance(col, (Series, Index)): level = col.values names.append(col.name) elif isinstance(col, (list, np.ndarray)): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a36b3c5b15384..1cc357ce2a260 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -12280,6 +12280,33 @@ def test_dtypes_are_correct_after_column_slice(self): pd.Series(odict([('a', np.float_), ('b', np.float_), ('c', np.float_),]))) + def test_set_index_names(self): + df = pd.util.testing.makeDataFrame() + df.index.name = 'name' + + self.assertEquals(df.set_index(df.index).index.names, ['name']) + + mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) + mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, + names=['A', 'B', 'A', 'B']) + + df = df.set_index(['A', 'B']) + + self.assertEquals(df.set_index(df.index).index.names, ['A', 'B']) + + # Check that set_index isn't converting a MultiIndex into an Index + self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex)) + + # Check actual equality + 
tm.assert_index_equal(df.set_index(df.index).index, mi) + + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather + # than a pair of tuples + self.assertTrue(isinstance(df.set_index([df.index, df.index]).index, MultiIndex)) + + # Check equality + tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2) + def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr':