diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4bdff1355874e..a32035d0d906f 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -140,6 +140,7 @@ Indexing - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) - Bug in :func:`MultiIndex.remove_unused_levels`` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples`` which would fail to take zipped tuples in python3 (:issue:`18434`) - Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) - diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 81d892fba0fe2..456999b94c523 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1162,6 +1162,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ + if not is_list_like(arrays): + raise TypeError("Input must be a list / sequence of array-likes.") + elif is_iterator(arrays): + arrays = list(arrays) + # Check if lengths of all arrays are equal or not, # raise ValueError, if not for i in range(1, len(arrays)): @@ -1206,6 +1211,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ + if not is_list_like(tuples): + raise TypeError('Input must be a list / sequence of tuple-likes.') + elif is_iterator(tuples): + tuples = list(tuples) + if len(tuples) == 0: if names is None: msg = 'Cannot infer number of levels from empty list' @@ -1260,6 +1270,11 @@ def from_product(cls, iterables, sortorder=None, names=None): from pandas.core.categorical import _factorize_from_iterables from pandas.core.reshape.util import cartesian_product + if not is_list_like(iterables): + raise TypeError("Input must be a list / sequence of iterables.") + elif is_iterator(iterables): + iterables = list(iterables) + labels, levels = _factorize_from_iterables(iterables) labels = cartesian_product(labels) return MultiIndex(levels, labels, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 2f8c27f1abb7d..5c2a0254b072b 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -672,8 +672,9 @@ def test_from_arrays(self): for lev, lab in zip(self.index.levels, self.index.labels): arrays.append(np.asarray(lev).take(lab)) - result = MultiIndex.from_arrays(arrays) - assert list(result) == list(self.index) + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=self.index.names) + tm.assert_index_equal(result, self.index) # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], @@ -681,6 +682,21 @@ def test_from_arrays(self): assert result.levels[0].equals(Index([Timestamp('20130101')])) assert result.levels[1].equals(Index(['a', 'b'])) + def test_from_arrays_iterator(self): + # GH 18434 + arrays = [] + for lev, lab in zip(self.index.levels, self.index.labels): + arrays.append(np.asarray(lev).take(lab)) + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=self.index.names) + tm.assert_index_equal(result, self.index) + + # invalid iterator input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of array-likes."): + MultiIndex.from_arrays(0) + def test_from_arrays_index_series_datetimetz(self): idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, tz='US/Eastern') @@ -825,7 +841,25 @@ def test_from_product(self): expected = MultiIndex.from_tuples(tuples, names=names) tm.assert_index_equal(result, expected) - assert result.names == names + + def test_from_product_iterator(self): + # GH 18434 + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of iterables."): + MultiIndex.from_product(0) def test_from_product_empty(self): # 0 levels @@ -1725,8 +1759,28 @@ def test_from_tuples(self): 'from empty list', MultiIndex.from_tuples, []) - idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - assert len(idx) == 2 + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + def test_from_tuples_iterator(self): + # GH 18434 + # input iterator for tuples + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + # input non-iterables + with tm.assert_raises_regex( + TypeError, 'Input must be a list / sequence of tuple-likes.'): + MultiIndex.from_tuples(0) def test_from_tuples_empty(self): # GH 16777