diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index ed7ca079a07b5..601a046498e5d 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -5,6 +5,7 @@
 from functools import partial
 from sys import getsizeof
 
+
 import numpy as np
 from pandas._libs import index as libindex, lib, Timestamp
 
@@ -1084,11 +1085,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
             name = None if names is None else names[0]
             return Index(arrays[0], name=name)
 
-        # Check if lengths of all arrays are equal or not,
+        # Check whether all arrays have the same length,
         # raise ValueError, if not
-        for i in range(1, len(arrays)):
-            if len(arrays[i]) != len(arrays[i - 1]):
-                raise ValueError('all arrays must be same length')
+        if not _check_equal_length(arrays):
+            raise ValueError('all arrays must be same length')
 
         from pandas.core.categorical import _factorize_from_iterables
 
@@ -1108,6 +1108,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         ----------
         tuples : list / sequence of tuple-likes
             Each tuple is the index of one row/column.
+            A ValueError is raised if the tuples are not all the same length.
         sortorder : int or None
             Level of sortedness (must be lexicographically sorted by that
             level)
@@ -2671,5 +2672,27 @@ def _sparsify(label_list, start=0, sentinel=''):
     return lzip(*result)
 
 
+def _check_equal_length(seq_of_seqs):
+    """
+    Check whether all sequences in seq_of_seqs have the same length.
+
+    Since this function is time critical, it does zero error checking.
+    A TypeError will be raised if an inner sequence does not support len().
+
+    Return True if all sequences are the same length, otherwise False.
+    If seq_of_seqs is empty, return True as well.
+    """
+    seq_it = iter(seq_of_seqs)
+    try:
+        first_len = len(next(seq_it))
+    except StopIteration:
+        return True
+    else:
+        for seq in seq_it:
+            if len(seq) != first_len:
+                return False
+    return True
+
+
 def _get_na_rep(dtype):
     return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 18dbe6624008a..50752c9fb4f87 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1458,9 +1458,11 @@ def test_str_attribute(self):
         tm.assert_index_equal(idx.str.split(), expected)
         tm.assert_index_equal(idx.str.split(expand=False), expected)
 
-        expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
-                                           ('f', np.nan, np.nan)])
-        tm.assert_index_equal(idx.str.split(expand=True), expected)
+        # This is invalid behavior
+        with self.assertRaises(ValueError):
+            expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
+                                               ('f', np.nan, np.nan)])
+            tm.assert_index_equal(idx.str.split(expand=True), expected)
 
         # test boolean case, should return np.array instead of boolean Index
         idx = Index(['a1', 'a2', 'b1', 'b2'])
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 719cd2f7e01a4..8199ba441f570 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1727,6 +1727,21 @@ def test_from_tuples_empty(self):
                               names=['a', 'b'])
         tm.assert_index_equal(result, expected)
 
+    def test_equal_length(self):
+        # Test _check_equal_length
+        from pandas.core.indexes.multi import _check_equal_length
+
+        seqs = [[1, 2, 3], [2, 3, 4], [0, 1, 0]]
+        self.assertTrue(_check_equal_length(seqs))
+
+        seqs[-1].append(1)
+        self.assertFalse(_check_equal_length(seqs))
+
+        # Test TypeError
+        seqs = [None]
+        with self.assertRaises(TypeError):
+            _check_equal_length(seqs)
+
     def test_argsort(self):
         result = self.index.argsort()
         expected = self.index.values.argsort()
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index bb31fb9260160..0af7d72ad19c4 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2025,13 +2025,14 @@ def test_split_to_multiindex_expand(self):
         tm.assert_index_equal(result, exp)
         assert result.nlevels == 3
 
-        idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
-        result = idx.str.split('_', expand=True)
-        exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
-                                       ), ('one', 'of', 'these', 'things',
-                                           'is', 'not')])
-        tm.assert_index_equal(result, exp)
-        assert result.nlevels == 6
+        with self.assertRaises(ValueError):
+            idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
+            result = idx.str.split('_', expand=True)
+            exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
+                                           ), ('one', 'of', 'these', 'things',
+                                               'is', 'not')])
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.nlevels, 6)
 
         with tm.assert_raises_regex(ValueError, "expand must be"):
             idx.str.split('_', expand="not_a_boolean")