Skip to content

BUG: MultiIndex variable length tuples #14823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
31 changes: 27 additions & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from functools import partial
from sys import getsizeof


import numpy as np
from pandas._libs import index as libindex, lib, Timestamp

Expand Down Expand Up @@ -1084,11 +1085,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
name = None if names is None else names[0]
return Index(arrays[0], name=name)

# Check if lengths of all arrays are equal or not,
# Check if lengths of all arrays are equal length or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
if len(arrays[i]) != len(arrays[i - 1]):
raise ValueError('all arrays must be same length')
if not _check_equal_length(arrays):
raise ValueError('all arrays must be same length')

from pandas.core.categorical import _factorize_from_iterables

Expand All @@ -1108,6 +1108,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
----------
tuples : list / sequence of tuple-likes
Each tuple is the index of one row/column.
A ValueError will be raised if the tuples are not all the same length.
sortorder : int or None
Level of sortedness (must be lexicographically sorted by that
level)
Expand Down Expand Up @@ -2671,5 +2672,27 @@ def _sparsify(label_list, start=0, sentinel=''):
return lzip(*result)


def _check_equal_length(seq_of_seqs):
"""
Ensure that all sequences in seq_of_seqs are the same length.

Since this function is time critical, it does zero error checking.
A TypeError will be raised if inner sequence does not support len().

Return True if all sequences are the same length, otherwise False
If seq_of_seqs is empty return True as well.
"""
seq_it = iter(seq_of_seqs)
try:
L0 = len(next(seq_it))
except StopIteration:
return True
else:
for seq in seq_it:
if len(seq) != L0:
return False
return True


def _get_na_rep(dtype):
return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')
8 changes: 5 additions & 3 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,9 +1458,11 @@ def test_str_attribute(self):
tm.assert_index_equal(idx.str.split(), expected)
tm.assert_index_equal(idx.str.split(expand=False), expected)

expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
('f', np.nan, np.nan)])
tm.assert_index_equal(idx.str.split(expand=True), expected)
# This is invalid behavior
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number here

with self.assertRaises(ValueError):
expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
('f', np.nan, np.nan)])
tm.assert_index_equal(idx.str.split(expand=True), expected)

# test boolean case, should return np.array instead of boolean Index
idx = Index(['a1', 'a2', 'b1', 'b2'])
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1727,6 +1727,21 @@ def test_from_tuples_empty(self):
names=['a', 'b'])
tm.assert_index_equal(result, expected)

def test_equal_length(self):
    # Exercise the private _check_equal_length helper directly.
    # The helper is defined in pandas/core/indexes/multi.py, so it must be
    # imported from pandas.core.indexes.multi.
    from pandas.core.indexes.multi import _check_equal_length

    seqs = [[1, 2, 3], [2, 3, 4], [0, 1, 0]]
    self.assertTrue(_check_equal_length(seqs))

    # Lengthening only the last sequence must be detected.
    seqs[-1].append(1)
    self.assertFalse(_check_equal_length(seqs))

    # The helper's docstring states that an empty input returns True.
    self.assertTrue(_check_equal_length([]))

    # Elements that do not support len() surface as TypeError.
    with self.assertRaises(TypeError):
        _check_equal_length([None])

def test_argsort(self):
result = self.index.argsort()
expected = self.index.values.argsort()
Expand Down
15 changes: 8 additions & 7 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2025,13 +2025,14 @@ def test_split_to_multiindex_expand(self):
tm.assert_index_equal(result, exp)
assert result.nlevels == 3

idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
result = idx.str.split('_', expand=True)
exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
), ('one', 'of', 'these', 'things',
'is', 'not')])
tm.assert_index_equal(result, exp)
assert result.nlevels == 6
with self.assertRaises(ValueError):
idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
result = idx.str.split('_', expand=True)
exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
), ('one', 'of', 'these', 'things',
'is', 'not')])
tm.assert_index_equal(result, exp)
self.assertEqual(result.nlevels, 6)

with tm.assert_raises_regex(ValueError, "expand must be"):
idx.str.split('_', expand="not_a_boolean")
Expand Down