From 41aa759965e8593e2286eaf92f498a0e949e05c3 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 7 Dec 2016 22:15:05 -0600
Subject: [PATCH 1/8] add tests for variable length sequences.

---
 pandas/tests/indexes/test_multi.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 365236f72e80e..5bb7f7a52913c 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1640,6 +1640,19 @@ def test_from_tuples(self):
         idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
         self.assertEqual(len(idx), 2)
 
+    def test_from_tuples_variable_length(self):
+        # check that len(MultiIndex) == max(len(iterables))
+        T = ((1,), (2, 3), (4, 5, 6))
+
+        idx = MultiIndex.from_tuples(T)
+        self.assertEqual(len(idx), 3)
+
+        idx = MultiIndex.from_tuples(set(T))
+        self.assertEqual(len(idx), 3)
+
+        idx = MultiIndex.from_tuples(list(T))
+        self.assertEqual(len(idx), 3)
+
     def test_argsort(self):
         result = self.index.argsort()
         expected = self.index._tuple_index.argsort()

From 14da53cc19fbf251f79c19d129eefdeb2c7e697c Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 7 Dec 2016 22:16:32 -0600
Subject: [PATCH 2/8] use zip_longest to avoid truncating tuples.

---
 pandas/indexes/multi.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 9ab07d87fd13b..95606c5307e1f 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -5,6 +5,11 @@
 from functools import partial
 from sys import getsizeof
 
+try:
+    from itertools import zip_longest
+except ImportError:
+    from itertools import izip_longest as zip_longest
+
 import numpy as np
 import pandas.lib as lib
 import pandas.index as _index
@@ -1015,7 +1020,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
         else:
-            arrays = lzip(*tuples)
+            arrays = list(zip_longest(*tuples))
 
         return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
 

From 0c118bce72b06d3b3ce7eaee5d4ae6150a1d36e8 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 1 Feb 2017 21:07:05 -0600
Subject: [PATCH 3/8] Check that tuples are all same length.

Factor out the equal-length check into a separate helper function.
---
 pandas/indexes/multi.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 95606c5307e1f..97599e0fce359 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -964,11 +964,10 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
             name = None if names is None else names[0]
             return Index(arrays[0], name=name)
 
-        # Check if lengths of all arrays are equal or not,
+        # Check whether all arrays are the same length;
         # raise ValueError, if not
-        for i in range(1, len(arrays)):
-            if len(arrays[i]) != len(arrays[i - 1]):
-                raise ValueError('all arrays must be same length')
+        if not _check_equal_length(arrays):
+            raise ValueError('all arrays must be same length')
 
         from pandas.core.categorical import _factorize_from_iterables
 
@@ -988,6 +987,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         ----------
         tuples : list / sequence of tuple-likes
             Each tuple is the index of one row/column.
+            A ValueError is raised if the tuples are not all the same length.
         sortorder : int or None
             Level of sortedness (must be lexicographically sorted by that
             level)
@@ -1012,6 +1012,9 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
             # I think this is right? Not quite sure...
             raise TypeError('Cannot infer number of levels from empty list')
 
+        if not _check_equal_length(tuples):
+            raise ValueError('all tuples must be the same length'))
+
         if isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
                 tuples = tuples._values
@@ -1020,7 +1023,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
         else:
-            arrays = list(zip_longest(*tuples))
+            arrays = lzip(*tuples))
 
         return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
 
@@ -2372,5 +2375,25 @@ def _sparsify(label_list, start=0, sentinel=''):
     return lzip(*result)
 
 
+def _check_equal_length(seq_of_seqs):
+    """
+    Ensure that all sequences in seq_of_seqs are the same length.
+
+    Since this function is time critical, it does zero error checking.
+    Two exceptions can result from calling this function.
+        1. IndexError: seq_of_seqs is not an indexed sequence.
+        2. TypeError: An inner sequence does not support len().
+
+    This check is up to O(n) and can be expensive, so use only when necessary.
+
+    Return True if all sequences are the same length, otherwise False
+    """
+    L0 = len(seq_of_seqs[0])
+    for seq in seq_of_seqs:
+        if len(seq) != L0:
+            return False
+    return True
+
+
 def _get_na_rep(dtype):
     return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')

From 0c2ca79b2e2ae79e4c970c2850d70986d0064c83 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 1 Feb 2017 21:23:17 -0600
Subject: [PATCH 4/8] remove extra )

No need to import zip_longest anymore
---
 pandas/indexes/multi.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 97599e0fce359..a5011fa0a97e3 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -5,10 +5,6 @@
 from functools import partial
 from sys import getsizeof
 
-try:
-    from itertools import zip_longest
-except ImportError:
-    from itertools import izip_longest as zip_longest
 
 import numpy as np
 import pandas.lib as lib
@@ -1013,7 +1009,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
             raise TypeError('Cannot infer number of levels from empty list')
 
         if not _check_equal_length(tuples):
-            raise ValueError('all tuples must be the same length'))
+            raise ValueError('all tuples must be the same length')
 
         if isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
@@ -1023,7 +1019,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
         else:
-            arrays = lzip(*tuples))
+            arrays = lzip(*tuples)
 
         return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
 

From 6593c261a171d4484c0c6c71631d9677f8498978 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 2 Feb 2017 01:27:52 -0600
Subject: [PATCH 5/8] use iterators rather than indexing

---
 pandas/indexes/multi.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index a5011fa0a97e3..bda2f22f890d2 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -2376,16 +2376,13 @@ def _check_equal_length(seq_of_seqs):
     Ensure that all sequences in seq_of_seqs are the same length.
 
     Since this function is time critical, it does zero error checking.
-    Two exceptions can result from calling this function.
-        1. IndexError: seq_of_seqs is not an indexed sequence.
-        2. TypeError: An inner sequence does not support len().
-
-    This check is up to O(n) and can be expensive, so use only when necessary.
+    A TypeError will be raised if inner sequence does not support len().
 
     Return True if all sequences are the same length, otherwise False
     """
-    L0 = len(seq_of_seqs[0])
-    for seq in seq_of_seqs:
+    seq_it = iter(seq_of_seqs)
+    L0 = len(next(seq_it))
+    for seq in seq_it:
         if len(seq) != L0:
             return False
     return True

From d1d26ff037d42ae4591ded07542cf583ff9b1b07 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 9 Feb 2017 22:33:13 -0600
Subject: [PATCH 6/8] catch ValueErrors in now invalid test cases.

---
 pandas/tests/indexes/test_base.py |  8 +++++---
 pandas/tests/test_strings.py      | 15 ++++++++-------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 2f5b98d145e57..20ef9e897f87f 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1443,9 +1443,11 @@ def test_str_attribute(self):
         tm.assert_index_equal(idx.str.split(), expected)
         tm.assert_index_equal(idx.str.split(expand=False), expected)
 
-        expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
-                                           ('f', np.nan, np.nan)])
-        tm.assert_index_equal(idx.str.split(expand=True), expected)
+        # The expand=True result below is now expected to raise ValueError
+        with self.assertRaises(ValueError):
+            expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
+                                            ('f', np.nan, np.nan)])
+            tm.assert_index_equal(idx.str.split(expand=True), expected)
 
         # test boolean case, should return np.array instead of boolean Index
         idx = Index(['a1', 'a2', 'b1', 'b2'])
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index ce97b09b7e3ca..86573d3272c65 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -1984,13 +1984,14 @@ def test_split_to_multiindex_expand(self):
         tm.assert_index_equal(result, exp)
         self.assertEqual(result.nlevels, 3)
 
-        idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
-        result = idx.str.split('_', expand=True)
-        exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
-                                       ), ('one', 'of', 'these', 'things',
-                                           'is', 'not')])
-        tm.assert_index_equal(result, exp)
-        self.assertEqual(result.nlevels, 6)
+        with self.assertRaises(ValueError):
+            idx = Index(['some_unequal_splits', 'one_of_these_things_is_not'])
+            result = idx.str.split('_', expand=True)
+            exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA
+                                        ), ('one', 'of', 'these', 'things',
+                                            'is', 'not')])
+            tm.assert_index_equal(result, exp)
+            self.assertEqual(result.nlevels, 6)
 
         with tm.assertRaisesRegexp(ValueError, "expand must be"):
             idx.str.split('_', expand="not_a_boolean")

From 62cbc274940ed0e33719373bc1bbf4452f8d5dcf Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 9 Feb 2017 22:33:27 -0600
Subject: [PATCH 7/8] test _check_equal_length

---
 pandas/tests/indexes/test_multi.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 5bb7f7a52913c..de03ebbb616d6 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1640,18 +1640,20 @@ def test_from_tuples(self):
         idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
         self.assertEqual(len(idx), 2)
 
-    def test_from_tuples_variable_length(self):
-        # check that len(MultiIndex) == max(len(iterables))
-        T = ((1,), (2, 3), (4, 5, 6))
+    def test_equal_length(self):
+        # Test _check_equal_length
+        from pandas.indexes.multi import _check_equal_length
 
-        idx = MultiIndex.from_tuples(T)
-        self.assertEqual(len(idx), 3)
+        seqs = [[1, 2, 3], [2, 3, 4], [0, 1, 0]]
+        self.assertTrue(_check_equal_length(seqs))
 
-        idx = MultiIndex.from_tuples(set(T))
-        self.assertEqual(len(idx), 3)
+        seqs[-1].append(1)
+        self.assertFalse(_check_equal_length(seqs))
 
-        idx = MultiIndex.from_tuples(list(T))
-        self.assertEqual(len(idx), 3)
+        # Test TypeError
+        seqs = [None]
+        with self.assertRaises(TypeError):
+            _check_equal_length(seqs)
 
     def test_argsort(self):
         result = self.index.argsort()

From 927d439458b563f7059b898120d66f3985baa4f7 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 9 Feb 2017 22:34:31 -0600
Subject: [PATCH 8/8] If we have an empty seq, then we should return True.

---
 pandas/indexes/multi.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index bda2f22f890d2..3b990729dcd92 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -2379,13 +2379,18 @@ def _check_equal_length(seq_of_seqs):
     A TypeError will be raised if inner sequence does not support len().
 
     Return True if all sequences are the same length, otherwise False
+    If seq_of_seqs is empty, return True as well.
     """
     seq_it = iter(seq_of_seqs)
-    L0 = len(next(seq_it))
-    for seq in seq_it:
-        if len(seq) != L0:
-            return False
-    return True
+    try:
+        L0 = len(next(seq_it))
+    except StopIteration:
+        return True
+    else:
+        for seq in seq_it:
+            if len(seq) != L0:
+                return False
+        return True
 
 
 def _get_na_rep(dtype):