Skip to content

Commit 763bb01

Browse files
committed
BUG: make sure that the multi-index is lex-sorted before passing to _lexsort_indexer (GH8017)
BUG: sparse repr of multi-index frame with a FloatIndex as a level was incorrect
1 parent 35a9527 commit 763bb01

File tree

5 files changed

+60
-9
lines changed

5 files changed

+60
-9
lines changed

doc/source/v0.15.0.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,6 @@ Enhancements
667667

668668

669669

670-
- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`)
671670

672671

673672

@@ -745,10 +744,10 @@ Bug Fixes
745744
- Bug in DataFrameGroupby.transform when transforming with a passed non-sorted key (:issue:`8046`)
746745
- Bug where repeated timeseries line and area plots could raise ``ValueError`` or be drawn with an incorrect kind (:issue:`7733`)
747746
- Bug in inference in a MultiIndex with ``datetime.date`` inputs (:issue:`7888`)
748-
747+
- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`)
749748
- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may reset nanosecond (:issue:`7697`)
750749
- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may raise ``AttributeError`` if ``Timestamp`` has ``dateutil`` tzinfo (:issue:`7697`)
751-
750+
- Bug in sorting a multi-index frame with a Float64Index (:issue:`8017`)
752751

753752
- Bug where ``is_superperiod`` and ``is_subperiod`` could not handle frequencies higher than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`)
754753

pandas/core/format.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -625,10 +625,17 @@ def is_numeric_dtype(dtype):
625625
fmt_columns = columns.format(sparsify=False, adjoin=False)
626626
fmt_columns = lzip(*fmt_columns)
627627
dtypes = self.frame.dtypes.values
628+
629+
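# if any column level is a Float level, no column label gets a leading space at all
# NOTE(review): ``is_floating`` is referenced below without being called -- confirm it is a property and not a method (a bound method is always truthy)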
630+
restrict_formatting = any([ l.is_floating for l in columns.levels ])
628631
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
629-
str_columns = list(zip(*[
630-
[' ' + y if y not in self.formatters and need_leadsp[x]
631-
else y for y in x] for x in fmt_columns]))
632+
633+
def space_format(x,y):
# Return the column label ``y`` (one element of the label tuple ``x``),
# prefixed with a single space only when all three conditions hold:
#   * ``y`` is not in ``self.formatters``,
#   * ``need_leadsp[x]`` is truthy (the dict maps each entry of
#     ``fmt_columns`` to the ``is_numeric_dtype`` result for its dtype),
#   * ``restrict_formatting`` is falsy.
# Otherwise ``y`` is returned unchanged.
634+
if y not in self.formatters and need_leadsp[x] and not restrict_formatting:
635+
return ' ' + y
636+
return y
637+
638+
str_columns = list(zip(*[ [ space_format(x,y) for y in x ] for x in fmt_columns ]))
632639
if self.sparsify:
633640
str_columns = _sparsify(str_columns)
634641

pandas/core/frame.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,6 +2770,12 @@ def trans(v):
27702770
na_position=na_position)
27712771

27722772
elif isinstance(labels, MultiIndex):
2773+
2774+
# make sure that the axis is lexsorted to start
2775+
# if not we need to reconstruct to get the correct indexer
2776+
if not labels.is_lexsorted():
2777+
labels = MultiIndex.from_tuples(labels.values)
2778+
27732779
indexer = _lexsort_indexer(labels.labels, orders=ascending,
27742780
na_position=na_position)
27752781
indexer = com._ensure_platform_int(indexer)

pandas/core/generic.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,6 +1628,7 @@ def sort_index(self, axis=0, ascending=True):
16281628

16291629
new_axis = labels.take(sort_index)
16301630
return self.reindex(**{axis_name: new_axis})
1631+
16311632
_shared_docs['reindex'] = """
16321633
Conform %(klass)s to new index with optional filling logic, placing
16331634
NA/NaN in locations having no value in the previous index. A new object
@@ -3558,10 +3559,10 @@ def _tz_convert(ax, tz):
35583559
result = self._constructor(self._data, copy=copy)
35593560
result.set_axis(axis,ax)
35603561
return result.__finalize__(self)
3561-
3562+
35623563
@deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
35633564
mapping={True: 'infer', False: 'raise'})
3564-
def tz_localize(self, tz, axis=0, level=None, copy=True,
3565+
def tz_localize(self, tz, axis=0, level=None, copy=True,
35653566
ambiguous='raise'):
35663567
"""
35673568
Localize tz-naive TimeSeries to target time zone
@@ -3583,7 +3584,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True,
35833584
- 'raise' will raise an AmbiguousTimeError if there are ambiguous times
35843585
infer_dst : boolean, default False (DEPRECATED)
35853586
Attempt to infer fall dst-transition hours based on order
3586-
3587+
35873588
Returns
35883589
-------
35893590
"""

pandas/tests/test_multilevel.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,44 @@ def test_sort_index_preserve_levels(self):
214214
result = self.frame.sort_index()
215215
self.assertEqual(result.index.names, self.frame.index.names)
216216

217+
def test_sorting_repr_8017(self):
# Regression test for GH 8017: build a frame whose columns are a two-level
# MultiIndex ('red', <label>) with unsorted second-level labels, append one
# extra column, then check both the repr header and sort_index(axis=1).
218+
219+
np.random.seed(0)
220+
data = np.random.randn(3,4)
221+
222+
# each case is (second-level labels in unsorted order, label of the extra
# column); the cases cover float, int, Timestamp and string labels
for gen, extra in [([1.,3.,2.,5.],4.),
223+
([1,3,2,5],4),
224+
([Timestamp('20130101'),Timestamp('20130103'),Timestamp('20130102'),Timestamp('20130105')],Timestamp('20130104')),
225+
(['1one','3one','2one','5one'],'4one')]:
226+
columns = MultiIndex.from_tuples([('red', i) for i in gen])
227+
df = DataFrame(data, index=list('def'), columns=columns)
228+
# df2 is df plus one more ('red', extra) column filled with 'world'
df2 = pd.concat([df,DataFrame('world',
229+
index=list('def'),
230+
columns=MultiIndex.from_tuples([('red', extra)]))],axis=1)
231+
232+
# check that the repr is good
233+
# make sure that we have a correct sparsified repr
234+
# e.g. only 1 header of red
235+
self.assertEqual(str(df2).splitlines()[0].split(),['red'])
236+
237+
# GH 8017
238+
# sorting fails after columns added
239+
240+
# construct single-dtype then sort
241+
result = df.copy().sort_index(axis=1)
242+
# positions [0,2,1,3] list the original columns in ascending label order
expected = df.iloc[:,[0,2,1,3]]
243+
assert_frame_equal(result, expected)
244+
245+
result = df2.sort_index(axis=1)
246+
# the extra column (position 4) sorts before the last original column
expected = df2.iloc[:,[0,2,1,4,3]]
247+
assert_frame_equal(result, expected)
248+
249+
# setitem then sort
# (``expected`` is still the sorted df2 computed just above)
250+
result = df.copy()
251+
result[('red',extra)] = 'world'
252+
result = result.sort_index(axis=1)
253+
assert_frame_equal(result, expected)
254+
217255
def test_repr_to_string(self):
218256
repr(self.frame)
219257
repr(self.ymd)

0 commit comments

Comments
 (0)