Skip to content

ENH: Partial string matching for timestamps with multiindex #12530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,20 @@ We are stopping on the included end-point as it is part of the index

dft.loc['2013-1-15 12:30:00']

DatetimeIndex Partial String Indexing also works on DataFrames with hierarchical indexing (``MultiIndex``). For
instance:

.. ipython:: python

# Build a one-column frame whose index is the product of 10 timestamps
# spaced 12 hours apart and the labels 'a' and 'b' (20 rows in total).
dft2 = pd.DataFrame(randn(20,1),
columns=['A'],
index=pd.MultiIndex.from_product([date_range('20130101',
periods=10,
freq='12H'),
['a', 'b']]))
# A partial date string is matched against the datetime first level.
dft2.loc['2013-01-05']
# Move the datetime level to the second position, then sort the index.
dft2 = dft2.swaplevel(0, 1).sort_index()
# With IndexSlice the partial date string targets the second level.
dft2.loc[pd.IndexSlice[:,'2013-01-05'],:]

Datetime Indexing
~~~~~~~~~~~~~~~~~
Expand Down
18 changes: 18 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,24 @@ Enhancements
~~~~~~~~~~~~


Partial string matches on ``DatetimeIndex`` when part of a ``MultiIndex``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Partial string matches on ``DatetimeIndex`` now work when part of a ``MultiIndex`` (:issue:`10331`)

For example:

.. ipython:: python

# Build a one-column frame whose index is the product of 10 timestamps
# spaced 12 hours apart and the labels 'a' and 'b' (20 rows in total).
dft2 = pd.DataFrame(randn(20,1),
columns=['A'],
index=pd.MultiIndex.from_product([date_range('20130101',
periods=10,
freq='12H'),
['a', 'b']]))
# A partial date string is matched against the datetime first level.
dft2.loc['2013-01-05']
# Move the datetime level to the second position, then sort the index.
dft2 = dft2.swaplevel(0, 1).sort_index()
# With IndexSlice the partial date string targets the second level.
dft2.loc[pd.IndexSlice[:,'2013-01-05'],:]




Expand Down
25 changes: 25 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1392,8 +1392,33 @@ def error():

return True

def _get_partial_string_timestamp_match_key(self, key, labels):
    """Rewrite partial string timestamp selectors in ``key`` (GH 10331).

    Parameters
    ----------
    key : object
        The indexer requested for one axis.
    labels : Index
        The labels of that axis.

    Returns
    -------
    object
        ``key`` itself when ``labels`` is not a ``MultiIndex``.  For a
        ``MultiIndex``, a bare string aimed at an all-dates first level is
        first widened to one entry per level, and then every string entry
        of a tuple key whose level reports ``is_all_dates`` is replaced by
        ``slice(entry, entry, None)``.
    """
    # Nothing to translate unless the axis is hierarchical.
    if not isinstance(labels, MultiIndex):
        return key

    levels = labels.levels

    if isinstance(key, compat.string_types) and levels[0].is_all_dates:
        # '2016-01-01' -> ('2016-01-01', slice(None), ..., slice(None)),
        # i.e. pad with a full slice for every remaining level.
        key = (key,) + (slice(None),) * (len(levels) - 1)

    if isinstance(key, tuple):
        # (..., '2016-01-01', ...) -> (..., slice('2016-01-01',
        # '2016-01-01', None), ...) for entries sitting on a date level;
        # all other entries are carried over untouched.
        key = tuple(
            slice(part, part, None)
            if (isinstance(part, compat.string_types) and
                levels[pos].is_all_dates)
            else part
            for pos, part in enumerate(key))

    return key

def _getitem_axis(self, key, axis=0):
labels = self.obj._get_axis(axis)
key = self._get_partial_string_timestamp_match_key(key, labels)

if isinstance(key, slice):
self._has_valid_type(key, axis)
Expand Down
83 changes: 81 additions & 2 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

import numpy as np

from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
assert_copy)
from pandas.util.testing import (assert_almost_equal, assertRaises,
assertRaisesRegexp, assert_copy)

import pandas.util.testing as tm

Expand Down Expand Up @@ -1970,3 +1970,82 @@ def test_index_name_retained(self):
def test_equals_operator(self):
    # GH9785: comparing the index with itself must be True at every
    # position.
    elementwise = self.index == self.index
    self.assertTrue(elementwise.all())

def test_partial_string_timestamp_multiindex(self):
    # GH10331: partial string timestamp selection on a MultiIndex
    stamps = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
    letters = ['a', 'b', 'c']
    mi = pd.MultiIndex.from_product([stamps, letters])
    df = pd.DataFrame({'c1': range(0, 15)}, index=mi)
    idx = pd.IndexSlice

    # Layout of df (level 0: timestamp, level 1: letter):
    #
    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # Same rows with the letter as level 0 and the timestamp as level 1.
    df_swap = df.swaplevel(0, 1).sort_index()

    # Partial string on the single-level frame obtained by picking 'a'.
    just_a = df_swap.loc['a']
    expected = df.loc[idx[:, 'a'], :].iloc[0:2]
    expected.index = expected.index.droplevel(1)
    tm.assert_frame_equal(just_a.loc['2016-01-01'], expected)

    # A string slice through IndexSlice that spans every row.
    tm.assert_frame_equal(df.loc[idx['2016-01-01':'2016-02-01', :], :],
                          df)

    # An explicit string slice aimed at the second level.
    tm.assert_frame_equal(
        df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :],
        df_swap.iloc[[0, 1, 5, 6, 10, 11]])

    # Plain ``[]`` with a date string works on a single-level index, but
    # it is ambiguous and deliberately not carried over to multi-indexes:
    # it would amount to selecting a scalar from a column.
    with assertRaises(KeyError):
        df['2016-01-01']

    # Year-only string: every row qualifies.
    tm.assert_frame_equal(df.loc['2016'], df)

    # Date string: the six rows stamped 2016-01-01.
    tm.assert_frame_equal(df.loc['2016-01-01'], df.iloc[0:6])

    # Date-and-hour string hitting the middle of the frame.
    tm.assert_frame_equal(df.loc['2016-01-02 12'], df.iloc[9:12])

    # Bare partial string (no explicit slice) aimed at the second level.
    tm.assert_frame_equal(df_swap.loc[idx[:, '2016-01-02'], :],
                          df_swap.iloc[[2, 3, 7, 8, 12, 13]])

    # Tuple selector mixing a partial date string with a label.
    tm.assert_frame_equal(df.loc[('2016-01-01', 'a'), :],
                          df.iloc[[0, 3]])

    # On the swapped frame the first level holds letters, so a bare date
    # string must not match anything.
    with assertRaises(KeyError):
        df_swap.loc['2016-01-01']