diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 903fd7ffe706a..05b0cf7465ab0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4483,8 +4483,10 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False, def filter(self, items=None, like=None, regex=None, axis=None): """ - Subset rows or columns of dataframe according to labels in - the specified index. + Filter columns or rows according to labels in the specified index. + + .. deprecated:: 0.25.0 + Use .select instead. Note that this routine does not filter a dataframe on its contents. The filter is applied to the labels of the index. @@ -4540,10 +4542,76 @@ def filter(self, items=None, like=None, regex=None, axis=None): one two three rabbit 4 5 6 """ + msg = (".filter is deprecated and will be removed in the" + " future. Use .select instead.") + warnings.warn(msg, FutureWarning, stacklevel=2) + return self.select(items=items, like=like, regex=regex, axis=axis) + + def select(self, items=None, like=None, regex=None, flags=0, axis=None): + """ + Select columns or rows according to labels in the specified index. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the selected axis. + + Parameters + ---------- + items : list-like + Keep labels from axis which are in items. + like : str + Keep labels from axis for which "like in label" is true. + regex : str (regular expression) + Keep labels from axis for which re.search(regex, label) matches. + flags : int, default 0 + re module flags, e.g. re.IGNORECASE. Can only be combined with + regex. + axis : int or string axis name + The axis to filter on. By default this is the info axis, + 'index' for Series, 'columns' for DataFrame. + + Returns + ------- + same type as input object + + See Also + -------- + DataFrame.loc + + Notes + ----- + The ``items``, ``like``, and ``regex`` parameters are + enforced to be mutually exclusive. 
+ + ``axis`` defaults to the info axis that is used when indexing + with ``[]``. + + Examples + -------- + >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), + ... index=['mouse', 'rabbit'], + ... columns=['one', 'two', 'three']) + + >>> # select columns by name + >>> df.select(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select columns by regular expression + >>> df.select(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select rows containing 'bbi' + >>> df.select(like='bbi', axis=0) + one two three + rabbit 4 5 6 + """ import re - nkw = com.count_not_none(items, like, regex) - if nkw > 1: + num_not_none = com.count_not_none(items, like, regex) + if num_not_none > 1: raise TypeError('Keyword arguments `items`, `like`, or `regex` ' 'are mutually exclusive') @@ -4563,7 +4631,7 @@ def f(x): elif regex: def f(x): return matcher.search(ensure_str(x)) is not None - matcher = re.compile(regex) + matcher = re.compile(regex, flags=flags) values = labels.map(f) return self.loc(axis=axis)[values] else: diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 42f98d5c96aa5..064622e4a4c93 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -13,6 +13,11 @@ from pandas.util.testing import assert_frame_equal +def test_filter_deprecated(int_frame): + with tm.assert_produces_warning(FutureWarning): + int_frame.filter(like='A') + + class TestDataFrameSelectReindex(TestData): # These are specific reindex-based tests; other indexing tests should go in # test_indexing @@ -775,85 +780,92 @@ def test_align_series_combinations(self): tm.assert_series_equal(res1, exp2) tm.assert_frame_equal(res2, exp1) - def test_filter(self): + def test_select(self): # Items - filtered = self.frame.filter(['A', 'B', 'E']) - assert len(filtered.columns) == 2 - assert 'E' not in filtered + selected = self.frame.select(['A', 'B', 'E']) + 
assert len(selected.columns) == 2 + assert 'E' not in selected - filtered = self.frame.filter(['A', 'B', 'E'], axis='columns') - assert len(filtered.columns) == 2 - assert 'E' not in filtered + selected = self.frame.select(['A', 'B', 'E'], axis='columns') + assert len(selected.columns) == 2 + assert 'E' not in selected # Other axis idx = self.frame.index[0:4] - filtered = self.frame.filter(idx, axis='index') + selected = self.frame.select(idx, axis='index') expected = self.frame.reindex(index=idx) - tm.assert_frame_equal(filtered, expected) + tm.assert_frame_equal(selected, expected) # like fcopy = self.frame.copy() fcopy['AA'] = 1 - filtered = fcopy.filter(like='A') - assert len(filtered.columns) == 2 - assert 'AA' in filtered + selected = fcopy.select(like='A') + assert len(selected.columns) == 2 + assert 'AA' in selected # like with ints in column names df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) - filtered = df.filter(like='_') - assert len(filtered.columns) == 2 + selected = df.select(like='_') + assert len(selected.columns) == 2 # regex with ints in column names # from PR #10384 df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C']) expected = DataFrame( 0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)) - filtered = df.filter(regex='^[0-9]+$') - tm.assert_frame_equal(filtered, expected) + selected = df.select(regex='^[0-9]+$') + tm.assert_frame_equal(selected, expected) expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) # shouldn't remove anything - filtered = expected.filter(regex='^[0-9]+$') - tm.assert_frame_equal(filtered, expected) + selected = expected.select(regex='^[0-9]+$') + tm.assert_frame_equal(selected, expected) # pass in None with pytest.raises(TypeError, match='Must pass'): - self.frame.filter() + self.frame.select() with pytest.raises(TypeError, match='Must pass'): - self.frame.filter(items=None) + self.frame.select(items=None) with pytest.raises(TypeError, match='Must pass'): - 
self.frame.filter(axis=1) + self.frame.select(axis=1) # test mutually exclusive arguments with pytest.raises(TypeError, match='mutually exclusive'): - self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') + self.frame.select(items=['one', 'three'], regex='e$', like='bbi') with pytest.raises(TypeError, match='mutually exclusive'): - self.frame.filter(items=['one', 'three'], regex='e$', axis=1) + self.frame.select(items=['one', 'three'], regex='e$', axis=1) with pytest.raises(TypeError, match='mutually exclusive'): - self.frame.filter(items=['one', 'three'], regex='e$') + self.frame.select(items=['one', 'three'], regex='e$') with pytest.raises(TypeError, match='mutually exclusive'): - self.frame.filter(items=['one', 'three'], like='bbi', axis=0) + self.frame.select(items=['one', 'three'], like='bbi', axis=0) with pytest.raises(TypeError, match='mutually exclusive'): - self.frame.filter(items=['one', 'three'], like='bbi') + self.frame.select(items=['one', 'three'], like='bbi') # objects - filtered = self.mixed_frame.filter(like='foo') - assert 'foo' in filtered + selected = self.mixed_frame.select(like='foo') + assert 'foo' in selected # unicode columns, won't ascii-encode df = self.frame.rename(columns={'B': '\u2202'}) - filtered = df.filter(like='C') - assert 'C' in filtered + selected = df.select(like='C') + assert 'C' in selected + + def test_select_regex_search(self): + import re - def test_filter_regex_search(self): fcopy = self.frame.copy() fcopy['AA'] = 1 # regex - filtered = fcopy.filter(regex='[A]+') - assert len(filtered.columns) == 2 - assert 'AA' in filtered + selected = fcopy.select(regex='[A]+') + assert len(selected.columns) == 2 + assert 'AA' in selected + + # regex, ignore case + selected = fcopy.select(regex='[a]+', flags=re.IGNORECASE) + assert len(selected.columns) == 2 + assert 'AA' in selected # doesn't have to be at beginning df = DataFrame({'aBBa': [1, 2], @@ -861,7 +873,12 @@ def test_filter_regex_search(self): 'aCCa': [1, 2], 
'aCCaBB': [1, 2]}) - result = df.filter(regex='BB') + result = df.select(regex='BB') + exp = df[[x for x in df.columns if 'BB' in x]] + assert_frame_equal(result, exp) + + # ignore case + result = df.select(regex='bb', flags=re.IGNORECASE) exp = df[[x for x in df.columns if 'BB' in x]] assert_frame_equal(result, exp) @@ -870,29 +887,29 @@ def test_filter_regex_search(self): ('a', DataFrame({'a': [1, 2]})), ('あ', DataFrame({'あ': [3, 4]})) ]) - def test_filter_unicode(self, name, expected): + def test_select_unicode(self, name, expected): # GH13101 df = DataFrame({'a': [1, 2], 'あ': [3, 4]}) - assert_frame_equal(df.filter(like=name), expected) - assert_frame_equal(df.filter(regex=name), expected) + assert_frame_equal(df.select(like=name), expected) + assert_frame_equal(df.select(regex=name), expected) @pytest.mark.parametrize('name', ['a', 'a']) - def test_filter_bytestring(self, name): + def test_select_bytestring(self, name): # GH13101 df = DataFrame({b'a': [1, 2], b'b': [3, 4]}) expected = DataFrame({b'a': [1, 2]}) - assert_frame_equal(df.filter(like=name), expected) - assert_frame_equal(df.filter(regex=name), expected) + assert_frame_equal(df.select(like=name), expected) + assert_frame_equal(df.select(regex=name), expected) - def test_filter_corner(self): + def test_select_corner(self): empty = DataFrame() - result = empty.filter([]) + result = empty.select([]) assert_frame_equal(result, empty) - result = empty.filter(like='foo') + result = empty.select(like='foo') assert_frame_equal(result, empty) def test_take(self):