Skip to content

Commit 69c37a7

Browse files
committed
NDFrame.filter -> NDFrame.select
1 parent 430f0fd commit 69c37a7

File tree

2 files changed

+134
-49
lines changed

2 files changed

+134
-49
lines changed

pandas/core/generic.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4483,8 +4483,10 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
44834483

44844484
def filter(self, items=None, like=None, regex=None, axis=None):
44854485
"""
4486-
Subset rows or columns of dataframe according to labels in
4487-
the specified index.
4486+
Filter columns or rows according to labels in the specified index.
4487+
4488+
.. deprecated:: 0.25.0
4489+
Use .select instead.
44884490
44894491
Note that this routine does not filter a dataframe on its
44904492
contents. The filter is applied to the labels of the index.
@@ -4540,10 +4542,76 @@ def filter(self, items=None, like=None, regex=None, axis=None):
45404542
one two three
45414543
rabbit 4 5 6
45424544
"""
4545+
msg = (".filter is deprecated and will be removed in the"
4546+
" future. Use .select instead.")
4547+
warnings.warn(msg, FutureWarning, stacklevel=2)
4548+
return self.select(items=items, like=like, regex=regex, axis=axis)
4549+
4550+
def select(self, items=None, like=None, regex=None, flags=0, axis=None):
4551+
"""
4552+
Select columns or rows according to labels in the specified index.
4553+
4554+
Note that this routine does not filter a dataframe on its
4555+
contents. The selection is applied to the labels of the chosen axis.
4556+
4557+
Parameters
4558+
----------
4559+
items : list-like
4560+
Keep labels from axis which are in items.
4561+
like : str
4562+
Keep labels from axis for which the string ``like`` is contained in the label.
4563+
regex : str (regular expression)
4564+
Keep labels from axis for which re.search(regex, label) finds a match.
4565+
flags : int, default 0
4566+
re module flags, e.g. re.IGNORECASE. Can only be combined with
4567+
regex.
4568+
axis : int or string axis name
4569+
The axis to filter on. By default this is the info axis,
4570+
'index' for Series, 'columns' for DataFrame.
4571+
4572+
Returns
4573+
-------
4574+
same type as input object
4575+
4576+
See Also
4577+
--------
4578+
DataFrame.loc
4579+
4580+
Notes
4581+
-----
4582+
The ``items``, ``like``, and ``regex`` parameters are
4583+
enforced to be mutually exclusive.
4584+
4585+
``axis`` defaults to the info axis that is used when indexing
4586+
with ``[]``.
4587+
4588+
Examples
4589+
--------
4590+
>>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
4591+
... index=['mouse', 'rabbit'],
4592+
... columns=['one', 'two', 'three'])
4593+
4594+
>>> # select columns by name
4595+
>>> df.select(items=['one', 'three'])
4596+
one three
4597+
mouse 1 3
4598+
rabbit 4 6
4599+
4600+
>>> # select columns by regular expression
4601+
>>> df.select(regex='e$', axis=1)
4602+
one three
4603+
mouse 1 3
4604+
rabbit 4 6
4605+
4606+
>>> # select rows containing 'bbi'
4607+
>>> df.select(like='bbi', axis=0)
4608+
one two three
4609+
rabbit 4 5 6
4610+
"""
45434611
import re
45444612

4545-
nkw = com.count_not_none(items, like, regex)
4546-
if nkw > 1:
4613+
num_not_none = com.count_not_none(items, like, regex)
4614+
if num_not_none > 1:
45474615
raise TypeError('Keyword arguments `items`, `like`, or `regex` '
45484616
'are mutually exclusive')
45494617

@@ -4563,7 +4631,7 @@ def f(x):
45634631
elif regex:
45644632
def f(x):
45654633
return matcher.search(ensure_str(x)) is not None
4566-
matcher = re.compile(regex)
4634+
matcher = re.compile(regex, flags=flags)
45674635
values = labels.map(f)
45684636
return self.loc(axis=axis)[values]
45694637
else:

pandas/tests/frame/test_axis_select_reindex.py

Lines changed: 61 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
from pandas.util.testing import assert_frame_equal
1414

1515

16+
def test_filter_deprecated(int_frame):
17+
with tm.assert_produces_warning(FutureWarning):
18+
int_frame.filter(like='A')
19+
20+
1621
class TestDataFrameSelectReindex(TestData):
1722
# These are specific reindex-based tests; other indexing tests should go in
1823
# test_indexing
@@ -775,93 +780,105 @@ def test_align_series_combinations(self):
775780
tm.assert_series_equal(res1, exp2)
776781
tm.assert_frame_equal(res2, exp1)
777782

778-
def test_filter(self):
783+
def test_select(self):
779784
# Items
780-
filtered = self.frame.filter(['A', 'B', 'E'])
781-
assert len(filtered.columns) == 2
782-
assert 'E' not in filtered
785+
selected = self.frame.select(['A', 'B', 'E'])
786+
assert len(selected.columns) == 2
787+
assert 'E' not in selected
783788

784-
filtered = self.frame.filter(['A', 'B', 'E'], axis='columns')
785-
assert len(filtered.columns) == 2
786-
assert 'E' not in filtered
789+
selected = self.frame.select(['A', 'B', 'E'], axis='columns')
790+
assert len(selected.columns) == 2
791+
assert 'E' not in selected
787792

788793
# Other axis
789794
idx = self.frame.index[0:4]
790-
filtered = self.frame.filter(idx, axis='index')
795+
selected = self.frame.select(idx, axis='index')
791796
expected = self.frame.reindex(index=idx)
792-
tm.assert_frame_equal(filtered, expected)
797+
tm.assert_frame_equal(selected, expected)
793798

794799
# like
795800
fcopy = self.frame.copy()
796801
fcopy['AA'] = 1
797802

798-
filtered = fcopy.filter(like='A')
799-
assert len(filtered.columns) == 2
800-
assert 'AA' in filtered
803+
selected = fcopy.select(like='A')
804+
assert len(selected.columns) == 2
805+
assert 'AA' in selected
801806

802807
# like with ints in column names
803808
df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B'])
804-
filtered = df.filter(like='_')
805-
assert len(filtered.columns) == 2
809+
selected = df.select(like='_')
810+
assert len(selected.columns) == 2
806811

807812
# regex with ints in column names
808813
# from PR #10384
809814
df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C'])
810815
expected = DataFrame(
811816
0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object))
812-
filtered = df.filter(regex='^[0-9]+$')
813-
tm.assert_frame_equal(filtered, expected)
817+
selected = df.select(regex='^[0-9]+$')
818+
tm.assert_frame_equal(selected, expected)
814819

815820
expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1'])
816821
# shouldn't remove anything
817-
filtered = expected.filter(regex='^[0-9]+$')
818-
tm.assert_frame_equal(filtered, expected)
822+
selected = expected.select(regex='^[0-9]+$')
823+
tm.assert_frame_equal(selected, expected)
819824

820825
# pass in None
821826
with pytest.raises(TypeError, match='Must pass'):
822-
self.frame.filter()
827+
self.frame.select()
823828
with pytest.raises(TypeError, match='Must pass'):
824-
self.frame.filter(items=None)
829+
self.frame.select(items=None)
825830
with pytest.raises(TypeError, match='Must pass'):
826-
self.frame.filter(axis=1)
831+
self.frame.select(axis=1)
827832

828833
# test mutually exclusive arguments
829834
with pytest.raises(TypeError, match='mutually exclusive'):
830-
self.frame.filter(items=['one', 'three'], regex='e$', like='bbi')
835+
self.frame.select(items=['one', 'three'], regex='e$', like='bbi')
831836
with pytest.raises(TypeError, match='mutually exclusive'):
832-
self.frame.filter(items=['one', 'three'], regex='e$', axis=1)
837+
self.frame.select(items=['one', 'three'], regex='e$', axis=1)
833838
with pytest.raises(TypeError, match='mutually exclusive'):
834-
self.frame.filter(items=['one', 'three'], regex='e$')
839+
self.frame.select(items=['one', 'three'], regex='e$')
835840
with pytest.raises(TypeError, match='mutually exclusive'):
836-
self.frame.filter(items=['one', 'three'], like='bbi', axis=0)
841+
self.frame.select(items=['one', 'three'], like='bbi', axis=0)
837842
with pytest.raises(TypeError, match='mutually exclusive'):
838-
self.frame.filter(items=['one', 'three'], like='bbi')
843+
self.frame.select(items=['one', 'three'], like='bbi')
839844

840845
# objects
841-
filtered = self.mixed_frame.filter(like='foo')
842-
assert 'foo' in filtered
846+
selected = self.mixed_frame.select(like='foo')
847+
assert 'foo' in selected
843848

844849
# unicode columns, won't ascii-encode
845850
df = self.frame.rename(columns={'B': '\u2202'})
846-
filtered = df.filter(like='C')
847-
assert 'C' in filtered
851+
selected = df.select(like='C')
852+
assert 'C' in selected
853+
854+
def test_select_regex_search(self):
855+
import re
848856

849-
def test_filter_regex_search(self):
850857
fcopy = self.frame.copy()
851858
fcopy['AA'] = 1
852859

853860
# regex
854-
filtered = fcopy.filter(regex='[A]+')
855-
assert len(filtered.columns) == 2
856-
assert 'AA' in filtered
861+
selected = fcopy.select(regex='[A]+')
862+
assert len(selected.columns) == 2
863+
assert 'AA' in selected
864+
865+
# regex, ignore case
866+
selected = fcopy.select(regex='[a]+', flags=re.IGNORECASE)
867+
assert len(selected.columns) == 2
868+
assert 'AA' in selected
857869

858870
# doesn't have to be at beginning
859871
df = DataFrame({'aBBa': [1, 2],
860872
'BBaBB': [1, 2],
861873
'aCCa': [1, 2],
862874
'aCCaBB': [1, 2]})
863875

864-
result = df.filter(regex='BB')
876+
result = df.select(regex='BB')
877+
exp = df[[x for x in df.columns if 'BB' in x]]
878+
assert_frame_equal(result, exp)
879+
880+
# ignore case
881+
result = df.select(regex='bb', flags=re.IGNORECASE)
865882
exp = df[[x for x in df.columns if 'BB' in x]]
866883
assert_frame_equal(result, exp)
867884

@@ -870,29 +887,29 @@ def test_filter_regex_search(self):
870887
('a', DataFrame({'a': [1, 2]})),
871888
('あ', DataFrame({'あ': [3, 4]}))
872889
])
873-
def test_filter_unicode(self, name, expected):
890+
def test_select_unicode(self, name, expected):
874891
# GH13101
875892
df = DataFrame({'a': [1, 2], 'あ': [3, 4]})
876893

877-
assert_frame_equal(df.filter(like=name), expected)
878-
assert_frame_equal(df.filter(regex=name), expected)
894+
assert_frame_equal(df.select(like=name), expected)
895+
assert_frame_equal(df.select(regex=name), expected)
879896

880897
@pytest.mark.parametrize('name', ['a', 'a'])
881-
def test_filter_bytestring(self, name):
898+
def test_select_bytestring(self, name):
882899
# GH13101
883900
df = DataFrame({b'a': [1, 2], b'b': [3, 4]})
884901
expected = DataFrame({b'a': [1, 2]})
885902

886-
assert_frame_equal(df.filter(like=name), expected)
887-
assert_frame_equal(df.filter(regex=name), expected)
903+
assert_frame_equal(df.select(like=name), expected)
904+
assert_frame_equal(df.select(regex=name), expected)
888905

889-
def test_filter_corner(self):
906+
def test_select_corner(self):
890907
empty = DataFrame()
891908

892-
result = empty.filter([])
909+
result = empty.select([])
893910
assert_frame_equal(result, empty)
894911

895-
result = empty.filter(like='foo')
912+
result = empty.select(like='foo')
896913
assert_frame_equal(result, empty)
897914

898915
def test_take(self):

0 commit comments

Comments
 (0)