Skip to content

Commit 6be8784

Browse files
committed
CLN: replace pandas.compat.scipy.scoreatpercentile with numpy.percentile
1 parent d2e1abf commit 6be8784

File tree

7 files changed: 18 additions and 100 deletions.

7 files changed: 18 additions and 100 deletions.

pandas/compat/scipy.py

Lines changed: 0 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -6,88 +6,6 @@
66
import numpy as np
77

88

9-
def scoreatpercentile(a, per, limit=(), interpolation_method='fraction'):
10-
"""Calculate the score at the given `per` percentile of the sequence `a`.
11-
12-
For example, the score at `per=50` is the median. If the desired quantile
13-
lies between two data points, we interpolate between them, according to
14-
the value of `interpolation_method`. If the parameter `limit` is provided, it
15-
should be a tuple (lower, upper) of two values. Values of `a` outside
16-
this (closed) interval will be ignored.
17-
18-
The `interpolation_method` parameter supports three values, namely
19-
`fraction` (default), `lower` and `higher`. Interpolation is done only
20-
if the desired quantile lies between two data points `i` and `j`. For
21-
`fraction`, the result is an interpolated value between `i` and `j`;
22-
for `lower`, the result is `i`, for `higher` the result is `j`.
23-
24-
Parameters
25-
----------
26-
a : ndarray
27-
Values from which to extract score.
28-
per : scalar
29-
Percentile at which to extract score.
30-
limit : tuple, optional
31-
Tuple of two scalars, the lower and upper limits within which to
32-
compute the percentile.
33-
interpolation_method : {'fraction', 'lower', 'higher'}, optional
34-
This optional parameter specifies the interpolation method to use,
35-
when the desired quantile lies between two data points `i` and `j`:
36-
37-
- fraction: `i + (j - i)*fraction`, where `fraction` is the
38-
fractional part of the index surrounded by `i` and `j`.
39-
- lower: `i`.
40-
- higher: `j`.
41-
42-
Returns
43-
-------
44-
score : float
45-
Score at percentile.
46-
47-
See Also
48-
--------
49-
percentileofscore
50-
51-
Examples
52-
--------
53-
>>> from scipy import stats
54-
>>> a = np.arange(100)
55-
>>> stats.scoreatpercentile(a, 50)
56-
49.5
57-
58-
"""
59-
# TODO: this should be a simple wrapper around a well-written quantile
60-
# function. GNU R provides 9 quantile algorithms (!), with differing
61-
# behaviour at, for example, discontinuities.
62-
values = np.sort(a, axis=0)
63-
if limit:
64-
values = values[(limit[0] <= values) & (values <= limit[1])]
65-
66-
idx = per / 100. * (values.shape[0] - 1)
67-
if idx % 1 == 0:
68-
score = values[idx]
69-
else:
70-
if interpolation_method == 'fraction':
71-
score = _interpolate(values[int(idx)], values[int(idx) + 1],
72-
idx % 1)
73-
elif interpolation_method == 'lower':
74-
score = values[np.floor(idx)]
75-
elif interpolation_method == 'higher':
76-
score = values[np.ceil(idx)]
77-
else:
78-
raise ValueError("interpolation_method can only be 'fraction', "
79-
"'lower' or 'higher'")
80-
81-
return score
82-
83-
84-
def _interpolate(a, b, fraction):
85-
"""Returns the point at the given fraction between a and b, where
86-
'fraction' must be between 0 and 1.
87-
"""
88-
return a + (b - a) * fraction
89-
90-
919
def rankdata(a):
9210
"""
9311
Ranks the data, dealing with ties appropriately.

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
import pandas.computation.expressions as expressions
3939
from pandas.computation.eval import eval as _eval
4040
from pandas.computation.scope import _ensure_scope
41-
from pandas.compat.scipy import scoreatpercentile as _quantile
41+
from numpy import percentile as _quantile
4242
from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u,
4343
OrderedDict, raise_with_traceback)
4444
from pandas import compat

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@
5252
import pandas.tslib as tslib
5353
import pandas.index as _index
5454

55-
from pandas.compat.scipy import scoreatpercentile as _quantile
55+
from numpy import percentile as _quantile
5656
from pandas.core.config import get_option
5757

5858
__all__ = ['Series']

pandas/tests/test_frame.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10915,13 +10915,13 @@ def wrapper(x):
1091510915
check_dtype=False, check_dates=True)
1091610916

1091710917
def test_quantile(self):
10918-
from pandas.compat.scipy import scoreatpercentile
10918+
from numpy import percentile
1091910919

1092010920
q = self.tsframe.quantile(0.1, axis=0)
10921-
self.assertEqual(q['A'], scoreatpercentile(self.tsframe['A'], 10))
10921+
self.assertEqual(q['A'], percentile(self.tsframe['A'], 10))
1092210922
q = self.tsframe.quantile(0.9, axis=1)
1092310923
q = self.intframe.quantile(0.1)
10924-
self.assertEqual(q['A'], scoreatpercentile(self.intframe['A'], 10))
10924+
self.assertEqual(q['A'], percentile(self.intframe['A'], 10))
1092510925

1092610926
# test degenerate case
1092710927
q = DataFrame({'x': [], 'y': []}).quantile(0.1, axis=0)

pandas/tests/test_groupby.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1907,17 +1907,17 @@ def test_groupby_with_hier_columns(self):
19071907
self.assert_(result.columns.equals(df.columns[:-1]))
19081908

19091909
def test_pass_args_kwargs(self):
1910-
from pandas.compat.scipy import scoreatpercentile
1910+
from numpy import percentile
19111911

1912-
def f(x, q=None):
1913-
return scoreatpercentile(x, q)
1914-
g = lambda x: scoreatpercentile(x, 80)
1912+
def f(x, q=None, axis=0):
1913+
return percentile(x, q, axis=axis)
1914+
g = lambda x: percentile(x, 80, axis=0)
19151915

19161916
# Series
19171917
ts_grouped = self.ts.groupby(lambda x: x.month)
1918-
agg_result = ts_grouped.agg(scoreatpercentile, 80)
1919-
apply_result = ts_grouped.apply(scoreatpercentile, 80)
1920-
trans_result = ts_grouped.transform(scoreatpercentile, 80)
1918+
agg_result = ts_grouped.agg(percentile, 80, axis=0)
1919+
apply_result = ts_grouped.apply(percentile, 80, axis=0)
1920+
trans_result = ts_grouped.transform(percentile, 80, axis=0)
19211921

19221922
agg_expected = ts_grouped.quantile(.8)
19231923
trans_expected = ts_grouped.transform(g)
@@ -1935,7 +1935,7 @@ def f(x, q=None):
19351935

19361936
# DataFrame
19371937
df_grouped = self.tsframe.groupby(lambda x: x.month)
1938-
agg_result = df_grouped.agg(scoreatpercentile, 80)
1938+
agg_result = df_grouped.agg(percentile, 80, axis=0)
19391939
apply_result = df_grouped.apply(DataFrame.quantile, .8)
19401940
expected = df_grouped.quantile(.8)
19411941
assert_frame_equal(apply_result, expected)

pandas/tests/test_series.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2137,17 +2137,17 @@ def test_prod_numpy16_bug(self):
21372137
self.assertNotIsInstance(result, Series)
21382138

21392139
def test_quantile(self):
2140-
from pandas.compat.scipy import scoreatpercentile
2140+
from numpy import percentile
21412141

21422142
q = self.ts.quantile(0.1)
2143-
self.assertEqual(q, scoreatpercentile(self.ts.valid(), 10))
2143+
self.assertEqual(q, percentile(self.ts.valid(), 10))
21442144

21452145
q = self.ts.quantile(0.9)
2146-
self.assertEqual(q, scoreatpercentile(self.ts.valid(), 90))
2146+
self.assertEqual(q, percentile(self.ts.valid(), 90))
21472147

21482148
# object dtype
21492149
q = Series(self.ts,dtype=object).quantile(0.9)
2150-
self.assertEqual(q, scoreatpercentile(self.ts.valid(), 90))
2150+
self.assertEqual(q, percentile(self.ts.valid(), 90))
21512151

21522152
def test_describe(self):
21532153
_ = self.series.describe()

pandas/tseries/tests/test_timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ def test_timedelta_ops(self):
240240

241241
result = td.quantile(.1)
242242
# This properly returned a scalar.
243-
expected = to_timedelta('00:00:02.6')
243+
expected = np.timedelta64(2599999999,'ns')
244244
tm.assert_almost_equal(result, expected)
245245

246246
result = td.median()[0]

0 commit comments

Comments
 (0)