diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 77d8cea4de507..0beebbcdecfd2 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -1087,7 +1087,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, sum_wt = 1. sum_wt2 = 1. old_wt = 1. - + for i from 1 <= i < N: cur_x = input_x[i] cur_y = input_y[i] @@ -1117,7 +1117,7 @@ def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, elif is_observation: mean_x = cur_x mean_y = cur_y - + if nobs >= minp: if not bias: numerator = sum_wt * sum_wt @@ -1344,10 +1344,32 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): #------------------------------------------------------------------------------- # Rolling skewness -def roll_skew(ndarray[double_t] input, int win, int minp): +@cython.boundscheck(False) +@cython.wraparound(False) +def _get_zscores(ndarray[double_t] inp): + """removes mean and scales variance to one""" + cdef: + ndarray[double_t] out + ndarray[np.uint8_t, ndim=1, cast=True] mask + double_t mu, sigma + + mask = np.isfinite(inp) + if not mask.any(): + return inp + + mu = inp[mask].mean() + out = inp - mu + sigma = out[mask].std() + if sigma > 0 and not np.isclose(sigma, 0.0): + out[mask] /= sigma + + return out + +def roll_skew(ndarray[double_t] inp, int win, int minp): cdef double val, prev cdef double x = 0, xx = 0, xxx = 0 cdef Py_ssize_t nobs = 0, i + cdef ndarray[double_t] input = _get_zscores(inp) cdef Py_ssize_t N = len(input) cdef ndarray[double_t] output = np.empty(N, dtype=float) @@ -1405,11 +1427,12 @@ def roll_skew(ndarray[double_t] input, int win, int minp): # Rolling kurtosis -def roll_kurt(ndarray[double_t] input, +def roll_kurt(ndarray[double_t] inp, int win, int minp): cdef double val, prev cdef double x = 0, xx = 0, xxx = 0, xxxx = 0 cdef Py_ssize_t nobs = 0, i + cdef ndarray[double_t] input = _get_zscores(inp) cdef Py_ssize_t N = len(input) cdef ndarray[double_t] output = np.empty(N, dtype=float) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 1d0be4ce48f4f..7165f7f11d648 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -321,6 +321,21 @@ def test_rolling_kurt(self): self._check_moment_func(mom.rolling_kurt, lambda x: kurtosis(x, bias=False)) + def test_affine_invariance(self): + """ + rolling skew/kurt should be invariant under affine transformations + """ + + xs = np.random.rand(50) + window = 10 + + for f in mom.rolling_skew, mom.rolling_kurt: + left = f(xs, window) + + for a, b in [(1, 100), (1, 5000), (100, 100), (100, 5000)]: + right = f(a*xs + b, window) + assert_almost_equal(left, right) + def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture if PY3: @@ -524,7 +539,7 @@ def test_ewma(self): self.assertTrue(np.abs(result - 1) < 1e-2) s = Series([1.0, 2.0, 4.0, 8.0]) - + expected = Series([1.0, 1.6, 2.736842, 4.923077]) for f in [lambda s: mom.ewma(s, com=2.0, adjust=True), lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False), @@ -750,7 +765,7 @@ def _non_null_values(x): for (std, var, cov) in [(std_biased, var_biased, cov_biased), (std_unbiased, var_unbiased, cov_unbiased)]: - + # check that var(x), std(x), and cov(x) are all >= 0 var_x = var(x) std_x = std(x) @@ -762,7 +777,7 @@ def _non_null_values(x): # check that var(x) == cov(x, x) assert_equal(var_x, cov_x_x) - + # check that var(x) == std(x)^2 assert_equal(var_x, std_x * std_x) @@ -796,7 +811,7 @@ def _non_null_values(x): cov_x_y = cov(x, y) cov_y_x = cov(y, x) assert_equal(cov_x_y, cov_y_x) - + # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2 var_x_plus_y = var(x + y) var_y = var(y) @@ -1007,7 +1022,7 @@ def test_rolling_consistency(self): expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j], window=window, min_periods=min_periods, center=center) assert_panel_equal(rolling_f_result, expected) - + # binary moments def test_rolling_cov(self): A = self.series @@ -1432,7 +1447,7 @@ def test_expanding_corr_pairwise_diff_length(self): assert_frame_equal(result2, expected) assert_frame_equal(result3, expected) assert_frame_equal(result4, expected) - + def test_pairwise_stats_column_names_order(self): # GH 7738 df1s = [DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0,1]),