@@ -101,14 +101,13 @@ def __call__(self, alt):
101
101
bn_func = None
102
102
103
103
@functools .wraps (alt )
104
- def f (values , axis = None , skipna = True , ** kwds ):
104
+ def f (values , axis = None , skipna = True , min_count = 1 , ** kwds ):
105
105
if len (self .kwargs ) > 0 :
106
106
for k , v in compat .iteritems (self .kwargs ):
107
107
if k not in kwds :
108
108
kwds [k ] = v
109
109
try :
110
- if values .size == 0 :
111
-
110
+ if values .size < min_count :
112
111
# we either return np.nan or pd.NaT
113
112
if is_numeric_dtype (values ):
114
113
values = values .astype ('float64' )
@@ -132,7 +131,8 @@ def f(values, axis=None, skipna=True, **kwds):
132
131
if _has_infs (result ):
133
132
result = alt (values , axis = axis , skipna = skipna , ** kwds )
134
133
else :
135
- result = alt (values , axis = axis , skipna = skipna , ** kwds )
134
+ result = alt (values , axis = axis , skipna = skipna ,
135
+ min_count = min_count , ** kwds )
136
136
except Exception :
137
137
try :
138
138
result = alt (values , axis = axis , skipna = skipna , ** kwds )
@@ -292,34 +292,53 @@ def _wrap_results(result, dtype):
292
292
return result
293
293
294
294
295
- def nanany (values , axis = None , skipna = True ):
295
+ def _na_for_min_count (values , axis ):
296
+ # we either return np.nan or pd.NaT
297
+ if is_numeric_dtype (values ):
298
+ values = values .astype ('float64' )
299
+ fill_value = na_value_for_dtype (values .dtype )
300
+
301
+ if values .ndim == 1 :
302
+ return fill_value
303
+ else :
304
+ result_shape = (values .shape [:axis ] +
305
+ values .shape [axis + 1 :])
306
+ result = np .empty (result_shape , dtype = values .dtype )
307
+ result .fill (fill_value )
308
+ return result
309
+
310
+
311
def nanany(values, axis=None, skipna=True, min_count=1):
    """
    Report whether any element along ``axis`` is truthy.

    The input is first passed through ``_get_values`` with ``False`` as its
    third argument (presumably the fill used for missing entries -- confirm
    against ``_get_values``) and ``copy=skipna``; ``.any(axis)`` is then
    evaluated on the prepared values.

    ``min_count`` is accepted in the signature but is not consulted here.
    """
    prepared = _get_values(values, skipna, False, copy=skipna)
    # Only the prepared values are needed; the mask and dtype information
    # returned alongside them are ignored.
    filled, _mask, _dtype, _dtype_max = prepared
    return filled.any(axis)
298
314
299
315
300
def nanall(values, axis=None, skipna=True, min_count=1):
    """
    Report whether every element along ``axis`` is truthy.

    The input is first passed through ``_get_values`` with ``True`` as its
    third argument (presumably the fill used for missing entries -- confirm
    against ``_get_values``) and ``copy=skipna``; ``.all(axis)`` is then
    evaluated on the prepared values.

    ``min_count`` is accepted in the signature but is not consulted here.
    """
    prepared = _get_values(values, skipna, True, copy=skipna)
    # Only the prepared values are needed; the mask and dtype information
    # returned alongside them are ignored.
    filled, _mask, _dtype, _dtype_max = prepared
    return filled.all(axis)
303
319
304
320
305
321
@disallow('M8')
@bottleneck_switch()
def nansum(values, axis=None, skipna=True, min_count=1):
    """
    Sum ``values`` along ``axis``, returning NA when fewer than
    ``min_count`` values are available.

    Parameters
    ----------
    values : ndarray
    axis : int or None, default None
        Axis to reduce; ``None`` reduces over every element.
    skipna : bool, default True
        Forwarded to ``_get_values``.
    min_count : int, default 1
        Minimum number of values required along the reduced axis.

    Returns
    -------
    scalar or ndarray
        The sum after ``_maybe_null_out`` and ``_wrap_results`` have been
        applied, or the output of ``_na_for_min_count`` when the input is
        too small.
    """
    # Compare min_count with the number of elements actually being reduced.
    # len(values) only measures the first axis, which is the wrong count
    # whenever a different axis (or the whole array) is reduced.
    if axis is None:
        n_reduced = values.size
    else:
        n_reduced = values.shape[axis]
    if n_reduced < min_count:
        # A full reduction yields a scalar; flattening first makes
        # _na_for_min_count take its 1-D (scalar) path.
        na_source = values.ravel() if axis is None else values
        return _na_for_min_count(na_source, axis=axis)

    values, mask, dtype, dtype_max = _get_values(values, skipna, 0)

    # Accumulate floats in their own dtype and timedeltas in float64;
    # everything else uses the dtype suggested by _get_values.
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        dtype_sum = dtype
    elif is_timedelta64_dtype(dtype):
        dtype_sum = np.float64

    the_sum = values.sum(axis, dtype=dtype_sum)
    the_sum = _maybe_null_out(the_sum, axis, mask, min_count=min_count)

    return _wrap_results(the_sum, dtype)
318
337
319
338
320
339
@disallow ('M8' )
321
340
@bottleneck_switch ()
322
- def nanmean (values , axis = None , skipna = True ):
341
+ def nanmean (values , axis = None , skipna = True , min_count = 1 ):
323
342
values , mask , dtype , dtype_max = _get_values (values , skipna , 0 )
324
343
325
344
dtype_sum = dtype_max
@@ -345,7 +364,7 @@ def nanmean(values, axis=None, skipna=True):
345
364
346
365
@disallow ('M8' )
347
366
@bottleneck_switch ()
348
- def nanmedian (values , axis = None , skipna = True ):
367
+ def nanmedian (values , axis = None , skipna = True , min_count = 1 ):
349
368
350
369
values , mask , dtype , dtype_max = _get_values (values , skipna )
351
370
@@ -405,14 +424,14 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float):
405
424
406
425
@disallow('M8')
@bottleneck_switch(ddof=1)
def nanstd(values, axis=None, skipna=True, ddof=1, min_count=1):
    """
    Standard deviation of ``values`` along ``axis``.

    Computed as the square root of ``nanvar`` called with the same
    ``axis``, ``skipna`` and ``ddof``; the result is passed through
    ``_wrap_results`` together with the dtype of the input.

    ``min_count`` is accepted in the signature but is not forwarded to
    ``nanvar``.
    """
    variance = nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
    std = np.sqrt(variance)
    return _wrap_results(std, values.dtype)
411
430
412
431
413
432
@disallow ('M8' )
414
433
@bottleneck_switch (ddof = 1 )
415
- def nanvar (values , axis = None , skipna = True , ddof = 1 ):
434
+ def nanvar (values , axis = None , skipna = True , ddof = 1 , min_count = 1 ):
416
435
417
436
values = _values_from_object (values )
418
437
dtype = values .dtype
@@ -452,7 +471,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
452
471
453
472
454
473
@disallow ('M8' , 'm8' )
455
- def nansem (values , axis = None , skipna = True , ddof = 1 ):
474
+ def nansem (values , axis = None , skipna = True , ddof = 1 , min_count = 1 ):
456
475
var = nanvar (values , axis , skipna , ddof = ddof )
457
476
458
477
mask = isna (values )
@@ -492,7 +511,7 @@ def reduction(values, axis=None, skipna=True):
492
511
493
512
494
513
@disallow ('O' )
495
- def nanargmax (values , axis = None , skipna = True ):
514
+ def nanargmax (values , axis = None , skipna = True , min_count = 1 ):
496
515
"""
497
516
Returns -1 in the NA case
498
517
"""
@@ -503,7 +522,7 @@ def nanargmax(values, axis=None, skipna=True):
503
522
504
523
505
524
@disallow ('O' )
506
- def nanargmin (values , axis = None , skipna = True ):
525
+ def nanargmin (values , axis = None , skipna = True , min_count = 1 ):
507
526
"""
508
527
Returns -1 in the NA case
509
528
"""
@@ -514,7 +533,7 @@ def nanargmin(values, axis=None, skipna=True):
514
533
515
534
516
535
@disallow ('M8' , 'm8' )
517
- def nanskew (values , axis = None , skipna = True ):
536
+ def nanskew (values , axis = None , skipna = True , min_count = 1 ):
518
537
""" Compute the sample skewness.
519
538
520
539
The statistic computed here is the adjusted Fisher-Pearson standardized
@@ -573,7 +592,7 @@ def nanskew(values, axis=None, skipna=True):
573
592
574
593
575
594
@disallow ('M8' , 'm8' )
576
- def nankurt (values , axis = None , skipna = True ):
595
+ def nankurt (values , axis = None , skipna = True , min_count = 1 ):
577
596
""" Compute the sample excess kurtosis.
578
597
579
598
The statistic computed here is the adjusted Fisher-Pearson standardized
@@ -641,13 +660,16 @@ def nankurt(values, axis=None, skipna=True):
641
660
642
661
643
662
@disallow('M8', 'm8')
def nanprod(values, axis=None, skipna=True, min_count=1):
    """
    Multiply ``values`` along ``axis``, returning NA when fewer than
    ``min_count`` values are available.

    Parameters
    ----------
    values : ndarray
    axis : int or None, default None
        Axis to reduce; ``None`` reduces over every element.
    skipna : bool, default True
        When True and the input is not of an integer dtype, missing
        entries are replaced by 1 (on a copy) before multiplying.
    min_count : int, default 1
        Minimum number of values required along the reduced axis.

    Returns
    -------
    scalar or ndarray
        The product after ``_maybe_null_out`` has been applied, or the
        output of ``_na_for_min_count`` when the input is too small.
    """
    # Compare min_count with the number of elements actually being reduced.
    # len(values) only measures the first axis, which is the wrong count
    # whenever a different axis (or the whole array) is reduced.
    if axis is None:
        n_reduced = values.size
    else:
        n_reduced = values.shape[axis]
    if n_reduced < min_count:
        # A full reduction yields a scalar; flattening first makes
        # _na_for_min_count take its 1-D (scalar) path.
        na_source = values.ravel() if axis is None else values
        return _na_for_min_count(na_source, axis=axis)

    mask = isna(values)
    if skipna and not is_any_int_dtype(values):
        # Work on a copy so the caller's array is left untouched; 1 is the
        # multiplicative identity, so masked entries drop out of the product.
        values = values.copy()
        values[mask] = 1
    result = values.prod(axis)
    return _maybe_null_out(result, axis, mask, min_count=min_count)
651
673
652
674
653
675
def _maybe_arg_null_out (result , axis , mask , skipna ):
@@ -683,7 +705,7 @@ def _get_counts(mask, axis, dtype=float):
683
705
return np .array (count , dtype = dtype )
684
706
685
707
686
- def _maybe_null_out (result , axis , mask ):
708
+ def _maybe_null_out (result , axis , mask , min_count = 1 ):
687
709
if axis is not None and getattr (result , 'ndim' , False ):
688
710
null_mask = (mask .shape [axis ] - mask .sum (axis )) == 0
689
711
if np .any (null_mask ):
@@ -698,7 +720,7 @@ def _maybe_null_out(result, axis, mask):
698
720
result [null_mask ] = None
699
721
elif result is not tslib .NaT :
700
722
null_mask = mask .size - mask .sum ()
701
- if null_mask == 0 :
723
+ if null_mask < min_count :
702
724
result = np .nan
703
725
704
726
return result
@@ -714,7 +736,7 @@ def _zero_out_fperr(arg):
714
736
715
737
716
738
@disallow ('M8' , 'm8' )
717
- def nancorr (a , b , method = 'pearson' , min_periods = None ):
739
+ def nancorr (a , b , method = 'pearson' , min_periods = None , min_count = 1 ):
718
740
"""
719
741
a, b: ndarrays
720
742
"""
@@ -761,7 +783,7 @@ def _spearman(a, b):
761
783
762
784
763
785
@disallow ('M8' , 'm8' )
764
- def nancov (a , b , min_periods = None ):
786
+ def nancov (a , b , min_periods = None , min_count = 1 ):
765
787
if len (a ) != len (b ):
766
788
raise AssertionError ('Operands to nancov must have same size' )
767
789
0 commit comments