@@ -62,7 +62,7 @@ var attrs = exports.attributes = {
62
62
} ,
63
63
func : {
64
64
valType : 'enumerated' ,
65
- values : [ 'count' , 'sum' , 'avg' , 'min' , 'max' , 'first' , 'last' ] ,
65
+ values : [ 'count' , 'sum' , 'avg' , 'median' , 'mode' , 'rms' , 'stddev' , ' min', 'max' , 'first' , 'last' ] ,
66
66
dflt : 'first' ,
67
67
role : 'info' ,
68
68
description : [
@@ -71,7 +71,16 @@ var attrs = exports.attributes = {
71
71
'in the `groups` array, are collected and reduced by this function.' ,
72
72
'*count* is simply the number of values in the `groups` array, so does' ,
73
73
'not even require the linked array to exist. *first* (*last*) is just' ,
74
- 'the first (last) linked value.'
74
+ 'the first (last) linked value.' ,
75
+ 'Invalid values are ignored, so for example in *avg* they do not' ,
76
+ 'contribute to either the numerator or the denominator.' ,
77
+ 'Any data type (numeric, date, category) may be aggregated with any' ,
78
+ 'function, even though in certain cases it is unlikely to make sense,' ,
79
+ 'for example a sum of dates or average of categories.' ,
80
+ '*median* will return the average of the two central values if there is' ,
81
+ 'an even count. *mode* will return the first value to reach the maximum' ,
82
+ 'count, in case of a tie. *stddev* uses the population formula' ,
83
+ '(denominator N, not N-1)'
75
84
] . join ( ' ' )
76
85
} ,
77
86
enabled : {
@@ -246,7 +255,7 @@ function getAggregateFunction(func, conversions) {
246
255
var total = 0 ;
247
256
for ( var i = 0 ; i < indices . length ; i ++ ) {
248
257
var vi = d2c ( array [ indices [ i ] ] ) ;
249
- if ( vi !== BADNUM ) total += + vi ;
258
+ if ( vi !== BADNUM ) total += vi ;
250
259
}
251
260
return c2d ( total ) ;
252
261
} ;
@@ -259,7 +268,7 @@ function getAggregateFunction(func, conversions) {
259
268
for ( var i = 0 ; i < indices . length ; i ++ ) {
260
269
var vi = d2c ( array [ indices [ i ] ] ) ;
261
270
if ( vi !== BADNUM ) {
262
- total += + vi ;
271
+ total += vi ;
263
272
cnt ++ ;
264
273
}
265
274
}
@@ -271,7 +280,7 @@ function getAggregateFunction(func, conversions) {
271
280
var out = Infinity ;
272
281
for ( var i = 0 ; i < indices . length ; i ++ ) {
273
282
var vi = d2c ( array [ indices [ i ] ] ) ;
274
- if ( vi !== BADNUM ) out = Math . min ( out , + vi ) ;
283
+ if ( vi !== BADNUM ) out = Math . min ( out , vi ) ;
275
284
}
276
285
return ( out === Infinity ) ? BADNUM : c2d ( out ) ;
277
286
} ;
@@ -281,10 +290,89 @@ function getAggregateFunction(func, conversions) {
281
290
var out = - Infinity ;
282
291
for ( var i = 0 ; i < indices . length ; i ++ ) {
283
292
var vi = d2c ( array [ indices [ i ] ] ) ;
284
- if ( vi !== BADNUM ) out = Math . max ( out , + vi ) ;
293
+ if ( vi !== BADNUM ) out = Math . max ( out , vi ) ;
285
294
}
286
295
return ( out === - Infinity ) ? BADNUM : c2d ( out ) ;
287
296
} ;
297
+
298
+ case 'median' :
299
+ return function ( array , indices ) {
300
+ var sortCalc = [ ] ;
301
+ for ( var i = 0 ; i < indices . length ; i ++ ) {
302
+ var vi = d2c ( array [ indices [ i ] ] ) ;
303
+ if ( vi !== BADNUM ) sortCalc . push ( vi ) ;
304
+ }
305
+ if ( ! sortCalc . length ) return BADNUM ;
306
+ sortCalc . sort ( ) ;
307
+ var mid = ( sortCalc . length - 1 ) / 2 ;
308
+ return c2d ( ( sortCalc [ Math . floor ( mid ) ] + sortCalc [ Math . ceil ( mid ) ] ) / 2 ) ;
309
+ } ;
310
+
311
+ case 'mode' :
312
+ return function ( array , indices ) {
313
+ var counts = { } ;
314
+ var maxCnt = 0 ;
315
+ var out = BADNUM ;
316
+ for ( var i = 0 ; i < indices . length ; i ++ ) {
317
+ var vi = d2c ( array [ indices [ i ] ] ) ;
318
+ if ( vi !== BADNUM ) {
319
+ var counti = counts [ vi ] = ( counts [ vi ] || 0 ) + 1 ;
320
+ if ( counti > maxCnt ) {
321
+ maxCnt = counti ;
322
+ out = vi ;
323
+ }
324
+ }
325
+ }
326
+ return maxCnt ? c2d ( out ) : BADNUM ;
327
+ } ;
328
+
329
+ case 'rms' :
330
+ return function ( array , indices ) {
331
+ var total = 0 ;
332
+ var cnt = 0 ;
333
+ for ( var i = 0 ; i < indices . length ; i ++ ) {
334
+ var vi = d2c ( array [ indices [ i ] ] ) ;
335
+ if ( vi !== BADNUM ) {
336
+ total += vi * vi ;
337
+ cnt ++ ;
338
+ }
339
+ }
340
+ return cnt ? c2d ( Math . sqrt ( total / cnt ) ) : BADNUM ;
341
+ } ;
342
+
343
+ case 'stddev' :
344
+ return function ( array , indices ) {
345
+ // balance numerical stability with performance:
346
+ // so that we call d2c once per element but don't need to
347
+ // store them, reference all to the first element
348
+ var total = 0 ;
349
+ var total2 = 0 ;
350
+ var cnt = 1 ;
351
+ var v0 = BADNUM ;
352
+ var i ;
353
+ for ( i = 0 ; i < indices . length && v0 === BADNUM ; i ++ ) {
354
+ v0 = d2c ( array [ indices [ i ] ] ) ;
355
+ }
356
+ if ( v0 === BADNUM ) return BADNUM ;
357
+
358
+ for ( ; i < indices . length ; i ++ ) {
359
+ var vi = d2c ( array [ indices [ i ] ] ) ;
360
+ if ( vi !== BADNUM ) {
361
+ var dv = vi - v0 ;
362
+ total += dv ;
363
+ total2 += dv * dv ;
364
+ cnt ++ ;
365
+ }
366
+ }
367
+
368
+ // This is population std dev, if we want sample std dev
369
+ // we would need (...) / (cnt - 1)
370
+ // Also note there's no c2d here - that means for dates the result
371
+ // is a number of milliseconds, and for categories it's a number
372
+ // of category differences, which is not generically meaningful but
373
+ // as in other cases we don't forbid it.
374
+ return Math . sqrt ( ( total2 - ( total * total / cnt ) ) / cnt ) ;
375
+ } ;
288
376
}
289
377
}
290
378
0 commit comments