Skip to content

Commit b6ccc01

Browse files
committed
more aggregate functions
median, mode, rms, stddev and some improved docs
1 parent 6d2d32c commit b6ccc01

File tree

2 files changed

+129
-6
lines changed

2 files changed

+129
-6
lines changed

src/transforms/aggregate.js

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ var attrs = exports.attributes = {
6262
},
6363
func: {
6464
valType: 'enumerated',
65-
values: ['count', 'sum', 'avg', 'min', 'max', 'first', 'last'],
65+
values: ['count', 'sum', 'avg', 'median', 'mode', 'rms', 'stddev', 'min', 'max', 'first', 'last'],
6666
dflt: 'first',
6767
role: 'info',
6868
description: [
@@ -71,7 +71,16 @@ var attrs = exports.attributes = {
7171
'in the `groups` array, are collected and reduced by this function.',
7272
'*count* is simply the number of values in the `groups` array, so does',
7373
'not even require the linked array to exist. *first* (*last*) is just',
74-
'the first (last) linked value.'
74+
'the first (last) linked value.',
75+
'Invalid values are ignored, so for example in *avg* they do not',
76+
'contribute to either the numerator or the denominator.',
77+
'Any data type (numeric, date, category) may be aggregated with any',
78+
'function, even though in certain cases it is unlikely to make sense,',
79+
'for example a sum of dates or average of categories.',
80+
'*median* will return the average of the two central values if there is',
81+
'an even count. *mode* will return the first value to reach the maximum',
82+
'count, in case of a tie. *stddev* uses the population formula',
83+
'(denominator N, not N-1)'
7584
].join(' ')
7685
},
7786
enabled: {
@@ -246,7 +255,7 @@ function getAggregateFunction(func, conversions) {
246255
var total = 0;
247256
for(var i = 0; i < indices.length; i++) {
248257
var vi = d2c(array[indices[i]]);
249-
if(vi !== BADNUM) total += +vi;
258+
if(vi !== BADNUM) total += vi;
250259
}
251260
return c2d(total);
252261
};
@@ -259,7 +268,7 @@ function getAggregateFunction(func, conversions) {
259268
for(var i = 0; i < indices.length; i++) {
260269
var vi = d2c(array[indices[i]]);
261270
if(vi !== BADNUM) {
262-
total += +vi;
271+
total += vi;
263272
cnt++;
264273
}
265274
}
@@ -271,7 +280,7 @@ function getAggregateFunction(func, conversions) {
271280
var out = Infinity;
272281
for(var i = 0; i < indices.length; i++) {
273282
var vi = d2c(array[indices[i]]);
274-
if(vi !== BADNUM) out = Math.min(out, +vi);
283+
if(vi !== BADNUM) out = Math.min(out, vi);
275284
}
276285
return (out === Infinity) ? BADNUM : c2d(out);
277286
};
@@ -281,10 +290,89 @@ function getAggregateFunction(func, conversions) {
281290
var out = -Infinity;
282291
for(var i = 0; i < indices.length; i++) {
283292
var vi = d2c(array[indices[i]]);
284-
if(vi !== BADNUM) out = Math.max(out, +vi);
293+
if(vi !== BADNUM) out = Math.max(out, vi);
285294
}
286295
return (out === -Infinity) ? BADNUM : c2d(out);
287296
};
297+
298+
case 'median':
299+
return function(array, indices) {
300+
var sortCalc = [];
301+
for(var i = 0; i < indices.length; i++) {
302+
var vi = d2c(array[indices[i]]);
303+
if(vi !== BADNUM) sortCalc.push(vi);
304+
}
305+
if(!sortCalc.length) return BADNUM;
306+
sortCalc.sort();
307+
var mid = (sortCalc.length - 1) / 2;
308+
return c2d((sortCalc[Math.floor(mid)] + sortCalc[Math.ceil(mid)]) / 2);
309+
};
310+
311+
case 'mode':
312+
return function(array, indices) {
313+
var counts = {};
314+
var maxCnt = 0;
315+
var out = BADNUM;
316+
for(var i = 0; i < indices.length; i++) {
317+
var vi = d2c(array[indices[i]]);
318+
if(vi !== BADNUM) {
319+
var counti = counts[vi] = (counts[vi] || 0) + 1;
320+
if(counti > maxCnt) {
321+
maxCnt = counti;
322+
out = vi;
323+
}
324+
}
325+
}
326+
return maxCnt ? c2d(out) : BADNUM;
327+
};
328+
329+
case 'rms':
330+
return function(array, indices) {
331+
var total = 0;
332+
var cnt = 0;
333+
for(var i = 0; i < indices.length; i++) {
334+
var vi = d2c(array[indices[i]]);
335+
if(vi !== BADNUM) {
336+
total += vi * vi;
337+
cnt++;
338+
}
339+
}
340+
return cnt ? c2d(Math.sqrt(total / cnt)) : BADNUM;
341+
};
342+
343+
case 'stddev':
344+
return function(array, indices) {
345+
// balance numerical stability with performance:
346+
// so that we call d2c once per element but don't need to
347+
// store them, reference all to the first element
348+
var total = 0;
349+
var total2 = 0;
350+
var cnt = 1;
351+
var v0 = BADNUM;
352+
var i;
353+
for(i = 0; i < indices.length && v0 === BADNUM; i++) {
354+
v0 = d2c(array[indices[i]]);
355+
}
356+
if(v0 === BADNUM) return BADNUM;
357+
358+
for(; i < indices.length; i++) {
359+
var vi = d2c(array[indices[i]]);
360+
if(vi !== BADNUM) {
361+
var dv = vi - v0;
362+
total += dv;
363+
total2 += dv * dv;
364+
cnt++;
365+
}
366+
}
367+
368+
// This is population std dev, if we want sample std dev
369+
// we would need (...) / (cnt - 1)
370+
// Also note there's no c2d here - that means for dates the result
371+
// is a number of milliseconds, and for categories it's a number
372+
// of category differences, which is not generically meaningful but
373+
// as in other cases we don't forbid it.
374+
return Math.sqrt((total2 - (total * total / cnt)) / cnt);
375+
};
288376
}
289377
}
290378

test/jasmine/tests/transform_aggregate_test.js

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ describe('aggregate', function() {
3232
aggregations: [
3333
// missing array - the entry is ignored
3434
{target: '', func: 'avg'},
35+
// disabled explicitly
36+
{target: 'x', func: 'avg', enabled: false},
3537
{target: 'x', func: 'sum'},
3638
// non-numerics will not count toward numerator or denominator for avg
3739
{target: 'y', func: 'avg'},
@@ -187,4 +189,37 @@ describe('aggregate', function() {
187189
expect(traceOut.y).toBeCloseToArray([16 / 3, 7], 5);
188190
expect(traceOut.marker.size).toEqual([10, 20]);
189191
});
192+
193+
// Checks each of the mode, median, rms and stddev aggregations against
// hand-computed values for a two-group aggregation of numeric arrays.
it('handles median, mode, rms, & stddev for numeric data', function() {
194+
// again, nothing is going to barf with non-numeric data, but sometimes it
195+
// won't make much sense.
196+
197+
Plotly.newPlot(gd, [{
198+
x: [1, 1, 2, 2, 1],
199+
y: [1, 2, 3, 4, 5],
200+
marker: {
201+
size: [1, 2, 3, 4, 5],
202+
line: {width: [1, 1, 2, 2, 1]}
203+
},
204+
transforms: [{
205+
type: 'aggregate',
206+
// group 1 collects indices 0, 2, 3 and 4; group 2 collects only index 1
groups: [1, 2, 1, 1, 1],
207+
aggregations: [
208+
{target: 'x', func: 'mode'},
209+
{target: 'y', func: 'median'},
210+
{target: 'marker.size', func: 'rms'},
211+
{target: 'marker.line.width', func: 'stddev'}
212+
]
213+
}]
214+
}]);
215+
216+
var traceOut = gd._fullData[0];
217+
218+
// 1 and 2 both have count of 2 in the first group,
219+
// but 2 gets to that count first
220+
expect(traceOut.x).toEqual([2, 1]);
221+
// group 1 y values are 1, 3, 4, 5 -> median is (3 + 4) / 2 = 3.5;
// group 2 is the single value 2
expect(traceOut.y).toBeCloseToArray([3.5, 2], 5);
222+
// group 1 sizes are 1, 3, 4, 5 -> mean square is (1 + 9 + 16 + 25) / 4 = 51 / 4;
// group 2 is the single value 2
expect(traceOut.marker.size).toBeCloseToArray([Math.sqrt(51 / 4), 2], 5);
223+
// group 1 widths are 1, 2, 2, 1 -> mean 1.5, population std dev 0.5;
// the single value in group 2 has zero deviation
expect(traceOut.marker.line.width).toBeCloseToArray([0.5, 0], 5);
224+
});
190225
});

0 commit comments

Comments
 (0)