3
3
import pandas .util .testing as tm
4
4
from pandas import (DataFrame , Series , MultiIndex , date_range , period_range ,
5
5
isnull , NaT )
6
+ from .pandas_vb_common import setup
6
7
7
8
8
9
class GetNumericData (object ):
9
10
10
11
goal_time = 0.2
11
12
12
13
def setup (self ):
13
- np .random .seed (1234 )
14
14
self .df = DataFrame (np .random .randn (10000 , 25 ))
15
15
self .df ['foo' ] = 'bar'
16
16
self .df ['bar' ] = 'baz'
@@ -25,7 +25,6 @@ class Lookup(object):
25
25
goal_time = 0.2
26
26
27
27
def setup (self ):
28
- np .random .seed (1234 )
29
28
self .df = DataFrame (np .random .randn (10000 , 8 ),
30
29
columns = list ('abcdefgh' ))
31
30
self .df ['foo' ] = 'bar'
@@ -49,7 +48,6 @@ class Reindex(object):
49
48
50
49
def setup (self ):
51
50
N = 10 ** 3
52
- np .random .seed (1234 )
53
51
self .df = DataFrame (np .random .randn (N * 10 , N ))
54
52
self .idx = np .arange (4 * N , 7 * N )
55
53
self .df2 = DataFrame (
@@ -72,7 +70,6 @@ def time_reindex_both_axes_ix(self):
72
70
self .df .ix [self .idx , self .idx ]
73
71
74
72
def time_reindex_upcast (self ):
75
- np .random .seed (1234 )
76
73
self .df2 .reindex (np .random .permutation (range (1200 )))
77
74
78
75
@@ -82,7 +79,6 @@ class Iteration(object):
82
79
83
80
def setup (self ):
84
81
N = 1000
85
- np .random .seed (1234 )
86
82
self .df = DataFrame (np .random .randn (N * 10 , N ))
87
83
self .df2 = DataFrame (np .random .randn (N * 50 , 10 ))
88
84
self .df3 = DataFrame (np .random .randn (N , 5 * N ),
@@ -107,13 +103,16 @@ def time_itertuples(self):
107
103
for row in self .df2 .itertuples ():
108
104
pass
109
105
106
+ def time_iterrows (self ):
107
+ for row in self .df .iterrows ():
108
+ pass
109
+
110
110
111
111
class ToString (object ):
112
112
113
113
goal_time = 0.2
114
114
115
115
def setup (self ):
116
- np .random .seed (1234 )
117
116
self .df = DataFrame (np .random .randn (100 , 10 ))
118
117
119
118
def time_to_string_floats (self ):
@@ -166,7 +165,6 @@ class MaskBool(object):
166
165
goal_time = 0.2
167
166
168
167
def setup (self ):
169
- np .random .seed (1234 )
170
168
data = np .random .randn (1000 , 500 )
171
169
df = DataFrame (data )
172
170
df = df .where (df > 0 )
@@ -186,7 +184,6 @@ class Isnull(object):
186
184
187
185
def setup (self ):
188
186
N = 10 ** 3
189
- np .random .seed (1234 )
190
187
self .df_no_null = DataFrame (np .random .randn (N , N ))
191
188
192
189
sample = np .array ([np .nan , 1.0 ])
@@ -222,7 +219,6 @@ class Fillna(object):
222
219
param_names = ['inplace' , 'method' ]
223
220
224
221
def setup (self , inplace , method ):
225
- np .random .seed (1234 )
226
222
values = np .random .randn (10000 , 100 )
227
223
values [::2 ] = np .nan
228
224
self .df = DataFrame (values )
@@ -238,7 +234,6 @@ class Dropna(object):
238
234
param_names = ['how' , 'axis' ]
239
235
240
236
def setup (self , how , axis ):
241
- np .random .seed (1234 )
242
237
self .df = DataFrame (np .random .randn (10000 , 1000 ))
243
238
self .df .ix [50 :1000 , 20 :50 ] = np .nan
244
239
self .df .ix [2000 :3000 ] = np .nan
@@ -261,7 +256,6 @@ class Count(object):
261
256
param_names = ['axis' ]
262
257
263
258
def setup (self , axis ):
264
- np .random .seed (1234 )
265
259
self .df = DataFrame (np .random .randn (10000 , 1000 ))
266
260
self .df .ix [50 :1000 , 20 :50 ] = np .nan
267
261
self .df .ix [2000 :3000 ] = np .nan
@@ -289,7 +283,6 @@ class Apply(object):
289
283
goal_time = 0.2
290
284
291
285
def setup (self ):
292
- np .random .seed (1234 )
293
286
self .df = DataFrame (np .random .randn (1000 , 100 ))
294
287
295
288
self .s = Series (np .arange (1028.0 ))
@@ -320,7 +313,6 @@ class Dtypes(object):
320
313
goal_time = 0.2
321
314
322
315
def setup (self ):
323
- np .random .seed (1234 )
324
316
self .df = DataFrame (np .random .randn (1000 , 1000 ))
325
317
326
318
def time_frame_dtypes (self ):
@@ -333,7 +325,6 @@ class Equals(object):
333
325
334
326
def setup (self ):
335
327
N = 10 ** 3
336
- np .random .seed (1234 )
337
328
self .float_df = DataFrame (np .random .randn (N , N ))
338
329
self .float_df_nan = self .float_df .copy ()
339
330
self .float_df_nan .iloc [- 1 , - 1 ] = np .nan
@@ -374,7 +365,6 @@ class Interpolate(object):
374
365
375
366
def setup (self , downcast ):
376
367
N = 10000
377
- np .random .seed (1234 )
378
368
# this is the worst case, where every column has NaNs.
379
369
self .df = DataFrame (np .random .randn (N , 100 ))
380
370
self .df .values [::2 ] = np .nan
@@ -400,32 +390,15 @@ class Shift(object):
400
390
param_names = ['axis' ]
401
391
402
392
def setup (self , axis ):
403
- np .random .seed (1234 )
404
393
self .df = DataFrame (np .random .rand (10000 , 500 ))
405
394
406
395
def time_shift (self , axis ):
407
396
self .df .shift (1 , axis = axis )
408
397
409
398
410
- class FromRecords (object ):
411
-
412
- goal_time = 0.2
413
- params = [None , 1000 ]
414
- param_names = ['nrows' ]
415
-
416
- def setup (self , nrows ):
417
- N = 100000
418
- self .gen = ((x , (x * 20 ), (x * 100 )) for x in range (N ))
419
-
420
- def time_frame_from_records_generator (self , nrows ):
421
- # issue-6700
422
- self .df = DataFrame .from_records (self .gen , nrows = nrows )
423
-
424
-
425
399
class Nunique (object ):
426
400
427
401
def setup (self ):
428
- np .random .seed (1234 )
429
402
self .df = DataFrame (np .random .randn (10000 , 1000 ))
430
403
431
404
def time_frame_nunique (self ):
@@ -437,7 +410,6 @@ class Duplicated(object):
437
410
goal_time = 0.2
438
411
439
412
def setup (self ):
440
- np .random .seed (1234 )
441
413
n = (1 << 20 )
442
414
t = date_range ('2015-01-01' , freq = 'S' , periods = (n // 64 ))
443
415
xs = np .random .randn (n // 64 ).round (2 )
@@ -460,42 +432,39 @@ class XS(object):
460
432
param_names = ['axis' ]
461
433
462
434
def setup (self , axis ):
463
- np .random .seed (1234 )
464
435
self .N = 10 ** 4
465
436
self .df = DataFrame (np .random .randn (self .N , self .N ))
466
437
467
438
def time_frame_xs (self , axis ):
468
439
self .df .xs (self .N / 2 , axis = axis )
469
440
470
441
471
- class SortIndex (object ):
442
+ class SortValues (object ):
472
443
473
444
goal_time = 0.2
474
445
params = [True , False ]
475
446
param_names = ['ascending' ]
476
447
477
448
def setup (self , ascending ):
478
- np .random .seed (1234 )
479
449
self .df = DataFrame (np .random .randn (1000000 , 2 ), columns = list ('AB' ))
480
450
481
- def time_frame_sort_index (self , ascending ):
482
- self .df .sort_index ( ascending = ascending )
451
+ def time_frame_sort_values (self , ascending ):
452
+ self .df .sort_values ( by = 'A' , ascending = ascending )
483
453
484
454
485
455
class SortIndexByColumns (object ):
486
456
487
457
goal_time = 0.2
488
458
489
459
def setup (self ):
490
- np .random .seed (1234 )
491
460
N = 10000
492
461
K = 10
493
462
self .df = DataFrame ({'key1' : tm .makeStringIndex (N ).values .repeat (K ),
494
463
'key2' : tm .makeStringIndex (N ).values .repeat (K ),
495
464
'value' : np .random .randn (N * K )})
496
465
497
- def time_frame_sort_index_by_columns (self ):
498
- self .df .sort_index (by = ['key1' , 'key2' ])
466
+ def time_frame_sort_values_by_columns (self ):
467
+ self .df .sort_values (by = ['key1' , 'key2' ])
499
468
500
469
501
470
class Quantile (object ):
@@ -505,7 +474,6 @@ class Quantile(object):
505
474
param_names = ['axis' ]
506
475
507
476
def setup (self , axis ):
508
- np .random .seed (1234 )
509
477
self .df = DataFrame (np .random .randn (1000 , 3 ), columns = list ('ABC' ))
510
478
511
479
def time_frame_quantile (self , axis ):
@@ -517,7 +485,6 @@ class GetDtypeCounts(object):
517
485
goal_time = 0.2
518
486
519
487
def setup (self ):
520
- np .random .seed (1234 )
521
488
self .df = DataFrame (np .random .randn (10 , 10000 ))
522
489
523
490
def time_frame_get_dtype_counts (self ):
@@ -527,13 +494,17 @@ def time_info(self):
527
494
self .df .info ()
528
495
529
496
530
- class Nlargest (object ):
497
+ class NSort (object ):
531
498
532
499
goal_time = 0.2
500
+ params = ['first' , 'last' ]
501
+ param_names = ['keep' ]
533
502
534
- def setup (self ):
535
- np .random .seed (1234 )
503
+ def setup (self , keep ):
536
504
self .df = DataFrame (np .random .randn (1000 , 3 ), columns = list ('ABC' ))
537
505
538
- def time_frame_nlargest (self ):
539
- self .df .nlargest (100 , 'A' )
506
+ def time_nlargest (self , keep ):
507
+ self .df .nlargest (100 , 'A' , keep = keep )
508
+
509
+ def time_nsmallest (self , keep ):
510
+ self .df .nsmallest (100 , 'A' , keep = keep )
0 commit comments