Skip to content

Commit 12e4686

Browse files
committed
Address comments
1 parent 6b581f3 commit 12e4686

File tree

3 files changed

+39
-49
lines changed

3 files changed

+39
-49
lines changed

asv_bench/benchmarks/frame_ctor.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,18 @@ def setup(self, offset, n_steps):
124124

125125
def time_frame_ctor(self, offset, n_steps):
126126
DataFrame(self.d)
127+
128+
129+
class FromRecords(object):
130+
131+
goal_time = 0.2
132+
params = [None, 1000]
133+
param_names = ['nrows']
134+
135+
def setup(self, nrows):
136+
N = 100000
137+
self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
138+
139+
def time_frame_from_records_generator(self, nrows):
140+
# issue-6700
141+
self.df = DataFrame.from_records(self.gen, nrows=nrows)

asv_bench/benchmarks/frame_methods.py

Lines changed: 19 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import pandas.util.testing as tm
44
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
55
isnull, NaT)
6+
from .pandas_vb_common import setup
67

78

89
class GetNumericData(object):
910

1011
goal_time = 0.2
1112

1213
def setup(self):
13-
np.random.seed(1234)
1414
self.df = DataFrame(np.random.randn(10000, 25))
1515
self.df['foo'] = 'bar'
1616
self.df['bar'] = 'baz'
@@ -25,7 +25,6 @@ class Lookup(object):
2525
goal_time = 0.2
2626

2727
def setup(self):
28-
np.random.seed(1234)
2928
self.df = DataFrame(np.random.randn(10000, 8),
3029
columns=list('abcdefgh'))
3130
self.df['foo'] = 'bar'
@@ -49,7 +48,6 @@ class Reindex(object):
4948

5049
def setup(self):
5150
N = 10**3
52-
np.random.seed(1234)
5351
self.df = DataFrame(np.random.randn(N * 10, N))
5452
self.idx = np.arange(4 * N, 7 * N)
5553
self.df2 = DataFrame(
@@ -72,7 +70,6 @@ def time_reindex_both_axes_ix(self):
7270
self.df.ix[self.idx, self.idx]
7371

7472
def time_reindex_upcast(self):
75-
np.random.seed(1234)
7673
self.df2.reindex(np.random.permutation(range(1200)))
7774

7875

@@ -82,7 +79,6 @@ class Iteration(object):
8279

8380
def setup(self):
8481
N = 1000
85-
np.random.seed(1234)
8682
self.df = DataFrame(np.random.randn(N * 10, N))
8783
self.df2 = DataFrame(np.random.randn(N * 50, 10))
8884
self.df3 = DataFrame(np.random.randn(N, 5 * N),
@@ -107,13 +103,16 @@ def time_itertuples(self):
107103
for row in self.df2.itertuples():
108104
pass
109105

106+
def time_iterrows(self):
107+
for row in self.df.iterrows():
108+
pass
109+
110110

111111
class ToString(object):
112112

113113
goal_time = 0.2
114114

115115
def setup(self):
116-
np.random.seed(1234)
117116
self.df = DataFrame(np.random.randn(100, 10))
118117

119118
def time_to_string_floats(self):
@@ -166,7 +165,6 @@ class MaskBool(object):
166165
goal_time = 0.2
167166

168167
def setup(self):
169-
np.random.seed(1234)
170168
data = np.random.randn(1000, 500)
171169
df = DataFrame(data)
172170
df = df.where(df > 0)
@@ -186,7 +184,6 @@ class Isnull(object):
186184

187185
def setup(self):
188186
N = 10**3
189-
np.random.seed(1234)
190187
self.df_no_null = DataFrame(np.random.randn(N, N))
191188

192189
sample = np.array([np.nan, 1.0])
@@ -222,7 +219,6 @@ class Fillna(object):
222219
param_names = ['inplace', 'method']
223220

224221
def setup(self, inplace, method):
225-
np.random.seed(1234)
226222
values = np.random.randn(10000, 100)
227223
values[::2] = np.nan
228224
self.df = DataFrame(values)
@@ -238,7 +234,6 @@ class Dropna(object):
238234
param_names = ['how', 'axis']
239235

240236
def setup(self, how, axis):
241-
np.random.seed(1234)
242237
self.df = DataFrame(np.random.randn(10000, 1000))
243238
self.df.ix[50:1000, 20:50] = np.nan
244239
self.df.ix[2000:3000] = np.nan
@@ -261,7 +256,6 @@ class Count(object):
261256
param_names = ['axis']
262257

263258
def setup(self, axis):
264-
np.random.seed(1234)
265259
self.df = DataFrame(np.random.randn(10000, 1000))
266260
self.df.ix[50:1000, 20:50] = np.nan
267261
self.df.ix[2000:3000] = np.nan
@@ -289,7 +283,6 @@ class Apply(object):
289283
goal_time = 0.2
290284

291285
def setup(self):
292-
np.random.seed(1234)
293286
self.df = DataFrame(np.random.randn(1000, 100))
294287

295288
self.s = Series(np.arange(1028.0))
@@ -320,7 +313,6 @@ class Dtypes(object):
320313
goal_time = 0.2
321314

322315
def setup(self):
323-
np.random.seed(1234)
324316
self.df = DataFrame(np.random.randn(1000, 1000))
325317

326318
def time_frame_dtypes(self):
@@ -333,7 +325,6 @@ class Equals(object):
333325

334326
def setup(self):
335327
N = 10**3
336-
np.random.seed(1234)
337328
self.float_df = DataFrame(np.random.randn(N, N))
338329
self.float_df_nan = self.float_df.copy()
339330
self.float_df_nan.iloc[-1, -1] = np.nan
@@ -374,7 +365,6 @@ class Interpolate(object):
374365

375366
def setup(self, downcast):
376367
N = 10000
377-
np.random.seed(1234)
378368
# this is the worst case, where every column has NaNs.
379369
self.df = DataFrame(np.random.randn(N, 100))
380370
self.df.values[::2] = np.nan
@@ -400,32 +390,15 @@ class Shift(object):
400390
param_names = ['axis']
401391

402392
def setup(self, axis):
403-
np.random.seed(1234)
404393
self.df = DataFrame(np.random.rand(10000, 500))
405394

406395
def time_shift(self, axis):
407396
self.df.shift(1, axis=axis)
408397

409398

410-
class FromRecords(object):
411-
412-
goal_time = 0.2
413-
params = [None, 1000]
414-
param_names = ['nrows']
415-
416-
def setup(self, nrows):
417-
N = 100000
418-
self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
419-
420-
def time_frame_from_records_generator(self, nrows):
421-
# issue-6700
422-
self.df = DataFrame.from_records(self.gen, nrows=nrows)
423-
424-
425399
class Nunique(object):
426400

427401
def setup(self):
428-
np.random.seed(1234)
429402
self.df = DataFrame(np.random.randn(10000, 1000))
430403

431404
def time_frame_nunique(self):
@@ -437,7 +410,6 @@ class Duplicated(object):
437410
goal_time = 0.2
438411

439412
def setup(self):
440-
np.random.seed(1234)
441413
n = (1 << 20)
442414
t = date_range('2015-01-01', freq='S', periods=(n // 64))
443415
xs = np.random.randn(n // 64).round(2)
@@ -460,42 +432,39 @@ class XS(object):
460432
param_names = ['axis']
461433

462434
def setup(self, axis):
463-
np.random.seed(1234)
464435
self.N = 10**4
465436
self.df = DataFrame(np.random.randn(self.N, self.N))
466437

467438
def time_frame_xs(self, axis):
468439
self.df.xs(self.N / 2, axis=axis)
469440

470441

471-
class SortIndex(object):
442+
class SortValues(object):
472443

473444
goal_time = 0.2
474445
params = [True, False]
475446
param_names = ['ascending']
476447

477448
def setup(self, ascending):
478-
np.random.seed(1234)
479449
self.df = DataFrame(np.random.randn(1000000, 2), columns=list('AB'))
480450

481-
def time_frame_sort_index(self, ascending):
482-
self.df.sort_index(ascending=ascending)
451+
def time_frame_sort_values(self, ascending):
452+
self.df.sort_values(by='A', ascending=ascending)
483453

484454

485455
class SortIndexByColumns(object):
486456

487457
goal_time = 0.2
488458

489459
def setup(self):
490-
np.random.seed(1234)
491460
N = 10000
492461
K = 10
493462
self.df = DataFrame({'key1': tm.makeStringIndex(N).values.repeat(K),
494463
'key2': tm.makeStringIndex(N).values.repeat(K),
495464
'value': np.random.randn(N * K)})
496465

497-
def time_frame_sort_index_by_columns(self):
498-
self.df.sort_index(by=['key1', 'key2'])
466+
def time_frame_sort_values_by_columns(self):
467+
self.df.sort_values(by=['key1', 'key2'])
499468

500469

501470
class Quantile(object):
@@ -505,7 +474,6 @@ class Quantile(object):
505474
param_names = ['axis']
506475

507476
def setup(self, axis):
508-
np.random.seed(1234)
509477
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
510478

511479
def time_frame_quantile(self, axis):
@@ -517,7 +485,6 @@ class GetDtypeCounts(object):
517485
goal_time = 0.2
518486

519487
def setup(self):
520-
np.random.seed(1234)
521488
self.df = DataFrame(np.random.randn(10, 10000))
522489

523490
def time_frame_get_dtype_counts(self):
@@ -527,13 +494,17 @@ def time_info(self):
527494
self.df.info()
528495

529496

530-
class Nlargest(object):
497+
class NSort(object):
531498

532499
goal_time = 0.2
500+
params = ['first', 'last']
501+
param_names = ['keep']
533502

534-
def setup(self):
535-
np.random.seed(1234)
503+
def setup(self, keep):
536504
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
537505

538-
def time_frame_nlargest(self):
539-
self.df.nlargest(100, 'A')
506+
def time_nlargest(self, keep):
507+
self.df.nlargest(100, 'A', keep=keep)
508+
509+
def time_nsmallest(self, keep):
510+
self.df.nsmallest(100, 'A', keep=keep)

asv_bench/benchmarks/pandas_vb_common.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313
except ImportError:
1414
pass
1515

16-
np.random.seed(1234)
16+
# This function just needs to be imported into each benchmark file in order to
17+
# set up the random seed before each function.
18+
# http://asv.readthedocs.io/en/latest/writing_benchmarks.html
19+
def setup(*args, **kwargs):
20+
np.random.seed(1234)
1721

1822
# try em until it works!
1923
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:

0 commit comments

Comments
 (0)