Skip to content

Commit 5eecdb2

Browse files
committed
ENH: Cython refactor, pushing more index logic into engines.pyx; unit tests are broken and the code is still a mess
1 parent b50076e commit 5eecdb2

21 files changed

+1163
-1252
lines changed

bench/bench_tseries.py

Lines changed: 0 additions & 176 deletions
This file was deleted.

pandas/core/frame.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3103,6 +3103,7 @@ def _apply_standard(self, func, axis, ignore_failures=False):
31033103
for k, v in series_gen:
31043104
results[k] = func(v)
31053105
except Exception, e:
3106+
raise # XXXXX
31063107
if hasattr(e, 'args'):
31073108
e.args = e.args + ('occurred at index %s' % str(k),)
31083109
raise

pandas/core/index.py

Lines changed: 37 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from datetime import time, datetime, date
44
from datetime import timedelta
55
from itertools import izip
6+
import weakref
67

78
import numpy as np
89

@@ -57,21 +58,26 @@ class Index(np.ndarray):
5758
----
5859
An Index instance can **only** contain hashable objects
5960
"""
61+
# _algos = {
62+
# 'groupby' : _gin.groupby_index,
63+
# }
64+
65+
# _map_indices = lib.map_indices_object
66+
6067
# Cython methods
61-
_map_indices = lib.map_indices_object
62-
_is_monotonic = lib.is_monotonic_object
6368
_groupby = lib.groupby_object
6469
_arrmap = lib.arrmap_object
6570
_left_indexer = lib.left_join_indexer_object
6671
_inner_indexer = lib.inner_join_indexer_object
6772
_outer_indexer = lib.outer_join_indexer_object
68-
_merge_indexer = lib.merge_indexer_object
6973
_pad = lib.pad_object
7074
_backfill = lib.backfill_object
7175

7276
name = None
7377
asi8 = None
7478

79+
_engine_type = _gin.ObjectEngine
80+
7581
def __new__(cls, data, dtype=None, copy=False, name=None):
7682
if isinstance(data, np.ndarray):
7783
if dtype is None:
@@ -164,25 +170,11 @@ def values(self):
164170

165171
@property
166172
def is_monotonic(self):
167-
return self._monotonicity_check[0]
168-
169-
@property
170-
def is_unique(self):
171-
is_unique = self._monotonicity_check[1]
172-
173-
if is_unique is None:
174-
return self._engine.has_integrity
175-
else:
176-
return is_unique
173+
return self._engine.is_monotonic
177174

178175
@cache_readonly
179-
def _monotonicity_check(self):
180-
try:
181-
# wrong buffer type raises ValueError
182-
is_monotonic, is_unique = self._is_monotonic(self.values)
183-
return is_monotonic, is_unique
184-
except TypeError:
185-
return False, None
176+
def is_unique(self):
177+
return self._engine.is_unique
186178

187179
def is_numeric(self):
188180
return self.inferred_type in ['integer', 'floating']
@@ -210,20 +202,13 @@ def get_duplicates(self):
210202

211203
_get_duplicates = get_duplicates
212204

213-
@property
214-
def indexMap(self):
215-
"{label -> location}"
216-
return self._engine.get_mapping(1)
217-
218205
def _cleanup(self):
219206
self._engine.clear_mapping()
220207

221208
@cache_readonly
222209
def _engine(self):
223-
import weakref
224210
# property, for now, slow to look up
225-
return _gin.DictIndexEngine(weakref.ref(self),
226-
self._map_indices)
211+
return self._engine_type(weakref.ref(self))
227212

228213
def _get_level_number(self, level):
229214
if not isinstance(level, int):
@@ -574,7 +559,15 @@ def get_loc(self, key):
574559
-------
575560
loc : int
576561
"""
577-
return self._engine.get_loc(key)
562+
# TODO: push all of this into Cython
563+
if self.is_unique:
564+
return self._engine.get_loc(key)
565+
elif self.is_monotonic:
566+
left = self.searchsorted(key, side='left')
567+
right = self.searchsorted(key, side='right')
568+
return slice(left, right)
569+
else:
570+
return self.values == key
578571

579572
def get_value(self, series, key):
580573
"""
@@ -664,7 +657,7 @@ def _get_indexer_standard(self, other):
664657
self.is_monotonic and other.is_monotonic):
665658
return self._left_indexer(other, self)
666659
else:
667-
return self._merge_indexer(other, self.indexMap)
660+
return self._engine.get_indexer(other)
668661

669662
def groupby(self, to_groupby):
670663
return self._groupby(self.values, to_groupby)
@@ -995,16 +988,19 @@ def copy(self, order='C'):
995988
class Int64Index(Index):
996989

997990
_map_indices = lib.map_indices_int64
998-
_is_monotonic = lib.is_monotonic_int64
991+
992+
# _is_monotonic = lib.is_monotonic_int64
993+
999994
_groupby = lib.groupby_int64
1000995
_arrmap = lib.arrmap_int64
1001996
_left_indexer = lib.left_join_indexer_int64
1002997
_inner_indexer = lib.inner_join_indexer_int64
1003998
_outer_indexer = lib.outer_join_indexer_int64
1004-
_merge_indexer = lib.merge_indexer_int64
1005999
_pad = lib.pad_int64
10061000
_backfill = lib.backfill_int64
10071001

1002+
_engine_type = _gin.Int64Engine
1003+
10081004
def __new__(cls, data, dtype=None, copy=False, name=None):
10091005
if not isinstance(data, np.ndarray):
10101006
if np.isscalar(data):
@@ -1172,13 +1168,12 @@ class DatetimeIndex(Int64Index):
11721168
time on or just past end argument
11731169
"""
11741170

1175-
_is_monotonic = _wrap_i8_function(lib.is_monotonic_int64)
1171+
# _is_monotonic = _wrap_i8_function(lib.is_monotonic_int64)
1172+
11761173
_inner_indexer = _join_i8_wrapper(lib.inner_join_indexer_int64)
11771174
_outer_indexer = _join_i8_wrapper(lib.outer_join_indexer_int64)
11781175
_left_indexer = _join_i8_wrapper(lib.left_join_indexer_int64,
11791176
with_indexers=False)
1180-
_merge_indexer = _join_i8_wrapper(lib.merge_indexer_int64,
1181-
with_indexers=False)
11821177
_map_indices = _wrap_i8_function(lib.map_indices_int64)
11831178
_pad = _wrap_i8_function(lib.pad_int64)
11841179
_backfill = _wrap_i8_function(lib.backfill_int64)
@@ -1199,6 +1194,10 @@ class DatetimeIndex(Int64Index):
11991194
# structured array cache for datetime fields
12001195
_sarr_cache = None
12011196

1197+
_engine_type = _gin.DatetimeEngine
1198+
1199+
offset = None
1200+
12021201
def __new__(cls, data=None,
12031202
freq=None, start=None, end=None, periods=None,
12041203
dtype=None, copy=False, name=None, tz=None,
@@ -1437,7 +1436,6 @@ def asobject(self):
14371436
"""
14381437
Unbox to an index of type object
14391438
"""
1440-
offset = getattr(self, 'offset', None)
14411439
boxed_values = _dt_box_array(self.asi8, self.offset, self.tz)
14421440
return Index(boxed_values, dtype=object)
14431441

@@ -1656,7 +1654,7 @@ def get_value(self, series, key):
16561654
know what you're doing
16571655
"""
16581656
try:
1659-
return super(DatetimeIndex, self).get_value(series, key)
1657+
return Index.get_value(self, series, key)
16601658
except KeyError:
16611659
try:
16621660
asdt, parsed, reso = datetools.parse_time_string(key)
@@ -1698,8 +1696,7 @@ def __getitem__(self, key):
16981696
arr_idx = self.view(np.ndarray)
16991697
if np.isscalar(key):
17001698
val = arr_idx[key]
1701-
offset = getattr(self, 'offset', None)
1702-
return _dt_box(val, offset=offset, tz=self.tz)
1699+
return _dt_box(val, offset=self.offset, tz=self.tz)
17031700
else:
17041701
new_offset = None
17051702
if (type(key) == slice):
@@ -1814,11 +1811,6 @@ def dtype(self):
18141811
def is_all_dates(self):
18151812
return True
18161813

1817-
@cache_readonly
1818-
def _engine(self):
1819-
mapping = lib.map_indices_int64
1820-
return _gin.DictIndexEngineDatetime(self.asi8, mapping)
1821-
18221814
def equals(self, other):
18231815
"""
18241816
Determines if two Index objects contain the same elements.
@@ -2977,7 +2969,7 @@ def get_indexer(self, target, method=None):
29772969
indexer = self._backfill(self_index, target_index,
29782970
self_index.indexMap, target.indexMap)
29792971
else:
2980-
indexer = self._merge_indexer(target_index, self_index.indexMap)
2972+
indexer = self._engine.get_indexer(target_index)
29812973

29822974
return indexer
29832975

0 commit comments

Comments
 (0)