Skip to content

Commit 07402f6

Browse files
authored
PERF: use fastpaths for is_period_dtype checks (#33937)
1 parent 911e19b commit 07402f6

20 files changed

+72
-71
lines changed

pandas/_testing.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,11 +1519,13 @@ def box_expected(expected, box_cls, transpose=True):
15191519

15201520
def to_array(obj):
15211521
# temporary implementation until we get pd.array in place
1522-
if is_period_dtype(obj):
1522+
dtype = getattr(obj, "dtype", None)
1523+
1524+
if is_period_dtype(dtype):
15231525
return period_array(obj)
1524-
elif is_datetime64_dtype(obj) or is_datetime64tz_dtype(obj):
1526+
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
15251527
return DatetimeArray._from_sequence(obj)
1526-
elif is_timedelta64_dtype(obj):
1528+
elif is_timedelta64_dtype(dtype):
15271529
return TimedeltaArray._from_sequence(obj)
15281530
else:
15291531
return np.array(obj)

pandas/core/algorithms.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas._libs import Timestamp, algos, hashtable as htable, lib
1313
from pandas._libs.tslib import iNaT
14-
from pandas._typing import AnyArrayLike
14+
from pandas._typing import AnyArrayLike, DtypeObj
1515
from pandas.util._decorators import doc
1616

1717
from pandas.core.dtypes.cast import (
@@ -126,20 +126,21 @@ def _ensure_data(values, dtype=None):
126126
return ensure_object(values), "object"
127127

128128
# datetimelike
129-
if needs_i8_conversion(values) or needs_i8_conversion(dtype):
130-
if is_period_dtype(values) or is_period_dtype(dtype):
129+
vals_dtype = getattr(values, "dtype", None)
130+
if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
131+
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
131132
from pandas import PeriodIndex
132133

133134
values = PeriodIndex(values)
134135
dtype = values.dtype
135-
elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype):
136+
elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
136137
from pandas import TimedeltaIndex
137138

138139
values = TimedeltaIndex(values)
139140
dtype = values.dtype
140141
else:
141142
# Datetime
142-
if values.ndim > 1 and is_datetime64_ns_dtype(values):
143+
if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
143144
# Avoid calling the DatetimeIndex constructor as it is 1D only
144145
# Note: this path is reached via DataFrame.rank calls; see GH#27027
145146
# TODO(EA2D): special case not needed with 2D EAs
@@ -154,7 +155,7 @@ def _ensure_data(values, dtype=None):
154155

155156
return values.asi8, dtype
156157

157-
elif is_categorical_dtype(values) and (
158+
elif is_categorical_dtype(vals_dtype) and (
158159
is_categorical_dtype(dtype) or dtype is None
159160
):
160161
values = values.codes
@@ -1080,7 +1081,7 @@ def nsmallest(self):
10801081
return self.compute("nsmallest")
10811082

10821083
@staticmethod
1083-
def is_valid_dtype_n_method(dtype) -> bool:
1084+
def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
10841085
"""
10851086
Helper function to determine if dtype is valid for
10861087
nsmallest/nlargest methods
@@ -1863,7 +1864,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
18631864

18641865
is_timedelta = False
18651866
is_bool = False
1866-
if needs_i8_conversion(arr):
1867+
if needs_i8_conversion(arr.dtype):
18671868
dtype = np.float64
18681869
arr = arr.view("i8")
18691870
na = iNaT

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,7 @@ def _internal_get_values(self):
14241424
Index if datetime / periods.
14251425
"""
14261426
# if we are a datetime or period index, return Index to keep metadata
1427-
if needs_i8_conversion(self.categories):
1427+
if needs_i8_conversion(self.categories.dtype):
14281428
return self.categories.take(self._codes, fill_value=np.nan)
14291429
elif is_integer_dtype(self.categories) and -1 in self._codes:
14301430
return self.categories.astype("object").take(self._codes, fill_value=np.nan)

pandas/core/arrays/datetimelike.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,7 @@ def _validate_searchsorted_value(self, value):
782782
elif is_list_like(value) and not isinstance(value, type(self)):
783783
value = array(value)
784784

785-
if not type(self)._is_recognized_dtype(value):
785+
if not type(self)._is_recognized_dtype(value.dtype):
786786
raise TypeError(
787787
"searchsorted requires compatible dtype or scalar, "
788788
f"not {type(value).__name__}"
@@ -806,7 +806,7 @@ def _validate_setitem_value(self, value):
806806
except ValueError:
807807
pass
808808

809-
if not type(self)._is_recognized_dtype(value):
809+
if not type(self)._is_recognized_dtype(value.dtype):
810810
raise TypeError(
811811
"setitem requires compatible dtype or scalar, "
812812
f"not {type(value).__name__}"
@@ -1024,7 +1024,7 @@ def fillna(self, value=None, method=None, limit=None):
10241024
func = missing.backfill_1d
10251025

10261026
values = self._data
1027-
if not is_period_dtype(self):
1027+
if not is_period_dtype(self.dtype):
10281028
# For PeriodArray self._data is i8, which gets copied
10291029
# by `func`. Otherwise we need to make a copy manually
10301030
# to avoid modifying `self` in-place.
@@ -1109,10 +1109,7 @@ def _validate_frequency(cls, index, freq, **kwargs):
11091109
freq : DateOffset
11101110
The frequency to validate
11111111
"""
1112-
if is_period_dtype(cls):
1113-
# Frequency validation is not meaningful for Period Array/Index
1114-
return None
1115-
1112+
# TODO: this is not applicable to PeriodArray, move to correct Mixin
11161113
inferred = index.inferred_freq
11171114
if index.size == 0 or inferred == freq.freqstr:
11181115
return None
@@ -1253,7 +1250,7 @@ def _add_nat(self):
12531250
"""
12541251
Add pd.NaT to self
12551252
"""
1256-
if is_period_dtype(self):
1253+
if is_period_dtype(self.dtype):
12571254
raise TypeError(
12581255
f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
12591256
)
@@ -1293,7 +1290,7 @@ def _sub_period_array(self, other):
12931290
result : np.ndarray[object]
12941291
Array of DateOffset objects; nulls represented by NaT.
12951292
"""
1296-
if not is_period_dtype(self):
1293+
if not is_period_dtype(self.dtype):
12971294
raise TypeError(
12981295
f"cannot subtract {other.dtype}-dtype from {type(self).__name__}"
12991296
)
@@ -1398,7 +1395,7 @@ def __add__(self, other):
13981395
elif lib.is_integer(other):
13991396
# This check must come after the check for np.timedelta64
14001397
# as is_integer returns True for these
1401-
if not is_period_dtype(self):
1398+
if not is_period_dtype(self.dtype):
14021399
raise integer_op_not_supported(self)
14031400
result = self._time_shift(other)
14041401

@@ -1413,7 +1410,7 @@ def __add__(self, other):
14131410
# DatetimeIndex, ndarray[datetime64]
14141411
return self._add_datetime_arraylike(other)
14151412
elif is_integer_dtype(other):
1416-
if not is_period_dtype(self):
1413+
if not is_period_dtype(self.dtype):
14171414
raise integer_op_not_supported(self)
14181415
result = self._addsub_int_array(other, operator.add)
14191416
else:
@@ -1437,6 +1434,8 @@ def __radd__(self, other):
14371434
@unpack_zerodim_and_defer("__sub__")
14381435
def __sub__(self, other):
14391436

1437+
other_dtype = getattr(other, "dtype", None)
1438+
14401439
# scalar others
14411440
if other is NaT:
14421441
result = self._sub_nat()
@@ -1450,7 +1449,7 @@ def __sub__(self, other):
14501449
elif lib.is_integer(other):
14511450
# This check must come after the check for np.timedelta64
14521451
# as is_integer returns True for these
1453-
if not is_period_dtype(self):
1452+
if not is_period_dtype(self.dtype):
14541453
raise integer_op_not_supported(self)
14551454
result = self._time_shift(-other)
14561455

@@ -1467,11 +1466,11 @@ def __sub__(self, other):
14671466
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
14681467
# DatetimeIndex, ndarray[datetime64]
14691468
result = self._sub_datetime_arraylike(other)
1470-
elif is_period_dtype(other):
1469+
elif is_period_dtype(other_dtype):
14711470
# PeriodIndex
14721471
result = self._sub_period_array(other)
1473-
elif is_integer_dtype(other):
1474-
if not is_period_dtype(self):
1472+
elif is_integer_dtype(other_dtype):
1473+
if not is_period_dtype(self.dtype):
14751474
raise integer_op_not_supported(self)
14761475
result = self._addsub_int_array(other, operator.sub)
14771476
else:
@@ -1520,7 +1519,7 @@ def __iadd__(self, other):
15201519
result = self + other
15211520
self[:] = result[:]
15221521

1523-
if not is_period_dtype(self):
1522+
if not is_period_dtype(self.dtype):
15241523
# restore freq, which is invalidated by setitem
15251524
self._freq = result._freq
15261525
return self
@@ -1529,7 +1528,7 @@ def __isub__(self, other):
15291528
result = self - other
15301529
self[:] = result[:]
15311530

1532-
if not is_period_dtype(self):
1531+
if not is_period_dtype(self.dtype):
15331532
# restore freq, which is invalidated by setitem
15341533
self._freq = result._freq
15351534
return self
@@ -1621,7 +1620,7 @@ def mean(self, skipna=True):
16211620
-----
16221621
mean is only defined for Datetime and Timedelta dtypes, not for Period.
16231622
"""
1624-
if is_period_dtype(self):
1623+
if is_period_dtype(self.dtype):
16251624
# See discussion in GH#24757
16261625
raise TypeError(
16271626
f"mean is not implemented for {type(self).__name__} since the "

pandas/core/arrays/period.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -828,9 +828,11 @@ def period_array(
828828
['2000Q1', '2000Q2', '2000Q3', '2000Q4']
829829
Length: 4, dtype: period[Q-DEC]
830830
"""
831-
if is_datetime64_dtype(data):
831+
data_dtype = getattr(data, "dtype", None)
832+
833+
if is_datetime64_dtype(data_dtype):
832834
return PeriodArray._from_datetime64(data, freq)
833-
if is_period_dtype(data):
835+
if is_period_dtype(data_dtype):
834836
return PeriodArray(data, freq)
835837

836838
# other iterable of some kind

pandas/core/dtypes/missing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool:
424424
return True
425425

426426
# NaNs can occur in float and complex arrays.
427-
if is_float_dtype(left) or is_complex_dtype(left):
427+
if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
428428

429429
# empty
430430
if not (np.prod(left.shape) and np.prod(right.shape)):
@@ -435,7 +435,7 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool:
435435
# GH#29553 avoid numpy deprecation warning
436436
return False
437437

438-
elif needs_i8_conversion(left) or needs_i8_conversion(right):
438+
elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
439439
# datetime64, timedelta64, Period
440440
if not is_dtype_equal(left.dtype, right.dtype):
441441
return False
@@ -460,7 +460,7 @@ def _infer_fill_value(val):
460460
if not is_list_like(val):
461461
val = [val]
462462
val = np.array(val, copy=False)
463-
if needs_i8_conversion(val):
463+
if needs_i8_conversion(val.dtype):
464464
return np.array("NaT", dtype=val.dtype)
465465
elif is_object_dtype(val.dtype):
466466
dtype = lib.infer_dtype(ensure_object(val), skipna=False)

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5916,7 +5916,7 @@ def extract_values(arr):
59165916
if isinstance(arr, (ABCIndexClass, ABCSeries)):
59175917
arr = arr._values
59185918

5919-
if needs_i8_conversion(arr):
5919+
if needs_i8_conversion(arr.dtype):
59205920
if is_extension_array_dtype(arr.dtype):
59215921
arr = arr.asi8
59225922
else:

pandas/core/indexes/accessors.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def _get_values(self):
4949
elif is_timedelta64_dtype(data.dtype):
5050
return TimedeltaIndex(data, copy=False, name=self.name)
5151

52-
elif is_period_dtype(data):
52+
elif is_period_dtype(data.dtype):
5353
return PeriodArray(data, copy=False)
5454

5555
raise TypeError(
@@ -449,7 +449,7 @@ def __new__(cls, data: "Series"):
449449
return DatetimeProperties(data, orig)
450450
elif is_timedelta64_dtype(data.dtype):
451451
return TimedeltaProperties(data, orig)
452-
elif is_period_dtype(data):
452+
elif is_period_dtype(data.dtype):
453453
return PeriodProperties(data, orig)
454454

455455
raise AttributeError("Can only use .dt accessor with datetimelike values")

pandas/core/indexes/datetimelike.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def __array_wrap__(self, result, context=None):
118118
return result
119119

120120
attrs = self._get_attributes_dict()
121-
if not is_period_dtype(self) and attrs["freq"]:
121+
if not is_period_dtype(self.dtype) and attrs["freq"]:
122122
# no need to infer if freq is None
123123
attrs["freq"] = "infer"
124124
return Index(result, **attrs)
@@ -542,7 +542,7 @@ def delete(self, loc):
542542
new_i8s = np.delete(self.asi8, loc)
543543

544544
freq = None
545-
if is_period_dtype(self):
545+
if is_period_dtype(self.dtype):
546546
freq = self.freq
547547
elif is_integer(loc):
548548
if loc in (0, -len(self), -1, len(self) - 1):

pandas/core/internals/blocks.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2673,13 +2673,13 @@ def get_block_type(values, dtype=None):
26732673
elif is_categorical_dtype(values.dtype):
26742674
cls = CategoricalBlock
26752675
elif issubclass(vtype, np.datetime64):
2676-
assert not is_datetime64tz_dtype(values)
2676+
assert not is_datetime64tz_dtype(values.dtype)
26772677
cls = DatetimeBlock
2678-
elif is_datetime64tz_dtype(values):
2678+
elif is_datetime64tz_dtype(values.dtype):
26792679
cls = DatetimeTZBlock
26802680
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
26812681
cls = ObjectValuesExtensionBlock
2682-
elif is_extension_array_dtype(values):
2682+
elif is_extension_array_dtype(values.dtype):
26832683
cls = ExtensionBlock
26842684
elif issubclass(vtype, np.floating):
26852685
cls = FloatBlock

pandas/core/missing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ def interpolate_1d(
266266
if method in ("values", "index"):
267267
inds = np.asarray(xvalues)
268268
# hack for DatetimeIndex, #1646
269-
if needs_i8_conversion(inds.dtype.type):
269+
if needs_i8_conversion(inds.dtype):
270270
inds = inds.view(np.int64)
271271
if inds.dtype == np.object_:
272272
inds = lib.maybe_convert_objects(inds)

pandas/core/nanops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def _get_values(
287287

288288
dtype = values.dtype
289289

290-
if needs_i8_conversion(values):
290+
if needs_i8_conversion(values.dtype):
291291
# changing timedelta64/datetime64 to int64 needs to happen after
292292
# finding `mask` above
293293
values = np.asarray(values.view("i8"))

pandas/core/reshape/reshape.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,10 @@ def get_new_values(self, values, fill_value=None):
232232
# we need to convert to a basic dtype
233233
# and possibly coerce an input to our output dtype
234234
# e.g. ints -> floats
235-
if needs_i8_conversion(values):
235+
if needs_i8_conversion(values.dtype):
236236
sorted_values = sorted_values.view("i8")
237237
new_values = new_values.view("i8")
238-
elif is_bool_dtype(values):
238+
elif is_bool_dtype(values.dtype):
239239
sorted_values = sorted_values.astype("object")
240240
new_values = new_values.astype("object")
241241
else:
@@ -253,7 +253,7 @@ def get_new_values(self, values, fill_value=None):
253253
)
254254

255255
# reconstruct dtype if needed
256-
if needs_i8_conversion(values):
256+
if needs_i8_conversion(values.dtype):
257257
new_values = new_values.view(values.dtype)
258258

259259
return new_values, new_mask

pandas/io/json/_json.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ def __init__(
306306
if len(timedeltas):
307307
obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
308308
# Convert PeriodIndex to datetimes before serializing
309-
if is_period_dtype(obj.index):
309+
if is_period_dtype(obj.index.dtype):
310310
obj.index = obj.index.to_timestamp()
311311

312312
# exclude index from obj if index=False

0 commit comments

Comments
 (0)