Skip to content

Commit 5a65a73

Browse files
PERF: dtype checks (#52279)
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent d95fc0b commit 5a65a73

28 files changed, with 147 additions (+147) and 134 deletions (-134).

pandas/_testing/asserters.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from typing import (
4+
TYPE_CHECKING,
45
Literal,
56
cast,
67
)
@@ -21,6 +22,7 @@
2122
)
2223
from pandas.core.dtypes.dtypes import (
2324
CategoricalDtype,
25+
ExtensionDtype,
2426
PandasDtype,
2527
)
2628
from pandas.core.dtypes.missing import array_equivalent
@@ -53,6 +55,9 @@
5355

5456
from pandas.io.formats.printing import pprint_thing
5557

58+
if TYPE_CHECKING:
59+
from pandas._typing import DtypeObj
60+
5661

5762
def assert_almost_equal(
5863
left,
@@ -965,7 +970,9 @@ def assert_series_equal(
965970
obj=str(obj),
966971
index_values=np.asarray(left.index),
967972
)
968-
elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype):
973+
elif isinstance(left.dtype, ExtensionDtype) and isinstance(
974+
right.dtype, ExtensionDtype
975+
):
969976
assert_extension_array_equal(
970977
left._values,
971978
right._values,
@@ -1320,7 +1327,9 @@ def assert_copy(iter1, iter2, **eql_kwargs) -> None:
13201327
assert elem1 is not elem2, msg
13211328

13221329

1323-
def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
1330+
def is_extension_array_dtype_and_needs_i8_conversion(
1331+
left_dtype: DtypeObj, right_dtype: DtypeObj
1332+
) -> bool:
13241333
"""
13251334
Checks that we have the combination of an ExtensionArraydtype and
13261335
a dtype that should be converted to int64
@@ -1331,7 +1340,7 @@ def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) ->
13311340
13321341
Related to issue #37609
13331342
"""
1334-
return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype)
1343+
return isinstance(left_dtype, ExtensionDtype) and needs_i8_conversion(right_dtype)
13351344

13361345

13371346
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:

pandas/core/apply.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@
4040
from pandas.core.dtypes.cast import is_nested_object
4141
from pandas.core.dtypes.common import (
4242
is_dict_like,
43-
is_extension_array_dtype,
4443
is_list_like,
4544
is_sequence,
4645
)
46+
from pandas.core.dtypes.dtypes import ExtensionDtype
4747
from pandas.core.dtypes.generic import (
4848
ABCDataFrame,
4949
ABCNDFrame,
@@ -940,7 +940,7 @@ def series_generator(self):
940940
ser = self.obj._ixs(0, axis=0)
941941
mgr = ser._mgr
942942

943-
if is_extension_array_dtype(ser.dtype):
943+
if isinstance(ser.dtype, ExtensionDtype):
944944
# values will be incorrect for this block
945945
# TODO(EA2D): special case would be unnecessary with 2D EAs
946946
obj = self.obj

pandas/core/arrays/categorical.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def __init__(
437437
# we're inferring from values
438438
dtype = CategoricalDtype(categories, dtype.ordered)
439439

440-
elif is_categorical_dtype(values.dtype):
440+
elif isinstance(values.dtype, CategoricalDtype):
441441
old_codes = extract_array(values)._codes
442442
codes = recode_for_categories(
443443
old_codes, values.dtype.categories, dtype.categories, copy=copy
@@ -504,9 +504,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
504504
if self.dtype is dtype:
505505
result = self.copy() if copy else self
506506

507-
elif is_categorical_dtype(dtype):
508-
dtype = cast(CategoricalDtype, dtype)
509-
507+
elif isinstance(dtype, CategoricalDtype):
510508
# GH 10696/18593/18630
511509
dtype = self.dtype.update_dtype(dtype)
512510
self = self.copy() if copy else self
@@ -2497,7 +2495,7 @@ def __init__(self, data) -> None:
24972495

24982496
@staticmethod
24992497
def _validate(data):
2500-
if not is_categorical_dtype(data.dtype):
2498+
if not isinstance(data.dtype, CategoricalDtype):
25012499
raise AttributeError("Can only use .cat accessor with a 'category' dtype")
25022500

25032501
# error: Signature of "_delegate_property_get" incompatible with supertype

pandas/core/arrays/datetimelike.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@
9191
is_integer_dtype,
9292
is_list_like,
9393
is_object_dtype,
94-
is_period_dtype,
9594
is_string_dtype,
9695
is_timedelta64_dtype,
9796
pandas_dtype,
@@ -1405,7 +1404,7 @@ def __sub__(self, other):
14051404
):
14061405
# DatetimeIndex, ndarray[datetime64]
14071406
result = self._sub_datetime_arraylike(other)
1408-
elif is_period_dtype(other_dtype):
1407+
elif isinstance(other_dtype, PeriodDtype):
14091408
# PeriodIndex
14101409
result = self._sub_periodlike(other)
14111410
elif is_integer_dtype(other_dtype):

pandas/core/arrays/interval.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
Literal,
1313
Sequence,
1414
Union,
15-
cast,
1615
overload,
1716
)
1817

@@ -55,15 +54,17 @@
5554
is_dtype_equal,
5655
is_float_dtype,
5756
is_integer_dtype,
58-
is_interval_dtype,
5957
is_list_like,
6058
is_object_dtype,
6159
is_scalar,
6260
is_string_dtype,
6361
needs_i8_conversion,
6462
pandas_dtype,
6563
)
66-
from pandas.core.dtypes.dtypes import IntervalDtype
64+
from pandas.core.dtypes.dtypes import (
65+
CategoricalDtype,
66+
IntervalDtype,
67+
)
6768
from pandas.core.dtypes.generic import (
6869
ABCDataFrame,
6970
ABCDatetimeIndex,
@@ -317,8 +318,7 @@ def _ensure_simple_new_inputs(
317318
if dtype is not None:
318319
# GH 19262: dtype must be an IntervalDtype to override inferred
319320
dtype = pandas_dtype(dtype)
320-
if is_interval_dtype(dtype):
321-
dtype = cast(IntervalDtype, dtype)
321+
if isinstance(dtype, IntervalDtype):
322322
if dtype.subtype is not None:
323323
left = left.astype(dtype.subtype)
324324
right = right.astype(dtype.subtype)
@@ -344,7 +344,7 @@ def _ensure_simple_new_inputs(
344344
f"right [{type(right).__name__}] types"
345345
)
346346
raise ValueError(msg)
347-
if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
347+
if isinstance(left.dtype, CategoricalDtype) or is_string_dtype(left.dtype):
348348
# GH 19016
349349
msg = (
350350
"category, object, and string subtypes are not supported "
@@ -752,14 +752,14 @@ def _cmp_method(self, other, op):
752752
# determine the dtype of the elements we want to compare
753753
if isinstance(other, Interval):
754754
other_dtype = pandas_dtype("interval")
755-
elif not is_categorical_dtype(other.dtype):
755+
elif not isinstance(other.dtype, CategoricalDtype):
756756
other_dtype = other.dtype
757757
else:
758758
# for categorical defer to categories for dtype
759759
other_dtype = other.categories.dtype
760760

761761
# extract intervals if we have interval categories with matching closed
762-
if is_interval_dtype(other_dtype):
762+
if isinstance(other_dtype, IntervalDtype):
763763
if self.closed != other.categories.closed:
764764
return invalid_comparison(self, other, op)
765765

@@ -768,7 +768,7 @@ def _cmp_method(self, other, op):
768768
)
769769

770770
# interval-like -> need same closed and matching endpoints
771-
if is_interval_dtype(other_dtype):
771+
if isinstance(other_dtype, IntervalDtype):
772772
if self.closed != other.closed:
773773
return invalid_comparison(self, other, op)
774774
elif not isinstance(other, Interval):
@@ -951,7 +951,7 @@ def astype(self, dtype, copy: bool = True):
951951
if dtype is not None:
952952
dtype = pandas_dtype(dtype)
953953

954-
if is_interval_dtype(dtype):
954+
if isinstance(dtype, IntervalDtype):
955955
if dtype == self.dtype:
956956
return self.copy() if copy else self
957957

@@ -1683,7 +1683,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
16831683
values = np.array(values)
16841684
values = extract_array(values, extract_numpy=True)
16851685

1686-
if is_interval_dtype(values.dtype):
1686+
if isinstance(values.dtype, IntervalDtype):
16871687
if self.closed != values.closed:
16881688
# not comparable -> no overlap
16891689
return np.zeros(self.shape, dtype=bool)

pandas/core/arrays/period.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ def astype(self, dtype, copy: bool = True):
635635
return self
636636
else:
637637
return self.copy()
638-
if is_period_dtype(dtype):
638+
if isinstance(dtype, PeriodDtype):
639639
return self.asfreq(dtype.freq)
640640

641641
if is_datetime64_any_dtype(dtype):
@@ -897,7 +897,7 @@ def period_array(
897897

898898
if is_datetime64_dtype(data_dtype):
899899
return PeriodArray._from_datetime64(data, freq)
900-
if is_period_dtype(data_dtype):
900+
if isinstance(data_dtype, PeriodDtype):
901901
return PeriodArray(data, freq=freq)
902902

903903
# other iterable of some kind
@@ -966,7 +966,7 @@ def validate_dtype_freq(
966966

967967
if dtype is not None:
968968
dtype = pandas_dtype(dtype)
969-
if not is_period_dtype(dtype):
969+
if not isinstance(dtype, PeriodDtype):
970970
raise ValueError("dtype must be PeriodDtype")
971971
if freq is None:
972972
freq = dtype.freq

pandas/core/arrays/sparse/array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
is_array_like,
4646
is_bool_dtype,
4747
is_datetime64_any_dtype,
48-
is_datetime64tz_dtype,
4948
is_dtype_equal,
5049
is_integer,
5150
is_list_like,
@@ -54,6 +53,7 @@
5453
is_string_dtype,
5554
pandas_dtype,
5655
)
56+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
5757
from pandas.core.dtypes.generic import (
5858
ABCIndex,
5959
ABCSeries,
@@ -458,7 +458,7 @@ def __init__(
458458
data = extract_array(data, extract_numpy=True)
459459
if not isinstance(data, np.ndarray):
460460
# EA
461-
if is_datetime64tz_dtype(data.dtype):
461+
if isinstance(data.dtype, DatetimeTZDtype):
462462
warnings.warn(
463463
f"Creating SparseArray from {data.dtype} data "
464464
"loses timezone information. Cast to object before "

pandas/core/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@
4242

4343
from pandas.core.dtypes.cast import can_hold_element
4444
from pandas.core.dtypes.common import (
45-
is_extension_array_dtype,
4645
is_object_dtype,
4746
is_scalar,
4847
)
48+
from pandas.core.dtypes.dtypes import ExtensionDtype
4949
from pandas.core.dtypes.generic import (
5050
ABCDataFrame,
5151
ABCIndex,
@@ -565,7 +565,7 @@ def to_numpy(
565565
array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
566566
dtype='datetime64[ns]')
567567
"""
568-
if is_extension_array_dtype(self.dtype):
568+
if isinstance(self.dtype, ExtensionDtype):
569569
return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
570570
elif kwargs:
571571
bad_keys = list(kwargs.keys())[0]
@@ -1132,7 +1132,7 @@ def _memory_usage(self, deep: bool = False) -> int:
11321132
)
11331133

11341134
v = self.array.nbytes
1135-
if deep and is_object_dtype(self) and not PYPY:
1135+
if deep and is_object_dtype(self.dtype) and not PYPY:
11361136
values = cast(np.ndarray, self._values)
11371137
v += lib.memory_usage_of_objects(values)
11381138
return v

pandas/core/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
from pandas.core.dtypes.common import (
3636
is_array_like,
3737
is_bool_dtype,
38-
is_extension_array_dtype,
3938
is_integer,
4039
)
40+
from pandas.core.dtypes.dtypes import ExtensionDtype
4141
from pandas.core.dtypes.generic import (
4242
ABCExtensionArray,
4343
ABCIndex,
@@ -122,7 +122,7 @@ def is_bool_indexer(key: Any) -> bool:
122122
and convert to an ndarray.
123123
"""
124124
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
125-
is_array_like(key) and is_extension_array_dtype(key.dtype)
125+
is_array_like(key) and isinstance(key.dtype, ExtensionDtype)
126126
):
127127
if key.dtype == np.object_:
128128
key_array = np.asarray(key)

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@
110110
is_bool,
111111
is_bool_dtype,
112112
is_datetime64_any_dtype,
113-
is_datetime64tz_dtype,
114113
is_dict_like,
115114
is_dtype_equal,
116115
is_extension_array_dtype,
@@ -123,6 +122,7 @@
123122
is_timedelta64_dtype,
124123
pandas_dtype,
125124
)
125+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
126126
from pandas.core.dtypes.generic import (
127127
ABCDataFrame,
128128
ABCSeries,
@@ -9623,7 +9623,7 @@ def align(
96239623
if self.ndim == 1 or axis == 0:
96249624
# If we are aligning timezone-aware DatetimeIndexes and the timezones
96259625
# do not match, convert both to UTC.
9626-
if is_datetime64tz_dtype(left.index.dtype):
9626+
if isinstance(left.index.dtype, DatetimeTZDtype):
96279627
if left.index.tz != right.index.tz:
96289628
if join_index is not None:
96299629
# GH#33671 copy to ensure we don't change the index on

pandas/core/groupby/generic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@
4343
from pandas.core.dtypes.common import (
4444
ensure_int64,
4545
is_bool,
46-
is_categorical_dtype,
4746
is_dict_like,
4847
is_integer_dtype,
49-
is_interval_dtype,
5048
is_numeric_dtype,
5149
is_scalar,
5250
)
51+
from pandas.core.dtypes.dtypes import (
52+
CategoricalDtype,
53+
IntervalDtype,
54+
)
5355
from pandas.core.dtypes.missing import (
5456
isna,
5557
notna,
@@ -681,7 +683,7 @@ def value_counts(
681683

682684
index_names = self.grouper.names + [self.obj.name]
683685

684-
if is_categorical_dtype(val.dtype) or (
686+
if isinstance(val.dtype, CategoricalDtype) or (
685687
bins is not None and not np.iterable(bins)
686688
):
687689
# scalar bins cannot be done at top level
@@ -717,7 +719,7 @@ def value_counts(
717719
)
718720
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
719721

720-
if is_interval_dtype(lab.dtype):
722+
if isinstance(lab.dtype, IntervalDtype):
721723
# TODO: should we do this inside II?
722724
lab_interval = cast(Interval, lab)
723725

0 commit comments

Comments
 (0)