Skip to content

CLN: use idiomatic pandas_dtypes in pandas/dtypes/common.py #24541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Jan 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
c0c87aa
CLN: use idiomatic pandas_dtypes in pandas/dtypes/common.py
jreback Jan 1, 2019
f071b20
fix imports
jreback Jan 2, 2019
3e063ba
clean asv
jreback Jan 2, 2019
2e45185
review comments
jreback Jan 2, 2019
9296042
moar clean
jreback Jan 2, 2019
2193f9e
remove extraneous
jreback Jan 2, 2019
a4f4cdb
use pandas_dtype
jreback Jan 2, 2019
a21164a
isort
jreback Jan 2, 2019
dfdc3d2
Merge branch 'master' into dtypes
jreback Jan 2, 2019
8f0a4d3
fix
jreback Jan 2, 2019
420c56a
Merge branch 'master' into dtypes
jreback Jan 3, 2019
33f33e2
Merge branch 'master' into dtypes
jreback Jan 3, 2019
abd1620
remove newline
jreback Jan 3, 2019
a0aad47
Merge branch 'master' into dtypes
jreback Jan 3, 2019
b58cf18
parametrize tests
jreback Jan 3, 2019
7cb889a
add additional types
jreback Jan 3, 2019
86a47a8
simplify issubclass a bit
jreback Jan 3, 2019
7c73269
moar clean
jreback Jan 3, 2019
d44b778
remove syntax error
jreback Jan 3, 2019
4e9887e
clean
jreback Jan 3, 2019
ee5d70f
Merge branch 'master' into dtypes
jreback Jan 3, 2019
d5cd4d6
merge
jreback Jan 3, 2019
6c72ce0
Merge branch 'master' into dtypes
jreback Jan 3, 2019
6781332
Merge branch 'master' into dtypes
jreback Jan 3, 2019
fe26970
moar
jreback Jan 3, 2019
badb3bc
introspect more types
jreback Jan 3, 2019
8252154
Merge branch 'master' into dtypes
jreback Jan 3, 2019
dd518fb
fix
jreback Jan 3, 2019
d50fb71
finalize
jreback Jan 3, 2019
0c895f5
remove base
jreback Jan 3, 2019
c93a820
Merge branch 'master' into dtypes
jreback Jan 3, 2019
f326e37
review
jreback Jan 3, 2019
18030bb
Merge branch 'master' into dtypes
jreback Jan 3, 2019
8294cc1
Merge branch 'master' into dtypes
jreback Jan 4, 2019
70b7b31
Merge branch 'master' into dtypes
jreback Jan 4, 2019
1f1d96b
doc-string
jreback Jan 4, 2019
95ef3ce
update whatsnew
jreback Jan 4, 2019
b9f4004
deprecation about MutableMapping
jreback Jan 4, 2019
76d1d86
fix warnings
jreback Jan 4, 2019
8c48457
Update doc/source/whatsnew/v0.24.0.rst
jorisvandenbossche Jan 4, 2019
7ae3cd4
Merge branch 'master' into dtypes
jreback Jan 4, 2019
3111507
Merge remote-tracking branch 'jreback/dtypes' into dtypes
jreback Jan 4, 2019
7d4bd5e
Revert "deprecation about MutableMapping"
jreback Jan 4, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions asv_bench/benchmarks/dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pandas.api.types import pandas_dtype

import numpy as np
from .pandas_vb_common import (
numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)


# NumPy dtype instances built from every plain NumPy type group imported
# from pandas_vb_common.
_numpy_dtypes = list(map(
    np.dtype, numeric_dtypes + datetime_dtypes + string_dtypes))
# Complete parameter pool: the NumPy dtypes followed by the extension dtypes.
_dtypes = _numpy_dtypes + extension_dtypes


class Dtypes(object):
    """Benchmark ``pandas_dtype`` on dtype objects and on their names."""

    # Each entry of ``_dtypes`` is benchmarked twice: once as the object
    # itself and once via its ``name`` attribute.
    params = list(_dtypes)
    params.extend(dt.name for dt in _dtypes)
    param_names = ['dtype']

    def time_pandas_dtype(self, dtype):
        """Time a single ``pandas_dtype`` call on ``dtype``."""
        pandas_dtype(dtype)


class DtypesInvalid(object):
    """Benchmark ``pandas_dtype`` on the sample inputs held in ``data_dict``."""

    param_names = ['dtype']
    params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
    # Maps each benchmark parameter label to the object passed to
    # ``pandas_dtype``.
    data_dict = {
        'scalar-string': 'foo',
        'scalar-int': 1,
        'list-string': ['foo'] * 1000,
        'array-string': np.array(['foo'] * 1000),
    }

    def time_pandas_dtype_invalid(self, dtype):
        """Time ``pandas_dtype`` on the sample selected by ``dtype``.

        A ``TypeError`` raised while doing so is swallowed, so the
        failing path is what gets measured.
        """
        try:
            sample = self.data_dict[dtype]
            pandas_dtype(sample)
        except TypeError:
            pass


from .pandas_vb_common import setup # noqa: F401
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/pandas_vb_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from importlib import import_module

import numpy as np
import pandas as pd

# Compatibility import for lib
for imp in ['pandas._libs.lib', 'pandas.lib']:
Expand All @@ -14,6 +15,15 @@
# Dtype groups imported by the benchmark modules (e.g. benchmarks/dtypes.py).
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
                  np.float64, np.int16, np.int8, np.uint16, np.uint8]
datetime_dtypes = [np.datetime64, np.timedelta64]
# Use the builtin ``object`` rather than the ``np.object`` alias: the alias
# was deprecated in NumPy 1.20 and removed in 1.24, and ``np.dtype(object)``
# is the very same object dtype.
string_dtypes = [object]
# Mix of extension dtype classes and already-constructed dtype instances
# (the tz-aware datetime and period dtypes need arguments).
extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype,
                    pd.Int32Dtype, pd.Int64Dtype,
                    pd.UInt8Dtype, pd.UInt16Dtype,
                    pd.UInt32Dtype, pd.UInt64Dtype,
                    pd.CategoricalDtype,
                    pd.IntervalDtype,
                    pd.DatetimeTZDtype('ns', 'UTC'),
                    pd.PeriodDtype('D')]


def setup(*args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ Backwards incompatible API changes
- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`)
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`)

Percentage change on groupby
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,14 @@ def tz_aware_fixture(request):
return request.param


# ----------------------------------------------------------------
# Dtypes
# Unsigned integer dtype names (lower-case spelling).
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
# Capitalized counterparts -- presumably the names of the extension-array
# ("EA") integer dtypes; confirm against pandas/core/arrays/integer.py.
UNSIGNED_EA_INT_DTYPES = ["UInt8", "UInt16", "UInt32", "UInt64"]
# Signed integer dtypes: the builtin ``int`` plus the lower-case names.
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
# Capitalized signed counterparts (same caveat as the unsigned EA list).
SIGNED_EA_INT_DTYPES = ["Int8", "Int16", "Int32", "Int64"]
# Combined lists: unsigned entries first, then signed.
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES

FLOAT_DTYPES = [float, "float32", "float64"]
COMPLEX_DTYPES = [complex, "complex64", "complex128"]
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class _IntegerDtype(ExtensionDtype):
The attributes name & type are set when these subclasses are created.
"""
name = None
base = None
type = None
na_value = np.nan

Expand Down Expand Up @@ -153,6 +154,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()

if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = _dtypes[str(np.dtype(dtype))]
Expand Down Expand Up @@ -655,7 +657,8 @@ def integer_arithmetic_method(self, other):
else:
name = dtype.capitalize()
classname = "{}Dtype".format(name)
attributes_dict = {'type': getattr(np, dtype),
numpy_dtype = getattr(np, dtype)
attributes_dict = {'type': numpy_dtype,
'name': name}
dtype_type = register_extension_dtype(
type(classname, (_IntegerDtype, ), attributes_dict)
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from pandas.compat import PY3, string_types, text_type, to_str

from .common import (
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes,
ensure_int8, ensure_int16, ensure_int32, ensure_int64, ensure_object,
is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8,
ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool,
is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal,
is_extension_array_dtype, is_extension_type, is_float, is_float_dtype,
Expand Down Expand Up @@ -544,7 +544,7 @@ def invalidate_string_dtypes(dtype_set):
"""Change string like dtypes to object for
``DataFrame.select_dtypes()``.
"""
non_string_dtypes = dtype_set - _string_dtypes
non_string_dtypes = dtype_set - {np.dtype('S').type, np.dtype('<U').type}
if non_string_dtypes != dtype_set:
raise TypeError("string dtypes are not allowed, use 'object' instead")

Expand Down
Loading