Skip to content

Commit 1d0b50b

Browse files
committed
move frame, series back
1 parent b261f85 commit 1d0b50b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+306
-315
lines changed

pandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
import pandas.core.config_init
4141

4242
from pandas.core.api import *
43-
from pandas.core.arrays.sparse.api import *
43+
from pandas.core.sparse.api import *
4444
from pandas.tseries.api import *
4545
from pandas.core.computation.api import *
4646
from pandas.core.reshape.api import *

pandas/compat/pickle_compat.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,8 @@ def load_reduce(self):
5656

5757
# If classes are moved, provide compat here.
5858
_class_locations_map = {
59-
60-
# 23123, fix sparse mapping
6159
('pandas.core.sparse.array', 'SparseArray'):
62-
('pandas.core.arrays.sparse.series', 'SparseArray'),
63-
('pandas.core.sparse.series', 'SparseSeries'):
64-
('pandas.core.arrays.sparse.series', 'SparseSeries'),
65-
('pandas.core.sparse.frame', 'SparseDataFrame'):
66-
('pandas.core.arrays.sparse.frame', 'SparseDataFrame'),
60+
('pandas.core.arrays', 'SparseArray'),
6761

6862
# 15477
6963
('pandas.core.base', 'FrozenNDArray'):
@@ -75,7 +69,7 @@ def load_reduce(self):
7569
('pandas.core.series', 'TimeSeries'):
7670
('pandas.core.series', 'Series'),
7771
('pandas.sparse.series', 'SparseTimeSeries'):
78-
('pandas.core.arrays.sparse.series', 'SparseSeries'),
72+
('pandas.core.sparse.series', 'SparseSeries'),
7973

8074
# 12588, extensions moving
8175
('pandas._sparse', 'BlockIndex'):
@@ -96,11 +90,11 @@ def load_reduce(self):
9690

9791
# 15998 top-level dirs moving
9892
('pandas.sparse.array', 'SparseArray'):
99-
('pandas.core.arrays.sparse.array', 'SparseArray'),
93+
('pandas.core.arrays.sparse', 'SparseArray'),
10094
('pandas.sparse.series', 'SparseSeries'):
101-
('pandas.core.arrays.sparse.series', 'SparseSeries'),
95+
('pandas.core.sparse.series', 'SparseSeries'),
10296
('pandas.sparse.frame', 'SparseDataFrame'):
103-
('pandas.core.arrays.sparse.frame', 'SparseDataFrame'),
97+
('pandas.core.sparse.frame', 'SparseDataFrame'),
10498
('pandas.indexes.base', '_new_Index'):
10599
('pandas.core.indexes.base', '_new_Index'),
106100
('pandas.indexes.base', 'Index'):

pandas/core/arrays/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
from .timedeltas import TimedeltaArrayMixin # noqa
99
from .integer import ( # noqa
1010
IntegerArray, integer_array)
11+
from .sparse import SparseArray # noqa

pandas/core/arrays/sparse/array.py renamed to pandas/core/arrays/sparse.py

Lines changed: 249 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import division
55
# pylint: disable=E1101,E1103,W0231
66

7+
import re
78
import operator
89
import numbers
910
import numpy as np
@@ -18,6 +19,8 @@
1819

1920
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
2021
import pandas.core.common as com
22+
from pandas.core.dtypes.base import ExtensionDtype
23+
from pandas.core.dtypes.dtypes import register_extension_dtype
2124
from pandas.core.dtypes.generic import (
2225
ABCSparseSeries, ABCSeries, ABCIndexClass
2326
)
@@ -45,7 +48,252 @@
4548
import pandas.core.algorithms as algos
4649
import pandas.io.formats.printing as printing
4750

48-
from pandas.core.arrays.sparse.dtype import SparseDtype
51+
52+
# ----------------------------------------------------------------------------
53+
# Dtype
54+
55+
@register_extension_dtype
class SparseDtype(ExtensionDtype):
    """
    Dtype for data stored in :class:`SparseArray`.

    This dtype implements the pandas ExtensionDtype interface.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64
        The dtype of the underlying array storing the non-fill value values.
    fill_value : scalar, optional
        The scalar value not stored in the SparseArray. By default, this
        depends on `dtype`.

        =========== ==========
        dtype       na_value
        =========== ==========
        float       ``np.nan``
        int         ``0``
        bool        ``False``
        datetime64  ``pd.NaT``
        timedelta64 ``pd.NaT``
        =========== ==========

        The default value may be overridden by specifying a `fill_value`.

    Raises
    ------
    ValueError
        If `fill_value` is not a scalar.
    """
    # We include `_is_na_fill_value` in the metadata to avoid hash collisions
    # between SparseDtype(float, 0.0) and SparseDtype(float, nan).
    # Without is_na_fill_value in the comparison, those would be equal since
    # hash(nan) is (sometimes?) 0.
    _metadata = ('_dtype', '_fill_value', '_is_na_fill_value')

    def __init__(self, dtype=np.float64, fill_value=None):
        # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None
        # Imported lazily, as everywhere else in this class.
        from pandas.core.dtypes.missing import na_value_for_dtype
        from pandas.core.dtypes.common import (
            pandas_dtype, is_string_dtype, is_scalar
        )

        if isinstance(dtype, type(self)):
            # Unwrap an existing SparseDtype: reuse its fill value unless
            # the caller supplied one, and continue with its subtype.
            if fill_value is None:
                fill_value = dtype.fill_value
            dtype = dtype.subtype

        dtype = pandas_dtype(dtype)
        if is_string_dtype(dtype):
            # String-like subtypes are stored as object.
            dtype = np.dtype('object')

        if fill_value is None:
            fill_value = na_value_for_dtype(dtype)

        if not is_scalar(fill_value):
            raise ValueError("fill_value must be a scalar. Got {} "
                             "instead".format(fill_value))
        self._dtype = dtype
        self._fill_value = fill_value

    def __hash__(self):
        # Python3 doesn't inherit __hash__ when a base class overrides
        # __eq__, so we explicitly do it here.
        return super(SparseDtype, self).__hash__()

    def __eq__(self, other):
        """
        Compare with another SparseDtype, or a string describing one.

        Two dtypes are equal when their subtypes are equal and their fill
        values match. NA fill values match only when *both* are NA and one
        is an instance of the other's type.
        """
        # We have to override __eq__ to handle NA values in _metadata.
        # The base class does simple == checks, which fail for NA.
        if isinstance(other, compat.string_types):
            try:
                other = self.construct_from_string(other)
            except TypeError:
                return False

        if isinstance(other, type(self)):
            subtype = self.subtype == other.subtype
            if self._is_na_fill_value:
                # this case is complicated by two things:
                # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan)
                # SparseDtype(float, np.nan)     != SparseDtype(float, pd.NaT)
                # i.e. we want to treat any floating-point NaN as equal, but
                # not a floating-point NaN and a datetime NaT.
                #
                # The parentheses matter: `and` binds tighter than `or`, so
                # without them a NaN fill would compare equal to any non-NA
                # fill value of the same type (e.g. nan vs. 0.0), and the
                # comparison would not be symmetric.
                fill_value = (
                    other._is_na_fill_value and (
                        isinstance(self.fill_value,
                                   type(other.fill_value)) or
                        isinstance(other.fill_value,
                                   type(self.fill_value))
                    )
                )
            else:
                fill_value = self.fill_value == other.fill_value

            return subtype and fill_value
        return False

    @property
    def fill_value(self):
        """
        The fill value of the array.

        Converting the SparseArray to a dense ndarray will fill the
        array with this value.

        .. warning::

           It's possible to end up with a SparseArray that has ``fill_value``
           values in ``sp_values``. This can occur, for example, when setting
           ``SparseArray.fill_value`` directly.
        """
        return self._fill_value

    @property
    def _is_na_fill_value(self):
        """Whether ``fill_value`` is considered missing by ``isna``."""
        from pandas.core.dtypes.missing import isna
        return isna(self.fill_value)

    @property
    def _is_numeric(self):
        """True unless the subtype is an object dtype."""
        from pandas.core.dtypes.common import is_object_dtype
        return not is_object_dtype(self.subtype)

    @property
    def _is_boolean(self):
        """True when the subtype is a boolean dtype."""
        from pandas.core.dtypes.common import is_bool_dtype
        return is_bool_dtype(self.subtype)

    @property
    def kind(self):
        """The ``kind`` attribute of the subtype."""
        return self.subtype.kind

    @property
    def type(self):
        """The ``type`` attribute of the subtype."""
        return self.subtype.type

    @property
    def subtype(self):
        """The dtype of the stored (non-fill) values."""
        return self._dtype

    @property
    def name(self):
        """String form of the dtype: ``'Sparse[<subtype>, <fill_value>]'``."""
        return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value)

    def __repr__(self):
        return self.name

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype."""
        return SparseArray

    @classmethod
    def construct_from_string(cls, string):
        """
        Construct a SparseDtype from a string form.

        Parameters
        ----------
        string : str
            Can take the following forms.

            ================== ============================
            string             dtype
            ================== ============================
            'Sparse'           SparseDtype[np.float64, nan]
            'Sparse[int]'      SparseDtype[np.int64, 0]
            'Sparse[int64, 0]' SparseDtype[np.int64, 0]
            ================== ============================

            It is not possible to specify non-default fill values
            with a string. An argument like ``'Sparse[int, 1]'``
            will raise a ``TypeError`` because the default fill value
            for integers is 0. A fill value in the string is only
            accepted when the whole string equals ``str()`` of the
            resulting dtype.

        Returns
        -------
        SparseDtype

        Raises
        ------
        TypeError
            If `string` does not start with ``'Sparse'``, cannot be parsed
            into a valid subtype, or carries a fill value that does not
            round-trip through the constructed dtype.
        """
        msg = "Could not construct SparseDtype from '{}'".format(string)
        if string.startswith("Sparse"):
            try:
                sub_type, has_fill_value = cls._parse_subtype(string)
                result = SparseDtype(sub_type)
            except Exception:
                # Any parsing/construction failure is reported uniformly as
                # a TypeError, which is what __eq__ catches.
                raise TypeError(msg)
            else:
                msg = ("Could not construct SparseDtype from '{}'.\n\nIt "
                       "looks like the fill_value in the string is not "
                       "the default for the dtype. Non-default fill_values "
                       "are not supported. Use the 'SparseDtype()' "
                       "constructor instead.")
                if has_fill_value and str(result) != string:
                    raise TypeError(msg.format(string))
                return result
        else:
            raise TypeError(msg)

    @staticmethod
    def _parse_subtype(dtype):
        """
        Parse a string to get the subtype

        Parameters
        ----------
        dtype : str
            A string like

            * Sparse
            * Sparse[subtype]
            * Sparse[subtype, fill_value]

        Returns
        -------
        subtype : str
            The text of the subtype; ``'float64'`` for a bare ``'Sparse'``.
        has_fill_value : str or bool
            The (truthy) fill value text when one is present in the string,
            otherwise ``False``.

        Raises
        ------
        ValueError
            When the subtype cannot be extracted.
        """
        xpr = re.compile(
            r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$"
        )
        m = xpr.match(dtype)
        has_fill_value = False
        if m:
            groups = m.groupdict()
            subtype = groups['subtype']
            # An empty fill_value group means no fill value was given.
            has_fill_value = groups['fill_value'] or has_fill_value
        elif dtype == "Sparse":
            subtype = 'float64'
        else:
            raise ValueError("Cannot parse {}".format(dtype))
        return subtype, has_fill_value

    @classmethod
    def is_dtype(cls, dtype):
        """
        Check whether `dtype` is recognised by this dtype class.

        Parameters
        ----------
        dtype : object
            A dtype, a string, or an object exposing a ``dtype`` attribute
            (in which case that attribute is checked).

        Returns
        -------
        bool
            ``False`` for strings starting with ``'Sparse'`` that cannot be
            parsed into a valid numpy subtype.
        """
        dtype = getattr(dtype, 'dtype', dtype)
        if (isinstance(dtype, compat.string_types) and
                dtype.startswith("Sparse")):
            try:
                sub_type, _ = cls._parse_subtype(dtype)
                dtype = np.dtype(sub_type)
            except (ValueError, TypeError):
                # A predicate should answer "no" for malformed input rather
                # than propagate the parsing error.
                return False
        elif isinstance(dtype, cls):
            return True
        # NOTE(review): any numpy dtype passes this check, not only ones
        # obtained from a "Sparse[...]" string -- confirm this is intended.
        return isinstance(dtype, np.dtype) or dtype == 'Sparse'
294+
295+
# ----------------------------------------------------------------------------
296+
# Array
49297

50298

51299
# Shared keyword substitutions for SparseArray docstring templates.
_sparray_doc_kwargs = {'klass': 'SparseArray'}

pandas/core/arrays/sparse/api.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)