Skip to content

Commit 8c50682

Browse files
jbrockmendel authored and jreback committed
Docstrings, de-duplicate EAMixin/DatetimeLikeIndex __new__ code (#21926)
1 parent df2ccef commit 8c50682

File tree

10 files changed

+173
-164
lines changed

10 files changed

+173
-164
lines changed

pandas/core/arrays/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,7 @@ class ExtensionOpsMixin(object):
634634
"""
635635
A base class for linking the operators to their dunder names
636636
"""
637+
637638
@classmethod
638639
def _add_arithmetic_ops(cls):
639640
cls.__add__ = cls._create_arithmetic_method(operator.add)

pandas/core/arrays/datetimelike.py

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77
from pandas._libs import lib, iNaT, NaT
8+
from pandas._libs.tslibs import timezones
89
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta
910
from pandas._libs.tslibs.period import (
1011
DIFFERENT_FREQ_INDEX, IncompatibleFrequency)
@@ -13,7 +14,7 @@
1314
from pandas import compat
1415

1516
from pandas.tseries import frequencies
16-
from pandas.tseries.offsets import Tick
17+
from pandas.tseries.offsets import Tick, DateOffset
1718

1819
from pandas.core.dtypes.common import (
1920
needs_i8_conversion,
@@ -23,10 +24,13 @@
2324
is_timedelta64_dtype,
2425
is_object_dtype)
2526
from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass
27+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
2628

2729
import pandas.core.common as com
2830
from pandas.core.algorithms import checked_add_with_arr
2931

32+
from .base import ExtensionOpsMixin
33+
3034

3135
def _make_comparison_op(op, cls):
3236
# TODO: share code with indexes.base version? Main difference is that
@@ -87,7 +91,7 @@ def _shallow_copy(self, values=None, **kwargs):
8791
return self._simple_new(values, **attributes)
8892

8993

90-
class DatetimeLikeArrayMixin(AttributesMixin):
94+
class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin):
9195
"""
9296
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
9397
@@ -464,7 +468,10 @@ def _addsub_offset_array(self, other, op):
464468
"{cls} not vectorized"
465469
.format(cls=type(self).__name__), PerformanceWarning)
466470

467-
res_values = op(self.astype('O').values, np.array(other))
471+
# For EA self.astype('O') returns a numpy array, not an Index
472+
left = lib.values_from_object(self.astype('O'))
473+
474+
res_values = op(left, np.array(other))
468475
kwargs = {}
469476
if not is_period_dtype(self):
470477
kwargs['freq'] = 'infer'
@@ -551,3 +558,96 @@ def validate_periods(periods):
551558
raise TypeError('periods must be a number, got {periods}'
552559
.format(periods=periods))
553560
return periods
561+
562+
563+
def validate_endpoints(closed):
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None marks both endpoints as closed; "left" / "right" mark only
        that side as closed.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if `closed` is not one of None, "left" or "right"
    """
    # The checks run in the same order as the accepted values are listed:
    # identity check for None first, then equality against each string.
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")
594+
595+
596+
def maybe_infer_freq(freq):
    """
    Normalize a user-supplied `freq` and report whether it was "infer".

    Comparing a DateOffset to the string "infer" raises, so DateOffset
    inputs are returned untouched without ever being compared.  When the
    given freq is "infer", the returned freq is None and the flag
    `freq_infer` is True, which avoids comparison trouble later on.

    Parameters
    ----------
    freq : {DateOffset, None, str}

    Returns
    -------
    freq : {DateOffset, None}
    freq_infer : bool
    """
    # Already an offset object: nothing to convert and nothing to infer.
    if isinstance(freq, DateOffset):
        return freq, False

    # Anything other than the literal "infer" (including None) is handed to
    # to_offset; a passed None therefore does not trigger inference.
    if freq != 'infer':
        return frequencies.to_offset(freq), False

    # "infer" requested: signal it via the flag and drop the string.
    return None, True
621+
622+
623+
def validate_tz_from_dtype(dtype, tz):
    """
    Reconcile an explicitly passed `tz` with the tz implied by `dtype`.

    If `dtype` can be interpreted as a DatetimeTZDtype, its tzinfo is
    extracted and checked against the given `tz`.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is None:
        return tz

    try:
        tz_dtype = DatetimeTZDtype.construct_from_string(dtype)
        implied_tz = getattr(tz_dtype, 'tz', None)
        if implied_tz is None:
            return tz

        mismatch = (tz is not None and
                    not timezones.tz_compare(tz, implied_tz))
        if mismatch:
            raise ValueError("cannot supply both a tz and a dtype"
                             " with a tz")
        tz = implied_tz
    except TypeError:
        # A TypeError anywhere above is treated as "dtype carries no tz
        # information"; the tz passed by the caller is returned unchanged.
        pass
    return tz

pandas/core/arrays/datetimes.py

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import pandas.core.common as com
3030
from pandas.core.algorithms import checked_add_with_arr
3131

32-
from pandas.tseries.frequencies import to_offset, DateOffset
32+
from pandas.tseries.frequencies import to_offset
3333
from pandas.tseries.offsets import Tick
3434

3535
from pandas.core.arrays import datetimelike as dtl
@@ -84,10 +84,11 @@ def f(self):
8484
return property(f)
8585

8686

87-
def _dt_array_cmp(opname, cls):
87+
def _dt_array_cmp(cls, op):
8888
"""
8989
Wrap comparison operations to convert datetime-like to datetime64
9090
"""
91+
opname = '__{name}__'.format(name=op.__name__)
9192
nat_result = True if opname == '__ne__' else False
9293

9394
def wrapper(self, other):
@@ -181,12 +182,10 @@ def __new__(cls, values, freq=None, tz=None):
181182
# e.g. DatetimeIndex
182183
tz = values.tz
183184

184-
if (freq is not None and not isinstance(freq, DateOffset) and
185-
freq != 'infer'):
186-
freq = to_offset(freq)
185+
freq, freq_infer = dtl.maybe_infer_freq(freq)
187186

188187
result = cls._simple_new(values, freq=freq, tz=tz)
189-
if freq == 'infer':
188+
if freq_infer:
190189
inferred = result.inferred_freq
191190
if inferred:
192191
result.freq = to_offset(inferred)
@@ -289,17 +288,7 @@ def __iter__(self):
289288
# -----------------------------------------------------------------
290289
# Comparison Methods
291290

292-
@classmethod
293-
def _add_comparison_methods(cls):
294-
"""add in comparison methods"""
295-
cls.__eq__ = _dt_array_cmp('__eq__', cls)
296-
cls.__ne__ = _dt_array_cmp('__ne__', cls)
297-
cls.__lt__ = _dt_array_cmp('__lt__', cls)
298-
cls.__gt__ = _dt_array_cmp('__gt__', cls)
299-
cls.__le__ = _dt_array_cmp('__le__', cls)
300-
cls.__ge__ = _dt_array_cmp('__ge__', cls)
301-
# TODO: Some classes pass __eq__ while others pass operator.eq;
302-
# standardize this.
291+
_create_comparison_method = classmethod(_dt_array_cmp)
303292

304293
def _has_same_tz(self, other):
305294
zzone = self._timezone
@@ -441,14 +430,7 @@ def _local_timestamps(self):
441430
This is used to calculate time-of-day information as if the timestamps
442431
were timezone-naive.
443432
"""
444-
values = self.asi8
445-
indexer = values.argsort()
446-
result = conversion.tz_convert(values.take(indexer), utc, self.tz)
447-
448-
n = len(indexer)
449-
reverse = np.empty(n, dtype=np.int_)
450-
reverse.put(indexer, np.arange(n))
451-
return result.take(reverse)
433+
return conversion.tz_convert(self.asi8, utc, self.tz)
452434

453435
def tz_convert(self, tz):
454436
"""
@@ -1102,4 +1084,4 @@ def to_julian_date(self):
11021084
) / 24.0)
11031085

11041086

1105-
DatetimeArrayMixin._add_comparison_methods()
1087+
DatetimeArrayMixin._add_comparison_ops()

pandas/core/arrays/period.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,11 @@ def f(self):
4040
return property(f)
4141

4242

43-
def _period_array_cmp(opname, cls):
43+
def _period_array_cmp(cls, op):
4444
"""
4545
Wrap comparison operations to convert Period-like to PeriodDtype
4646
"""
47+
opname = '__{name}__'.format(name=op.__name__)
4748
nat_result = True if opname == '__ne__' else False
4849

4950
def wrapper(self, other):
@@ -268,6 +269,8 @@ def asfreq(self, freq=None, how='E'):
268269
# ------------------------------------------------------------------
269270
# Arithmetic Methods
270271

272+
_create_comparison_method = classmethod(_period_array_cmp)
273+
271274
def _sub_datelike(self, other):
272275
assert other is not NaT
273276
return NotImplemented
@@ -381,18 +384,8 @@ def _maybe_convert_timedelta(self, other):
381384
raise IncompatibleFrequency(msg.format(cls=type(self).__name__,
382385
freqstr=self.freqstr))
383386

384-
@classmethod
385-
def _add_comparison_methods(cls):
386-
""" add in comparison methods """
387-
cls.__eq__ = _period_array_cmp('__eq__', cls)
388-
cls.__ne__ = _period_array_cmp('__ne__', cls)
389-
cls.__lt__ = _period_array_cmp('__lt__', cls)
390-
cls.__gt__ = _period_array_cmp('__gt__', cls)
391-
cls.__le__ = _period_array_cmp('__le__', cls)
392-
cls.__ge__ = _period_array_cmp('__ge__', cls)
393-
394387

395-
PeriodArrayMixin._add_comparison_methods()
388+
PeriodArrayMixin._add_comparison_ops()
396389

397390

398391
# -------------------------------------------------------------------

pandas/core/arrays/timedeltas.py

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import pandas.core.common as com
1919

20-
from pandas.tseries.offsets import Tick, DateOffset
20+
from pandas.tseries.offsets import Tick
2121
from pandas.tseries.frequencies import to_offset
2222

2323
from . import datetimelike as dtl
@@ -54,10 +54,11 @@ def f(self):
5454
return property(f)
5555

5656

57-
def _td_array_cmp(opname, cls):
57+
def _td_array_cmp(cls, op):
5858
"""
5959
Wrap comparison operations to convert timedelta-like to timedelta64
6060
"""
61+
opname = '__{name}__'.format(name=op.__name__)
6162
nat_result = True if opname == '__ne__' else False
6263

6364
def wrapper(self, other):
@@ -126,25 +127,23 @@ def _simple_new(cls, values, freq=None, **kwargs):
126127

127128
def __new__(cls, values, freq=None, start=None, end=None, periods=None,
128129
closed=None):
129-
if (freq is not None and not isinstance(freq, DateOffset) and
130-
freq != 'infer'):
131-
freq = to_offset(freq)
132130

133-
periods = dtl.validate_periods(periods)
131+
freq, freq_infer = dtl.maybe_infer_freq(freq)
134132

135133
if values is None:
134+
# TODO: Remove this block and associated kwargs; GH#20535
136135
if freq is None and com._any_none(periods, start, end):
137136
raise ValueError('Must provide freq argument if no data is '
138137
'supplied')
139-
else:
140-
return cls._generate_range(start, end, periods, freq,
141-
closed=closed)
138+
periods = dtl.validate_periods(periods)
139+
return cls._generate_range(start, end, periods, freq,
140+
closed=closed)
142141

143142
result = cls._simple_new(values, freq=freq)
144-
if freq == 'infer':
143+
if freq_infer:
145144
inferred = result.inferred_freq
146145
if inferred:
147-
result._freq = to_offset(inferred)
146+
result.freq = to_offset(inferred)
148147

149148
return result
150149

@@ -161,23 +160,12 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
161160
if end is not None:
162161
end = Timedelta(end)
163162

164-
left_closed = False
165-
right_closed = False
166-
167163
if start is None and end is None:
168164
if closed is not None:
169165
raise ValueError("Closed has to be None if not both of start"
170166
"and end are defined")
171167

172-
if closed is None:
173-
left_closed = True
174-
right_closed = True
175-
elif closed == "left":
176-
left_closed = True
177-
elif closed == "right":
178-
right_closed = True
179-
else:
180-
raise ValueError("Closed has to be either 'left', 'right' or None")
168+
left_closed, right_closed = dtl.validate_endpoints(closed)
181169

182170
if freq is not None:
183171
index = _generate_regular_range(start, end, periods, freq)
@@ -197,6 +185,8 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs):
197185
# ----------------------------------------------------------------
198186
# Arithmetic Methods
199187

188+
_create_comparison_method = classmethod(_td_array_cmp)
189+
200190
def _add_offset(self, other):
201191
assert not isinstance(other, Tick)
202192
raise TypeError("cannot add the type {typ} to a {cls}"
@@ -266,19 +256,6 @@ def _evaluate_with_timedelta_like(self, other, op):
266256

267257
return NotImplemented
268258

269-
# ----------------------------------------------------------------
270-
# Comparison Methods
271-
272-
@classmethod
273-
def _add_comparison_methods(cls):
274-
"""add in comparison methods"""
275-
cls.__eq__ = _td_array_cmp('__eq__', cls)
276-
cls.__ne__ = _td_array_cmp('__ne__', cls)
277-
cls.__lt__ = _td_array_cmp('__lt__', cls)
278-
cls.__gt__ = _td_array_cmp('__gt__', cls)
279-
cls.__le__ = _td_array_cmp('__le__', cls)
280-
cls.__ge__ = _td_array_cmp('__ge__', cls)
281-
282259
# ----------------------------------------------------------------
283260
# Conversion Methods - Vectorized analogues of Timedelta methods
284261

@@ -392,7 +369,7 @@ def f(x):
392369
return result
393370

394371

395-
TimedeltaArrayMixin._add_comparison_methods()
372+
TimedeltaArrayMixin._add_comparison_ops()
396373

397374

398375
# ---------------------------------------------------------------------

0 commit comments

Comments
 (0)