Skip to content

BUG: DatetimeIndex with dayfirst/yearfirst and tz #55813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ Categorical

Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst``, where ``dayfirst``/``yearfirst`` was ignored (:issue:`55813`)
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,9 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
values: npt.NDArray[np.object_],
tz: tzinfo,
dayfirst: bool,
yearfirst: bool,
creso: int,
) -> npt.NDArray[np.int64]: ...
24 changes: 11 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
convert_str_to_tsobject,
convert_to_tsobject,
get_datetime64_nanos,
parse_pydatetime,
)
Expand Down Expand Up @@ -673,7 +674,9 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
def array_to_datetime_with_tz(
ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso
):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)

Expand All @@ -689,7 +692,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
Py_ssize_t i, n = values.size
object item
int64_t ival
datetime ts
_TSObject tsobj

for i in range(n):
# Analogous to `item = values[i]`
Expand All @@ -700,17 +703,12 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
ival = NPY_NAT

else:
if PyDateTime_Check(item) and item.tzinfo is not None:
# We can't call Timestamp constructor with a tz arg, have to
# do 2-step
ts = Timestamp(item).tz_convert(tz)
else:
ts = Timestamp(item, tz=tz)
if ts is NaT:
ival = NPY_NAT
else:
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value
tsobj = convert_to_tsobject(
item, tz=tz, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst, nanos=0
)
if tsobj.value != NPY_NAT:
tsobj.ensure_reso(creso, item, round_ok=True)
ival = tsobj.value

# Analogous to: result[i] = ival
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ cdef class _TSObject:
bint fold
NPY_DATETIMEUNIT creso

cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=*) except? -1
cdef int64_t ensure_reso(
self, NPY_DATETIMEUNIT creso, val=*, bint round_ok=*
) except? -1


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down
13 changes: 11 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,14 @@ cdef class _TSObject:
self.fold = 0
self.creso = NPY_FR_ns # default value

cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=None) except? -1:
cdef int64_t ensure_reso(
self, NPY_DATETIMEUNIT creso, val=None, bint round_ok=False
) except? -1:
if self.creso != creso:
try:
self.value = convert_reso(self.value, self.creso, creso, False)
self.value = convert_reso(
self.value, self.creso, creso, round_ok=round_ok
)
except OverflowError as err:
if val is not None:
raise OutOfBoundsDatetime(
Expand Down Expand Up @@ -283,6 +287,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
obj.value = get_datetime64_nanos(ts, reso)
if obj.value != NPY_NAT:
pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
if tz is not None:
# GH#24559, GH#42288 We treat np.datetime64 objects as *wall* times
obj.value = tz_localize_to_utc_single(
obj.value, tz, ambiguous="raise", nonexistent=None, creso=reso
)
elif is_integer_object(ts):
try:
ts = <int64_t>ts
Expand Down
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1873,10 +1873,6 @@ class Timestamp(_Timestamp):
"the tz parameter. Use tz_convert instead.")

tzobj = maybe_get_tz(tz)
if tzobj is not None and is_datetime64_object(ts_input):
# GH#24559, GH#42288 As of 2.0 we treat datetime64 as
# wall-time (consistent with DatetimeIndex)
return cls(ts_input).tz_localize(tzobj)

if nanosecond is None:
nanosecond = 0
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2240,10 +2240,13 @@ def _sequence_to_dt64(
if lib.infer_dtype(data, skipna=False) == "integer":
data = data.astype(np.int64)
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(
obj_data, tz, abbrev_to_npy_unit(out_unit)
obj_data,
tz=tz,
dayfirst=dayfirst,
yearfirst=yearfirst,
creso=abbrev_to_npy_unit(out_unit),
)
return i8data.view(out_dtype), tz, None
else:
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,21 @@ def test_datetimeindex_constructor_misc(self):
assert len(idx1) == len(idx2)
assert idx1.freq == idx2.freq

def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self):
    """
    Build a tz-aware DatetimeIndex from an ambiguous date string and check
    that ``dayfirst`` / ``yearfirst`` determine how the string is parsed.
    """
    # GH#55813
    zone = "US/Pacific"
    ambiguous = "5/10/16"

    # Each case pairs the parsing keyword with the Timestamp that the
    # ambiguous string is expected to resolve to under that keyword.
    cases = [
        ({"dayfirst": True}, Timestamp(2016, 10, 5, tz=zone)),
        ({"yearfirst": True}, Timestamp(2005, 10, 16, tz=zone)),
    ]

    for parse_kwargs, expected_ts in cases:
        result = DatetimeIndex([ambiguous], tz=zone, **parse_kwargs)
        expected = DatetimeIndex([expected_ts])
        tm.assert_index_equal(result, expected)


def test_pass_datetimeindex_to_index(self):
# Bugs in #1396
rng = date_range("1/1/2000", "3/1/2000")
Expand Down