Skip to content

Commit 5eead57

Browse files
makbigc and jreback
authored and committed
[BUG] Add is_coerce argument to func array_to_datetime_object (GH26122) (#26257)
1 parent 749f456 commit 5eead57

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ Datetimelike
274274
- Bug in :class:`DataFrame` and :class:`Series` where timezone aware data with ``dtype='datetime64[ns]'`` was not cast to naive (:issue:`25843`)
275275
- Improved :class:`Timestamp` type checking in various datetime functions to prevent exceptions when using a subclassed ``datetime`` (:issue:`25851`)
276276
- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
277-
-
277+
- Bug in :func:`to_datetime` where invalid values were not replaced with ``NaT`` when ``errors='coerce'`` was passed (:issue:`26122`)
278278

279279
Timedelta
280280
^^^^^^^^^

pandas/_libs/tslib.pyx

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
679679
return ignore_errors_out_of_bounds_fallback(values), tz_out
680680

681681
except TypeError:
682-
return array_to_datetime_object(values, is_raise, dayfirst, yearfirst)
682+
return array_to_datetime_object(values, errors,
683+
dayfirst, yearfirst)
683684

684685
if seen_datetime and seen_integer:
685686
# we have mixed datetimes & integers
@@ -694,7 +695,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
694695
elif is_raise:
695696
raise ValueError("mixed datetimes and integers in passed array")
696697
else:
697-
return array_to_datetime_object(values, is_raise,
698+
return array_to_datetime_object(values, errors,
698699
dayfirst, yearfirst)
699700

700701
if seen_datetime_offset and not utc_convert:
@@ -706,7 +707,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
706707
# (with individual dateutil.tzoffsets) are returned
707708
is_same_offsets = len(out_tzoffset_vals) == 1
708709
if not is_same_offsets:
709-
return array_to_datetime_object(values, is_raise,
710+
return array_to_datetime_object(values, errors,
710711
dayfirst, yearfirst)
711712
else:
712713
tz_offset = out_tzoffset_vals.pop()
@@ -754,7 +755,7 @@ cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values):
754755

755756
@cython.wraparound(False)
756757
@cython.boundscheck(False)
757-
cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
758+
cdef array_to_datetime_object(ndarray[object] values, str errors,
758759
bint dayfirst=False, bint yearfirst=False):
759760
"""
760761
Fall back function for array_to_datetime
@@ -766,7 +767,7 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
766767
----------
767768
values : ndarray of object
768769
date-like objects to convert
769-
is_raise : bool
770+
errors : {'raise', 'ignore', 'coerce'}
770771
error behavior when parsing
771772
dayfirst : bool, default False
772773
dayfirst parsing behavior when encountering datetime strings
@@ -780,9 +781,14 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
780781
cdef:
781782
Py_ssize_t i, n = len(values)
782783
object val,
784+
bint is_ignore = errors == 'ignore'
785+
bint is_coerce = errors == 'coerce'
786+
bint is_raise = errors == 'raise'
783787
ndarray[object] oresult
784788
npy_datetimestruct dts
785789

790+
assert is_raise or is_ignore or is_coerce
791+
786792
oresult = np.empty(n, dtype=object)
787793

788794
# We return an object array and only attempt to parse:
@@ -803,6 +809,9 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
803809
pydatetime_to_dt64(oresult[i], &dts)
804810
check_dts_bounds(&dts)
805811
except (ValueError, OverflowError):
812+
if is_coerce:
813+
oresult[i] = NaT
814+
continue
806815
if is_raise:
807816
raise
808817
return values, None

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,19 @@ def test_week_without_day_and_calendar_year(self, date, format):
712712
with pytest.raises(ValueError, match=msg):
713713
pd.to_datetime(date, format=format)
714714

715+
def test_to_datetime_coerce(self):
716+
# GH 26122
717+
ts_strings = ['March 1, 2018 12:00:00+0400',
718+
'March 1, 2018 12:00:00+0500',
719+
'20100240']
720+
result = to_datetime(ts_strings, errors='coerce')
721+
expected = Index([datetime(2018, 3, 1, 12, 0,
722+
tzinfo=tzoffset(None, 14400)),
723+
datetime(2018, 3, 1, 12, 0,
724+
tzinfo=tzoffset(None, 18000)),
725+
NaT])
726+
tm.assert_index_equal(result, expected)
727+
715728
def test_iso_8601_strings_with_same_offset(self):
716729
# GH 17697, 11736
717730
ts_str = "2015-11-18 15:30:00+05:30"

0 commit comments

Comments
 (0)