From 9d77ffead2d7df22921c2fb69dc3ddd04d190a04 Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 10:21:36 -0600 Subject: [PATCH 1/7] Add try/except to address inconsistent behavior with invalid dates. issue #10154 --- pandas/tslib.pyx | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index b3a6059db384f..27cd5e89220a9 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -2760,17 +2760,23 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe # Cannot pre-calculate datetime_date() since can change in Julian # calculation and thus could have different value for the day of the wk # calculation. - if julian == -1: - # Need to add 1 to result since first day of the year is 1, not 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will - # be accurate. - datetime_result = datetime_date.fromordinal( - (julian - 1) + datetime_date(year, 1, 1).toordinal()) - year = datetime_result.year - month = datetime_result.month - day = datetime_result.day + try: + if julian == -1: + # Need to add 1 to result since first day of the year is 1, not 0. + julian = datetime_date(year, month, day).toordinal() - \ + datetime_date(year, 1, 1).toordinal() + 1 + else: # Assume that if they bothered to include Julian day it will + # be accurate. 
+ datetime_result = datetime_date.fromordinal( + (julian - 1) + datetime_date(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + except ValueError: + if coerce: + iresult[i] = iNaT + continue + raise if weekday == -1: weekday = datetime_date(year, month, day).weekday() From 2e3d15313564bb33b340eeb13d6b0b66c50caabf Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 11:39:27 -0600 Subject: [PATCH 2/7] Add tests for to_datetime with invalid day of month. issue #10154 Not all tests pass; there are other issues. --- pandas/tseries/tests/test_timeseries.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index a078aba2269bb..609bc5212d900 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -11,7 +11,7 @@ from pandas import (Index, Series, TimeSeries, DataFrame, isnull, date_range, Timestamp, Period, DatetimeIndex, - Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex) + Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex, NaT) import pandas.core.datetools as datetools import pandas.tseries.offsets as offsets @@ -4461,6 +4461,23 @@ def test_second(self): self.assertIsInstance(r2, Float64Index) tm.assert_index_equal(r1, r2) +class TestDaysInMonth(tm.TestCase): + def test_day_not_in_month_coerce_true_NaT(self): + self.assertTrue(isnull(to_datetime('2015-02-29', coerce=True))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True))) + self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True))) + def test_day_not_in_month_coerce_false_raise(self): + self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False) + 
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False) + self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False) + self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False) + def test_day_not_in_month_coerce_false_ignore(self): + self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', coerce=False) + self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False) + self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='ignore', format="%Y-%m-%d", coerce=False) + self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='ignore', format="%Y-%m-%d", coerce=False) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 62ca4f250eabc52110be7a79b3303896879802ef Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 15:11:28 -0600 Subject: [PATCH 3/7] Corrected to_datetime test case --- pandas/tseries/tests/test_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 609bc5212d900..1689a01aafea4 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4473,7 +4473,7 @@ def test_day_not_in_month_coerce_false_raise(self): self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False) def test_day_not_in_month_coerce_false_ignore(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', coerce=False) + self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29') self.assertRaises(ValueError, to_datetime, '2015-02-29', 
errors='ignore', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='ignore', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='ignore', format="%Y-%m-%d", coerce=False) From f63933c577aecdbc1f77ae150338782aedce34f9 Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 15:12:02 -0600 Subject: [PATCH 4/7] Correct an issue when coerce=True did not return a NaT --- pandas/tseries/tools.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 65fe3420f670c..72d3b00a27ea5 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -8,6 +8,7 @@ import pandas.tslib as tslib import pandas.core.common as com from pandas.compat import StringIO, callable +from pandas.tslib import NaT, iNaT import pandas.compat as compat try: @@ -320,7 +321,7 @@ def _convert_listlike(arg, box, format): except ValueError: # Only raise this error if the user provided the # datetime format, and not when it was inferred - if not infer_datetime_format: + if not infer_datetime_format and not coerce: raise if result is None and (format is None or infer_datetime_format): @@ -349,7 +350,11 @@ def _convert_listlike(arg, box, format): elif com.is_list_like(arg): return _convert_listlike(arg, box, format) - return _convert_listlike(np.array([ arg ]), box, format)[0] + try: + return _convert_listlike(np.array([ arg ]), box, format)[0] + except ValueError as e: + if not coerce: + raise e class DateParseError(ValueError): pass From 8470666443da89cc51f665c9ff96b1a078920588 Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 20:06:39 -0600 Subject: [PATCH 5/7] Revert "Correct an issue when coerce=True did not return a NaT" This reverts commit f63933c577aecdbc1f77ae150338782aedce34f9. 
--- pandas/tseries/tools.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 72d3b00a27ea5..65fe3420f670c 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -8,7 +8,6 @@ import pandas.tslib as tslib import pandas.core.common as com from pandas.compat import StringIO, callable -from pandas.tslib import NaT, iNaT import pandas.compat as compat try: @@ -321,7 +320,7 @@ def _convert_listlike(arg, box, format): except ValueError: # Only raise this error if the user provided the # datetime format, and not when it was inferred - if not infer_datetime_format and not coerce: + if not infer_datetime_format: raise if result is None and (format is None or infer_datetime_format): @@ -350,11 +349,7 @@ def _convert_listlike(arg, box, format): elif com.is_list_like(arg): return _convert_listlike(arg, box, format) - try: - return _convert_listlike(np.array([ arg ]), box, format)[0] - except ValueError as e: - if not coerce: - raise e + return _convert_listlike(np.array([ arg ]), box, format)[0] class DateParseError(ValueError): pass From 1ab09344213af6a1f56a852c2f1dedd9730a89c2 Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 20:08:39 -0600 Subject: [PATCH 6/7] add blank lines between functions --- pandas/tseries/tests/test_timeseries.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 1689a01aafea4..fd73030a4f5ab 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4467,11 +4467,13 @@ def test_day_not_in_month_coerce_true_NaT(self): self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True))) self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True))) self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True))) + def 
test_day_not_in_month_coerce_false_raise(self): self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False) self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False) + def test_day_not_in_month_coerce_false_ignore(self): self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29') self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False) From 9baa88234244943f8feefa09c5025feea6c77e96 Mon Sep 17 00:00:00 2001 From: Vincent Davis Date: Mon, 6 Jul 2015 20:18:25 -0600 Subject: [PATCH 7/7] add issue comment to test --- pandas/tseries/tests/test_timeseries.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index fd73030a4f5ab..c95281d49586a 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4462,18 +4462,20 @@ def test_second(self): tm.assert_index_equal(r1, r2) class TestDaysInMonth(tm.TestCase): + # tests for issue #10154 + # Not all tests pass; there are other issues, see the comments on the failing lines def test_day_not_in_month_coerce_true_NaT(self): self.assertTrue(isnull(to_datetime('2015-02-29', coerce=True))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True))) # this test fails self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True))) # this test fails def 
test_day_not_in_month_coerce_false_raise(self): self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False) self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False) self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False) - + def test_day_not_in_month_coerce_false_ignore(self): self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29') self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False)