diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f9c604cd76472..7263fde5eb530 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -835,7 +835,20 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, vals : ndarray[int64_t] tz : tzinfo or None ambiguous : str, bool, or arraylike - If arraylike, must have the same length as vals + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times, but the array must have the same length as vals) + - bool if True, treat all vals as DST. If False, treat them as non-DST + - 'NaT' will return NaT where there are ambiguous times + nonexistent : str If arraylike, must have the same length as vals diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 0eec84ecf8285..be35ae1172f7e 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -559,6 +559,13 @@ class NaTType(_NaT): None will remove timezone holding local time. ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + - bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 08b0c5472549e..89092af68b168 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -974,6 +974,13 @@ class Timestamp(_Timestamp): None will remove timezone holding local time. ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + - bool contains flags to determine if time is dst or not (note that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b656690b30e34..fd3e5cedcfe77 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -606,6 +606,12 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Time zone to convert timestamps to. Passing ``None`` will remove the time zone information preserving local time. ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. - 'infer' will attempt to infer fall dst-transition hours based on order @@ -677,6 +683,40 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', '2018-03-03 09:00:00'], dtype='datetime64[ns]', freq='D') + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + >>> s = pd.to_datetime(pd.Series([ + ... '2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series([ + ... '2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] + """ if errors is not None: warnings.warn("The errors argument is deprecated and will be " diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 693cd14c8ca1d..96bdb7cc2146a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8623,6 +8623,13 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, copy : boolean, default True Also make a copy of the underlying data ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False designates @@ -8650,6 +8657,52 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, ------ TypeError If the TimeSeries is tz-aware and tz is not None. + + Examples + -------- + + Localize local times: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) + >>> s.tz_localize('CET') + 2018-09-15 01:30:00+02:00 1 + dtype: int64 + + Be careful with DST changes. When there is sequential data, pandas + can infer the DST time: + + >>> s = pd.Series(range(7), index=pd.DatetimeIndex([ + ... '2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.Series(range(3), index=pd.DatetimeIndex([ + ... '2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + 2018-10-28 01:20:00+02:00 0 + 2018-10-28 02:36:00+02:00 1 + 2018-10-28 03:46:00+01:00 2 + dtype: int64 + """ if nonexistent not in ('raise', 'NaT', 'shift'): raise ValueError("The nonexistent argument must be one of 'raise'," diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 39f247a7c4cfe..d3cd4d834dfa0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -99,6 +99,12 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, the 'left', 'right', or both sides (None) tz : pytz.timezone or dateutil.tz.tzfile ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. + - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False signifies a @@ -173,6 +179,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, TimedeltaIndex : Index of timedelta64 data PeriodIndex : Index of Period data pandas.to_datetime : Convert argument to datetime + """ _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget)