diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0d49a2d8db77c..a09a5576ca378 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -772,6 +772,7 @@ There are several time/date properties that one can access from ``Timestamp`` or week,"The week ordinal of the year" dayofweek,"The number of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" + isocalendar,"The ISO 8601 year, week and day of the date" quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." days_in_month,"The number of days in the month of the datetime" is_month_start,"Logical indicating if first day of month (defined by frequency)" @@ -786,6 +787,15 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can access these properties via the ``.dt`` accessor, as detailed in the section on :ref:`.dt accessors`. +.. versionadded:: 1.1.0 + +You may obtain the year, week and day components of the ISO year from the ISO 8601 standard: + +.. ipython:: python + + idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + idx.to_series().dt.isocalendar + .. _timeseries.offsets: DateOffset objects diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0cbc47539d759..502d1b37a025b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -88,6 +88,7 @@ Other enhancements - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`) +- :class:`Series.dt` and :class:`DatatimeIndex` now have an `isocalendar` accessor that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`). - The :meth:`DataFrame.to_feather` method now supports additional keyword arguments (e.g. to set the compression) that are added in pyarrow 0.17 (:issue:`33422`). diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 59ecaaaf2266e..68ad1d1e68133 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -2,9 +2,11 @@ from cython cimport Py_ssize_t from numpy cimport int64_t, int32_t +ctypedef (int32_t, int32_t, int32_t) iso_calendar_t cdef int dayofweek(int y, int m, int d) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil cpdef int32_t get_day_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 0588dfe20e2e2..0873084d29555 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -150,33 +150,65 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: ------- week_of_year : int32_t + Notes + ----- + Assumes the inputs describe a valid date. + """ + return get_iso_calendar(year, month, day)[1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil: + """ + Return the year, week, and day of year corresponding to ISO 8601 + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + year : int32_t + week : int32_t + day : int32_t + Notes ----- Assumes the inputs describe a valid date. """ cdef: int32_t doy, dow - int woy + int32_t iso_year, iso_week doy = get_day_of_year(year, month, day) dow = dayofweek(year, month, day) # estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy // 7 + 1 + iso_week = (doy - 1) - dow + 3 + if iso_week >= 0: + iso_week = iso_week // 7 + 1 # verify - if woy < 0: - if (woy > -2) or (woy == -2 and is_leapyear(year - 1)): - woy = 53 + if iso_week < 0: + if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)): + iso_week = 53 else: - woy = 52 - elif woy == 53: + iso_week = 52 + elif iso_week == 53: if 31 - day + dow < 3: - woy = 1 + iso_week = 1 + + iso_year = year + if iso_week == 1 and doy > 7: + iso_year += 1 + + elif iso_week >= 52 and doy < 7: + iso_year -= 1 - return woy + return iso_year, iso_week, dow + 1 @cython.wraparound(False) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 50b7fba67e78f..184d368659714 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -8,14 +8,14 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, int32_t, int8_t +from numpy cimport ndarray, int64_t, int32_t, int8_t, uint32_t cnp.import_array() from pandas._libs.tslibs.ccalendar import ( get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS) from pandas._libs.tslibs.ccalendar cimport ( get_days_in_month, is_leapyear, dayofweek, get_week_of_year, - get_day_of_year) + get_day_of_year, get_iso_calendar, iso_calendar_t) from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct) @@ -670,3 +670,42 @@ cpdef isleapyear_arr(ndarray years): np.logical_and(years % 4 == 0, years % 100 > 0))] = 1 return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_isocalendar_sarray(const int64_t[:] dtindex): + """ + Given a int64-based datetime array, return the ISO 8601 year, week, and day + as a structured array. + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[uint32_t] iso_years, iso_weeks, days + iso_calendar_t ret_val + + sa_dtype = [ + ("year", "u4"), + ("week", "u4"), + ("day", "u4"), + ] + + out = np.empty(count, dtype=sa_dtype) + + iso_years = out["year"] + iso_weeks = out["week"] + days = out["day"] + + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + ret_val = 0, 0, 0 + else: + dt64_to_dtstruct(dtindex[i], &dts) + ret_val = get_iso_calendar(dts.year, dts.month, dts.day) + + iso_years[i] = ret_val[0] + iso_weeks[i] = ret_val[1] + days[i] = ret_val[2] + return out diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b9f9edcebad5b..d6af11a442518 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -182,7 +182,7 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps "microsecond", "nanosecond", ] - _other_ops = ["date", "time", "timetz"] + _other_ops = ["date", "time", "timetz", "isocalendar"] _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops _datetimelike_methods = [ "to_period", @@ -1234,6 +1234,50 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") + @property + def isocalendar(self): + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar + datetime.date.isocalendar + + Examples + -------- + >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + >>> idx.isocalendar + year week day + 0 2019 52 7 + 1 2020 1 1 + 2 2020 1 2 + 3 2020 1 3 + >>> idx.isocalendar.week + 0 52 + 1 1 + 2 1 + 3 1 + Name: week, dtype: UInt32 + """ + from pandas import DataFrame + + sarray = fields.build_isocalendar_sarray(self.asi8) + iso_calendar_df = DataFrame( + sarray, columns=["year", "week", "day"], dtype="UInt32" + ) + if self._hasnans: + iso_calendar_df.iloc[self._isnan] = None + return iso_calendar_df + year = _field_accessor( "year", "Y", diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index d2cee5d94422c..d44fed9e097e7 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -219,6 +219,38 @@ def to_pydatetime(self) -> np.ndarray: def freq(self): return self._get_values().inferred_freq + @property + def isocalendar(self): + """ + Returns a DataFrame with the year, week, and day calculated according to + the ISO 8601 standard. + + .. versionadded:: 1.1.0 + + Returns + ------- + DataFrame + with columns year, week and day + + See Also + -------- + Timestamp.isocalendar + datetime.date.isocalendar + + Examples + -------- + >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT])) + >>> ser.dt.isocalendar + year week day + 0 2009 53 5 + 1 + >>> ser.dt.isocalendar.week + 0 53 + 1 + Name: week, dtype: UInt32 + """ + return self._get_values().isocalendar.set_index(self._parent.index) + @delegate_names( delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 68d6229e798f5..1ec6cf8fd7b4e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -89,6 +89,7 @@ def _new_DatetimeIndex(cls, d): "date", "time", "timetz", + "isocalendar", ] + DatetimeArray._bool_ops, DatetimeArray, diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index d22dc72eaaadd..515e75b82371a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -65,7 +65,7 @@ def get_expected(s, name): if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") - elif not is_list_like(result): + elif not is_list_like(result) or isinstance(result, pd.DataFrame): return result return Series(result, index=s.index, name=s.name) @@ -74,6 +74,8 @@ def compare(s, name): b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b + elif isinstance(a, pd.DataFrame): + tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) @@ -665,3 +667,19 @@ def test_setitem_with_different_tz(self): dtype=object, ) tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize( + "input_series, expected_output", + [ + [["2020-01-01"], [[2020, 1, 3]]], + [[pd.NaT], [[np.NaN, np.NaN, np.NaN]]], + [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]], + [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]], + ], + ) + def test_isocalendar(self, input_series, expected_output): + result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar + expected_frame = pd.DataFrame( + expected_output, columns=["year", "week", "day"], dtype="UInt32" + ) + tm.assert_frame_equal(result, expected_frame) diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 6f6e32411a784..aab86d3a2df69 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import date, datetime import numpy as np import pytest @@ -25,3 +25,26 @@ def test_get_day_of_year_dt(): expected = (dt - dt.replace(month=1, day=1)).days + 1 assert result == expected + + +@pytest.mark.parametrize( + "input_date_tuple, expected_iso_tuple", + [ + [(2020, 1, 1), (2020, 1, 3)], + [(2019, 12, 31), (2020, 1, 2)], + [(2019, 12, 30), (2020, 1, 1)], + [(2009, 12, 31), (2009, 53, 4)], + [(2010, 1, 1), (2009, 53, 5)], + [(2010, 1, 3), (2009, 53, 7)], + [(2010, 1, 4), (2010, 1, 1)], + [(2006, 1, 1), (2005, 52, 7)], + [(2005, 12, 31), (2005, 52, 6)], + [(2008, 12, 28), (2008, 52, 7)], + [(2008, 12, 29), (2009, 1, 1)], + ], +) +def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tuple): + result = ccalendar.get_iso_calendar(*input_date_tuple) + expected_from_date_isocalendar = date(*input_date_tuple).isocalendar() + assert result == expected_from_date_isocalendar + assert result == expected_iso_tuple