From 36d49a954414b8ccb82359890500090962d563a4 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 26 Aug 2013 21:42:20 -0400 Subject: [PATCH 1/3] INT: add TimeDeltaBlock support in internals ENH: GH3371 support timedelta fillna --- doc/source/release.rst | 2 + doc/source/timeseries.rst | 9 ++++ doc/source/v0.13.0.txt | 16 +++++-- pandas/core/internals.py | 84 +++++++++++++++++++++++++++++++++---- pandas/tests/test_series.py | 40 ++++++++++++++++++ 5 files changed, 138 insertions(+), 13 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 930f100fd86dc..361bd480a812a 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -67,6 +67,8 @@ pandas 0.13 - A Series of dtype ``timedelta64[ns]`` can now be divided by another ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This is frequency conversion. + - Timedeltas support ``fillna`` with an integer interpreted as seconds, + or a ``timedelta`` (:issue:`3371`) - Performance improvements with ``__getitem__`` on ``DataFrames`` with when the key is a column - Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e9540365c3dee..1175fe1c5a3a5 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1195,6 +1195,15 @@ issues). ``idxmin, idxmax`` are supported as well. df.min().idxmax() df.min(axis=1).idxmin() +You can fillna on timedeltas. Integers will be interpreted as seconds. You can +pass a timedelta to get a particular value. + +.. ipython:: python + + y.fillna(0) + y.fillna(10) + y.fillna(timedelta(days=-1,seconds=5)) + .. _timeseries.timedeltas_convert: Time Deltas & Conversions diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 6b8b7e73f3ac4..a38ff2fa6d457 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -195,6 +195,7 @@ Enhancements - NaN handing in get_dummies (:issue:`4446`) with `dummy_na` .. 
ipython:: python + # previously, nan was erroneously counted as 2 here # now it is not counted at all get_dummies([1, 2, np.nan]) @@ -237,10 +238,17 @@ Enhancements from pandas import offsets td + offsets.Minute(5) + offsets.Milli(5) - - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and - ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set - the bandwidth, and to gkde.evaluate() to specify the indicies at which it - is evaluated, respecttively. See scipy docs. + - Fillna is now supported for timedeltas + + .. ipython:: python + + td.fillna(0) + td.fillna(timedelta(days=1,seconds=5)) + + - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and + ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set + the bandwidth, and to gkde.evaluate() to specify the indices at which it + is evaluated, respectively. See scipy docs. .. _whatsnew_0130.refactoring: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e27430b06c45c..a583a14df0671 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,6 +1,6 @@ import itertools import re -from datetime import datetime +from datetime import datetime, timedelta import copy from collections import defaultdict @@ -41,6 +41,7 @@ class Block(PandasObject): is_integer = False is_complex = False is_datetime = False + is_timedelta = False is_bool = False is_object = False is_sparse = False @@ -326,6 +327,8 @@ def _maybe_downcast(self, blocks, downcast=None): # unless indicated if downcast is None and self.is_float: return blocks + elif downcast is None and (self.is_timedelta or self.is_datetime): + return blocks result_blocks = [] for b in blocks: @@ -485,6 +488,10 @@ def _try_cast_result(self, result, dtype=None): # may need to change the dtype here return _possibly_downcast_to_dtype(result, dtype) + def _try_operate(self, values): + """ return a version to operate on as the input """ + return values + def
_try_coerce_args(self, values, other): """ provide coercion to our input arguments """ return values, other @@ -703,8 +710,11 @@ def interpolate(self, method='pad', axis=0, inplace=False, else: return [self.copy()] + fill_value = self._try_fill(fill_value) values = self.values if inplace else self.values.copy() + values = self._try_operate(values) values = com.interpolate_2d(values, method, axis, limit, fill_value) + values = self._try_coerce_result(values) blocks = [ make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True) ] return self._maybe_downcast(blocks, downcast) @@ -1008,6 +1018,55 @@ def _try_cast(self, element): def should_store(self, value): return com.is_integer_dtype(value) and value.dtype == self.dtype +class TimeDeltaBlock(IntBlock): + is_timedelta = True + _can_hold_na = True + + def _try_fill(self, value): + """ if we are a NaT, return the actual fill value """ + if isinstance(value, type(tslib.NaT)) or isnull(value): + value = tslib.iNaT + elif isinstance(value, np.timedelta64): + pass + elif com.is_integer(value): + # coerce to seconds of timedelta + value = np.timedelta64(int(value*1e9)) + elif isinstance(value, timedelta): + value = np.timedelta64(value) + + return value + + def _try_operate(self, values): + """ return a version to operate on """ + return values.view('i8') + + def _try_coerce_result(self, result): + """ reverse of try_coerce_args / try_operate """ + if isinstance(result, np.ndarray): + result = result.astype('m8[ns]') + elif isinstance(result, np.integer): + result = np.timedelta64(result) + return result + + def should_store(self, value): + return issubclass(value.dtype.type, np.timedelta64) + + def to_native_types(self, slicer=None, na_rep=None, **kwargs): + """ convert to our native types format, slicing if desired """ + + values = self.values + if slicer is not None: + values = values[:, slicer] + mask = isnull(values) + + rvalues = np.empty(values.shape, dtype=object) + if na_rep 
is None: + na_rep = 'NaT' + rvalues[mask] = na_rep + imask = (-mask).ravel() + rvalues.flat[imask] = np.array([lib.repr_timedelta64(val) + for val in values.ravel()[imask]], dtype=object) + return rvalues.tolist() class BoolBlock(NumericBlock): is_bool = True @@ -1216,6 +1275,10 @@ def _try_cast(self, element): except: return element + def _try_operate(self, values): + """ return a version to operate on """ + return values.view('i8') + def _try_coerce_args(self, values, other): """ provide coercion to our input arguments we are going to compare vs i8, so coerce to integer @@ -1242,11 +1305,12 @@ def _try_coerce_result(self, result): def _try_fill(self, value): """ if we are a NaT, return the actual fill value """ - if isinstance(value, type(tslib.NaT)): + if isinstance(value, type(tslib.NaT)) or isnull(value): value = tslib.iNaT return value def fillna(self, value, inplace=False, downcast=None): + # straight putmask here values = self.values if inplace else self.values.copy() mask = com.isnull(self.values) value = self._try_fill(value) @@ -1267,12 +1331,9 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs): na_rep = 'NaT' rvalues[mask] = na_rep imask = (-mask).ravel() - if self.dtype == 'datetime64[ns]': - rvalues.flat[imask] = np.array( - [Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object) - elif self.dtype == 'timedelta64[ns]': - rvalues.flat[imask] = np.array([lib.repr_timedelta64(val) - for val in values.ravel()[imask]], dtype=object) + rvalues.flat[imask] = np.array( + [Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object) + return rvalues.tolist() def should_store(self, value): @@ -1551,6 +1612,8 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast klass = SparseBlock elif issubclass(vtype, np.floating): klass = FloatBlock + elif issubclass(vtype, np.integer) and issubclass(vtype, np.timedelta64): + klass = TimeDeltaBlock elif issubclass(vtype, np.integer) and not 
issubclass(vtype, np.datetime64): klass = IntBlock elif dtype == np.bool_: @@ -3404,12 +3467,13 @@ def _lcd_dtype(l): have_float = len(counts[FloatBlock]) > 0 have_complex = len(counts[ComplexBlock]) > 0 have_dt64 = len(counts[DatetimeBlock]) > 0 + have_td64 = len(counts[TimeDeltaBlock]) > 0 have_sparse = len(counts[SparseBlock]) > 0 have_numeric = have_float or have_complex or have_int if (have_object or (have_bool and have_numeric) or - (have_numeric and have_dt64)): + (have_numeric and (have_dt64 or have_td64))): return np.dtype(object) elif have_bool: return np.dtype(bool) @@ -3432,6 +3496,8 @@ def _lcd_dtype(l): elif have_dt64 and not have_float and not have_complex: return np.dtype('M8[ns]') + elif have_td64 and not have_float and not have_complex: + return np.dtype('m8[ns]') elif have_complex: return np.dtype('c16') else: diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 4d86e8ae4a25b..2379b0ae8d1d9 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2405,6 +2405,46 @@ def test_timedelta64_functions(self): expected = Series([timedelta(1)], dtype='timedelta64[ns]') assert_series_equal(result, expected) + def test_timedelta_fillna(self): + if com._np_version_under1p7: + raise nose.SkipTest("timedelta broken in np 1.6.1") + + #GH 3371 + from datetime import timedelta + + s = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130102'),Timestamp('20130103 9:01:01')]) + td = s.diff() + + # reg fillna + result = td.fillna(0) + expected = Series([timedelta(0),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)]) + assert_series_equal(result,expected) + + # interpreted as seconds + result = td.fillna(1) + expected = Series([timedelta(seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)]) + assert_series_equal(result,expected) + + result = td.fillna(timedelta(days=1,seconds=1)) + expected =
Series([timedelta(days=1,seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)]) + assert_series_equal(result,expected) + + result = td.fillna(np.timedelta64(int(1e9))) + expected = Series([timedelta(seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)]) + assert_series_equal(result,expected) + + from pandas import tslib + result = td.fillna(tslib.NaT) + expected = Series([tslib.NaT,timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)],dtype='m8[ns]') + assert_series_equal(result,expected) + + # ffill + td[2] = np.nan + result = td.ffill() + expected = td.fillna(0) + expected[0] = np.nan + assert_series_equal(result,expected) + def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime From df791a7fa9cef30d0e1d8b15ddb8843afa77f409 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 26 Aug 2013 22:33:45 -0400 Subject: [PATCH 2/3] BUG/ENH: add bfill/ffill support for datetime64[ns] --- doc/source/release.rst | 3 ++- pandas/core/internals.py | 5 ++++- pandas/tests/test_series.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 361bd480a812a..5becc26a5b820 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -67,8 +67,9 @@ pandas 0.13 - A Series of dtype ``timedelta64[ns]`` can now be divided by another ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This is frequency conversion. 
- - Timedeltas support ``fillna`` with an integer interpreted as seconds, + - Timedelta64 support ``fillna/ffill/bfill`` with an integer interpreted as seconds, or a ``timedelta`` (:issue:`3371`) + - Datetime64 support ``ffill/bfill`` - Performance improvements with ``__getitem__`` on ``DataFrames`` with when the key is a column - Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a583a14df0671..683867233848b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1043,7 +1043,8 @@ def _try_operate(self, values): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - result = result.astype('m8[ns]') + if result.dtype.kind in ['i','f','O']: + result = result.astype('m8[ns]') elif isinstance(result, np.integer): result = np.timedelta64(result) return result @@ -1299,6 +1300,8 @@ def _try_coerce_result(self, result): if result.dtype == 'i8': result = tslib.array_to_datetime( result.astype(object).ravel()).reshape(result.shape) + elif result.dtype.kind in ['i','f','O']: + result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.datetime64)): result = lib.Timestamp(result) return result diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 2379b0ae8d1d9..a4bfcc1a4f451 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2445,6 +2445,38 @@ def test_timedelta_fillna(self): expected[0] = np.nan assert_series_equal(result,expected) + # bfill + td[2] = np.nan + result = td.bfill() + expected = td.fillna(0) + expected[2] = timedelta(days=1,seconds=9*3600+60+1) + assert_series_equal(result,expected) + + def test_datetime64_fillna(self): + + s = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130102'),Timestamp('20130103 9:01:01')]) + s[2] = np.nan + + # reg fillna + result = s.fillna(Timestamp('20130104')) + 
expected = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130104'),Timestamp('20130103 9:01:01')]) + assert_series_equal(result,expected) + + from pandas import tslib + result = s.fillna(tslib.NaT) + expected = s + assert_series_equal(result,expected) + + # ffill + result = s.ffill() + expected = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130103 9:01:01')]) + assert_series_equal(result,expected) + + # bfill + result = s.bfill() + expected = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130103 9:01:01'),Timestamp('20130103 9:01:01')]) + assert_series_equal(result,expected) + def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime From b3f2444b24af412c51d79525dbc67c9b29ede0ad Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 27 Aug 2013 10:54:31 -0400 Subject: [PATCH 3/3] API: provide automatic dtype conversions on _reduce operations API: exclude non-numerics if mixed types in _reduce operations BUG: timedelta fixes CLN: small cleaning in nanops.py BUG: allow _reduce to call .apply for certain operations when the whole block fails via a reduce exception --- doc/source/release.rst | 2 ++ pandas/core/common.py | 51 ++++++++++++++++++++++++++++++++++++- pandas/core/frame.py | 22 +++++++++++++--- pandas/core/generic.py | 19 +++++++++++--- pandas/core/internals.py | 1 + pandas/core/nanops.py | 6 ++--- pandas/src/inference.pyx | 3 +++ pandas/tests/test_frame.py | 36 +++++++++++++++++++------- pandas/tests/test_series.py | 8 +++--- 9 files changed, 122 insertions(+), 26 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 5becc26a5b820..c24ff6ab0ab30 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -157,6 +157,8 @@ pandas 0.13 - Remove undocumented/unused ``kind`` keyword argument from ``read_excel``, and ``ExcelFile``.
(:issue:`4713`, :issue:`4712`) - The ``method`` argument of ``NDFrame.replace()`` is valid again, so that a a list can be passed to ``to_replace`` (:issue:`4743`). + - provide automatic dtype conversions on _reduce operations (:issue:`3371`) + - exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`) **Internal Refactoring** diff --git a/pandas/core/common.py b/pandas/core/common.py index 8b9ba4d5eea16..54197e86c961d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -705,6 +705,54 @@ def diff(arr, n, axis=0): return out_arr +def _coerce_scalar_to_timedelta_type(r): + # kludgy here until we have a timedelta scalar + # handle the numpy < 1.7 case + + if is_integer(r): + r = timedelta(microseconds=r/1000) + + if _np_version_under1p7: + if not isinstance(r, timedelta): + raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) + if compat.PY3: + # convert to microseconds in timedelta64 + r = np.timedelta64(int(r.total_seconds()*1e9 + r.microseconds*1000)) + else: + return r + + if isinstance(r, timedelta): + r = np.timedelta64(r) + elif not isinstance(r, np.timedelta64): + raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) + return r.astype('timedelta64[ns]') + +def _coerce_to_dtypes(result, dtypes): + """ given a dtypes and a result set, coerce the result elements to the dtypes """ + if len(result) != len(dtypes): + raise AssertionError("_coerce_to_dtypes requires equal len arrays") + + def conv(r,dtype): + try: + if isnull(r): + pass + elif dtype == _NS_DTYPE: + r = Timestamp(r) + elif dtype == _TD_DTYPE: + r = _coerce_scalar_to_timedelta_type(r) + elif dtype == np.bool_: + r = bool(r) + elif dtype.kind == 'f': + r = float(r) + elif dtype.kind == 'i': + r = int(r) + except: + pass + + return r + + return np.array([ conv(r,dtype) for r, dtype in zip(result,dtypes) ]) + def _infer_dtype_from_scalar(val): """ interpret the dtype from a scalar, upcast floats and ints return the new value 
and the dtype """ @@ -1288,7 +1336,7 @@ def _possibly_cast_to_timedelta(value, coerce=True): # coercion compatability if coerce == 'compat' and _np_version_under1p7: - def convert(td, type): + def convert(td, dtype): # we have an array with a non-object dtype if hasattr(td,'item'): @@ -1317,6 +1365,7 @@ def convert(td, type): # < 1.7 coercion if not is_list_like(value): value = np.array([ value ]) + dtype = value.dtype return np.array([ convert(v,dtype) for v in value ], dtype='m8[ns]') diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a3eb3ea54c784..52d3a15d8d184 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,7 +23,8 @@ from pandas.core.common import (isnull, notnull, PandasError, _try_sort, _default_index, _maybe_upcast, _is_sequence, - _infer_dtype_from_scalar, _values_from_object) + _infer_dtype_from_scalar, _values_from_object, + _coerce_to_dtypes, _DATELIKE_DTYPES) from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, @@ -4235,11 +4236,24 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, axis = self._get_axis_number(axis) f = lambda x: op(x, axis=axis, skipna=skipna, **kwds) labels = self._get_agg_axis(axis) + + # exclude timedelta/datetime unless we are uniform types + if axis == 1 and self._is_mixed_type and len(set(self.dtypes) & _DATELIKE_DTYPES): + numeric_only = True + if numeric_only is None: try: values = self.values result = f(values) except Exception as e: + + # try by-column first + if filter_type is None and axis == 0: + try: + return self.apply(f).iloc[0] + except: + pass + if filter_type is None or filter_type == 'numeric': data = self._get_numeric_data() elif filter_type == 'bool': @@ -4273,9 +4287,11 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, result = result.astype(np.float64) elif filter_type == 'bool' and notnull(result).all(): result = 
result.astype(np.bool_) - # otherwise, accept it except (ValueError, TypeError): - pass + + # try to coerce to the original dtypes item by item if we can + if axis == 0: + result = com._coerce_to_dtypes(result, self.dtypes) return Series(result, index=labels) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3bdfd98127bb7..58e1fbc4f177d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -20,12 +20,9 @@ _infer_dtype_from_scalar, _maybe_promote, ABCSeries) - - def is_dictlike(x): return isinstance(x, (dict, com.ABCSeries)) - def _single_replace(self, to_replace, method, inplace, limit): orig_dtype = self.dtype result = self if inplace else self.copy() @@ -1906,7 +1903,21 @@ def abs(self): abs: type of caller """ obj = np.abs(self) - obj = com._possibly_cast_to_timedelta(obj, coerce=False) + + # numpy 1.6 workaround: coerce m8[us] results back to m8[ns] + if com._np_version_under1p7: + if self.ndim == 1: + if obj.dtype == 'm8[us]': + obj = obj.astype('m8[ns]') + elif self.ndim == 2: + def f(x): + if x.dtype == 'm8[us]': + x = x.astype('m8[ns]') + return x + + if 'm8[us]' in obj.dtypes.values: + obj = obj.apply(f) + return obj def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 683867233848b..1716980813cea 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1021,6 +1021,7 @@ def should_store(self, value): class TimeDeltaBlock(IntBlock): is_timedelta = True _can_hold_na = True + is_numeric = False def _try_fill(self, value): """ if we are a NaT, return the actual fill value """ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 3e247caae9c42..3a185ca83604d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -287,8 +287,7 @@ def nanmin(values, axis=None, skipna=True): values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf') # numpy 1.6.1 workaround in Python 3.x - if (values.dtype == np.object_ - and
sys.version_info[0] >= 3): # pragma: no cover + if (values.dtype == np.object_ and compat.PY3): if values.ndim > 1: apply_ax = axis if axis is not None else 0 result = np.apply_along_axis(builtins.min, apply_ax, values) @@ -311,8 +310,7 @@ def nanmax(values, axis=None, skipna=True): values, mask, dtype = _get_values(values, skipna, fill_value_typ ='-inf') # numpy 1.6.1 workaround in Python 3.x - if (values.dtype == np.object_ - and sys.version_info[0] >= 3): # pragma: no cover + if (values.dtype == np.object_ and compat.PY3): if values.ndim > 1: apply_ax = axis if axis is not None else 0 diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 39334e95e8c81..e0bbc1a4e64c1 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -480,6 +480,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen_object = 1 # objects[i] = val.astype('O') break + elif util.is_timedelta64_object(val): + seen_object = 1 + break elif util.is_integer_object(val): seen_int = 1 floats[i] = val diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index bb76547da0c28..cefe15952d329 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3232,25 +3232,43 @@ def test_operators_timedelta64(self): result = diffs.max(axis=1) self.assert_((result == diffs['A']).all() == True) - # abs ###### THIS IS BROKEN NOW ###### (results are dtype=timedelta64[us] - # even though fixed in series - #result = np.abs(df['A']-df['B']) - #result = diffs.abs() - #expected = DataFrame(dict(A = df['A']-df['C'], - # B = df['B']-df['A'])) - #assert_frame_equal(result,expected) + # abs + result = diffs.abs() + expected = DataFrame(dict(A = df['A']-df['C'], + B = df['B']-df['A'])) + assert_frame_equal(result,expected) # mixed frame mixed = diffs.copy() mixed['C'] = 'foo' mixed['D'] = 1 mixed['E'] = 1. 
+ mixed['F'] = Timestamp('20130101') - # this is ok + # results in an object array result = mixed.min() + expected = Series([com._coerce_scalar_to_timedelta_type(timedelta(seconds=5*60+5)), + com._coerce_scalar_to_timedelta_type(timedelta(days=-1)), + 'foo', + 1, + 1.0, + Timestamp('20130101')], + index=mixed.columns) + assert_series_equal(result,expected) - # this is not + # excludes numeric result = mixed.min(axis=1) + expected = Series([1, 1, 1.],index=[0, 1, 2]) + assert_series_equal(result,expected) + + # works when only those columns are selected + result = mixed[['A','B']].min(1) + expected = Series([ timedelta(days=-1) ] * 3) + assert_series_equal(result,expected) + + result = mixed[['A','B']].min() + expected = Series([ timedelta(seconds=5*60+5), timedelta(days=-1) ],index=['A','B']) + assert_series_equal(result,expected) # GH 3106 df = DataFrame({'time' : date_range('20130102',periods=5), diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index a4bfcc1a4f451..b0911ed10be20 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4277,16 +4277,14 @@ def test_reindex_corner(self): def test_reindex_pad(self): - s = Series(np.arange(10), np.arange(10)) + s = Series(np.arange(10)) s2 = s[::2] reindexed = s2.reindex(s.index, method='pad') reindexed2 = s2.reindex(s.index, method='ffill') assert_series_equal(reindexed, reindexed2) - # used platform int above, need to pass int explicitly here per #1219 - expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], dtype=int, - index=np.arange(10)) + expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) assert_series_equal(reindexed, expected) # GH4604 @@ -4696,7 +4694,7 @@ def test_replace_with_single_list(self): assert_series_equal(s, ser) def test_replace_mixed_types(self): - s = Series(np.arange(5)) + s = Series(np.arange(5),dtype='int64') def check_replace(to_rep, val, expected): sc = s.copy()