diff --git a/doc/source/release.rst b/doc/source/release.rst
index 5d256ddf6dca3..5001c5142f330 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -590,6 +590,7 @@ Bug Fixes
   - Fixed segfault on ``isnull(MultiIndex)`` (now raises an error instead)
     (:issue:`5123`, :issue:`5125`)
   - Allow duplicate indices when performing operations that align (:issue:`5185`)
+  - Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 126ed9242ecdd..24a4e4800e750 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -189,6 +189,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                  copy=False):
         if data is None:
             data = {}
+        if dtype is not None:
+            dtype = self._validate_dtype(dtype)
 
         if isinstance(data, DataFrame):
             data = data._data
@@ -276,9 +278,6 @@ def _init_dict(self, data, index, columns, dtype=None):
         Segregate Series based on type and coerce into matrices.
         Needs to handle a lot of exceptional cases.
         """
-        if dtype is not None:
-            dtype = np.dtype(dtype)
-
         if columns is not None:
             columns = _ensure_index(columns)
 
@@ -4659,9 +4658,6 @@ def _get_names_from_index(data):
 def _homogenize(data, index, dtype=None):
     from pandas.core.series import _sanitize_array
 
-    if dtype is not None:
-        dtype = np.dtype(dtype)
-
     oindex = None
     homogenized = []
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 556a7652b9270..3fca45b00d565 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -89,6 +89,18 @@ def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False):
         object.__setattr__(self, '_data', data)
         object.__setattr__(self, '_item_cache', {})
 
+    def _validate_dtype(self, dtype):
+        """ coerce dtype via np.dtype; raise NotImplementedError if compound """
+
+        if dtype is not None:
+            dtype = np.dtype(dtype)
+
+            # a compound dtype
+            if dtype.kind == 'V':
+                raise NotImplementedError("compound dtypes are not implemented "
+                                          "in the {0} constructor".format(self.__class__.__name__))
+        return dtype
+
     def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
         """ passed a manager and a axes dict """
         for a, axe in axes.items():
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index f0d9dbe9c5877..1389445b29943 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -140,6 +140,8 @@ def _init_data(self, data, copy, dtype, **kwargs):
         """
         if data is None:
             data = {}
+        if dtype is not None:
+            dtype = self._validate_dtype(dtype)
 
         passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS]
         axes = None
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e475495c63164..fba3e946de0b0 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -139,6 +139,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
 
         if data is None:
             data = {}
+        if dtype is not None:
+            dtype = self._validate_dtype(dtype)
 
         if isinstance(data, MultiIndex):
             raise NotImplementedError
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 697f9c94aff93..6e7683d29a934 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10709,6 +10709,21 @@ def test_constructor_series_copy(self):
 
         self.assert_(not (series['A'] == 5).all())
 
+    def test_constructor_compound_dtypes(self):
+        # GH 5191
+        # compound dtypes should raise not-implementederror
+
+        def f(dtype):
+            return DataFrame(data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9)),
+                             columns=["A", "B", "C"], dtype=dtype)
+
+        self.assertRaises(NotImplementedError, f, [("A","datetime64[h]"), ("B","str"), ("C","int32")])
+
+        # these work (though results may be unexpected)
+        f('int64')
+        f('float64')
+        f('M8[ns]')
+
     def test_assign_columns(self):
         self.frame['hi'] = 'there'
 
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index c9ef3ea4e217c..ce0cb909cf1c5 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -86,7 +86,7 @@ def _construct(self, shape, value=None, dtype=None, **kwargs):
                 arr = np.repeat(arr,new_shape).reshape(shape)
         else:
             arr = np.random.randn(*shape)
-        return self._typ(arr,**kwargs)
+        return self._typ(arr,dtype=dtype,**kwargs)
 
     def _compare(self, result, expected):
         self._comparator(result,expected)
@@ -210,6 +210,20 @@ def test_downcast(self):
         expected = o.astype(np.int64)
         self._compare(result, expected)
 
+    def test_constructor_compound_dtypes(self):
+        # GH 5191
+        # compound dtypes should raise not-implementederror
+
+        def f(dtype):
+            return self._construct(shape=3, dtype=dtype)
+
+        self.assertRaises(NotImplementedError, f, [("A","datetime64[h]"), ("B","str"), ("C","int32")])
+
+        # these work (though results may be unexpected)
+        f('int64')
+        f('float64')
+        f('M8[ns]')
+
 class TestSeries(unittest.TestCase, Generic):
     _typ = Series
     _comparator = lambda self, x, y: assert_series_equal(x,y)