diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 4ae3d9be04aa7..2b5f899091edd 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -80,6 +80,7 @@ Other API Changes
 - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
 - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
 - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
+- :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`)
 
 .. _whatsnew_0220.deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e82eb8635d4c7..d3561f8a0eadf 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -54,6 +54,7 @@
     _ensure_int64,
     _ensure_platform_int,
     is_list_like,
+    is_nested_list_like,
     is_iterator,
     is_sequence,
     is_named_tuple)
@@ -1271,16 +1272,38 @@ def from_items(cls, items, columns=None, orient='columns'):
                 columns = _ensure_index(keys)
                 arrays = values
 
-            return cls._from_arrays(arrays, columns, None)
+            # GH 17312
+            # Provide more informative error msg when scalar values passed
+            try:
+                return cls._from_arrays(arrays, columns, None)
+
+            except ValueError:
+                if not is_nested_list_like(values):
+                    raise ValueError('The value in each (key, value) pair '
+                                     'must be an array, Series, or dict')
+                # propagate unrelated errors instead of returning None
+                raise
+
         elif orient == 'index':
             if columns is None:
                 raise TypeError("Must pass columns with orient='index'")
 
             keys = _ensure_index(keys)
 
-            arr = np.array(values, dtype=object).T
-            data = [lib.maybe_convert_objects(v) for v in arr]
-            return cls._from_arrays(data, columns, keys)
+            # GH 17312
+            # Provide more informative error msg when scalar values passed
+            try:
+                arr = np.array(values, dtype=object).T
+                data = [lib.maybe_convert_objects(v) for v in arr]
+                return cls._from_arrays(data, columns, keys)
+
+            except TypeError:
+                if not is_nested_list_like(values):
+                    raise ValueError('The value in each (key, value) pair '
+                                     'must be an array, Series, or dict')
+                # propagate unrelated errors instead of returning None
+                raise
+
         else:  # pragma: no cover
             raise ValueError("'orient' must be either 'columns' or 'index'")
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 2f947527ce95b..b6090a13c8d38 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -268,13 +268,14 @@ def test_constructor_dict(self):
 
         # GH10856
         # dict with scalar values should raise error, even if columns passed
-        with pytest.raises(ValueError):
+        msg = 'If using all scalar values, you must pass an index'
+        with tm.assert_raises_regex(ValueError, msg):
             DataFrame({'a': 0.7})
 
-        with pytest.raises(ValueError):
+        with tm.assert_raises_regex(ValueError, msg):
             DataFrame({'a': 0.7}, columns=['a'])
 
-        with pytest.raises(ValueError):
+        with tm.assert_raises_regex(ValueError, msg):
             DataFrame({'a': 0.7}, columns=['b'])
 
     def test_constructor_multi_index(self):
@@ -1204,6 +1205,19 @@ def test_constructor_from_items(self):
                        columns=['one', 'two', 'three'])
         tm.assert_frame_equal(rs, xp)
 
+    def test_constructor_from_items_scalars(self):
+        # GH 17312
+        with tm.assert_raises_regex(ValueError,
+                                    r'The value in each \(key, value\) '
+                                    'pair must be an array, Series, or dict'):
+            DataFrame.from_items([('A', 1), ('B', 4)])
+
+        with tm.assert_raises_regex(ValueError,
+                                    r'The value in each \(key, value\) '
+                                    'pair must be an array, Series, or dict'):
+            DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'],
+                                 orient='index')
+
     def test_constructor_mix_series_nonseries(self):
         df = DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])},
                        columns=['A', 'B'])