From a499d320174ae3b621439e2cf441837c8f7e5b09 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 15 Oct 2017 18:20:42 +0100 Subject: [PATCH 1/6] EHN: Improve from_items error message (#17312) --- pandas/core/frame.py | 7 +++++++ pandas/tests/frame/test_constructors.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e82eb8635d4c7..42bb7c0adde99 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1255,6 +1255,13 @@ def from_items(cls, items, columns=None, orient='columns'): """ keys, values = lzip(*items) + import array + for val in values: + if not isinstance(val, (list, Series, np.ndarray, Categorical, + array.array)): + raise TypeError('The value in each (key, value) pair must ' + 'be an array or a Series') + if orient == 'columns': if columns is not None: columns = _ensure_index(columns) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2f947527ce95b..7190abe694ab0 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1204,6 +1204,12 @@ def test_constructor_from_items(self): columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) + # GH 17312 + with tm.assert_raises_regex(TypeError, + 'The value in each \(key, value\) ' + 'pair must be an array or a Series'): + DataFrame.from_items([('A', 1), ('B', 4)]) + def test_constructor_mix_series_nonseries(self): df = DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])}, columns=['A', 'B']) From 14659999945c8ef24a9700c61160cb3f6d0a9333 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 16 Oct 2017 10:44:38 +0100 Subject: [PATCH 2/6] Using is_list_like to check value --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 42bb7c0adde99..7fdadc064f3e8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1255,10 +1255,8 @@ def from_items(cls, items, columns=None, orient='columns'): """ keys, values = lzip(*items) - import array for val in values: - if not isinstance(val, (list, Series, np.ndarray, Categorical, - array.array)): + if not is_list_like(val): raise TypeError('The value in each (key, value) pair must ' 'be an array or a Series') From aec9b338f10524f5ce74063f81da123a5fb0dff1 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 16 Oct 2017 16:43:13 +0100 Subject: [PATCH 3/6] Move check and add another test --- pandas/core/frame.py | 31 +++++++++++++++++-------- pandas/tests/frame/test_constructors.py | 6 +++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7fdadc064f3e8..49cd286ab80f8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -54,6 +54,7 @@ _ensure_int64, _ensure_platform_int, is_list_like, + is_nested_list_like, is_iterator, is_sequence, is_named_tuple) @@ -1255,11 +1256,6 @@ def from_items(cls, items, columns=None, orient='columns'): """ keys, values = lzip(*items) - for val in values: - if not is_list_like(val): - raise TypeError('The value in each (key, value) pair must ' - 'be an array or a Series') - if orient == 'columns': if columns is not None: columns = _ensure_index(columns) @@ -1276,16 +1272,31 @@ def from_items(cls, items, columns=None, orient='columns'): columns = _ensure_index(keys) arrays = values - return cls._from_arrays(arrays, columns, None) + try: + return cls._from_arrays(arrays, columns, None) + + except ValueError: + if not is_nested_list_like(values): + raise TypeError('The value in each (key, value) pair must ' + 'be an array or a Series') + elif orient == 'index': if columns is None: raise TypeError("Must pass columns with orient='index'") - keys = _ensure_index(keys) + try: + keys = _ensure_index(keys) + + arr = np.array(values, dtype=object).T + data = [lib.maybe_convert_objects(v) for v in arr] + + return cls._from_arrays(data, columns, keys) + + except TypeError: + if not is_nested_list_like(values): + raise TypeError('The value in each (key, value) pair must ' + 'be an array or a Series') - arr = np.array(values, dtype=object).T - data = [lib.maybe_convert_objects(v) for v in arr] - return cls._from_arrays(data, columns, keys) else: # pragma: no cover raise ValueError("'orient' must be either 'columns' or 'index'") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7190abe694ab0..b4130436eb9e2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1210,6 +1210,12 @@ def test_constructor_from_items(self): 'pair must be an array or a Series'): DataFrame.from_items([('A', 1), ('B', 4)]) + with tm.assert_raises_regex(TypeError, + 'The value in each \(key, value\) ' + 'pair must be an array or a Series'): + DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + orient='index') + def test_constructor_mix_series_nonseries(self): df = DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])}, columns=['A', 'B']) From 7e6356460650bafa16a1071fac66f507a24e6759 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 31 Oct 2017 21:54:58 +0000 Subject: [PATCH 4/6] add comments and tests --- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/core/frame.py | 13 ++++++++----- pandas/tests/frame/test_constructors.py | 12 +++++++----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4ae3d9be04aa7..2b5f899091edd 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -80,6 +80,7 @@ Other API Changes - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) .. _whatsnew_0220.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49cd286ab80f8..cffce0675b5e2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1272,30 +1272,33 @@ def from_items(cls, items, columns=None, orient='columns'): columns = _ensure_index(keys) arrays = values + # GH 17312 + # Provide more informative error msg when scalar values passed try: return cls._from_arrays(arrays, columns, None) except ValueError: if not is_nested_list_like(values): raise TypeError('The value in each (key, value) pair must ' - 'be an array or a Series') + 'be an array, Series, or dict') elif orient == 'index': if columns is None: raise TypeError("Must pass columns with orient='index'") - try: - keys = _ensure_index(keys) + keys = _ensure_index(keys) + # GH 17312 + # Provide more informative error msg when scalar values passed + try: arr = np.array(values, dtype=object).T data = [lib.maybe_convert_objects(v) for v in arr] - return cls._from_arrays(data, columns, keys) except TypeError: if not is_nested_list_like(values): raise TypeError('The value in each (key, value) pair must ' - 'be an array or a Series') + 'be an array, Series, or dict') else: # pragma: no cover raise ValueError("'orient' must be either 'columns' or 'index'") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b4130436eb9e2..f33435e6eaeda 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -268,13 +268,14 @@ def test_constructor_dict(self): # GH10856 # dict with scalar values should raise error, even if columns passed - with pytest.raises(ValueError): + msg = 'If using all scalar values, you must pass an index' + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}, columns=['a']) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}, columns=['b']) def test_constructor_multi_index(self): @@ -1204,15 +1205,16 @@ def test_constructor_from_items(self): columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) + def test_constructor_from_items_scalars(self): # GH 17312 with tm.assert_raises_regex(TypeError, 'The value in each \(key, value\) ' - 'pair must be an array or a Series'): + 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 4)]) with tm.assert_raises_regex(TypeError, 'The value in each \(key, value\) ' - 'pair must be an array or a Series'): + 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], orient='index') From 6d7b4cbaa7018297b1c9165c37229c8ad0d6e0df Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 26 Nov 2017 00:00:18 +0000 Subject: [PATCH 5/6] Change TypeErrors to ValueErrors --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_constructors.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cffce0675b5e2..7d3cdc207310b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1279,7 +1279,7 @@ def from_items(cls, items, columns=None, orient='columns'): except ValueError: if not is_nested_list_like(values): - raise TypeError('The value in each (key, value) pair must ' + raise ValueError('The value in each (key, value) pair must ' 'be an array, Series, or dict') elif orient == 'index': @@ -1297,7 +1297,7 @@ def from_items(cls, items, columns=None, orient='columns'): except TypeError: if not is_nested_list_like(values): - raise TypeError('The value in each (key, value) pair must ' + raise ValueError('The value in each (key, value) pair must ' 'be an array, Series, or dict') else: # pragma: no cover diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f33435e6eaeda..b6090a13c8d38 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1207,12 +1207,12 @@ def test_constructor_from_items(self): def test_constructor_from_items_scalars(self): # GH 17312 - with tm.assert_raises_regex(TypeError, + with tm.assert_raises_regex(ValueError, 'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 4)]) - with tm.assert_raises_regex(TypeError, + with tm.assert_raises_regex(ValueError, 'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], From fc7fd26e99acb039f98440cb02fb22f4684237f9 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 26 Nov 2017 11:17:01 +0000 Subject: [PATCH 6/6] fix line lengths --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7d3cdc207310b..d3561f8a0eadf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1279,8 +1279,8 @@ def from_items(cls, items, columns=None, orient='columns'): except ValueError: if not is_nested_list_like(values): - raise ValueError('The value in each (key, value) pair must ' - 'be an array, Series, or dict') + raise ValueError('The value in each (key, value) pair ' + 'must be an array, Series, or dict') elif orient == 'index': if columns is None: @@ -1297,8 +1297,8 @@ def from_items(cls, items, columns=None, orient='columns'): except TypeError: if not is_nested_list_like(values): - raise ValueError('The value in each (key, value) pair must ' - 'be an array, Series, or dict') + raise ValueError('The value in each (key, value) pair ' + 'must be an array, Series, or dict') else: # pragma: no cover raise ValueError("'orient' must be either 'columns' or 'index'")