From 451e421e80c262f963d5000f680bf7c6c104f344 Mon Sep 17 00:00:00 2001 From: nuffe Date: Sat, 20 May 2017 20:58:10 +0200 Subject: [PATCH 1/3] BUG: wide_to_long should check for unique id vars (#16382) --- pandas/core/reshape/reshape.py | 3 +++ pandas/tests/reshape/test_reshape.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b0ed6d4c4b84d..f944dfe22361a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1046,6 +1046,9 @@ def melt_stub(df, stub, i, j, value_vars, sep): else: i = list(i) + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + value_vars = list(map(lambda stub: get_var_names(df, stub, sep, suffix), stubnames)) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 79626d89026a7..cfbd9c71828d4 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -976,3 +976,14 @@ def test_multiple_id_columns(self): exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']] long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(long_frame, exp_frame) + + def test_non_unique_idvars(self): + # GH16382 + # Raise an error message if non unique id vars (i) are passed + df = pd.DataFrame({ + 'A_A1' : [1, 2, 3, 4, 5], + 'B_B1' : [1, 2, 3, 4, 5], + 'x' : [1, 1, 1, 1, 1] + }) + with pytest.raises(ValueError): + wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname') From cc46e98ccc3bd21c8c6030a7d163b7c808a93b47 Mon Sep 17 00:00:00 2001 From: nuffe Date: Sat, 20 May 2017 21:51:41 +0200 Subject: [PATCH 2/3] Fix uncaught lint error --- pandas/tests/reshape/test_reshape.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index cfbd9c71828d4..d47a95924bd10 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -981,9 +981,9 @@ def test_non_unique_idvars(self): # GH16382 # Raise an error message if non unique id vars (i) are passed df = pd.DataFrame({ - 'A_A1' : [1, 2, 3, 4, 5], - 'B_B1' : [1, 2, 3, 4, 5], - 'x' : [1, 1, 1, 1, 1] + 'A_A1': [1, 2, 3, 4, 5], + 'B_B1': [1, 2, 3, 4, 5], + 'x': [1, 1, 1, 1, 1] }) with pytest.raises(ValueError): wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname') From a88ff8934ea5fed7f60de39a7dd1976ed4e1177a Mon Sep 17 00:00:00 2001 From: nuffe Date: Sat, 20 May 2017 22:56:37 +0200 Subject: [PATCH 3/3] Add whatsnew note (bug fix) --- doc/source/whatsnew/v0.20.2.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index be4cf85606935..e15aa4def00e7 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -79,6 +79,7 @@ Reshaping ^^^^^^^^^ - Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`) +- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) Numeric