@@ -406,33 +406,52 @@ def test_mixed_groupings(normalize, expected_label, expected_values):
406
406
407
407
408
408
@pytest.mark.parametrize(
    "test, columns, expected_names",
    [
        # "repeat": the input frame itself carries a duplicated label ("b").
        ("repeat", list("abbde"), ["a", None, "d", "b", "b", "e"]),
        # "level": an input column is already called "level_1", which is the
        # label given to the unnamed grouper in the as_index=False result.
        (
            "level",
            list("abcd") + ["level_1"],
            ["a", None, "d", "b", "c", "level_1"],
        ),
    ],
)
@pytest.mark.parametrize("as_index", [False, True])
def test_column_label_duplicates(test, columns, expected_names, as_index):
    # GH 44992
    # Test for duplicate input column labels and generated duplicate labels
    #
    # ``test`` is only a human-readable case id; the body does not read it.
    df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns)

    # One tuple per input row, ordered as: the three groupers ("a", the
    # positional list [0, 1], "d") followed by the remaining columns.
    expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)]

    result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts()

    if as_index:
        # Series result: every distinct row occurs once, and the unnamed list
        # grouper contributes an index level whose name is None.
        expected = Series(
            data=(1, 1),
            index=MultiIndex.from_tuples(
                expected_data,
                names=expected_names,
            ),
        )
        tm.assert_series_equal(result, expected)
    else:
        # DataFrame result: the same rows with the count appended as a
        # trailing column.
        expected_data = [list(row) + [1] for row in expected_data]
        expected_columns = list(expected_names)
        # The unnamed grouper is expected under the label "level_1" here.
        expected_columns[1] = "level_1"
        expected_columns.append("count")
        expected = DataFrame(expected_data, columns=expected_columns)
        tm.assert_frame_equal(result, expected)
438
+
439
+
440
@pytest.mark.parametrize(
    "normalize, expected_label",
    [
        (False, "count"),
        (True, "proportion"),
    ],
)
def test_result_label_duplicates(normalize, expected_label):
    # Test for result column label duplicating an input column label
    #
    # The frame deliberately contains a column named ``expected_label``
    # ("count" or "proportion", paired with ``normalize`` above), and the
    # grouping uses as_index=False.
    gb = DataFrame([[1, 2, 3]], columns=["a", "b", expected_label]).groupby(
        "a", as_index=False
    )
    # No whitespace inside the quotes: the label is interpolated verbatim so
    # the pattern reads e.g. "Column label 'count' is duplicate ...".
    msg = f"Column label '{expected_label}' is duplicate of result column"
    with pytest.raises(ValueError, match=msg):
        gb.value_counts(normalize=normalize)
436
455
437
456
438
457
def test_ambiguous_grouping ():
0 commit comments