pandas-dev · Farsidetfs · May 12, 2025 · May 13, 2025 · rhshadrach · May 19, 2025
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
@@ -170,7 +170,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
--
+- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
 -
 
 Sparse

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -3058,17 +3058,19 @@ def renamer(x, suffix: str | None):
     llabels = left._transform_index(lrenamer)
     rlabels = right._transform_index(rrenamer)
 
-    dups = []
+    dups = set()
     if not llabels.is_unique:
         # Only raise when duplicates are caused by suffixes; columns that were
         # already duplicated in the original frame should not raise
-        dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist()
+        dups.update(llabels[(llabels.duplicated()) & (~left.duplicated())])
     if not rlabels.is_unique:
-        dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist())
+        dups.update(rlabels[(rlabels.duplicated()) & (~right.duplicated())])
+    # A suffixed name may also collide with a column that was not renamed
+    dups.update(llabels.intersection(right.difference(to_rename)))
+    dups.update(rlabels.intersection(left.difference(to_rename)))
     if dups:
         raise MergeError(
-            f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
-            f"not allowed.",
+            f"Passing 'suffixes' which cause duplicate columns {dups} is not allowed.",
         )
 
     return llabels, rlabels
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -3060,3 +3060,18 @@ def test_merge_on_all_nan_column():
         {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]}
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_merge_for_suffix_collisions():
+    # GH#61402
+    # Case 1: left "col2" is renamed to "col2_dup", which already exists in df2
+    df1 = DataFrame({"col1": [1], "col2": [2]})
+    df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
+    with pytest.raises(MergeError, match="duplicate columns"):
+        merge(df1, df2, on="col1", suffixes=("_dup", ""))
+
+    # Case 2: right "col2" is renamed to "col2_dup", which already exists in df2
+    df1 = DataFrame({"col1": [1], "col2": [2]})
+    df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
+    with pytest.raises(MergeError, match="duplicate columns"):
+        merge(df1, df2, on="col1", suffixes=("", "_dup"))
-Original file line number
+Diff line change
@@ Expand Up / @@ -170,7 +170,7 @@ Groupby/resample/rolling @@
     Reshaping
     ^^^^^^^^^
-    -
+    - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
     -
     Sparse
@@ Expand Down @@