From 6ee0371b5ead4983e82fb60dff7b5e6a758bb3ac Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 16 Nov 2023 21:57:38 -0500
Subject: [PATCH 1/5] BUG: GroupBy.value_counts sorting order

---
 pandas/core/groupby/groupby.py                | 23 +++++---
 .../groupby/methods/test_value_counts.py      | 52 +++++++++++++++----
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 3412f18a40313..d4a86bcd68db1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2826,6 +2826,22 @@ def _value_counts(
             ).sortlevel()
             result_series = result_series.reindex(multi_index, fill_value=0)
 
+        if sort:
+            # Sort by the values
+            result_series = result_series.sort_values(
+                ascending=ascending, kind="stable"
+            )
+        if self.sort:
+            # Sort by the groupings
+            names = result_series.index.names
+            # GH#56007 - Temporarily replace names in case they are integers
+            result_series.index.names = range(len(names))
+            index_level = list(range(len(self.grouper.groupings)))
+            result_series = result_series.sort_index(
+                level=index_level, sort_remaining=False
+            )
+            result_series.index.names = names
+
         if normalize:
             # Normalize the results by dividing by the original group sizes.
             # We are guaranteed to have the first N levels be the
@@ -2845,13 +2861,6 @@ def _value_counts(
             # Handle groups of non-observed categories
             result_series = result_series.fillna(0.0)
 
-        if sort:
-            # Sort the values and then resort by the main grouping
-            index_level = range(len(self.grouper.groupings))
-            result_series = result_series.sort_values(ascending=ascending).sort_index(
-                level=index_level, sort_remaining=False
-            )
-
         result: Series | DataFrame
         if self.as_index:
             result = result_series
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index 200fa5fd4367d..8c440992df392 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -385,8 +385,8 @@ def test_against_frame_and_seriesgroupby(
     "sort, ascending, expected_rows, expected_count, expected_group_size",
     [
         (False, None, [0, 1, 2, 3, 4], [1, 1, 1, 2, 1], [1, 3, 1, 3, 1]),
-        (True, False, [4, 3, 1, 2, 0], [1, 2, 1, 1, 1], [1, 3, 3, 1, 1]),
-        (True, True, [4, 1, 3, 2, 0], [1, 1, 2, 1, 1], [1, 3, 3, 1, 1]),
+        (True, False, [3, 0, 1, 2, 4], [2, 1, 1, 1, 1], [3, 1, 3, 1, 1]),
+        (True, True, [0, 1, 2, 4, 3], [1, 1, 1, 1, 2], [1, 3, 1, 1, 3]),
     ],
 )
 def test_compound(
@@ -811,19 +811,19 @@ def test_categorical_single_grouper_observed_false(
         ("FR", "female", "high"),
         ("FR", "male", "medium"),
         ("FR", "female", "low"),
-        ("FR", "male", "high"),
         ("FR", "female", "medium"),
+        ("FR", "male", "high"),
         ("US", "female", "high"),
         ("US", "male", "low"),
-        ("US", "male", "medium"),
-        ("US", "male", "high"),
-        ("US", "female", "medium"),
         ("US", "female", "low"),
-        ("ASIA", "male", "low"),
-        ("ASIA", "male", "high"),
-        ("ASIA", "female", "medium"),
-        ("ASIA", "female", "low"),
+        ("US", "female", "medium"),
+        ("US", "male", "high"),
+        ("US", "male", "medium"),
         ("ASIA", "female", "high"),
+        ("ASIA", "female", "low"),
+        ("ASIA", "female", "medium"),
+        ("ASIA", "male", "high"),
+        ("ASIA", "male", "low"),
         ("ASIA", "male", "medium"),
     ]
 
@@ -1177,3 +1177,35 @@ def test_value_counts_integer_columns():
         {1: ["a", "a", "a"], 2: ["a", "a", "d"], 3: ["a", "b", "c"], "count": 1}
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("vc_sort", [True, False])
+@pytest.mark.parametrize("normalize", [True, False])
+def test_value_counts_sort(sort, vc_sort, normalize):
+    # GH#55951
+    # These values give different results for all four combinations of sort options
+    # and would give a different result if we sorted by normalized values.
+    # However, value_counts only sorts by frequencies.
+    df = DataFrame({"a": [2, 1, 1, 1], 0: [3, 4, 3, 3]})
+    gb = df.groupby("a", sort=sort)
+    result = gb.value_counts(sort=vc_sort, normalize=normalize)
+
+    if normalize:
+        values = [2 / 3, 1 / 3, 1.0]
+    else:
+        values = [2, 1, 1]
+    index = MultiIndex(
+        levels=[[1, 2], [3, 4]], codes=[[0, 0, 1], [0, 1, 0]], names=["a", 0]
+    )
+    expected = Series(values, index=index, name="proportion" if normalize else "count")
+    if sort and vc_sort:
+        taker = [0, 1, 2]
+    elif sort and not vc_sort:
+        taker = [0, 1, 2]
+    elif not sort and vc_sort:
+        taker = [0, 2, 1]
+    else:
+        taker = [2, 1, 0]
+    expected = expected.take(taker)
+
+    tm.assert_series_equal(result, expected)

From 8f54fed6e1b34444a98bd14d33b5506850723fd2 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 16 Nov 2023 23:12:36 -0500
Subject: [PATCH 2/5] Whatsnew

---
 doc/source/whatsnew/v2.2.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 93b0ccad9f2aa..18c9d2fb8e3ba 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -444,6 +444,9 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
 - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
+- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` could result in incorrect sorting if the columns of the DataFrame or name of the Series are integers (:issue:`56007`)
+- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would not respect ``sort=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`56007`)
+- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`56007`)
 
 Reshaping
 ^^^^^^^^^

From 5d7b1225f34625658826eb71e20587bddf151bfa Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 16 Nov 2023 23:17:16 -0500
Subject: [PATCH 3/5] cleanup

---
 pandas/tests/groupby/methods/test_value_counts.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index 8c440992df392..cfe8dcd9d74dd 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -1183,9 +1183,6 @@ def test_value_counts_integer_columns():
 @pytest.mark.parametrize("normalize", [True, False])
 def test_value_counts_sort(sort, vc_sort, normalize):
     # GH#55951
-    # These values give different results for all four combinations of sort options
-    # and would give a different result if we sorted by normalized values.
-    # However, value_counts only sorts by frequencies.
     df = DataFrame({"a": [2, 1, 1, 1], 0: [3, 4, 3, 3]})
     gb = df.groupby("a", sort=sort)
     result = gb.value_counts(sort=vc_sort, normalize=normalize)

From ee2e4680b280cc0e733c18f023ad836dc8c8a8ea Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 17 Nov 2023 16:17:55 -0500
Subject: [PATCH 4/5] Fix categorical and add test

---
 pandas/core/groupby/groupby.py                |  4 +--
 .../groupby/methods/test_value_counts.py      | 35 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d4a86bcd68db1..c17badccfb59a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2821,9 +2821,9 @@ def _value_counts(
             for grouping in groupings
         ):
             levels_list = [ping.result_index for ping in groupings]
-            multi_index, _ = MultiIndex.from_product(
+            multi_index = MultiIndex.from_product(
                 levels_list, names=[ping.name for ping in groupings]
-            ).sortlevel()
+            )
             result_series = result_series.reindex(multi_index, fill_value=0)
 
         if sort:
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index cfe8dcd9d74dd..f1650b22acf4c 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -1184,7 +1184,7 @@ def test_value_counts_integer_columns():
 def test_value_counts_sort(sort, vc_sort, normalize):
     # GH#55951
     df = DataFrame({"a": [2, 1, 1, 1], 0: [3, 4, 3, 3]})
-    gb = df.groupby("a", sort=sort)
+    gb = df.groupby("a", sort=sort, observed=True)
     result = gb.value_counts(sort=vc_sort, normalize=normalize)
 
     if normalize:
@@ -1206,3 +1206,36 @@ def test_value_counts_sort(sort, vc_sort, normalize):
     expected = expected.take(taker)
 
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("vc_sort", [True, False])
+@pytest.mark.parametrize("normalize", [True, False])
+def test_value_counts_sort_categorical(sort, vc_sort, normalize):
+    # GH#55951
+    df = DataFrame({"a": [2, 1, 1, 1], 0: [3, 4, 3, 3]}, dtype="category")
+    gb = df.groupby("a", sort=sort, observed=True)
+    result = gb.value_counts(sort=vc_sort, normalize=normalize)
+
+    if normalize:
+        values = [2 / 3, 1 / 3, 1.0, 0.0]
+    else:
+        values = [2, 1, 1, 0]
+    name = "proportion" if normalize else "count"
+    expected = DataFrame(
+        {
+            "a": Categorical([1, 1, 2, 2]),
+            0: Categorical([3, 4, 3, 4]),
+            name: values,
+        }
+    ).set_index(["a", 0])[name]
+    if sort and vc_sort:
+        taker = [0, 1, 2, 3]
+    elif sort and not vc_sort:
+        taker = [0, 1, 2, 3]
+    elif not sort and vc_sort:
+        taker = [0, 2, 1, 3]
+    else:
+        taker = [2, 3, 0, 1]
+    expected = expected.take(taker)
+
+    tm.assert_series_equal(result, expected)

From d0dad4c2e726001d2e4ef56dd9ab906cb1ef6bd3 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 17 Nov 2023 16:25:07 -0500
Subject: [PATCH 5/5] cleanup

---
 pandas/tests/groupby/methods/test_value_counts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index f1650b22acf4c..2fa79c815d282 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -1184,7 +1184,7 @@ def test_value_counts_integer_columns():
 def test_value_counts_sort(sort, vc_sort, normalize):
     # GH#55951
     df = DataFrame({"a": [2, 1, 1, 1], 0: [3, 4, 3, 3]})
-    gb = df.groupby("a", sort=sort, observed=True)
+    gb = df.groupby("a", sort=sort)
     result = gb.value_counts(sort=vc_sort, normalize=normalize)
 
     if normalize: