diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index de3a05a2ccdfb..31d2d29c71386 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1122,6 +1122,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.quantile` where ``TypeError`` was raised for columns of non-numeric types rather than dropping those columns (:issue:`27892`) - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`) - Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 65483abbd2a6e..ac45222625569 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2403,7 +2403,7 @@ def _get_cythonized_result( signature needs_2d : bool, default False Whether the values and result of the Cython call signature - are at least 2-dimensional. + are 2-dimensional. min_count : int, default None When not None, min_count for the Cython call needs_mask : bool, default False @@ -2419,7 +2419,9 @@ def _get_cythonized_result( Function should return a tuple where the first element is the values to be passed to Cython and the second element is an optional type which the values should be converted to after being returned - by the Cython operation. Raises if `needs_values` is False. + by the Cython operation. 
This function is also responsible for + raising a TypeError if the values have an invalid type. Raises + if `needs_values` is False. post_processing : function, default None Function to be applied to result of Cython function. Should accept an array of values as the first argument and type inferences as its @@ -2451,6 +2453,7 @@ def _get_cythonized_result( output: Dict[base.OutputKey, np.ndarray] = {} base_func = getattr(libgroupby, how) + error_msg = "" for idx, obj in enumerate(self._iterate_slices()): name = obj.name values = obj._values @@ -2477,7 +2480,11 @@ def _get_cythonized_result( if needs_values: vals = values if pre_processing: - vals, inferences = pre_processing(vals) + try: + vals, inferences = pre_processing(vals) + except TypeError as e: + error_msg = str(e) + continue if needs_2d: vals = vals.reshape((-1, 1)) vals = vals.astype(cython_dtype, copy=False) @@ -2509,6 +2516,10 @@ def _get_cythonized_result( key = base.OutputKey(label=name, position=idx) output[key] = result + # error_msg is "" on a frame/series with no rows or columns + if len(output) == 0 and error_msg != "": + raise TypeError(error_msg) + if aggregate: return self._wrap_aggregated_output(output) else: diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 8cfd8035502c3..9338742195bfe 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -232,3 +232,11 @@ def test_groupby_quantile_nullable_array(values, q): expected = pd.Series(true_quantiles * 2, index=idx, name="b") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_skips_invalid_dtype(q): + df = pd.DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) + result = df.groupby("a").quantile(q) + expected = df.groupby("a")[["b"]].quantile(q) + tm.assert_frame_equal(result, expected)