diff --git a/pandas/conftest.py b/pandas/conftest.py index 5e0dfd7ee644d..dc1b0316fcb5d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -701,6 +701,7 @@ def _create_mi_with_dt64tz_level(): "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(), + "mixed-int-string": Index([0, "a", 1, "b", 2, "c"]), "multi": _create_multiindex(), "repeats": Index([0, 0, 1, 1, 2, 2]), "nullable_int": Index(np.arange(10), dtype="Int64"), diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index f6a4396ca5be0..d0d7759a09a9c 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -160,6 +160,10 @@ def test_searchsorted(request, index_or_series_obj): mark = pytest.mark.xfail(reason="complex objects are not comparable") request.applymarker(mark) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + pytest.skip("'>' not supported between instances of 'str' and 'int'") + max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) assert 0 <= index <= len(obj) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index c72abfeb9f3e7..bff6161a50167 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -21,7 +21,7 @@ @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_value_counts(index_or_series_obj): +def test_value_counts(index_or_series_obj, request): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.value_counts() @@ -49,10 +49,16 @@ def test_value_counts(index_or_series_obj): tm.assert_series_equal(result, expected) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + msg = "'<' not supported between instances of 'int' and 'str'" + with pytest.raises(TypeError, match=msg): + result.sort_index() + @pytest.mark.parametrize("null_obj", [np.nan, None]) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_value_counts_null(null_obj, index_or_series_obj): +def test_value_counts_null(null_obj, index_or_series_obj, request): orig = index_or_series_obj obj = orig.copy() @@ -62,6 +68,9 @@ def test_value_counts_null(null_obj, index_or_series_obj): pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, MultiIndex): pytest.skip(f"MultiIndex can't hold '{null_obj}'") + # This check is written for the mixed-int-string entry + if request.node.callspec.id in ["mixed-int-string-nan", "mixed-int-string-None"]: + pytest.skip("'<' not supported between instances of 'str' and 'int'") values = obj._values values[0:2] = null_obj diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 47f21cc7f8182..38774b99fbf4d 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -633,6 +633,10 @@ def test_union_duplicates(index, request): values = index.unique().values.tolist() mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + pytest.skip("'<' not supported between instances of 'str' and 'int'") + result = mi2.union(mi1) expected = mi2.sort_values() tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 43445433e2a04..fddb8e0422c6e 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -437,9 +437,16 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @pytest.mark.parametrize("na_position", [None, "middle"]) -def test_sort_values_invalid_na_position(index_with_missing, na_position): - with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): - index_with_missing.sort_values(na_position=na_position) +def test_sort_values_invalid_na_position(index_with_missing, na_position, request): + # This check is written for the mixed-int-string entry + if request.node.callspec.id in ["mixed-int-string-None", "mixed-int-string-middle"]: + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): + index_with_missing.sort_values(na_position=na_position) + else: + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + index_with_missing.sort_values(na_position=na_position) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") @@ -457,17 +464,27 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values - sorted_values = np.sort(not_na_vals) - if na_position == "first": - sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + # This check is written for the mixed-int-string entry + + if request.node.callspec.id in ["mixed-int-string-first", "mixed-int-string-last"]: + with pytest.raises( + TypeError, match="'<' not supported between instances of 'int' and 'str'" + ): + np.sort(not_na_vals) else: - sorted_values = np.concatenate([sorted_values, [None] * missing_count]) + sorted_values = np.sort(not_na_vals) + if na_position == "first": + sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + else: + sorted_values = np.concatenate([sorted_values, [None] * missing_count]) - # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray - expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) + # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray + expected = type(index_with_missing)( + sorted_values, dtype=index_with_missing.dtype + ) - result = index_with_missing.sort_values(na_position=na_position) - tm.assert_index_equal(result, expected) + result = index_with_missing.sort_values(na_position=na_position) + tm.assert_index_equal(result, expected) def test_sort_values_natsort_key(): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index ace78d77350cb..e4e8797a82136 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -160,6 +160,16 @@ def test_numpy_ufuncs_reductions(index, func, request): with pytest.raises(TypeError, match="is not ordered for"): func.reduce(index) return + # This check is written for the mixed-int-string entry + elif request.node.callspec.id in [ + "mixed-int-string-maximum", + "mixed-int-string-minimum", + ]: + with pytest.raises( + TypeError, match=".* not supported between instances of 'int' and 'str'" + ): + func.reduce(index) + return else: result = func.reduce(index) diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index b929616c814ee..43dbd996e1a2e 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -350,22 +350,38 @@ def test_memory_usage_doesnt_trigger_engine(self, index): assert res_without_engine > 0 assert res_with_engine > 0 - def test_argsort(self, index): + def test_argsort(self, index, request): if isinstance(index, CategoricalIndex): pytest.skip(f"{type(self).__name__} separately tested") - result = index.argsort() - expected = np.array(index).argsort() - tm.assert_numpy_array_equal(result, expected, check_dtype=False) - - def test_numpy_argsort(self, index): - result = np.argsort(index) - expected = index.argsort() - tm.assert_numpy_array_equal(result, expected) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + index.argsort() + else: + result = index.argsort() + expected = np.array(index).argsort() + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + def test_numpy_argsort(self, index, request): + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + np.argsort(index) + else: + result = np.argsort(index) + expected = index.argsort() + tm.assert_numpy_array_equal(result, expected) - result = np.argsort(index, kind="mergesort") - expected = index.argsort(kind="mergesort") - tm.assert_numpy_array_equal(result, expected) + result = np.argsort(index, kind="mergesort") + expected = index.argsort(kind="mergesort") + tm.assert_numpy_array_equal(result, expected) # these are the only two types that perform # pandas compatibility input validation - the diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 8fd349dacf9e9..6639167f0ccbc 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -62,12 +62,19 @@ def index_flat2(index_flat): return index_flat -def test_union_same_types(index): +def test_union_same_types(index, request): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - idx1 = index.sort_values() - idx2 = index.sort_values() - assert idx1.union(idx2).dtype == idx1.dtype + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, match="'<' not supported between instances of 'str' and 'int'" + ): + index.sort_values() + else: + idx1 = index.sort_values() + idx2 = index.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype def test_union_different_types(index_flat, index_flat2, request): @@ -129,19 +136,26 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - idx1 = idx1.sort_values() - idx2 = idx2.sort_values() - - with tm.assert_produces_warning(warn, match=msg): - res1 = idx1.union(idx2) - res2 = idx2.union(idx1) - - if any_uint64 and (idx1_signed or idx2_signed): - assert res1.dtype == np.dtype("O") - assert res2.dtype == np.dtype("O") + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, match="'<' not supported between instances of 'str' and 'int'" + ): + idx1.sort_values() + idx2.sort_values() else: - assert res1.dtype == common_dtype - assert res2.dtype == common_dtype + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + with tm.assert_produces_warning(warn, match=msg): + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + if any_uint64 and (idx1_signed or idx2_signed): + assert res1.dtype == np.dtype("O") + assert res2.dtype == np.dtype("O") + else: + assert res1.dtype == common_dtype + assert res2.dtype == common_dtype @pytest.mark.parametrize( @@ -250,14 +264,22 @@ def test_intersection_base(self, index): "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" ) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") - def test_union_base(self, index): + def test_union_base(self, index, request): index = index.unique() first = index[3:] second = index[:5] everything = index union = first.union(second) - tm.assert_index_equal(union.sort_values(), everything.sort_values()) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + tm.assert_index_equal(union.sort_values(), everything.sort_values()) + else: + tm.assert_index_equal(union.sort_values(), everything.sort_values()) if isinstance(index.dtype, DatetimeTZDtype): # The second.values below will drop tz, so the rest of this test @@ -308,7 +330,7 @@ def test_difference_base(self, sort, index): @pytest.mark.filterwarnings( "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" ) - def test_symmetric_difference(self, index): + def test_symmetric_difference(self, index, request): if isinstance(index, CategoricalIndex): pytest.skip(f"Not relevant for {type(index).__name__}") if len(index) < 2: @@ -322,7 +344,15 @@ def test_symmetric_difference(self, index): second = index[:-1] answer = index[[0, -1]] result = first.symmetric_difference(second) - tm.assert_index_equal(result.sort_values(), answer.sort_values()) + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string": + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + tm.assert_index_equal(result.sort_values(), answer.sort_values()) + else: + tm.assert_index_equal(result.sort_values(), answer.sort_values()) # GH#10149 cases = [second.to_numpy(), second.to_series(), second.to_list()] @@ -392,7 +422,7 @@ def test_corner_union(self, index_flat, fname, sname, expected_name): (None, None, None), ], ) - def test_union_unequal(self, index_flat, fname, sname, expected_name): + def test_union_unequal(self, index_flat, fname, sname, expected_name, request): if not index_flat.is_unique: index = index_flat.unique() else: @@ -401,9 +431,26 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - union = first.union(second).sort_values() - expected = index.set_names(expected_name).sort_values() - tm.assert_index_equal(union, expected) + # This check is written for the mixed-int-string entry + if request.node.callspec.id in [ + "-".join(["mixed-int-string", t]) + for t in [ + "A-A-A", + "A-B-None", + "A-None-None", + "None-B-None", + "None-None-None", + ] + ]: + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'str' and 'int'", + ): + first.union(second).sort_values() + else: + union = first.union(second).sort_values() + expected = index.set_names(expected_name).sort_values() + tm.assert_index_equal(union, expected) @pytest.mark.parametrize( "fname, sname, expected_name", @@ -461,7 +508,7 @@ def test_corner_intersect(self, index_flat, fname, sname, expected_name): (None, None, None), ], ) - def test_intersect_unequal(self, index_flat, fname, sname, expected_name): + def test_intersect_unequal(self, index_flat, fname, sname, expected_name, request): if not index_flat.is_unique: index = index_flat.unique() else: @@ -470,9 +517,26 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - intersect = first.intersection(second).sort_values() - expected = index[1:].set_names(expected_name).sort_values() - tm.assert_index_equal(intersect, expected) + # This check is written for the mixed-int-string entry + if request.node.callspec.id in [ + "-".join(["mixed-int-string", t]) + for t in [ + "A-A-A", + "A-B-None", + "A-None-None", + "None-B-None", + "None-None-None", + ] + ]: + with pytest.raises( + TypeError, + match="'<' not supported between instances of 'int' and 'str'", + ): + first.intersection(second).sort_values() + else: + intersect = first.intersection(second).sort_values() + expected = index[1:].set_names(expected_name).sort_values() + tm.assert_index_equal(intersect, expected) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_intersection_name_retention_with_nameless(self, index): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index cdcd36846c560..0cf8eb2decaa7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -63,7 +63,7 @@ def test_factorize_complex(self): expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex) tm.assert_numpy_array_equal(uniques, expected_uniques) - def test_factorize(self, index_or_series_obj, sort): + def test_factorize(self, index_or_series_obj, sort, request): obj = index_or_series_obj result_codes, result_uniques = obj.factorize(sort=sort) @@ -82,6 +82,9 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: + # This check is written for the mixed-int-string entry + if request.node.callspec.id == "mixed-int-string-True": + pytest.skip("'<' not supported between instances of 'str' and 'int'") expected_uniques = expected_uniques.sort_values() # construct an integer ndarray so that