From 2ece0c39c54b853de574453bcb5e05f93899cf23 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 12 Feb 2022 21:53:55 -0800 Subject: [PATCH 1/2] TST: Inline once used variables --- pandas/tests/frame/test_query_eval.py | 9 ++++--- pandas/tests/indexes/test_setops.py | 5 ---- pandas/tests/io/parser/test_parse_dates.py | 5 +--- pandas/tests/io/test_html.py | 2 -- pandas/tests/series/test_ufunc.py | 6 ++--- pandas/tests/test_multilevel.py | 28 ++++++++-------------- pandas/tests/test_nanops.py | 3 +-- 7 files changed, 18 insertions(+), 40 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 842ff172c34c4..73d1fbc26a56c 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -16,16 +16,15 @@ import pandas._testing as tm from pandas.core.computation.check import NUMEXPR_INSTALLED -PARSERS = "python", "pandas" -ENGINES = "python", pytest.param("numexpr", marks=td.skip_if_no_ne) - -@pytest.fixture(params=PARSERS, ids=lambda x: x) +@pytest.fixture(params=["python", "pandas"], ids=lambda x: x) def parser(request): return request.param -@pytest.fixture(params=ENGINES, ids=lambda x: x) +@pytest.fixture( + params=["python", pytest.param("numexpr", marks=td.skip_if_no_ne)], ids=lambda x: x +) def engine(request): return request.param diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 592fe02454346..8502c43feeab5 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -32,11 +32,6 @@ UInt64Index, ) -COMPATIBLE_INCONSISTENT_PAIRS = [ - (np.float64, np.int64), - (np.float64, np.uint64), -] - def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2a6d4acceeaee..444fc47934b6e 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -42,9 +42,6 @@ # cause a deadlock instead, so we skip these instead of xfailing skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") -# constant -_DEFAULT_DATETIME = datetime(1, 1, 1) - @xfail_pyarrow def test_read_csv_with_custom_date_parser(all_parsers): @@ -1719,7 +1716,7 @@ def test_hypothesis_delimited_date( except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, date_string, - default=_DEFAULT_DATETIME, + default=datetime(1, 1, 1), dayfirst=dayfirst, yearfirst=False, ) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index eeebb9a638afb..df373cf42590d 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -31,8 +31,6 @@ import pandas.io.html from pandas.io.html import read_html -HERE = os.path.dirname(__file__) - @pytest.fixture( params=[ diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index b0201db798789..43d33f5b498bc 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -10,11 +10,9 @@ import pandas._testing as tm from pandas.arrays import SparseArray -UNARY_UFUNCS = [np.positive, np.floor, np.exp] BINARY_UFUNCS = [np.add, np.logaddexp] # dunder op SPARSE = [True, False] SPARSE_IDS = ["sparse", "dense"] -SHUFFLE = [True, False] @pytest.fixture @@ -29,7 +27,7 @@ def arrays_for_binary_ufunc(): return a1, a2 -@pytest.mark.parametrize("ufunc", UNARY_UFUNCS) +@pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp]) @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) def test_unary_ufunc(ufunc, sparse): # Test that ufunc(pd.Series) == pd.Series(ufunc) @@ -174,7 +172,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): @pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) -@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc): # Test that diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a06efea47dd6d..674b86687a7ca 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -9,20 +9,6 @@ ) import pandas._testing as tm -AGG_FUNCTIONS = [ - "sum", - "prod", - "min", - "max", - "median", - "mean", - "skew", - "mad", - "std", - "var", - "sem", -] - class TestMultiLevel: def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): @@ -169,15 +155,15 @@ def test_alignment(self): exp = x.reindex(exp_index) - y.reindex(exp_index) tm.assert_series_equal(res, exp) - @pytest.mark.parametrize("op", AGG_FUNCTIONS) @pytest.mark.parametrize("level", [0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) def test_series_group_min_max( - self, op, level, skipna, sort, series_with_multilevel_index + self, all_numeric_reductions, level, skipna, sort, series_with_multilevel_index ): # GH 17537 ser = series_with_multilevel_index + op = all_numeric_reductions grouped = ser.groupby(level=level, sort=sort) # skipna=True @@ -188,13 +174,18 @@ def test_series_group_min_max( rightside = rightside.sort_index(level=level) tm.assert_series_equal(leftside, rightside) - @pytest.mark.parametrize("op", AGG_FUNCTIONS) @pytest.mark.parametrize("level", [0, 1]) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) def test_frame_group_ops( - self, op, level, axis, skipna, sort, multiindex_dataframe_random_data + self, + all_numeric_reductions, + level, + axis, + skipna, + sort, + multiindex_dataframe_random_data, ): # GH 17537 frame = multiindex_dataframe_random_data @@ -212,6 +203,7 @@ def test_frame_group_ops( grouped = frame.groupby(level=level, axis=axis, sort=sort) pieces = [] + op = all_numeric_reductions def aggf(x): pieces.append(x) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index ab1c0d29924d3..3e07682d1cdd2 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -19,7 +19,6 @@ import pandas.core.nanops as nanops use_bn = nanops._USE_BOTTLENECK -has_c16 = hasattr(np, "complex128") @pytest.fixture(params=[True, False]) @@ -128,7 +127,7 @@ def check_results(self, targ, res, axis, check_dtype=True): if targ.dtype.kind != "O": res = res.astype(targ.dtype) else: - cast_dtype = "c16" if has_c16 else "f8" + cast_dtype = "c16" if hasattr(np, "complex128") else "f8" res = res.astype(cast_dtype) targ = targ.astype(cast_dtype) # there should never be a case where numpy returns an object From 44859080f4ba65c4554affc34c2395dedb1d5bb6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 13 Feb 2022 11:20:53 -0800 Subject: [PATCH 2/2] Fix more tests --- pandas/tests/apply/test_str.py | 11 +- pandas/tests/arithmetic/test_numeric.py | 131 +++++++++++++------- pandas/tests/arithmetic/test_timedelta64.py | 101 +++++++-------- 3 files changed, 140 insertions(+), 103 deletions(-) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 266423fea689d..554fd4174c7a4 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -45,12 +45,11 @@ def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): tm.assert_series_equal(result, expected) -def test_with_string_args(datetime_series): - - for arg in ["sum", "mean", "min", "max", "std"]: - result = datetime_series.apply(arg) - expected = getattr(datetime_series, arg)() - assert result == expected +@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"]) +def test_with_string_args(datetime_series, arg): + result = datetime_series.apply(arg) + expected = getattr(datetime_series, arg)() + assert result == expected @pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 22d20d7fe2356..881a5f1de1c60 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -5,7 +5,6 @@ from collections import abc from decimal import Decimal -from itertools import combinations import operator from typing import Any @@ -928,7 +927,6 @@ def test_datetime64_with_index(self): # TODO: taken from tests.frame.test_operators, needs cleanup def test_frame_operators(self, float_frame): frame = float_frame - frame2 = pd.DataFrame(float_frame, columns=["D", "C", "B", "A"]) garbage = np.random.random(4) colSeries = Series(garbage, index=np.array(frame.columns)) @@ -952,23 +950,27 @@ def test_frame_operators(self, float_frame): else: assert np.isnan(origVal) + def test_frame_operators_col_align(self, float_frame): + frame2 = pd.DataFrame(float_frame, columns=["D", "C", "B", "A"]) added = frame2 + frame2 expected = frame2 * 2 tm.assert_frame_equal(added, expected) + def test_frame_operators_none_to_nan(self): df = pd.DataFrame({"a": ["a", None, "b"]}) tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]})) + @pytest.mark.parametrize("dtype", ("float", "int64")) + def test_frame_operators_empty_like(self, dtype): # Test for issue #10181 - for dtype in ("float", "int64"): - frames = [ - pd.DataFrame(dtype=dtype), - pd.DataFrame(columns=["A"], dtype=dtype), - pd.DataFrame(index=[0], dtype=dtype), - ] - for df in frames: - assert (df + df).equals(df) - tm.assert_frame_equal(df + df, df) + frames = [ + pd.DataFrame(dtype=dtype), + pd.DataFrame(columns=["A"], dtype=dtype), + pd.DataFrame(index=[0], dtype=dtype), + ] + for df in frames: + assert (df + df).equals(df) + tm.assert_frame_equal(df + df, df) @pytest.mark.parametrize( "func", @@ -1164,45 +1166,85 @@ def test_operators_reverse_object(self, op): class TestNumericArithmeticUnsorted: # Tests in this class have been moved from type-specific test modules # but not yet sorted, parametrized, and de-duplicated - - def check_binop(self, ops, scalars, idxs): - for op in ops: - for a, b in combinations(idxs, 2): - a = a._rename("foo") - b = b._rename("bar") - result = op(a, b) - expected = op(Int64Index(a), Int64Index(b)) - tm.assert_index_equal(result, expected, exact="equiv") - for idx in idxs: - for scalar in scalars: - result = op(idx, scalar) - expected = op(Int64Index(idx), scalar) - tm.assert_index_equal(result, expected, exact="equiv") - - def test_binops(self): - ops = [ + @pytest.mark.parametrize( + "op", + [ operator.add, operator.sub, operator.mul, operator.floordiv, operator.truediv, - ] - scalars = [-1, 1, 2] - idxs = [ + ], + ) + @pytest.mark.parametrize( + "idx1", + [ RangeIndex(0, 10, 1), RangeIndex(0, 20, 2), RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1), - ] - self.check_binop(ops, scalars, idxs) + ], + ) + @pytest.mark.parametrize( + "idx2", + [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ], + ) + def test_binops_index(self, op, idx1, idx2): + idx1 = idx1._rename("foo") + idx2 = idx2._rename("bar") + result = op(idx1, idx2) + expected = op(Int64Index(idx1), Int64Index(idx2)) + tm.assert_index_equal(result, expected, exact="equiv") - def test_binops_pow(self): + @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + ], + ) + @pytest.mark.parametrize( + "idx", + [ + RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1), + ], + ) + @pytest.mark.parametrize("scalar", [-1, 1, 2]) + def test_binops_index_scalar(self, op, idx, scalar): + result = op(idx, scalar) + expected = op(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected, exact="equiv") + + @pytest.mark.parametrize("idx1", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + @pytest.mark.parametrize("idx2", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + def test_binops_index_pow(self, idx1, idx2): # numpy does not allow powers of negative integers so test separately # https://github.com/numpy/numpy/pull/8127 - ops = [pow] - scalars = [1, 2] - idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)] - self.check_binop(ops, scalars, idxs) + idx1 = idx1._rename("foo") + idx2 = idx2._rename("bar") + result = pow(idx1, idx2) + expected = pow(Int64Index(idx1), Int64Index(idx2)) + tm.assert_index_equal(result, expected, exact="equiv") + + @pytest.mark.parametrize("idx", [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]) + @pytest.mark.parametrize("scalar", [1, 2]) + def test_binops_index_scalar_pow(self, idx, scalar): + # numpy does not allow powers of negative integers so test separately + # https://github.com/numpy/numpy/pull/8127 + result = pow(idx, scalar) + expected = pow(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected, exact="equiv") # TODO: divmod? @pytest.mark.parametrize( @@ -1273,8 +1315,9 @@ def test_numeric_compat2(self): expected = Int64Index(idx._values) ** 2 tm.assert_index_equal(Index(result.values), expected, exact=True) - # __floordiv__ - cases_exact = [ + @pytest.mark.parametrize( + "idx, div, expected", + [ (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), ( @@ -1291,9 +1334,11 @@ def test_numeric_compat2(self): (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)), (RangeIndex(-100, -200, 3), 2, RangeIndex(0)), - ] - for idx, div, expected in cases_exact: - tm.assert_index_equal(idx // div, expected, exact=True) + ], + ) + def test_numeric_compat2_floordiv(self, idx, div, expected): + # __floordiv__ + tm.assert_index_equal(idx // div, expected, exact=True) @pytest.mark.parametrize("dtype", [np.int64, np.float64]) @pytest.mark.parametrize("delta", [1, 0, -1]) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 1878b8c08b9a2..bb7949c9f08e2 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -204,8 +204,26 @@ def test_comp_nat(self, dtype): tm.assert_numpy_array_equal(lhs < NaT, expected) tm.assert_numpy_array_equal(NaT > lhs, expected) - def test_comparisons_nat(self): - tdidx1 = TimedeltaIndex( + @pytest.mark.parametrize( + "idx2", + [ + TimedeltaIndex( + ["2 day", "2 day", NaT, NaT, "1 day 00:00:02", "5 days 00:00:03"] + ), + np.array( + [ + np.timedelta64(2, "D"), + np.timedelta64(2, "D"), + np.timedelta64("nat"), + np.timedelta64("nat"), + np.timedelta64(1, "D") + np.timedelta64(2, "s"), + np.timedelta64(5, "D") + np.timedelta64(3, "s"), + ] + ), + ], + ) + def test_comparisons_nat(self, idx2): + idx1 = TimedeltaIndex( [ "1 day", NaT, @@ -215,48 +233,30 @@ def test_comparisons_nat(self): "5 day 00:00:03", ] ) - tdidx2 = TimedeltaIndex( - ["2 day", "2 day", NaT, NaT, "1 day 00:00:02", "5 days 00:00:03"] - ) - tdarr = np.array( - [ - np.timedelta64(2, "D"), - np.timedelta64(2, "D"), - np.timedelta64("nat"), - np.timedelta64("nat"), - np.timedelta64(1, "D") + np.timedelta64(2, "s"), - np.timedelta64(5, "D") + np.timedelta64(3, "s"), - ] - ) - - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] - # Check pd.NaT is handles as the same as np.nan - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - tm.assert_numpy_array_equal(result, expected) + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - tm.assert_numpy_array_equal(result, expected) + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) # TODO: better name def test_comparisons_coverage(self): @@ -665,28 +665,21 @@ def test_tdi_ops_attributes(self): class TestAddSubNaTMasking: # TODO: parametrize over boxes - def test_tdarr_add_timestamp_nat_masking(self, box_with_array): + @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) + def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): # GH#17991 checking for overflow-masking with NaT tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) tdobj = tm.box_expected(tdinat, box_with_array) - tsneg = Timestamp("1950-01-01") - ts_neg_variants = [ - tsneg, - tsneg.to_pydatetime(), - tsneg.to_datetime64().astype("datetime64[ns]"), - tsneg.to_datetime64().astype("datetime64[D]"), - ] - - tspos = Timestamp("1980-01-01") - ts_pos_variants = [ - tspos, - tspos.to_pydatetime(), - tspos.to_datetime64().astype("datetime64[ns]"), - tspos.to_datetime64().astype("datetime64[D]"), + ts = Timestamp(str_ts) + ts_variants = [ + ts, + ts.to_pydatetime(), + ts.to_datetime64().astype("datetime64[ns]"), + ts.to_datetime64().astype("datetime64[D]"), ] - for variant in ts_neg_variants + ts_pos_variants: + for variant in ts_variants: res = tdobj + variant if box_with_array is DataFrame: assert res.iloc[1, 1] is NaT