
MNT update test framework for sklearn 0.24 #788


Merged
merged 20 commits on Feb 8, 2021
2 changes: 1 addition & 1 deletion azure-pipelines.yml
@@ -32,7 +32,7 @@ jobs:
# Linux environment to test the latest available dependencies and MKL.
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
- PYTHON_VERSION: '3.8'
+ PYTHON_VERSION: '3.9'
COVERAGE: 'true'
PANDAS_VERSION: '*'
TEST_DOCSTRINGS: 'true'
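(scikit-learn 0.24 ships Python 3.9 wheels, so the job that tests the latest available dependencies moves to the newest interpreter.)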
2 changes: 1 addition & 1 deletion build_tools/azure/test_script.sh
@@ -21,7 +21,7 @@ except ImportError:
python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())"
pip list

TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML"
TEST_CMD="python -m pytest -vsl --durations=20 --junitxml=$JUNITXML"

if [[ "$COVERAGE" == "true" ]]; then
export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc"
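(The -vsl flags are shorthand for --verbose, -s/--capture=no and -l/--showlocals, so failing tests still report their local variables while test output is streamed instead of captured.)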
7 changes: 5 additions & 2 deletions doc/conf.py
@@ -43,6 +43,9 @@
'sphinx_gallery.gen_gallery',
]

+ # bibtex file
+ bibtex_bibfiles = ['bibtex/refs.bib']

# this is needed for some reason...
# see https://github.com/numpy/numpydoc/issues/69
numpydoc_show_class_members = False
@@ -345,8 +348,8 @@ def patch_signature(subject, bound_method=False, follow_wrapped=True):
# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
def setup(app):
app.registry.documenters["class"] = PatchedClassDocumenter
- app.add_javascript("js/copybutton.js")
- app.add_stylesheet("basic.css")
+ app.add_js_file("js/copybutton.js")
+ app.add_css_file("basic.css")
# app.connect('autodoc-process-docstring', generate_example_rst)


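Both conf.py changes track upstream API moves: sphinxcontrib-bibtex 2.0 made bibtex_bibfiles a required setting, and Sphinx renamed add_javascript/add_stylesheet to add_js_file/add_css_file (the old names were deprecated in Sphinx 1.8 and removed in 4.0). A minimal sketch of the affected pieces of conf.py:

    # sphinxcontrib-bibtex >= 2.0 requires the .bib files to be listed explicitly
    bibtex_bibfiles = ['bibtex/refs.bib']

    def setup(app):
        # renamed APIs; add_javascript/add_stylesheet no longer exist in Sphinx 4
        app.add_js_file("js/copybutton.js")
        app.add_css_file("basic.css")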
2 changes: 1 addition & 1 deletion doc/over_sampling.rst
@@ -60,7 +60,7 @@ In addition, :class:`RandomOverSampler` allows to sample heterogeneous data

>>> import numpy as np
>>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
- ... dtype=np.object)
+ ... dtype=object)
>>> y_hetero = np.array([0, 0, 1])
>>> X_resampled, y_resampled = ros.fit_resample(X_hetero, y_hetero)
>>> print(X_resampled)
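The dtype change mirrors NumPy 1.20, which deprecates the np.object alias in favour of the builtin object; both produce the same object dtype:

    >>> import numpy as np
    >>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
    ...                     dtype=object)
    >>> X_hetero.dtype
    dtype('O')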
2 changes: 1 addition & 1 deletion doc/under_sampling.rst
@@ -107,7 +107,7 @@ In addition, :class:`RandomUnderSampler` allows to sample heterogeneous data
(e.g. containing some strings)::

>>> X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
- ... dtype=np.object)
+ ... dtype=object)
>>> y_hetero = np.array([0, 0, 1])
>>> X_resampled, y_resampled = rus.fit_resample(X_hetero, y_hetero)
>>> print(X_resampled)
2 changes: 1 addition & 1 deletion imblearn/over_sampling/tests/test_random_over_sampler.py
@@ -115,7 +115,7 @@ def test_multiclass_fit_resample():

def test_random_over_sampling_heterogeneous_data():
X_hetero = np.array(
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=np.object
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=object
)
y = np.array([0, 0, 1])
ros = RandomOverSampler(random_state=RND_SEED)
The same fix in the corresponding RandomUnderSampler test file:
@@ -101,7 +101,7 @@ def test_multiclass_fit_resample():

def test_random_under_sampling_heterogeneous_data():
X_hetero = np.array(
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=np.object
[["xxx", 1, 1.0], ["yyy", 2, 2.0], ["zzz", 3, 3.0]], dtype=object
)
y = np.array([0, 0, 1])
rus = RandomUnderSampler(random_state=RND_SEED)
72 changes: 43 additions & 29 deletions imblearn/utils/estimator_checks.py
@@ -25,8 +25,8 @@
from sklearn.cluster import KMeans
from sklearn.exceptions import SkipTestWarning
from sklearn.preprocessing import label_binarize
- from sklearn.utils.estimator_checks import _mark_xfail_checks
- from sklearn.utils.estimator_checks import _set_check_estimator_ids
+ from sklearn.utils.estimator_checks import _maybe_mark_xfail
+ from sklearn.utils.estimator_checks import _get_check_estimator_ids
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_raises_regex
from sklearn.utils.multiclass import type_of_target
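(These are the sklearn 0.24 names of the private test helpers: _mark_xfail_checks was replaced by _maybe_mark_xfail, which wraps a single (estimator, check) pair, and _set_check_estimator_ids became _get_check_estimator_ids.)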
@@ -44,7 +44,7 @@ def _set_checking_parameters(estimator):
if name == "ClusterCentroids":
estimator.set_params(
voting="soft",
- estimator=KMeans(random_state=0, algorithm="full"),
+ estimator=KMeans(random_state=0, algorithm="full", n_init=1),
)
if name == "KMeansSMOTE":
estimator.set_params(kmeans_estimator=12)
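For reference, roughly what the updated check configuration exercises (a sketch, not the test module's own code; the explicit n_init=1 presumably keeps the ClusterCentroids check cheap and independent of the KMeans default):

    from sklearn.cluster import KMeans
    from imblearn.under_sampling import ClusterCentroids

    # single k-means restart with a fixed seed: fast and reproducible
    sampler = ClusterCentroids(
        voting="soft",
        estimator=KMeans(random_state=0, algorithm="full", n_init=1),
    )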
@@ -117,21 +117,19 @@ def parametrize_with_checks(estimators):
... def test_sklearn_compatible_estimator(estimator, check):
... check(estimator)
"""
- names = (type(estimator).__name__ for estimator in estimators)
+ def checks_generator():
+ for estimator in estimators:
+ name = type(estimator).__name__
+ for check in _yield_all_checks(estimator):
+ check = partial(check, name)
+ yield _maybe_mark_xfail(estimator, check, pytest)

- checks_generator = ((clone(estimator), partial(check, name))
- for name, estimator in zip(names, estimators)
- for check in _yield_all_checks(estimator))
+ return pytest.mark.parametrize("estimator, check", checks_generator(),
+ ids=_get_check_estimator_ids)

- checks_with_marks = (
- _mark_xfail_checks(estimator, check, pytest)
- for estimator, check in checks_generator)

- return pytest.mark.parametrize("estimator, check", checks_with_marks,
- ids=_set_check_estimator_ids)
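As the docstring above shows, the decorator is consumed the same way as scikit-learn's own parametrize_with_checks; a minimal usage sketch with one of the library's samplers:

    from imblearn.over_sampling import RandomOverSampler
    from imblearn.utils.estimator_checks import parametrize_with_checks

    @parametrize_with_checks([RandomOverSampler()])
    def test_sklearn_compatible_estimator(estimator, check):
        check(estimator)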


- def check_target_type(name, estimator):
+ def check_target_type(name, estimator_orig):
+ estimator = clone(estimator_orig)
# should raise warning if the target is continuous (we cannot raise error)
X = np.random.random((20, 2))
y = np.linspace(0, 1, 20)
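The same pattern repeats in every check below: the function now receives the original estimator as *_orig and immediately works on a clone, because parametrize_with_checks reuses the same instance across checks and scikit-learn 0.24 expects checks to leave it unmodified.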
@@ -148,7 +146,8 @@ def check_target_type(name, estimator):
)


- def check_samplers_one_label(name, sampler):
+ def check_samplers_one_label(name, sampler_orig):
+ sampler = clone(sampler_orig)
error_string_fit = "Sampler can't balance when only one class is present."
X = np.random.random((20, 2))
y = np.zeros(20)
@@ -168,7 +167,8 @@ def check_samplers_one_label(name, sampler):
raise AssertionError(error_string_fit)


- def check_samplers_fit(name, sampler):
+ def check_samplers_fit(name, sampler_orig):
+ sampler = clone(sampler_orig)
np.random.seed(42) # Make this test reproducible
X = np.random.random((30, 2))
y = np.array([1] * 20 + [0] * 10)
@@ -178,7 +178,8 @@ def check_samplers_fit(name, sampler):
), "No fitted attribute sampling_strategy_"


- def check_samplers_fit_resample(name, sampler):
+ def check_samplers_fit_resample(name, sampler_orig):
+ sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
@@ -213,7 +214,8 @@ def check_samplers_fit_resample(name, sampler):
)


- def check_samplers_sampling_strategy_fit_resample(name, sampler):
+ def check_samplers_sampling_strategy_fit_resample(name, sampler_orig):
+ sampler = clone(sampler_orig)
# in this test we will force all samplers to not change the class 1
X, y = make_classification(
n_samples=1000,
@@ -240,7 +242,8 @@ def check_samplers_sampling_strategy_fit_resample(name, sampler):
assert Counter(y_res)[1] == expected_stat


- def check_samplers_sparse(name, sampler):
+ def check_samplers_sparse(name, sampler_orig):
+ sampler = clone(sampler_orig)
# check that sparse matrices can be passed through the sampler leading to
# the same results than dense
X, y = make_classification(
@@ -252,14 +255,16 @@ def check_samplers_sparse(name, sampler):
)
X_sparse = sparse.csr_matrix(X)
X_res_sparse, y_res_sparse = sampler.fit_resample(X_sparse, y)
+ sampler = clone(sampler)
X_res, y_res = sampler.fit_resample(X, y)
assert sparse.issparse(X_res_sparse)
- assert_allclose(X_res_sparse.A, X_res)
+ assert_allclose(X_res_sparse.A, X_res, rtol=1e-5)
assert_allclose(y_res_sparse, y_res)
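Two details in the sparse check: the sampler is re-cloned before the dense fit_resample so the sparse run cannot leak state into the dense one, and the tolerance is relaxed from assert_allclose's default rtol=1e-7 to 1e-5 to absorb small floating-point differences between the sparse and dense code paths.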


- def check_samplers_pandas(name, sampler):
+ def check_samplers_pandas(name, sampler_orig):
pd = pytest.importorskip("pandas")
+ sampler = clone(sampler_orig)
# Check that the samplers handle pandas dataframe and pandas series
X, y = make_classification(
n_samples=1000,
@@ -290,7 +295,8 @@ def check_samplers_pandas(name, sampler):
assert_allclose(y_res_s.to_numpy(), y_res)


- def check_samplers_list(name, sampler):
+ def check_samplers_list(name, sampler_orig):
+ sampler = clone(sampler_orig)
# Check that the can samplers handle simple lists
X, y = make_classification(
n_samples=1000,
@@ -312,7 +318,8 @@ def check_samplers_list(name, sampler):
assert_allclose(y_res, y_res_list)


- def check_samplers_multiclass_ova(name, sampler):
+ def check_samplers_multiclass_ova(name, sampler_orig):
+ sampler = clone(sampler_orig)
# Check that multiclass target lead to the same results than OVA encoding
X, y = make_classification(
n_samples=1000,
@@ -329,7 +336,8 @@ def check_samplers_multiclass_ova(name, sampler):
assert_allclose(y_res, y_res_ova.argmax(axis=1))


- def check_samplers_2d_target(name, sampler):
+ def check_samplers_2d_target(name, sampler_orig):
+ sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=100,
n_classes=3,
@@ -342,7 +350,8 @@ def check_samplers_2d_target(name, sampler):
sampler.fit_resample(X, y)


- def check_samplers_preserve_dtype(name, sampler):
+ def check_samplers_preserve_dtype(name, sampler_orig):
+ sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
@@ -358,7 +367,8 @@ def check_samplers_preserve_dtype(name, sampler):
assert y.dtype == y_res.dtype, "y dtype is not preserved"


- def check_samplers_sample_indices(name, sampler):
+ def check_samplers_sample_indices(name, sampler_orig):
+ sampler = clone(sampler_orig)
X, y = make_classification(
n_samples=1000,
n_classes=3,
@@ -374,17 +384,21 @@ def check_samplers_sample_indices(name, sampler):
assert not hasattr(sampler, "sample_indices_")


- def check_classifier_on_multilabel_or_multioutput_targets(name, estimator):
+ def check_classifier_on_multilabel_or_multioutput_targets(
+ name, estimator_orig
+ ):
+ estimator = clone(estimator_orig)
X, y = make_multilabel_classification(n_samples=30)
msg = "Multilabel and multioutput targets are not supported."
with pytest.raises(ValueError, match=msg):
estimator.fit(X, y)


- def check_classifiers_with_encoded_labels(name, classifier):
+ def check_classifiers_with_encoded_labels(name, classifier_orig):
# Non-regression test for #709
# https://github.com/scikit-learn-contrib/imbalanced-learn/issues/709
pytest.importorskip("pandas")
+ classifier = clone(classifier_orig)
df, y = fetch_openml("iris", version=1, as_frame=True, return_X_y=True)
df, y = make_imbalance(
df, y, sampling_strategy={