Explicit support for regression, major refactoring of tests, removal of unused code, and notebooks updated to work (again). #248

Merged
Changes from 1 commit
Commits (56)
dc327f9
simplify mkdocs config
Mar 7, 2024
0935989
update to version 2.1.2
Mar 7, 2024
6c1755f
update precommit
Mar 7, 2024
1a4cecf
remove functionality which is done better in other libraries (Skoreca…
Mar 14, 2024
35a9401
update version & nb
Mar 14, 2024
2ae1b7f
update ruff config
Mar 14, 2024
e785c6a
downgrade ruff
Mar 14, 2024
fa4d975
downgrade ruff p2
Mar 14, 2024
7770889
revert the downgrade - it's an image issue
Mar 14, 2024
71c4925
add no-cache option and update readme
Mar 14, 2024
0f474f0
add no-cache option to other as well
Mar 14, 2024
d779c61
remove shap inspector
Mar 15, 2024
54ae4ba
update documentation
Mar 17, 2024
ea72e62
remove image
Mar 17, 2024
b688a60
allow for python version 3.12 and fix the bug for upgrading to shap 0…
Mar 17, 2024
0a79441
Merge branch 'main' into add_compatibility_p312
Mar 17, 2024
3b930fc
Update pre-commit
Mar 17, 2024
92ee361
remove import
Mar 17, 2024
a37a8d8
fix dependency of shap
Mar 17, 2024
1b453b8
fix file
Mar 17, 2024
ce185be
fix for python v 3.8
Mar 17, 2024
aec0e80
removal of leftover references
Mar 17, 2024
90f2794
add explicit state setting
Mar 18, 2024
75d3fd3
another random state found to be added
Mar 18, 2024
4271cf7
Merge branch 'add_compatibility_p312' into set_random_state_explicit
Mar 18, 2024
1286eed
fix tests
Mar 18, 2024
6b71074
fix tests to a more consistent standard.
Mar 18, 2024
7d9d466
Merge branch 'main' into set_random_state_explicit
Mar 18, 2024
22e17c2
major test refactor
Mar 19, 2024
1371d5d
Merge remote-tracking branch 'origin/set_random_state_explicit' into …
Mar 19, 2024
fb3a33e
fix many things
Mar 20, 2024
cdc6b88
update readme
Mar 20, 2024
108db45
update cronjob
Mar 20, 2024
351e4f9
update copyright
Mar 20, 2024
cb1b3ef
change version from 3.0.1 to 3.1.0 since the changes are a bit more t…
Mar 20, 2024
b6bf310
change cronjob
Mar 20, 2024
0be6f3f
fix nb run flag
Mar 20, 2024
4a0c9b3
remove debug file
Mar 20, 2024
fe97bf2
Merge branch 'main' into fixes_and_add_explicit_multi_and_regression
Mar 26, 2024
c1285c3
Add explicit state setting (#242)
Mar 28, 2024
77f303f
Update catboost requirement (#254)
dependabot[bot] Mar 28, 2024
bccc06e
rebase master
Mar 28, 2024
18db9d9
update version & nb
Mar 14, 2024
1e9988e
rebase
Mar 28, 2024
b576151
fix tests to a more consistent standard.
Mar 18, 2024
0f2b816
major test refactor
Mar 19, 2024
036afa2
fix many things
Mar 28, 2024
3ad20b9
update readme
Mar 20, 2024
085c316
update cronjob
Mar 20, 2024
42577ac
update copyright
Mar 20, 2024
5475447
change version from 3.0.1 to 3.1.0 since the changes are a bit more t…
Mar 20, 2024
38edbc7
change cronjob
Mar 20, 2024
ecf4647
fix nb run flag
Mar 20, 2024
cdda30c
remove debug file
Mar 20, 2024
6d2c39a
rebase master
Mar 28, 2024
37808de
Merge branch 'fixes_and_add_explicit_multi_and_regression' of https:/…
Mar 28, 2024
fix tests to a more consistent standard.
Reinier Koops committed Mar 18, 2024
commit 6b71074a9424bd66ba8324b9c2d32cec12ed23ee
26 changes: 13 additions & 13 deletions probatus/feature_elimination/feature_elimination.py
@@ -7,6 +7,7 @@
from sklearn.base import clone, is_classifier, is_regressor
from sklearn.model_selection import check_cv
from sklearn.model_selection._search import BaseSearchCV
from loguru import logger

from probatus.utils import (
BaseFitComputePlotClass,
@@ -156,9 +157,8 @@ def __init__(
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -537,7 +537,7 @@ def fit(
self.min_features_to_select = 0
# This ensures that, if columns_to_keep is provided,
# the last features remaining are only the columns_to_keep.
if self.verbose > 50:
if self.verbose > 1:
warnings.warn(f"Minimum features to select : {stopping_criteria}")

while len(current_features_set) > stopping_criteria:
@@ -615,8 +615,8 @@ def fit(
val_metric_mean=np.mean(scores_val),
val_metric_std=np.std(scores_val),
)
if self.verbose > 50:
print(
if self.verbose > 1:
logger.info(
f"Round: {round_number}, Current number of features: {len(current_features_set)}, "
f'Current performance: Train {self.report_df.loc[round_number]["train_metric_mean"]} '
f'+/- {self.report_df.loc[round_number]["train_metric_std"]}, CV Validation '
@@ -841,8 +841,8 @@ def _get_best_num_features(self, best_method, standard_error_threshold=1.0):
)

# Log shap_report for users who want to inspect / debug
if self.verbose > 50:
print(shap_report)
if self.verbose > 1:
logger.info(shap_report)

return best_num_features

@@ -1110,10 +1110,9 @@ def __init__(
verbose (int, optional):
Controls verbosity of the output:

- 0 - nether prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 0 - neither prints nor warnings are shown
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -1210,7 +1209,8 @@ def _get_fit_params_lightGBM(
"eval_set": [(X_val, y_val)],
"callbacks": [early_stopping(self.early_stopping_rounds, first_metric_only=True)],
}
if self.verbose >= 100:

if self.verbose >= 2:
fit_params["callbacks"].append(log_evaluation(1))
else:
fit_params["callbacks"].append(log_evaluation(0))
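For orientation, a minimal sketch of how the simplified 0/1/2 verbosity scale reads from the caller's side; the dataset and settings below are illustrative and not taken from this diff:

```python
from lightgbm import LGBMClassifier
from probatus.feature_elimination import ShapRFECV
from sklearn.datasets import make_classification

# Synthetic stand-in data; any sklearn-compatible model works here.
X, y = make_classification(n_samples=200, n_features=10, random_state=0)

shap_elimination = ShapRFECV(
    LGBMClassifier(random_state=0),
    step=0.2,       # drop 20% of the remaining features per round
    cv=5,
    scoring="roc_auc",
    verbose=2,      # 0 = silent, 1 = key warnings, 2 = all prints (via loguru) and warnings
    random_state=0,
)
report = shap_elimination.fit_compute(X, y)
print(report.head())
```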
5 changes: 2 additions & 3 deletions probatus/interpret/model_interpret.py
@@ -98,9 +98,8 @@ def __init__(self, clf, scoring="roc_auc", verbose=0, random_state=None):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
5 changes: 2 additions & 3 deletions probatus/interpret/shap_dependence.py
@@ -64,9 +64,8 @@ def __init__(self, clf, verbose=0, random_state=None):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
20 changes: 9 additions & 11 deletions probatus/sample_similarity/resemblance_model.py
@@ -21,6 +21,7 @@
import warnings

import matplotlib.pyplot as plt
from loguru import logger
import numpy as np
import pandas as pd
from shap import summary_plot
@@ -76,9 +77,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -178,8 +178,8 @@ def fit(self, X1, X2, column_names=None, class_names=None):
f"Train {self.scorer.metric_name}: {np.round(self.train_score, 3)},\n"
f"Test {self.scorer.metric_name}: {np.round(self.test_score, 3)}."
)
if self.verbose > 50:
print(f"Finished model training: \n{self.results_text}")
if self.verbose > 1:
logger.info(f"Finished model training: \n{self.results_text}")

if self.verbose > 0:
if self.train_score > self.test_score:
@@ -343,9 +343,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -572,9 +571,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
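A hedged sketch of the same change on the resemblance side, assuming the public SHAPImportanceResemblance API accepts the verbose keyword like the base class above; the data is synthetic and illustrative:

```python
import numpy as np
import pandas as pd
from probatus.sample_similarity import SHAPImportanceResemblance
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
# Two samples with shifted means, so the resemblance model has something to find.
X1 = pd.DataFrame(rng.normal(0.0, 1.0, size=(200, 4)), columns=["f1", "f2", "f3", "f4"])
X2 = pd.DataFrame(rng.normal(0.5, 1.0, size=(200, 4)), columns=["f1", "f2", "f3", "f4"])

resemblance = SHAPImportanceResemblance(RandomForestClassifier(random_state=0), verbose=2)
report = resemblance.fit_compute(X1, X2)  # "Finished model training: ..." is now logged via loguru
```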
11 changes: 5 additions & 6 deletions probatus/utils/arrayfuncs.py
@@ -189,9 +189,9 @@ def preprocess_data(X, X_name=None, column_names=None, verbose=0):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.


Returns:
(pd.DataFrame):
@@ -255,9 +255,8 @@ def preprocess_labels(y, y_name=None, index=None, verbose=0):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

Returns:
(pd.Series):
9 changes: 4 additions & 5 deletions probatus/utils/shap_helpers.py
@@ -55,10 +55,9 @@ def shap_calc(
verbose (int, optional):
Controls verbosity of the output:

- 0 - nether prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 0 - neither prints nor warnings are shown
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
@@ -87,7 +86,7 @@
)
# Suppress warnings regarding XGboost and Lightgbm models.
with warnings.catch_warnings():
if verbose <= 100:
if verbose <= 1:
warnings.simplefilter("ignore")

# For tree explainers, do not pass masker when feature_perturbation is
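The gate above, isolated as a minimal sketch; the function name and warning text are illustrative:

```python
import warnings

def shap_call_quietly(verbose: int = 0):
    # Mirrors shap_calc: anything below full verbosity silences library chatter.
    with warnings.catch_warnings():
        if verbose <= 1:
            warnings.simplefilter("ignore")
        warnings.warn("stand-in for XGBoost/LightGBM warnings")

shap_call_quietly(verbose=0)  # suppressed
shap_call_quietly(verbose=2)  # warning is shown
```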
1 change: 1 addition & 0 deletions pyproject.toml
@@ -37,6 +37,7 @@ dependencies = [
"shap>=0.43.0 ; python_version != '3.8'",
"numpy>=1.23.2",
"numba>=0.57.0",
"loguru>=0.7.2",
]

[project.urls]
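loguru is the only new runtime dependency; the pattern adopted across this PR is, roughly, the sketch below (the verbosity value and message are illustrative):

```python
from loguru import logger

verbose = 2  # the new scale: 0 = silent, 1 = key warnings, 2 = everything

if verbose > 1:
    # logger.info replaces bare print calls and adds timestamps and levels for free.
    logger.info("Round: 1, Current number of features: 10")
```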
93 changes: 82 additions & 11 deletions tests/conftest.py
@@ -5,6 +5,51 @@
import pytest
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
import lightgbm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV


@pytest.fixture(scope="function")
def random_state():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 0

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_42():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 42

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_1234():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 1234

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_1():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 1

return RANDOM_STATE


@pytest.fixture(scope="function")
@@ -16,7 +61,7 @@ def mock_model():


@pytest.fixture(scope="function")
def complex_data():
def complex_data(random_state):
"""
Fixture.
"""
@@ -29,7 +74,7 @@
class_sep=0.05,
n_informative=2,
n_features=5,
random_state=0,
random_state=random_state,
n_redundant=2,
n_clusters_per_class=1,
)
@@ -40,23 +85,20 @@


@pytest.fixture(scope="function")
def complex_data_split(complex_data):
def complex_data_split(complex_data, random_state_42):
"""
Fixture.
"""
X, y = complex_data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state_42)
return X_train, X_test, y_train, y_test


@pytest.fixture(scope="function")
def complex_lightgbm():
"""
Fixture.
"""
import lightgbm

return lightgbm.LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=42)
def complex_lightgbm(random_state_42):
"""This fixture allows to reuse the import of the LGBMClassifier class across different tests."""
model = lightgbm.LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=random_state_42)
return model


@pytest.fixture(scope="function")
@@ -68,3 +110,32 @@ def complex_fitted_lightgbm(complex_data_split, complex_lightgbm):
X_train["f1_categorical"] = X_train["f1_categorical"].astype("category")

return complex_lightgbm.fit(X_train, y_train)


@pytest.fixture(scope="function")
def catboost_classifier(random_state):
"""This fixture allows to reuse the import of the CatboostClassifier class across different tests."""
model = CatBoostClassifier(random_seed=random_state)
return model


@pytest.fixture(scope="function")
def decision_tree_classifier(random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests."""
model = DecisionTreeClassifier(max_depth=1, random_state=random_state)
return model


@pytest.fixture(scope="function")
def randomized_search_decision_tree_classifier(decision_tree_classifier, random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier in combination with a new CV class across different tests."""
param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]}
cv = RandomizedSearchCV(decision_tree_classifier, param_grid, cv=2, n_iter=2, random_state=random_state)
return cv


@pytest.fixture(scope="function")
def logistic_regression(random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests."""
model = LogisticRegression(random_state=random_state)
return model
Loading