Explicit support for regression, major refactoring of tests, removal of unused code, and notebooks updated to work (again). #248

Merged
Changes from 1 commit
Commits (56)
dc327f9
simplify mkdocs config
Mar 7, 2024
0935989
update to version 2.1.2
Mar 7, 2024
6c1755f
update precommit
Mar 7, 2024
1a4cecf
remove functionality which is done better in other libraries (Skoreca…
Mar 14, 2024
35a9401
update version & nb
Mar 14, 2024
2ae1b7f
update ruff config
Mar 14, 2024
e785c6a
downgrade ruff
Mar 14, 2024
fa4d975
downgrade ruff p2
Mar 14, 2024
7770889
revert the downgrade - it's an image issue
Mar 14, 2024
71c4925
add no-cache option and update readme
Mar 14, 2024
0f474f0
add no-cache option to other as well
Mar 14, 2024
d779c61
remove shap inspector
Mar 15, 2024
54ae4ba
update documentation
Mar 17, 2024
ea72e62
remove image
Mar 17, 2024
b688a60
allow for python version 3.12 and fix the bug for upgrading to shap 0…
Mar 17, 2024
0a79441
Merge branch 'main' into add_compatibility_p312
Mar 17, 2024
3b930fc
Update pre-commit
Mar 17, 2024
92ee361
remove import
Mar 17, 2024
a37a8d8
fix dependency of shap
Mar 17, 2024
1b453b8
fix file
Mar 17, 2024
ce185be
fix for python v 3.8
Mar 17, 2024
aec0e80
removal of leftover references
Mar 17, 2024
90f2794
add explicit state setting
Mar 18, 2024
75d3fd3
another random state found to be added
Mar 18, 2024
4271cf7
Merge branch 'add_compatibility_p312' into set_random_state_explicit
Mar 18, 2024
1286eed
fix tests
Mar 18, 2024
6b71074
fix tests to a more consistent standard.
Mar 18, 2024
7d9d466
Merge branch 'main' into set_random_state_explicit
Mar 18, 2024
22e17c2
major test refactor
Mar 19, 2024
1371d5d
Merge remote-tracking branch 'origin/set_random_state_explicit' into …
Mar 19, 2024
fb3a33e
fix many things
Mar 20, 2024
cdc6b88
update readme
Mar 20, 2024
108db45
update cronjob
Mar 20, 2024
351e4f9
update copyright
Mar 20, 2024
cb1b3ef
change version from 3.0.1 to 3.1.0 since the changes are a bit more t…
Mar 20, 2024
b6bf310
change cronjob
Mar 20, 2024
0be6f3f
fix nb run flag
Mar 20, 2024
4a0c9b3
remove debug file
Mar 20, 2024
fe97bf2
Merge branch 'main' into fixes_and_add_explicit_multi_and_regression
Mar 26, 2024
c1285c3
Add explicit state setting (#242)
Mar 28, 2024
77f303f
Update catboost requirement (#254)
dependabot[bot] Mar 28, 2024
bccc06e
rebase master
Mar 28, 2024
18db9d9
update version & nb
Mar 14, 2024
1e9988e
rebase
Mar 28, 2024
b576151
fix tests to a more consistent standard.
Mar 18, 2024
0f2b816
major test refactor
Mar 19, 2024
036afa2
fix many things
Mar 28, 2024
3ad20b9
update readme
Mar 20, 2024
085c316
update cronjob
Mar 20, 2024
42577ac
update copyright
Mar 20, 2024
5475447
change version from 3.0.1 to 3.1.0 since the changes are a bit more t…
Mar 20, 2024
38edbc7
change cronjob
Mar 20, 2024
ecf4647
fix nb run flag
Mar 20, 2024
cdda30c
remove debug file
Mar 20, 2024
6d2c39a
rebase master
Mar 28, 2024
37808de
Merge branch 'fixes_and_add_explicit_multi_and_regression' of https:/…
Mar 28, 2024
fix tests to a more consistent standard.
Reinier Koops committed Mar 18, 2024
commit 6b71074a9424bd66ba8324b9c2d32cec12ed23ee
26 changes: 13 additions & 13 deletions probatus/feature_elimination/feature_elimination.py
@@ -7,6 +7,7 @@
from sklearn.base import clone, is_classifier, is_regressor
from sklearn.model_selection import check_cv
from sklearn.model_selection._search import BaseSearchCV
from loguru import logger

from probatus.utils import (
BaseFitComputePlotClass,
@@ -156,9 +157,8 @@ def __init__(
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -537,7 +537,7 @@ def fit(
self.min_features_to_select = 0
# This ensures that, if columns_to_keep is provided,
# the last features remaining are only the columns_to_keep.
if self.verbose > 50:
if self.verbose > 1:
warnings.warn(f"Minimum features to select : {stopping_criteria}")

while len(current_features_set) > stopping_criteria:
@@ -615,8 +615,8 @@ def fit(
val_metric_mean=np.mean(scores_val),
val_metric_std=np.std(scores_val),
)
if self.verbose > 50:
print(
if self.verbose > 1:
logger.info(
f"Round: {round_number}, Current number of features: {len(current_features_set)}, "
f'Current performance: Train {self.report_df.loc[round_number]["train_metric_mean"]} '
f'+/- {self.report_df.loc[round_number]["train_metric_std"]}, CV Validation '
@@ -841,8 +841,8 @@ def _get_best_num_features(self, best_method, standard_error_threshold=1.0):
)

# Log shap_report for users who want to inspect / debug
if self.verbose > 50:
print(shap_report)
if self.verbose > 1:
logger.info(shap_report)

return best_num_features

@@ -1110,10 +1110,9 @@ def __init__(
verbose (int, optional):
Controls verbosity of the output:

- 0 - nether prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 0 - neither prints nor warnings are shown
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -1210,7 +1209,8 @@ def _get_fit_params_lightGBM(
"eval_set": [(X_val, y_val)],
"callbacks": [early_stopping(self.early_stopping_rounds, first_metric_only=True)],
}
if self.verbose >= 100:

if self.verbose >= 2:
fit_params["callbacks"].append(log_evaluation(1))
else:
fit_params["callbacks"].append(log_evaluation(0))
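For orientation, a minimal sketch of how the simplified 0/1/2 verbosity scale reads from the caller's side; the dataset and settings below are illustrative and not taken from this diff:

```python
from lightgbm import LGBMClassifier
from probatus.feature_elimination import ShapRFECV
from sklearn.datasets import make_classification

# Synthetic stand-in data; any sklearn-compatible model works here.
X, y = make_classification(n_samples=200, n_features=10, random_state=0)

shap_elimination = ShapRFECV(
    LGBMClassifier(random_state=0),
    step=0.2,       # drop 20% of the remaining features per round
    cv=5,
    scoring="roc_auc",
    verbose=2,      # 0 = silent, 1 = key warnings, 2 = all prints (via loguru) and warnings
    random_state=0,
)
report = shap_elimination.fit_compute(X, y)
print(report.head())
```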
5 changes: 2 additions & 3 deletions probatus/interpret/model_interpret.py
@@ -98,9 +98,8 @@ def __init__(self, clf, scoring="roc_auc", verbose=0, random_state=None):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
5 changes: 2 additions & 3 deletions probatus/interpret/shap_dependence.py
@@ -64,9 +64,8 @@ def __init__(self, clf, verbose=0, random_state=None):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
20 changes: 9 additions & 11 deletions probatus/sample_similarity/resemblance_model.py
@@ -21,6 +21,7 @@
import warnings

import matplotlib.pyplot as plt
from loguru import logger
import numpy as np
import pandas as pd
from shap import summary_plot
@@ -76,9 +77,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -178,8 +178,8 @@ def fit(self, X1, X2, column_names=None, class_names=None):
f"Train {self.scorer.metric_name}: {np.round(self.train_score, 3)},\n"
f"Test {self.scorer.metric_name}: {np.round(self.test_score, 3)}."
)
if self.verbose > 50:
print(f"Finished model training: \n{self.results_text}")
if self.verbose > 1:
logger.info(f"Finished model training: \n{self.results_text}")

if self.verbose > 0:
if self.train_score > self.test_score:
@@ -343,9 +343,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
@@ -572,9 +571,8 @@ class is 'roc_auc'.
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set at each round of feature elimination. If it is None, the results will not be
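A hedged sketch of the same change on the resemblance side, assuming the public SHAPImportanceResemblance API accepts the verbose keyword like the base class above; the data is synthetic and illustrative:

```python
import numpy as np
import pandas as pd
from probatus.sample_similarity import SHAPImportanceResemblance
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
# Two samples with shifted means, so the resemblance model has something to find.
X1 = pd.DataFrame(rng.normal(0.0, 1.0, size=(200, 4)), columns=["f1", "f2", "f3", "f4"])
X2 = pd.DataFrame(rng.normal(0.5, 1.0, size=(200, 4)), columns=["f1", "f2", "f3", "f4"])

resemblance = SHAPImportanceResemblance(RandomForestClassifier(random_state=0), verbose=2)
report = resemblance.fit_compute(X1, X2)  # "Finished model training: ..." is now logged via loguru
```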
11 changes: 5 additions & 6 deletions probatus/utils/arrayfuncs.py
@@ -189,9 +189,9 @@ def preprocess_data(X, X_name=None, column_names=None, verbose=0):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.


Returns:
(pd.DataFrame):
@@ -255,9 +255,8 @@ def preprocess_labels(y, y_name=None, index=None, verbose=0):
Controls verbosity of the output:

- 0 - neither prints nor warnings are shown
- 1 - 50 - only most important warnings regarding data properties are shown (excluding SHAP warnings)
- 51 - 100 - shows most important warnings, prints of the feature removal process
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

Returns:
(pd.Series):
9 changes: 4 additions & 5 deletions probatus/utils/shap_helpers.py
@@ -55,10 +55,9 @@ def shap_calc(
verbose (int, optional):
Controls verbosity of the output:

- 0 - nether prints nor warnings are shown
- 1 - 50 - only most important warnings
- 51 - 100 - shows other warnings and prints
- above 100 - presents all prints and all warnings (including SHAP warnings).
- 0 - neither prints nor warnings are shown
- 1 - only most important warnings
- 2 - shows all prints and all warnings.

random_state (int, optional):
Random state set for the nr of samples. If it is None, the results will not be reproducible. For
@@ -87,7 +86,7 @@
)
# Suppress warnings regarding XGboost and Lightgbm models.
with warnings.catch_warnings():
if verbose <= 100:
if verbose <= 1:
warnings.simplefilter("ignore")

# For tree explainers, do not pass masker when feature_perturbation is
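The gate above, isolated as a minimal sketch; the function name and warning text are illustrative:

```python
import warnings

def shap_call_quietly(verbose: int = 0):
    # Mirrors shap_calc: anything below full verbosity silences library chatter.
    with warnings.catch_warnings():
        if verbose <= 1:
            warnings.simplefilter("ignore")
        warnings.warn("stand-in for XGBoost/LightGBM warnings")

shap_call_quietly(verbose=0)  # suppressed
shap_call_quietly(verbose=2)  # warning is shown
```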
1 change: 1 addition & 0 deletions pyproject.toml
@@ -37,6 +37,7 @@ dependencies = [
"shap>=0.43.0 ; python_version != '3.8'",
"numpy>=1.23.2",
"numba>=0.57.0",
"loguru>=0.7.2",
]

[project.urls]
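loguru is the only new runtime dependency; the pattern adopted across this PR is, roughly, the sketch below (the verbosity value and message are illustrative):

```python
from loguru import logger

verbose = 2  # the new scale: 0 = silent, 1 = key warnings, 2 = everything

if verbose > 1:
    # logger.info replaces bare print calls and adds timestamps and levels for free.
    logger.info("Round: 1, Current number of features: 10")
```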
93 changes: 82 additions & 11 deletions tests/conftest.py
@@ -5,6 +5,51 @@
import pytest
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier
import lightgbm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV


@pytest.fixture(scope="function")
def random_state():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 0

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_42():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 42

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_1234():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 1234

return RANDOM_STATE


@pytest.fixture(scope="function")
def random_state_1():
"""
Fixture to automatically provide a random state.
"""
RANDOM_STATE = 1

return RANDOM_STATE


@pytest.fixture(scope="function")
@@ -16,7 +61,7 @@ def mock_model():


@pytest.fixture(scope="function")
def complex_data():
def complex_data(random_state):
"""
Fixture.
"""
@@ -29,7 +74,7 @@
class_sep=0.05,
n_informative=2,
n_features=5,
random_state=0,
random_state=random_state,
n_redundant=2,
n_clusters_per_class=1,
)
@@ -40,23 +85,20 @@


@pytest.fixture(scope="function")
def complex_data_split(complex_data):
def complex_data_split(complex_data, random_state_42):
"""
Fixture.
"""
X, y = complex_data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state_42)
return X_train, X_test, y_train, y_test


@pytest.fixture(scope="function")
def complex_lightgbm():
"""
Fixture.
"""
import lightgbm

return lightgbm.LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=42)
def complex_lightgbm(random_state_42):
"""This fixture allows to reuse the import of the LGBMClassifier class across different tests."""
model = lightgbm.LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=random_state_42)
return model


@pytest.fixture(scope="function")
@@ -68,3 +110,32 @@ def complex_fitted_lightgbm(complex_data_split, complex_lightgbm):
X_train["f1_categorical"] = X_train["f1_categorical"].astype("category")

return complex_lightgbm.fit(X_train, y_train)


@pytest.fixture(scope="function")
def catboost_classifier(random_state):
"""This fixture allows to reuse the import of the CatboostClassifier class across different tests."""
model = CatBoostClassifier(random_seed=random_state)
return model


@pytest.fixture(scope="function")
def decision_tree_classifier(random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests."""
model = DecisionTreeClassifier(max_depth=1, random_state=random_state)
return model


@pytest.fixture(scope="function")
def randomized_search_decision_tree_classifier(decision_tree_classifier, random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier in combination with a new CV class across different tests."""
param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]}
cv = RandomizedSearchCV(decision_tree_classifier, param_grid, cv=2, n_iter=2, random_state=random_state)
return cv


@pytest.fixture(scope="function")
def logistic_regression(random_state):
"""This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests."""
model = LogisticRegression(random_state=random_state)
return model
Loading