
Commit aac43e7

Author: Thomas Bury (committed)
style: 💄 apply black
1 parent 416d639 commit aac43e7

File tree

5 files changed: +59 -53 lines changed


docs/Methods overview.rst

Lines changed: 2 additions & 4 deletions
@@ -62,7 +62,7 @@ BorutaPy vs. Boruta R:
 * Using either the native variable importance, scikit permutation importance, SHAP importance.
 
 We highly recommend using pruned trees with a depth between 3-7. For more, see the docs of these functions, and the examples below. Original code and method by: Miron B Kursa, https://m2.icm.edu.pl/boruta/
-
+
 GrootCV, a new method
 ---------------------
 
@@ -84,9 +84,7 @@ Re-implementing the Uber MRmr scheme using associations for handling continuous
 Lasso
 -----
 
-Performing a simple grid search
-
-with enforced lasso regularization.
+Performing a simple grid search with enforced lasso regularization.
 The best model is chosen based on the minimum BIC or deviance score, and all non-zero coefficients are selected.
 The loss function can belong to the exponential family, as seen in the statsmodels GLM documentation.
 Using the bic metric is faster since it is evaluated on the training data, making it unsuitable for the test data, whereas the deviance is cross-validated.
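
The reflowed paragraph describes the selection scheme: grid-search the lasso penalty, score each fit by BIC (or cross-validated deviance), and keep the non-zero coefficients. A minimal sketch of that scheme, assuming statsmodels' elastic-net fit_regularized with refit=True and a Gaussian family (not the arfs implementation, whose grid and family are not shown in this diff):

import numpy as np
import statsmodels.api as sm

def lasso_bic_selection(X, y, alphas=np.logspace(-3, 1, 20)):
    # Grid-search the lasso penalty; keep the fit with the lowest BIC.
    best_bic, best_res = np.inf, None
    for alpha in alphas:
        # L1_wt=1.0 -> pure lasso; refit=True refits the selected support
        # unpenalized, so standard statistics such as BIC are defined.
        res = sm.GLM(y, sm.add_constant(X)).fit_regularized(
            alpha=alpha, L1_wt=1.0, refit=True
        )
        if res.bic < best_bic:
            best_bic, best_res = res.bic, res
    # all non-zero coefficients are selected
    selected = np.flatnonzero(np.abs(best_res.params) > 0)
    return best_res, selected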

src/arfs/association.py

Lines changed: 29 additions & 25 deletions
@@ -1436,7 +1436,8 @@ def _callable_association_matrix_fn(
 
 
 def f_oneway_weighted(*args):
-    """Calculate the weighted F-statistic for one-way ANOVA (continuous target, categorical predictor).
+    """
+    Calculate the weighted F-statistic for one-way ANOVA (continuous target, categorical predictor).
 
     Parameters
     ----------
@@ -1455,6 +1456,7 @@ def f_oneway_weighted(*args):
     Notes
     -----
     The F-statistic is calculated as:
+
     .. math::
         F(rf) = \\frac{\\sum_i (\\bar{Y}_{i \\bullet} - \\bar{Y})^2 / (K-1)}{\\sum_i \\sum_k (\\bar{Y}_{ij} - \\bar{Y}_{i\\bullet})^2 / (N - K)}
     """
@@ -1667,13 +1669,13 @@ def f_cont_regression_parallel(
 def f_stat_regression_parallel(
     X, y, sample_weight=None, n_jobs=-1, force_finite=True, handle_na="drop"
 ):
-    """f_stat_regression_parallel computes the weighted explained variance for the provided categorical
-    and numerical predictors using parallelization of the code.
+    """
+    Compute the weighted explained variance for the provided categorical and numerical predictors using parallelization.
 
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        Predictor dataframe.
+        The predictor dataframe.
     y : array-like of shape (n_samples,)
         The target vector.
     sample_weight : array-like of shape (n_samples,), optional
@@ -1835,7 +1837,8 @@ def f_cat_classification_parallel(
     force_finite=True,
     handle_na="drop",
 ):
-    """Univariate information dependence.
+    """
+    Univariate information dependence.
 
     It ranks features in the same order if all the features are positively correlated with the target.
     Note that it is therefore recommended as a feature selection criterion to identify
@@ -1858,15 +1861,15 @@ def f_cat_classification_parallel(
         Whether or not to force the F-statistics and associated p-values to
         be finite. There are two cases where the F-statistic is expected to not
         be finite:
-        - when the target `y` or some features in `X` are constant. In this
-        case, the Pearson's R correlation is not defined leading to obtain
-        `np.nan` values in the F-statistic and p-value. When
-        `force_finite=True`, the F-statistic is set to `0.0` and the
-        associated p-value is set to `1.0`.
-        - when a feature in `X` is perfectly correlated (or
-        anti-correlated) with the target `y`. In this case, the F-statistic
-        is expected to be `np.inf`. When `force_finite=True`, the F-statistic
-        is set to `np.finfo(dtype).max`.
+        - when the target `y` or some features in `X` are constant. In this
+          case, the Pearson's R correlation is not defined leading to obtain
+          `np.nan` values in the F-statistic and p-value. When
+          `force_finite=True`, the F-statistic is set to `0.0` and the
+          associated p-value is set to `1.0`.
+        - when a feature in `X` is perfectly correlated (or
+          anti-correlated) with the target `y`. In this case, the F-statistic
+          is expected to be `np.inf`. When `force_finite=True`, the F-statistic
+          is set to `np.finfo(dtype).max`.
 
     Returns
     -------
@@ -1908,13 +1911,13 @@ def f_cat_classification_parallel(
 def f_stat_classification_parallel(
     X, y, sample_weight=None, n_jobs=-1, force_finite=True, handle_na="drop"
 ):
-    """f_stat_classification_parallel computes the weighted ANOVA F-value for the provided categorical
-    and numerical predictors using parallelization of the code.
+    """
+    Compute the weighted ANOVA F-value for the provided categorical and numerical predictors using parallelization.
 
     Parameters
     ----------
     X : array-like of shape (n_samples, n_features)
-        Predictor dataframe.
+        The predictor dataframe.
     y : array-like of shape (n_samples,)
         The target vector.
     sample_weight : array-like of shape (n_samples,), optional
@@ -2110,26 +2113,27 @@ def xy_to_matrix(xy):
 
 
 def cluster_sq_matrix(sq_matrix, method="ward"):
-    """cluster_sq_matrix applies agglomerative clustering in order to sort
-    a correlation matrix.
+    """
+    Apply agglomerative clustering to sort a square correlation matrix.
 
     Parameters
     ----------
     sq_matrix : pd.DataFrame
-        a square correlation matrix
+        A square correlation matrix.
     method : str, optional
-        linkage method, by default "ward"
+        The linkage method, by default "ward".
 
     Returns
     -------
     pd.DataFrame
-        a sorted square matrix
+        A sorted square matrix.
+
+    Example
+    -------
+    >>> from some_module import association_matrix, cluster_sq_matrix
 
-    Example:
-    --------
     >>> assoc = association_matrix(iris_df, plot=False)
     >>> assoc_clustered = cluster_sq_matrix(assoc, method="complete")
-
     """
     d = sch.distance.pdist(sq_matrix.values)
     L = sch.linkage(d, method=method)
src/arfs/feature_selection/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -23,5 +23,5 @@
     "BoostAGroota",
     "GrootCV",
     "MinRedundancyMaxRelevance",
-    "LassoFeatureSelection"
+    "LassoFeatureSelection",
 ]

src/arfs/feature_selection/lasso.py

Lines changed: 10 additions & 10 deletions
@@ -89,17 +89,17 @@ def __init__(
         link:
             the GLM link function
         alpha :
-            The penalty weight. If a scalar, the same penalty weight applies to all variables in the model.
+            The penalty weight. If a scalar, the same penalty weight applies to all variables in the model.
             If a vector, it must have the same length as params, and contains a penalty weight for each coefficient.
         L1_wt :
-            The `L1_wt` parameter represents the weight of the L1 penalty term in the model and
-            should be within the range 0 to 1. A value of 0 corresponds to ridge regression,
-            while a value of 1 corresponds to lasso regression. However, for obtaining statistics,
-            `L1_wt` should be set to a value greater than 0. If it is set to 0.0, statsmodels returns
-            a ridge regularized wrapper without refitting the model, making the statistics unavailable
-            and breaking the class. Nevertheless, you can set `L1_wt` to a very small value, such as 1e-9,
+            The `L1_wt` parameter represents the weight of the L1 penalty term in the model and
+            should be within the range 0 to 1. A value of 0 corresponds to ridge regression,
+            while a value of 1 corresponds to lasso regression. However, for obtaining statistics,
+            `L1_wt` should be set to a value greater than 0. If it is set to 0.0, statsmodels returns
+            a ridge regularized wrapper without refitting the model, making the statistics unavailable
+            and breaking the class. Nevertheless, you can set `L1_wt` to a very small value, such as 1e-9,
             to obtain close-to-ridge behavior while still obtaining the necessary statistics.
-
+
         fit_intercept :
             Whether to fit an intercept term in the model.
         """
@@ -157,13 +157,13 @@ def fit(
         self : object
             Returns self.
         """
-
+
         # see the if kwargs.get("L1_wt", 1) == 0 condition in
         # https://www.statsmodels.org/dev/_modules/statsmodels/genmod/generalized_linear_model.html#GLM.fit_regularized
         # workaround to get the statistics
         if self.alpha == 0.0:
             self.alpha = 1e-9
-
+
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
             X.columns = [f"pred_{i}" for i in range(X.shape[1])]
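
Usage would presumably look like the following, inferred from the fit signature above and the scikit-learn selector convention; the transform call and the default constructor arguments are assumptions:

import numpy as np
import pandas as pd
from arfs.feature_selection import LassoFeatureSelection

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(200, 5)), columns=[f"x{i}" for i in range(5)])
y = 3 * X["x0"] - 2 * X["x3"] + rng.normal(size=200)

selector = LassoFeatureSelection()  # defaults per the __init__ docstring
selector.fit(X, y)                  # non-zero coefficients define the support
X_selected = selector.transform(X)  # assumed sklearn-style selector API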

src/arfs/preprocessing.py

Lines changed: 17 additions & 13 deletions
@@ -42,6 +42,7 @@
 # fix random seed for reproducibility
 np.random.seed(7)
 
+
 class OrdinalEncoderPandas(OrdinalEncoder):
     # class OrdinalEncoderPandas(BaseEstimator, TransformerMixin):
     """Encode categorical features as an integer array and returns a pandas DF.
@@ -391,10 +392,11 @@ def cat_var(data, col_excl=None, return_cat=True):
 
 
 class TreeDiscretizer(BaseEstimator, TransformerMixin):
-    """The purpose of the function is to discretize continuous and/or categorical data, returning a pandas DataFrame.
-    It is designed to support regression and binary classification tasks. Discretization, also known as quantization or binning,
-    allows for the partitioning of continuous features into discrete values. In certain datasets with continuous attributes,
-    discretization can be beneficial as it transforms the dataset into one with only nominal attributes.
+    """
+    Discretize continuous and/or categorical data using univariate regularized trees, returning a pandas DataFrame.
+    The TreeDiscretizer is designed to support regression and binary classification tasks.
+    Discretization, also known as quantization or binning, allows for the partitioning of continuous features into discrete values.
+    In certain datasets with continuous attributes, discretization can be beneficial as it transforms the dataset into one with only nominal attributes.
     Additionally, for categorical predictors, grouping levels can help reduce overfitting and create meaningful clusters.
 
     By encoding discretized features, a model can become more expressive while maintaining interpretability.
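
A minimal sketch of the idea in this docstring, fitting one shallow univariate tree per column and using its split thresholds as bin edges (the arfs implementation additionally handles categoricals, sample weights, and level grouping):

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

def tree_bin_column(x, y, max_leaf_nodes=8):
    # x: pd.Series holding one continuous feature, y: the target
    tree = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes)
    tree.fit(x.to_frame(), y)
    # split thresholds of the internal nodes become the bin edges
    # (leaves are marked with feature == -2 in sklearn's tree arrays)
    edges = np.sort(tree.tree_.threshold[tree.tree_.feature >= 0])
    return pd.cut(x, bins=np.concatenate(([-np.inf], edges, [np.inf])))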
@@ -502,22 +504,22 @@ def __init__(
         self.cat_features = None
 
     def fit(self, X, y, sample_weight=None):
-        """Fit the discretizer on `X`.
+        """
+        Fit the TreeDiscretizer on the input data.
 
         Parameters
        ----------
         X : array-like of shape (n_samples, n_features)
-            Input data with shape (n_samples, n_features), where `n_samples` is the number of samples and
-            `n_features` is the number of features.
+            The predictor dataframe.
         y : array-like of shape (n_samples,)
-            Target for internally fitting the tree(s).
+            The target vector.
         sample_weight : array-like of shape (n_samples,), optional
-            Sample weight (e.g., exposure) if any.
+            The weight vector, by default None.
 
         Returns
         -------
-        X : pd.DataFrame
-            DataFrame with the binned and grouped columns.
+        self : object
+            Returns self.
         """
         X = X.copy()
 
@@ -640,7 +642,8 @@ def fit(self, X, y, sample_weight=None):
         return self
 
     def transform(self, X):
-        """Apply the discretizer on `X`. Only the columns with more than n_bins_max unique values will be transformed.
+        """
+        Apply the discretizer on `X`. Only the columns with more than n_bins_max unique values will be transformed.
 
         Parameters
         ----------
@@ -690,7 +693,8 @@ def transform(self, X):
 
 
 def highlight_discarded(s):
-    """highlight X in red and V in green.
+    """
+    highlight X in red and V in green.
 
     Parameters
     ----------
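
Only the docstring head appears in the hunk; a plausible body (an assumption, not the verbatim source) returns one CSS string per cell for use with the pandas Styler API:

def highlight_discarded(s):
    """Highlight X in red and V in green (sketch of the assumed body)."""
    return [
        "color: red" if v == "X" else "color: green" if v == "V" else ""
        for v in s
    ]

# typical column-wise use with the pandas Styler API:
# summary_df.style.apply(highlight_discarded)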
