|
25 | 25 | # numpy and pandas for data manipulation
|
26 | 26 | import pandas as pd
|
27 | 27 | import numpy as np
|
28 |
| -from dython.nominal import compute_associations |
| 28 | +from dython.nominal import associations |
29 | 29 | # model used for feature importance, Shapley values are builtin
|
30 | 30 | import lightgbm as lgb
|
31 | 31 | from lightgbm import early_stopping
|
@@ -219,13 +219,16 @@ def plot_associations(df, features=None, size=1200, theil_u=False):
|
219 | 219 | # nominal features
|
220 | 220 | nom_features = set(features) - set(con_features)
|
221 | 221 |
|
222 |
| - assoc_df = compute_associations(df, |
223 |
| - nominal_columns=nom_features, |
224 |
| - mark_columns=True, |
225 |
| - theil_u=theil_u, |
226 |
| - clustering=True, |
227 |
| - bias_correction=True, |
228 |
| - nan_strategy='drop_samples') |
| 222 | + nom_nom_assoc = 'theil' if theil_u else 'cramer' |
| 223 | + assoc_df = associations(df, |
| 224 | + nominal_columns=nom_features, |
| 225 | + mark_columns=True, |
| 226 | + num_num_assoc='spearman', |
| 227 | + nom_nom_assoc=nom_nom_assoc, |
| 228 | + clustering=True, |
| 229 | + bias_correction=True, |
| 230 | + nan_strategy='drop_samples', |
| 231 | + compute_only=True)['corr'] |
229 | 232 |
|
230 | 233 | heatmap = hv.HeatMap((assoc_df.columns, assoc_df.index, assoc_df)).redim.range(z=(-1, 1))
|
231 | 234 |
|
@@ -743,14 +746,14 @@ def identify_collinear(self, correlation_threshold, encode=False, method='associ
|
743 | 746 | # nominal features
|
744 | 747 | nom_features = set(features) - set(con_features)
|
745 | 748 |
|
746 |
| - self.corr_matrix = compute_associations(self.data, |
747 |
| - nominal_columns=nom_features, |
748 |
| - mark_columns=True, |
749 |
| - num_num_assoc='spearman', |
750 |
| - nom_nom_assoc='theil', |
751 |
| - clustering=True, |
752 |
| - bias_correction=True, |
753 |
| - nan_strategy='drop_samples') |
| 749 | + self.corr_matrix = associations(self.data, |
| 750 | + nominal_columns=nom_features, |
| 751 | + mark_columns=True, |
| 752 | + num_num_assoc='spearman', |
| 753 | + nom_nom_assoc='theil', |
| 754 | + clustering=True, |
| 755 | + nan_strategy='drop_samples', |
| 756 | + compute_only=True)['corr'] |
754 | 757 |
|
755 | 758 | upper = self.corr_matrix.where(np.triu(np.ones(self.corr_matrix.shape), k=1).astype(np.bool))
|
756 | 759 | to_drop = [column for column in upper.columns if
|
@@ -1271,7 +1274,7 @@ def plot_collinear(self, plot_all=False, size=1000):
|
1271 | 1274 | panel_layout = pn.Column(
|
1272 | 1275 | pn.pane.Markdown(title_str, align="start"), # bold
|
1273 | 1276 | pn.pane.Markdown(sub_title_str, align="start"), # italic
|
1274 |
| - heatmap, background='#ebebeb' |
| 1277 | + heatmap, background='#b3b3b3' |
1275 | 1278 | )
|
1276 | 1279 |
|
1277 | 1280 | return panel_layout
|
|
0 commit comments