Skip to content

Commit 992c271

Browse files
author
Thomas Bury
committed
Update associations according to dython's new syntax
1 parent 2210858 commit 992c271

File tree

4 files changed

+63
-54
lines changed

4 files changed

+63
-54
lines changed

arfs/featselect.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
# numpy and pandas for data manipulation
2626
import pandas as pd
2727
import numpy as np
28-
from dython.nominal import compute_associations
28+
from dython.nominal import associations
2929
# model used for feature importance, Shapley values are builtin
3030
import lightgbm as lgb
3131
from lightgbm import early_stopping
@@ -219,13 +219,16 @@ def plot_associations(df, features=None, size=1200, theil_u=False):
219219
# nominal features
220220
nom_features = set(features) - set(con_features)
221221

222-
assoc_df = compute_associations(df,
223-
nominal_columns=nom_features,
224-
mark_columns=True,
225-
theil_u=theil_u,
226-
clustering=True,
227-
bias_correction=True,
228-
nan_strategy='drop_samples')
222+
nom_nom_assoc = 'theil' if theil_u else 'cramer'
223+
assoc_df = associations(df,
224+
nominal_columns=nom_features,
225+
mark_columns=True,
226+
num_num_assoc='spearman',
227+
nom_nom_assoc=nom_nom_assoc,
228+
clustering=True,
229+
bias_correction=True,
230+
nan_strategy='drop_samples',
231+
compute_only=True)['corr']
229232

230233
heatmap = hv.HeatMap((assoc_df.columns, assoc_df.index, assoc_df)).redim.range(z=(-1, 1))
231234

@@ -743,14 +746,14 @@ def identify_collinear(self, correlation_threshold, encode=False, method='associ
743746
# nominal features
744747
nom_features = set(features) - set(con_features)
745748

746-
self.corr_matrix = compute_associations(self.data,
747-
nominal_columns=nom_features,
748-
mark_columns=True,
749-
num_num_assoc='spearman',
750-
nom_nom_assoc='theil',
751-
clustering=True,
752-
bias_correction=True,
753-
nan_strategy='drop_samples')
749+
self.corr_matrix = associations(self.data,
750+
nominal_columns=nom_features,
751+
mark_columns=True,
752+
num_num_assoc='spearman',
753+
nom_nom_assoc='theil',
754+
clustering=True,
755+
nan_strategy='drop_samples',
756+
compute_only=True)['corr']
754757

755758
upper = self.corr_matrix.where(np.triu(np.ones(self.corr_matrix.shape), k=1).astype(np.bool))
756759
to_drop = [column for column in upper.columns if
@@ -1271,7 +1274,7 @@ def plot_collinear(self, plot_all=False, size=1000):
12711274
panel_layout = pn.Column(
12721275
pn.pane.Markdown(title_str, align="start"), # bold
12731276
pn.pane.Markdown(sub_title_str, align="start"), # italic
1274-
heatmap, background='#ebebeb'
1277+
heatmap, background='#b3b3b3'
12751278
)
12761279

12771280
return panel_layout

changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changes
22

3+
## 0.3.1
4+
5+
- Update the association computations to the API of the latest dython version (`compute_associations(...)` is replaced by `associations(..., compute_only=True)['corr']`)
36
## 0.3.0
47

58
- Fix the Boruta_py feature counts, now adds up to n_features

0 commit comments

Comments
 (0)