-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
I have data with highly correlated variables and many of them are clustering together at height 0 at algorithms. The function hierarchical_importance does not work with this example.
Below I provide reproducible example.
library(dplyr)
library(triplot)
library(DALEX)
library(gbm)
## download https://github.com/woznicak/MetaFeaturesImpact/blob/master/summary_results_surrogate_models_rank_per_algo.Rd
## This is the Rdata object with list of explainers
summary_results <- readRDS('summary_results_surrogate_models_rank_per_algo.Rd')
explainer_gbm <- summary_results$explainer_GBM_deep[[11]]
tri_var_imp <- calculate_triplot(explainer_gbm,
data = explainer_gbm$data,
y = explainer_gbm$y,
new_observation = explainer_gbm$data[1,],
predict_function = sexplainer_gbm$predict_function)
I work around this problem and this is my fixing of function
hierarchical_importance <- function (x, data, y = NULL, predict_function = predict, type = "predict",
new_observation = NULL, N = 1000, loss_function = DALEX::loss_root_mean_square,
B = 10, fi_type = c("raw", "ratio", "difference"), clust_method = "complete",
cor_method = "spearman", ...)
{
if (all(type != "predict", is.null(y))) {
stop("Target is needed for hierarchical_importance calculated at model \n level")
}
fi_type <- match.arg(fi_type)
x_hc <- hclust(as.dist(1 - abs(cor(data, method = cor_method))),
method = clust_method)
cutting_heights <- x_hc$height
# aspects_list_previous <- list_variables(x_hc, 1)
aspects_list_previous <- as.list(colnames(data))
int_node_importance <- as.data.frame(NULL)
for (i in c(1:(length(cutting_heights) - 1))) {
aspects_list_current <- list_variables(x_hc, 1 - cutting_heights[i])
t1 <- match(aspects_list_current, setdiff(aspects_list_current,
aspects_list_previous))
for(k in na.omit(t1)){
t2 <- which(t1 == k)
t3 <- aspects_list_current[t2]
group_name <- names(t3)
if (type != "predict") {
explainer <- explain(model = x, data = data, y = y,
predict_function = predict_function, verbose = FALSE)
res_ai <- feature_importance(explainer = explainer,
variable_groups = aspects_list_current, N = N,
loss_function = loss_function, B = B, type = fi_type)
class(res_ai) <- c("model_parts" ,"feature_importance_explainer", class(res_ai))
res_ai <- res_ai[res_ai$permutation == "0", ]
int_node_importance[nrow(int_node_importance) + 1, 1] <- res_ai[res_ai$variable ==
group_name, ]$dropout_loss
}
else {
res_ai <- aspect_importance(x = x, data = data,
predict_function = predict_function, new_observation = new_observation,
variable_groups = aspects_list_current, N = N)
int_node_importance[nrow(int_node_importance) +1 , 1] <- res_ai[res_ai$variable_groups ==
group_name, ]$importance
}
int_node_importance[nrow(int_node_importance), 2] <- group_name
int_node_importance[nrow(int_node_importance), 3] <- cutting_heights[i]
}
aspects_list_previous <- aspects_list_current
}
if (type != "predict") {
res <- feature_importance(explainer = explainer, variable_groups = , N = N, loss_function = loss_function, B = B)
res <- res[res$permutation == "0", ]
baseline_val <- res[res$variable == "aspect.group1",
]$dropout_loss
int_node_importance[length(cutting_heights), 1] <- baseline_val
}
else {
int_node_importance[(nrow(int_node_importance)+1):length(cutting_heights), 1] <- NA
}
x_hc$height <- int_node_importance$V1
hi <- list(x_hc, type, new_observation)
class(hi) <- c("hierarchical_importance")
return(hi)
}
Metadata
Metadata
Assignees
Labels
No labels