## ----installation, eval=FALSE------------------------------------------------- # if (!require("BiocManager")) { # install.packages("BiocManager") # } # BiocManager::install('XAItest') ## ----scenarios---------------------------------------------------------------- library(XAItest) library(gridExtra) library(ggplot2) set.seed(12) # Scenario 1: Variance Difference df1 <- genScenario(1) # Scenario 2: Bimodal Distribution df2 <- genScenario(2) # Scenario 3: XOR Interaction df3 <- genScenario(3) # Scenario 4: Concentric Circles df4 <- genScenario(4) # Scenario 5: Parabolic Relationship df5 <- genScenario(5) # Scenario 6: Rising Sinusoid df6 <- genScenario(6) # Create plots for each scenario p1 <- ggplot(df1, aes(x = norm_noise01, y = diffVar01, color = y)) + geom_point() + scale_color_manual(values = c("orange", "purple")) + ggtitle("Scenario 1:\nVariance Difference") p2 <- ggplot(df2, aes(x = norm_noise01, y = biDistrib, color = y)) + geom_point() + scale_color_manual(values = c("orange", "purple")) + ggtitle("Scenario 2:\nBimodal Distribution") p3 <- ggplot(df3, aes(x = relVar01, y = relVar02, color = y)) + geom_point() + scale_color_manual(values = c("orange", "purple")) + ggtitle("Scenario 3:\nXOR Interaction") p4 <- ggplot(df4, aes(x = relVar01, y = relVar02, color = y)) + geom_point() + scale_color_manual(values = c("orange", "purple")) + ggtitle("Scenario 4:\nConcentric Circles") p5 <- ggplot(df5, aes(x = norm_noise01, y = y)) + geom_point() + ggtitle("Scenario 5:\nParabolic Relationship") p6 <- ggplot(df6, aes(x = unif_noise01, y = y)) + geom_point() + ggtitle("Scenario 6:\nRising Sinusoid") # Load required library for grid arrangement # Arrange the plots in a 2x3 grid with width twice the height grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2, widths = rep(2, 3), heights = rep(1, 2)) ## ----addSimThresh------------------------------------------------------------- set.seed(1) # Add the simThresh variable to achieve a p-value = 0.001 between A and B df_st <- addSimThresh(df1, pval_target = 0.001) df_st[c(1:2,50:51),c(1:3,(ncol(df_st)-2):ncol(df_st))] ## ----check pvalue------------------------------------------------------------- grp_A <- df_st$simThresh[df_st$y == "A"] grp_B <- df_st$simThresh[df_st$y == "B"] tt <- t.test(grp_A, grp_B) tt$p.value ## ----build dt----------------------------------------------------------------- library(rpart) fit <- rpart(y ~ ., data = df_st, method = "class") imp_raw <- fit$variable.importance # Significance threshold fi_sim <- imp_raw[["simThresh"]] fi_sim ## ----detect features---------------------------------------------------------- # Feature Importance imp_sorted <- sort(imp_raw, decreasing = TRUE) imp_sorted # Features above the threshold vars_above <- setdiff(names(imp_sorted[imp_sorted > fi_sim]), "simThresh") vars_above ## ----addSimThresh regress----------------------------------------------------- set.seed(1) # Add the simThresh variable to achieve a p-value = 0.001 for the correlation with `y` df_st <- addSimThresh(df5, pval_target = 0.001) df_st[1:4,c(1:3,ncol(df_st))] tt <- cor.test(df_st$y, df_st$simThresh ) tt$p.value fit <- rpart(y ~ ., data = df_st) imp_raw <- fit$variable.importance imp_raw # Significance threshold fi_sim <- imp_raw[["simThresh"]] fi_sim # Feature Importance imp_sorted <- sort(imp_raw, decreasing = TRUE) imp_sorted # Features above the threshold vars_above <- setdiff(names(imp_sorted[imp_sorted > fi_sim]), "simThresh") vars_above ## ----load libs, message=F----------------------------------------------------- # Load the libraries library(ggforce) library(SummarizedExperiment) ## ----------------------------------------------------------------------------- data(airway, package="airway") se <- airway ## ----first XAI.test, warning=FALSE-------------------------------------------- results <- XAI.test(se[1:100,], y = "dex", verbose = TRUE, simData = TRUE) ## ----------------------------------------------------------------------------- results ## ----load classif simu data--------------------------------------------------- se_path <- system.file("extdata", "seClassif.rds", package="XAItest") dataset_classif <- readRDS(se_path) data_matrix <- assay(dataset_classif, "counts") data_matrix <- t(data_matrix) metadata <- as.data.frame(colData(dataset_classif)) df_simu_classif <- as.data.frame(cbind(data_matrix, y = metadata[['y']])) for (col in names(df_simu_classif)) { if (col != 'y') { df_simu_classif[[col]] <- as.numeric(df_simu_classif[[col]]) } } ## ----load dataframe ex classif simu data-------------------------------------- df_path <- system.file("extdata", "dfClassif.txt", package="XAItest") dataset_classif_df <- read.table(df_path) df_simu_classif_df <- dataset_classif ## ----fig.width=12, fig.height=5----------------------------------------------- p1 <- ggplot(df_simu_classif, aes(x=norm_noise01, y=norm_noise02, color=y)) + geom_point() + ggtitle("Noise features\nnorm_noise01 vs norm_noise02") + theme_bw() p2 <- ggplot(df_simu_classif, aes(x=diff_distrib01, y=diff_distrib02, color=y)) + geom_point() + ggtitle("Normal distributions\ndiff_distrib01 vs diff_distrib02") + theme_bw() p3 <- ggplot(df_simu_classif, aes(x=norm_noise03, y=biDistrib, color=y)) + geom_point() + ggtitle("Normal bidistribution\nbidistrib01 vs norm_noise03") + theme_bw() grid.arrange(p1, p2, p3, ncol = 3, nrow = 1) ## ----XAI.test 1 , warning=FALSE----------------------------------------------- set.seed(123) objXAI <- XAI.test(dataset_classif, "y", simData = TRUE, simPvalTarget = 0.01) ## ----------------------------------------------------------------------------- head(getMetricsTable(objXAI)) ## ----mapPvalImportance1------------------------------------------------------- mpi <- mapPvalImportance(objXAI) mpi$df ## ----mapPvalImportance5, eval=FALSE------------------------------------------- # mpi$dt ## ----xai2, warning=FALSE------------------------------------------------------ set.seed(123) objXAI <- XAI.test(df_simu_classif[,setdiff(colnames(df_simu_classif), c("diff_distrib01", "diff_distrib02"))], simData=TRUE) head(getMetricsTable(objXAI)) ## ----fig.width=10, fig.height=10---------------------------------------------- p1 <- plotModel(objXAI, "lm_pval", "biDistrib", "simThresh") p2 <- plotModel(objXAI, "RF_feat_imp", "biDistrib", "simThresh") p3 <- plotModel(objXAI, "SHAP_feat_imp", "biDistrib", "simThresh") p4 <- plotModel(objXAI, "LIME_feat_imp", "biDistrib", "simThresh") grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2) ## ----mapPvalImportance2------------------------------------------------------- mpi <- mapPvalImportance(objXAI) head(mpi$df) ## ----mapPvalImportance15, eval=FALSE------------------------------------------ # mpi$dt ## ----------------------------------------------------------------------------- transfo_parab <- function(xs){ x1 <- min(xs) x2 <- max(xs) h <- (x1 + x2) / 2 k <- max(xs)/2 a <- k / ((h - x1) * (h - x2)) y <- a * (xs - x1) * (xs - x2) return (y) } ## ----load regress simu data--------------------------------------------------- se_path <- system.file("extdata", "seRegress.rds", package="XAItest") dataset_regress <- readRDS(se_path) data_matrix <- assay(dataset_regress, "counts") data_matrix <- t(data_matrix) metadata <- as.data.frame(colData(dataset_regress)) df_simu_regr <- as.data.frame(cbind(data_matrix, y = metadata[['y']])) for (col in names(df_simu_regr)) { if (col != 'y') { df_simu_regr[[col]] <- as.numeric(df_simu_regr[[col]]) } } ## ----load dataframe regress simu data----------------------------------------- df_path <- system.file("extdata", "dfRegress.txt", package="XAItest") dataset_regress_df <- read.table(df_path) df_simu_regr_df <- dataset_regress ## ----------------------------------------------------------------------------- ggplot(df_simu_regr, aes(x=norm_noise2, y=y)) + geom_point() + theme_bw() ## ----xai3, warning=FALSE------------------------------------------------------ set.seed(123) regr_results <- XAI.test(dataset_regress, "y", simData=TRUE, simPvalTarget = 0.01) getMetricsTable(regr_results) ## ----mapPvalImportance3------------------------------------------------------- mpi <- mapPvalImportance(regr_results, refPvalColumn = "cor_adjPval", refPval = 0.01) head(mpi$df) ## ----mapPvalImportance25, eval=FALSE------------------------------------------ # mpi$dt ## ----------------------------------------------------------------------------- regr_results@args$modelType ## ----fig.width=10, fig.height=5----------------------------------------------- p1 <- plotModel(regr_results, "lm_pval", "norm_noise2") p2 <- plotModel(regr_results, "SHAP_feat_imp", "norm_noise2") grid.arrange(p1, p2, ncol = 2, nrow = 1) ## ----------------------------------------------------------------------------- modelsOverview(regr_results) ## ----cust fct----------------------------------------------------------------- script_path <- system.file("fctParamFIPV.R", package = "XAItest") source(script_path) xai_res <- XAI.test(df1, y="y", simData=F, customFIPV=list("pimp"=function(data, ...) { fctParamFIPV(data=data, fi_algo="gini", pv_algo="pimp", ml_algo="dt", fi_transfo="none", n_perm=20, shap_nsim = 10, verbose=0, ...) }), defaultMethods=c()) xai_res ## ----times-------------------------------------------------------------------- xai_res@computeTimes ## ----------------------------------------------------------------------------- sessionInfo()