## ----installation, eval=FALSE-------------------------------------------------
# if (!require("BiocManager")) {
#     install.packages("BiocManager")
# }
# BiocManager::install('XAItest')

## ----scenarios----------------------------------------------------------------
library(XAItest)
library(gridExtra)
library(ggplot2)

set.seed(12)

# Scenario 1: Variance Difference
df1 <- genScenario(1)
# Scenario 2: Bimodal Distribution
df2 <- genScenario(2)
# Scenario 3: XOR Interaction
df3 <- genScenario(3)
# Scenario 4: Concentric Circles
df4 <- genScenario(4)
# Scenario 5: Parabolic Relationship
df5 <- genScenario(5)
# Scenario 6: Rising Sinusoid
df6 <- genScenario(6)

# Create plots for each scenario
p1 <- ggplot(df1, aes(x = norm_noise01, y = diffVar01, color = y)) +
  geom_point() + scale_color_manual(values = c("orange", "purple")) +
  ggtitle("Scenario 1:\nVariance Difference")
p2 <- ggplot(df2, aes(x = norm_noise01, y = biDistrib, color = y)) +
  geom_point() + scale_color_manual(values = c("orange", "purple")) +
  ggtitle("Scenario 2:\nBimodal Distribution")
p3 <- ggplot(df3, aes(x = relVar01, y = relVar02, color = y)) +
  geom_point() + scale_color_manual(values = c("orange", "purple")) +
  ggtitle("Scenario 3:\nXOR Interaction")
p4 <- ggplot(df4, aes(x = relVar01, y = relVar02, color = y)) +
  geom_point() + scale_color_manual(values = c("orange", "purple")) +
  ggtitle("Scenario 4:\nConcentric Circles")
p5 <- ggplot(df5, aes(x = norm_noise01, y = y)) +
  geom_point() + ggtitle("Scenario 5:\nParabolic Relationship")
p6 <- ggplot(df6, aes(x = unif_noise01, y = y)) +
  geom_point() + ggtitle("Scenario 6:\nRising Sinusoid")

# Load required library for grid arrangement


# Arrange the plots in a 2x3 grid with width twice the height
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2,
             widths = rep(2, 3), heights = rep(1, 2))


## ----addSimThresh-------------------------------------------------------------
set.seed(1)
# Add the simThresh variable to achieve a p-value = 0.001 between A and B
df_st <- addSimThresh(df1, pval_target = 0.001)
df_st[c(1:2,50:51),c(1:3,(ncol(df_st)-2):ncol(df_st))]

## ----check pvalue-------------------------------------------------------------
grp_A <- df_st$simThresh[df_st$y == "A"]
grp_B <- df_st$simThresh[df_st$y == "B"]
tt <- t.test(grp_A, grp_B)
 tt$p.value

## ----build dt-----------------------------------------------------------------
library(rpart)
fit <- rpart(y ~ ., data = df_st, method = "class")

imp_raw <- fit$variable.importance

# Significance threshold
fi_sim <- imp_raw[["simThresh"]]
fi_sim

## ----detect features----------------------------------------------------------
# Feature Importance
imp_sorted <- sort(imp_raw, decreasing = TRUE)
imp_sorted
# Features above the threshold
vars_above <- setdiff(names(imp_sorted[imp_sorted > fi_sim]), "simThresh")
vars_above

## ----addSimThresh regress-----------------------------------------------------
set.seed(1)
# Add the simThresh variable to achieve a p-value = 0.001 for the correlation with `y`
df_st <- addSimThresh(df5, pval_target = 0.001)
df_st[1:4,c(1:3,ncol(df_st))]

tt <- cor.test(df_st$y, df_st$simThresh )
tt$p.value

fit <- rpart(y ~ ., data = df_st)

imp_raw <- fit$variable.importance
imp_raw

# Significance threshold
fi_sim <- imp_raw[["simThresh"]]
fi_sim

# Feature Importance
imp_sorted <- sort(imp_raw, decreasing = TRUE)
imp_sorted
# Features above the threshold
vars_above <- setdiff(names(imp_sorted[imp_sorted > fi_sim]), "simThresh")
vars_above

## ----load libs, message=F-----------------------------------------------------
# Load the libraries
library(ggforce)
library(SummarizedExperiment)

## -----------------------------------------------------------------------------
data(airway, package="airway")
se <- airway

## ----first XAI.test, warning=FALSE--------------------------------------------
results <- XAI.test(se[1:100,], y = "dex", verbose = TRUE, simData = TRUE)

## -----------------------------------------------------------------------------
results

## ----load classif simu data---------------------------------------------------
se_path <- system.file("extdata", "seClassif.rds", package="XAItest")
dataset_classif <- readRDS(se_path)

data_matrix <- assay(dataset_classif, "counts")
data_matrix <- t(data_matrix)
metadata <- as.data.frame(colData(dataset_classif))
df_simu_classif <- as.data.frame(cbind(data_matrix, y = metadata[['y']]))
for (col in names(df_simu_classif)) {
    if (col != 'y') {
        df_simu_classif[[col]] <- as.numeric(df_simu_classif[[col]])
    }
}

## ----load dataframe ex classif simu data--------------------------------------
df_path <- system.file("extdata", "dfClassif.txt", package="XAItest")
dataset_classif_df <- read.table(df_path)
df_simu_classif_df <- dataset_classif

## ----fig.width=12, fig.height=5-----------------------------------------------
p1 <- ggplot(df_simu_classif, aes(x=norm_noise01, y=norm_noise02,
            color=y)) +
    geom_point() +
    ggtitle("Noise features\nnorm_noise01 vs norm_noise02") +
    theme_bw()
p2 <- ggplot(df_simu_classif, aes(x=diff_distrib01, y=diff_distrib02,
            color=y)) +
    geom_point() +
    ggtitle("Normal distributions\ndiff_distrib01 vs diff_distrib02") +
    theme_bw()
p3 <- ggplot(df_simu_classif, aes(x=norm_noise03, y=biDistrib, color=y)) +
    geom_point() +
    ggtitle("Normal bidistribution\nbidistrib01 vs norm_noise03") +
    theme_bw()
grid.arrange(p1, p2, p3, ncol = 3, nrow = 1)

## ----XAI.test 1 , warning=FALSE-----------------------------------------------
set.seed(123)
objXAI <- XAI.test(dataset_classif, "y", simData = TRUE, simPvalTarget = 0.01)

## -----------------------------------------------------------------------------
head(getMetricsTable(objXAI))

## ----mapPvalImportance1-------------------------------------------------------
mpi <- mapPvalImportance(objXAI)
mpi$df

## ----mapPvalImportance5, eval=FALSE-------------------------------------------
# mpi$dt

## ----xai2, warning=FALSE------------------------------------------------------
set.seed(123)
objXAI <- XAI.test(df_simu_classif[,setdiff(colnames(df_simu_classif),
                                    c("diff_distrib01", "diff_distrib02"))],
                  simData=TRUE)
head(getMetricsTable(objXAI))

## ----fig.width=10, fig.height=10----------------------------------------------
p1 <- plotModel(objXAI, "lm_pval", "biDistrib", "simThresh")
p2 <- plotModel(objXAI, "RF_feat_imp", "biDistrib", "simThresh")
p3 <- plotModel(objXAI, "SHAP_feat_imp", "biDistrib", "simThresh")
p4 <- plotModel(objXAI, "LIME_feat_imp", "biDistrib", "simThresh")
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

## ----mapPvalImportance2-------------------------------------------------------
mpi <- mapPvalImportance(objXAI)
head(mpi$df)

## ----mapPvalImportance15, eval=FALSE------------------------------------------
# mpi$dt

## -----------------------------------------------------------------------------
transfo_parab <- function(xs){
    x1 <- min(xs)
    x2 <- max(xs)
    h <- (x1 + x2) / 2
    k <- max(xs)/2
    a <- k / ((h - x1) * (h - x2))
    
    y <- a * (xs - x1) * (xs - x2)
    
    return (y)
}


## ----load regress simu data---------------------------------------------------
se_path <- system.file("extdata", "seRegress.rds", package="XAItest")
dataset_regress <- readRDS(se_path)

data_matrix <- assay(dataset_regress, "counts")
data_matrix <- t(data_matrix)
metadata <- as.data.frame(colData(dataset_regress))
df_simu_regr <- as.data.frame(cbind(data_matrix, y = metadata[['y']]))
for (col in names(df_simu_regr)) {
    if (col != 'y') {
        df_simu_regr[[col]] <- as.numeric(df_simu_regr[[col]])
    }
}

## ----load dataframe regress simu data-----------------------------------------
df_path <- system.file("extdata", "dfRegress.txt", package="XAItest")
dataset_regress_df <- read.table(df_path)
df_simu_regr_df <- dataset_regress

## -----------------------------------------------------------------------------
ggplot(df_simu_regr, aes(x=norm_noise2, y=y)) + geom_point() + theme_bw()

## ----xai3, warning=FALSE------------------------------------------------------
set.seed(123)
regr_results <- XAI.test(dataset_regress, "y",
                            simData=TRUE, simPvalTarget = 0.01)
getMetricsTable(regr_results)

## ----mapPvalImportance3-------------------------------------------------------
mpi <- mapPvalImportance(regr_results, refPvalColumn = "cor_adjPval", refPval = 0.01)
head(mpi$df)

## ----mapPvalImportance25, eval=FALSE------------------------------------------
# mpi$dt

## -----------------------------------------------------------------------------
regr_results@args$modelType

## ----fig.width=10, fig.height=5-----------------------------------------------
p1 <- plotModel(regr_results, "lm_pval", "norm_noise2")
p2 <- plotModel(regr_results, "SHAP_feat_imp", "norm_noise2")
grid.arrange(p1, p2, ncol = 2, nrow = 1)

## -----------------------------------------------------------------------------
modelsOverview(regr_results)

## ----cust fct-----------------------------------------------------------------
script_path <- system.file("fctParamFIPV.R", package = "XAItest")
source(script_path)

xai_res <- XAI.test(df1, y="y", simData=F, customFIPV=list("pimp"=function(data, ...) {
    fctParamFIPV(data=data, fi_algo="gini", pv_algo="pimp", ml_algo="dt",
    fi_transfo="none", n_perm=20, shap_nsim = 10, verbose=0, ...)
}), defaultMethods=c())

xai_res

## ----times--------------------------------------------------------------------
xai_res@computeTimes

## -----------------------------------------------------------------------------
sessionInfo()