## ----setup, include=FALSE----------------------------------------------------- options(prompt = 'R> ', continue = '+ ') options(ggrepel.max.overlaps = Inf) def.chunk.hook <- knitr::knit_hooks$get("chunk") knitr::knit_hooks$set(chunk = function(x, options) { x <- def.chunk.hook(x, options) paste0("\n \\", "footnotesize","\n\n", x, "\n\n \\normalsize") }) knitr::opts_chunk$set( fig.path = "figures/" ) ## ----eval = FALSE------------------------------------------------------------- # install.packages("RGCCA") ## ----------------------------------------------------------------------------- RGCCA::available_methods() ## ----------------------------------------------------------------------------- library("RGCCA") data("Russett") colnames(Russett) ## ----------------------------------------------------------------------------- A <- list( Agriculture = Russett[, c("gini", "farm", "rent")], Industrial = Russett[, c("gnpr", "labo")], Politic = Russett[, c("inst", "ecks", "death", "demostab", "dictator")]) lab <- factor( apply(Russett[, 9:11], 1, which.max), labels = c("demost", "demoinst", "dict") ) ## ----------------------------------------------------------------------------- C <- matrix(c(0, 0, 1, 0, 0, 1, 1, 1, 0), 3, 3) C ## ----------------------------------------------------------------------------- fit <- rgcca(blocks = A, connection = C, tau = 1, ncomp = 2, scheme = "factorial", scale = TRUE, scale_block = FALSE, comp_orth = TRUE, verbose = FALSE) ## ----------------------------------------------------------------------------- summary(fit) ## ----fig-weight, fig.height = 12, fig.width=18, fig.cap = 'Block-weight vectors of a fitted RGCCA model.', fig.pos = "H"---- plot(fit, type = "weight", block = 1:3, comp = 1, display_order = FALSE, cex = 2) ## ----fig-sample1, fig.align='center', fig.cap = '\\label{fig:sample}Graphical display of the countries by drawing the block component of the first block against the block component of the second block, colored according to their political regime.', fig.height = 12, fig.width=18, fig.pos = "H"---- plot(fit, type = "sample", block = 1:2, comp = 1, resp = lab, repel = TRUE, cex = 2) ## ----fig-ave, fig.align='center', fig.cap = 'Average variance explained of the different blocks.', fig.height = 8, fig.width=18, fig.pos = "H"---- plot(fit, type = "ave", cex = 2) ## ----fig-cor-circle, fig.align='center', fig.cap = 'Correlation circle associated with the first two components of the first block.', fig.height = 12, fig.width=18, fig.pos = "H"---- plot(fit, type = "cor_circle", block = 1, comp = 1:2, display_blocks = 1:3, cex = 2) ## ----fig-biplot1, fig.align='center', fig.cap = 'Biplot associated with the first two components of the first block.', fig.height = 12, fig.width=18, fig.pos = "H"---- plot(fit, type = "biplot", block = 1, comp = 1:2, repel = TRUE, resp = lab, cex = 2, show_arrow = TRUE) ## ----size = "tiny"------------------------------------------------------------ summary(boot_out, block = 1:3, ncomp = 1) ## ----fig-boot1, fig.cap = 'Bootstrap confidence intervals for the block-weight vectors.', fig.height = 12, fig.width=18, fig.pos = "H"---- plot(boot_out, type = "weight", block = 1:3, comp = 1, display_order = FALSE, cex = 2, show_stars = TRUE) ## ----------------------------------------------------------------------------- fit <- rgcca(blocks = A, connection = C, tau = "optimal", scheme = "factorial") ## ----------------------------------------------------------------------------- fit$call$tau ## ----------------------------------------------------------------------------- set.seed(0) perm_out <- rgcca_permutation(blocks = A, connection = C, par_type = "tau", par_length = 10, n_cores = 1, n_perms = 10) ## ----width=30----------------------------------------------------------------- summary(perm_out) ## ----fig-permutation, fig.height = 12, fig.width=18, fig.pos = "H", fig.cap = "Values of the objective function of RGCCA against the sets of tuning parameters, triangles correspond to evaluations on non-permuted datasets."---- plot(perm_out, cex = 2) ## ----------------------------------------------------------------------------- fit <- rgcca(perm_out) ## ----------------------------------------------------------------------------- fit.mcoa <- rgcca(blocks = A, method = "mcoa", ncomp = 2) ## ----------------------------------------------------------------------------- summary(fit.mcoa) ## ----fig-biplot2, fig.align='center', fig.cap = 'Biplot of the countries obtained by crossing the two first components of the superblock. Individuals are colored according to their political regime and variables according to their block membership.', fig.height = 12, fig.width=18, fig.pos = "H"---- plot(fit.mcoa, type = "biplot", block = 4, comp = 1:2, response = lab, repel = TRUE, cex = 2) ## ----eval = FALSE------------------------------------------------------------- # if (!("gliomaData" %in% rownames(installed.packages()))) { # destfile <- tempfile() # download.file("http://biodev.cea.fr/sgcca/gliomaData_0.4.tar.gz", destfile) # install.packages(destfile, repos = NULL, type = "source") # } ## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(eval = "gliomaData" %in% rownames(installed.packages())) ## ----------------------------------------------------------------------------- # data("ge_cgh_locIGR", package = "gliomaData") # # blocks <- ge_cgh_locIGR$multiblocks # Loc <- factor(ge_cgh_locIGR$y) # levels(Loc) <- colnames(ge_cgh_locIGR$multiblocks$y) # blocks[[3]] <- Loc # # vapply(blocks, NCOL, FUN.VALUE = 1L) ## ----------------------------------------------------------------------------- # fit.rgcca <- rgcca(blocks = blocks, response = 3, ncomp = 2, verbose = FALSE) ## ----------------------------------------------------------------------------- # fit.rgcca$call$connection # fit.rgcca$call$tau ## ----------------------------------------------------------------------------- # fit.rgcca$primal_dual ## ----------------------------------------------------------------------------- # system.time( # rgcca(blocks = blocks, response = 3) # ) ## ----fig-sample2, fig.align='center', fig.height = 12, fig.width=18, fig.pos = "H", fig.cap = "Graphical display of the tumors obtained by crossing the block components, and colored according to their location."---- # plot(fit.rgcca, type = "sample", block = 1:2, # comp = 1, response = Loc, cex = 2) ## ----------------------------------------------------------------------------- # fit.sgcca <- rgcca(blocks = blocks, response = 3, ncomp = 2, # sparsity = c(0.0710, 0.2000, 1), # verbose = FALSE) ## ----------------------------------------------------------------------------- # summary(fit.sgcca) ## ----cache = TRUE------------------------------------------------------------- # set.seed(0) # in_train <- caret::createDataPartition( # blocks[[3]], p = .75, list = FALSE # ) # training <- lapply(blocks, function(x) as.matrix(x)[in_train, , drop = FALSE]) # testing <- lapply(blocks, function(x) as.matrix(x)[-in_train, , drop = FALSE]) # # cv_out <- rgcca_cv(blocks = training, response = 3, # par_type = "sparsity", # par_value = c(.2, .2, 0), # par_length = 10, # prediction_model = "lda", # validation = "kfold", # k = 7, n_run = 3, metric = "Balanced_Accuracy", # n_cores = 1) ## ----------------------------------------------------------------------------- # summary(cv_out) ## ----fig-cv, fig.height = 12, fig.width=18, fig.pos = "H", fig.cap = "Balanced accuracies of the models on the different validation folds for the different tuning parameter sets."---- # plot(cv_out, cex = 2) ## ----------------------------------------------------------------------------- # fit <- rgcca(cv_out) # summary(fit) ## ----------------------------------------------------------------------------- # pred <- rgcca_predict(fit, blocks_test = testing, prediction_model = "lda") ## ----------------------------------------------------------------------------- # pred$confusion$test ## ----------------------------------------------------------------------------- # projection <- rgcca_transform(fit, blocks_test = testing) ## ----cache = TRUE, message = FALSE-------------------------------------------- # set.seed(0) # fit_stab <- rgcca_stability(fit, # keep = vapply( # fit$a, function(x) mean(x != 0), # FUN.VALUE = 1.0 # ), # n_boot = 100, verbose = TRUE, n_cores = 1) ## ----cache = TRUE------------------------------------------------------------- # set.seed(0) # boot_out <- rgcca_bootstrap(fit_stab, n_boot = 500) ## ----fig-boot2, fig.height = 12, fig.width=18, fig.pos = "H", fig.cap = "Bootstrap confidence intervals for the block-weight vectors associated with GE."---- # plot(boot_out, block = 1, # display_order = FALSE, # n_mark = 50, cex = 1.5, cex_sub = 17, # show_star = TRUE)