## ----bioconductor, eval=FALSE-------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
# install.packages("BiocManager")
# }
#
# # The following initializes usage of Bioc devel
# BiocManager::install(version = "devel")
#
# BiocManager::install("multiGSEA")
## ----devtools, eval=FALSE-----------------------------------------------------
# install.packages("devtools")
# devtools::install_github("https://github.com/yigbt/multiGSEA")
## ----load_multiGSEA, eval=FALSE-----------------------------------------------
# library(multiGSEA)
## ----load_mapping_library, results='hide', warning=FALSE, message=FALSE-------
library("org.Hs.eg.db")
## ----organismsTable, results='asis', echo=FALSE-------------------------------
caption <- "Supported organisms, their abbreviations being used in `multiGSEA`,
and mapping database that are needed for feature conversion.
Supported abbreviations can be seen with `getOrganisms()`"
df <- data.frame(
Organisms = c(
"Human", "Mouse", "Rat", "Dog", "Cow", "Pig",
"Chicken", "Zebrafish", "Frog", "Fruit Fly",
"C.elegans"
),
Abbreviations = c(
"hsapiens", "mmusculus", "rnorvegicus",
"cfamiliaris", "btaurus", "sscrofa",
"ggallus", "drerio", "xlaevis",
"dmelanogaster", "celegans"
),
Mapping = c(
"org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db",
"org.Cf.eg.db", "org.Bt.eg.db", "org.Ss.eg.db",
"org.Gg.eg.db", "org.Xl.eg.db", "org.Dr.eg.db",
"org.Dm.eg.db", "org.Ce.eg.db"
)
)
knitr::kable(df, caption = caption)
## ----load_multiGSEA_package, results='hide', message=FALSE, warning=FALSE-----
library(multiGSEA)
library(magrittr)
## ----load_omics_data----------------------------------------------------------
# load transcriptomic features
data(transcriptome)
# load proteomic features
data(proteome)
# load metabolomic features
data(metabolome)
## ----omicsTable, results='asis', echo=FALSE-----------------------------------
caption <- "Structure of the necessary omics data. For each layer
(here: transcriptome), feature IDs, log2FCs, and p-values
are needed."
knitr::kable(
transcriptome %>%
dplyr::arrange(Symbol) %>%
dplyr::slice(1:6),
caption = caption
)
## ----rank_features, results='hide'--------------------------------------------
# create data structure
omics_data <- initOmicsDataStructure(layer = c(
"transcriptome",
"proteome",
"metabolome"
))
## add transcriptome layer
omics_data$transcriptome <- rankFeatures(
transcriptome$logFC,
transcriptome$pValue
)
names(omics_data$transcriptome) <- transcriptome$Symbol
## add proteome layer
omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
names(omics_data$proteome) <- proteome$Symbol
## add metabolome layer
## HMDB features have to be updated to the new HMDB format
omics_data$metabolome <- rankFeatures(metabolome$logFC, metabolome$pValue)
names(omics_data$metabolome) <- metabolome$HMDB
names(omics_data$metabolome) <- gsub(
"HMDB", "HMDB00",
names(omics_data$metabolome)
)
## ----omics_short--------------------------------------------------------------
omics_short <- lapply(names(omics_data), function(name) {
head(omics_data[[name]])
})
names(omics_short) <- names(omics_data)
omics_short
## ----calculate_enrichment, results='hide', message=FALSE, warning=FALSE-------
databases <- c("kegg", "reactome")
layers <- names(omics_data)
pathways <- getMultiOmicsFeatures(
dbs = databases, layer = layers,
returnTranscriptome = "SYMBOL",
returnProteome = "SYMBOL",
returnMetabolome = "HMDB",
useLocal = FALSE
)
## ----pathways_short-----------------------------------------------------------
pathways_short <- lapply(names(pathways), function(name) {
head(pathways[[name]], 2)
})
names(pathways_short) <- names(pathways)
pathways_short
## ----run_enrichment, results='hide', message=FALSE, warning=FALSE-------------
# use the multiGSEA function to calculate the enrichment scores
# for all omics layer at once.
enrichment_scores <- multiGSEA(pathways, omics_data)
## ----combine_pvalues----------------------------------------------------------
df <- extractPvalues(
enrichmentScores = enrichment_scores,
pathwayNames = names(pathways[[1]])
)
df$combined_pval <- combinePvalues(df)
df$combined_padj <- p.adjust(df$combined_pval, method = "BH")
df <- cbind(data.frame(pathway = names(pathways[[1]])), df)
## ----resultTable, results='asis', echo=FALSE----------------------------------
caption <- "Table summarizing the top 15 pathways where we can calculate an
enrichment for all three layers . Pathways from KEGG, Reactome,
and BioCarta are listed based on their aggregated adjusted p-value.
Corrected p-values are displayed for each omics layer separately and
aggregated by means of the Stouffer's Z-method."
knitr::kable(
df %>%
dplyr::arrange(combined_padj) %>%
dplyr::filter(!is.na(metabolome_padj)) %>%
dplyr::select(c(pathway, transcriptome_padj, proteome_padj, metabolome_padj, combined_pval)) %>%
dplyr::slice(1:15),
caption = caption
)
## ----msigdbr, eval=FALSE------------------------------------------------------
# library(msigdbr)
# library(dplyr)
#
# hallmark <- msigdbr(species = "Rattus norvegicus", category = "H") %>%
# dplyr::select(gs_name, ensembl_gene) %>%
# dplyr::filter(!is.na(ensembl_gene)) %>%
# unstack(ensembl_gene ~ gs_name)
#
# pathways <- list("transcriptome" = hallmark)
## ----session, echo=FALSE------------------------------------------------------
sessionInfo()