## ----bioconductor, eval=FALSE-------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# 
# # The following initializes usage of Bioc devel
# BiocManager::install(version = "devel")
# 
# BiocManager::install("multiGSEA")

## ----devtools, eval=FALSE-----------------------------------------------------
# install.packages("devtools")
# devtools::install_github("https://github.com/yigbt/multiGSEA")

## ----load_multiGSEA, eval=FALSE-----------------------------------------------
# library(multiGSEA)

## ----load_mapping_library, results='hide', warning=FALSE, message=FALSE-------
library("org.Hs.eg.db")

## ----organismsTable, results='asis', echo=FALSE-------------------------------
caption <- "Supported organisms, their abbreviations being used in `multiGSEA`,
            and mapping database that are needed for feature conversion.
            Supported abbreviations can be seen with `getOrganisms()`"
df <- data.frame(
  Organisms = c(
    "Human", "Mouse", "Rat", "Dog", "Cow", "Pig",
    "Chicken", "Zebrafish", "Frog", "Fruit Fly",
    "C.elegans"
  ),
  Abbreviations = c(
    "hsapiens", "mmusculus", "rnorvegicus",
    "cfamiliaris", "btaurus", "sscrofa",
    "ggallus", "drerio", "xlaevis",
    "dmelanogaster", "celegans"
  ),
  Mapping = c(
    "org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db",
    "org.Cf.eg.db", "org.Bt.eg.db", "org.Ss.eg.db",
    "org.Gg.eg.db", "org.Xl.eg.db", "org.Dr.eg.db",
    "org.Dm.eg.db", "org.Ce.eg.db"
  )
)

knitr::kable(df, caption = caption)

## ----load_multiGSEA_package, results='hide', message=FALSE, warning=FALSE-----
library(multiGSEA)
library(magrittr)

## ----load_omics_data----------------------------------------------------------
# load transcriptomic features
data(transcriptome)

# load proteomic features
data(proteome)

# load metabolomic features
data(metabolome)

## ----omicsTable, results='asis', echo=FALSE-----------------------------------
caption <- "Structure of the necessary omics data. For each layer
            (here: transcriptome), feature IDs, log2FCs, and p-values
            are needed."

knitr::kable(
  transcriptome %>%
    dplyr::arrange(Symbol) %>%
    dplyr::slice(1:6),
  caption = caption
)

## ----rank_features, results='hide'--------------------------------------------
# create data structure
omics_data <- initOmicsDataStructure(layer = c(
  "transcriptome",
  "proteome",
  "metabolome"
))

## add transcriptome layer
omics_data$transcriptome <- rankFeatures(
  transcriptome$logFC,
  transcriptome$pValue
)
names(omics_data$transcriptome) <- transcriptome$Symbol

## add proteome layer
omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
names(omics_data$proteome) <- proteome$Symbol

## add metabolome layer
## HMDB features have to be updated to the new HMDB format
omics_data$metabolome <- rankFeatures(metabolome$logFC, metabolome$pValue)
names(omics_data$metabolome) <- metabolome$HMDB
names(omics_data$metabolome) <- gsub(
  "HMDB", "HMDB00",
  names(omics_data$metabolome)
)

## ----omics_short--------------------------------------------------------------
omics_short <- lapply(names(omics_data), function(name) {
  head(omics_data[[name]])
})
names(omics_short) <- names(omics_data)
omics_short

## ----calculate_enrichment, results='hide', message=FALSE, warning=FALSE-------
databases <- c("kegg", "reactome")
layers <- names(omics_data)

pathways <- getMultiOmicsFeatures(
  dbs = databases, layer = layers,
  returnTranscriptome = "SYMBOL",
  returnProteome = "SYMBOL",
  returnMetabolome = "HMDB",
  useLocal = FALSE
)

## ----pathways_short-----------------------------------------------------------
pathways_short <- lapply(names(pathways), function(name) {
  head(pathways[[name]], 2)
})
names(pathways_short) <- names(pathways)
pathways_short

## ----run_enrichment, results='hide', message=FALSE, warning=FALSE-------------
# use the multiGSEA function to calculate the enrichment scores
# for all omics layer at once.
enrichment_scores <- multiGSEA(pathways, omics_data)

## ----combine_pvalues----------------------------------------------------------
df <- extractPvalues(
  enrichmentScores = enrichment_scores,
  pathwayNames = names(pathways[[1]])
)

df$combined_pval <- combinePvalues(df)
df$combined_padj <- p.adjust(df$combined_pval, method = "BH")

df <- cbind(data.frame(pathway = names(pathways[[1]])), df)

## ----resultTable, results='asis', echo=FALSE----------------------------------
caption <- "Table summarizing the top 15 pathways where we can calculate an
            enrichment for all three layers . Pathways from KEGG, Reactome,
            and BioCarta are listed based on their aggregated adjusted p-value.
            Corrected p-values are displayed for each omics layer separately and
            aggregated by means of the Stouffer's Z-method."

knitr::kable(
  df %>%
    dplyr::arrange(combined_padj) %>%
    dplyr::filter(!is.na(metabolome_padj)) %>%
    dplyr::select(c(pathway, transcriptome_padj, proteome_padj, metabolome_padj, combined_pval)) %>%
    dplyr::slice(1:15),
  caption = caption
)

## ----msigdbr, eval=FALSE------------------------------------------------------
# library(msigdbr)
# library(dplyr)
# 
# hallmark <- msigdbr(species = "Rattus norvegicus", category = "H") %>%
#   dplyr::select(gs_name, ensembl_gene) %>%
#   dplyr::filter(!is.na(ensembl_gene)) %>%
#   unstack(ensembl_gene ~ gs_name)
# 
# pathways <- list("transcriptome" = hallmark)

## ----session, echo=FALSE------------------------------------------------------
sessionInfo()