## ---- echo=FALSE-------------------------------------------------------------- knitr::opts_chunk$set(message = FALSE, warning = FALSE, crop = NULL) ## ----install, eval = FALSE---------------------------------------------------- # if (!"BiocManager" %in% rownames(installed.packages())) # install.packages("BiocManager") # # Other packages required in this vignette # pkg <- c("tidyverse", "magrittr", "ggplot2", "cowplot", "DT") # BiocManager::install(pkg) # BiocManager::install("sSNAPPY") # install.packages("htmltools") ## ----setup, results="hide", warning=FALSE------------------------------------ library(sSNAPPY) library(tidyverse) library(magrittr) library(ggplot2) library(cowplot) library(DT) library(htmltools) ## ----data--------------------------------------------------------------------- data(logCPM_example) data(metadata_example) # check if samples included in the logCPM matrix and metadata dataframe are identical setequal(colnames(logCPM_example), metadata_example$sample) # View sample metadata datatable(metadata_example, filter = "top") ## ----logCPM_example, eval=FALSE----------------------------------------------- # head(logCPM_example) ## ----ssFC--------------------------------------------------------------------- #compute weighted single sample logFCs weightedFC <- weight_ss_fc(logCPM_example, metadata = metadata_example, factor = "patient", control = "Vehicle") ## ----lowess, fig.width=8,fig.height=4----------------------------------------- perSample_FC <- lapply(levels(metadata_example$patient), function(x){ temp <- logCPM_example[seq_len(1000),str_detect(colnames(logCPM_example), x)] ratio <- temp[, str_detect(colnames(temp), "Vehicle", negate = TRUE)] - temp[, str_detect(colnames(temp), "Vehicle")] }) %>% do.call(cbind,.) aveCPM <- logCPM_example[seq_len(1000),] %>% rowMeans() %>% enframe(name = "gene_id", value = "aveCPM") p1 <- perSample_FC %>% as.data.frame() %>% rownames_to_column("gene_id") %>% pivot_longer(cols = -"gene_id", names_to = "name", values_to = "logFC") %>% left_join(aveCPM) %>% ggplot(aes(aveCPM, logFC)) + geom_point() + labs(y = "sslogFC", x = "Average logCPM") + theme( panel.background = element_blank() ) p2 <- data.frame( gene_id = rownames(perSample_FC), variance = perSample_FC %>% apply(1,var)) %>% left_join(aveCPM) %>% ggplot(aes(aveCPM, variance)) + geom_point() + geom_smooth(method = "loess") + labs(y = "Variance in ssLogFCs", x = "Average logCPM") + theme( panel.background = element_blank() ) plot_grid(p1, p2) ## ----pathwayDatabases--------------------------------------------------------- library(graphite) graphite::pathwayDatabases() %>% dplyr::filter(species == "hsapiens") %>% pander::pander() ## ----retrieve_topology-------------------------------------------------------- gsTopology <- retrieve_topology(database = "kegg") head(names(gsTopology)) ## ----gsTopology_sub----------------------------------------------------------- gsTopology_sub <- retrieve_topology( database = "kegg", pathwayName = c( "Glycolysis / Gluconeogenesis", "Citrate cycle (TCA cycle)", "Pentose phosphate pathway" )) names(gsTopology_sub) ## ----------------------------------------------------------------------------- genePertScore <- raw_gene_pert(weightedFC$logFC, gsTopology) ## ----------------------------------------------------------------------------- pathwayPertScore <- pathway_pert(genePertScore) head(pathwayPertScore) ## ----permutedScore, eval=FALSE------------------------------------------------ # permutedScore <- generate_permuted_scores( # expreMatrix = logCPM_example, # numOfTreat = 3, NB = 1000, # gsTopology = gsTopology, # weight = weightedFC$weight # ) ## ----normalisedScores, eval=FALSE--------------------------------------------- # normalisedScores <- normalise_by_permu(permutedScore, ssPertScore) ## ----------------------------------------------------------------------------- load(system.file("extdata", "normalisedScores.rda", package = "sSNAPPY")) ## ----DT_indi------------------------------------------------------------------ normalisedScores %>% dplyr::filter(adjPvalue < 0.05) %>% left_join(metadata_example) %>% mutate_at(vars(c("sample", "gs_name")), as.factor) %>% mutate_if(is.numeric, sprintf, fmt = '%#.4f') %>% mutate(Direction = ifelse(robustZ < 0, "Inhibited", "Activation")) %>% dplyr::select( sample, patient, Treatment = treatment, `Perturbation Score` = robustZ, Direction, `Gene-set name` = gs_name, `P-value` = pvalue, FDR = adjPvalue ) %>% datatable( filter = "top", options = list( columnDefs = list(list(targets = "Direction", visible = FALSE)) ), caption = htmltools::tags$caption( htmltools::em( "Pathways that were significant perturbed within individual samples.") ) ) %>% formatStyle( 'Perturbation Score', 'Direction', color = styleEqual(c("Inhibited", "Activation"), c("blue", "red")) ) ## ----sigGS_nt_zscore, fig.width= 10, fig.height=4, fig.cap="*Pathways significantly perturbed in individual samples, where gene-sets were colored by pathways' directions of changes.*"---- pl <- normalisedScores %>% dplyr::filter(adjPvalue < 0.05) %>% split(f = .$sample) %>% lapply( plot_gs_network, # layout = "dh", gsTopology = gsTopology, colorBy = "robustZ" ) %>% lapply(function(x){ x + theme( panel.grid = element_blank(), panel.background = element_blank() ) }) plot_grid( plotlist = pl, labels = names(pl), label_size = 8, nrow = 1) ## ----sigGS_nt_pvalue, fig.width= 10, fig.height=4,fig.cap="*Pathways significantly perturbed in individual samples, where gene-sets were colored by pathways' p-values*"---- pl <- normalisedScores %>% dplyr::filter(adjPvalue < 0.05) %>% split(f = .$sample) %>% lapply( plot_gs_network, gsTopology = gsTopology, colorBy = "pvalue", color_lg_title = "P-value" ) %>% lapply(function(x){ x + theme( panel.grid = element_blank(), panel.background = element_blank() ) }) plot_grid( plotlist = pl, labels = names(pl), label_size = 8, nrow = 1) ## ----fit---------------------------------------------------------------------- fit <- normalisedScores %>% left_join(metadata_example) %>% split(f = .$gs_name) %>% #.["Estrogen signaling pathway"] %>% lapply(function(x)lm(robustZ ~ 0 + treatment + PR, data = x)) %>% lapply(summary) treat_sig <- lapply( names(fit), function(x){ fit[[x]]$coefficients %>% as.data.frame() %>% .[seq_len(2),] %>% dplyr::select(Estimate, pvalue = `Pr(>|t|)` ) %>% rownames_to_column("Treatment") %>% mutate( gs_name = x, FDR = p.adjust(pvalue, "fdr"), Treatment = str_remove_all(Treatment, "treatment") ) }) %>% bind_rows() ## ----treat_sig_DT------------------------------------------------------------- treat_sig %>% dplyr::filter(FDR < 0.05) %>% mutate_at(vars(c("Treatment", "gs_name")), as.factor) %>% mutate_if(is.numeric, sprintf, fmt = '%#.4f') %>% mutate(Direction = ifelse(Estimate < 0, "Inhibited", "Activation")) %>% dplyr::select( Treatment, `Perturbation Score` = Estimate, Direction, `Gene-set name` = gs_name, `P-value` = pvalue, FDR ) %>% datatable( filter = "top", options = list( columnDefs = list(list(targets = "Direction", visible = FALSE)) ), caption = htmltools::tags$caption( htmltools::em( "Pathways that were significant perturbed within each treatment group.") ) ) %>% formatStyle( 'Perturbation Score', 'Direction', color = styleEqual(c("Inhibited", "Activation"), c("blue", "red")) ) ## ----fig.height= 5, fig.width=8, fig.cap="*Gene-level perturbation scores of all genes in the Ovarian steroidogenesis gene-set. The activation of pathway was driven by a few dominating genes.*"---- plot_gene_contribution( genePertScore = genePertScore, gsToPlot = "Ovarian steroidogenesis", metadata = metadata_example, annotation_attribute = c("pathwayPertScore", "treatment"), pathwayPertScore = pathwayPertScore, breaks = seq(-0.0001, 0.0001, length.out = 100), show_rownames = FALSE ) ## ----fig.height= 5, fig.width=8, fig.cap="*Gene-level perturbation scores of all genes in the Ovarian steroidogenesis gene-set.Genes playing the most important roles in pathway activation was INS, CGA, GNAS.*"---- load(system.file("extdata", "entrez2name.rda", package = "sSNAPPY")) plot_gene_contribution( genePertScore = genePertScore, gsToPlot = "Ovarian steroidogenesis", metadata = metadata_example, annotation_attribute = c("pathwayPertScore", "treatment"), pathwayPertScore = pathwayPertScore, breaks = seq(-0.0001, 0.0001, length.out = 100), fontsize_row = 6, mapEntrezID = entrez2name ) ## ----fig.width=10, fig.height=8, fig.cap="*Pathways significantly perturbed by the E2+R5020 combintation treatment, where colors of nodes reflect pathways' directions of changes.*"---- treat_sig %>% dplyr::filter(FDR < 0.05, Treatment == "E2+R5020") %>% dplyr::rename(robustZ = Estimate) %>% plot_gs_network( gsTopology = gsTopology, colorBy = "robustZ" ) + theme( panel.grid = element_blank(), panel.background = element_blank() ) ## ---- fig.height=8, fig.width=12, fig.cap="*Pathways significantly perturbed by the E2+R5020 combintation treatment, annotated by the main biological processes each pathways belong to.*"---- treat_sig %>% dplyr::filter(FDR < 0.05, Treatment == "E2+R5020") %>% dplyr::rename(robustZ = Estimate) %>% plot_community( gsTopology = gsTopology, colorBy = "community", color_lg_title = "Community" ) + theme(panel.background = element_blank()) ## ---- fig.height=7, fig.width=12, fig.cap="*Pathways significantly perturbed by the E2+R5020 combintation treatment and all genes contained in those pathways.*"---- treat_sig %>% dplyr::filter(FDR < 0.05, Treatment == "E2+R5020") %>% dplyr::rename(robustZ = Estimate) %>% plot_gs2gene( gsTopology = gsTopology, colorGS_By = "robustZ", label_Gene = FALSE, GeneNode_size = 1, edgeAlpha = 0.1 ) + theme(panel.background = element_blank()) ## ----top200_FC---------------------------------------------------------------- meanFC <- weightedFC$logFC %>% .[, str_detect(colnames(.), "E2", negate = TRUE)] %>% apply(1, mean ) top200_gene <- meanFC %>% abs() %>% sort(decreasing = TRUE, ) %>% .[1:200] %>% names() top200_FC <- meanFC %>% .[names(.) %in% top200_gene] ## ---- fig.height=8, fig.width=10, fig.cap="*Pathways significantly perturbed by the E2+R5020 combintation treatment, and pathway genes with top 200 magnitudes of changes among all E2+R5020-treated samples. Both pathways and genes were colored by their directions of changes.*"---- treat_sig %>% dplyr::filter(FDR < 0.05, Treatment == "E2+R5020") %>% dplyr::rename(robustZ = Estimate) %>% plot_gs2gene( gsTopology = gsTopology, colorGS_By = "robustZ", mapEntrezID = entrez2name, geneFC = top200_FC, edgeAlpha = 0.3, GsName_size = 4 ) + theme(panel.background = element_blank()) ## ----geneRank----------------------------------------------------------------- geneRank <- rank_gene_pert(genePertScore, gsTopology) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()