## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- library(SpliceWiz) ## ---- eval = FALSE------------------------------------------------------------ # # # Usual pipeline: # ref_path <- file.path(tempdir(), "Reference") # buildRef( # reference_path = ref_path, # fasta = chrZ_genome(), # gtf = chrZ_gtf() # ) # # pb_path <- file.path(tempdir(), "pb_output") # processBAM( # bamfiles = bams$path, # sample_names = bams$sample, # reference_path = ref_path, # output_path = pb_path # ) # # # Modified pipeline - collateData with novel ASE discovery: # # nxtse_path <- file.path(tempdir(), "NxtSE_output") # collateData( # Experiment = expr, # reference_path = ref_path, # output_path = nxtse_path, # # ## NEW ## # novelSplicing = TRUE, # # switches on novel splice detection # # novelSplicing_requireOneAnnotatedSJ = TRUE, # # novel junctions must share one annotated splice site # # novelSplicing_minSamples = 3, # # retain junctions observed in 3+ samples (of any non-zero expression) # # novelSplicing_minSamplesAboveThreshold = 1, # # only 1 sample required if its junction count exceeds a set threshold # novelSplicing_countThreshold = 10 # # threshold for previous parameter # ) ## ----------------------------------------------------------------------------- # Retrieve example NxtSE object se <- SpliceWiz_example_NxtSE() # Assign annotation of the experimental conditions colData(se)$treatment <- rep(c("A", "B"), each = 3) # Return a list of ggplot and plotly objects, also plotting junction counts p <- plotCoverage( se = se, Event = "SE:SRSF3-203-exon4;SRSF3-202-int3", tracks = colnames(se)[1:4], ## NEW ## plotJunctions = TRUE ) ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ----------------------------------------------------------------------------- # Plot by condition "treatment", including provisional PSIs p <- plotCoverage( se = se, Event = "SE:SRSF3-203-exon4;SRSF3-202-int3", tracks = c("A", "B"), condition = "treatment", ## NEW ## plotJunctions = TRUE ) ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("SpliceWiz") ## ----eval=FALSE--------------------------------------------------------------- # # install.packages("DoubleExpSeq") # # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install(c("DESeq2", "limma", "satuRn")) ## ----------------------------------------------------------------------------- library(SpliceWiz) ## ----------------------------------------------------------------------------- if(interactive()) { spliceWiz(demo = TRUE) } ## ---- echo=FALSE, out.width='231pt', fig.align = 'center', fig.cap="Menu Side Bar"---- knitr::include_graphics("img/MenuBar.jpg") ## ---- results='hide', message = FALSE, warning = FALSE------------------------ ref_path <- file.path(tempdir(), "Reference") buildRef( reference_path = ref_path, fasta = chrZ_genome(), gtf = chrZ_gtf() ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ df <- viewASE(ref_path) ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Reference GUI"---- knitr::include_graphics("img/buildRef.jpg") ## ----------------------------------------------------------------------------- # Provides the path to the example genome: chrZ_genome() # Provides the path to the example gene annotation: chrZ_gtf() ## ----eval = FALSE------------------------------------------------------------- # ref_path_hg38 <- "./Reference" # buildRef( # reference_path = ref_path_hg38, # fasta = "genome.fa", # gtf = "transcripts.gtf", # genome_type = "hg38" # ) ## ----------------------------------------------------------------------------- bams <- SpliceWiz_example_bams() ## ----------------------------------------------------------------------------- # as BAM file names denote their sample names bams <- findBAMS(tempdir(), level = 0) # In the case where BAM files are labelled using sample names as parent # directory names (which oftens happens with the STAR aligner), use level = 1 ## ---- results='hide', message = FALSE, warning = FALSE------------------------ pb_path <- file.path(tempdir(), "pb_output") processBAM( bamfiles = bams$path, sample_names = bams$sample, reference_path = ref_path, output_path = pb_path ) ## ---- echo=FALSE, out.width='8000pt', fig.align = 'center', fig.cap="Experiment GUI"---- knitr::include_graphics("img/Expr_empty.jpg") ## ---- echo=FALSE, out.width='480pt', fig.align = 'center', fig.cap="Define Project Folders"---- knitr::include_graphics("img/Expr_drop_2.jpg") ## ---- echo=FALSE, out.width='640pt', fig.align = 'center', fig.cap="Running processBAM"---- knitr::include_graphics("img/Expr_pb.jpg") ## ---- results='hide', message = FALSE, warning = FALSE------------------------ pb_path <- file.path(tempdir(), "pb_output") processBAM( bamfiles = bams$path, sample_names = bams$sample, reference_path = ref_path, output_path = pb_path ) ## ---- eval = FALSE------------------------------------------------------------ # # NOT RUN # # # Re-run IRFinder without overwrite, and run featureCounts # require(Rsubread) # # processBAM( # bamfiles = bams$path, # sample_names = bams$sample, # reference_path = ref_path, # output_path = pb_path, # n_threads = 2, # overwrite = FALSE, # run_featureCounts = TRUE # ) # # # Load gene counts # gene_counts <- readRDS(file.path(pb_path, "main.FC.Rds")) # # # Access gene counts: # gene_counts$counts ## ----------------------------------------------------------------------------- expr <- findSpliceWizOutput(pb_path) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ nxtse_path <- file.path(tempdir(), "NxtSE_output") collateData( Experiment = expr, reference_path = ref_path, output_path = nxtse_path ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ # Modified pipeline - collateData with novel ASE discovery: nxtse_path <- file.path(tempdir(), "NxtSE_output_novel") collateData( Experiment = expr, reference_path = ref_path, output_path = nxtse_path, novelSplicing = TRUE ## NEW ## ) ## ----eval = FALSE------------------------------------------------------------- # nxtse_path <- file.path(tempdir(), "NxtSE_output_novel") # collateData( # Experiment = expr, # reference_path = ref_path, # output_path = nxtse_path, # # ## NEW ## # novelSplicing = TRUE, # # switches on novel splice detection # # novelSplicing_requireOneAnnotatedSJ = TRUE, # # novel junctions must share one annotated splice site # # novelSplicing_minSamples = 3, # # retain junctions observed in 3+ samples (of any non-zero expression) # # novelSplicing_minSamplesAboveThreshold = 1, # # only 1 sample required if its junction count exceeds a set threshold # novelSplicing_countThreshold = 10 # # threshold for previous parameter # ) ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="collateData GUI"---- knitr::include_graphics("img/Expr_cd.jpg") ## ---- results='hide', message = FALSE, warning = FALSE------------------------ se <- makeSE(nxtse_path) ## ---- echo=FALSE, out.width='480pt', fig.align = 'center', fig.cap="Loading the NxtSE GUI"---- knitr::include_graphics("img/Expr_load_empty.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Loading the NxtSE GUI"---- knitr::include_graphics("img/Expr_load_folder.jpg") ## ----------------------------------------------------------------------------- se <- realize_NxtSE(se) ## ----eval = FALSE------------------------------------------------------------- # se <- makeSE(nxtse_path, realize = TRUE) ## ---- eval = FALSE------------------------------------------------------------ # subset_samples <- colnames(se)[1:4] # df <- data.frame(sample = subset_samples) # se_small <- makeSE(nxtse_path, colData = df, RemoveOverlapping = TRUE) ## ----------------------------------------------------------------------------- colData(se)$condition <- rep(c("A", "B"), each = 3) colData(se)$batch <- rep(c("K", "L", "M"), 2) ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="NxtSE Annotations GUI"---- knitr::include_graphics("img/Expr_anno_empty.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="NxtSE Annotations GUI"---- knitr::include_graphics("img/Expr_anno_filled.jpg") ## ----------------------------------------------------------------------------- se ## ----------------------------------------------------------------------------- head(rowData(se)) ## ----------------------------------------------------------------------------- # Subset by columns: select the first 2 samples se_sample_subset <- se[,1:2] # Subset by rows: select the first 10 ASE events se_ASE_subset <- se[1:10,] ## ----------------------------------------------------------------------------- se.filtered <- se[applyFilters(se),] ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Filters - GUI"---- knitr::include_graphics("img/Filters_empty.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="SpliceWiz default filters- GUI"---- knitr::include_graphics("img/Filters_applied.jpg") ## ---- results='hide', message = FALSE, warning = FALSE------------------------ # Requires limma to be installed: require("limma") res_limma <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", ) ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Differential Expression GUI"---- knitr::include_graphics("img/DE_empty.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Differential Expression (limma) GUI"---- knitr::include_graphics("img/DE_limma.jpg") ## ---- results='hide', message = FALSE, warning = FALSE------------------------ # Requires limma to be installed: require("limma") res_limma <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A" ) # Requires DESeq2 to be installed: require("DESeq2") res_deseq <- ASE_DESeq( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", n_threads = 1 ) # Requires DoubleExpSeq to be installed: require("DoubleExpSeq") res_DES <- ASE_DoubleExpSeq( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A" ) # Requires satuRn to be installed: require("satuRn") res_saturn <- ASE_satuRn( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", n_threads = 1 ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ res_limma_allIntrons <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", IRmode = "all" ) res_limma_annotatedIR <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", IRmode = "annotated" ) res_limma_annotated_binaryIR <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", IRmode = "annotated_binary" ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ require("limma") res_limma_batchnorm <- ASE_limma( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", batch1 = "batch" ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ colData(se.filtered)$timevar <- rep(c(0,1,2), 2) require("DESeq2") res_deseq_cont <- ASE_DESeq( se = se.filtered, test_factor = "timevar" ) ## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"---- library(ggplot2) ggplot(res_limma, aes(x = logFC, y = -log10(adj.P.Val))) + geom_point() + labs(title = "Differential analysis - B vs A", x = "Log2-fold change", y = "BH-adjusted P values (-log10)") ## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"---- ggplot(res_limma, aes(x = logFC, y = -log10(adj.P.Val))) + geom_point() + facet_wrap(vars(EventType)) + labs(title = "Differential analysis - B vs A", x = "Log2-fold change", y = "BH-adjusted P values (-log10)") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Volcano Plot - GUI"---- knitr::include_graphics("img/Vis_volcano.jpg") ## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"---- library(ggplot2) ggplot(res_limma, aes(x = 100 * AvgPSI_B, y = 100 * AvgPSI_A)) + geom_point() + xlim(0, 100) + ylim(0, 100) + labs(title = "PSI values across conditions", x = "PSI of condition B", y = "PSI of condition A") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Scatter plot - GUI"---- knitr::include_graphics("img/Vis_scatter.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Volcano plot - selecting ASEs of interest via the GUI"---- knitr::include_graphics("img/Vis_volcano_select.jpg") ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Scatter plot - highlighted ASE events"---- knitr::include_graphics("img/Vis_scatter_highlighted.jpg") ## ----------------------------------------------------------------------------- meanPSIs <- makeMeanPSI( se = se, condition = "batch", conditionList = list("K", "L", "M") ) ## ----------------------------------------------------------------------------- # Create a matrix of values of the top 10 differentially expressed events: mat <- makeMatrix( se.filtered, event_list = res_limma$EventName[1:10], method = "PSI" ) ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- library(pheatmap) anno_col_df <- as.data.frame(colData(se.filtered)) anno_col_df <- anno_col_df[, 1, drop=FALSE] pheatmap(mat, annotation_col = anno_col_df) ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Heatmap - GUI"---- knitr::include_graphics("img/Vis_heatmap.jpg") ## ----------------------------------------------------------------------------- res_limma.filtered <- subset(res_limma, abs(deltaPSI) > 0.05) ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- p <- plotCoverage( se = se, Event = res_limma.filtered$EventName[1], tracks = colnames(se)[c(1,2,4,5)], ) if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ---- eval = Sys.info()["sysname"] != "Darwin"-------------------------------- p <- plotCoverage( se = se, Event = res_limma.filtered$EventName[1], tracks = colnames(se)[c(1,2,4,5)], plotJunctions = TRUE ) if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ---- eval = Sys.info()["sysname"] != "Darwin"-------------------------------- p <- plotCoverage( se = se, Event = res_limma.filtered$EventName[1], tracks = colnames(se)[c(1,2,4,5)], selected_transcripts = c("NSUN5-201", "NSUN5-203", "NSUN5-206") ) if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- p <- plotCoverage( se = se, Event = res_limma.filtered$EventName[1], tracks = c("A", "B"), condition = "condition", plotJunctions = TRUE, ) if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"---- p <- plotCoverage( se = se, Event = res_limma.filtered$EventName[1], tracks = c("A", "B"), condition = "condition", stack_tracks = TRUE, ) if(interactive()) { # Display as plotly object p$final_plot } else { # Display as ggplot as_ggplot_cov(p) } ## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Coverage Plots - GUI"---- knitr::include_graphics("img/Cov.jpg") ## ----------------------------------------------------------------------------- sessionInfo()