## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## -----------------------------------------------------------------------------
library(SpliceWiz)

## ---- eval = FALSE------------------------------------------------------------
#  
#  # Usual pipeline:
#  ref_path <- file.path(tempdir(), "Reference")
#  buildRef(
#      reference_path = ref_path,
#      fasta = chrZ_genome(),
#      gtf = chrZ_gtf()
#  )
#  
#  pb_path <- file.path(tempdir(), "pb_output")
#  processBAM(
#      bamfiles = bams$path,
#      sample_names = bams$sample,
#      reference_path = ref_path,
#      output_path = pb_path
#  )
#  
#  # Modified pipeline - collateData with novel ASE discovery:
#  
#  nxtse_path <- file.path(tempdir(), "NxtSE_output")
#  collateData(
#      Experiment = expr,
#      reference_path = ref_path,
#      output_path = nxtse_path,
#  
#          ## NEW ##
#      novelSplicing = TRUE,
#          # switches on novel splice detection
#  
#      novelSplicing_requireOneAnnotatedSJ = TRUE,
#          # novel junctions must share one annotated splice site
#  
#      novelSplicing_minSamples = 3,
#          # retain junctions observed in 3+ samples (of any non-zero expression)
#  
#      novelSplicing_minSamplesAboveThreshold = 1,
#          # only 1 sample required if its junction count exceeds a set threshold
#      novelSplicing_countThreshold = 10
#          # threshold for previous parameter
#  )

## -----------------------------------------------------------------------------
# Retrieve example NxtSE object
se <- SpliceWiz_example_NxtSE()

# Assign annotation of the experimental conditions
colData(se)$treatment <- rep(c("A", "B"), each = 3)

# Return a list of ggplot and plotly objects, also plotting junction counts
p <- plotCoverage(
    se = se,
    Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
    tracks = colnames(se)[1:4], 
    
        ## NEW ##
    plotJunctions = TRUE
)

## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## -----------------------------------------------------------------------------
# Plot by condition "treatment", including provisional PSIs
p <- plotCoverage(
    se = se,
    Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
    tracks = c("A", "B"), condition = "treatment", 

        ## NEW ##
    plotJunctions = TRUE
)


## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## ----eval=FALSE---------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("SpliceWiz")

## ----eval=FALSE---------------------------------------------------------------
#  
#  install.packages("DoubleExpSeq")
#  
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install(c("DESeq2", "limma", "satuRn"))

## -----------------------------------------------------------------------------
library(SpliceWiz)

## -----------------------------------------------------------------------------
if(interactive()) {
    spliceWiz(demo = TRUE)
}

## ---- echo=FALSE, out.width='231pt', fig.align = 'center', fig.cap="Menu Side Bar"----
knitr::include_graphics("img/MenuBar.jpg")

## ---- results='hide', message = FALSE, warning = FALSE------------------------
ref_path <- file.path(tempdir(), "Reference")
buildRef(
    reference_path = ref_path,
    fasta = chrZ_genome(),
    gtf = chrZ_gtf()
)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
df <- viewASE(ref_path)


## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Reference GUI"----
knitr::include_graphics("img/buildRef.jpg")

## -----------------------------------------------------------------------------
# Provides the path to the example genome:
chrZ_genome()

# Provides the path to the example gene annotation:
chrZ_gtf()

## ----eval = FALSE-------------------------------------------------------------
#  ref_path_hg38 <- "./Reference"
#  buildRef(
#      reference_path = ref_path_hg38,
#      fasta = "genome.fa",
#      gtf = "transcripts.gtf",
#      genome_type = "hg38"
#  )

## -----------------------------------------------------------------------------
bams <- SpliceWiz_example_bams()

## -----------------------------------------------------------------------------
# as BAM file names denote their sample names
bams <- findBAMS(tempdir(), level = 0) 

# In the case where BAM files are labelled using sample names as parent 
# directory names (which oftens happens with the STAR aligner), use level = 1

## ---- results='hide', message = FALSE, warning = FALSE------------------------
pb_path <- file.path(tempdir(), "pb_output")
processBAM(
    bamfiles = bams$path,
    sample_names = bams$sample,
    reference_path = ref_path,
    output_path = pb_path
)

## ---- echo=FALSE, out.width='8000pt', fig.align = 'center', fig.cap="Experiment GUI"----
knitr::include_graphics("img/Expr_empty.jpg")

## ---- echo=FALSE, out.width='480pt', fig.align = 'center', fig.cap="Define Project Folders"----
knitr::include_graphics("img/Expr_drop_2.jpg")

## ---- echo=FALSE, out.width='640pt', fig.align = 'center', fig.cap="Running processBAM"----
knitr::include_graphics("img/Expr_pb.jpg")

## ---- results='hide', message = FALSE, warning = FALSE------------------------
pb_path <- file.path(tempdir(), "pb_output")
processBAM(
    bamfiles = bams$path,
    sample_names = bams$sample,
    reference_path = ref_path,
    output_path = pb_path
)

## ---- eval = FALSE------------------------------------------------------------
#  # NOT RUN
#  
#  # Re-run IRFinder without overwrite, and run featureCounts
#  require(Rsubread)
#  
#  processBAM(
#      bamfiles = bams$path,
#      sample_names = bams$sample,
#      reference_path = ref_path,
#      output_path = pb_path,
#      n_threads = 2,
#      overwrite = FALSE,
#      run_featureCounts = TRUE
#  )
#  
#  # Load gene counts
#  gene_counts <- readRDS(file.path(pb_path, "main.FC.Rds"))
#  
#  # Access gene counts:
#  gene_counts$counts

## -----------------------------------------------------------------------------
expr <- findSpliceWizOutput(pb_path)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
nxtse_path <- file.path(tempdir(), "NxtSE_output")
collateData(
    Experiment = expr,
    reference_path = ref_path,
    output_path = nxtse_path
)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
# Modified pipeline - collateData with novel ASE discovery:

nxtse_path <- file.path(tempdir(), "NxtSE_output_novel")
collateData(
    Experiment = expr,
    reference_path = ref_path,
    output_path = nxtse_path,
    novelSplicing = TRUE     ## NEW ##
)

## ----eval = FALSE-------------------------------------------------------------
#  nxtse_path <- file.path(tempdir(), "NxtSE_output_novel")
#  collateData(
#      Experiment = expr,
#      reference_path = ref_path,
#      output_path = nxtse_path,
#  
#          ## NEW ##
#      novelSplicing = TRUE,
#          # switches on novel splice detection
#  
#      novelSplicing_requireOneAnnotatedSJ = TRUE,
#          # novel junctions must share one annotated splice site
#  
#      novelSplicing_minSamples = 3,
#          # retain junctions observed in 3+ samples (of any non-zero expression)
#  
#      novelSplicing_minSamplesAboveThreshold = 1,
#          # only 1 sample required if its junction count exceeds a set threshold
#      novelSplicing_countThreshold = 10
#          # threshold for previous parameter
#  )

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="collateData GUI"----
knitr::include_graphics("img/Expr_cd.jpg")

## ---- results='hide', message = FALSE, warning = FALSE------------------------
se <- makeSE(nxtse_path)

## ---- echo=FALSE, out.width='480pt', fig.align = 'center', fig.cap="Loading the NxtSE GUI"----
knitr::include_graphics("img/Expr_load_empty.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Loading the NxtSE GUI"----
knitr::include_graphics("img/Expr_load_folder.jpg")

## -----------------------------------------------------------------------------
se <- realize_NxtSE(se)

## ----eval = FALSE-------------------------------------------------------------
#  se <- makeSE(nxtse_path, realize = TRUE)

## ---- eval = FALSE------------------------------------------------------------
#  subset_samples <- colnames(se)[1:4]
#  df <- data.frame(sample = subset_samples)
#  se_small <- makeSE(nxtse_path, colData = df, RemoveOverlapping = TRUE)

## -----------------------------------------------------------------------------
colData(se)$condition <- rep(c("A", "B"), each = 3)
colData(se)$batch <- rep(c("K", "L", "M"), 2)

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="NxtSE Annotations GUI"----
knitr::include_graphics("img/Expr_anno_empty.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="NxtSE Annotations GUI"----
knitr::include_graphics("img/Expr_anno_filled.jpg")

## -----------------------------------------------------------------------------
se

## -----------------------------------------------------------------------------
head(rowData(se))

## -----------------------------------------------------------------------------
# Subset by columns: select the first 2 samples
se_sample_subset <- se[,1:2]

# Subset by rows: select the first 10 ASE events
se_ASE_subset <- se[1:10,]

## -----------------------------------------------------------------------------
se.filtered <- se[applyFilters(se),]

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Filters - GUI"----
knitr::include_graphics("img/Filters_empty.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="SpliceWiz default filters- GUI"----
knitr::include_graphics("img/Filters_applied.jpg")

## ---- results='hide', message = FALSE, warning = FALSE------------------------
# Requires limma to be installed:
require("limma")
res_limma <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
)

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Differential Expression GUI"----
knitr::include_graphics("img/DE_empty.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Differential Expression (limma) GUI"----
knitr::include_graphics("img/DE_limma.jpg")

## ---- results='hide', message = FALSE, warning = FALSE------------------------
# Requires limma to be installed:
require("limma")
res_limma <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A"
)

# Requires DESeq2 to be installed:
require("DESeq2")
res_deseq <- ASE_DESeq(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    n_threads = 1
)

# Requires DoubleExpSeq to be installed:
require("DoubleExpSeq")
res_DES <- ASE_DoubleExpSeq(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A"
)

# Requires satuRn to be installed:
require("satuRn")
res_saturn <- ASE_satuRn(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    n_threads = 1
)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
res_limma_allIntrons <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    IRmode = "all"
)

res_limma_annotatedIR <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    IRmode = "annotated"
)

res_limma_annotated_binaryIR <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    IRmode = "annotated_binary"
)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
require("limma")
res_limma_batchnorm <- ASE_limma(
    se = se.filtered,
    test_factor = "condition",
    test_nom = "B",
    test_denom = "A",
    batch1 = "batch"
)

## ---- results='hide', message = FALSE, warning = FALSE------------------------
colData(se.filtered)$timevar <- rep(c(0,1,2), 2)

require("DESeq2")
res_deseq_cont <- ASE_DESeq(
    se = se.filtered,
    test_factor = "timevar"
)

## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"----
library(ggplot2)

ggplot(res_limma,
        aes(x = logFC, y = -log10(adj.P.Val))) + 
    geom_point() +
    labs(title = "Differential analysis - B vs A",
         x = "Log2-fold change", y = "BH-adjusted P values (-log10)")

## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"----
ggplot(res_limma,
        aes(x = logFC, y = -log10(adj.P.Val))) + 
    geom_point() + facet_wrap(vars(EventType)) +
    labs(title = "Differential analysis - B vs A",
         x = "Log2-fold change", y = "BH-adjusted P values (-log10)")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Volcano Plot - GUI"----
knitr::include_graphics("img/Vis_volcano.jpg")

## ---- fig.width = 7, fig.height = 5, eval = Sys.info()["sysname"] != "Darwin"----
library(ggplot2)

ggplot(res_limma, aes(x = 100 * AvgPSI_B, y = 100 * AvgPSI_A)) + 
    geom_point() + xlim(0, 100) + ylim(0, 100) +
    labs(title = "PSI values across conditions",
         x = "PSI of condition B", y = "PSI of condition A")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Scatter plot - GUI"----
knitr::include_graphics("img/Vis_scatter.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Volcano plot - selecting ASEs of interest via the GUI"----
knitr::include_graphics("img/Vis_volcano_select.jpg")

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Scatter plot - highlighted ASE events"----
knitr::include_graphics("img/Vis_scatter_highlighted.jpg")

## -----------------------------------------------------------------------------
meanPSIs <- makeMeanPSI(
    se = se,
    condition = "batch",
    conditionList = list("K", "L", "M")
)

## -----------------------------------------------------------------------------
# Create a matrix of values of the top 10 differentially expressed events:
mat <- makeMatrix(
    se.filtered,
    event_list = res_limma$EventName[1:10],
    method = "PSI"
)

## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
library(pheatmap)

anno_col_df <- as.data.frame(colData(se.filtered))
anno_col_df <- anno_col_df[, 1, drop=FALSE]

pheatmap(mat, annotation_col = anno_col_df)

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Heatmap - GUI"----
knitr::include_graphics("img/Vis_heatmap.jpg")

## -----------------------------------------------------------------------------
res_limma.filtered <- subset(res_limma, abs(deltaPSI) > 0.05)

## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
p <- plotCoverage(
    se = se,
    Event = res_limma.filtered$EventName[1],
    tracks = colnames(se)[c(1,2,4,5)],
)

if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}


## ---- eval = Sys.info()["sysname"] != "Darwin"--------------------------------
p <- plotCoverage(
    se = se,
    Event = res_limma.filtered$EventName[1],
    tracks = colnames(se)[c(1,2,4,5)],
    plotJunctions = TRUE
)

if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## ---- eval = Sys.info()["sysname"] != "Darwin"--------------------------------
p <- plotCoverage(
    se = se,
    Event = res_limma.filtered$EventName[1],
    tracks = colnames(se)[c(1,2,4,5)],
    selected_transcripts = c("NSUN5-201", "NSUN5-203", "NSUN5-206")
)

if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
p <- plotCoverage(
    se = se,
    Event = res_limma.filtered$EventName[1],
    tracks = c("A", "B"),
    condition = "condition",
    plotJunctions = TRUE,
)

if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## ---- fig.width = 8, fig.height = 6, eval = Sys.info()["sysname"] != "Darwin"----
p <- plotCoverage(
    se = se,
    Event = res_limma.filtered$EventName[1],
    tracks = c("A", "B"),
    condition = "condition",
    stack_tracks = TRUE,
)

if(interactive()) {
    # Display as plotly object
    p$final_plot
} else {
    # Display as ggplot
    as_ggplot_cov(p)
}

## ---- echo=FALSE, out.width='800pt', fig.align = 'center', fig.cap="Coverage Plots - GUI"----
knitr::include_graphics("img/Cov.jpg")

## -----------------------------------------------------------------------------
sessionInfo()