## ----include=FALSE------------------------------------------------------------
library(BiocStyle)
## ----echo = FALSE, fig.cap = "A: Overview of the diffUTR workflow. B: Bin creation scheme."----
knitr::include_graphics(system.file('docs', 'figure1.svg', package = 'diffUTR'))
## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("diffUTR")
## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
library(diffUTR)
library(SummarizedExperiment)
})
## ----eval=FALSE---------------------------------------------------------------
# library(AnnotationHub)
# ah <- AnnotationHub()
# # obtain the identifier of the latest mouse ensembl annotation:
# ahid <- rev(query(ah, c("EnsDb", "Mus musculus"))$ah_id)[1]
# ensdb <- ah[[ahid]]
## -----------------------------------------------------------------------------
data(example_gene_annotation, package="diffUTR")
g <- example_gene_annotation
head(g)
## -----------------------------------------------------------------------------
# If you know that your data will be stranded, use
# bins <- prepareBins(g, stranded=TRUE)
# Otherwise use
bins <- prepareBins(g)
## ----eval=FALSE---------------------------------------------------------------
# bamfiles <- c("path/to/sample1.bam", "path/to/sample2.bam", ...)
# rse <- countFeatures(bamfiles, bins, strandSpecific=2, nthreads=6, isPairedEnd=FALSE)
## -----------------------------------------------------------------------------
data(example_bin_se, package="diffUTR")
rse <- example_bin_se
rse
## -----------------------------------------------------------------------------
head(rowRanges(rse))
## -----------------------------------------------------------------------------
rse <- diffSpliceWrapper(rse, design = ~condition)
## -----------------------------------------------------------------------------
head(rowData(rse))
## -----------------------------------------------------------------------------
perGene <- metadata(rse)$geneLevel
head(perGene)
## ----eval=FALSE---------------------------------------------------------------
# download.file("https://polyasite.unibas.ch/download/atlas/2.0/GRCm38.96/atlas.clusters.2.0.GRCm38.96.bed.gz",
# destfile="apa.mm38.bed.gz")
# bins <- prepareBins(g, "apa.mm38.bed.gz")
# # (Again, if you know that your data will be stranded, use `stranded=TRUE`)
## -----------------------------------------------------------------------------
data(rn6_PAS)
# bins <- prepareBins(g, rn6_PAS)
## ----eval=FALSE---------------------------------------------------------------
# bamfiles <- c("path/to/sample1.bam", "path/to/sample2.bam", ...)
# se <- countFeatures(bamfiles, bins, strandSpecific=2, nthreads=6, isPairedEnd=TRUE)
## -----------------------------------------------------------------------------
rse <- diffSpliceWrapper( rse, design = ~condition )
## -----------------------------------------------------------------------------
# we can then only look for changes in CDS bins:
cds <- geneLevelStats(rse, includeTypes="CDS", returnSE=FALSE)
head(cds)
# or only look for changes in UTR bins:
utr <- geneLevelStats(rse, includeTypes="UTR", returnSE=FALSE)
head(utr)
# or only look for changes in bins that _could be_ UTRs:
# geneLevelStats(rse, excludeTypes=c("CDS","non-coding"), returnSE=FALSE)
## -----------------------------------------------------------------------------
plotTopGenes(rse)
## -----------------------------------------------------------------------------
# `utr` being the output of the above
# geneLevelStats(rse, includeTypes="UTR", returnSE=FALSE)
plotTopGenes(utr, diffUTR = TRUE)
## -----------------------------------------------------------------------------
rse <- addNormalizedAssays(rse)
## -----------------------------------------------------------------------------
deuBinPlot(rse,"Jund")
deuBinPlot(rse,"Jund", type="condition", colour="condition")
deuBinPlot(rse,"Jund", type="sample", colour="condition") # shows separate samples
## -----------------------------------------------------------------------------
library(ggplot2)
deuBinPlot(rse,"Jund",type="condition",colour="condition") +
guides(colour = guide_legend(override.aes = list(size = 3))) +
scale_colour_manual(values=c(CTRL="darkblue", LTP="red"))
## -----------------------------------------------------------------------------
geneBinHeatmap(rse, "Smg6")
## -----------------------------------------------------------------------------
#' binTxPlot
#'
#' @param se A bin-wise SummarizedExperiment as produced by
#' \code{\link{countFeatures}} and including bin-level tests (i.e. having been
#' passed through one of the DEU wrappers such as
#' \code{\link{diffSpliceWrapper}} or \code{\link{DEXSeqWrapper}})
#' @param ensdb The `EnsDb` which was used to create the bins
#' @param gene The gene of interest
#' @param by The colData column of `se` used to split the samples
#' @param assayName The assay to plot
#' @param removeAmbiguous Logical; whether to remove bins that are
#' gene-ambiguous (i.e. overlap multiple genes).
#' @param size Size of the lines
#' @param ... Passed to `plotRangesLinkedToData`
#'
#' @return A `ggbio` `Tracks`
#' @importFrom AnnotationFilter GeneNameFilter GeneIdFilter
#' @importFrom ensembldb getGeneRegionTrackForGviz
#' @importFrom ggbio plotRangesLinkedToData autoplot
#' @export
binTxPlot <- function(se, ensdb, gene, by=NULL, assayName=c("logNormDensity"),
removeAmbiguous=TRUE, size=3, threshold=0.05, ...){
w <- diffUTR:::.matchGene(se, gene)
se <- sort(se[w,])
if(removeAmbiguous) se <- se[!rowData(se)$geneAmbiguous,]
if(length(w)==0) return(NULL)
if(!is.null(by)) by <- match.arg(by, colnames(colData(se)))
assayName <- match.arg(assayName, assayNames(se))
if(rowData(se)$gene[[1]]==gene){
filt <- GeneIdFilter(gene)
}else{
filt <- GeneNameFilter(gene)
}
gr <- ggbio::autoplot(ensdb, filt)
gr2 <- rowRanges(se)
if(!is.null(by)){
sp <- split(seq_len(ncol(se)), se[[by]])
for(f in names(sp))
mcols(gr2)[[f]] <- rowMeans(assays(se)[[assayName]][,sp[[f]],drop=FALSE])
y <- names(sp)
}else{
mcols(gr2)[[assayName]] <- rowMeans(assays(se)[[assayName]])
y <- assayName
}
ggbio::plotRangesLinkedToData(gr2, stat.y=y, stat.ylab=assayName,
annotation=list(gr),
size=size, ...) + ggtitle(gene)
}
# example usage (not run):
# binTxPlot(rse, ensdb, gene="Arid5a", by="condition")
## -----------------------------------------------------------------------------
sessionInfo()