%\VignetteIndexEntry{4. Ten Things You Didn't Know (slides from BioC 2016)}
%\VignetteDepends{GenomicRanges, Biostrings, Rsamtools, BSgenome, hgu95av2probe}

\SweaveOpts{keep.source=TRUE, eps=FALSE, width=9, height=3}

\documentclass[9pt]{beamer}

\usepackage{slides}
\renewcommand\Rclass[1]{{\texttt{#1}\index{#1 (class)}}}

\title{10 things (maybe) you didn't know about GenomicRanges, Biostrings,
       and Rsamtools}

\author{Herv\'e Pag\`es\\
        \href{mailto:hpages.on.github@gmail.com}{hpages.on.github@gmail.com}}

\date{June 2016}

\begin{document}

<<setup, echo=FALSE, results=hide>>=
options(width=80)
library(GenomicRanges)
library(Biostrings)
library(Rsamtools)
library(BSgenome)
library(hgu95av2probe)

example(GRangesList)

gr <- GRanges(Rle(c("chr2", "chr2", "chr1", "chr3"), c(1, 3, 2, 4)),
              IRanges(1:10, width=10:1, names=head(letters, 10)),
              Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
              score=1:10, GC=seq(1, 0, length=10))

ir <- IRanges(c(11:13, 2, 7:6), width=3)
mcols(ir) <- DataFrame(id=letters[1:6], score=3:-2)

x <- GRanges(c("chr1:1-1000", "chr2:2000-3000"),
             score=c(0.45, 0.1), a1=c(5L, 7L), a2=c(6, 8))
mcols(x)$score[2] <- NA
y <- GRanges(c("chr2:150-151", "chr1:1-10", "chr2:2000-3000"),
             score=c(0.7, 0.82, 0.1), b1=c(0L, 5L, 1L), b2=c(1, -2, 1))
@

\maketitle

%\frame{\tableofcontents}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{1. {\it Inner} vs {\it outer} metadata columns}
  \begin{exampleblock}{}
{\small
<<inner_outer_mcols1>>=
mcols(grl)$id <- paste0("ID", seq_along(grl))
grl
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{1. {\it Inner} vs {\it outer} metadata columns}
  \begin{exampleblock}{}
{\small
<<inner_outer_mcols2>>=
mcols(grl)  # outer mcols
mcols(unlist(grl, use.names=FALSE))  # inner mcols
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{2. invertStrand()}
  Works out-of-the-box on any object that has a strand() getter and setter
  ==> no need to implement specific methods.
  \begin{exampleblock}{}
{\small
<<>>=
gr
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{2. invertStrand()}
  \begin{exampleblock}{}
{\small
<<>>=
invertStrand(gr)
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{2. invertStrand()}
  \begin{exampleblock}{}
{\small
<<>>=
grl
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{2. invertStrand()}
  \begin{exampleblock}{}
{\small
<<>>=
invertStrand(grl)
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{3. extractList()}
  Extract groups of elements from a vector-like object and return them in
  a list-like object.
  \begin{exampleblock}{}
<<>>=
cvg <- Rle(c(0L, 2L, 5L, 1L, 0L), c(10, 6, 3, 4, 15))
cvg
i <- IRanges(c(16, 19, 9), width=5, names=letters[1:3])
i
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{3. extractList()}
  \begin{exampleblock}{}
<<>>=
extractList(cvg, i)
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{3. extractList()}
  \begin{exampleblock}{}
  \Rcode{i} can be an IntegerList object:
{\small
<<>>=
i <- IntegerList(c(25:20), NULL, seq(from=2, to=length(cvg), by=2))
i
extractList(cvg, i)
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{4. 'with.revmap' arg for reduce() and (now) disjoin()}
  \begin{exampleblock}{}
<<>>=
ir
ir2 <- reduce(ir, with.revmap=TRUE)
ir2
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{4. 'with.revmap' arg for reduce() and disjoin()}
  \begin{exampleblock}{}
{\small
<<>>=
revmap <- mcols(ir2)$revmap
extractList(mcols(ir)$id, revmap)
extractList(mcols(ir)$score, revmap)
mcols(ir2) <- DataFrame(id=extractList(mcols(ir)$id, revmap),
                        score=extractList(mcols(ir)$score, revmap))
ir2
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{5. Zero-width ranges}
  \Rcode{findOverlaps}/\Rcode{countOverlaps} support zero-width ranges.
  \begin{exampleblock}{}
{\small
<<>>=
sliding_query <- IRanges(1:6, width=0)
sliding_query
countOverlaps(sliding_query, IRanges(3, 4))
@
}
  \end{exampleblock}
  But you have to specify \Rcode{minoverlap=0} for this to work (default is 1).
  \begin{exampleblock}{}
{\small
<<>>=
countOverlaps(sliding_query, IRanges(3, 4), minoverlap=0)
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{6. Biostrings::replaceAt()}
  Perform multiple substitutions at arbitrary positions in a set of
  sequences.
  \begin{exampleblock}{}
<<>>=
library(Biostrings)
library(hgu95av2probe)
probes <- DNAStringSet(hgu95av2probe)
probes
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{6. Biostrings::replaceAt()}
  Replace 3rd and 4th nucleotides by pattern \Rcode{-++-}.
  \begin{exampleblock}{}
<<>>=
replaceAt(probes, at=IRanges(3, 4), value="-++-")
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{6. Biostrings::replaceAt()}
  If supplied pattern is empty, then performs deletions.
  \begin{exampleblock}{}
<<>>=
replaceAt(probes, at=IRanges(3, 4), value="")
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{6. Biostrings::replaceAt()}
  If \Rcode{at} is a zero-with range, then performs insertions.
  \begin{exampleblock}{}
<<>>=
replaceAt(probes, at=IRanges(4, 3), value="-++-")
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{6. Biostrings::replaceAt()}
  Use it in combination with \Rcode{vmatchPattern} to replace all the
  occurences of a given pattern with another pattern:
  \begin{exampleblock}{}
<<>>=
midx <- vmatchPattern("VCGTT", probes, fixed=FALSE)
replaceAt(probes, at=midx, value="-++-")
@
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{7. GRanges as a subscript}
  \begin{exampleblock}{}
{\small
<<GRanges_as_a_subscript_1>>=
cvg <- RleList(chr1=101:120, chr2=2:-8, chr3=31:40)
gr
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{7. GRanges as a subscript}
  \begin{exampleblock}{}
{\scriptsize
<<GRanges_as_a_subscript_2>>=
cvg[gr]
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{8. BSgenomeViews objects}
  \begin{exampleblock}{}
<<>>=
library(BSgenome.Mmusculus.UCSC.mm10)
genome <- BSgenome.Mmusculus.UCSC.mm10
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
ex <- exons(txdb, columns=c("exon_id", "tx_name", "gene_id"))
v <- Views(genome, ex)
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{8. BSgenomeViews objects}
  \begin{exampleblock}{}
{\scriptsize
<<>>=
v
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{8. BSgenomeViews objects}
  \begin{exampleblock}{}
<<>>=
af <- alphabetFrequency(v, baseOnly=TRUE)
head(af)
@
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{9. Pile-up statistics on a BAM file with Rsamtools::pileup()}
  \begin{exampleblock}{}
<<>>=
library(Rsamtools)
library(RNAseqData.HNRNPC.bam.chr14)
fl <- RNAseqData.HNRNPC.bam.chr14_BAMFILES[1]
sbp <- ScanBamParam(which=GRanges("chr14", IRanges(1, 53674770)))
pp <- PileupParam(distinguish_nucleotides=FALSE,
                  distinguish_strands=FALSE,
                  min_mapq=13,
                  min_base_quality=10,
                  min_nucleotide_depth=4)
res <- pileup(fl, scanBamParam=sbp, pileupParam=pp)
@
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{9. Pile-up statistics on a BAM file with Rsamtools::pileup()}
  \begin{exampleblock}{}
<<>>=
dim(res)
head(res)
@
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{10. Merging 2 GRanges objects (added this week)}
  \begin{exampleblock}{}
{\small
<<>>=
x
y
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{10. Merging 2 GRanges objects}
  \begin{exampleblock}{}
{\small
<<>>=
merge(x, y)
@
}
  \end{exampleblock}
\end{frame}

\begin{frame}[fragile]
  \frametitle{10. Merging 2 GRanges objects}
  \begin{exampleblock}{}
{\small
<<>>=
merge(x, y, all=TRUE)
@
}
  \end{exampleblock}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}