## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----setup, echo=FALSE------------------------------------
library(knitr)
opts_chunk$set(dev="pdf", fig.align="center", cache=FALSE, message=FALSE, out.width=".55\\textwidth", echo=TRUE, results="markup", fig.show="hold")
options(width=60)

## ----data-------------------------------------------------
require(EDASeq)
require(yeastRNASeq)
require(leeBamViews)

## ----import-raw-------------------------------------------
files <- list.files(file.path(system.file(package = "yeastRNASeq"), 
                              "reads"), pattern = "fastq", full.names = TRUE)
names(files) <- gsub("\\.fastq.*", "", basename(files))
met <- DataFrame(conditions=c(rep("mut",2), rep("wt",2)), 
                 row.names=names(files))
fastq <- FastqFileList(files)
elementMetadata(fastq) <- met
fastq

## ----import-aligned---------------------------------------
files <- list.files(file.path(system.file(package = "leeBamViews"), "bam"),
                    pattern = "bam$", full.names = TRUE)
names(files) <- gsub("\\.bam", "", basename(files))

gt <- gsub(".*/", "", files)
gt <- gsub("_.*", "", gt)
lane <- gsub(".*(.)$", "\\1", gt)
geno <- gsub(".$", "", gt)

pd <- DataFrame(geno=geno, lane=lane,
                row.names=paste(geno,lane,sep="."))

bfs <- BamFileList(files)
elementMetadata(bfs) <- pd
bfs

## ----plot-total, fig.cap="Per-lane number of mapped reads and quality scores", fig.subcap=c("Number of mapped reads", "Mean per-base quality of mapped reads"), out.width='.49\\linewidth'----
colors <- c(rep(rgb(1,0,0,alpha=0.7),2), 
            rep(rgb(0,0,1,alpha=0.7),2),
            rep(rgb(0,1,0,alpha=0.7),2),
            rep(rgb(0,1,1,alpha=0.7),2))
barplot(bfs,las=2,col=colors)

plotQuality(bfs,col=colors,lty=1)
legend("topright",unique(elementMetadata(bfs)[,1]), fill=unique(colors))

## ----plot-qual, fig.cap="Quality scores and number of mapped reads for lane 'isowt5\\_13e.'", fig.subcap=c("Per-base quality of mapped reads", "Number of mapped reads per-chromosome"), out.width='.49\\linewidth'----
plotQuality(bfs[[1]],cex.axis=.8)
barplot(bfs[[1]],las=2)

## ----plot-nt, fig.cap="Per-base nucleotide frequencies of mapped reads for lane 'isowt5\\_13e.'", out.width='.49\\linewidth'----
plotNtFrequency(bfs[[1]])

## ----load-gene-level--------------------------------------
data(geneLevelData)
head(geneLevelData)

## ----load-lgc---------------------------------------------
data(yeastGC)
head(yeastGC)
data(yeastLength)
head(yeastLength)

## ----filter-----------------------------------------------
filter <- apply(geneLevelData,1,function(x) mean(x)>10)
table(filter)
common <- intersect(names(yeastGC),
                    rownames(geneLevelData[filter,])) 
length(common)

## ----create-object----------------------------------------
feature <- data.frame(gc=yeastGC,length=yeastLength)
data <- newSeqExpressionSet(counts=as.matrix(geneLevelData[common,]),
                            featureData=feature[common,],
                            phenoData=data.frame(
                              conditions=c(rep("mut",2),rep("wt",2)),
                              row.names=colnames(geneLevelData)))
data

## ----show-data--------------------------------------------
head(counts(data))
pData(data)
head(fData(data))

## ----show-offset------------------------------------------
head(offst(data))

## ----boxplot-genelevel, fig.cap="Between-lane distribution of gene-level counts (log)."----
boxplot(data,col=colors[1:4])

## ----md-plot, fig.cap="Mean-difference plot of the gene-level counts (log) of lanes 'mut\\_1' and 'wt\\_1.'"----
MDPlot(data,c(1,3))

## ----plot-mean-var, fig.cap="Mean-variance relationship for the two mutant lanes and all four lanes: the black line corresponds to the Poisson distribution (variance equal to the mean), while the red curve is a lowess fit.", fig.subcap=c("Mutant lanes", "All four lanes"), out.width='.49\\linewidth'----
meanVarPlot(data[,1:2], log=TRUE, ylim=c(0,16))
meanVarPlot(data, log=TRUE, ylim=c(0,16))

## ----plot-gc, fig.cap="Lowess regression of the gene-level counts (log) on GC-content for each lane, color-coded by experimental condition."----
biasPlot(data, "gc", log=TRUE, ylim=c(1,5))

## ----boxplot-gc, fig.cap="Boxplots of the log-fold-change between 'mut\\_1' and 'wt\\_1' lanes stratified by GC-content."----
lfc <- log(counts(data)[,3]+0.1) - log(counts(data)[,1]+0.1)
biasBoxplot(lfc, fData(data)$gc)

## ----normalization----------------------------------------
dataWithin <- withinLaneNormalization(data,"gc", which="full")
dataNorm <- betweenLaneNormalization(dataWithin, which="full")

## ----plot-gc-norm, fig.cap="Full-quantile within- and between-lane normalization. (a) Lowess regression of normalized gene-level counts (log) on GC-content for each lane. (b) Between-lane distribution of normalized gene-level counts (log).", fig.subcap=c("GC-content", "Count distribution"), out.width='.49\\linewidth'----
biasPlot(dataNorm, "gc", log=TRUE, ylim=c(1,5))
boxplot(dataNorm, col=colors)

## ----norm-offset------------------------------------------
dataOffset <- withinLaneNormalization(data,"gc",
                                      which="full",offset=TRUE)
dataOffset <- betweenLaneNormalization(dataOffset,
                                       which="full",offset=TRUE)

## ----edger------------------------------------------------
library(edgeR)
design <- model.matrix(~conditions, data=pData(dataOffset))
disp <- estimateGLMCommonDisp(counts(dataOffset), 
                              design, offset=-offst(dataOffset))

fit <- glmFit(counts(dataOffset), design, disp, offset=-offst(dataOffset))

lrt <- glmLRT(fit, coef=2)
topTags(lrt)

## ----deseq------------------------------------------------
library(DESeq)
counts <- as(dataNorm, "CountDataSet")
sizeFactors(counts) <- rep(1,4)
counts <- estimateDispersions(counts)
res <- nbinomTest(counts, "wt", "mut")
head(res)

## ----unrounded--------------------------------------------
dataNorm <- betweenLaneNormalization(data, round=FALSE, offset=TRUE)

norm1 <- normCounts(dataNorm)
norm2 <- exp(log(counts(dataNorm) + 0.1 ) + offst(dataNorm)) - 0.1

head(norm1 - norm2)

## ----rounded----------------------------------------------
head(round(normCounts(dataNorm)) - round(counts(dataNorm) * exp(offst(dataNorm))))

## ----getLengthAndGC---------------------------------------
getGeneLengthAndGCContent(id=c("ENSG00000012048", "ENSG00000139618"), org="hsa")

## ----getLengthAndGC-full, eval=FALSE----------------------
#  fData(data) <- getGeneLengthAndGCContent(featureNames(data),
#                                                org="sacCer3", mode="org.db")

## ----sessionInfo, results="asis"--------------------------
toLatex(sessionInfo())