## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------
BiocStyle::latex()


## ----knitr, include=FALSE, cache=FALSE-----------------------------------
library(knitr)
opts_chunk$set(fig.align = 'center',
               fig.show = 'hold',
               stop_on_error = 1L,
               par = TRUE,
               prompt = TRUE,
               comment = NA)
options(replace.assign = TRUE,
        width = 70)


## ----setup, echo=FALSE---------------------------------------------------
suppressPackageStartupMessages(library(qvalue))
library(xtable)
suppressPackageStartupMessages(library(synapter))
library(synapterdata)
## preparing some data
data(ups25b) ## from synapterdata
res <- ups25b ## for synergise and plotRt


## ----install, echo=TRUE, eval=FALSE--------------------------------------
## source("http://bioconductor.org/biocLite.R")
## ## or, if you have already used the above before
## library("BiocInstaller")
## ## and to install the package
## biocLite("synapter")


## ----library, echo=TRUE, eval=FALSE--------------------------------------
## library(synapter)


## ----plotRt, dev='pdf', fig.width=5.5, fig.height=5----------------------
plotRt(ups25b, what = "model", nsd = 1)


## ----prepare-synergise, echo=TRUE, eval=TRUE, tidy = FALSE---------------
library(synapterdata)
hdmsefile <- getHDMSeFinalPeptide()[2]
basename(hdmsefile)
msefile <- getMSeFinalPeptide()[2]
basename(msefile)
msepep3dfile <- getMSePep3D()[2]
basename(msepep3dfile)
fas <- getFasta()
basename(fas)
## the synergise input is a (named) list of filenames
input <- list(identpeptide = hdmsefile,
              quantpeptide = msefile,
              quantpep3d = msepep3dfile,
              fasta = fas)
## a report and result files will be stored
## in the 'output' directory
output <- tempdir()
output


## ----synergise, echo=TRUE, eval=FALSE------------------------------------
## res <- synergise(filenames = input, outputdir = output)


## ----performance, echo=TRUE----------------------------------------------
performance(res)


## ----inputfiles, echo=FALSE, eval=TRUE-----------------------------------
inputfiles <- getHDMSeFinalPeptide()


## ----masterFdr, echo=TRUE, eval=TRUE, cache=TRUE, tidy=FALSE-------------
## using the full set of 6 HDMSe files and a
## fasta database from the synapterdata package
inputfiles <- getHDMSeFinalPeptide()
fasta <- getFasta()
cmb <- estimateMasterFdr(inputfiles, fasta, masterFdr = 0.02,
                         verbose = FALSE)
cmb


## ----estimateFdrPlot, dev='pdf', fig.width=5, fig.height=5---------------
plot(cmb)


## ----bestComb, echo=TRUE, eval=TRUE--------------------------------------
bestComb(cmb)


## ----master, echo=TRUE, eval=TRUE, cache=TRUE----------------------------
master <- makeMaster(inputfiles[bestComb(cmb)], verbose = FALSE)
master


## ----loadups, eval=TRUE, echo=TRUE---------------------------------------
data(ups25a, ups25b, ups25c, ups50a, ups50b, ups50c)


## ----setas,echo=TRUE,eval=TRUE-------------------------------------------
ms25a <- as(ups25a, "MSnSet")
class(ups25a)[1]
class(ms25a)[1]
ms25a


## ----updatesamplename----------------------------------------------------
sampleNames(ms25a)
sampleNames(ms25a) <- "ups25a"
sampleNames(ms25a)


## ----accessor------------------------------------------------------------
tail(exprs(ms25a))
tail(fData(ms25a)[, c(2,9)])
## all fetaure metadata
fvarLabels(ms25a)


## ----dotop3, echo=TRUE, cache=TRUE---------------------------------------
ms25a <- topN(ms25a, groupBy = fData(ms25a)$protein.Accession, n = 3)
nPeps <- nQuants(ms25a, fcol = "protein.Accession")
ms25a <- combineFeatures(ms25a, fData(ms25a)$protein.Accession,
                         "sum", na.rm = TRUE, verbose = FALSE)
head(exprs(ms25a))
head(nPeps)
## scale intensities
exprs(ms25a) <- exprs(ms25a) * (3/nPeps)
## NaN result from the division by 0, when
## no peptide was found for that protein
head(exprs(ms25a))


## ----batch, echo=TRUE, tidy = FALSE--------------------------------------
nms <- c(paste0("ups", 25, c("b", "c")),
         paste0("ups", 50, c("a", "b", "c")))
tmp <- sapply(nms, function(.ups) {
  cat("Processing", .ups, "... ")
  ## get the object from workspace and convert to MSnSet
  x <- get(.ups, envir = .GlobalEnv)
  x <- as(x, "MSnSet")
  sampleNames(x) <- .ups
  ## extract top 3 peptides
  x <- topN(x, groupBy = fData(x)$protein.Accession, n = 3)
  ## calculate the number of peptides that are available
  nPeps <- nQuants(x, fcol = "protein.Accession")
  ## sum top3 peptides into protein quantitation
  x <- combineFeatures(x, fData(x)$protein.Accession,
                       "sum", na.rm = TRUE, verbose = FALSE)
  ## adjust protein intensity based on actual number of top peptides
  exprs(x) <- exprs(x) * (3/nPeps)
  ## adjust feature variable names for combine
  x <- updateFvarLabels(x, .ups)
  ## save the new MSnExp instance in the workspace
  varnm <- sub("ups", "ms", .ups)
  assign(varnm, x, envir = .GlobalEnv)
  cat("done\n")
})


## ----combine25, echo=TRUE------------------------------------------------
ms25 <- combine(ms25a, ms25b)
ms25 <- combine(ms25, ms25c)
dim(ms25)
ms25 <- filterNA(ms25, pNA = 1/3)
dim(ms25)


## ----combine50, echo=TRUE------------------------------------------------
ms50 <- combine(ms50a, ms50b)
ms50 <- combine(ms50, ms50c)
dim(ms50)
ms50 <- filterNA(ms50, pNA = 1/3)
dim(ms50)


## ----msUps---------------------------------------------------------------
msUps <- combine(ms25, ms50)
msUps <- filterNA(msUps, pNA = 2/6)
head(exprs(msUps))
table(apply(exprs(msUps), 1, function(.x) sum(!is.na(.x))))


## ----scale---------------------------------------------------------------
ecoli <- -grep("ups$", featureNames(msUps))
meds <- apply(exprs(msUps)[ecoli, ], 2, median, na.rm=TRUE)
exprs(msUps) <- t(apply(exprs(msUps), 1, "/", meds))


## ----stats, tidy = FALSE-------------------------------------------------
## use log2 data for t-test
exprs(msUps) <- log2(exprs(msUps))
## apply a t-test and extract the p-value
pv <- apply(exprs(msUps), 1 ,
            function(x) t.test(x[1:3], x[4:6])$p.value)
## calculate q-values
library(qvalue)
qv <- qvalue(pv)$qvalues
## calculate log2 fold-changes
lfc <- apply(exprs(msUps), 1 ,
             function(x) mean(x[1:3], na.rm=TRUE) - mean(x[4:6], na.rm=TRUE))
## create a summary table
res <- data.frame(cbind(exprs(msUps), pv, qv, lfc))
## reorder based on q-values
res <- res[order(res$qv), ]
head(round(res, 3))


## ----writecsv, eval=FALSE------------------------------------------------
## write.csv(res, file = "upsResults.csv")


## ----volcano, dev='pdf', fig.width=5, fig.height=5, tidy=FALSE-----------
plot(res$lfc, -log10(res$qv),
     col = ifelse(grepl("ups$", rownames(res)),
       "#4582B3AA",
       "#A1A1A180"),
     pch = 19,
     xlab = expression(log[2]~fold-change),
     ylab = expression(-log[10]~q-value))
grid()
abline(v = -1, lty = "dotted")
abline(h = -log10(0.1), lty = "dotted")
legend("topright", c("UPS", "E. coli"),
       col = c("#4582B3AA", "#A1A1A1AA"),
       pch = 19, bty = "n")


## ----hmap, dev='pdf', fig.width=4, fig.height=4, fig.keep='last'---------
heatmap(exprs(msUps)[grep("ups",featureNames(msUps)), ])


## ----falsepos, echo=FALSE------------------------------------------------
sel1 <- (res$qv < 0.1)
signms <- rownames(res)[sel1]
i <- grep("ups", signms)
n1 <- length(i)
n2 <- length(signms) - n1


## ----upstab, echo=FALSE, results='asis'----------------------------------
k <- grep("ups", rownames(res))
print(xtable(round(res[k, ], 3),
             caption = "UPS1 proteins.",
             table.placement = "thb",
             label = "tab:ups"),
      size = "scriptsize")


## ----echo=FALSE----------------------------------------------------------
  dev.off()


## ----sessioninfo, results='asis', echo=FALSE, cache=FALSE----------------
toLatex(sessionInfo())