## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
knitr::opts_chunk$set(fig.wide = TRUE, fig.retina = 3)
## ----arch, echo=FALSE, out.width="80%", eval=TRUE, fig.cap="Integration of rawDiag and rawrr into the Spectra ecosystem (by courtesy of Johannes Rainer)."----
knitr::include_graphics("arch.jpg")
## ----require------------------------------------------------------------------
suppressMessages(
stopifnot(require(Spectra),
require(MsBackendRawFileReader),
require(tartare),
require(BiocParallel))
)
## ----installAssemblies, echo=TRUE---------------------------------------------
if (isFALSE(rawrr::.checkDllInMonoPath())){
rawrr::installRawFileReaderDLLs()
}
if (isFALSE(file.exists(rawrr:::.rawrrAssembly()))){
rawrr::installRawrrExe(sourceUrl = "http://fgcz-ms.uzh.ch/~cpanse/rawrr/rawrr.1.1.12.exe")
}
## ----tartareEH4547, warning=FALSE, message=FALSE, eval=TRUE-------------------
# fetch via ExperimentHub
library(ExperimentHub)
eh <- ExperimentHub::ExperimentHub()
## ----tartare------------------------------------------------------------------
query(eh, c('tartare'))
## ----EH3220, message=FALSE, warning=FALSE-------------------------------------
EH3220 <- normalizePath(eh[["EH3220"]])
(rawfileEH3220 <- paste0(EH3220, ".raw"))
if (!file.exists(rawfileEH3220)){
file.link(EH3220, rawfileEH3220)
}
EH3222 <- normalizePath(eh[["EH3222"]])
(rawfileEH3222 <- paste0(EH3222, ".raw"))
if (!file.exists(rawfileEH3222)){
file.link(EH3222, rawfileEH3222)
}
EH4547 <- normalizePath(eh[["EH4547"]])
(rawfileEH4547 <- paste0(EH4547 , ".raw"))
if (!file.exists(rawfileEH4547 )){
file.link(EH4547 , rawfileEH4547 )
}
## ----backendInitializeMsBackendRawFileReader, message=FALSE-------------------
beRaw <- Spectra::backendInitialize(
MsBackendRawFileReader::MsBackendRawFileReader(),
files = c(rawfileEH3220, rawfileEH3222, rawfileEH4547))
## ----show---------------------------------------------------------------------
beRaw
## ----rawrrFigure2, fig.cap = "Peptide spectrum match. The vertical grey lines indicate the *in-silico* computed y-ions of the peptide precusor LGGNEQVTR++ as calculated by the [protViz]( https://CRAN.R-project.org/package=protViz) package."----
(S <- (beRaw |>
filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") )[437]) |>
plotSpectra()
# supposed to be scanIndex 9594
S
# add yIonSeries to the plot
(yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8])
names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries)))
abline(v = yIonSeries, col='#DDDDDD88', lwd=5)
axis(3, yIonSeries, names(yIonSeries))
## ----defineFilterIon----------------------------------------------------------
setGeneric("filterIons", function(object, ...) standardGeneric("filterIons"))
setMethod("filterIons", "MsBackend",
function(object, mZ=numeric(), tol=numeric(), ...) {
keep <- lapply(peaksData(object, BPPARAM = bpparam()),
FUN=function(x){
NN <- protViz::findNN(mZ, x[, 1])
hit <- (error <- mZ - x[NN, 1]) < tol & x[NN, 2] >= quantile(x[, 2], .9)
if (sum(hit) == length(mZ))
TRUE
else
FALSE
})
object[unlist(keep)]
})
## ----applyFilterIons----------------------------------------------------------
start_time <- Sys.time()
X <- beRaw |>
MsBackendRawFileReader::filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") |>
filterIons(yIonSeries, tol = 0.005) |>
Spectra::Spectra() |>
Spectra::peaksData()
end_time <- Sys.time()
## ----runTime------------------------------------------------------------------
end_time - start_time
## ----definePlotLGGNEQVTR------------------------------------------------------
.plot.LGGNEQVTR <- function(x){
yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8]
names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries)))
plot(x, type ='h', xlim=range(yIonSeries))
abline(v = yIonSeries, col='#DDDDDD88', lwd=5)
axis(3, yIonSeries, names(yIonSeries))
# find nearest mZ value
idx <- protViz::findNN(yIonSeries, x[,1])
data.frame(
ion = names(yIonSeries),
mZ.yIon = yIonSeries,
mZ = x[idx, 1],
intensity = x[idx, 2]
)
}
## ----filterIons2, fig.height=8, fig.cap = "Visualizing of the LGGNEQVTR spectrum matches.", echo=FALSE----
op <- par(mfrow=c(4, 1), mar=c(4,4,4,1))
rv <- X |>
lapply(FUN = .plot.LGGNEQVTR) |>
Reduce(f=rbind)
## ----sanityCheck--------------------------------------------------------------
stats::aggregate(formula = mZ ~ ion, FUN = base::mean, data=rv)
stats::aggregate(formula = intensity ~ ion, FUN = base::max, data=rv)
## ----combinePeaks, fig.cap = "Combined LGGNEQVTR peptide spectrum match plot."----
X |>
Spectra::combinePeaks(ppm=10, intensityFun=base::max) |>
.plot.LGGNEQVTR()
## ----readBenchmarkData, fig.cap="I/O Benchmark. The XY plot graphs how many spectra the backend can read in one second versus the chunk size of the rawrr::readSpectrum method for different compute architectures."----
ioBm <- file.path(system.file(package = 'MsBackendRawFileReader'),
'extdata', 'specs.csv') |>
read.csv2(header=TRUE)
# perform and include a local IO benchmark
ioBmLocal <- ioBenchmark(1000, c(32, 64, 128, 256), rawfile = rawfileEH4547)
lattice::xyplot((1 / as.numeric(time)) * workers ~ size | factor(n) ,
group = host,
data = rbind(ioBm, ioBmLocal),
horizontal = FALSE,
scales=list(y = list(log = 10)),
auto.key = TRUE,
layout = c(3, 1),
ylab = 'spectra read in one second',
xlab = 'number of spectra / file')
## ----mzXML--------------------------------------------------------------------
mzXMLEH3219 <- normalizePath(eh[["EH3219"]])
mzXMLEH3221 <- normalizePath(eh[["EH3221"]])
## ----backendInitialize, message=FALSE, fig.cap='Aggregated intensities mzXML versus raw of the 1st 100 spectra.', message=FALSE, warning=FALSE----
if (require(mzR)){
beMzXML <- Spectra::backendInitialize(
Spectra::MsBackendMzR(),
files = c(mzXMLEH3219))
beRaw <- Spectra::backendInitialize(
MsBackendRawFileReader::MsBackendRawFileReader(),
files = c(rawfileEH3220))
intensity.xml <- sapply(intensity(beMzXML[1:100]), sum)
intensity.raw <- sapply(intensity(beRaw[1:100]), sum)
plot(intensity.xml ~ intensity.raw, log = 'xy', asp = 1,
pch = 16, col = rgb(0.5, 0.5, 0.5, alpha=0.5), cex=2)
abline(lm(intensity.xml ~ intensity.raw),
col='red')
}
## -----------------------------------------------------------------------------
if (require(mzR)){
table(scanIndex(beRaw) %in% scanIndex(beMzXML))
}
## ----sessionInfo--------------------------------------------------------------
sessionInfo()