## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----env, include=FALSE, echo=FALSE, cache=FALSE------------------------------
library("knitr")
opts_chunk$set(stop_on_error = 1L)
suppressPackageStartupMessages(library("MSnbase"))
suppressWarnings(suppressPackageStartupMessages(library("pRoloc")))
suppressPackageStartupMessages(library("pRolocdata"))
suppressPackageStartupMessages(library("class"))
set.seed(1)
setStockcol(NULL)

## ----loadpkg------------------------------------------------------------------
library("pRoloc")

## -----------------------------------------------------------------------------
library("pRolocdata")
data("andy2011")

## -----------------------------------------------------------------------------
andy2011

## -----------------------------------------------------------------------------
head(exprs(andy2011))

## -----------------------------------------------------------------------------
getMarkers(andy2011, fcol = "markers.tl")

## ----loaddata-----------------------------------------------------------------
data("andy2011goCC")
andy2011goCC

## -----------------------------------------------------------------------------
dim(andy2011goCC)
exprs(andy2011goCC)[1:10, 1:10]

## -----------------------------------------------------------------------------
all(featureNames(andy2011) == featureNames(andy2011goCC))

head(featureNames(andy2011))
head(featureNames(andy2011goCC))

## -----------------------------------------------------------------------------
data("andy2011hpa")
andy2011

## ----tabdelim-----------------------------------------------------------------
ppif <- system.file("extdata/tabdelimited._gHentss2F9k.txt.gz", package = "pRolocdata")
ppidf <- read.delim(ppif, header = TRUE, stringsAsFactors = FALSE)
head(ppidf)

## ----ppiset-------------------------------------------------------------------
uid <- unique(c(ppidf$X.node1, ppidf$node2))
ppim <- diag(length(uid))
colnames(ppim) <- rownames(ppim) <- uid

for (k in 1:nrow(ppidf)) {
    i <- ppidf[[k, "X.node1"]]
    j <- ppidf[[k, "node2"]]
    ppim[i, j] <- ppidf[[k, "combined_score"]]
}

ppim[1:5, 1:8]

## ----ppiset2------------------------------------------------------------------
andyppi <- andy2011
featureNames(andyppi) <- sub("_HUMAN", "", fData(andyppi)$UniProtKB.entry.name)
cmn <- intersect(featureNames(andyppi), rownames(ppim))
ppim <- ppim[cmn, ]
andyppi <- andyppi[cmn, ]

ppi <- MSnSet(ppim, fData = fData(andyppi),
              pData = data.frame(row.names = colnames(ppim)))
ppi <- filterZeroCols(ppi)

## -----------------------------------------------------------------------------
andyppi

## ----mclasses, echo=FALSE-----------------------------------------------------
data(andy2011) ## load clean LOPIT data

## marker classes for andy2011
m <- unique(fData(andy2011)$markers.tl)
m <- m[m != "unknown"]

## ----andypca, fig.width=6, fig.height=6, echo=FALSE, fig.cap = "PCA plot of `andy2011`. The multivariate protein profiles are summarised along the two first principal components. Proteins of unknown localisation are represented by empty grey points. Protein markers, which are well-known residents of specific sub-cellular niches are colour-coded and form clusters on the figure."----
setStockcol(paste0(getStockcol(), "80"))
plot2D(andy2011, fcol = "markers.tl")
setStockcol(NULL)
addLegend(andy2011, fcol = "markers.tl",
          where = "topright", bty = "n", cex = .7)

## ----thetas0, echo=TRUE-------------------------------------------------------
head(thetas(3, by = 0.5))
dim(thetas(3, by = 0.5))

## ----thetas1, echo=TRUE-------------------------------------------------------
dim(thetas(5, length.out = 4))

## ----thetaandy----------------------------------------------------------------
## marker classes for andy2011
m <- unique(fData(andy2011)$markers.tl)
m <- m[m != "unknown"]
th <- thetas(length(m), length.out=4)
dim(th)

## ----thetaopt0, eval=FALSE----------------------------------------------------
# topt <- knntlOptimisation(andy2011, andy2011goCC,
#                           th = th,
#                           k = c(3, 3),
#                           fcol = "markers.tl",
#                           times = 50)

## ----thetaopt, eval=TRUE------------------------------------------------------
set.seed(1)
i <- sample(nrow(th), 12)
topt <- knntlOptimisation(andy2011, andy2011goCC,
                          th = th[i, ],
                          k = c(3, 3),
                          fcol = "markers.tl",
                          times = 5)
topt

## ----getParam-----------------------------------------------------------------
getParams(topt)

## ----besttheta----------------------------------------------------------------
(bw <- experimentData(andy2011)@other$knntl$thetas)

## ----tlclass------------------------------------------------------------------
andy2011 <- knntlClassification(andy2011, andy2011goCC,
                                bestTheta = bw,
                                k = c(3, 3),
                                fcol = "markers.tl")

## ----tlpreds------------------------------------------------------------------
andy2011 <- getPredictions(andy2011, fcol = "knntl")

## ----andypca2, fig.width=6, fig.height=6, fig.cap = "PCA plot of `andy2011` after transfer learning classification. The size of the points is proportional to the classification scores."----
setStockcol(paste0(getStockcol(), "80"))
ptsze <- exp(fData(andy2011)$knntl.scores) - 1
plot2D(andy2011, fcol = "knntl", cex = ptsze)
setStockcol(NULL)
addLegend(andy2011, where = "topright",
          fcol = "markers.tl",
          bty = "n", cex = .7)

## ----sessioninfo, echo=FALSE--------------------------------------------------
sessionInfo()