## ---- echo = FALSE, results = "asis"--------------------------------------------------------------------------------------------
options(width = 130)
BiocStyle::markdown()
set.seed(44)
## ---- message = FALSE-----------------------------------------------------------------------------------------------------------
library(ClassifyR)
library(ggplot2)
library(curatedOvarianData)
data(GSE26712_eset)
GSE26712_eset <- GSE26712_eset[1:1000, ]
## ---- results = "hold", tidy = FALSE--------------------------------------------------------------------------------------------
curatedClinical <- pData(GSE26712_eset)
ovarPoor <- curatedClinical[, "vital_status"] == "deceased" & curatedClinical[, "days_to_death"] < 365 * 1
ovarGood <- curatedClinical[, "vital_status"] == "living" & curatedClinical[, "days_to_death"] > 365 * 5
sum(ovarPoor, na.rm = TRUE)
sum(ovarGood, na.rm = TRUE)
## -------------------------------------------------------------------------------------------------------------------------------
ovarExpression <- exprs(GSE26712_eset)[, c(which(ovarPoor), which(ovarGood))]
ovarGroups <- factor(rep(c("Poor", "Good"), c(length(which(ovarPoor)), length(which(ovarGood)))),
levels = c("Poor", "Good"))
## ---- fig.width = 18, fig.height = 10, tidy = FALSE-----------------------------------------------------------------------------
plotData <- data.frame(expression = as.numeric(ovarExpression),
sample = factor(rep(1:ncol(ovarExpression), each = nrow(ovarExpression))))
ggplot(plotData, aes(x = sample, y = expression)) + geom_boxplot() +
scale_y_continuous(limits = c(0, 15)) + xlab("Sample") + ylab("Expression Value") +
ggtitle("Expression for All Arrays")
## ---- tidy = FALSE--------------------------------------------------------------------------------------------------------------
groupsTable <- data.frame(class = ovarGroups)
rownames(groupsTable) <- colnames(ovarExpression)
ovarSet <- ExpressionSet(ovarExpression, AnnotatedDataFrame(groupsTable))
featureNames(ovarSet) <- rownames(ovarExpression)
dim(ovarSet)
## ---- tidy = FALSE--------------------------------------------------------------------------------------------------------------
library(sparsediscrim)
DEresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Expression", validation = "bootstrap", resamples = 5, folds = 3,
params = list(SelectParams(limmaSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")),
TrainParams(dlda, TRUE, doesTests = FALSE),
PredictParams(predict, TRUE, getClasses = function(result) result[["class"]])),
parallelParams = bpparam(), verbose = 1)
DEresults
## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE--------------------------------------------------------
DEplots <- plotFeatureClasses(ovarSet, features(DEresults)[[1]][[2]][1:5])
## -------------------------------------------------------------------------------------------------------------------------------
DEresults <- calcPerformance(DEresults, "balanced")
DEresults
performance(DEresults)
## -------------------------------------------------------------------------------------------------------------------------------
DEresults <- calcPerformance(DEresults, "mat")
DEresults
performance(DEresults)
## ---- tidy = FALSE--------------------------------------------------------------------------------------------------------------
DVresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Variability",
validation = "bootstrap", resamples = 2, folds = 4,
params = list(SelectParams(leveneSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")),
TransformParams(subtractFromLocation, location = "median"),
TrainParams(fisherDiscriminant, FALSE, doesTests = TRUE),
PredictParams(predictor = function(){}, FALSE, getClasses = function(result) result, returnType = "both")),
verbose = 1)
DVresults
## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE--------------------------------------------------------
DVplots <- plotFeatureClasses(ovarSet, features(DVresults)[[1]][[2]][1:5])
## -------------------------------------------------------------------------------------------------------------------------------
DVresults <- calcPerformance(DVresults, "balanced")
DVresults
performance(DVresults)
## ---- tidy = FALSE--------------------------------------------------------------------------------------------------------------
dParams <- list(bw = "nrd0", n = 4096, from = expression(min(featureValues)),
to = expression(max(featureValues)))
DDresults <- runTests(ovarSet, "Ovarian Cancer", "Differential Distribution",
validation = "bootstrap", resamples = 2, folds = 2,
params = list(SelectParams(KullbackLeiblerSelection, resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, 100), performanceType = "balanced", better = "lower")),
TrainParams(naiveBayesKernel, FALSE, doesTests = TRUE),
PredictParams(predictor = function(){}, FALSE, getClasses = function(result) result, weighted = "weighted", returnType = "both",
densityParameters = dParams)),
verbose = 1)
DDresults
## ---- fig.height = 12, fig.width = 12, results = "hold", message = FALSE--------------------------------------------------------
DDplots <- plotFeatureClasses(ovarSet, features(DDresults[[1]])[[1]][[1]][1:5])
## -------------------------------------------------------------------------------------------------------------------------------
DDresults[["weight=crossover distance"]] <- calcPerformance(DDresults[["weight=crossover distance"]], "balanced")
DDresults[["weight=crossover distance"]]
performance(DDresults[["weight=crossover distance"]])
## ---- fig.width = 10, fig.height = 7--------------------------------------------------------------------------------------------
library(grid)
resultsList <- list(Expression = DEresults, Variability = DVresults)
errorPlot <- samplesMetricMap(resultsList)
accuracyPlot <- samplesMetricMap(resultsList, metric = "accuracy")
## ---- fig.width = 6.8, fig.height = 5-------------------------------------------------------------------------------------------
errorBoxes <- performancePlot(list(DEresults, DVresults, DDresults[["weight=crossover distance"]]),
performanceName = "Balanced Error Rate",
boxFillColouring = "None", boxLineColouring = "None",
title = "Errors Across Classification Types")
## -------------------------------------------------------------------------------------------------------------------------------
trainingExpr <- matrix(rnorm(500 * 50, 9, 3), ncol = 50)
trainingClasses <- factor(rep(c("Healthy", "Diseased"), each = 25), levels = c("Healthy", "Diseased"))
trainingExpr[101:125, trainingClasses == "Diseased"] <- trainingExpr[101:125, trainingClasses == "Diseased"] - 2
testingExpr <- matrix(rnorm(500 * 50, 9, 3), ncol = 50)
testingClasses <- factor(rep(c("Healthy", "Diseased"), each = 25), levels = c("Healthy", "Diseased"))
testingExpr[111:135, testingClasses == "Diseased"] <- testingExpr[111:135, testingClasses == "Diseased"] - 2
## -------------------------------------------------------------------------------------------------------------------------------
allExpr <- cbind(trainingExpr, testingExpr)
allClasses <- unlist(list(trainingClasses, testingClasses))
independentResult <- runTest(allExpr, allClasses, datasetName = "Simulation", classificationName = "DE",
training = 1:50, testing = 51:100)
independentResult
## ---- fig.height = 5, fig.width = 6---------------------------------------------------------------------------------------------
ROCcurves <- ROCplot(list(DVresults, DDresults[["weight=crossover distance"]]))
## -------------------------------------------------------------------------------------------------------------------------------
library(e1071) # Provides SVM functions.
resubstituteParams = ResubstituteParams(nFeatures = c(25, 50, 75, seq(100, 1000, 100)), performanceType = "balanced", better = "lower")
SVMresults <- suppressWarnings(runTests(ovarSet, "Ovarian Cancer", "Differential Expression", validation = "bootstrap", resamples = 5, folds = 3,
params = list(SelectParams(limmaSelection, resubstituteParams = resubstituteParams),
TrainParams(svm, TRUE, doesTests = FALSE, kernel = "linear", resubstituteParams = resubstituteParams, tuneParams = list(cost = c(0.01, 0.1, 1, 10))),
PredictParams(predict, TRUE, getClasses = function(result) result)),
parallelParams = bpparam(), verbose = 1))
## -------------------------------------------------------------------------------------------------------------------------------
length(tunedParameters(SVMresults))
tunedParameters(SVMresults)[[1]]