## ----style, echo = FALSE, results = 'hide'------------------------------------ BiocStyle::markdown() ## ----------------------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("CytoMDS") ## ----rlibs, results=FALSE----------------------------------------------------- suppressPackageStartupMessages(library(HDCytoData)) library(CytoMDS) library(ggplot2) library(patchwork) ## ----loadDataSet-------------------------------------------------------------- BCRXL_fs <- HDCytoData::Bodenmiller_BCR_XL_flowSet() BCRXL_fs ## ----convertPhenoData--------------------------------------------------------- phenoData <- flowCore::pData(BCRXL_fs) additionalPhenoData <- keyword(BCRXL_fs[[1]], "EXPERIMENT_INFO")$EXPERIMENT_INFO phenoData <- cbind(phenoData, additionalPhenoData) flowCore::pData(BCRXL_fs) <- phenoData ## ----selectMarkers------------------------------------------------------------ markerInfo <- keyword(BCRXL_fs[[1]], "MARKER_INFO")$MARKER_INFO chClass <- markerInfo$marker_class table(chClass) chLabels <- markerInfo$channel_name[chClass != "none"] (chMarkers <- markerInfo$marker_name[chClass != "none"]) ## ----scaleTransform----------------------------------------------------------- trans <- arcsinhTransform( transformationId="ArcsinhTransform", a = 0, b = 1/5, c = 0) BCRXL_fs_trans <- transform( BCRXL_fs, transformList(chLabels, trans)) ## ----distCalc----------------------------------------------------------------- pwDist <- pairwiseEMDDist( BCRXL_fs_trans, channels = chMarkers, verbose = FALSE ) ## ----distShow----------------------------------------------------------------- round(pwDist[1:10, 1:10], 2) ## ----distShowHist, fig.align='center', fig.wide = TRUE------------------------ distVec <- pwDist[upper.tri(pwDist)] distVecDF <- data.frame(dist = distVec) pHist <- ggplot(distVecDF, mapping = aes(x=dist)) + geom_histogram(fill = "darkgrey", col = "black", bins = 15) + theme_bw() + ggtitle("EMD distances for Bodenmiller2012 dataset") pHist ## ----MDSCalc------------------------------------------------------------------ mdsObj <- computeMetricMDS(pwDist, seed = 0) show(mdsObj) ## ----plotMDS1_simplest, fig.align='center', fig.asp = 1, fig.wide = TRUE------ ggplotSampleMDS(mdsObj) ## ----plotMDS_colours_shapes_1_2, fig.align='center', fig.asp = 0.9, fig.wide = TRUE---- p12 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p12 ## ----plotMDS_colours_shapes_2_3_4, fig.align='center', fig.width = 9, fig.height = 12---- p23 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(2,3), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p34 <- ggplotSampleMDS( mdsObj, pData = phenoData, projectionAxes = c(3,4), pDataForColour = "group_id", pDataForLabel = "patient_id" ) p23 / p34 ## ----plotMDSShepard, fig.align='center', fig.wide = TRUE, fig.asp = 1--------- ggplotSampleMDSShepard(mdsObj) ## ----MDSCalc.nDim2, fig.align='center', fig.asp = 0.85, fig.wide = TRUE------- mdsObj2 <- CytoMDS::computeMetricMDS(pwDist, seed = 0, nDim = 2) ggplotSampleMDS(mdsObj2, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id", flipYAxis = TRUE) ## ----MDSCalc.Rsq99, fig.align='center', fig.asp = 0.85, fig.wide = TRUE------- mdsObj3 <- CytoMDS::computeMetricMDS(pwDist, seed = 0, targetPseudoRSq = 0.99) ggplotSampleMDS(mdsObj3, pData = phenoData, projectionAxes = c(1,2), pDataForColour = "group_id", pDataForLabel = "patient_id") ## ----plotMDSShepard.Rsq99, fig.align='center', fig.wide = TRUE, fig.asp = 1---- ggplotSampleMDSShepard(mdsObj3) ## ----plotMDS_medians---------------------------------------------------------- medians <- channelSummaryStats(BCRXL_fs_trans, channels = chLabels, statFUNs = median) ## ----plotMDS_biplot, fig.align='center', fig.asp = 0.8, fig.wide = TRUE------- ggplotSampleMDS(mdsObj, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, biplot = TRUE, extVariables = medians) ## ----plotMDS_biplot_0.6, fig.align='center', fig.asp = 0.8, fig.wide = TRUE---- ggplotSampleMDS(mdsObj, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, biplot = TRUE, extVariables = medians, arrowThreshold = 0.6) ## ----plotMarginalDensities, fig.align='center', fig.asp = 1.2, fig.wide = TRUE---- ggplotMarginalDensities( BCRXL_fs_trans, channels = chLabels, pDataForColour = "group_id", pDataForGroup = "sample_id") ## ----plotMDS_stats------------------------------------------------------------ statFUNs = c("median" = stats::median, "Q25" = function(x, na.rm) { stats::quantile(x, probs = 0.25) }, "Q75" = function(x, na.rm) { stats::quantile(x, probs = 0.75) }, "standard deviation" = stats::sd) chStats <- channelSummaryStats(BCRXL_fs_trans, channels = chLabels, statFUNs = statFUNs) ## ----plotMDS_biplot_facetting, fig.align='center', fig.asp = 1, fig.wide = TRUE---- ggplotSampleMDSWrapBiplots( mdsObj, extVariableList = chStats, ncol = 2, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE, displayLegend = FALSE) ## ----plotMDS_biplot_stddev, fig.align='center', fig.asp = 1.0, fig.wide = TRUE---- stdDevs <- list( "std dev of channels 1 to 6" = chStats[["standard deviation"]][,1:6], "std dev of channels 7 to 12" = chStats[["standard deviation"]][,7:12], "std dev of channels 13 to 18" = chStats[["standard deviation"]][,13:18], "std dev of channels 19 to 24" = chStats[["standard deviation"]][,19:24] ) ggplotSampleMDSWrapBiplots( mdsObj, ncol = 2, extVariableList = stdDevs, pData = phenoData, pDataForColour = "group_id", displayPointLabels = FALSE, displayArrowLabels = TRUE, repelArrowLabels = TRUE) ## ----temporary_store---------------------------------------------------------- storageLocation <- suppressMessages(base::tempdir()) nSample <- length(BCRXL_fs_trans) fileNames <- file.path( storageLocation, paste0("BodenMiller2012_TransformedSample", sprintf("%02d.rds", seq_len(nSample)))) for (i in seq_len(nSample)) { saveRDS(BCRXL_fs_trans[[i]], file = fileNames[i]) } ## ----distance_calc_loading_otf_parallel--------------------------------------- bp <- BiocParallel::SnowParam( stop.on.error = FALSE, progressbar = TRUE) pwDistLast <- suppressWarnings(pairwiseEMDDist( x = nSample, channels = chMarkers, loadFlowFrameFUN = function(ffIndex, theFiles){ readRDS(file = theFiles[ffIndex]) }, loadFlowFrameFUNArgs = list(theFiles = fileNames), BPPARAM = bp)) ## ----dist1ShowHistAgain, fig.align='center', fig.wide = TRUE------------------ distVecLast <- pwDistLast[upper.tri(pwDistLast)] distVecDFLast <- data.frame(dist = distVecLast) pHistLast <- ggplot(distVecDFLast, mapping = aes(x=dist)) + geom_histogram(fill = "darkgrey", col = "black", bins = 15) + theme_bw() + ggtitle( "EMD distances for Bodenmiller2012 dataset", subtitle = "on the fly memory loading and parallel computation") pHistLast ## ----sessioninfo, echo=FALSE-------------------------------------------------- sessionInfo()