---
title: "POMA Normalization Methods"
author: 
- name: Pol Castellano-Escuder
  affiliation: Duke University
  email: polcaes@gmail.com
date: "`r BiocStyle::doc_date()`"
output: 
    BiocStyle::html_document
vignette: >
    %\VignetteIndexEntry{POMA Normalization Methods}
    %\VignetteEngine{knitr::rmarkdown}
    %\usepackage[utf8]{inputenc}
    %\VignetteEncoding{UTF-8}
bibliography: ["POMA.bib"]
biblio-style: apalike
link-citations: true
---

**Compiled date**: `r Sys.Date()`

**Last edited**: 2022-08-01

**License**: `r packageDescription("POMA")[["License"]]`

```{r, include = FALSE}
# Document-wide chunk defaults: source and output are collapsed into a single
# block and every output line is prefixed with ">".
knitr::opts_chunk$set(
  collapse = TRUE,
  # fig.align = "center",
  comment = ">"
)
```

# Installation

Run the following code to install the Bioconductor version of the package.

```{r, eval = FALSE}
# Not evaluated when the vignette is built (eval = FALSE).
# install.packages("BiocManager")
BiocManager::install("POMA")
```

# Load Packages

```{r, warning = FALSE, message = FALSE, comment = NA}
# `comment` expects a character prefix or NA (not a logical); NA matches the
# setting used by the data-loading chunk of this vignette.
library(POMA)
library(patchwork)
```

# Load Data and Imputation

Let's create a cleaned `SummarizedExperiment` object from the sample `st000336` data to explore the normalization effects.  

```{r, warning = FALSE, comment = NA}
# imputation using the default method KNN
example_data <- st000336 %>% 
  PomaImpute()

# Print the imputed object so its summary appears in the rendered vignette.
example_data
```

# Normalization

Here we will evaluate ALL normalization methods that POMA offers on the same `SummarizedExperiment` object to compare them [@normalization].  

```{r, warning = FALSE}
# Apply every normalization method to the same imputed object. Each result is
# kept under its own name because the plotting chunks further down use them.
none <- example_data %>%
  PomaNorm(method = "none")
auto_scaling <- example_data %>%
  PomaNorm(method = "auto_scaling")
level_scaling <- example_data %>%
  PomaNorm(method = "level_scaling")
log_scaling <- example_data %>%
  PomaNorm(method = "log_scaling")
log_transformation <- example_data %>%
  PomaNorm(method = "log_transformation")
vast_scaling <- example_data %>%
  PomaNorm(method = "vast_scaling")
log_pareto <- example_data %>%
  PomaNorm(method = "log_pareto")
```

## Normalization effect on data dimensions

When we check the dimensions of the data after normalization, we can see that ALL methods have the same effect on data dimensions. `PomaNorm` **only** changes the data dimensions when the data have **features that only have zeros** or when the data have **features with 0 variance**. Only in these two cases will `PomaNorm` remove features from the data, changing the data dimensions.

```{r, warning = FALSE}
# Dimensions of the assay matrix of each normalized object, printed one result
# per method in the same order as they were created.
none %>% SummarizedExperiment::assay() %>% dim()
auto_scaling %>% SummarizedExperiment::assay() %>% dim()
level_scaling %>% SummarizedExperiment::assay() %>% dim()
log_scaling %>% SummarizedExperiment::assay() %>% dim()
log_transformation %>% SummarizedExperiment::assay() %>% dim()
vast_scaling %>% SummarizedExperiment::assay() %>% dim()
log_pareto %>% SummarizedExperiment::assay() %>% dim()
```

## Normalization effect on samples

Here we can evaluate the different normalization effects on samples [@normalization].   

```{r, message = FALSE, warning = FALSE}
# Boxplot grouped by samples for one normalized object, with the given title,
# the legend switched off and the x-axis text blanked.
sample_boxplot <- function(data, title) {
  PomaBoxplots(
    data,
    group = "samples",
    jitter = FALSE,
    legend_position = "none"
  ) +
    ggplot2::ggtitle(title) +
    ggplot2::theme(axis.text.x = ggplot2::element_blank())
}

# The non-normalized data is plotted on its own with PomaBoxplots' default
# legend setting and an untouched x axis.
box_none <- PomaBoxplots(none, group = "samples", jitter = FALSE) +
  ggplot2::ggtitle("Not Normalized")

box_auto_scaling <- sample_boxplot(auto_scaling, "Auto Scaling")
box_level_scaling <- sample_boxplot(level_scaling, "Level Scaling")
box_log_scaling <- sample_boxplot(log_scaling, "Log Scaling")
box_log_transformation <- sample_boxplot(log_transformation, "Log Transformation")
box_vast_scaling <- sample_boxplot(vast_scaling, "Vast Scaling")
box_log_pareto <- sample_boxplot(log_pareto, "Log Pareto")

# First figure: the reference plot.
box_none

# Second figure: the six normalized variants composed into two rows of three.
(box_auto_scaling + box_level_scaling + box_log_scaling) /
  (box_log_transformation + box_vast_scaling + box_log_pareto)
```

## Normalization effect on features

Here we can evaluate the different normalization effects on features.  

```{r, message = FALSE, warning = FALSE}
# Density plot grouped by features for one normalized object, with the given
# title, the legend switched off and both axis titles blanked.
feature_density <- function(data, title) {
  PomaDensity(data, group = "features", legend_position = "none") +
    ggplot2::ggtitle(title) +
    ggplot2::theme(
      axis.title.x = ggplot2::element_blank(),
      axis.title.y = ggplot2::element_blank()
    )
}

# The non-normalized data is plotted on its own and keeps its axis titles.
density_none <- PomaDensity(
  none,
  group = "features",
  legend_position = "none"
) +
  ggplot2::ggtitle("Not Normalized")

density_auto_scaling <- feature_density(auto_scaling, "Auto Scaling")
density_level_scaling <- feature_density(level_scaling, "Level Scaling")
density_log_scaling <- feature_density(log_scaling, "Log Scaling")
density_log_transformation <- feature_density(log_transformation, "Log Transformation")
density_vast_scaling <- feature_density(vast_scaling, "Vast Scaling")
density_log_pareto <- feature_density(log_pareto, "Log Pareto")

# First figure: the reference plot.
density_none

# Second figure: the six normalized variants composed into two rows of three.
(density_auto_scaling + density_level_scaling + density_log_scaling) /
  (density_log_transformation + density_vast_scaling + density_log_pareto)
```

# Session Information

```{r}
sessionInfo()
```

# References
