## ----setup, include = FALSE, fig.show='hide'---------------------------------- knitr::knit_hooks$set(pngquant = knitr::hook_pngquant) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", dev = "ragg_png", dpi = 72, fig.retina = 2, fig.align = "center", out.width = "100%", pngquant = "--speed=1 --quality=5-10" ) ## ----message=FALSE, fig.show='hide'------------------------------------------- # Load library library(scDiagnostics) # Load datasets data("reference_data") data("query_data") data("qc_data") # Set seed for reproducibility set.seed(0) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate the MDS scatter plot with cell type coloring plotCellTypeMDS(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate the MDS scatter plot with cell type coloring plotCellTypePCA(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:3) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot the PCA data as boxplots boxplotPCA(query_data = query_data, reference_data = reference_data, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", pc_subset = 1:3) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Compute important variables for all pairwise cell comparisons disc_output <- calculateDiscriminantSpace( reference_data = reference_data, query_data = query_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation") # Generate scatter and boxplot plot(disc_output, plot_type = "scatterplot") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Perform discriminant analysis and projection discriminant_results <- calculateDiscriminantSpace( reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", calculate_metrics = TRUE, # Compute Mahalanobis distance and cosine similarity alpha = 0.01 # Significance level for Mahalanobis distance cutoff ) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Extract Mahalanobis distances mahalanobis_distances <- discriminant_results$query_mahalanobis_dist # Identify anomalies based on a threshold threshold <- discriminant_results$mahalanobis_crit # Use the computed cutoff value anomalies <- mahalanobis_distances[mahalanobis_distances > threshold] ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Load ggplot2 for visualization library(ggplot2) # Convert distances to a data frame distances_df <- data.frame(Distance = mahalanobis_distances, Anomaly = ifelse(mahalanobis_distances > threshold, "Anomaly", "Normal")) # Plot histogram of Mahalanobis distances ggplot(distances_df, aes(x = Distance, fill = Anomaly)) + geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.7) + labs(title = "Histogram of Mahalanobis Distances", x = "Mahalanobis Distance", y = "Frequency") + theme_bw() ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Calculate the SIR space projections sir_output <- calculateSIRSpace( reference_data = reference_data, query_data = query_data, query_cell_type_col = "SingleR_annotation", ref_cell_type_col = "expert_annotation", multiple_cond_means = TRUE ) # Plot the SIR space projections plot(sir_output, sir_subset = 1:5, cell_types = c("CD4", "CD8", "B_and_plasma", "Myeloid"), lower_facet = "scatter", diagonal_facet = "boxplot", upper_facet = "blank") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot the top contributing markers plot(sir_output, plot_type = "loadings", sir_subset = 1:5, n_top = 10) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Visualize VPREB3 gene on a PCA plot plotGeneExpressionDimred(sce_object = query_data, method = "PCA", pc_subset = 1:5, feature = "NKG7", cell_types = "CD4", cell_type_col = "SingleR_annotation") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ plotMarkerExpression( reference_data = reference_data, query_data = query_data, ref_cell_type_col = "expert_annotation", query_cell_type_col = "SingleR_annotation", gene_name = "NKG7", cell_type = "CD4", normalization = "z_score" ) ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Remove cell types with very few cells qc_data_subset <- qc_data[, !(qc_data$SingleR_annotation %in% c("Chondrocytes", "DC", "Neurons","Platelets"))] # Generate scatter plot p1 <- plotQCvsAnnotation(sce_object = qc_data_subset, cell_type_col = "SingleR_annotation", qc_col = "total", score_col = "annotation_scores") p1 + ggplot2::xlab("Library Size") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Generate histograms histQCvsAnnotation(sce_object = query_data, cell_type_col = "SingleR_annotation", qc_col = "percent_mito", score_col = "annotation_scores") ## ----fig.height=5, fig.width=10, fig.show='hide'------------------------------ # Plot gene set scores on PCA plotGeneSetScores(sce_object = query_data, cell_type_col = "SingleR_annotation", cell_types = "CD8", method = "PCA", score_col = "gene_set_scores", pc_subset = 1:3) ## ----SessionInfo, echo=FALSE, message=FALSE, warning=FALSE, comment=NA, fig.show='hide'---- options(width = 80) sessionInfo()