---
title: "Visualization Guide"
author: "Chen Yang"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_float: true
    toc_depth: 3
    theme: flatly
vignette: >
  %\VignetteIndexEntry{Visualization Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  eval = FALSE
)
```

# Visualization Guide

This guide provides instructions for visualizing mLLMCelltype results. Creating effective visualizations is crucial for interpreting cell type annotations and communicating uncertainty metrics.

## Basic Visualization Concepts

mLLMCelltype provides three key types of information that can be visualized:

1. **Cell Type Annotations**: The final cell type labels assigned to each cluster
2. **Consensus Proportion**: A measure of agreement among models (0-1)
3. **Shannon Entropy**: A measure of uncertainty in the annotations (lower is better)

## Integrating with Seurat

### Adding mLLMCelltype Results to Seurat Objects

```{r}
library(Seurat)
library(mLLMCelltype)
library(ggplot2)

# Cluster ID of every cell, captured once so all lookups below use the
# same keys. Idents() must still hold the cluster IDs at this point.
cell_clusters <- as.character(Idents(seurat_obj))

# Add consensus annotations to Seurat object
final_annotations <- consensus_results$final_annotations
annotation_ids <- names(final_annotations)
if (is.null(annotation_ids)) {
  # Unnamed annotations: fall back to zero-based cluster numbering in list
  # order. seq_along() keeps this safe when there are no annotations.
  annotation_ids <- as.character(seq_along(final_annotations) - 1L)
}
annotation_labels <- as.character(unlist(final_annotations, use.names = FALSE))
stopifnot(length(annotation_labels) == length(annotation_ids))

# Cells whose cluster has no annotation keep their cluster ID as the label
annotation_rows <- match(cell_clusters, annotation_ids)
has_annotation <- !is.na(annotation_rows)
cell_types <- cell_clusters
cell_types[has_annotation] <- annotation_labels[annotation_rows[has_annotation]]
seurat_obj$cell_type_consensus <- cell_types

# Extract and add consensus metrics
cluster_results <- consensus_results$initial_results$consensus_results
consensus_metrics <- lapply(names(cluster_results), function(cluster_id) {
  metrics <- cluster_results[[cluster_id]]
  list(
    cluster = cluster_id,
    consensus_proportion = metrics$consensus_proportion,
    entropy = metrics$entropy
  )
})

# stringsAsFactors = FALSE keeps cluster IDs as character on R < 4.0
metrics_df <- do.call(
  rbind,
  lapply(consensus_metrics, data.frame, stringsAsFactors = FALSE)
)

# Look metrics up by row index instead of recoding strings: a cell whose
# cluster has no metrics gets NA rather than its cluster ID parsed as a
# number.
metric_rows <- match(cell_clusters, metrics_df$cluster)

# Add consensus proportion
seurat_obj$consensus_proportion <- as.numeric(
  metrics_df$consensus_proportion[metric_rows]
)

# Add entropy
seurat_obj$entropy <- as.numeric(metrics_df$entropy[metric_rows])
```

### Basic Visualizations

#### Cell Type Annotations

```{r}
# Basic cell type visualization
p1 <- DimPlot(
  seurat_obj,
  group.by = "cell_type_consensus",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("Cell Type Annotations") +
  theme(plot.title = element_text(hjust = 0.5))
p1
```

#### Consensus Proportion

```{r}
# Visualize consensus proportion
p2 <- FeaturePlot(
  seurat_obj,
  features = "consensus_proportion",
  cols = c("yellow", "green", "blue")
) +
  ggtitle("Consensus Proportion") +
  theme(plot.title = element_text(hjust = 0.5))
p2
```

#### Shannon Entropy

```{r}
# Visualize Shannon entropy
p3 <- FeaturePlot(
  seurat_obj,
  features = "entropy",
  cols = c("red", "orange", "yellow")
) +
  ggtitle("Shannon Entropy") +
  theme(plot.title = element_text(hjust = 0.5))
p3
```

#### Combined Visualization

```{r}
# Combine all three visualizations
library(patchwork)
combined_plot <- p1 | p2 | p3
combined_plot
```

## Advanced Visualizations

### Publication-Ready Plots

For publication-quality visualizations, consider using additional packages:

```{r}
# Install SCpubr if not already installed
if (!requireNamespace("SCpubr", quietly = TRUE)) {
  remotes::install_github("enblacar/SCpubr")
}
library(SCpubr)

# Enhanced cell type visualization
p_enhanced <- SCpubr::do_DimPlot(
  sample = seurat_obj,
  group.by = "cell_type_consensus",
  label = TRUE,
  repel = TRUE,
  pt.size = 0.5
) +
  ggtitle("Enhanced Cell Type Annotations")
p_enhanced
```

### Marker Gene Visualization

```{r}
# Find marker genes
library(dplyr)
pbmc_markers <- FindAllMarkers(
  seurat_obj,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

# Get top markers
top_markers <- pbmc_markers %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 3) %>%
  pull(gene) %>%
  unique()

# Create dotplot
# Switch identities to the consensus labels so the dot plot is grouped by
# cell type. Do this only after the cluster-based steps above (metadata
# mapping, FindAllMarkers), which read cluster IDs from Idents().
Idents(seurat_obj) <- seurat_obj$cell_type_consensus
DotPlot(seurat_obj, features = top_markers) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top Marker Genes by Cell Type")
```

## Saving Visualizations

```{r}
# Save visualizations
ggsave("cell_type_annotations.png", plot = p1, width = 10, height = 8, dpi = 300)
ggsave("consensus_proportion.png", plot = p2, width = 10, height = 8, dpi = 300)
ggsave("entropy.png", plot = p3, width = 10, height = 8, dpi = 300)
ggsave("combined_results.png", plot = combined_plot, width = 15, height = 5, dpi = 300)
```

## Best Practices

### Color Selection

- **Cell Type Annotations**: Use distinct colors for different cell types
- **Consensus Proportion**: Use a gradient from yellow (low) to blue (high)
- **Shannon Entropy**: Use a gradient from red (low uncertainty) to yellow (high uncertainty)

### Layout Considerations

- Arrange plots in a logical order (annotations, consensus, entropy)
- Use consistent point sizes and fonts across plots
- Include informative titles and legends
- Consider adding cluster labels for easier reference

## Next Steps

- [Introduction](introduction.html): Learn about mLLMCelltype principles and background
- [Getting Started Guide](getting-started.html): Basic usage examples
- [Usage Tutorial](usage-tutorial.html): Detailed usage scenarios and advanced features