## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, eval=FALSE-------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("EasyCellType") ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- library(devtools) install_github("rx-li/EasyCellType") ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- library(EasyCellType) ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- data(pbmc_data) ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- library(Seurat) # Initialize the Seurat object pbmc <- CreateSeuratObject(counts = pbmc_data, project = "pbmc3k", min.cells = 3, min.features = 200) # QC and select samples pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) # Normalize the data pbmc <- NormalizeData(pbmc) # Identify highly variable features pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) # Scale the data all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes) # Perfom linear dimensional reduction pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # Cluster the cells pbmc <- FindNeighbors(pbmc, dims = 1:10) pbmc <- FindClusters(pbmc, resolution = 0.5) # Find differentially expressed features markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- library(org.Hs.eg.db) library(AnnotationDbi) markers$entrezid <- mapIds(org.Hs.eg.db, keys=markers$gene, #Column containing Ensembl gene ids column="ENTREZID", keytype="SYMBOL", multiVals="first") markers <- na.omit(markers) ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- library(dplyr) markers_sort <- data.frame(gene=markers$entrezid, cluster=markers$cluster, score=markers$avg_log2FC) %>% group_by(cluster) %>% mutate(rank = rank(score), ties.method = "random") %>% arrange(desc(rank)) input.d <- as.data.frame(markers_sort[, 1:3]) ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- data("gene_pbmc") input.d <- gene_pbmc ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- annot.GSEA <- easyct(input.d, db="cellmarker", species="Human", tissue=c("Blood", "Peripheral blood", "Blood vessel", "Umbilical cord blood", "Venous blood"), p_cut=0.3, test="GSEA") ## ---- results=FALSE, warning=FALSE, message=FALSE----------------------------- plot_dot(test="GSEA", annot.GSEA) ## ---- results=FALSE, warning=FALSE, message=FALSE, fig.show='hide'------------ plot_bar(test="GSEA", annot.GSEA) ## ----------------------------------------------------------------------------- sessionInfo()