This vignette shows how to use SignacX with Seurat and MASC. There are three parts: Seurat, SignacX and then MASC. We use an example data set of kidney cells from AMP from this publication.
Read the CEL-seq2 data.
ReadCelseq <- function(counts.file, meta.file) {
E = suppressWarnings(readr::read_tsv(counts.file))
gns <- E$gene
E = E[, -1]
M = suppressWarnings(readr::read_tsv(meta.file))
S = lapply(unique(M$sample), function(x) {
logik = colnames(E) %in% M$cell_name[M$sample == x]
Matrix::Matrix(as.matrix(E[, logik]), sparse = TRUE)
})
names(S) <- unique(M$sample)
S = lapply(S, function(x) {
rownames(x) <- gns
x
})
S
}
counts.file = "./SDY997_EXP15176_celseq_matrix_ru10_molecules.tsv.gz"
meta.file = "./SDY997_EXP15176_celseq_meta.tsv"
E = ReadCelseq(counts.file = counts.file, meta.file = meta.file)
M = suppressWarnings(readr::read_tsv(meta.file))
Merge into a single matrix
Start with the standard pre-processing steps for a Seurat object.
Create a Seurat object, and then perform SCTransform normalization. Note:
# load data
kidney <- CreateSeuratObject(counts = E, project = "celseq")
# run sctransform
kidney <- SCTransform(kidney)
Perform dimensionality reduction by PCA and UMAP embedding. Note:
First, make sure that you have the SignacX package installed.
Generate cellular phenotype labels for the Seurat object. Note:
Now we can visualize the cell type classifications at many different levels:
kidney <- AddMetaData(kidney, metadata = celltypes$Immune, col.name = "immmune")
kidney <- SetIdent(kidney, value = "immmune")
png(filename = "fls/plot1_amp.png")
DimPlot(kidney)
dev.off()
Immune, Nonimmune (if any) and unclassified cells
kidney <- AddMetaData(kidney, metadata = celltypes$CellTypes, col.name = "celltypes")
kidney <- SetIdent(kidney, value = "celltypes")
png(filename = "fls/plot2_amp.png")
DimPlot(kidney)
dev.off()
Cell types
kidney <- AddMetaData(kidney, metadata = celltypes$CellStates, col.name = "cellstates")
kidney <- SetIdent(kidney, value = "cellstates")
png(filename = "fls/plot3_amp.png")
DimPlot(kidney)
dev.off()
Cell states
Identify immune marker genes (IMAGES)
# Downsample just the immune cells
kidney.small <- kidney[, !celltypes$CellStates %in% c("NonImmune", "Fibroblasts", "Unclassified",
"Endothelial", "Epithelial")]
# Find protein markers for all clusters, and draw a heatmap
markers <- FindAllMarkers(kidney.small, only.pos = TRUE, verbose = F, logfc.threshold = 1)
require(dplyr)
top5 <- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
png(filename = "fls/plot4_amp.png", width = 640, height = 640)
DoHeatmap(kidney.small, features = unique(top5$gene), angle = 90)
dev.off()
Immune marker genes
Meta_mapped = M[match(colnames(kidney), M$cell_name), ]
Meta_mapped$CellStates = celltypes$CellStates
Meta_mapped$disease = factor(Meta_mapped$disease)
Q = MASC(dataset = Meta_mapped, cluster = Meta_mapped$CellStates, contrast = "disease", random_effects = c("plate",
"lane", "sample"))
MASC results reveals that fibroblasts, plasma cells and B memory cells are enriched (p value < 0.05) for disease.
cluster | size | model.pvalue | diseaseSLE.OR | diseaseSLE.OR.95pct.ci.lower | diseaseSLE.OR.95pct.ci.upper |
---|---|---|---|---|---|
B.memory | 391 | 0.0197539 | 3.041576e+00 | 1.1841285 | 7.812654e+00 |
B.naive | 144 | 0.4652596 | 1.548526e+00 | 0.4932181 | 4.861810e+00 |
DC | 160 | 0.0536208 | 2.346694e+00 | 1.0278135 | 5.357951e+00 |
Endothelial | 30 | 0.2290332 | 1.183979e+01 | 0.1021348 | 1.372508e+03 |
Epithelial | 89 | 0.5615659 | 6.424482e-01 | 0.1488724 | 2.772440e+00 |
Fibroblasts | 360 | 0.0174935 | 3.926459e+00 | 2.5047560 | 6.155124e+00 |
Macrophages | 356 | 0.1061625 | 4.700437e-01 | 0.1959415 | 1.127587e+00 |
Mon.Classical | 247 | 0.6346884 | 8.482458e-01 | 0.4416789 | 1.629059e+00 |
Mon.NonClassical | 658 | 0.7360915 | 8.693916e-01 | 0.3903675 | 1.936231e+00 |
Neutrophils | 11 | 0.3806773 | 1.800080e-02 | 0.0000020 | 1.590775e+02 |
NK | 562 | 0.2814722 | 6.046977e-01 | 0.2470403 | 1.480160e+00 |
NonImmune | 112 | 0.1608263 | 7.817140e-02 | 0.0021302 | 2.868614e+00 |
Plasma.cells | 93 | 0.0055213 | 5.387766e+06 | 0.0000000 | 1.579379e+25 |
T.CD4.memory | 283 | 0.5351536 | 6.933100e-01 | 0.2284183 | 2.104379e+00 |
T.CD4.naive | 459 | 0.4627315 | 7.488830e-01 | 0.3546322 | 1.581429e+00 |
T.CD8.cm | 482 | 0.1776758 | 1.917053e+00 | 0.7686826 | 4.781025e+00 |
T.CD8.em | 1196 | 0.7985566 | 1.107881e+00 | 0.5097435 | 2.407877e+00 |
T.CD8.naive | 39 | 0.8117720 | 1.132623e+00 | 0.4022482 | 3.189161e+00 |
T.regs | 84 | 0.6186262 | 1.571148e+00 | 0.2607223 | 9.467957e+00 |
Unclassified | 717 | 0.8900750 | 9.198574e-01 | 0.2840429 | 2.978908e+00 |
Save results
saveRDS(kidney, file = "fls/amp_kidney_signac.rds")
saveRDS(Q, file = "fls/amp_kidney_MASC_result.rds")
saveRDS(celltypes, file = "fls/amp_kidney_celltypes.rds")
Session Info
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /site/ne/home/i0369218/.local/share/r-miniconda/envs/r-reticulate/lib/libopenblasp-r0.3.10.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.5.0 magrittr_1.5 formatR_1.7 htmltools_0.4.0
## [5] tools_3.5.0 yaml_2.2.1 Rcpp_1.0.4.6 stringi_1.4.6
## [9] rmarkdown_2.1 highr_0.8 knitr_1.28 stringr_1.4.0
## [13] digest_0.6.18 xfun_0.12 rlang_0.4.8 evaluate_0.14