## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----echo = FALSE, results="hide", message=FALSE, loadpkgs--------------------
{library(SingleCellExperiment)
library(SpatialExperiment)
library(ggsc)
library(scuttle)
library(scran)
library(SVP)
library(ggplot2)
library(magrittr)
library(scatterpie)
library(STexampleData)
library(scater)
} |> suppressPackageStartupMessages()

## ----echo=FALSE, fig.width = 12, dpi=400, fig.align="center", fig.cap= "Overview of SVP.", overview----
knitr::include_graphics("./SVP_Framework.png") 

## ----eval=FALSE, INSTALL------------------------------------------------------
# #Once Bioconductor 3.21 is released, you can install it as follows.
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("SVP")
# 
# #Or you can install it via github
# if (!requireNamespace("remotes", quietly=TRUE))
#     install.packages("remotes")
# remotes::install_github("YuLab-SMU/SVP")

## ----runMCA_spe---------------------------------------------------------------
# load the packages required in the vignette
library(SpatialExperiment)
library(SingleCellExperiment)
library(scuttle)
library(scran)
library(SVP)
library(ggsc)
library(ggplot2)
library(magrittr)
library(STexampleData)

# load the spatial transcriptome, it is a SpatialExperiment object
# to build the object, you can refer to the SpatialExperiment package
mob_spe <- ST_mouseOB()  
# the genes that did not express in any captured location were removed.
# we use logNormCounts of scuttle to normalize the spatial transcriptome
mob_spe <- logNormCounts(mob_spe)
# Now the normalized counts was added to the assays of mob_spe as `logcounts`
# it can be extracted via logcounts(mob_spe) or assay(mob_spe, 'logcounts')

# Next, we use `runMCA` of `SVP` to preform the MCA dimensionality reduction
mob_spe <- runMCA(mob_spe, assay.type = 'logcounts')
# The MCA result was added to the reducedDims, it can be extracted by reducedDim(mob_spe, 'MCA')
# more information can refer to SingleCellExperiment package
mob_spe

## ----fig.width = 11, fig.height = 7, fig.align = 'center', fig.cap="The heatmap of spatially variable biocarta pathway", Biocarta----
#Note: the gset.idx.list supports the named gene list, gson object or
# locate gmt file or html url of gmt.
mob_spe <- runSGSA(mob_spe, 
             gset.idx.list = "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/2024.1.Mm/m2.cp.biocarta.v2024.1.Mm.symbols.gmt", # The target gene set gmt file
             assay.type = 'logcounts', # The name of assays of gene profiler
             gsvaExp.name = 'Biocarta' # The name of result to save to gsvaExps slot
           )

# Then the result was added to gsvaExps to return a SVPExperiment object, the result
# can be extracted with gsvaExp, you can view more information via help(gsvaExp).
mob_spe

gsvaExpNames(mob_spe)

# The result is also a SingleCellExperiment or SpatialExperiment.
gsvaExp(mob_spe, 'Biocarta')

# identify the spatially variable Biocarta pathway.
Biocarta.svdf <- gsvaExp(mob_spe, "Biocarta") |>
 runDetectSVG(
   assay.type = 1,
   action = 'only'
 )

Biocarta.svdf |> head()

# We can obtain significant spatially variable pathways, 
# which are sorted according to the Moran index.
# More details see also the Spatial statistical analysis session
Biocarta.svdf |> dplyr::filter(padj<=0.01) |> dplyr::arrange(rank) |> head()

ids.sv.biocarta <- Biocarta.svdf |> dplyr::filter(padj<=0.01) |> dplyr::arrange(rank) |> rownames()

# We use sc_spatial of ggsc to visualize the distribution of some spatially variable functions
gsvaExp(mob_spe, 'Biocarta') %>% 
  sc_spatial(
    features = ids.sv.biocarta[seq(12)],
    mapping = aes(x = x, y = y),
    geom = geom_bgpoint,
    pointsize = 3.5,
    ncol = 4,
    common.legend = FALSE # The output is a patchwork 
  ) & 
  scale_color_viridis_c(option='B', guide='none')


## ----fig.width = 4.5, fig.height = 3.5, fig.align = 'center', fig.cap='The pie plot of cell-type activity', runSGSA_free----
# The marker gene sets
# We can view the marker gene number of each cell-type.
data(mob_marker_genes)
lapply(mob_marker_genes, length) |> unlist()

mob_spe <- runSGSA(
                mob_spe, 
                gset.idx.list = mob_marker_genes,
                gsvaExp.name = 'CellTypeFree', 
                assay.type = 'logcounts'
              )

# Then we can visualize the cell-type activity via sc_spatial of SVP with pie plot
gsvaExp(mob_spe, "CellTypeFree") |> 
   sc_spatial(
     features = sort(names(mob_marker_genes)),
     mapping = aes(x = x, y = y),
     plot.pie = TRUE,
     color = NA,
     pie.radius.scale = 1.1
   ) +
   scale_fill_brewer(palette='Set2')


## ----fig.align = 'center', fig.width = 11, fig.height=2.2, fig.cap = 'the heatmap of each cell-type activity', heatmap_celltype----
# calculated the of cell-type
assay(gsvaExp(mob_spe, 2)) |> as.matrix() |> 
  prop.table(2) |> 
  Matrix::Matrix(sparse=TRUE) -> 
  assay(gsvaExp(mob_spe,2), "prop")


mob_spe |> gsvaExp(2) |> 
   sc_spatial(
     features = sort(names(mob_marker_genes)),
     mapping = aes(x = x,  y = y),
     geom = geom_bgpoint,
     pointsize = 2,
     ncol = 5,
     slot = 2
   ) + 
   scale_color_viridis_c()

## ----runDetectMarker----------------------------------------------------------
# load the reference single cell transcriptome
# it is a SingleCellExperiment
data(mob_sce) 

mob_sce <- logNormCounts(mob_sce)

# You can also first obtain the high variable genes, then
# perform the MCA reduction.
# set.seed(123)
# stats <- modelGeneVar(mob_sce)
# hvgs <- getTopHVGs(stats)

# perform the MCA reduction.
mob_sce <- mob_sce |> runMCA(assay.type = 'logcounts')

# obtain the marker gene sets from reference single-cell transcriptome based on MCA space
refmakergene <- runDetectMarker(mob_sce, group.by='cellType', 
  ntop=200, present.prop.in.group=.2, present.prop.in.sample=.5
  )
refmakergene |> lapply(length) |> unlist()

## ----fig.width = 4.5, fig.height = 3.5, fig.align = 'center', fig.cap='The pie plot of cell-type activity with the marker gene sets from reference single-cell transcriptome', runSGSA_ref----
# use the maker gene from the reference single-cell transcriptome
mob_spe <- runSGSA(
  mob_spe,
  gset.idx.list = refmakergene,
  gsvaExp.name = 'CellTypeRef'
)

mob_spe |> 
  gsvaExp('CellTypeRef') |> 
  sc_spatial(
    features = sort(names(refmakergene)), 
    mapping = aes(x = x, y = y),
    plot.pie = TRUE, 
    color = NA, 
    pie.radius.scale = 1.1
  ) +
  scale_fill_brewer(palette = 'Set2')

## ----fig.width = 4.5, fig.height=3.5, fig.align = 'center', fig.cap = 'The major cell type for each spot'----
# the result will be added to the colData of gsvaExp(mob_spe, "CellTypeRef")
mob_spe <- mob_spe |> pred.cell.signature(gsvaexp='CellTypeFree', pred.col.name='pred')

# Then we can use sc_spatial to visualize it.
mob_spe |>
  gsvaExp("CellTypeFree") |>
  sc_spatial(
    mapping = aes(x =  x, y = y, color = pred),
    geom = geom_bgpoint,
    pointsize = 4
  ) +
  scale_color_brewer(palette = 'Set2')

## ----runDetectSVG_I-----------------------------------------------------------

# First approach:
# we can first obtain the target gsvaExp with gsvaExp names via 
# gsvaExp() function, for example, CellTypeFree
mob_spe |> gsvaExp("CellTypeFree") 

# Then we use runDetectSVG with method = 'moran' to identify the spatial variable 
# cell-types with global Moran's I.

celltype_free.I <- mob_spe |> gsvaExp("CellTypeFree") |>
  runDetectSVG(
    assay.type = "prop", # because default is 'logcounts', it should be adjused
    method = 'moransi',
    action = 'only'
  ) 
  
celltype_free.I |> dplyr::arrange(rank) |> head()

# Second approach:
# the result was added to the original object by specific the 
# gsvaexp argument in the runDetectSVG
mob_spe <- mob_spe |> 
  runDetectSVG(
    gsvaexp = 'CellTypeFree',  
    method = 'moran',
    gsvaexp.assay.type = 'prop'
    )
gsvaExp(mob_spe, 'CellTypeFree') |> svDf("sv.moransi") |> dplyr::arrange(rank) |> dplyr::filter(padj<=0.01)

## ----runDetectSVG_C-----------------------------------------------------------
# using Geary's C 
mob_spe <- mob_spe |>
  runDetectSVG(
    gsvaexp = 'CellTypeFree',
    gsvaexp.assay.type = 'prop',
    method = 'geary'
  )

gsvaExp(mob_spe, "CellTypeFree") |> svDf('sv.gearysc') |> dplyr::arrange(rank) |> dplyr::filter(padj<=0.01)

## ----runDetectSVG_G-----------------------------------------------------------
# using Getis-ord's G
mob_spe <- mob_spe |>
  runDetectSVG(
    gsvaexp = 'CellTypeFree',
    gsvaexp.assay.type = 'prop',
    method = 'getis'
  )

gsvaExp(mob_spe, "CellTypeFree") |> svDf('sv.getisord') |> dplyr::arrange(rank) |> dplyr::filter(padj<=0.01) 

## ----runLISA_G----------------------------------------------------------------
mob_spe_cellfree <- mob_spe |> gsvaExp("CellTypeFree")

mob_spe_cellfree <- mob_spe_cellfree |> 
    runPCA(assay.type='prop') |> runTSNE(dimred='PCA')

# Here, we use the `TSNE` space to identify the location clustering regions
celltypefree_lisares <- mob_spe_cellfree |> 
   runLISA(
     features = c("GC", "OSNs", "M/TC", "PGC"), 
     assay.type = 'prop', 
     reduction.used = 'TSNE', 
     weight.method = 'knn', 
     k = 8
   ) 

celltypefree_lisares$GC |> head()

## ----fig.width = 9, fig.height = 2, fig.align = 'center', fig.cap="The result of local Getis-Ord for cell-type", LISA_G_fig----
gsvaExp(mob_spe, 'CellTypeFree') |> 
  plot_lisa_feature(
    lisa.res = celltypefree_lisares, 
    assay.type = 2, 
    pointsize = 2,
    gap_line_width = .18,
    hlpointsize= 1.8
  )


## ----runGLOBALBV--------------------------------------------------------------
gbv.res <- mob_spe |> 
  gsvaExp('CellTypeFree') |> 
  runGLOBALBV(
    features1 = c("GC", "OSNs", "M/TC", "PGC"),
    assay.type = 2, 
    permutation = NULL, # if permutation is NULL, mantel test will be used to calculated the pvalue
    add.pvalue = TRUE,
    alternative = 'greater' # one-side test 
  )
  
# gbv.res is a list contained Lee and pvalue matrix.
gbv.res |> as_tbl_df(diag=FALSE, flag.clust = TRUE) |> 
  dplyr::arrange(desc(Lee))

## ----fig.width = 6, fig.height = 5.5, fig.align = 'center', fig.cap="The results of clustering analysis of spatial distribution between cell-type", fig_gbv----
# We can also display the result of global univariate spatial analysis 
moran.res <- gsvaExp(mob_spe, 'CellTypeFree') |> svDf("sv.moransi")

# The result of local univariate spatial analysis also can be added
lisa.f1.res <- gsvaExp(mob_spe, "CellTypeFree", withColData = TRUE) |> 
    cal_lisa_f1(lisa.res = celltypefree_lisares, group.by = 'layer', rm.group.nm=NA)

plot_heatmap_globalbv(gbv.res, moran.t=moran.res, lisa.t=lisa.f1.res)

## ----fig.width = 4.5, fig.height=6, fig.align = 'center', fig.cap = "The heatmap of spatial correlation between cell tyep and Biocarta states", runGLOBALBV_CellType_Biocarta----
# If the number of features is excessive, we recommend selecting 
# those with spatial variability.
 celltypeid <- gsvaExp(mob_spe, "CellTypeFree") |> svDf("sv.moransi") |> 
   dplyr::filter(padj<=0.01) |> rownames()

biocartaid <- gsvaExp(mob_spe, "Biocarta") |> 
   runDetectSVG(assay.type = 1, verbose=FALSE) |> 
   svDf() |> 
   dplyr::filter(padj <= 0.01) |>
   rownames()

runGLOBALBV(
  mob_spe, 
  gsvaexp = c("CellTypeFree", "Biocarta"), 
  gsvaexp.features = c(celltypeid, biocartaid[seq(20)]), 
  gsvaexp.assay.type = c(2, 1),
  permutation = NULL, 
  add.pvalue = TRUE
) -> res.gbv.celltype.biocarta

plot_heatmap_globalbv(res.gbv.celltype.biocarta)

## ----fig.width = 8.5, fig.height=2.5, fig.align = 'center', fig.cap = "The heatmap of spatial correlation between cell tyep and spatially variable genes", runGLOBALBV_CellType_genes----
svgid <- mob_spe |> runDetectSVG(assay.type = 'logcounts') |> svDf() |>
         dplyr::filter(padj <= 0.01) |> dplyr::arrange(rank) |> rownames()

res.gbv.genes.celltype <- runGLOBALBV(mob_spe,
  features1 = svgid[seq(40)],
  gsvaexp = 'CellTypeFree',
  gsvaexp.features = celltypeid,
  gsvaexp.assay.type = 2,
  permutation = NULL,
  add.pvalue = TRUE
)
plot_heatmap_globalbv(res.gbv.genes.celltype)

## ----fig.width = 9, fig.height = 2, fig.align = "center", fig.cap='The heatmap of Local bivariate spatial analysis', runLOCALBV----
lbv.res <- mob_spe |> 
  runLOCALBV(features1=c("Psd", "Nrgn", "Kcnh3", "Gpsm1", "Pcp4"),
    assay.type = 'logcounts',
    gsvaexp = 'CellTypeFree',
    gsvaexp.features=c('GC'),
    gsvaexp.assay.type = 'prop',
    weight.method='knn',
    k = 8
  ) 

# The result can be added to gsvaExp, then visualized by ggsc.
LISAsce(mob_spe, lisa.res=lbv.res, gsvaexp.name='LBV.res') |> 
  gsvaExp("LBV.res") %>% 
  plot_lisa_feature(lisa.res = lbv.res, assay.type = 1)

## ----echo=FALSE---------------------------------------------------------------
sessionInfo()