## ----knitr-options, include = FALSE-------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
suppressPackageStartupMessages({
    library("splatter")
    library("scater")
    library("VariantAnnotation")
    library("ggplot2")
})
set.seed(42)

## ----quick-start--------------------------------------------------------------
vcf <- mockVCF()
gff <- mockGFF()

sim <- splatPopSimulate(vcf = vcf, gff = gff, sparsify = FALSE)
sim <- logNormCounts(sim)
sim <- runPCA(sim, ncomponents = 5)
plotPCA(sim, colour_by = "Sample")

## ----eqtlEstimate-------------------------------------------------------------
bulk.means <- mockBulkMatrix(n.genes = 100, n.samples = 100)
bulk.eqtl <- mockBulkeQTL(n.genes = 100)
counts <- mockSCE()

params.est <- splatPopEstimate(
    means = bulk.means,
    eqtl = bulk.eqtl,
    counts = counts
)
params.est

## ----splatPopSimulateMeans----------------------------------------------------
params <- newSplatPopParams()

sim.means <- splatPopSimulateMeans(
    vcf = vcf,
    gff = gff,
    params = params
)

round(sim.means$means[1:5, 1:3], digits = 2)

print(sim.means$key[1:5, ], digits = 2)

print(sim.means$conditions)

## ----splatPopSimulateMeans-from-key-------------------------------------------
sim.means.rep2 <- splatPopSimulateMeans(
    vcf = vcf,
    key = sim.means$key,
    params = newSplatPopParams()
)

round(sim.means.rep2$means[1:5, 1:5], digits = 2)

## ----quant-normalize-population-data------------------------------------------
bulk.qnorm <- splatPopQuantNorm(newSplatPopParams(), bulk.means)
round(bulk.qnorm[1:5, 1:5], 3)

## ----empirical-mock-data------------------------------------------------------
emp <- mockEmpiricalSet()
head(emp$eqtl[, c("geneID", "snpID", "snpCHR", "snpLOC", "snpMAF", "slope")])

## ----empirical-based-simulation-----------------------------------------------
emp.sim <- splatPopSimulateMeans(
    vcf = emp$vcf,
    gff = emp$gff,
    eqtl = emp$eqtl,
    means = emp$means,
    params = newSplatPopParams()
)
head(emp.sim$key)

## ----eqtl-splatPopSimulateSC-simple-object------------------------------------
sim.sc <- splatPopSimulateSC(
    params = params,
    key = sim.means$key,
    sim.means = sim.means$means,
    batchCells = 50,
    sparsify = FALSE
)
sim.sc

## ----eqtl-splatPopSimulateSC-simple-plots-------------------------------------
sim.sc <- logNormCounts(sim.sc)
sim.sc <- runPCA(sim.sc, ncomponents = 10)
plotPCA(sim.sc, colour_by = "Sample")

## ----splatPop-simulation-nCells-sampled---------------------------------------
params.nCells <- newSplatPopParams(
    nCells.sample = TRUE,
    nCells.shape = 1.5,
    nCells.rate = 0.01
)

sim.sc.nCells <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.nCells,
    sparsify = FALSE
)

sim.sc.nCells <- logNormCounts(sim.sc.nCells)
sim.sc.nCells <- runPCA(sim.sc.nCells, ncomponents = 2)
plotPCA(sim.sc.nCells, colour_by = "Sample")

## ----group-specific-eQTL-simulations------------------------------------------
params.group <- newSplatPopParams(
    batchCells = 50,
    group.prob = c(0.5, 0.5)
)

sim.sc.gr2 <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.group,
    sparsify = FALSE
)

sim.sc.gr2 <- logNormCounts(sim.sc.gr2)
sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10)
plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample")

## ----group-specific-eQTL-simulations-bigger-----------------------------------
params.group <- newSplatPopParams(
    batchCells = 50,
    similarity.scale = 8,
    eqtl.group.specific = 0.6,
    de.prob = 0.5,
    de.facLoc = 0.5,
    de.facScale = 0.5,
    group.prob = c(0.5, 0.5)
)

sim.sc.gr2 <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.group,
    sparsify = FALSE
)

sim.sc.gr2 <- logNormCounts(sim.sc.gr2)
sim.sc.gr2 <- runPCA(sim.sc.gr2, ncomponents = 10)
plotPCA(sim.sc.gr2, colour_by = "Group", shape_by = "Sample")

## ----simulate-population-with-condition-effects-------------------------------
params.cond <- newSplatPopParams(
    eqtl.n = 0.5,
    batchCells = 50,
    similarity.scale = 5,
    condition.prob = c(0.5, 0.5),
    eqtl.condition.specific = 0.5,
    cde.facLoc = 1,
    cde.facScale = 1
)

sim.pop.cond <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.cond,
    sparsify = FALSE
)

sim.pop.cond <- logNormCounts(sim.pop.cond)
sim.pop.cond <- runPCA(sim.pop.cond, ncomponents = 10)
plotPCA(
    sim.pop.cond,
    colour_by = "Condition",
    shape_by = "Sample",
    point_size = 3
)

## ----simulate-population-with-batch-effects-----------------------------------
params.batches <- newSplatPopParams(
    similarity.scale = 4,
    batchCells = c(20, 20),
    batch.size = 3,
    batch.facLoc = 0.1,
    batch.facScale = 0.2
)

sim.pop.batches <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.batches,
    sparsify = FALSE
)
sim.pop.batches <- logNormCounts(sim.pop.batches)
sim.pop.batches <- runPCA(sim.pop.batches, ncomponents = 10)
plotPCA(
    sim.pop.batches,
    colour_by = "Sample",
    shape_by = "Batch",
    point_size = 3
)

## ----simulate-population-with-different-sized-batch-effects-------------------
params.batches <- newSplatPopParams(
    similarity.scale = 8,
    batchCells = c(5, 5, 5),
    batch.size = 3,
    batch.facLoc = c(0.5, 0.1, 0.1),
    batch.facScale = c(0.6, 0.15, 0.15)
)

sim.pop.batches <- splatPopSimulate(
    vcf = vcf,
    gff = gff,
    params = params.batches,
    sparsify = FALSE
)
sim.pop.batches <- logNormCounts(sim.pop.batches)
sim.pop.batches <- runPCA(sim.pop.batches, ncomponents = 10)
plotPCA(
    sim.pop.batches,
    colour_by = "Sample",
    shape_by = "Batch",
    point_size = 3
)

## ----simulate-population-with-coregulation, fig.height=2----------------------
params.coreg <- newSplatPopParams(eqtl.coreg = 0.4, eqtl.ES.rate = 2)
vcf <- mockVCF(n.sample = 20, n.snps = 1200)
gff <- mockGFF(n.genes = 100)
sim.coreg <- splatPopSimulateMeans(vcf = vcf, gff = gff, params = params.coreg)
key <- sim.coreg$key
sim.cor <- cor(t(sim.coreg$means))
coregSNPs <- names(which(table(sim.coreg$key$eSNP.ID) == 2))

coregCorList <- c()
for (snp in coregSNPs) {
    coregGenes <- row.names(sim.coreg$key)[which(sim.coreg$key$eSNP.ID == snp)]
    coregCorList <- c(coregCorList, sim.cor[coregGenes[1], coregGenes[2]])
}

randCorList <- c()
for (n in 1:1000) {
    genes <- sample(row.names(sim.coreg$key), 2)
    randCorList <- c(randCorList, sim.cor[genes[1], genes[2]])
}

cor.df <- as.data.frame(list(
    type = c(
        rep("coregulated pairs", length(coregCorList)),
        rep("random pairs", length(randCorList))
    ),
    correlation = c(coregCorList, randCorList),
    id = "x"
))

ggplot(cor.df, aes(x = correlation, y = id, color = type, alpha = type)) +
    geom_jitter(size = 2, width = 0.1) +
    scale_alpha_discrete(range = c(0.6, 0.1)) +
    stat_summary(fun = mean, geom = "pointrange", size = 1, alpha = 1) +
    theme_minimal() +
    theme(
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        legend.position = "top"
    ) +
    xlim(c(-1, 1)) +
    scale_color_manual(values = c("#56B4E9", "#999999"))

## ----sessionInfo--------------------------------------------------------------
sessionInfo()