## ---- install-packages, eval=FALSE--------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE)) {
#      install.packages("BiocManager")
#  }
#  BiocManager::install(c("SummarizedExperiment", "ExperimentHub", "FieldEffectCrc"))

## ---- load-hub----------------------------------------------------------------
library(SummarizedExperiment)
library(ExperimentHub)

## ---- find-data---------------------------------------------------------------
hub = ExperimentHub()
## simply list the resource names
ns <- listResources(hub, "FieldEffectCrc")
ns
## query the hub for full metadata
crcData <- query(hub, "FieldEffectCrc")
crcData
## extract metadata
df <- mcols(crcData)
df
## see where the cache is stored
hc <- hubCache(crcData)
hc

## ---- explore-file------------------------------------------------------------
md <- hub["EH3524"]
md

## ---- download-data-----------------------------------------------------------
## using resource ID
data1 <- hub[["EH3524"]]
data1
## using loadResources()
data2 <- loadResources(hub, "FieldEffectCrc", "cohort A")
data2

## ---- phenotypes--------------------------------------------------------------
summary(colData(data1)$sampType)

## ---- quick-start-1-----------------------------------------------------------
se <- data1
counts1 <- assays(se)[["counts"]]

## ---- quick-start-2-----------------------------------------------------------
counts2 <- assay(se, 2)

## ---- quick-start-3-----------------------------------------------------------
all(counts1==counts2)

## ---- make-txi----------------------------------------------------------------
## manual construction
txi <- as.list(assays(se))
txi$countsFromAbundance <- "no"
## convenience function construction
txi <- make_txi(se)

## ---- make-dds----------------------------------------------------------------
if (!requireNamespace("DESeq2", quietly=TRUE)) {
    BiocManager::install("DESeq2")
}
library(DESeq2)
dds <- DESeqDataSetFromTximport(txi, colData(se), ~ sampType)

## ---- typical-filter----------------------------------------------------------
countLimit <- 10
sampleLimit <- (1/3) * ncol(dds)
keep <- rowSums( counts(dds) > countLimit ) >= sampleLimit
dds <- dds[keep, ]

## ---- special-filter----------------------------------------------------------
r <- sample(seq_len(nrow(dds)), 6000)
c <- sample(seq_len(ncol(dds)), 250)
dds <- dds[r, c]
r <- rowSums(counts(dds)) >= 10
dds <- dds[r, ]

## ---- size-factors------------------------------------------------------------
dds <- DESeq(dds)

## ---- prep-input--------------------------------------------------------------
dat <- counts(dds, normalized=TRUE)  ## extract the normalized counts
mod <- model.matrix(~sampType, data=colData(dds))  ## set the full model
mod0 <- model.matrix(~1, data=colData(dds))  ## set the null model

## ---- get-nsv-----------------------------------------------------------------
if (!requireNamespace("sva", quietly=TRUE)) {
    BiocManager::install("sva")
}
library(sva)
nsv <- num.sv(dat, mod, method=c("be"), B=20, seed=1)  ## Buja Eyuboglu method

## ---- get-svs-----------------------------------------------------------------
svs <- svaseq(dat, mod, mod0, n.sv=nsv)

## ---- add-svs-----------------------------------------------------------------
for (i in seq_len(svs$n.sv)) {
    newvar <- paste0("sv", i)
    colData(dds)[ , newvar] <- svs$sv[, i]
}
nvidx <- (ncol(colData(dds)) - i + 1):ncol(colData(dds))
newvars <- colnames(colData(dds))[nvidx]
d <- formula(
    paste0("~", paste(paste(newvars, collapse="+"), "sampType", sep="+"))
)
design(dds) <- d

## ---- test-de-----------------------------------------------------------------
dds <- DESeq(dds)

## ---- results-----------------------------------------------------------------
cons <- list()
m <- combn(levels(colData(dds)$sampType), 2)
for (i in seq_len(ncol(m))) {
    cons[[i]] <- c("sampType", rev(m[, i]))
    names(cons) <- c(
        names(cons)[seq_len(length(cons) - 1)], paste(rev(m[, i]), collapse="v")
    )
}
res <- list()
for (i in seq_len(length(cons))) {
    res[[i]] <- results(dds, contrast=cons[[i]], alpha=0.05)  ## default alpha is 0.1
}
names(res) <- names(cons)

## ---- display-----------------------------------------------------------------
lapply(res, head)
lapply(res, summary)

## ---- sessionInfo-------------------------------------------------------------
sessionInfo()