## ----installation, eval=FALSE-------------------------------------------------
#  install.packages("BiocManager")
#  BiocManager::install("TEKRABber")

## ----load package, message=FALSE----------------------------------------------
library(TEKRABber)
library(SummarizedExperiment) # load it if you are running this tutorial

## ----load built-in data (two species)-----------------------------------------
# load built-in data
data(speciesCounts)
hmGene <- speciesCounts$hmGene
hmTE <- speciesCounts$hmTE
chimpGene <- speciesCounts$chimpGene
chimpTE <- speciesCounts$chimpTE
# the first column must be Ensembl gene ID for gene, and TE name for TE
head(hmGene)

## ----search species name, eval=FALSE------------------------------------------
#  # You can use the code below to search for species name
#  ensembl <- biomaRt::useEnsembl(biomart = "genes")
#  biomaRt::listDatasets(ensembl)

## ----orthology and normalizeation, message=FALSE------------------------------
# In order to save time, we have save the data for this tutorial.
data(fetchDataHmChimp)
fetchData <- fetchDataHmChimp

# Query the data and calculate scaling factor using orthologScale():
#' data(speciesCounts)
#' data(hg38_panTro6_rmsk)
#' hmGene <- speciesCounts$hmGene
#' chimpGene <- speciesCounts$chimpGene
#' hmTE <- speciesCounts$hmTE
#' chimpTE <- speciesCounts$chimpTE
#' 
#' ## For demonstration, here we only select 1000 rows to save time
#' set.seed(1234)
#' hmGeneSample <- hmGene[sample(nrow(hmGene), 1000), ]
#' chimpGeneSample <- chimpGene[sample(nrow(chimpGene), 1000), ]
#' 
#' fetchData <- orthologScale(
#'     speciesRef = "hsapiens",
#'     speciesCompare = "ptroglodytes",
#'     geneCountRef = hmGeneSample,
#'     geneCountCompare = chimpGeneSample,
#'     teCountRef = hmTE,
#'     teCountCompare = chimpTE,
#'     rmsk = hg38_panTro6_rmsk
#' )

## ----create input files, warning=FALSE----------------------------------------
inputBundle <- DECorrInputs(fetchData)

## ----DE analysis (two species), message=FALSE, results='hide', warning=FALSE----
meta <- data.frame(
    species = c(rep("human", ncol(hmGene) - 1), 
    rep("chimpanzee", ncol(chimpGene) - 1))
)

meta$species <- factor(meta$species, levels = c("human", "chimpanzee"))
rownames(meta) <- colnames(inputBundle$geneInputDESeq2)
hmchimpDE <- DEgeneTE(
    geneTable = inputBundle$geneInputDESeq2,
    teTable = inputBundle$teInputDESeq2,
    metadata = meta,
    expDesign = TRUE
)

## ----correlation (two species), warning=FALSE---------------------------------
# load built-in data
data(speciesCorr)
hmGeneCorrInput <- assay_tekcorrset(speciesCorr, "gene", "human")
hmTECorrInput <- assay_tekcorrset(speciesCorr, "te", "human")
chimpGeneCorrInput <- assay_tekcorrset(speciesCorr, "gene", "chimpanzee")
chimpTECorrInput <- assay_tekcorrset(speciesCorr, "te", "chimpanzee")

hmCorrResult <- corrOrthologTE(
    geneInput = hmGeneCorrInput,
    teInput = hmTECorrInput,
    corrMethod = "pearson",
    padjMethod = "fdr"
)

chimpCorrResult <- corrOrthologTE(
    geneInput = chimpGeneCorrInput,
    teInput = chimpTECorrInput,
    corrMethod = "pearson",
    padjMethod = "fdr"
)

head(hmCorrResult)

## ----app visualize (two species), warning=FALSE, eval=FALSE-------------------
#  #create global variables for app-use
#  appDE <- hmchimpDE
#  appRef <- hmCorrResult
#  appCompare <- chimpCorrResult
#  appMeta <- meta # this is the same one in DE analysis
#  
#  appTEKRABber()

## ----load built-in data (same species)----------------------------------------
# load built-in data
data(ctInputDE)
geneInputDE <- ctInputDE$gene
teInputDE <- ctInputDE$te

# you need to follow the input format as below
head(geneInputDE)

## ----DE analysis (same species), warning=FALSE, results='hide', message=FALSE----
metaExp <- data.frame(experiment = c(rep("control", 5), rep("treatment", 5)))
rownames(metaExp) <- colnames(geneInputDE)
metaExp$experiment <- factor(
    metaExp$experiment, 
    levels = c("control", "treatment")
)

resultDE <- DEgeneTE(
    geneTable = geneInputDE,
    teTable = teInputDE,
    metadata = metaExp,
    expDesign = FALSE
)

## ----load built-in data (same species correlation), warning=FALSE-------------
# load built-in data
data(ctCorr)
geneConCorrInput <- assay_tekcorrset(ctCorr, "gene", "control")
teConCorrInput <- assay_tekcorrset(ctCorr, "te", "control")
geneTreatCorrInput <- assay_tekcorrset(ctCorr, "gene", "treatment")
teTreatCorrInput <- assay_tekcorrset(ctCorr, "te", "treatment")

# you need to follow the input format as below
head(geneConCorrInput)

## ----correlation (same species), warning=FALSE--------------------------------
controlCorr <- corrOrthologTE(
    geneInput = geneConCorrInput,
    teInput = teConCorrInput,
    corrMethod = "pearson",
    padjMethod = "fdr"
)

treatmentCorr <- corrOrthologTE(
    geneInput = geneTreatCorrInput,
    teInput = teTreatCorrInput,
    corrMethod = "pearson",
    padjMethod = "fdr"
)

head(treatmentCorr)

## ----app visualize (same species), warning=FALSE, eval=FALSE------------------
#  appDE <- resultDE
#  appRef <- controlCorr
#  appCompare <- treatmentCorr
#  appMeta <- metaExp
#  
#  appTEKRABber()

## -----------------------------------------------------------------------------
sessionInfo()