## ----echo = FALSE, message = FALSE--------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tibble.print_min = 4, tibble.print_max = 4)

## ----eval = FALSE-------------------------------------------------------------
# ## Install BiocManager, which is required to install packages from Bioconductor
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# 
# BiocManager::install(version = "devel")
# BiocManager::install("TENET")

## ----eval = FALSE-------------------------------------------------------------
# ## Install prerequisite packages to install the development version from GitHub
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# if (!requireNamespace("remotes", quietly = TRUE)) {
#     install.packages("remotes")
# }
# 
# BiocManager::install(version = "devel")
# BiocManager::install("rhielab/TENET.ExperimentHub")
# BiocManager::install("rhielab/TENET")

## ----message = FALSE----------------------------------------------------------
library(TENET)

## ----message = FALSE----------------------------------------------------------
library(TENET.ExperimentHub)

## ----message = FALSE----------------------------------------------------------
library(TENET.AnnotationHub)

## ----message = FALSE----------------------------------------------------------
## Load the MultiAssayExperiment package. This is not strictly necessary, but
## allows the user to avoid typing MultiAssayExperiment:: before its functions.
library(MultiAssayExperiment)

## -----------------------------------------------------------------------------
## Load in the example MultiAssayExperiment dataset from the TENET.ExperimentHub
## package
exampleTENETMultiAssayExperiment <-
    TENET.ExperimentHub::exampleTENETMultiAssayExperiment()

## Examine the SummarizedExperiments that should be contained in a
## MultiAssayExperiment object appropriate for use in TENET analyses, including
## the "expression" object
experiments(exampleTENETMultiAssayExperiment)

class(
    experiments(
        exampleTENETMultiAssayExperiment
    )[["expression"]]
)

## Examine data for the first 6 genes and samples in the assay object of the
## "expression" SummarizedExperiment object
assays(
    experiments(
        exampleTENETMultiAssayExperiment
    )[["expression"]]
)[[1]][
    seq_len(6), seq_len(6)
]

## -----------------------------------------------------------------------------
## Examine the rowRanges of the "expression" SummarizedExperiment object for the
## first genes.
## Note: Additional columns are included here, but only the chromosome
## (seqnames), coordinates (rowRanges), strand, and geneName columns are used.
head(
    rowRanges(
        experiments(
            exampleTENETMultiAssayExperiment
        )[["expression"]]
    )
)

## The names of this object are the gene IDs
head(
    names(
        rowRanges(
            experiments(
                exampleTENETMultiAssayExperiment
            )[["expression"]]
        )
    )
)

## -----------------------------------------------------------------------------
## Again, examine the SummarizedExperiments included in the MultiAssayExperiment
## object, focusing on the "methylation" object here
experiments(exampleTENETMultiAssayExperiment)

class(
    experiments(
        exampleTENETMultiAssayExperiment
    )[["methylation"]]
)

## Examine data for the first six RE DNA methylation sites and samples in the
## assay object of the "methylation" SummarizedExperiment object
assays(
    experiments(
        exampleTENETMultiAssayExperiment
    )[["methylation"]]
)[[1]][
    seq_len(6), seq_len(6)
]

## -----------------------------------------------------------------------------
## Examine the rowRanges of the "methylation" SummarizedExperiment object for
## the first six RE DNA methylation sites.
## Note: Additional columns are included here, but only the chromosome
## (seqnames) and coordinates (ranges) are used.
head(
    rowRanges(
        experiments(
            exampleTENETMultiAssayExperiment
        )[["methylation"]]
    )[, seq_len(6)]
)

## The names of this object are the RE DNA methylation site IDs (usually probe
## IDs)
head(
    names(
        rowRanges(
            experiments(
                exampleTENETMultiAssayExperiment
            )[["methylation"]]
        )
    )
)

## -----------------------------------------------------------------------------
## Examine the number of rows in the colData of the MultiAssayExperiment object
## compared to the number of samples (columns) in the "expression" and
## "methylation" summarized experiment objects. The number of patient entries
## does not need to match the number of samples included in the "expression" or
## "methylation" objects, as a single "Control" and "Case" sample could be
## derived from the same patient (though ideally, no more than one of each)
nrow(colData(exampleTENETMultiAssayExperiment))

experiments(exampleTENETMultiAssayExperiment)

## Examine some of the rownames, which should contain a unique identifier for
## each patient. These will be used in the MultiAssayExperiment's sampleMap
## object to match them to the samples included in the "expression" and
## "methylation" objects
rownames(colData(exampleTENETMultiAssayExperiment))[seq_len(6)]

## -----------------------------------------------------------------------------
## The sampleMap object should contain a row for each of the samples included
## in both the "expression" and "methylation" objects
nrow(sampleMap(exampleTENETMultiAssayExperiment))

experiments(exampleTENETMultiAssayExperiment)

## 4 columns of data should be included in the sampleMap, "assay", "primary",
## "colname", and "sampleType"
colnames(sampleMap(exampleTENETMultiAssayExperiment))

## The "assay" column should be a factor which lists which data object each
## sample is from ("expression", or "methylation)
sampleMap(exampleTENETMultiAssayExperiment)$assay[seq_len(6)]

levels(sampleMap(exampleTENETMultiAssayExperiment)$assay)

table(sampleMap(exampleTENETMultiAssayExperiment)$assay)

## The "primary" column should note which of the patient IDs from the
## MultiAssayExperiment's colData object each sample maps to.
sampleMap(exampleTENETMultiAssayExperiment)$primary[seq_len(6)]

table(
    sampleMap(exampleTENETMultiAssayExperiment)$primary %in%
        rownames(colData(exampleTENETMultiAssayExperiment))
)

## The "colname" column should include the sample names of each of the samples
## as they are listed in the colnames of either the "expression" or
## "methylation" SummarizedExperiments' assay objects
sampleMap(exampleTENETMultiAssayExperiment)$colname[seq_len(6)]

table(
    sampleMap(exampleTENETMultiAssayExperiment)$colname %in% c(
        colnames(
            assays(
                experiments(
                    exampleTENETMultiAssayExperiment
                )[["expression"]]
            )[[1]]
        ),
        colnames(
            assays(
                experiments(
                    exampleTENETMultiAssayExperiment
                )[["methylation"]]
            )[[1]]
        )
    )
)

## Finally, the "sampleType" column should list whether each sample in the
## "expression" or "methylation" SummarizedExperiment objects is a "Case" or
## "Control" sample for the purposes of TENET analyses.
sampleMap(exampleTENETMultiAssayExperiment)$sampleType[seq_len(6)]

table(sampleMap(exampleTENETMultiAssayExperiment)$sampleType)

## ----eval = FALSE-------------------------------------------------------------
# ## This example first creates a dataset of putative enhancer regulatory elements
# ## from consensus datasets and breast invasive carcinoma-relevant sources
# ## collected in the TENET.AnnotationHub package, then runs the step 2 through
# ## step 6 functions analyzing RE DNA methylation sites in potential
# ## enhancer elements located over 1500 bp from transcription start sites
# ## listed for genes and transcripts in the GENCODE v36 human genome
# ## annotations (already contained in the exampleTENETMultiAssayExperiment
# ## object loaded earlier), using a minimum case sample count of 5 and one
# ## CPU core to perform the analysis.
# exampleObject <- easyTENET(
#     TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
#     extHM = NA,
#     extNDR = NA,
#     publicEnhancer = TRUE,
#     publicNDR = TRUE,
#     cancerType = "BRCA",
#     ENCODEdELS = TRUE,
#     minCaseCount = 5
# )
# 
# ## The exampleObject should have data from the step 2 through step 6 functions,
# ## as the dataset created by the step1MakeExternalDatasets function is used by
# ## and saved in the output of the step2GetDifferentiallyMethylatedSites
# ## function.
# 
# ## See the types of data that were saved by the step 2 function
# names(metadata(exampleObject)$step2GetDifferentiallyMethylatedSites)
# 
# ## See the GRanges object created by the step 1 function
# metadata(
#     exampleObject
# )$step2GetDifferentiallyMethylatedSites$regulatoryElementGRanges
# 
# ## See the types of data that were saved by the step 3 function
# names(metadata(exampleObject)$step3GetAnalysisZScores)
# 
# ## See the types of data that were saved by the step 4 function
# names(
#     metadata(exampleObject)$step4SelectMostSignificantLinksPerDNAMethylationSite
# )
# 
# ## See the types of data that were saved by the step 5 function
# names(metadata(exampleObject)$step5OptimizeLinks)
# 
# ## See the types of data that were saved by the step 6 function
# names(
#     metadata(
#         exampleObject
#     )$step6DNAMethylationSitesPerGeneTabulation
# )

## -----------------------------------------------------------------------------
## Create an example GRanges object representing putative enhancer regions for
## BRCA using all available enhancer-relevant BRCA datasets present in the
## TENET.ExperimentHub package. These datasets include consensus enhancer
## histone mark and open chromatin datasets from a wide variety of tissue and
## cell types, enhancer histone mark and open chromatin datasets from a
## variety of BRCA-relevant samples from the Cistrome database and TCGA, as well
## as identified distal enhancer regions from the ENCODE SCREEN project.
step1Output <- step1MakeExternalDatasets(
    consensusEnhancer = TRUE,
    consensusNDR = TRUE,
    publicEnhancer = TRUE,
    publicNDR = TRUE,
    cancerType = "BRCA",
    ENCODEdELS = TRUE
)

## View the first regions in the created GRanges object
head(step1Output)

## -----------------------------------------------------------------------------
## Identify differentially methylated RE DNA methylation sites using the
## step2GetDifferentiallyMethylatedSites function, using the
## exampleTENETMultiAssayExperiment object loaded previously from the
## TENET.ExperimentHub package as a reference, and the GRanges object that was
## just created using the step1MakeExternalDatasets function. At least 5 case
## samples in the dataset are required to show methylation levels above/below
## the hyper/hypomethylation cutoff for a given RE DNA methylation site to be
## potentially considered differentially methylated.
## All transcription start sites (TSS) included in the rowRanges of the
## elementMetadata of the TENETMultiAssayExperiment object will be considered
## when selecting enhancer DNA methylation sites (which must be at least 1500
## bp from these TSS).
exampleObject <- step2GetDifferentiallyMethylatedSites(
    TENETMultiAssayExperiment = exampleTENETMultiAssayExperiment,
    regulatoryElementGRanges = step1Output,
    minCaseCount = 5
)

## See the data that were saved by the step 2 function
names(metadata(exampleObject)$step2GetDifferentiallyMethylatedSites)

## Since cutoffs were set automatically by the step 2 function in this case,
## we can see what they are set to, using the hypomethylation cutoff as an
## example.
metadata(
    exampleObject
)$step2GetDifferentiallyMethylatedSites$hypomethCutoff

## The identities of all identified RE DNA methylation sites, as well as the
## methylated, unmethylated, and most importantly, hyper- and hypomethylated
## RE DNA methylation sites are also recorded in the siteIdentitiesList. To
## demonstrate this, view the first hypomethylated RE DNA methylation sites
## that were identified.
head(
    metadata(
        exampleObject
    )$step2GetDifferentiallyMethylatedSites$siteIdentitiesList$
        hypomethylatedSites
)

## -----------------------------------------------------------------------------
## Identify significant Z-scores and initial RE DNA methylation site-gene links
## using the exampleTENETMultiAssayExperiment with results from the
## step2GetDifferentiallyMethylatedSites function. For this analysis, we will
## use the one-sample Z-score function, consider only TFs, rather than all
## genes, and save only significant Z-scores, to cut down on computational time
## and reduce the size of the returned MultiAssayExperiment object. Two CPU
## cores will be used if they exist (except on Windows where the
## parallel::mclapply function is not supported).
exampleObject <- step3GetAnalysisZScores(
    TENETMultiAssayExperiment = exampleObject,
    pValue = 0.05,
    TFOnly = TRUE,
    zScoreCalculation = "oneSample",
    hypermethAnalysis = TRUE,
    hypomethAnalysis = TRUE,
    includeControl = FALSE,
    sparseResults = TRUE,
    coreCount = min(
        parallel::detectCores(), ifelse(.Platform$OS.type != "windows", 2, 1)
    )
)

## See the data that were saved by the step 3 function. They are subdivided into
## hypermeth and/or hypometh results based on function options.
names(
    metadata(
        exampleObject
    )$step3GetAnalysisZScores
)

## Since the sparseResults argument was set to TRUE, only
## significant Z-scores are saved, and since the TFOnly argument was also set
## to TRUE, only TF genes were analyzed.
## View the significant Z scores for the first TF genes with links to
## hypomethylated RE DNA methylation sites.
head(
    metadata(
        exampleObject
    )$step3GetAnalysisZScores$hypomethResults
)

## -----------------------------------------------------------------------------
## Get the 25 (if present) most significant links per RE DNA methylation site
## identified by the step3GetAnalysisZScores function
exampleObject <- step4SelectMostSignificantLinksPerDNAMethylationSite(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = TRUE,
    hypomethGplusAnalysis = TRUE,
    linksPerREDNAMethylationSiteMaximum = 25
)

## See the data that were saved by the step 4 function. They are subdivided into
## hypermeth and/or hypometh results based on function options.
names(
    metadata(exampleObject)$step4SelectMostSignificantLinksPerDNAMethylationSite
)

## View the results for the most significant links to the hypomethylated RE
## DNA methylation sites
head(
    metadata(
        exampleObject
    )$step4SelectMostSignificantLinksPerDNAMethylationSite$hypomethGplusResults
)

## -----------------------------------------------------------------------------
## Identify final optimized RE DNA methylation site-gene links
exampleObject <- step5OptimizeLinks(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = TRUE,
    hypomethGplusAnalysis = TRUE,
    expressionPvalCutoff = 0.05
)

## See the data that were saved by the step 5 function. They are again
## subdivided into hypermeth and/or hypometh results based on function options.
names(metadata(exampleObject)$step5OptimizeLinks)

## Check the results, which include various metrics used to priortize the
## optimized final RE DNA methylation site-gene links.
## This is a subsection of the data frame detailing all the hypomethylated RE
## DNA methylation site-gene links as an example.
head(
    metadata(
        exampleObject
    )$step5OptimizeLinks$hypomethGplusResults
)

## -----------------------------------------------------------------------------
## Prioritize the top genes by adding up the number of RE DNA methylation sites
## linked to each of the genes
exampleObject <- step6DNAMethylationSitesPerGeneTabulation(
    TENETMultiAssayExperiment = exampleObject
)

## See the data that were saved by the step 6 function. They are again
## subdivided into hypermeth and/or hypometh results based on function options.
names(
    metadata(
        exampleObject
    )$step6DNAMethylationSitesPerGeneTabulation
)

## Check the results, which include a count of the RE DNA methylation sites per
## gene, and is organized by decreasing RE DNA methylation site count.
## This is a subsection of the data frame detailing the number of hypomethylated
## RE DNA methylation site links to the top TFs.
head(
    metadata(
        exampleObject
    )$step6DNAMethylationSitesPerGeneTabulation$hypomethGplusResults
)

## ----eval = FALSE-------------------------------------------------------------
# ## Download a TCGA BRCA dataset with log2-normalized
# ## FPKM-UQ expression values from tumor and adjacent normal tissue samples
# ## with matching expression and methylation data and keeping only one tumor
# ## sample from each patient. Additionally, survival data will be combined
# ## from three clinical datasets downloaded by TCGAbiolinks. Raw data files
# ## will be saved to the working directory, and the processed dataset will
# ## be returned as a variable.
# TCGADataset <- TCGADownloader(
#     rawDataDownloadDirectory = ".",
#     TCGAStudyAbbreviation = "BRCA",
#     RNASeqWorkflow = "STAR - FPKM-UQ",
#     RNASeqLog2Normalization = TRUE,
#     matchingExpAndMetSamples = TRUE,
#     clinicalSurvivalData = "combined",
#     outputFile = NA
# )

## ----eval = FALSE-------------------------------------------------------------
# ## Cache all online datasets required by optional TENET features.
# ## This function takes no arguments and returns NULL.
# TENETCacheAllData()

## -----------------------------------------------------------------------------
## Load the exampleTENETClinicalDataFrame object from the TENET.ExperimentHub
## package. It contains copy number variation (CNV), somatic mutation (SM),
## and purity data for the top 10 TFs by linked hypomethylated RE
## DNA methylation sites in the exampleTENETMultiAssayExperiment object.
exampleTENETClinicalDataFrame <-
    TENET.ExperimentHub::exampleTENETClinicalDataFrame()
CNVData <- subset(exampleTENETClinicalDataFrame,
    select = grepl("_CNV$", colnames(exampleTENETClinicalDataFrame))
)
SMData <- subset(exampleTENETClinicalDataFrame,
    select = grepl("_SM$", colnames(exampleTENETClinicalDataFrame))
)
purityData <- subset(exampleTENETClinicalDataFrame, select = "purity")

## -----------------------------------------------------------------------------
## Show the CNV data for the first 4 TFs
head(CNVData[, 1:4])

## -----------------------------------------------------------------------------
## Show the SM data for the first 4 TFs
head(SMData[, 1:4])

## -----------------------------------------------------------------------------
## Show the first few rows of the purity data
head(purityData)

## -----------------------------------------------------------------------------
## Create complex scatterplots using the previously acquired data.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for TFs will be
## skipped is displayed.
exampleObject <- step7ExpressionVsDNAMethylationScatterplots(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    purityData = purityData,
    SMData = SMData,
    CNVData = CNVData,
    simpleOrComplex = "complex",
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7ExpressionVsDNAMethylationScatterplots list.
## For each analysis type, results are included in sub-lists, each of which
## contains results for topGenes and topTFs, unless the top genes are
## all TFs, in which case the separate top TFs output is skipped.
names(
    metadata(
        exampleObject
    )$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$topGenes
)

## For each gene, scatterplots are generated showing the expression of that
## gene and the methylation of each RE DNA methylation site linked to it for
## the given analysis type.
head(
    names(
        metadata(
            exampleObject
        )$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$
            topGenes$ENSG00000124664
    )
)

## ----fig.width=10, fig.height=7-----------------------------------------------
metadata(
    exampleObject
)$step7ExpressionVsDNAMethylationScatterplots$hypomethGplusResults$
    topGenes$ENSG00000124664$cg25731211

## ----fig.width=10, fig.height=7-----------------------------------------------
## Run the step7LinkedDNAMethylationSiteCountHistograms function.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
exampleObject <- step7LinkedDNAMethylationSiteCountHistograms(
    TENETMultiAssayExperiment = exampleObject,
    hypomethGplusAnalysis = TRUE,
    hypermethGplusAnalysis = FALSE
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7LinkedDNAMethylationSiteCountHistograms list.
## For each analysis type, histograms are included in sub-lists, each of which
## contains results for topGenes and topTFs, unless the top genes are all TFs,
## in which case the separate top TFs output is skipped.

## Display the histogram. Note the relatively small number of top TF genes with
## larger numbers of linked RE DNA methylation sites.
metadata(
    exampleObject
)$step7LinkedDNAMethylationSiteCountHistograms$hypomethGplusResults$topGenes

## -----------------------------------------------------------------------------
## View the first few available motifs for the FOXM1 TF
head(names(MotifDb::query(MotifDb::MotifDb, "FOXA1")))

## View the first few available motifs for the ESR1 TF
head(names(MotifDb::query(MotifDb::MotifDb, "ESR1")))

## -----------------------------------------------------------------------------
## The human HOCOMOCO v11 core motif is the 3rd listed for FOXA1, and 4th for
## ESR1
TFMotifList <- list(
    "FOXA1" = MotifDb::query(MotifDb::MotifDb, "FOXA1")[[3]],
    "ESR1" = MotifDb::query(MotifDb::MotifDb, "ESR1")[[4]]
)

TFMotifList

## ----eval = FALSE-------------------------------------------------------------
# exampleObject <- step7LinkedDNAMethylationSitesMotifSearching(
#     TENETMultiAssayExperiment = exampleObject,
#     TFMotifList = TFMotifList,
#     matchPWMMinScore = "80%"
# )
# 
# ## For each analysis type and TF, a seqLogo diagram of the chosen PWM and two
# ## data frames with information on the found motifs in the vicinity of RE
# ## DNA methylation sites, and total motifs found per RE DNA methylation site
# ## linked to those TFs, respectively, are saved in the metadata of the returned
# ## MultiAssayExperiment object under the
# ## step7LinkedDNAMethylationSitesMotifSearching list
# names(
#     metadata(
#         exampleObject
#     )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$FOXA1
# )
# 
# ## View the motif occurrences near hypomethylated RE DNA methylation sites
# ## linked to the FOXA1 TF
# head(
#     metadata(
#         exampleObject
#     )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$
#         FOXA1$DNAMethylationSiteMotifOccurrences
# )
# 
# ## Also see the total number of motifs found in the vicinity of each RE DNA
# ## methylation site
# head(
#     metadata(
#         exampleObject
#     )$step7LinkedDNAMethylationSitesMotifSearching$hypomethGplusResults$
#         FOXA1$totalMotifOccurrencesPerREDNAMethylationSite
# )

## -----------------------------------------------------------------------------
head(
    metadata(
        exampleObject
    )$step6DNAMethylationSitesPerGeneTabulation$hypomethGplusResults
)

## -----------------------------------------------------------------------------
DNAMethylationSites <- subset(
    metadata(
        exampleObject
    )$step5OptimizeLinks$hypomethGplusResults,
    geneID == "ENSG00000129514"
)$DNAMethylationSiteID
head(DNAMethylationSites)

## -----------------------------------------------------------------------------
exampleObject <- step7SelectedDNAMethylationSitesCaseVsControlBoxplots(
    TENETMultiAssayExperiment = exampleObject,
    DNAMethylationSites = DNAMethylationSites
)

## Each plot is saved under the ID of the RE DNA methylation site and included
## in the metadata of the returned MultiAssayExperiment object under the
## step7SelectedDNAMethylationSitesCaseVsControlBoxplots list
head(names(
    metadata(
        exampleObject
    )$step7SelectedDNAMethylationSitesCaseVsControlBoxplots
))

## ----fig.width=10, fig.height=7-----------------------------------------------
## Note: There may be a warning for "rows containing non-finite values" if there
## are any samples lacking methylation data for the RE DNA methylation site.
metadata(
    exampleObject
)$step7SelectedDNAMethylationSitesCaseVsControlBoxplots$cg13776095

## -----------------------------------------------------------------------------
## Calculate potential link status for case samples for hypomethylated RE DNA
## methylation site-gene links
exampleObject <- step7StatesForLinks(
    TENETMultiAssayExperiment = exampleObject,
    hypomethGplusAnalysis = TRUE,
    hypermethGplusAnalysis = FALSE
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7StatesForLinks list.
## A single data frame is returned for each analysis type, with the combined RE
## DNA methylation site ID and gene ID from each link in the rows, and each case
## sample in the columns.
dim(
    metadata(
        exampleObject
    )$step7StatesForLinks$hypomethGplusResults
)

## Show the results for the first 6 case samples. 1 indicates that a given case
## sample might harbor a given RE DNA methylation site-gene link, and 0
## indicates that it does not. NA values are shown for samples that lack
## methylation data for the site or expression data for the gene.
head(
    metadata(
        exampleObject
    )$step7StatesForLinks$hypomethGplusResults[
        , seq_len(6)
    ]
)

## -----------------------------------------------------------------------------
## Run the step7TopGenesCaseVsControlExpressionBoxplots function.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
exampleObject <- step7TopGenesCaseVsControlExpressionBoxplots(
    TENETMultiAssayExperiment = exampleObject,
    hypomethGplusAnalysis = TRUE,
    hypermethGplusAnalysis = FALSE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesCaseVsControlExpressionBoxplots list.
## For each analysis type, results are included in sub-lists, each of which
## contains lists with results for topGenes and topTFs, unless the
## top genes are all TFs, in which case the separate top TFs output is skipped.
## Each boxplot is saved under its gene ID.
names(
    metadata(
        exampleObject
    )$step7TopGenesCaseVsControlExpressionBoxplots$
        hypomethGplusResults$topGenes
)

## ----fig.width=10, fig.height=7-----------------------------------------------
## Note: There may be a warning for "rows containing non-finite values" if there
## are any samples lacking expression data for the given gene.
metadata(
    exampleObject
)$step7TopGenesCaseVsControlExpressionBoxplots$hypomethGplusResults$
    topGenes$ENSG00000107485

## ----fig.width=8, fig.height=8, out.width="80%", out.height="80%"-------------
## Run the step7TopGenesCircosPlots function
exampleObject <- step7TopGenesCircosPlots(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesCircosPlots list.
## For each analysis type, results are included in sub-lists, each
## of which contains lists with results for topGenes and topTFs, unless the
## top genes are all TFs, in which case the separate top TFs output is skipped.
## Each Circos plot is saved under its gene ID.

## Note: Since we performed analyses only using TFs in the step 3 function,
## the top genes are all TFs, so topTFs will be NA here.
names(
    metadata(
        exampleObject
    )$step7TopGenesCircosPlots$hypomethGplusResults$topGenes
)

## Display an example Circos plot for ENSG00000091831 (ESR1).
## Note: Plots may take some time to display.
metadata(
    exampleObject
)$step7TopGenesCircosPlots$hypomethGplusResults$topGenes$ENSG00000091831

## ----fig.width=10, fig.height=10.5, out.width="80%", out.height="80%"---------
## Run the step7TopGenesExpressionCorrelationHeatmaps function
exampleObject <- step7TopGenesExpressionCorrelationHeatmaps(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesExpressionCorrelationHeatmaps list.
## For each analysis type, results are included in sub-lists, each
## of which contains lists with results for the topGenes and topTFs, unless
## the top genes are all TFs, in which case the separate top TFs output is
## skipped. For each of these, the heatmap is generated along with a data frame
## with the correlation values displayed in the heatmap.

## Display the mirrored heatmap.
## Note: Since we performed analyses only using TFs in the step 3 function,
## the top genes are all TFs, so topTFs will be NA here.
metadata(
    exampleObject
)$step7TopGenesExpressionCorrelationHeatmaps$hypomethGplusResults$
    topGenes$heatmap

## Display the data frame with correlation values
head(
    metadata(
        exampleObject
    )$step7TopGenesExpressionCorrelationHeatmaps$hypomethGplusResults$
        topGenes$correlationMatrix
)

## -----------------------------------------------------------------------------
## Run the step7TopGenesDNAMethylationHeatmaps function
exampleObject <- step7TopGenesDNAMethylationHeatmaps(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesDNAMethylationHeatmaps list.
## For each analysis type, results are included in sub-lists, each
## of which contains heatmaps for the topGenes and topTFs, unless the
## top genes are all TFs, in which case the separate top TFs output is skipped.

## Note: Since we performed analyses only using TFs in the step 3 function,
## the top genes are all TFs, so topTFs will be NA here.
names(metadata(
    exampleObject
)$step7TopGenesDNAMethylationHeatmaps$hypomethGplusResults)

## ----fig.width=20, fig.height=14, out.width="80%", out.height="80%"-----------
## Run the step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps function
exampleObject <- step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps
## list. For each analysis type, results are included in sub-lists, each of
## which contains lists with results for the topGenes and topTFs, unless
## the top genes are all top TFs,  in which case the separate top TFs output is
## skipped. For each of these, the heatmap is generated along with a data frame
## with the correlation values displayed in the heatmap.

## Display the binary heatmap.
## Note: Since we performed analyses only using TFs in the step 3 function,
## the top genes are all TFs, so topTFs will be NA here.
metadata(
    exampleObject
)$step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps$
    hypomethGplusResults$topGenes$heatmap

## Display a subset of the data frame noting the presence/absence of links.
## 1 indicates a link, while 0 indicates no link.
head(
    metadata(
        exampleObject
    )$step7TopGenesOverlappingLinkedDNAMethylationSitesHeatmaps$
        hypomethGplusResults$topGenes$linkTable[
        , seq_len(6)
    ]
)

## -----------------------------------------------------------------------------
## Load the exampleTENETClinicalDataFrame object from the TENET.ExperimentHub
## package. It contains the vital_status (survival status) and time (survival
## time) data for each sample in the exampleTENETMultiAssayExperiment
exampleTENETClinicalDataFrame <-
    TENET.ExperimentHub::exampleTENETClinicalDataFrame()
vitalStatusData <- subset(
    exampleTENETClinicalDataFrame,
    select = "vital_status"
)
survivalTimeData <- subset(exampleTENETClinicalDataFrame, select = "time")

## -----------------------------------------------------------------------------
## Show the vital status data
head(vitalStatusData)

## Show the survival time data
head(survivalTimeData)

## ----fig.width=10, fig.height=7-----------------------------------------------
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
exampleObject <- step7TopGenesSurvival(
    TENETMultiAssayExperiment = exampleObject,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    vitalStatusData = vitalStatusData,
    survivalTimeData = survivalTimeData,
    topGeneNumber = 10,
    generatePlots = TRUE
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesSurvival list.
## For each analysis type, results are included in sub-lists, each
## of which contains lists with results for topGenes and topTFs, unless the
## top genes are all TFs, in which case the separate top TFs output is skipped.
## Each includes two data frames with the survival statistics for Kaplan-Meier
## and Cox regression survival analyses, and if the generatePlots
## argument is TRUE, topGenesSurvivalPlots and topMethylationSitesSurvivalPlots
## lists are included which contain the Kaplan-Meier survival plots for the top
## genes and each of their unique linked RE DNA methylation sites, respectively.
names(
    metadata(
        exampleObject
    )$step7TopGenesSurvival$hypomethGplusResults$topGenes
)

## The topGenesSurvivalStats and topMethylationSitesSurvivalStats variables are
## data frames containing survival statistics.
## Note: A significant amount of data is output, so selected values are shown
## here.
head(
    metadata(
        exampleObject
    )$step7TopGenesSurvival$hypomethGplusResults$
        topGenes$topGenesSurvivalStats[
        , c(1:2, 15, 17, 22:24, 26)
    ]
)

head(
    metadata(
        exampleObject
    )$step7TopGenesSurvival$hypomethGplusResults$
        topGenes$topMethylationSitesSurvivalStats[
        , c(1, 24, 26, 31:33, 35)
    ]
)

## Show the names of the gene survival plots
names(
    metadata(
        exampleObject
    )$step7TopGenesSurvival$hypomethGplusResults$
        topGenes$topGenesSurvivalPlots
)

## Plot the Kaplan-Meier survival plot for GATA3 (ENSG00000107485) as an example
metadata(
    exampleObject
)$step7TopGenesSurvival$hypomethGplusResults$
    topGenes$topGenesSurvivalPlots$ENSG00000107485

## -----------------------------------------------------------------------------
## Load the example TAD dataset from the TENET.ExperimentHub package
exampleTADRegions <- TENET.ExperimentHub::exampleTENETTADRegions()

## TAD files for this function must include the chromosome of each TAD region
## in the first column, and the start and end positions of each in the second
## and third columns respectively. Additional columns can be included but
## are not considered in this function.
class(exampleTADRegions)
head(exampleTADRegions)

## -----------------------------------------------------------------------------
## Use the example TAD object to perform TAD overlapping.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
exampleObject <- step7TopGenesTADTables(
    TENETMultiAssayExperiment = exampleObject,
    TADFiles = exampleTADRegions,
    hypomethGplusAnalysis = TRUE,
    hypermethGplusAnalysis = FALSE,
    topGeneNumber = 10
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesTADTables list.
## For each analysis type, results are included in sub-lists, each
## of which contains results in the form of a data frame for topGenes and
## topTFs, unless the top genes are all TFs, in which case
## the separate top TFs output is skipped.
class(
    metadata(
        exampleObject
    )$step7TopGenesTADTables$hypomethGplusResults$topGenes
)

## Display results for selected hypomethylated RE DNA methylation sites. A
## variety of data are included for each RE DNA methylation site, including its
## location, the top genes it is linked to, and information on the count and
## identities of other genes found within the same TAD of the RE DNA methylation
## site. Note: A significant amount of data is output, so selected values are
## shown here.
head(
    metadata(
        exampleObject
    )$step7TopGenesTADTables$hypomethGplusResults$topGenes[
        c(1:6, 16:17)
    ]
)

## -----------------------------------------------------------------------------
## Get the path to a temporary directory in which to save the output interact
## file
tempDirectory <- tempdir()

## Run the step7TopGenesUCSCBedFiles function.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
filePaths <- step7TopGenesUCSCBedFiles(
    TENETMultiAssayExperiment = exampleObject,
    outputDirectory = tempDirectory,
    hypomethGplusAnalysis = TRUE,
    hypermethGplusAnalysis = FALSE,
    topGeneNumber = 10
)

## Unlike other functions, this function does not return the
## given TENETMultiAssayExperiment with additional information generated by the
## function in its metadata, but rather returns an object with information on
## where to find the created interact file.

## Get the path of the output file for top genes with hypomethylated G+ links
bedPath <- filePaths$hypoGplus$topGenes

## Read the first few lines of the file.
## The file largely contains information about each RE DNA methylation site-gene
## link, with additional information in the first line which allows it to be
## loaded by the UCSC Genome Browser.
cat(head(readLines(bedPath)), sep = "\n")

## Delete the output file, since this is just an example.
## Do not use this line if running on real data, as it will delete your created
## file.
invisible(file.remove(unlist(bedPath)))

## -----------------------------------------------------------------------------
## Load the example peak dataset from the TENET.ExperimentHub package
examplePeakFile <- TENET.ExperimentHub::exampleTENETPeakRegions()

## Peak files for this function must include the chromosome of each peak region
## in the first column, and the start and end positions of each peak in the
## second and third columns respectively. Additional columns can be included,
## but are not considered in this function.
class(examplePeakFile)
head(examplePeakFile)

## -----------------------------------------------------------------------------
## Run the step7TopGenesUserPeakOverlap function.
## Since we performed analyses only using TFs in the step 3 function, the
## top genes are all TFs, so a message that separate output for
## TFs will be skipped is displayed.
exampleObject <- step7TopGenesUserPeakOverlap(
    TENETMultiAssayExperiment = exampleObject,
    peakData = examplePeakFile,
    hypermethGplusAnalysis = FALSE,
    hypomethGplusAnalysis = TRUE,
    topGeneNumber = 10,
    distanceFromREDNAMethylationSites = 100
)

## Results are included in the metadata of the returned MultiAssayExperiment
## object under the step7TopGenesSurvival list.
## For each analysis type, data frames with peak overlap information are
## included in sub-lists, with each data frame saved under the names
## topGenes and topTFs, unless the top genes are all TFs, in which case
## the separate top TFs output is skipped.

## Display the data frame of results for RE DNA methylation sites linked to the
## top TFs. A variety of data are included for each RE DNA methylation site,
## including its location, the coordinates of its search window, the top genes
## it is linked to, and whether it was found within the specified distance to
## any peak in each of the peak files.
head(
    metadata(
        exampleObject
    )$step7TopGenesUserPeakOverlap$hypomethGplusResults$topGenes
)

## -----------------------------------------------------------------------------
## Load the humanTranscriptionFactorList dataset
data("humanTranscriptionFactorList", package = "TENET")
## Display the names of the first few TFs on the list
head(humanTranscriptionFactorList)

## -----------------------------------------------------------------------------
sessionInfo()