## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
)

## ----"install", eval = FALSE--------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#        install.packages("BiocManager")
#    }
#  
#  BiocManager::install("planttfhunter")

## ----load_package, message = FALSE--------------------------------------------
library(planttfhunter)

## ----data---------------------------------------------------------------------
data(gsu)
gsu

## ----scheme, echo = FALSE-----------------------------------------------------
data(classification_scheme)
knitr::kable(classification_scheme)

## ----identifying_tfs----------------------------------------------------------
data(gsu_annotation)

# Annotate TF-related domains using a local installation of HMMER
if(hmmer_is_installed()) {
  gsu_annotation <- annotate_pfam(gsu)
} 

# Take a look at the first few lines of the output
head(gsu_annotation)

## ----classifying_tfs----------------------------------------------------------
# Classify TFs into families
gsu_families <- classify_tfs(gsu_annotation)

# Take a look at the output
head(gsu_families)

# Count number of TFs per family
table(gsu_families$Family)

## ----simulate_data_proteomes--------------------------------------------------
set.seed(123) # for reproducibility

# Simulate 4 different species by sampling 100 random genes from `gsu`
proteomes <- list(
    Gsu1 = gsu[sample(names(gsu), 50, replace = FALSE)],
    Gsu2 = gsu[sample(names(gsu), 50, replace = FALSE)],
    Gsu3 = gsu[sample(names(gsu), 50, replace = FALSE)],
    Gsu4 = gsu[sample(names(gsu), 50, replace = FALSE)]
)
proteomes

## ----simulate_data_species_metadata-------------------------------------------
# Create simulated species metadata
species_metadata <- data.frame(
    row.names = names(proteomes),
    Division = "Rhodophyta",
    Origin = c("US", "Belgium", "China", "Brazil")
)

species_metadata

## ----get_tf_counts------------------------------------------------------------
data(tf_counts)

# Get TF counts per family in each species as a SummarizedExperiment object
if(hmmer_is_installed()) {
    tf_counts <- get_tf_counts(proteomes, species_metadata)
}

# Take a look at the SummarizedExperiment object
tf_counts

# Look at the matrix of counts: assay() function from SummarizedExperiment
SummarizedExperiment::assay(tf_counts)

# Look at the species metadata: colData() function from SummarizedExperiment
SummarizedExperiment::colData(tf_counts)

## ----session_info-------------------------------------------------------------
sessioninfo::session_info()