## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL,
    dpi = 100
)

## ----installation, eval=FALSE-------------------------------------------------
# if(!requireNamespace('BiocManager', quietly = TRUE))
#   install.packages('BiocManager')
# BiocManager::install("cogeqc")

## ----load_package, message=FALSE----------------------------------------------
# Load package after installation
library(cogeqc)

## ----data_description---------------------------------------------------------
# Orthogroups for Arabidopsis thaliana and Brassica oleraceae
data(og)
head(og)

# Interpro domain annotations
data(interpro_ath)
data(interpro_bol)

head(interpro_ath)
head(interpro_bol)

## ----read_orthogroups_demo----------------------------------------------------
# Path to the Orthogroups.tsv file created by OrthoFinder
og_file <- system.file("extdata", "Orthogroups.tsv.gz", package = "cogeqc") 

# Read and parse file
orthogroups <- read_orthogroups(og_file)
head(orthogroups)

## ----assess_og----------------------------------------------------------------
# Create a list of annotation data frames
annotation <- list(Ath = interpro_ath, Bol = interpro_bol)
str(annotation) # This is what the list must look like

og_assessment <- assess_orthogroups(og, annotation)
head(og_assessment)

## ----mean_h-------------------------------------------------------------------
mean(og_assessment$Mean_score)

## ----ref-based_og_assessment--------------------------------------------------
set.seed(123)

# Subset the top 5000 rows for demonstration purposes
og_subset <- og[1:5000, ]
ref <- og_subset

# Shuffle 100 genes to simulate a test set
idx_shuffle <- sample(seq_len(nrow(og_subset)), 100, replace = FALSE)
test <- og_subset
test$Gene[idx_shuffle] <- sample(
  test$Gene[idx_shuffle], size = length(idx_shuffle), replace = FALSE
)

# Compare test set to reference set
comparison <- compare_orthogroups(ref, test)
head(comparison)

# Calculating percentage of preservation
preserved <- sum(comparison$Preserved) / length(comparison$Preserved)
preserved

## -----------------------------------------------------------------------------
stats_dir <- system.file("extdata", package = "cogeqc")
ortho_stats <- read_orthofinder_stats(stats_dir)
ortho_stats

## ----plot_species_tree--------------------------------------------------------
data(tree)
plot_species_tree(tree)

## ----plot_species_tree_with_dups----------------------------------------------
plot_species_tree(tree, stats_list = ortho_stats)

## -----------------------------------------------------------------------------
plot_duplications(ortho_stats)

## ----plot_genes_in_ogs--------------------------------------------------------
plot_genes_in_ogs(ortho_stats)

## ----ssOGs--------------------------------------------------------------------
plot_species_specific_ogs(ortho_stats)

## ----plot_orthofinder_stats, fig.width = 12, fig.height = 7-------------------
plot_orthofinder_stats(
  tree, 
  xlim = c(-0.1, 2),
  stats_list = ortho_stats
)

## ----plot_og_overlap----------------------------------------------------------
plot_og_overlap(ortho_stats)

## ----og_sizes-----------------------------------------------------------------
plot_og_sizes(og) 
plot_og_sizes(og, log = TRUE) # natural logarithm scale
plot_og_sizes(og, max_size = 100) # only OGs with <= 100 genes

## ----session_info-------------------------------------------------------------
sessioninfo::session_info()