## ----MAE_SE_install, eval = FALSE---------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("MultiAssayExperiment")
# BiocManager::install("SummarizedExperiment")

## ----load_LegATo, eval = FALSE------------------------------------------------
# BiocManager::install("wejlab/LegATo")

## -----------------------------------------------------------------------------
library(LegATo)

## -----------------------------------------------------------------------------
library(ggeffects)
library(emmeans)

## -----------------------------------------------------------------------------
counts <- system.file("extdata", "counts.csv", package = "LegATo") |>
  read.csv(row.names = 1) |>
  dplyr::rename_with(function(x) stringr::str_replace(x, "\\.", "-"))
tax <- system.file("extdata", "tax.csv", package = "LegATo") |> read.csv(row.names = 1)
sample <- system.file("extdata", "sample.csv", package = "LegATo") |> read.csv(row.names = 1)

## -----------------------------------------------------------------------------
ndim <- 5
counts[seq_len(ndim), seq_len(ndim)] |>
  knitr::kable(caption = "Counts Table Preview",
               label = NA)

tax[seq_len(ndim), ] |>
  knitr::kable(caption = "Taxonomy Table Preview",
               label = NA)

sample[seq_len(ndim), ] |>
  knitr::kable(caption = "Sample Table Preview",
               label = NA)

## -----------------------------------------------------------------------------
output <- create_formatted_MAE(counts_dat = counts,
                               tax_dat = tax,
                               metadata_dat = sample)

class(output)
MultiAssayExperiment::assays(output)
SummarizedExperiment::assays(output[["MicrobeGenetics"]])

## -----------------------------------------------------------------------------
dat <- system.file("extdata", "MAE.RDS", package = "LegATo") |>
  readRDS()

dat_subsetted <- MultiAssayExperiment::subsetByColData(dat,
                                                       dat$MothChild == "Infant")

## -----------------------------------------------------------------------------
dat_cleaned <- clean_MAE(dat_subsetted)

## -----------------------------------------------------------------------------
dat_filt_1 <- filter_MAE(dat_cleaned, relabu_threshold = 0.05, occur_pct_cutoff = 10)

## -----------------------------------------------------------------------------
dat_filt <- filter_animalcules_MAE(dat_cleaned, 0.05)

## -----------------------------------------------------------------------------
parsed <- parse_MAE_SE(dat_filt, type = "MAE")

parsed$counts[seq_len(5), seq_len(5)] |> 
  knitr::kable(caption = "Counts Table")

parsed$sam[seq_len(5), ]  |>
  knitr::kable(caption = "Sample Metadata")

parsed$tax[seq_len(5), ] |> 
  knitr::kable(caption = "Taxonomy Table")


## -----------------------------------------------------------------------------
group_vars <- c("HIVStatus", "MothChild")
get_summary_table(dat_filt, group_vars) |>
  knitr::kable(caption = "Summary Table", label = NA)

## -----------------------------------------------------------------------------
best_genus <- get_top_taxa(dat_filt, "genus")
best_genus |> knitr::kable(caption = "Table of genera, ranked by abundance")

## -----------------------------------------------------------------------------
longdat <- get_long_data(dat_filt, "genus", log = TRUE, counts_to_CPM = TRUE)

stackeddat <- get_stacked_data(dat_filt, "genus", covariate_1 = "HIVStatus",
                               covariate_time = "timepoint")

## -----------------------------------------------------------------------------
this_palette <- c("#709AE1", "#8A9197", "#D2AF81", "#FD7446", "#D5E4A2", "#197EC0", "#F05C3B", "#46732E",
                  "#71D0F5", "#370335", "#075149", "#C80813", "#91331F", "#1A9993", "#FD8CC1") |>
  magrittr::extract(seq_len(nrow(best_genus)))

## -----------------------------------------------------------------------------
plot_alluvial(dat = dat_filt, 
              taxon_level = "genus", 
              covariate_1 = "HIVStatus",
              covariate_time = "timepoint",
              palette_input = this_palette,
              subtitle = "Alluvial plot")

## ----results = "asis", message = FALSE----------------------------------------
  plot_spaghetti(dat = dat_filt,
               covariate_time = "timepoint",
               covariate_1 = "HIVStatus",
               unit_var = "Subject",
               taxon_level = "genus",
               which_taxon = "Staphylococcus",
               palette_input = this_palette,
               title = "Spaghetti Plot",
               subtitle = NULL) +
  ggplot2::xlab("Infant Age (timepoint)") +
  ggplot2::ylab("Relative Abundance (log CPM)")

## -----------------------------------------------------------------------------
plot_stacked_bar(dat_filt, "genus", 
             "HIVStatus",
             "timepoint",
             palette_input = this_palette)

## -----------------------------------------------------------------------------
plot_stacked_area(dat_filt, "genus", 
                  "HIVStatus",
                  "timepoint",
                  palette_input = this_palette)

## -----------------------------------------------------------------------------
this_taxon <- parsed$counts |>
  animalcules::upsample_counts(parsed$tax, "genus") |>
  animalcules::counts_to_logcpm()
p <- plot_heatmap(inputData = this_taxon,
                  annotationData = dplyr::select(parsed$sam, "timepoint", "HIVStatus", "pairing"),
                  name = "Data",
                  plot_title = "Example",
                  plottingColNames = NULL,
                  annotationColNames = NULL,
                  colList = list(),
                  scale = FALSE,
                  showColumnNames = FALSE,
                  showRowNames = FALSE,
                  colorSets = c("Set1", "Set2", "Set3", "Pastel1", "Pastel2", "Accent", "Dark2",
                                "Paired"),
                  choose_color = c("blue", "gray95", "red"),
                  split_heatmap = "none",
                  column_order = NULL
)

## -----------------------------------------------------------------------------
dat_1 <- filter_MAE(dat_cleaned, 0.05, 10, "species")

NMIT(dat_1, unit_var = "Subject", fixed_cov = "HIVStatus",
     covariate_time = "timepoint",
     method = "kendall", dist_type = "F",
     heatmap = TRUE, classify = FALSE, fill_na = 0)

## -----------------------------------------------------------------------------
test_hotelling_t2(dat = dat_1,
                  test_index = which(dat_filt$MothChild == "Infant" &
                                       dat_filt$timepoint == 6),
                  taxon_level = "genus",
                  num_taxa = 6,
                  paired = TRUE,
                  grouping_var = "HIVStatus",
                  pairing_var = "pairing")

## -----------------------------------------------------------------------------
test_hotelling_t2(dat = dat_1,
                  test_index = which(dat_filt$timepoint == 0),
                  taxon_level = "genus",
                  num_taxa = 6,
                  grouping_var = "HIVStatus",
                  unit_var = "Subject",
                  paired = FALSE)


## -----------------------------------------------------------------------------
output <- run_gee_model(dat_1, unit_var = "Subject",
                        fixed_cov = c("HIVStatus", "timepoint"),
                        corstr = "ar1",
                        plot_out = FALSE,
                        plotsave_loc = ".",
                        plot_terms = NULL)

head(output) |> knitr::kable(caption = "GEE Outputs")

## ----out.width = "50%", fig.align = "center", echo = FALSE--------------------
tempfolder <- tempdir()

# Trying out plotting
output <- run_gee_model(dat_1, unit_var = "Subject",
              taxon_level = "genus",
              fixed_cov = c("HIVStatus", "timepoint"),
              corstr = "ar1",
              plot_out = TRUE,
              plotsave_loc = tempfolder,
              plot_terms = "timepoint",
              width = 6, height = 4, units = "in", scale = 0.7)

list.files(tempfolder) |> head()

## -----------------------------------------------------------------------------
# If there are issues with matrix being positive definite
# Revisit filtering parameters in filter_MAE
output <- run_lmm_model(dat_1, unit_var = "Subject",
                        taxon_level = "genus",
                        fixed_cov = c("timepoint", "HIVStatus"),
                        plot_out = FALSE,
                        plotsave_loc = ".",
                        plot_terms = NULL)

head(output) |> knitr::kable(caption = "LMM Outputs")

## -----------------------------------------------------------------------------
sessionInfo()