## ----chunk_settings, eval = T, echo = F---------------------------------------
knitr::opts_chunk$set(eval = FALSE, echo = TRUE, warning = FALSE, message = FALSE)

## ----setup, eval = T, echo = F------------------------------------------------
# get the system load paths
dpath <- system.file("extdata", "glint_files", 
                     package = "recountmethylation")  # local path to example data
res1.fname <- "glint_results_tutorialdata.epistructure.pcs.txt" 
res1.fpath <- file.path(dpath, res1.fname)
res1 <- read.table(res1.fpath, sep = "\t")            # read in example dataset #1
res2.fpath <- file.path(dpath, "glint_results_minfidata.epistructure.pcs.txt")
res2 <- read.table(res2.fpath, sep = "\t")            # read in example dataset #2

## -----------------------------------------------------------------------------
#  BiocManager::install("basilisk")
#  library(basilisk)

## -----------------------------------------------------------------------------
#  env.name <- "glint_env"          # name the new virtual environment
#  pkg.name <- "recountmethylation" # set env properties
#  pkgv <- c("python==2.7",         # python version (v2.7)
#            "numpy==1.15",         # numpy version (v1.15)
#            "pandas==0.22",        # pandas version (v0.22)
#            "scipy==1.2",          # scipy version (v1.2)
#            "scikit-learn==0.19",  # scikit-learn (v0.19)
#            "matplotlib==2.2",     # matplotlib (v2.2)
#            "statsmodels==0.9",    # statsmodels (v0.9)
#            "cvxopt==1.2")         # cvxopt (v1.2)
#  glint_env <- BasiliskEnvironment(envname = env.name, pkgname = pkg.name,
#                                   packages = pkgv)
#  proc <- basiliskStart(glint_env) # define run process
#  on.exit(basiliskStop(proc))      # define exit process

## -----------------------------------------------------------------------------
#  library(minfiData)
#  ms <- get(data("MsetEx")) # load example MethylSet

## -----------------------------------------------------------------------------
#  dpath <- system.file("extdata", "glint_files",
#                       package = "recountmethylation") # get the dir path
#  cgv.fpath <- file.path(dpath, "glint_epistructure_explanatory-cpgs.rda")
#  glint.cgv <- get(load(cgv.fpath)) # load explanatory probes

## -----------------------------------------------------------------------------
#  mf <- ms[rownames(ms) %in% glint.cgv,] # filter ms on explanatory probes
#  dim(mf)                                # mf dimensions: [1] 4913    6

## -----------------------------------------------------------------------------
#  # get covar -- all vars should be numeric/float
#  covar <- as.data.frame(colData(mf)[,c("age", "sex")]) # get sample metadata
#  covar[,"sex"] <- ifelse(covar[,"sex"] == "M", 1, 0)   # relabel sex for glint
#  # write covariates matrix
#  covar.fpath <- file.path("covariates_minfidata.txt")  # specify covariate table path
#  # write table colnames, values
#  write.table(covar, file = covar.fpath, sep = "\t", row.names = T,
#              col.names = T, append = F, quote = F)     # write covariates table

## -----------------------------------------------------------------------------
#  bval.fpath <- file.path("bval_minfidata.txt")     # specify dnam fractions table name
#  mbval <- t(apply(as.matrix(getBeta(mf)),1,function(ri){
#    ri[is.na(ri)] <- median(ri,na.rm=T)             # impute na's with row medians
#    return(round(ri, 4))
#  })); rownames(mbval) <- rownames(mf)              # assign probe ids to row names
#  write.table(mbval, file = bval.fpath, sep = sepsym,
#              row.names = T, col.names = T, append = F,
#              quote = F)                            # write dnam fractions table

## -----------------------------------------------------------------------------
#  glint.dpath <- "glint-1.0.4"                         # path to the main glint app dir
#  glint.pypath <- file.path(glint.dpath, "glint.py")   # path to the glint .py script
#  data.fpath <- file.path("bval_minfidata.txt")        # path to the DNAm data table
#  covar.fpath <- file.path("covariates_minfidata.txt") # path to the metadata table
#  out.fstr <- file.path("glint_results_minfidata")     # base string for ouput results files
#  covarv <- c("age", "sex")                            # vector of valid covariates
#  covar.str <- paste0(covarv, collapse = " ")          # format the covariates vector
#  cmd.str <- paste0(c("python", glint.pypath,
#                      "--datafile", data.fpath,
#                      "--covarfile", covar.fpath,
#                      "--covar", covar.str,
#                      "--epi", "--out", out.fstr),
#                    collapse = " ")                    # get the final command line call

## -----------------------------------------------------------------------------
#  basiliskRun(proc, function(cmd.str){system(cmd.str)}, cmd.str = cmd.str) # run glint
#  # this returns:
#  # INFO      >>> python glint-1.0.4/glint.py --datafile bval_minfidata.txt --covarfile covariates_minfidata.txt --covar age sex --epi --out glint_results_minfidata
#  # INFO      Starting GLINT...
#  # INFO      Validating arguments...
#  # INFO      Loading file bval_minfidata.txt...
#  # INFO      Checking for missing values in the data file...
#  # INFO      Validating covariates file...
#  # INFO      Loading file covariates_minfidata.txt...
#  # INFO      New covariates were found: age, sex.
#  # INFO      Running EPISTRUCTURE...
#  # INFO      Removing non-informative sites...
#  # INFO      Including sites...
#  # INFO      Include sites: 4913 CpGs from the reference list of 4913 CpGs will be included...
#  # WARNING   Found no sites to exclude.
#  # INFO      Using covariates age, sex.
#  # INFO      Regressing out covariates...
#  # INFO      Running PCA...
#  # INFO      The first 1 PCs were saved to glint_results_minfidata.epistructure.pcs.txt.
#  # INFO      Added covariates epi1.
#  # Validating all dependencies are installed...
#  # All dependencies are installed
#  # [1] 0

## -----------------------------------------------------------------------------
#  out.fpath <- paste0(out.fpath, ".epistructure.pcs.txt")
#  res2 <- read.table(out.fpath, sep = "\t")

## ----eval = T-----------------------------------------------------------------
colnames(res2) <- c("sample", "epistructure.pc")
dim(res2)
res2

## ----eval = T-----------------------------------------------------------------
utils::sessionInfo()