## ----m6aPeak, echo=FALSE, out.height = "80%", out.width = "80%", include=TRUE,fig.cap="Peaks from MeRIP-seq in a mouse brain study"----
knitr::include_graphics("m6APeak.png")

## ---- eval = FALSE, warning=FALSE, message=FALSE------------------------------
#  install.packages("devtools") # if you have not installed "devtools" package
#  library(devtools)
#  install_github("https://github.com/ZhenxingGuo0015/TRESS",
#                 build_vignettes = TRUE)

## ---- eval = FALSE, warning=FALSE, message=FALSE------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  # The following initializes usage of Bioc devel
#  BiocManager::install(version='devel')
#  
#  BiocManager::install("TRESS")

## ----eval=FALSE,warning=FALSE,message=FALSE-----------------------------------
#  library(TRESS)
#  vignette("TRESS")

## ---- eval= FALSE-------------------------------------------------------------
#  install_github("https://github.com/ZhenxingGuo0015/datasetTRES")

## ---- eval=FALSE, message=FALSE, warning=FALSE--------------------------------
#  ## Directly use "makeTxDbFromUCSC" function to create one
#  library(GenomicFeatures)
#  txdb = makeTxDbFromUCSC("mm9", "knownGene")
#  # saveDb(txdb, file = paste0("YourPATH", "/", "YourGenome.sqlite")
#  
#  ## or load a TxDb annotation package like
#  # library(TxDb.Mmusculus.UCSC.mm9.knownGene)
#  # txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
#  # saveDb(txdb, file = paste0("YourPATH", "/", "mm9_knownGene.sqlite")

## ----loadData,warning=FALSE,message=FALSE-------------------------------------
library(TRESS)
data("Basal")
Bins <- Basal$Bins$Bins
BinCounts <- Basal$Bins$Counts
dim(BinCounts)
Candidates <- Basal$Candidates$Regions
CandidatesCounts <- Basal$Candidates$Counts
sf <- Basal$Bins$sf

## ----BasalBins----------------------------------------------------------------
head(Bins, 3) 
head(Candidates, 3)

## ----BasalCount---------------------------------------------------------------
dim(BinCounts)
head(BinCounts, 3)
dim(CandidatesCounts)
head(CandidatesCounts,3)

## ----Basalsf------------------------------------------------------------------
sf

## ----loadDataM3---------------------------------------------------------------
data("DMR_M3vsWT")
Candidates <- DMR_M3vsWT$Regions
CandidatesCounts <- DMR_M3vsWT$Counts
sf <- DMR_M3vsWT$sf

### bins overlapping with 6 candidates
bins <- DMR_M3vsWT$Bins

## ----CheckCanM3---------------------------------------------------------------
dim(Candidates)
head(Candidates, 3) 

## ----CheckDataM3--------------------------------------------------------------
dim(CandidatesCounts)
head(CandidatesCounts, 3)

## ----ChecksfM3----------------------------------------------------------------
sf

## ----CheckOverlapM3,warning=FALSE,message=FALSE-------------------------------
library(GenomicRanges)
bins.GR <- GRanges(Rle(bins$chr), IRanges(bins$start, bins$end), 
                   Rle(bins$strand))
Candidates.GR <- GRanges(Rle(Candidates$chr), 
                         IRanges(Candidates$start, Candidates$end), 
                   Rle(Candidates$strand))
library(GenomicFeatures)
sum(countOverlaps(Candidates.GR, bins.GR) > 0)

## ----loadDataTime-------------------------------------------------------------
data("DMR_SixWeekvsTwoWeek")
Candidates <- DMR_SixWeekvsTwoWeek$Regions
CandidatesCounts <- DMR_SixWeekvsTwoWeek$Counts
sf <- DMR_SixWeekvsTwoWeek$sf

## ----CheckCanTime-------------------------------------------------------------
dim(Candidates)
head(Candidates, 3) 

## ----CheckDataTime------------------------------------------------------------
dim(CandidatesCounts)
head(CandidatesCounts, 3)

## ----ChecksfTime--------------------------------------------------------------
sf

## ----CheckRatioTime-----------------------------------------------------------
MeRatio <- DMR_SixWeekvsTwoWeek$MeRatio
head(MeRatio, 3)

## ---- eval= FALSE,warning=FALSE, message=FALSE--------------------------------
#  ## Directly take BAM files in "datasetTRES" available on github
#  library(datasetTRES)
#  Input.file = c("cb_input_rep1_chr19.bam", "cb_input_rep2_chr19.bam")
#  IP.file = c("cb_ip_rep1_chr19.bam", "cb_ip_rep2_chr19.bam")
#  BamDir = file.path(system.file(package = "datasetTRES"), "extdata/")
#  annoDir = file.path(system.file(package = "datasetTRES"),
#                      "extdata/mm9_chr19_knownGene.sqlite")
#  OutDir = "/directory/to/output"
#  TRESS_peak(IP.file = IP.file,
#             Input.file = Input.file,
#             Path_To_AnnoSqlite = annoDir,
#             InputDir = BamDir,
#             OutputDir = OutDir, # specify a directory for output
#             experiment_name = "examplebyBam", # name your output
#             filetype = "bam")

## ---- eval= FALSE-------------------------------------------------------------
#  ### example peaks
#  peaks = read.table(file.path(system.file(package = "TRESS"),
#                             "extdata/examplebyBam_peaks.xls"),
#                   sep = "\t", header = TRUE)
#  head(peaks[, -c(5, 14, 15)], 3)

## ---- eval=FALSE--------------------------------------------------------------
#  ## or, take BAM files from your path
#  Input.file = c("input_rep1.bam", "input_rep2.bam")
#  IP.file = c("ip_rep1.bam", "ip_rep2.bam")
#  BamDir = "/directory/to/BAMfile"
#  annoDir = "/path/to/xxx.sqlite"
#  OutDir = "/directory/to/output"
#  TRESS_peak(IP.file = IP.file,
#             Input.file = Input.file,
#             Path_To_AnnoSqlite = annoDir,
#             InputDir = BamDir,
#             OutputDir = OutDir,
#             experiment_name = "xxx",
#             filetype = "bam")
#  peaks = read.table(paste0(OutDir, "/",
#                            "xxx_peaks.xls"),
#                     sep = "\t", header = TRUE)
#  head(peaks, 3)

## ----eval=FALSE, message=FALSE, warning=FALSE---------------------------------
#  ## load in first example dataset
#  data("Basal")
#  Candidates = CallCandidates(
#      Counts = Basal$Bins$Counts,
#      bins = Basal$Bins$Bins
#      )

## ---- eval=FALSE, message= TRUE, warning= FALSE-------------------------------
#  data("Basal") ### load candidate regions
#  Basal$Candidates$sf = Basal$Bins$sf
#  peaks1 = CallPeaks.multiRep(
#    Candidates = Basal$Candidates,
#    mu.cutoff = 0.5
#    )
#  head(peaks1, 3)

## ---- eval=FALSE, message= TRUE, warning= FALSE-------------------------------
#  ### use different threshold to filter peaks
#  peaks2 = CallPeaks.multiRep(
#    Candidates = Basal$Candidates,
#    mu.cutoff = 0.5,
#    fdr.cutoff = 0.01,
#    lfc.cutoff = 1.6
#    )

## ---- eval=FALSE, message= TRUE, warning= FALSE-------------------------------
#  ### use different threshold to filter peaks
#  peaks3 = CallPeaks.multiRep(
#    Candidates = Basal$Candidates,
#    mu.cutoff = 0.5,
#    WhichThreshold = "fdr"
#    )

## ---- eval = FALSE, message= FALSE, warning= FALSE----------------------------
#  # A toy example
#  data("Basal")
#  bincounts = Basal$Bins$Counts[, 1:2]
#  sf0 = Basal$Bins$sf[1:2]
#  bins = Basal$Bins$Bins
#  peaks = CallPeaks.oneRep(Counts = bincounts,
#                           sf = sf0, bins = bins)
#  head(peaks, 3)

## ---- eval=FALSE, message= FALSE, warning= FALSE------------------------------
#  ShowOnePeak(onePeak, allBins, binCounts, ext = 500, ylim = c(0,1))

## ---- eval=TRUE, message= FALSE, warning= FALSE-------------------------------
peaks = read.table(file.path(system.file(package = "TRESS"),
                             "extdata/examplebyBam_peaks.xls"),
                   sep = "\t", header = TRUE) 
load(file.path(system.file(package = "TRESS"),
               "extdata/examplebyBam.rda"))
allBins = as.data.frame(bins$bins)
colnames(allBins)[1] = "chr"
allBins$strand = binStrand

head(peaks, 1)
ShowOnePeak(onePeak = peaks[1,], allBins = allBins, 
            binCounts = allCounts)

## ---- eval=FALSE, message= FALSE, warning= FALSE------------------------------
#  InputDir = "/directory/to/BAMfile"
#  Input.file = c("input1.bam", "input2.bam",..., "inputN.bam")
#  IP.file = c("ip1.bam", "ip2.bam", ..., "ipN.bam")
#  OutputDir = "/directory/to/output"
#  Path_sqlit = "/path/to/xxx.sqlite"
#  variable = "YourVariable" # a dataframe containing both
#  # testing factor and potential covariates,
#  # e.g., for a two-group comparison with balanced samples
#  # variable = data.frame(Trt = rep(c("Ctrl", "Trt"), each = N/2))
#  model = "YourModel"     # e.g. model = ~1 + Trt
#  DMR.fit = TRESS_DMRfit(IP.file = IP.file,
#                         Input.file = Input.file,
#                         Path_To_AnnoSqlite = Path_sqlit,
#                         variable = variable,
#                         model = model,
#                         InputDir = InputDir,
#                         OutputDir = OutputDir,
#                         experimentName = "xxx"
#                         )
#  CoefName(DMR.fit)# show the name and order of coefficients
#                   # in the design matrix
#  Contrast = "YourContrast" # e.g., Contrast = c(0, 1)
#  DMR.test = TRESS_DMRtest(DMR = DMR.fit, contrast = Contrast)

## ---- eval=FALSE--------------------------------------------------------------
#  InputDir = "/directory/to/BAMfile"
#  Input.file = c("input1.bam", "input2.bam",..., "inputN.bam")
#  IP.file = c("ip1.bam", "ip2.bam", ..., "ipN.bam")
#  OutputDir = "/directory/to/output"
#  Path_sqlit = "/path/to/xxx.sqlite"
#  allBins = DivideBins(IP.file = IP.file,
#                       Input.file = Input.file,
#                       Path_To_AnnoSqlite = Path_sqlit,
#                       InputDir = InputDir,
#                       OutputDir = OutputDir,
#                       experimentName = "example")
#  Candidates = CallCandidates(Counts = allBins$binCount,
#                              bins = allBins$bins)
#  Candidates = filterRegions(Candidates)

## ---- eval= FALSE-------------------------------------------------------------
#  data("DMR_M3vsWT") # data from TRESS
#  head(DMR_M3vsWT$Counts, 3)
#  variable = data.frame(predictor = rep(c("WT", "M3"), c(2, 2)))
#  model = ~1+predictor
#  DMR.fit = CallDMRs.paramEsti(counts = DMR_M3vsWT$Counts,
#                               sf = DMR_M3vsWT$sf,
#                               variable = variable,
#                               model = model)

## ----eval= FALSE--------------------------------------------------------------
#  CoefName(DMR.fit) ## show variable name of each coefficient
#  DMR.test = TRESS_DMRtest(DMR = DMR.fit, contrast = c(0, 1))
#  DMR.test[which(DMR.test$padj < 0.05)[1:3], ]

## ---- eval=TRUE---------------------------------------------------------------
data("DMR_M3vsWT")
snames = paste0(rep(c("WT_", "M3_"), each = 2), 
                rep(c("Replicate1", "Replicate2"), 2))
ShowOnePeak(onePeak = DMR_M3vsWT$Regions[1, ],
            allBins = DMR_M3vsWT$Bins,
            binCounts = DMR_M3vsWT$BinsCounts,
            isDMR = TRUE,
            Sname = snames)

## ---- eval = TRUE-------------------------------------------------------------
library(TRESS)
data("DMR_SixWeekvsTwoWeek")
variable = data.frame(time = rep(c("2wk", "6wk"), each = 4),
                    region = rep(rep(c("Cortex", "Hypothalamus"), 
                                       each = 2),2)
                    )
model = ~1 + time + region 
DMRfit = CallDMRs.paramEsti(counts = DMR_SixWeekvsTwoWeek$Counts, 
                            sf = DMR_SixWeekvsTwoWeek$sf,
                            variable = variable,
                            model = model)

## ---- eval = TRUE-------------------------------------------------------------
CoefName(DMRfit)
DMRtest = TRESS_DMRtest(DMRfit, contrast = c(0, 1, 0))
idx = order(abs(DMRtest$stat), decreasing = TRUE)
DMRtest[idx[1:3], ]
DMR_SixWeekvsTwoWeek$MeRatio[idx[1:3],]

## ---- eval = TRUE-------------------------------------------------------------
model.int = ~1+ time + region + time*region 
model.matrix( model.int, variable)
DMRfit.int = CallDMRs.paramEsti(
  counts = DMR_SixWeekvsTwoWeek$Counts, 
  sf = DMR_SixWeekvsTwoWeek$sf, 
  variable = variable,
  model = model.int
  )

## ---- eval = TRUE-------------------------------------------------------------
CoefName(DMRfit.int)
CTX.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 1, 0, 0))
idx = order(abs(CTX.6vs2$stat), decreasing = TRUE)
CTX.6vs2[idx[1:3], ]
DMR_SixWeekvsTwoWeek$MeRatio[
  idx[1:3], 
  grepl("Cortex",colnames(DMR_SixWeekvsTwoWeek$MeRatio))]

## ---- eval = TRUE-------------------------------------------------------------
HTH.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 1, 0, 1))
idx = order(abs(HTH.6vs2$stat), decreasing = TRUE)
HTH.6vs2[idx[1:3], ]
DMR_SixWeekvsTwoWeek$MeRatio[
  idx[1:3], 
  grepl("Hypothalamus",colnames(DMR_SixWeekvsTwoWeek$MeRatio))]

## ---- eval = TRUE-------------------------------------------------------------
CTXvsHTH.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 0, 0, 1))
idx = order(abs(CTXvsHTH.6vs2$stat), decreasing = TRUE)
CTXvsHTH.6vs2[idx[1:3], ]
DMR_SixWeekvsTwoWeek$MeRatio[
  idx[1:3], c(1,2,5,6,3,4,7,8)]

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()