## ----m6aPeak, echo=FALSE, out.height = "80%", out.width = "80%", include=TRUE,fig.cap="Peaks from MeRIP-seq in a mouse brain study"---- knitr::include_graphics("m6APeak.png") ## ---- eval = FALSE, warning=FALSE, message=FALSE------------------------------ # install.packages("devtools") # if you have not installed "devtools" package # library(devtools) # install_github("https://github.com/ZhenxingGuo0015/TRESS", # build_vignettes = TRUE) ## ---- eval = FALSE, warning=FALSE, message=FALSE------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # The following initializes usage of Bioc devel # BiocManager::install(version='devel') # # BiocManager::install("TRESS") ## ----eval=FALSE,warning=FALSE,message=FALSE----------------------------------- # library(TRESS) # vignette("TRESS") ## ---- eval= FALSE------------------------------------------------------------- # install_github("https://github.com/ZhenxingGuo0015/datasetTRES") ## ---- eval=FALSE, message=FALSE, warning=FALSE-------------------------------- # ## Directly use "makeTxDbFromUCSC" function to create one # library(GenomicFeatures) # txdb = makeTxDbFromUCSC("mm9", "knownGene") # # saveDb(txdb, file = paste0("YourPATH", "/", "YourGenome.sqlite") # # ## or load a TxDb annotation package like # # library(TxDb.Mmusculus.UCSC.mm9.knownGene) # # txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene # # saveDb(txdb, file = paste0("YourPATH", "/", "mm9_knownGene.sqlite") ## ----loadData,warning=FALSE,message=FALSE------------------------------------- library(TRESS) data("Basal") Bins <- Basal$Bins$Bins BinCounts <- Basal$Bins$Counts dim(BinCounts) Candidates <- Basal$Candidates$Regions CandidatesCounts <- Basal$Candidates$Counts sf <- Basal$Bins$sf ## ----BasalBins---------------------------------------------------------------- head(Bins, 3) head(Candidates, 3) ## ----BasalCount--------------------------------------------------------------- dim(BinCounts) head(BinCounts, 3) dim(CandidatesCounts) head(CandidatesCounts,3) ## ----Basalsf------------------------------------------------------------------ sf ## ----loadDataM3--------------------------------------------------------------- data("DMR_M3vsWT") Candidates <- DMR_M3vsWT$Regions CandidatesCounts <- DMR_M3vsWT$Counts sf <- DMR_M3vsWT$sf ### bins overlapping with 6 candidates bins <- DMR_M3vsWT$Bins ## ----CheckCanM3--------------------------------------------------------------- dim(Candidates) head(Candidates, 3) ## ----CheckDataM3-------------------------------------------------------------- dim(CandidatesCounts) head(CandidatesCounts, 3) ## ----ChecksfM3---------------------------------------------------------------- sf ## ----CheckOverlapM3,warning=FALSE,message=FALSE------------------------------- library(GenomicRanges) bins.GR <- GRanges(Rle(bins$chr), IRanges(bins$start, bins$end), Rle(bins$strand)) Candidates.GR <- GRanges(Rle(Candidates$chr), IRanges(Candidates$start, Candidates$end), Rle(Candidates$strand)) library(GenomicFeatures) sum(countOverlaps(Candidates.GR, bins.GR) > 0) ## ----loadDataTime------------------------------------------------------------- data("DMR_SixWeekvsTwoWeek") Candidates <- DMR_SixWeekvsTwoWeek$Regions CandidatesCounts <- DMR_SixWeekvsTwoWeek$Counts sf <- DMR_SixWeekvsTwoWeek$sf ## ----CheckCanTime------------------------------------------------------------- dim(Candidates) head(Candidates, 3) ## ----CheckDataTime------------------------------------------------------------ dim(CandidatesCounts) head(CandidatesCounts, 3) ## ----ChecksfTime-------------------------------------------------------------- sf ## ----CheckRatioTime----------------------------------------------------------- MeRatio <- DMR_SixWeekvsTwoWeek$MeRatio head(MeRatio, 3) ## ---- eval= FALSE,warning=FALSE, message=FALSE-------------------------------- # ## Directly take BAM files in "datasetTRES" available on github # library(datasetTRES) # Input.file = c("cb_input_rep1_chr19.bam", "cb_input_rep2_chr19.bam") # IP.file = c("cb_ip_rep1_chr19.bam", "cb_ip_rep2_chr19.bam") # BamDir = file.path(system.file(package = "datasetTRES"), "extdata/") # annoDir = file.path(system.file(package = "datasetTRES"), # "extdata/mm9_chr19_knownGene.sqlite") # OutDir = "/directory/to/output" # TRESS_peak(IP.file = IP.file, # Input.file = Input.file, # Path_To_AnnoSqlite = annoDir, # InputDir = BamDir, # OutputDir = OutDir, # specify a directory for output # experiment_name = "examplebyBam", # name your output # filetype = "bam") ## ---- eval= FALSE------------------------------------------------------------- # ### example peaks # peaks = read.table(file.path(system.file(package = "TRESS"), # "extdata/examplebyBam_peaks.xls"), # sep = "\t", header = TRUE) # head(peaks[, -c(5, 14, 15)], 3) ## ---- eval=FALSE-------------------------------------------------------------- # ## or, take BAM files from your path # Input.file = c("input_rep1.bam", "input_rep2.bam") # IP.file = c("ip_rep1.bam", "ip_rep2.bam") # BamDir = "/directory/to/BAMfile" # annoDir = "/path/to/xxx.sqlite" # OutDir = "/directory/to/output" # TRESS_peak(IP.file = IP.file, # Input.file = Input.file, # Path_To_AnnoSqlite = annoDir, # InputDir = BamDir, # OutputDir = OutDir, # experiment_name = "xxx", # filetype = "bam") # peaks = read.table(paste0(OutDir, "/", # "xxx_peaks.xls"), # sep = "\t", header = TRUE) # head(peaks, 3) ## ----eval=FALSE, message=FALSE, warning=FALSE--------------------------------- # ## load in first example dataset # data("Basal") # Candidates = CallCandidates( # Counts = Basal$Bins$Counts, # bins = Basal$Bins$Bins # ) ## ---- eval=FALSE, message= TRUE, warning= FALSE------------------------------- # data("Basal") ### load candidate regions # Basal$Candidates$sf = Basal$Bins$sf # peaks1 = CallPeaks.multiRep( # Candidates = Basal$Candidates, # mu.cutoff = 0.5 # ) # head(peaks1, 3) ## ---- eval=FALSE, message= TRUE, warning= FALSE------------------------------- # ### use different threshold to filter peaks # peaks2 = CallPeaks.multiRep( # Candidates = Basal$Candidates, # mu.cutoff = 0.5, # fdr.cutoff = 0.01, # lfc.cutoff = 1.6 # ) ## ---- eval=FALSE, message= TRUE, warning= FALSE------------------------------- # ### use different threshold to filter peaks # peaks3 = CallPeaks.multiRep( # Candidates = Basal$Candidates, # mu.cutoff = 0.5, # WhichThreshold = "fdr" # ) ## ---- eval = FALSE, message= FALSE, warning= FALSE---------------------------- # # A toy example # data("Basal") # bincounts = Basal$Bins$Counts[, 1:2] # sf0 = Basal$Bins$sf[1:2] # bins = Basal$Bins$Bins # peaks = CallPeaks.oneRep(Counts = bincounts, # sf = sf0, bins = bins) # head(peaks, 3) ## ---- eval=FALSE, message= FALSE, warning= FALSE------------------------------ # ShowOnePeak(onePeak, allBins, binCounts, ext = 500, ylim = c(0,1)) ## ---- eval=TRUE, message= FALSE, warning= FALSE------------------------------- peaks = read.table(file.path(system.file(package = "TRESS"), "extdata/examplebyBam_peaks.xls"), sep = "\t", header = TRUE) load(file.path(system.file(package = "TRESS"), "extdata/examplebyBam.rda")) allBins = as.data.frame(bins$bins) colnames(allBins)[1] = "chr" allBins$strand = binStrand head(peaks, 1) ShowOnePeak(onePeak = peaks[1,], allBins = allBins, binCounts = allCounts) ## ---- eval=FALSE, message= FALSE, warning= FALSE------------------------------ # InputDir = "/directory/to/BAMfile" # Input.file = c("input1.bam", "input2.bam",..., "inputN.bam") # IP.file = c("ip1.bam", "ip2.bam", ..., "ipN.bam") # OutputDir = "/directory/to/output" # Path_sqlit = "/path/to/xxx.sqlite" # variable = "YourVariable" # a dataframe containing both # # testing factor and potential covariates, # # e.g., for a two-group comparison with balanced samples # # variable = data.frame(Trt = rep(c("Ctrl", "Trt"), each = N/2)) # model = "YourModel" # e.g. model = ~1 + Trt # DMR.fit = TRESS_DMRfit(IP.file = IP.file, # Input.file = Input.file, # Path_To_AnnoSqlite = Path_sqlit, # variable = variable, # model = model, # InputDir = InputDir, # OutputDir = OutputDir, # experimentName = "xxx" # ) # CoefName(DMR.fit)# show the name and order of coefficients # # in the design matrix # Contrast = "YourContrast" # e.g., Contrast = c(0, 1) # DMR.test = TRESS_DMRtest(DMR = DMR.fit, contrast = Contrast) ## ---- eval=FALSE-------------------------------------------------------------- # InputDir = "/directory/to/BAMfile" # Input.file = c("input1.bam", "input2.bam",..., "inputN.bam") # IP.file = c("ip1.bam", "ip2.bam", ..., "ipN.bam") # OutputDir = "/directory/to/output" # Path_sqlit = "/path/to/xxx.sqlite" # allBins = DivideBins(IP.file = IP.file, # Input.file = Input.file, # Path_To_AnnoSqlite = Path_sqlit, # InputDir = InputDir, # OutputDir = OutputDir, # experimentName = "example") # Candidates = CallCandidates(Counts = allBins$binCount, # bins = allBins$bins) # Candidates = filterRegions(Candidates) ## ---- eval= FALSE------------------------------------------------------------- # data("DMR_M3vsWT") # data from TRESS # head(DMR_M3vsWT$Counts, 3) # variable = data.frame(predictor = rep(c("WT", "M3"), c(2, 2))) # model = ~1+predictor # DMR.fit = CallDMRs.paramEsti(counts = DMR_M3vsWT$Counts, # sf = DMR_M3vsWT$sf, # variable = variable, # model = model) ## ----eval= FALSE-------------------------------------------------------------- # CoefName(DMR.fit) ## show variable name of each coefficient # DMR.test = TRESS_DMRtest(DMR = DMR.fit, contrast = c(0, 1)) # DMR.test[which(DMR.test$padj < 0.05)[1:3], ] ## ---- eval=TRUE--------------------------------------------------------------- data("DMR_M3vsWT") snames = paste0(rep(c("WT_", "M3_"), each = 2), rep(c("Replicate1", "Replicate2"), 2)) ShowOnePeak(onePeak = DMR_M3vsWT$Regions[1, ], allBins = DMR_M3vsWT$Bins, binCounts = DMR_M3vsWT$BinsCounts, isDMR = TRUE, Sname = snames) ## ---- eval = TRUE------------------------------------------------------------- library(TRESS) data("DMR_SixWeekvsTwoWeek") variable = data.frame(time = rep(c("2wk", "6wk"), each = 4), region = rep(rep(c("Cortex", "Hypothalamus"), each = 2),2) ) model = ~1 + time + region DMRfit = CallDMRs.paramEsti(counts = DMR_SixWeekvsTwoWeek$Counts, sf = DMR_SixWeekvsTwoWeek$sf, variable = variable, model = model) ## ---- eval = TRUE------------------------------------------------------------- CoefName(DMRfit) DMRtest = TRESS_DMRtest(DMRfit, contrast = c(0, 1, 0)) idx = order(abs(DMRtest$stat), decreasing = TRUE) DMRtest[idx[1:3], ] DMR_SixWeekvsTwoWeek$MeRatio[idx[1:3],] ## ---- eval = TRUE------------------------------------------------------------- model.int = ~1+ time + region + time*region model.matrix( model.int, variable) DMRfit.int = CallDMRs.paramEsti( counts = DMR_SixWeekvsTwoWeek$Counts, sf = DMR_SixWeekvsTwoWeek$sf, variable = variable, model = model.int ) ## ---- eval = TRUE------------------------------------------------------------- CoefName(DMRfit.int) CTX.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 1, 0, 0)) idx = order(abs(CTX.6vs2$stat), decreasing = TRUE) CTX.6vs2[idx[1:3], ] DMR_SixWeekvsTwoWeek$MeRatio[ idx[1:3], grepl("Cortex",colnames(DMR_SixWeekvsTwoWeek$MeRatio))] ## ---- eval = TRUE------------------------------------------------------------- HTH.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 1, 0, 1)) idx = order(abs(HTH.6vs2$stat), decreasing = TRUE) HTH.6vs2[idx[1:3], ] DMR_SixWeekvsTwoWeek$MeRatio[ idx[1:3], grepl("Hypothalamus",colnames(DMR_SixWeekvsTwoWeek$MeRatio))] ## ---- eval = TRUE------------------------------------------------------------- CTXvsHTH.6vs2 = TRESS_DMRtest(DMRfit.int, contrast = c(0, 0, 0, 1)) idx = order(abs(CTXvsHTH.6vs2$stat), decreasing = TRUE) CTXvsHTH.6vs2[idx[1:3], ] DMR_SixWeekvsTwoWeek$MeRatio[ idx[1:3], c(1,2,5,6,3,4,7,8)] ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()