## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(fig.width=5, fig.height=5) ## ---- echo=FALSE-------------------------------------------------------------- knitr::include_graphics("images/bootRanges.jpg") ## ---- message=FALSE, warning=FALSE-------------------------------------------- suppressPackageStartupMessages(library(ExperimentHub)) eh = ExperimentHub() # query(eh, "nullrangesdata") exclude <- eh[["EH7306"]] ## ---- message=FALSE, warning=FALSE-------------------------------------------- seg_cbs <- eh[["EH7307"]] seg_hmm <- eh[["EH7308"]] seg <- seg_cbs ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(ensembldb)) suppressPackageStartupMessages(library(EnsDb.Hsapiens.v86)) edb <- EnsDb.Hsapiens.v86 filt <- AnnotationFilterList(GeneIdFilter("ENSG", "startsWith")) g <- genes(edb, filter = filt) ## ----------------------------------------------------------------------------- library(GenomeInfoDb) g <- keepStandardChromosomes(g, pruning.mode = "coarse") seqlevels(g, pruning.mode="coarse") <- setdiff(seqlevels(g), "MT") # normally we would assign a new style, but for recent host issues ## seqlevelsStyle(g) <- "UCSC" seqlevels(g) <- paste0("chr", seqlevels(g)) genome(g) <- "hg38" g <- sortSeqlevels(g) g <- sort(g) table(seqnames(g)) ## ----------------------------------------------------------------------------- library(nullranges) suppressPackageStartupMessages(library(plyranges)) library(patchwork) ## ----seg-cbs------------------------------------------------------------------ set.seed(5) exclude <- exclude %>% plyranges::filter(width(exclude) >= 500) L_s <- 1e6 seg_cbs <- segmentDensity(g, n = 3, L_s = L_s, exclude = exclude, type = "cbs") plots <- lapply(c("ranges","barplot","boxplot"), function(t) { plotSegment(seg_cbs, exclude, type = t) }) plots[[1]] plots[[2]] + plots[[3]] ## ----------------------------------------------------------------------------- region <- GRanges("chr16", IRanges(3e7,4e7)) plotSegment(seg_cbs, exclude, type="ranges", region=region) ## ----seg-hmm------------------------------------------------------------------ seg_hmm <- segmentDensity(g, n = 3, L_s = L_s, exclude = exclude, type = "hmm") plots <- lapply(c("ranges","barplot","boxplot"), function(t) { plotSegment(seg_hmm, exclude, type = t) }) plots[[1]] plots[[2]] + plots[[3]] ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(nullrangesData)) dhs <- DHSA549Hg38() dhs <- dhs %>% plyranges::filter(signalValue > 100) %>% mutate(id = seq_along(.)) %>% plyranges::select(id) length(dhs) table(seqnames(dhs)) ## ----------------------------------------------------------------------------- set.seed(5) # for reproducibility R <- 50 blockLength <- 5e5 boots <- bootRanges(dhs, blockLength, R = R, seg = seg, exclude=exclude) boots ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(tidyr)) combined <- dhs %>% mutate(iter=0) %>% bind_ranges(boots) %>% plyranges::select(iter) stats <- combined %>% group_by(iter) %>% summarize(n = n()) %>% as_tibble() head(stats) ## ---- warning=FALSE----------------------------------------------------------- suppressPackageStartupMessages(library(ggridges)) suppressPackageStartupMessages(library(purrr)) suppressPackageStartupMessages(library(ggplot2)) interdist <- function(dat) { x = dat[-1,] y = dat[-nrow(dat),] ifelse(x$seqnames == y$seqnames, x$start + floor((x$width - 1)/2) - y$start-floor((y$width - 1)/2), NA)} combined %>% plyranges::filter(iter %in% 0:3) %>% plyranges::select(iter) %>% as.data.frame() %>% nest(-iter) %>% mutate(interdist = map(data, ~interdist(.))) %>% dplyr::select(iter,interdist) %>% unnest(interdist) %>% mutate(type = ifelse(iter == 0, "original", "boot")) %>% ggplot(aes(log10(interdist), iter, fill=type)) + geom_density_ridges(alpha = 0.75) + geom_text(data=head(stats,4), aes(x=1.5, y=iter, label=paste0("n=",n), fill=NULL), vjust=1.5) ## ----------------------------------------------------------------------------- x <- GRanges("chr2", IRanges(1 + 50:99 * 1e6, width=1e6), x_id=1:50) x <- x %>% mutate(n_overlaps = count_overlaps(., dhs)) mean( x$n_overlaps ) ## ----------------------------------------------------------------------------- boot_stats <- x %>% join_overlap_inner(boots) %>% group_by(x_id, iter) %>% summarize(n_overlaps = n()) %>% as.data.frame() %>% complete(x_id, iter, fill=list(n_overlaps = 0)) %>% group_by(iter) %>% summarize(meanOverlaps = mean(n_overlaps)) ## ----boot-hist---------------------------------------------------------------- suppressPackageStartupMessages(library(ggplot2)) ggplot(boot_stats, aes(meanOverlaps)) + geom_histogram(binwidth=.2) ## ----------------------------------------------------------------------------- library(microbenchmark) microbenchmark( list=alist( prop = bootRanges(dhs, blockLength, seg = seg, proportionLength = TRUE), no_prop = bootRanges(dhs, blockLength, seg = seg, proportionLength = FALSE) ), times=10) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(plotgardener)) my_palette <- function(n) { head(c("red","green3","red3","dodgerblue", "blue2","green4","darkred"), n) } plotGRanges <- function(gr) { pageCreate(width = 5, height = 5, xgrid = 0, ygrid = 0, showGuides = TRUE) for (i in seq_along(seqlevels(gr))) { chrom <- seqlevels(gr)[i] chromend <- seqlengths(gr)[[chrom]] suppressMessages({ p <- pgParams(chromstart = 0, chromend = chromend, x = 0.5, width = 4*chromend/500, height = 2, at = seq(0, chromend, 50), fill = colorby("state_col", palette=my_palette)) prngs <- plotRanges(data = gr, params = p, chrom = chrom, y = 2 * i, just = c("left", "bottom")) annoGenomeLabel(plot = prngs, params = p, y = 0.1 + 2 * i) }) } } ## ----------------------------------------------------------------------------- library(GenomicRanges) seq_nms <- rep(c("chr1","chr2"), c(4,3)) seg <- GRanges( seqnames = seq_nms, IRanges(start = c(1, 101, 201, 401, 1, 201, 301), width = c(100, 100, 200, 100, 200, 100, 100)), seqlengths=c(chr1=500,chr2=400), state = c(1,2,1,3,3,2,1), state_col = factor(1:7) ) ## ----toysegments-------------------------------------------------------------- plotGRanges(seg) ## ----toyranges---------------------------------------------------------------- set.seed(1) n <- 200 gr <- GRanges( seqnames=sort(sample(c("chr1","chr2"), n, TRUE)), IRanges(start=round(runif(n, 1, 500-10+1)), width=10) ) suppressWarnings({ seqlengths(gr) <- seqlengths(seg) }) gr <- gr[!(seqnames(gr) == "chr2" & end(gr) > 400)] gr <- sort(gr) idx <- findOverlaps(gr, seg, type="within", select="first") gr <- gr[!is.na(idx)] idx <- idx[!is.na(idx)] gr$state <- seg$state[idx] gr$state_col <- factor(seg$state_col[idx]) plotGRanges(gr) ## ----toy-no-prop-------------------------------------------------------------- set.seed(1) gr_prime <- bootRanges(gr, blockLength = 25, seg = seg, proportionLength = FALSE) plotGRanges(gr_prime) ## ----toy-prop----------------------------------------------------------------- set.seed(1) gr_prime <- bootRanges(gr, blockLength = 50, seg = seg, proportionLength = TRUE) plotGRanges(gr_prime) ## ----------------------------------------------------------------------------- sessionInfo()