## ----setup, echo=FALSE---------------------------------------------------
library(UseBioconductor)
stopifnot(BiocInstaller::biocVersion() == "3.1")

## ----style, echo = FALSE, results = 'asis'-------------------------------
BiocStyle::markdown()

## ----benchmark-----------------------------------------------------------
f0 <- function(n) {
    ## inefficient!
    ans <- numeric()
    for (i in seq_len(n))
        ans <- c(ans, exp(i))
    ans
}

f1 <- function(n) {
    ans <- numeric(n)
    for (i in seq_len(n))
        ans[[i]] <- exp(i)
    ans
}

f2 <- function(n)
    sapply(seq_len(n), exp)

f3 <- function(n)
    exp(seq_len(n))

## ----parallel-sleep------------------------------------------------------
library(BiocParallel)

fun <- function(i) {
    Sys.sleep(1)
    i
}

## serial
f0 <- function(n)
    lapply(seq_len(n), fun)

## parallel
f1 <- function(n)
    bplapply(seq_len(n), fun)

## ----reduceByYield-setup-------------------------------------------------
suppressPackageStartupMessages({
    library(GenomicFiles)
    library(GenomicAlignments)
    library(Rsamtools)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
})

yield <-     # how to input the next chunk of data
    function(X, ...)
{
    readGAlignments(X)
}

map <-       # what to do to each chunk
    function(VALUE, ..., roi)
{
    olaps <- findOverlaps(VALUE, roi, type="within", ignore.strand=TRUE)
    count <- tabulate(subjectHits(olaps), subjectLength(olaps))
    setNames(count, names(roi))
}

reduce <-    # how to combine mapped chunks
    `+`

## ----yieldFactory--------------------------------------------------------
yieldFactory <-  # return a function with local state 
    function() 
{
    n_records <- 0L
    function(X, ...) {
        aln <- readGAlignments(X)
        n_records <<- n_records + length(aln)
        message(n_records)
        aln
    }
}

## ----count-overlaps-roi, eval=FALSE--------------------------------------
#  exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")
#  
#  map0 <- read.delim("~/igv/genomes/hg19_alias.tab", header=FALSE,
#      stringsAsFactors=FALSE)
#  seqlevels(exByTx, force=TRUE) <- setNames(map0$V1, map0$V2)

## ----count-overlaps, eval=FALSE------------------------------------------
#  count1 <- function(filename, roi) {
#      message(filename)
#      ## Create and open BAM file
#      bf <- BamFile(filename, yieldSize=1000000)
#      reduceByYield(bf, yieldFactory(), map, reduce, roi=roi)
#  }

## ----count-overlaps-doit, eval=FALSE-------------------------------------
#  filename <- "~/bam/SRR1039508_sorted.bam"
#  count <- count1(filename, exByTx)

## ----count-overlaps-parallel, eval=FALSE---------------------------------
#  library(BiocParallel)
#  
#  ## all bam files
#  filenames <- dir("~/bam", pattern="bam$", full=TRUE)
#  names(filenames) <- sub("_sorted.bam", "", basename(filenames))
#  
#  ## iterate
#  counts <- bplapply(filenames, count1, exByTx)
#  counts <- simplify2array(counts)
#  head(counts)