## ----setup, echo=FALSE--------------------------------------------------------
knitr::opts_chunk$set(collapse=TRUE)
## ----eval = FALSE-------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
# install.packages("BiocManager")
# BiocManager::install("BiocFileCache", dependencies=TRUE)
## ----results='hide', warning=FALSE, message=FALSE-----------------------------
library(BiocFileCache)
## -----------------------------------------------------------------------------
path <- tempfile()
bfc <- BiocFileCache(path, ask = FALSE)
## ----url----------------------------------------------------------------------
## paste to avoid long line in vignette
url <- paste(
"ftp://ftp.ensembl.org/pub/release-71/gtf",
"homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz",
sep="/")
## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache()
# path <- bfcrpath(bfc, url)
## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(path)
## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(bfcrpath(BiocFileCache(), url))
## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache("~/my-experiment/results")
## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
# library(DESeq2)
# library(airway)
# })
# data(airway)
# dds <- DESeqDataData(airway, design = ~ cell + dex)
# result <- DESeq(dds)
## ----eval=FALSE---------------------------------------------------------------
# saveRDS(result, bfcnew(bfc, "airway / DESeq standard analysis"))
## ----eval=FALSE---------------------------------------------------------------
# result <- readRDS(bfcrpath(bfc, "airway / DESeq standard analysis"))
## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
# library(BiocFileCache)
# library(rtracklayer)
# })
#
# # load the cache
# path <- file.path(tempdir(), "tempCacheDir")
# bfc <- BiocFileCache(path)
#
# # the web resource of interest
# url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
#
# # check if url is being tracked
# res <- bfcquery(bfc, url, exact=TRUE)
#
# if (bfccount(res) == 0L) {
#
# # if it is not in cache, add
# ans <- bfcadd(bfc, rname="ensembl, homo sapien", fpath=url)
#
# } else {
#
# # if it is in cache, get path to load
# rid = res$rid
# ans <- bfcrpath(bfc, rid)
#
# # check to see if the resource needs to be updated
# check <- bfcneedsupdate(bfc, rid)
# # check can be NA if it cannot be determined, choose how to handle
# if (is.na(check)) check <- TRUE
# if (check){
# ans < - bfcdownload(bfc, rid)
# }
# }
#
# # ans is the path of the file to load
# ans
#
# # we know because we search for the url that the file is a .gtf.gz,
# # if we searched on other terms we can use 'bfcpath' to see the
# # original fpath to know the appropriate load/read/import method
# bfcpath(bfc, names(ans))
#
# temp = GTFFile(ans)
# info = import(temp)
## ----ensemblremote, eval=TRUE-------------------------------------------------
#
# A simpler test to see if something is in the cache
# and if not start tracking it is using `bfcrpath`
#
suppressPackageStartupMessages({
library(BiocFileCache)
library(rtracklayer)
})
# load the cache
path <- file.path(tempdir(), "tempCacheDir")
bfc <- BiocFileCache(path, ask=FALSE)
# the web resources of interest
url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
url2 <- "ftp://ftp.ensembl.org/pub/release-71/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_5.0.71.gtf.gz"
# if not in cache will download and create new entry
pathsToLoad <- bfcrpath(bfc, c(url, url2))
pathsToLoad
# now load files as see fit
info = import(GTFFile(pathsToLoad[1]))
class(info)
summary(info)
## ----eval=FALSE---------------------------------------------------------------
# #
# # One could also imagine the following:
# #
#
# library(BiocFileCache)
#
# # load the cache
# bfc <- BiocFileCache()
#
# #
# # Do some work!
# #
#
# # add a location in the cache
# filepath <- bfcnew(bfc, "R workspace")
#
# save(list = ls(), file=filepath)
#
# # now the R workspace is being tracked in the cache
## ----eval=FALSE---------------------------------------------------------------
# .get_cache <-
# function()
# {
# cache <- tools::R_user_dir("MyNewPackage", which="cache")
# BiocFileCache::BiocFileCache(cache)
# }
## ----eval=FALSE---------------------------------------------------------------
# download_data_file <-
# function( verbose = FALSE )
# {
# fileURL <- "http://a_path_to/someremotefile.tsv.gz"
#
# bfc <- .get_cache()
# rid <- bfcquery(bfc, "geneFileV2", "rname")$rid
# if (!length(rid)) {
# if( verbose )
# message( "Downloading GENE file" )
# rid <- names(bfcadd(bfc, "geneFileV2", fileURL ))
# }
# if (!isFALSE(bfcneedsupdate(bfc, rid)))
# bfcdownload(bfc, rid)
#
# bfcrpath(bfc, rids = rid)
# }
## ----preprocess---------------------------------------------------------------
url <- "http://bioconductor.org/packages/stats/bioc/BiocFileCache/BiocFileCache_stats.tab"
headFile <- # how to process file before caching
function(from, to)
{
dat <- readLines(from)
writeLines(head(dat), to)
TRUE
}
rid <- bfcquery(bfc, url, "fpath")$rid
if (!length(rid)) # not in cache, add but do not download
rid <- names(bfcadd(bfc, url, download = FALSE))
update <- bfcneedsupdate(bfc, rid) # TRUE if newly added or stale
if (!isFALSE(update)) # download & process
bfcdownload(bfc, rid, ask = FALSE, FUN = headFile)
rpath <- bfcrpath(bfc, rids=rid) # path to processed result
readLines(rpath) # read processed result
## ----eval=FALSE---------------------------------------------------------------
# proxy <- "http://my_user:my_password@myproxy:8080"
# bfcadd(bfc, rname="uniquename", fpath="https://remoteresource", proxy=proxy)
## ----eval=FALSE---------------------------------------------------------------
# ssl_opts <- list(verbose = 1L,ssl_verifypeer = 0L, ssl_verifyhost = 0L)
# bfcadd(bfc, rname="uniquename", fpath="https://remoteresource", config=ssl_opts)
## ----sessioninfo--------------------------------------------------------------
sessionInfo()