## ----setup, echo=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse=TRUE)

## ---- eval = FALSE---------------------------------------------------------
#  source("http://www.bioconductor.org/biocLite.R")
#  biocLite("BiocFileCache", dependencies = TRUE)

## ---- results='hide', warning=FALSE, message=FALSE-------------------------
library(BiocFileCache)

## --------------------------------------------------------------------------
path <- file.path(tempdir(), "tempCacheDir")
bfc <- BiocFileCache(path)

## --------------------------------------------------------------------------
bfccache(bfc)
length(bfc)

## --------------------------------------------------------------------------
bfc

## --------------------------------------------------------------------------
bfcinfo(bfc)

## --------------------------------------------------------------------------
savepath <- bfcnew(bfc, "NewResource", ext="RData")
savepath

## now we can use that path in any save function
m = matrix(1:12, nrow=3)
save(m, file=savepath)

## and that file will be tracked in the cache
bfcinfo(bfc)

## --------------------------------------------------------------------------
fl1 <- tempfile(); file.create(fl1)
add2 <- bfcadd(bfc, "Test_addCopy", fl1)                 # copy
# returns filepath being tracked in cache
add2
# the name is the unique rid in the cache
rid2 <- names(add2)

fl2 <- tempfile(); file.create(fl2)
add3 <- bfcadd(bfc, "Test2_addMove", fl2, action="move") # move
rid3 <- names(add3)

fl3 <- tempfile(); file.create(fl3)
add4 <- bfcadd(bfc, "Test3_addAsis", fl3, rtype="local",
	       action="asis") # reference
rid4 <- names(add4)

file.exists(fl1)    # TRUE - copied from original location
file.exists(fl2)    # FALSE - moved from original location
file.exists(fl3)    # TRUE - left asis, original location tracked

## --------------------------------------------------------------------------
url <- "http://httpbin.org/get"
add5 <- bfcadd(bfc, "TestWeb", fpath=url)
rid5 <- names(add5)

url2<- "https://en.wikipedia.org/wiki/Bioconductor"
add6 <- bfcadd(bfc, "TestWeb", fpath=url2)
rid6 <- names(add6)

# let's look at our BiocFileCache object now
bfc
bfcinfo(bfc)

## --------------------------------------------------------------------------
bfcquery(bfc, "Web")

bfcquery(bfc, "copy")

q1 <- bfcquery(bfc, "wiki")
q1
class(q1)

## --------------------------------------------------------------------------
bfccount(q1)

## --------------------------------------------------------------------------
bfcsubWeb = bfc[paste0("BFC", 5:6)]
bfcsubWeb
bfcinfo(bfcsubWeb)

## --------------------------------------------------------------------------
bfc[["BFC2"]]
bfcpath(bfc, "BFC2")
bfcpath(bfc, "BFC5")
bfcrpath(bfc, rids="BFC5")
bfcrpath(bfc)
bfcrpath(bfc, c("http://httpbin.org/get","Test3_addAsis"))

## --------------------------------------------------------------------------
bfcneedsupdate(bfc, "BFC5")
bfcneedsupdate(bfc, "BFC6")
bfcneedsupdate(bfc)

## --------------------------------------------------------------------------
fileBeingReplaced <- bfc[[rid3]]
fileBeingReplaced

# fl3 was created when we were adding resources
fl3

bfc[[rid3]]<-fl3
bfc[[rid3]]

## --------------------------------------------------------------------------
bfcinfo(bfc, "BFC1")
bfcupdate(bfc, "BFC1", rname="FirstEntry")
bfcinfo(bfc, "BFC1")

## --------------------------------------------------------------------------
library(dplyr)
bfcinfo(bfc, "BFC6") %>% select(rid, rpath, fpath)
bfcupdate(bfc, "BFC6", fpath=url, rname="Duplicate")
bfcinfo(bfc, "BFC6") %>% select(rid, rpath, fpath)

## --------------------------------------------------------------------------
rid <- "BFC5"
test <- !identical(bfcneedsupdate(bfc, rid), FALSE) # 'TRUE' or 'NA'
if (test)
    bfcdownload(bfc, rid)

## --------------------------------------------------------------------------
names(bfcinfo(bfc))
meta <- as.data.frame(list(rid=bfcrid(bfc)[1:3], idx=1:3))
bfcmeta(bfc, name="resourceData") <- meta
names(bfcinfo(bfc))

## --------------------------------------------------------------------------
bfcmetalist(bfc)
bfcmeta(bfc, name="resourceData")

## --------------------------------------------------------------------------
bfcmetaremove(bfc, name="resourceData")

## ----eval=FALSE------------------------------------------------------------
#  bfcmeta(name="resourceData") <- meta
#  Error in bfcmeta(name = "resourceData") <- meta :
#    target of assignment expands to non-language object

## ----eval=FALSE------------------------------------------------------------
#  bfc <- BiocFileCache()
#  bfcmeta(bfc, name="resourceData") <- meta

## --------------------------------------------------------------------------
# let's remind ourselves of our object
bfc

bfcremove(bfc, "BFC6")
bfcremove(bfc, "BFC1")

# let's look at our BiocFileCache object now
bfc

## --------------------------------------------------------------------------
# create a new entry that hasn't been used
path <- bfcnew(bfc, "UseMe")
rmMe <- names(path)
# We also have a file not being tracked because we updated rpath

bfcsync(bfc)

# you can suppress the messages and just have a TRUE/FALSE
bfcsync(bfc, FALSE)

#
# Let's do some cleaning to have a synced object
#
bfcremove(bfc, rmMe)
unlink(fileBeingReplaced)

bfcsync(bfc)

## ----eval=FALSE------------------------------------------------------------
#  cleanbfc(bfc)

## ----eval=FALSE------------------------------------------------------------
#  removebfc(bfc)

## --------------------------------------------------------------------------
## paste to avoid long line in vignette
url <- paste(
    "ftp://ftp.ensembl.org/pub/release-71/gtf",
    "homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz",
    sep="/")

## ---- eval=FALSE-----------------------------------------------------------
#  library(BiocFileCache)
#  bfc <- BiocFileCache()
#  path <- bfcrpath(bfc, url)

## ---- eval=FALSE-----------------------------------------------------------
#  gtf <- rtracklayer::import.gff(path)

## ---- eval=FALSE-----------------------------------------------------------
#  gtf <- rtracklayer::import.gff(bfcrpath(BiocFileCache(), url))

## ---- eval=FALSE-----------------------------------------------------------
#  library(BiocFileCache)
#  bfc <- BiocFileCache("~/my-experiment/results")

## ---- eval=FALSE-----------------------------------------------------------
#  library(DESeq2)
#  library(airway)
#  data(airway)
#  dds <- DESeqDataData(airway, design = ~ cell + dex)
#  result <- DESeq(dds)

## ---- eval=FALSE-----------------------------------------------------------
#  saveRDS(result, bfcnew(bfc, "airway / DESeq standard analysis"))

## ---- eval=FALSE-----------------------------------------------------------
#  result <- readRDS(bfcrpath(bfc, "airway / DESeq standard analysis"))

## ----eval=FALSE------------------------------------------------------------
#  library(BiocFileCache)
#  library(rtracklayer)
#  
#  # load the cache
#  path <- file.path(tempdir(), "tempCacheDir")
#  bfc <- BiocFileCache(path)
#  
#  # the web resource of interest
#  url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
#  
#  # check if url is being tracked
#  res <- bfcquery(bfc, url)
#  
#  if (bfccount(res) == 0L) {
#  
#      # if it is not in cache, add
#      ans <- bfcadd(bfc, rname="ensembl, homo sapien", fpath=url)
#  
#  } else {
#  
#    # if it is in cache, get path to load
#    rid = res %>% filter(fpath == url) %>% collect(Inf) %>% `[[`("rid")
#    ans <- bfcrpath(bfc, rid)
#  
#    # check to see if the resource needs to be updated
#    check <- bfcneedsupdate(bfc, rid)
#    # check can be NA if it cannot be determined, choose how to handle
#    if (is.na(check)) check <- TRUE
#    if (check){
#      ans < - bfcdownload(bfc, rid)
#    }
#  }
#  
#  
#  # ans is the path of the file to load
#  ans
#  
#  
#  # we know because we search for the url that the file is a .gtf.gz,
#  # if we searched on other terms we can use 'bfcpath' to see the
#  # original fpath to know the appropriate load/read/import method
#  bfcpath(bfc, names(ans))
#  
#  temp = GTFFile(ans)
#  info = import(temp)

## ----eval=TRUE-------------------------------------------------------------

#
# A simplier test to see if something is in the cache
# and if not start tracking it is using `bfcrpath`
#


library(BiocFileCache)
library(rtracklayer)

# load the cache
path <- file.path(tempdir(), "tempCacheDir")
bfc <- BiocFileCache(path)

# the web resources of interest
url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"

url2 <- "ftp://ftp.ensembl.org/pub/release-71/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_5.0.71.gtf.gz"

# if not in cache will download and create new entry
pathsToLoad <- bfcrpath(bfc, c(url, url2))

pathsToLoad

# now load files as see fit
info = import(GTFFile(pathsToLoad[1]))
class(info)
summary(info)

## ----eval=FALSE------------------------------------------------------------
#  #
#  # One could also imagine the following:
#  #
#  
#  library(BiocFileCache)
#  
#  # load the cache
#  bfc <- BiocFileCache()
#  
#  #
#  # Do some work!
#  #
#  
#  # add a location in the cache
#  filepath <- bfcnew(bfc, "R workspace")
#  
#  save(list = ls(), file=filepath)
#  
#  # now the R workspace is being tracked in the cache