## ----echo=FALSE---------------------------------------------------------------
source(system.file('vignettes_inc.R', package='biodb'))

## -----------------------------------------------------------------------------
biodb::genNewExtPkg(path='biodbChebiEx', dbName='chebi.ex', connType='compound',
                    dbTitle='ChEBI connector example', entryType='xml', remote=TRUE)

## -----------------------------------------------------------------------------
list.files('biodbChebiEx', all.files=TRUE, recursive=TRUE)

## ----eval=FALSE, highlight=FALSE, code=readLines('biodbChebiEx/inst/definitions.yml')----
# # biodb example definitions file for extensions packages, version 1.0.0
# 
# databases:
#   chebi.ex:
#     name: ChEBI connector example
#     description: Write here the description of this database.
#     compound.db: true
#     entry.content.type: xml
#     parsing.expr:
#       accession: substring-after(//dbns:return/dbns:accessionId,'ACCESSION:')
#       name:
#       - //dbns:name
#       - //dbns:synonyms/dbns:data
#       mass: //dbns:mass
#       monoisotopic.mass: //dbns:monoisotopicMass
#       smiles: //dbns:return/dbns:smiles
#       inchi: //dbns:return/dbns:inchi
#       inchikey: //dbns:return/dbns:inchiKey
#       formula:
#       - //dbns:Formulae/dbns:source[text()='MyDatabase']/../dbns:data
#       - (//dbns:Formulae/dbns:data)[1]
#     xml.ns:
#       dbns: https://my.database.org/webservices/v1
#       xsd: http://www.w3.org/2001/XMLSchema
#     searchable.fields:
#       - name
#       - monoisotopic.mass
#       - molecular.mass
#       - average.mass
#       - nominal.mass
#     remote: true
#     # Length in seconds of the connection sliding window
#     scheduler.t: 1
#     # Number of connections allowed inside the connection sliding window
#     scheduler.n: 3
#     urls:
#       # Base URL of the database server, where to find entry pages
#       base.url: https://my.database.org/mydb/
#       # Webservice URL to use to contact web services
#       ws.url: https://my.database.org/webservices/mydb/3.2/
#       # Add any other URL you need for the development of your connector
#       # Inside your code, you can get each of these URLs with a call like the following one:
#       #     .self$getPropValSlot('urls', 'ws.url')
# 
# fields:
#   chebi.ex.id:
#     description: ChEBI connector example ID
#     case.insensitive: true
#     forbids.duplicates: true
#     type: id
#     card: many

## -----------------------------------------------------------------------------
defFile <- system.file("extdata", "chebi_ex.yml", package='biodb')

## ----eval=FALSE, highlight=FALSE, code=readLines(system.file("extdata", "chebi_ex.yml", package='biodb'))----
# databases:
# 
#   chebi.ex:
#     name: ChEBI example connector
#     description: An example connector for ChEBI.
#     compound.db: true
#     entry.content.encoding: UTF-8
#     entry.content.type: xml
#     parsing.expr:
#       accession: substring-after(//chebi:return/chebi:chebiId,'CHEBI:')
#       formula:
#         - //chebi:Formulae/chebi:source[text()='ChEBI']/../chebi:data
#         - (//chebi:Formulae/chebi:data)[1]
#       inchi: //chebi:return/chebi:inchi
#       inchikey: //chebi:return/chebi:inchiKey
#       mass: //chebi:mass
#       monoisotopic.mass: //chebi:monoisotopicMass
#       name:
#         - //chebi:chebiAsciiName
#       smiles: //chebi:return/chebi:smiles
#     searchable.fields:
#       - name
#       - monoisotopic.mass
#       - molecular.mass
#     remote: true
#     scheduler.t: 1
#     scheduler.n: 3
#     urls:
#       base.url: https://www.ebi.ac.uk/chebi/
#       ws.url: https://www.ebi.ac.uk/webservices/chebi/2.0/
#     xml.ns:
#       chebi: https://www.ebi.ac.uk/webservices/chebi
#       xsd: http://www.w3.org/2001/XMLSchema
# 
# fields:
# 
#   chebi.ex.id:
#     description: ChEBI ID
#     type: id
#     card: many
#     forbids.duplicates: true
#     case.insensitive: true

## ----eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExEntry.R')----
# #' ChEBI connector example entry class.
# #'
# #' Entry class for ChEBI connector example.
# #'
# #' @seealso
# #' \code{\link{BiodbXmlEntry}}.
# #'
# #' @examples
# #' # Create an instance with default settings:
# #' mybiodb <- biodb::newInst()
# #'
# #' # Get a connector that inherits from ChebiExConn:
# #' conn <- mybiodb$getFactory()$createConn('chebi.ex')
# #'
# #' # Get the first entry
# #' e <- conn$getEntry(conn$getEntryIds(1L))
# #'
# #' # Terminate instance.
# #' mybiodb$terminate()
# #'
# #' @import biodb
# #' @import R6
# #' @export
# ChebiExEntry <- R6::R6Class("ChebiExEntry",
#     inherit=
#         biodb::BiodbXmlEntry
#     ,
# 
# public=list(
# 
# initialize=function(...) {
#     super$initialize(...)
# }
# 
# ,doCheckContent=function(content) {
# 
#     # You can do some more checks of the content here.
# 
#     return(TRUE)
# }
# 
# ,doParseFieldsStep2=function(parsed.content) {
# 
#     # TODO Implement your custom parsing processing here.
# }
# ))

## ----echo=FALSE, results='asis'-----------------------------------------------
make_vignette_ref('details')

## ----eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExConn.R')----
# #' ChEBI connector example connector class.
# #'
# #' Connector class for ChEBI connector example.
# #'
# #' @seealso \code{\link{BiodbConn}}.
# #'
# #' @examples
# #' # Create an instance with default settings:
# #' mybiodb <- biodb::newInst()
# #'
# #' # Get a connector:
# #' conn <- mybiodb$getFactory()$createConn('chebi.ex')
# #'
# #' # Get the first entry
# #' e <- conn$getEntry(conn$getEntryIds(1L))
# #'
# #' # Terminate instance.
# #' mybiodb$terminate()
# #'
# #' @import biodb
# #' @import R6
# #' @export
# ChebiExConn <- R6::R6Class("ChebiExConn",
# inherit=biodb::BiodbConn,
# 
# public=list(
# 
# initialize=function(...) {
#     super$initialize(...)
# }
# 
# ,wsFind=function(name="", retfmt=c('plain', 'parsed', 'ids', 'request')) {
#     # This is the implementation of a fictive web service called "find" that
#     # search for entries by name.
#     # Use it as an example for implementing your own web services.
# 
#     retfmt <- match.arg(retfmt)
# 
#     # Build request
#     params <- list(name=name)
#     url <- BiodbUrl$new(url=c(self$getPropValSlot('urls', 'ws.url'), 'find'),
#                     params=params)
#     request <- self$makeRequest(method='get', url=url)
# 
#     # Return request
#     if (retfmt == 'request')
#         return(request)
# 
#     # Send request
#     # This the line that should be run for sending the request and getting the
#     # results:
#     #results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)
#     # Instead, for this example, we just generate the results of this fictive
#     # web service:
#     results <- paste('{"0001": {"name": "name1"},',
#                      ' "0198": {"name": "name2"},',
#                      ' "9834": {"name": "name3"}}')
# 
#     # Parse
#     if (retfmt != 'plain') {
# 
#         # Parse JSON
#         results <- jsonlite::fromJSON(results, simplifyDataFrame=FALSE)
# 
#         # Get IDs
#         if (retfmt == 'ids')
#             results <- names(results)
#     }
# 
#     return(results)
# }
# ),
# 
# private=list(
# 
# doGetNbEntries=function(count=FALSE) {
# 
#     # Replace the call below if you have a direct way (specific web service for
#     # a remote database, provided method or information for a local database)
#     # to count entries for your database.
#     return(callSuper(count=count))
# }
# 
# 
# ,doGetEntryIds=function(max.results=NA_integer_) {
#     # Overrides super class' method.
# 
#     ids <- NA_character_
# 
#     # TODO Implement retrieval of accession numbers.
# 
#     return(ids)
# }
# 
# ,doSearchForEntries=function(fields=NULL, max.results=NA_integer_) {
#     # Overrides super class' method.
# 
#     ids <- character()
# 
#     # TODO Implement search of entries by filtering on values of fields.
# 
#     return(ids)
# }
# 
# ,doGetEntryContentRequest=function(id, concatenate=TRUE) {
# 
#     # TODO Modify the code below to build the URLs to get the contents of the
#     # entries.
#     # Depending on the database, you may have to build one URL for each
#     # individual entry or may be able to write just one or a few URL for all
#     # entries to retrieve.
#     u <- c(self$getPropValSlot('urls', 'base.url'), 'entries',
#            paste(id, 'xml', sep='.'))
#     url <- BiodbUrl$new(url=u)$toString()
# 
#     return(url)
# }
# 
# ,doGetEntryPageUrl=function(id) {
# 
#     # TODO Modify this code to build the individual URLs to the entry web pages
#     fct <- function(x) {
#         u <- c(self$getPropValSlot('urls', 'base.url'), 'entries', x)
#         BiodbUrl$new(url=u)$toString()
#     }
# 
#     return(vapply(id, fct, FUN.VALUE=''))
# }
# 
# ,doGetEntryImageUrl=function(id) {
# 
#     # TODO Modify this code to build the individual URLs to the entry images
#     fct <- function(x) {
#         u <- c(self$getPropValSlot('urls', 'base.url'), 'images', x,
#                'image.png')
#         BiodbUrl$new(url=u)$toString()
#     }
# 
#     return(vapply(id, fct, FUN.VALUE=''))
# }
# ))

## ----eval=FALSE---------------------------------------------------------------
# wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
#                          max.results=10,
#                          retfmt=c('plain', 'parsed', 'request', 'ids')) {
# }

## ----echo=FALSE, results='hide'-----------------------------------------------
connClass <- system.file("extdata", "ChebiExConn.R", package='biodb')
entryClass <- system.file("extdata", "ChebiExEntry.R", package='biodb')
source(connClass)
source(entryClass)

## ----code=readLines(connClass)------------------------------------------------
ChebiExConn <- R6::R6Class("ChebiExConn",
inherit=biodb::BiodbConn,

public=list(

initialize=function(...) {
    super$initialize(...)
},

wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL',
                         max.results=10,
                         retfmt=c('plain', 'parsed', 'request', 'ids')) {

    # Check parameters
    chk::chk_string(search)
    chk::chk_in(search.category, self$getSearchCategories())
    chk::chk_number(max.results)
    chk::chk_gte(max.results, 0)
    chk::chk_in(stars, self$getStarsCategories())
    retfmt <- match.arg(retfmt)

    # Build request
    params <- c(search=search,
                searchCategory=search.category,
                maximumResults=max.results,
                starsCategory=stars)
    url <- c(self$getPropValSlot('urls', 'ws.url'), 'test/getLiteEntity')
    request <- self$makeRequest(method='get', url=BiodbUrl$new(url=url,
                                                                params=params),
                                 encoding='UTF-8')
    if (retfmt == 'request')
        return(request)

    # Send request
    results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)

    # Parse
    if (retfmt != 'plain') {

        # Parse XML
        results <-  XML::xmlInternalTreeParse(results, asText=TRUE)

        if (retfmt == 'ids') {
            ns <- self$getPropertyValue('xml.ns')
            results <- XML::xpathSApply(results, "//chebi:chebiId",
                                        XML::xmlValue, namespaces=ns)
            results <- sub('CHEBI:', '', results)
            if (length(grep("^[0-9]+$", results)) != length(results))
                self$error("Impossible to parse XML to get entry IDs.")
        }
    }

    return(results)
}
),

private=list(
doSearchForEntries=function(fields=NULL, max.results=0) {

    ids <- character()

    if ( ! is.null(fields)) {

        # Search by name
        if ('name' %in% names(fields))
            ids <- self$wsGetLiteEntity(search=fields$name,
                                         search.category="ALL NAMES",
                                         max.results=0, retfmt='ids')
    }

    # Cut
    if (max.results > 0 && max.results < length(ids))
        ids <- ids[seq_len(max.results)]

    return(ids)
},

doGetEntryContentRequest=function(id, concatenate=TRUE) {

    url <- c(self$getPropValSlot('urls', 'ws.url'), 'test',
             'getCompleteEntity')

    urls <- vapply(id, function(x) BiodbUrl$new(url=url,
                                            params=list(chebiId=x))$toString(),
                   FUN.VALUE='')

    return(urls)
},

doGetEntryIds=function(max.results=NA_integer_) {
    return(NULL)
},

doGetEntryPageUrl=function(id) {
    # Overrides super class' method

    url <- c(self$getPropValSlot('urls', 'base.url'), 'searchId.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(chebiId=x))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
},

doGetEntryImageUrl=function(id) {
    # Overrides super class' method

    url <- c(self$getPropValSlot('urls', 'base.url'), 'displayImage.do')

    fct <- function(x) {
        BiodbUrl$new(url=url, params=list(defaultImage='true', imageIndex=0,
                                      chebiId=x, dimensions=400))$toString()
    }
    
    urls <- vapply(id, fct, FUN.VALUE='')

    return(urls)
}
))

## ----code=readLines(entryClass)-----------------------------------------------
ChebiExEntry <- R6::R6Class("ChebiExEntry",
inherit=BiodbXmlEntry,

public=list(

initialize=function(...) {
    super$initialize(...)
}
),

private=list(
doCheck=function(content) {
    
    # You can do some more checks of the content here.
    
    return(TRUE)
}

,doParseFieldsStep2=function(parsed.content) {
    
    # TODO Implement your custom parsing processing here.
}

))

## -----------------------------------------------------------------------------
mybiodb <- biodb::newInst()

## -----------------------------------------------------------------------------
mybiodb$loadDefinitions(defFile)

## -----------------------------------------------------------------------------
conn <- mybiodb$getFactory()$createConn('chebi.ex')

## -----------------------------------------------------------------------------
entry <- conn$getEntry('17001')
entry$getFieldsAsDataframe()

## ----Closing of the biodb instance--------------------------------------------
mybiodb$terminate()

## -----------------------------------------------------------------------------
MyEntryClass <- R6::R6Class("MyEntryClass", inherit=biodb::BiodbCsvEntry,
    public=list(
        initialize=function() {
            super$initialize(sep=';', na.strings=c('', 'NA'))
        }
))

## ----eval=FALSE---------------------------------------------------------------
# doParseContent=function(content) {
# 
#     # Get lines of content
#     lines <- strsplit(content, "\r?\n")[[1]]
# 
#     return(lines)
# },
# 
# doParseFieldsStep1=function(parsed.content) {
# 
#     # Get parsing expressions
#     parsing.expr <- .self$getParent()$getPropertyValue('parsing.expr')
# 
#     .self$.assertNotNull(parsed.content)
#     .self$.assertNotNa(parsed.content)
#     .self$.assertNotNull(parsing.expr)
#     .self$.assertNotNa(parsing.expr)
#     .self$.assertNotNull(names(parsing.expr))
# 
#     # Loop on all parsing expressions
#     for (field in names(parsing.expr)) {
# 
#         # Match whole content
#         g <- stringr::str_match(parsed.content, parsing.expr[[field]])
# 
#         # Get positive results
#         results <- g[ ! is.na(g[, 1]), , drop=FALSE]
# 
#         # Any match ?
#         if (nrow(results) > 0)
#             .self$setFieldValue(field, results[, 2])
#     }
# }

## ----eval=FALSE---------------------------------------------------------------
# doParseFieldsStep2=function(parsed.content) {
# 
#     # Remove fields with empty string
#     for (f in .self$getFieldNames()) {
#         v <- .self$getFieldValue(f)
#         if (is.character(v) && ! is.na(v) && v == '')
#             .self$removeField(f)
#     }
# 
#     # Correct InChIKey
#     if (.self$hasField('INCHIKEY')) {
#         v <- sub('^InChIKey=', '', .self$getFieldValue('INCHIKEY'), perl=TRUE)
#         .self$setFieldValue('INCHIKEY', v)
#     }
# 
#     # Synonyms
#     synonyms <- XML::xpathSApply(parsed.content, "//synonym", XML::xmlValue)
#     if (length(synonyms) > 0)
#         .self$appendFieldValue('name', synonyms)
# }

## -----------------------------------------------------------------------------
sessionInfo()