## ----style, echo=FALSE, results='asis', message=FALSE--------------------
BiocStyle::markdown()

## ----eval = FALSE--------------------------------------------------------
#  install.packages("BiocManager")
#  BiocManager::install("AnnotationHub")

## ------------------------------------------------------------------------
library('AnnotationHub')

# create an AnnotationHub connection
ah <- AnnotationHub()

# search for all EuPathDB resources
meta <- query(ah, "EuPathDB")

length(meta)
head(meta)

# types of EuPathDB data available
table(meta$rdataclass)

# distribution of resources by specific databases
table(meta$dataprovider)

# list of organisms for which resources are available
length(unique(meta$species))
head(unique(meta$species))

## ------------------------------------------------------------------------
res <- query(ah, c('Leishmania major strain Friedlin', 'OrgDb', 'EuPathDB'))
res

## ------------------------------------------------------------------------
orgdb <- res[['AH65089']]
class(orgdb)

## ------------------------------------------------------------------------
# list available fields to retrieve
columns(orgdb)

# create a vector containing all gene ids for the organism
gids <- keys(orgdb, keytype='GID')
head(gids)

# retrieve the chromosome, description, and biotype for each gene
dat <- select(orgdb, keys=gids, keytype='GID', columns=c('CHR', 'TYPE', 'GENEDESCRIPTION'))

head(dat)

table(dat$TYPE)
table(dat$CHR)

# create a gene / GO term mapping 
gene_go_mapping <- select(orgdb, keys=gids, keytype='GID', columns=c('GO_ID', 'GO_TERM_NAME', 'ONTOLOGY'))
head(gene_go_mapping)

# retrieve KEGG, etc. pathway annotations
gene_pathway_mapping <- select(orgdb, keys=gids, keytype='GID', columns=c('PATHWAY', 'PATHWAY_SOURCE'))
table(gene_pathway_mapping$PATHWAY_SOURCE)
head(gene_pathway_mapping)

## ------------------------------------------------------------------------
# query AnnotationHub
res <- query(ah, c('Leishmania major strain Friedlin', 'GRanges', 'EuPathDB'))
res

# retrieve a GRanges instance associated with the result record
gr <- res[['AH65354']]
gr

## ------------------------------------------------------------------------
# chromosome names
seqnames(gr)

# strand information
strand(gr)

# feature widths
width(gr)

## ------------------------------------------------------------------------
# list of location types in the resource
table(gr$type)

## ------------------------------------------------------------------------
# get the first three ranges
gr[1:3]

# get all gene entries on chromosome 4
gr[gr$type == 'gene' & seqnames(gr) == 'LmjF.04']

## ------------------------------------------------------------------------
sessionInfo()