## Warning: replacing previous import 'utils::findMatches' by
## 'S4Vectors::findMatches' when loading 'AnnotationDbi'

The epigenomics road map describes locations of epigenetic marks in DNA from a variety of cell types. Of interest are locations of histone modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines by Ernst and Kellis.
I have retrieved a Google Docs spreadsheet with comprehensive information. The mapmeta() function provides access to a local DataFrame image of the file as retrieved in mid-April 2015. We provide a dynamic view of a selection of columns. Use the search box to filter the records shown.
## NOTE: input data had non-ASCII characters replaced by ' '.

The chromatin states and standard colorings
used are enumerated in states_25:
The emission parameters of the 25 state model are depicted in the supplementary Figure 33 of Ernst and Kellis:
I have retrieved a modest number of roadmap BED files with ChromImpute
mnemonic labeling of chromatin by states. These can be
managed with an ErmaSet instance,
a trivial extension of the GenomicFiles class.
The cellTypes method yields a character vector. The colData
component has full metadata on the cell lines available.
## NOTE: input data had non-ASCII characters replaced by ' '.
## ErmaSet object with 0 ranges and 31 files: 
## files: E002_25_imputed12marks_mnemonics.bed.gz, E003_25_imputed12marks_mnemonics.bed.gz, ..., E088_25_imputed12marks_mnemonics.bed.gz, E096_25_imputed12marks_mnemonics.bed.gz 
## detail: use files(), rowRanges(), colData(), ... 
## cellTypes() for type names; data(short_celltype) for abbr.
## [1] "ES-WA7 Cells"                         
## [2] "H1 Cells"                             
## [3] "iPS DF 6.9 Cells"                     
## [4] "Primary B cells from peripheral blood"
## [5] "Primary T cells from cord blood"

We form a GRanges representing 50 kb upstream of IL33.

## 'select()' returned 1:many mapping between keys and columns
## GRanges object with 1 range and 0 metadata columns:
##       seqnames          ranges strand
##          <Rle>       <IRanges>  <Rle>
##   [1]     chr9 6165786-6215785      +
##   -------
##   seqinfo: 1 sequence from hg19 genome

Bind this to the ErmaSet instance.
## ErmaSet object with 1 ranges and 31 files: 
## files: E002_25_imputed12marks_mnemonics.bed.gz, E003_25_imputed12marks_mnemonics.bed.gz, ..., E088_25_imputed12marks_mnemonics.bed.gz, E096_25_imputed12marks_mnemonics.bed.gz 
## detail: use files(), rowRanges(), colData(), ... 
## cellTypes() for type names; data(short_celltype) for abbr.

Now query the files for cell-specific states in this interval.
library(BiocParallel)
register(MulticoreParam(workers = 2))

# For every file managed by `ermaset`, import the records selected by the
# bound range, align the imported seqlevels with those of the range, and
# attach an `rgb` column derived from the state label in `name`.
# reduceByFile() returns one result per file; `"[[", 1` keeps the first
# element of each, which here corresponds to the single bound range
# (presumably one element per range -- confirm against GenomicFiles docs).
suppressWarnings({
  csstates <- lapply(
    reduceByFile(ermaset, MAP = function(range, file) {
      imp <- import(file, which = range, genome = genome(range)[1])
      seqlevels(imp) <- seqlevels(range)
      # rgbByState is an unexported erma helper, hence the `:::` access.
      imp$rgb <- erma:::rgbByState(imp$name)
      imp
    }),
    "[[", 1
  )
})

# Label each per-file result with its cell type. This assumes cellTypes()
# returns names in the same order as the files of `ermaset`.
tys <- cellTypes(ermaset)
# seq_along() rather than 1:length(): an empty `csstates` then yields an
# empty list instead of indexing elements 1 and 0 and failing.
csstates <- lapply(seq_along(csstates), function(i) {
  labelled <- csstates[[i]]
  labelled$celltype <- tys[i]
  labelled
})
csstates[1:2]
## [[1]]
## GRanges object with 15 ranges and 3 metadata columns:
##        seqnames          ranges strand |        name         rgb     celltype
##           <Rle>       <IRanges>  <Rle> | <character> <character>  <character>
##    [1]     chr9 6161801-6166600      * |    25_Quies     #FEFEFE ES-WA7 Cells
##    [2]     chr9 6166601-6166800      * |    17_EnhW2     #FEFE00 ES-WA7 Cells
##    [3]     chr9 6166801-6171200      * |    25_Quies     #FEFEFE ES-WA7 Cells
##    [4]     chr9 6171201-6171800      * |    17_EnhW2     #FEFE00 ES-WA7 Cells
##    [5]     chr9 6171801-6172000      * |    16_EnhW1     #FEFE00 ES-WA7 Cells
##    ...      ...             ...    ... .         ...         ...          ...
##   [11]     chr9 6183401-6197400      * |    25_Quies     #FEFEFE ES-WA7 Cells
##   [12]     chr9 6197401-6197600      * |    19_DNase     #FEFE66 ES-WA7 Cells
##   [13]     chr9 6197601-6208800      * |    25_Quies     #FEFEFE ES-WA7 Cells
##   [14]     chr9 6208801-6211000      * |      21_Het     #8990CF ES-WA7 Cells
##   [15]     chr9 6211001-6217800      * |    25_Quies     #FEFEFE ES-WA7 Cells
##   -------
##   seqinfo: 1 sequence from hg19 genome
## 
## [[2]]
## GRanges object with 14 ranges and 3 metadata columns:
##        seqnames          ranges strand |        name         rgb    celltype
##           <Rle>       <IRanges>  <Rle> | <character> <character> <character>
##    [1]     chr9 6161801-6166600      * |    25_Quies     #FEFEFE    H1 Cells
##    [2]     chr9 6166601-6166800      * |    17_EnhW2     #FEFE00    H1 Cells
##    [3]     chr9 6166801-6171200      * |    25_Quies     #FEFEFE    H1 Cells
##    [4]     chr9 6171201-6173000      * |    17_EnhW2     #FEFE00    H1 Cells
##    [5]     chr9 6173001-6175400      * |      21_Het     #8990CF    H1 Cells
##    ...      ...             ...    ... .         ...         ...         ...
##   [10]     chr9 6183401-6197400      * |    25_Quies     #FEFEFE    H1 Cells
##   [11]     chr9 6197401-6197600      * |    19_DNase     #FEFE66    H1 Cells
##   [12]     chr9 6197601-6209000      * |    25_Quies     #FEFEFE    H1 Cells
##   [13]     chr9 6209001-6211000      * |      21_Het     #8990CF    H1 Cells
##   [14]     chr9 6211001-6218200      * |    25_Quies     #FEFEFE    H1 Cells
##   -------
##   seqinfo: 1 sequence from hg19 genome

This sort of code underlies
the csProfile utility to visualize variation in state assignments
in promoter regions for various genes.
## 'select()' returned 1:many mapping between keys and columns
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Set useShiny to TRUE to permit interactive selection of
region to visualize.