## ----knitrSetup, include=FALSE---------------------------------------------
library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center", tidy=TRUE)
## ----style, include=FALSE, echo=FALSE, results='asis'----------------------
BiocStyle::markdown()
## ----installPaxtoolsr, eval=FALSE------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("paxtoolsr")
## ----loadLibrary, message=FALSE, warning=FALSE-----------------------------
library(paxtoolsr)
## ----searchHelp, eval=FALSE, tidy=FALSE------------------------------------
# help.search("paxtoolsr")
## ----showHelp, eval=FALSE, tidy=FALSE--------------------------------------
# help(graphPc)
# ?graphPc
## ----paxtoolsMergeExample--------------------------------------------------
file1 <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")
file2 <- system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr")
mergedFile <- mergeBiopax(file1, file2)
## ----summarize, results='hold'---------------------------------------------
s1 <- summarize(file1)
s2 <- summarize(file2)
s3 <- summarize(mergedFile)
s1$Catalysis
s2$Catalysis
s3$Catalysis
## ----paxtoolsValidationExample, eval=FALSE---------------------------------
# errorLog <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), onlyErrors=TRUE)
## ----loadSifFromFile-------------------------------------------------------
sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"))
## ----viewSifHead-----------------------------------------------------------
head(sif)
## ----toSifnxExample, tidy=TRUE---------------------------------------------
# Select additional node and edge properties
inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")
results <- toSifnx(inputFile=inputFile)
## ----downloadSif, eval=FALSE-----------------------------------------------
# results <- downloadPc2()
## ----paxtoolsrCache, eval=FALSE--------------------------------------------
# Sys.getenv("PAXTOOLSR_CACHE")
## ----searchResultsExample, eval=FALSE--------------------------------------
# ## Search Pathway Commons for "glycolysis"-related pathways
# searchResults <- searchPc(q="glycolysis", type="pathway")
## ----searchResultsExampleVerbose-------------------------------------------
## Search Pathway Commons for "glycolysis"-related pathways
searchResults <- searchPc(q="glycolysis", type="pathway", verbose=TRUE)
## ----searchResultsType-----------------------------------------------------
str(searchResults)
## ----searchResultsXpathExample---------------------------------------------
xpathSApply(searchResults, "/searchResponse/searchHit/name", xmlValue)[1]
xpathSApply(searchResults, "/searchResponse/searchHit/pathway", xmlValue)[1]
## ----convertXmlToDf, message=FALSE-----------------------------------------
library(plyr)
searchResultsDf <- ldply(xmlToList(searchResults), data.frame)
# Simplified results
simplifiedSearchResultsDf <- searchResultsDf[, c("name", "uri", "biopaxClass")]
head(simplifiedSearchResultsDf)
## ----saveBiopaxFileFromPcQuery, message=FALSE, results='hide'--------------
## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results
tmpSearchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue)
## Generate temporary file to save content into
biopaxFile <- tempfile()
## Extract a URI for a pathway in the search results and save into a file
idx <- which(grepl("panther", simplifiedSearchResultsDf$uri) &
grepl("glycolysis", simplifiedSearchResultsDf$name, ignore.case=TRUE))
uri <- simplifiedSearchResultsDf$uri[idx]
saveXML(getPc(uri, format="BIOPAX"), biopaxFile)
## ----traverse--------------------------------------------------------------
# Convert the Uniprot ID to a URI that would be found in Pathway Commons
uri <- paste0("http://identifiers.org/uniprot/P31749")
# Get URIs for only the ModificationFeatures of the protein
xml <- traverse(uri=uri, path="ProteinReference/entityFeature:ModificationFeature")
# Extract all the URIs
uris <- xpathSApply(xml, "//value/text()", xmlValue)
# For the first URI get the modification position and type
tmpXml <- traverse(uri=uris[1], path="ModificationFeature/featureLocation:SequenceSite/sequencePosition")
cat("Modification Position: ", xpathSApply(tmpXml, "//value/text()", xmlValue))
tmpXml <- traverse(uri=uris[1], path="ModificationFeature/modificationType/term")
cat("Modification Type: ", xpathSApply(tmpXml, "//value/text()", xmlValue))
## ----loadGraphLibraries, message=FALSE-------------------------------------
library(igraph)
## ----plotGraph-------------------------------------------------------------
sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"))
# graph.edgelist requires a matrix
g <- graph.edgelist(as.matrix(sif[,c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)
## ----graphQueryExampleSingle, figure.width=20, figure.height=20------------
gene <- "BDNF"
t1 <- graphPc(source=gene, kind="neighborhood", format="SIF", verbose=TRUE)
t2 <- t1[which(t1[,2] == "controls-state-change-of"),]
# Show only 100 interactions for simplicity
g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)
## ----graphQueryExample, fig.width=7, fig.height=7--------------------------
genes <- c("AKT1", "IRS1", "MTOR", "IGF1R")
t1 <- graphPc(source=genes,
kind="PATHSBETWEEN",
format="SIF",
verbose=TRUE)
t2 <- t1[which(t1[,2] == "controls-state-change-of"),]
# Show only 100 interactions for simplicity
g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)
## ----dataOverlayExample, fig.width=7, fig.height=7-------------------------
library(RColorBrewer)
# Generate a color palette that goes from white to red that contains 10 colors
numColors <- 10
colors <- colorRampPalette(brewer.pal(9, "Reds"))(numColors)
# Generate values that could represent some experimental values
values <- runif(length(V(g)$name))
# Scale values to generate indicies from the color palette
xrange <- range(values)
newrange <- c(1, numColors)
factor <- (newrange[2]-newrange[1]) / (xrange[2]-xrange[1])
scaledValues <- newrange[1] + (values-xrange[1]) * factor
indicies <- as.integer(scaledValues)
# Color the nodes based using the indicies and the color palette created above
g <- set.vertex.attribute(g, "color", value=colors[indicies])
#get.vertex.attribute(h, "color")
plot(g, layout = layout.fruchterman.reingold)
## ----sifNetStats-----------------------------------------------------------
# Degree for each node in the igraph network
degree(g)
#Number of nodes
length(V(g)$name)
#Clustering coefficient
transitivity(g)
#Network density
graph.density(g)
# Network diameter
diameter(g)
## ----sifShortestPath-------------------------------------------------------
# Get the first shortest path between two nodes
tmp <- get.shortest.paths(g, from="IRS1", to="MTOR")
# igraph seems to return different objects on Linux versus OS X for get.shortest.paths()
if(class(tmp[[1]]) == "list") {
path <- tmp[[1]][[1]] # Linux
} else {
path <- tmp[[1]] # OS X
}
# Convert from indicies to vertex names
V(g)$name[path]
## ----gseaExampleLibraries, message=FALSE-----------------------------------
library(paxtoolsr) # To retrieve data from Pathway Commons
library(limma) # Contains geneSetTest
library(affy) # To load microarray data
library(hgu95av2) # Annotation packages for the hgu95av2 platform
library(hgu95av2cdf)
library(XML) # To parse XML files
## ----gseaExampleGenGeneSet, results='hide', eval=FALSE---------------------
# # Generate a Gene Set
# ## Search Pathway Commons for "glycolysis"-related pathways
# searchResults <- searchPc(q="glycolysis", type="pathway")
#
# ## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results
# searchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue)
#
# ## Generate temporary files to save content into
# biopaxFile <- tempfile()
#
# ## Extract the URI for the first pathway in the search results and save into a file
# uri <- searchResults[2]
# saveXML(getPc(uri, "BIOPAX"), biopaxFile)
## ----gseaExampleGenGeneSetSave, results='hide'-----------------------------
## Generate temporary files to save content into
gseaFile <- tempfile()
## Generate a gene set for the BioPAX pathway with gene symbols
### NOTE: Not all search results are guaranteed to result in gene sets
tmp <- toGSEA(biopaxFile, gseaFile, "HGNC Symbol", FALSE)
geneSet <- tmp$geneSet
## ----gseaExampleMicroarrayAnalyis------------------------------------------
# Process Microarray Data
## Load/process the estrogen microarray data
estrogenDataDir <- system.file("extdata", package="estrogen")
targets <- readTargets(file.path(estrogenDataDir, "estrogen.txt"), sep="")
abatch <- ReadAffy(filenames=file.path(estrogenDataDir, targets$filename))
eset <- rma(abatch)
f <- paste(targets$estrogen,targets$time.h,sep="")
f <- factor(f)
design <- model.matrix(~0+f)
colnames(design) <- levels(f)
fit <- lmFit(eset, design)
cont.matrix <- makeContrasts(E10="present10-absent10",E48="present48-absent48",Time="absent48-absent10",levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
## Map the gene symbols to the probe IDs
symbol <- unlist(as.list(hgu95av2SYMBOL))
### Check that the gene symbols are on the microarray platform
genesOnChip <- match(geneSet,symbol)
genesOnChip # CHECK FOR ERROR HERE
genesOnChip <- genesOnChip[!is.na(genesOnChip)]
### Grab the probe IDs for the genes present
genesOnChip <- names(symbol[genesOnChip])
genesOnChip <- match(genesOnChip, rownames(fit2$t))
genesOnChip <- genesOnChip[!is.na(genesOnChip)]
## Run the Gene Set Test from the limma Package
geneSetTest(genesOnChip,fit2$t[,1],"two.sided")
## ----idMapping, eval=FALSE-------------------------------------------------
# sif <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"))
#
# # Generate a mapping between the HGNC symbols in the SIF to the Uniprot IDs
# library(biomaRt)
# ensembl <- useMart("ensembl")
# ensembl <- useDataset("hsapiens_gene_ensembl", mart=ensembl)
#
# hgnc_symbol <- c(sif$PARTICIPANT_A, sif$PARTICIPANT_B)
# output <- getBM(attributes=c('hgnc_symbol', 'uniprot_sptrembl'), filters='hgnc_symbol', values=hgnc_symbol, mart=ensembl)
#
# # Remove blank entries
# output <- output[output[,2] != "",]
## ----filePathExample, eval=FALSE-------------------------------------------
# toSif("/directory/file")
# #or
# toSif("X:\\directory\\file")
## ----changeHeapSize, eval=FALSE--------------------------------------------
# options(java.parameters="-Xmx1024m")
#
# library(paxtoolsr)
#
# # Megabyte size
# mbSize <- 1048576.0
#
# runtime <- .jcall("java/lang/Runtime", "Ljava/lang/Runtime;", "getRuntime")
# maxMemory <- .jcall(runtime, "J", "maxMemory")
# maxMemoryMb <- maxMemory / mbSize
# cat("Max Memory: ", maxMemoryMb, "\n")
## ----sessionInfo-----------------------------------------------------------
sessionInfo()