### R code from vignette source 'OmnipathR.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: style-Sweave
###################################################
    ## Chunk "style-Sweave": call the BiocStyle LaTeX helper; presumably this
    ## emits the Bioconductor style preamble for the vignette (confirm in the
    ## BiocStyle documentation).
    BiocStyle::latex()


###################################################
### code chunk number 2: installation (eval = FALSE)
###################################################
## if (!requireNamespace("BiocManager", quietly = TRUE))
##     install.packages("BiocManager")
## 
## BiocManager::install("OmnipathR")


###################################################
### code chunk number 3: OmnipathR.Rnw:193-208
###################################################
library(OmnipathR)
library(tidyr)
library(dnet)
library(gprofiler2)

## List the first ten interaction resources that can be queried.
head(get_interaction_databases(), n = 10)

## Download the interactions reported by three selected resources; the
## result is kept as a data frame.
interactions <- import_Omnipath_Interactions(
    filter_databases = c("SignaLink3", "PhosphoSite", "Signor")
)

## Show the first rows of the downloaded interactions.
print_interactions(head(interactions))


###################################################
### code chunk number 4: OmnipathR.Rnw:231-242
###################################################
## Build a graph object from the interactions data frame.
OPI_g <- interaction_graph(interactions = interactions)

## Shortest path on the directed network between two proteins of interest.
## The edge path ("epath") of the first result is printed.
printPath_es(
    shortest_paths(
        OPI_g,
        from = "TYRO3",
        to = "STAT3",
        output = "epath"
    )[["epath"]][[1]],
    OPI_g
)

## All the shortest paths between another pair of proteins, printed as
## vertex sequences.
printPath_vs(
    all_shortest_paths(OPI_g, from = "DYRK2", to = "MAPKAPK2")[["res"]],
    OPI_g
)


###################################################
### code chunk number 5: OmnipathR.Rnw:255-267
###################################################
## Group the proteins of the network into modules with the Louvain
## algorithm. Louvain is fast but can only run on undirected graphs, so the
## directed network is converted first (mode "mutual" is passed to
## as.undirected). Other clustering algorithms, such as
## cluster_edge_betweenness, can deal with directed networks but with longer
## computational times. These cluster methods are directly available in the
## igraph package.
OPI_g_undirected <- as.undirected(OPI_g, mode = "mutual")
cl_results <- cluster_louvain(OPI_g_undirected)

## Extract the cluster that contains the protein of interest.
cluster_id <- cl_results$membership[cl_results$names == "CD22"]

## The vertex names are read from the same undirected graph that was
## clustered and that is subsetted below, so the membership vector and the
## names are guaranteed to refer to the same vertex ordering.
module_graph <- induced_subgraph(
    OPI_g_undirected,
    igraph::V(OPI_g_undirected)$name[cl_results$membership == cluster_id]
)


###################################################
### code chunk number 6: fig2
###################################################
## Plot the selected cluster and its interactions; the protein of interest
## (CD22) is highlighted in yellow.
par(mar = rep(0.1, 4))
plot(
    module_graph,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 15,
    vertex.color = ifelse(
        igraph::V(module_graph)$name == "CD22", "yellow", "#00CCFF"
    ),
    edge.curved = 0.2,
    edge.color = "blue",
    edge.width = 0.8
)


###################################################
### code chunk number 7: OmnipathR.Rnw:300-311
###################################################
## Download the pathway-extra interactions of two resources for organism
## 10090 and keep them in a data frame.
interactions <- import_PathwayExtra_Interactions(
    filter_databases = c("BioGRID", "IntAct"),
    select_organism = 10090
)

## Keep every interaction in which the Amfr gene takes part, either as
## source or as target.
interactions_Amfr <- interactions %>%
    dplyr::filter(
        source_genesymbol == "Amfr" | target_genesymbol == "Amfr"
    )

## Print the selected interactions.
print_interactions(interactions_Amfr)


###################################################
### code chunk number 8: OmnipathR.Rnw:318-329
###################################################
## Download the kinase-extra interactions of two resources for organism
## 10116 and keep them in a data frame.
interactions <- import_KinaseExtra_Interactions(
    filter_databases = c("PhosphoPoint", "PhosphoSite"),
    select_organism = 10116
)

## Keep the interactions whose target is the Dpysl2 gene.
interactions_TargetDpysl2 <- interactions %>%
    dplyr::filter(target_genesymbol == "Dpysl2")

## Print the selected interactions.
print_interactions(interactions_TargetDpysl2)


###################################################
### code chunk number 9: OmnipathR.Rnw:338-354
###################################################
## Download the ligand-receptor-extra interactions of two resources
## (select_organism = 9606) and keep them in a data frame.
interactions <- import_LigrecExtra_Interactions(
    filter_databases = c("HPRD", "Guide2Pharma"),
    select_organism = 9606
)

## Receptors of the CDH1 ligand: interactions whose source is CDH1.
interactions_CDH1 <- interactions %>%
    dplyr::filter(source_genesymbol == "CDH1")

## Turn the selected interactions into a graph.
OPI_g <- interaction_graph(interactions = interactions_CDH1)

## Induce a network from every node of that graph. knn = 0 is passed, so
## presumably no additional neighbours are pulled in (confirm against the
## dnet documentation); loops are not removed and the result is not
## restricted to the largest component.
Induced_Network <- dNetInduce(
    g = OPI_g,
    nodes_query = as.character(V(OPI_g)$name),
    knn = 0,
    remove.loops = FALSE,
    largest.comp = FALSE
)


###################################################
### code chunk number 10: fig3
###################################################
## Plot the induced network; the CDH1 ligand is highlighted in yellow.
par(mar = rep(0.1, 4))
plot(
    Induced_Network,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 20,
    vertex.color = ifelse(
        igraph::V(Induced_Network)$name %in% "CDH1", "yellow", "#00CCFF"
    ),
    edge.curved = 0.2,
    edge.color = "blue",
    edge.width = 0.8
)


###################################################
### code chunk number 11: OmnipathR.Rnw:384-393
###################################################
## Download the TF-target interactions of two resources
## (select_organism = 9606) and keep them in a data frame.
interactions <- import_TFregulons_Interactions(
    filter_databases = c("DoRothEA_A", "ARACNe-GTEx"),
    select_organism = 9606
)

## Keep the most confident interactions (level "A") of one transcription
## factor, GLI1, and print them to check how it regulates its targets.
interactions_A_GLI1 <- interactions %>%
    dplyr::filter(
        tfregulons_level == "A",
        source_genesymbol == "GLI1"
    )
print_interactions(interactions_A_GLI1)


###################################################
### code chunk number 12: OmnipathR.Rnw:405-418
###################################################
## Download the miRNA-target interactions of two resources and keep them in
## a data frame.
interactions <- import_miRNAtarget_Interactions(
    filter_databases = c("miRTarBase", "miRecords")
)

## Keep and print the interactions in which a miRNA targets GLI1, the
## transcription factor selected in the previous code chunk.
interactions_miRNA_GLI1 <- interactions %>%
    dplyr::filter(target_genesymbol == "GLI1")
print_interactions(interactions_miRNA_GLI1)

## Turn both selections into graphs (igraph objects).
OPI_g_1 <- interaction_graph(interactions = interactions_A_GLI1)
OPI_g_2 <- interaction_graph(interactions = interactions_miRNA_GLI1)


###################################################
### code chunk number 13: fig4
###################################################
## Plot the union of both previous graphs. The union, its vertex names and
## the miRNA mask are computed once and reused, instead of rebuilding the
## union graph for every plotting argument.
par(mar = rep(0.1, 4))
OPI_g_union <- OPI_g_1 %u% OPI_g_2
union_vertex_names <- igraph::V(OPI_g_union)$name
union_is_mirna <- grepl("miR", union_vertex_names)

## miRNAs are drawn as red rectangles, GLI1 as a yellow circle and every
## other node as a light-blue circle.
plot(
    OPI_g_union,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 20,
    vertex.color = ifelse(
        union_is_mirna,
        "red",
        ifelse(union_vertex_names == "GLI1", "yellow", "#00CCFF")
    ),
    vertex.shape = ifelse(union_is_mirna, "vrectangle", "circle"),
    edge.curved = 0.25,
    edge.color = "blue",
    edge.width = 0.8
)


###################################################
### code chunk number 14: OmnipathR.Rnw:456-485
###################################################
## List the resources that provide post-translational modifications (PTMs).
get_ptms_databases()

## Download the PTMs into a data frame. No resource filter is applied here.
ptms <- import_Omnipath_PTMS()

## Select and print the reactions between a specific kinase and a specific
## substrate.
print_interactions(
    ptms %>%
        dplyr::filter(
            enzyme_genesymbol == "MAP2K1",
            substrate_genesymbol == "MAPK3"
        )
)

## The previous output shows that ptms carries no sign
## (activation/inhibition). That information is generated from the
## protein-protein Omnipath interaction dataset.
interactions <- import_Omnipath_Interactions()
ptms <- get_signed_ptms(ptms, interactions)

## Same kinase and substrate as before; the printed ptms now include the
## inhibition or activation information.
print_interactions(
    ptms %>%
        dplyr::filter(
            enzyme_genesymbol == "MAP2K1",
            substrate_genesymbol == "MAPK3"
        )
)

## The ptms can also be turned into a graph.
ptms_g <- ptms_graph(ptms = ptms)

## Download the PTMs for mouse from two selected resources.
ptms <- import_Omnipath_PTMS(
    filter_databases = c("PhosphoSite", "Signor"),
    select_organism = 10090
)


###################################################
### code chunk number 15: OmnipathR.Rnw:507-529
###################################################
## List the resources that provide molecular complexes.
get_complexes_databases()

## Download the complexes of two selected resources into a data frame.
complexes <- import_Omnipath_complexes(
    filter_databases = c("CORUM", "hu.MAP")
)

## Genes whose complexes we want to look up.
query_genes <- c("WRN", "PARP1")

## Complexes in which any of the query genes participates.
complexes_query_genes_any <- unique(
    get_complex_genes(complexes, query_genes, total_match = FALSE)
)

## Print the components of the first six selected complexes.
head(complexes_query_genes_any$components_genesymbols, n = 6)

## Complexes in which all the query genes participate jointly.
complexes_query_genes_join <- unique(
    get_complex_genes(complexes, query_genes, total_match = TRUE)
)

## Print the components of the selected complexes.
complexes_query_genes_join$components_genesymbols


###################################################
### code chunk number 16: OmnipathR.Rnw:532-544
###################################################
## Split the "_"-separated component strings of the jointly matched
## complexes into individual gene symbols.
genes_complex <- unlist(
    strsplit(complexes_query_genes_join$components_genesymbols, "_")
)

## Run an enrichment analysis on the genes of the complex, restricted to
## the three GO sources.
EnrichmentResults <- gost(
    genes_complex,
    significant = TRUE,
    user_threshold = 0.001,
    correction_method = "fdr",
    sources = c("GO:BP", "GO:CC", "GO:MF")
)

## Show the most significant results (the five smallest p-values).
EnrichmentResults[["result"]] %>%
    dplyr::select(term_id, source, term_name, p_value) %>%
    dplyr::top_n(n = 5, wt = -p_value)


###################################################
### code chunk number 17: OmnipathR.Rnw:568-579
###################################################
## List the resources that provide annotations.
get_annotation_databases()

## Further investigate the features of the complex selected in the previous
## section, starting with the annotations of the complex itself. The query
## names are built by prefixing the component strings with "COMPLEX:".
annotations <- import_Omnipath_annotations(
    select_genes = paste0(
        "COMPLEX:",
        complexes_query_genes_join$components_genesymbols
    )
)

## Show three columns of the first ten annotation records.
head(dplyr::select(annotations, source, label, value), n = 10)


###################################################
### code chunk number 18: OmnipathR.Rnw:587-604
###################################################
## Next, explore some annotations of the individual components of the
## complex.

## Pathways the proteins belong to (NetPath resource).
annotations <- import_Omnipath_annotations(
    select_genes = genes_complex,
    filter_databases = c("NetPath")
)

dplyr::select(annotations, genesymbol, value)

## Cellular localization of the proteins (ComPPI resource).
annotations <- import_Omnipath_annotations(
    select_genes = genes_complex,
    filter_databases = c("ComPPI")
)

## Several results of the query share the same record_id, so the
## label/value pairs are spread across columns before the ten records with
## the highest score are shown.
tidyr::spread(annotations, key = label, value = value) %>%
    dplyr::arrange(desc(score)) %>%
    dplyr::top_n(n = 10, wt = score)


###################################################
### code chunk number 19: OmnipathR.Rnw:625-646
###################################################
## We check some of the different intercell categories
head(get_intercell_categories(),10)

## We import the intercell data into a dataframe
intercell <- import_Omnipath_intercell()

## We check the intercell annotations for our previous complex itself
dplyr::filter(intercell,
    genesymbol == complexes_query_genes_join$components_genesymbols,
    mainclass != "") %>%
    dplyr::select(category,genesymbol, mainclass)

## We check the intercell annotations for the individual components of 
## our previous complex. We filter our data to print it in a good format
dplyr::filter(intercell,genesymbol %in% genes_complex, mainclass!="") %>% 
    dplyr::distinct(genesymbol,mainclass, .keep_all = TRUE) %>%
    dplyr::select(category, genesymbol, mainclass) %>%
    dplyr::arrange(genesymbol)
  
## We close graphical connections
while (!is.null(dev.list()))  dev.off()


###################################################
### code chunk number 20: sessionInfo
###################################################
  ## Chunk "sessionInfo": convert the session information (R version and
  ## loaded packages) to LaTeX so it can be included in the vignette.
  toLatex(sessionInfo())