## ---- include = FALSE--------------------------------------------------------- # Prevent certificate issues for GitHub actions options(gemma.SSL = FALSE,gemma.memoised = TRUE) # options(gemma.API = "https://dev.gemma.msl.ubc.ca/rest/v2/") knitr::opts_chunk$set( comment = "" ) ## ----setup, message = FALSE--------------------------------------------------- library(gemma.R) library(dplyr) library(ggplot2) library(ggrepel) library(SummarizedExperiment) library(pheatmap) library(viridis) ## ---- include = FALSE--------------------------------------------------------- forget_gemma_memoised() # to make sure local tests don't succeed because of history ## ----'install', eval = FALSE-------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("gemma.R") ## ----------------------------------------------------------------------------- get_taxon_datasets(taxon = 'human') %>% # all human datasets select(experiment.ShortName, experiment.Name,taxon.Name) %>% head search_datasets('bipolar',taxon = 'human') %>% # human datasets mentioning bipolar select(experiment.ShortName, experiment.Name,taxon.Name) %>% head search_datasets('http://purl.obolibrary.org/obo/DOID_3312', # ontology term URI for the bipolar disorder taxon = 'human') %>% select(experiment.ShortName, experiment.Name,taxon.Name) %>% head ## ----------------------------------------------------------------------------- # a quick call with a limit of 1 to get the result count result <- get_taxon_datasets(taxon = 'human',limit = 1) print(attributes(result)$totalElements) ## ----eval=FALSE--------------------------------------------------------------- # count = attributes(result)$totalElements # all_results <- lapply(seq(0, count, 100), function(offset){ # get_taxon_datasets(taxon = 'human',limit = 100, offset = offset) # }) %>% do.call(rbind,.) %>% # select(experiment.ShortName, experiment.Name,taxon.Name) %>% # head() ## ----------------------------------------------------------------------------- get_taxon_datasets(taxon = 'human') %>% # get human datasets filter(geeq.batchEffect !=-1 & experiment.SampleCount > 4) %>% # filter for datasets without batch effects and with a sample count more than 4 select(experiment.ShortName, experiment.Name,taxon.Name) %>% head ## ----------------------------------------------------------------------------- search_annotations('bipolar') %>% head ## ----dataset------------------------------------------------------------------ get_datasets_by_ids("GSE46416") %>% select(experiment.ShortName, experiment.Name, experiment.ID, experiment.Description) ## ----load-expression, eval = TRUE--------------------------------------------- dat <- get_dataset_object("GSE46416", type = 'se') # SummarizedExperiment is the default output type ## ----------------------------------------------------------------------------- # Check the levels of the disease factor dat$disease %>% unique() # Subset patients during manic phase and controls manic <- dat[, dat$disease == "bipolar_disorder_|_manic_phase_|" | dat$disease == "reference_subject_role"] manic ## ----boxplot, fig.cap="Sample to sample correlations of bipolar patients during manic phase and controls."---- # Get Expression matrix manicExpr <- assay(manic, "counts") manicExpr %>% cor %>% pheatmap(col =viridis(10),border_color = NA,angle_col = 45,fontsize = 7) ## ----------------------------------------------------------------------------- head(get_platform_annotations('GPL96')) ## ----------------------------------------------------------------------------- head(get_platform_annotations('Generic_human')) ## ----------------------------------------------------------------------------- # lists genes in gemma matching the symbol or identifier get_genes('Eno2') # ncbi id for human ENO2 probes <- get_gene_probes(2026) # remove the description for brevity of output head(probes[,.SD, .SDcols = !colnames(probes) %in% c('mapping.Description','platform.Description')]) ## ----------------------------------------------------------------------------- dif_exp <- get_differential_expression_values('GSE46416') (dif_exp[[1]]) ## ----------------------------------------------------------------------------- (contrasts <- get_dataset_differential_expression_analyses('GSE46416')) ## ----------------------------------------------------------------------------- # using result.ID and contrast.ID of the output above, we can access specific # results. Note that one study may have multiple contrast objects seq_len(nrow(contrasts)) %>% sapply(function(i){ result_set = dif_exp[[as.character(contrasts[i,]$result.ID)]] p_values = result_set[[glue::glue("contrast_{contrasts[i,]$contrast.id}_pvalue")]] # multiple testing correction sum(p.adjust(p_values,method = 'BH') < 0.05) }) -> dif_exp_genes data.frame(result.ID = contrasts$result.ID, contrast.id = contrasts$contrast.id, baseline.factorValue = contrasts$baseline.factorValue, experimental.factorValue = contrasts$experimental.factorValue, n_diff = dif_exp_genes) ## ----diffExpr, fig.cap="Differentially-expressed genes in bipolar patients during manic phase versus controls.", fig.wide=TRUE, warning = FALSE---- (de <- get_differential_expression_values("GSE46416",readableContrasts = TRUE)[[1]]) # Classify probes for plotting de$diffexpr <- "No" de$diffexpr[de$`contrast_bipolar disorder, manic phase_logFoldChange` > 1.0 & de$`contrast_bipolar disorder, manic phase_pvalue` < 0.05] <- "Up" de$diffexpr[de$`contrast_bipolar disorder, manic phase_logFoldChange` < -1.0 & de$`contrast_bipolar disorder, manic phase_pvalue` < 0.05] <- "Down" # Upregulated probes filter(de, diffexpr == "Up") %>% arrange(`contrast_bipolar disorder, manic phase_pvalue`) %>% select(Probe, GeneSymbol, `contrast_bipolar disorder, manic phase_pvalue`, `contrast_bipolar disorder, manic phase_logFoldChange`) %>% head(10) # Downregulated probes filter(de, diffexpr == "Down") %>% arrange(`contrast_bipolar disorder, manic phase_pvalue`) %>% select(Probe, GeneSymbol, `contrast_bipolar disorder, manic phase_pvalue`, `contrast_bipolar disorder, manic phase_logFoldChange`) %>% head(10) # Add gene symbols as labels to DE genes de$delabel <- "" de$delabel[de$diffexpr != "No"] <- de$GeneSymbol[de$diffexpr != "No"] # Volcano plot for bipolar patients vs controls ggplot( data = de, aes( x = `contrast_bipolar disorder, manic phase_logFoldChange`, y = -log10(`contrast_bipolar disorder, manic phase_pvalue`), color = diffexpr, label = delabel ) ) + geom_point() + geom_hline(yintercept = -log10(0.05), col = "gray45", linetype = "dashed") + geom_vline(xintercept = c(-1.0, 1.0), col = "gray45", linetype = "dashed") + labs(x = "log2(FoldChange)", y = "-log10(p-value)") + scale_color_manual(values = c("blue", "black", "red")) + geom_text_repel(show.legend = FALSE) + theme_minimal() ## ----------------------------------------------------------------------------- platform_count = attributes(get_platforms_by_ids(limit = 1))$totalElements print(platform_count) ## ----------------------------------------------------------------------------- lapply(seq(0,platform_count,100), function(offset){ get_platforms_by_ids(limit = 100, offset = offset) %>% select(platform.ID, platform.ShortName, taxon.Name) }) %>% do.call(rbind,.) ## ----error, error = TRUE------------------------------------------------------ get_dataset_annotations(c("GSE35974", "GSE46416")) ## ----loop--------------------------------------------------------------------- lapply(c("GSE35974", "GSE12649"), function(dataset) { get_dataset_annotations(dataset) %>% mutate(experiment.shortName = dataset) %>% select(experiment.shortName, class.Name, term.Name) }) %>% do.call(rbind,.) ## ----------------------------------------------------------------------------- get_gene_locations("DYRK1A") get_gene_locations("DYRK1A", raw = TRUE) ## ----defaults, eval = FALSE--------------------------------------------------- # options(gemma.memoised = TRUE) # always refer to cache # options(gemma.overwrite = TRUE) # always overwrite when saving files # options(gemma.raw = TRUE) # always receive results as-is from Gemma ## ----include = FALSE---------------------------------------------------------- options(gemma.memoised = FALSE) ## ----------------------------------------------------------------------------- sessionInfo()