## ----setup, include = FALSE, warning = FALSE---------------------------------- knitr::opts_chunk$set(comment = FALSE, warning = FALSE, message = FALSE) ## ----------------------------------------------------------------------------- require(IgGeneUsage) require(knitr) require(ggplot2) require(ggforce) require(gridExtra) require(ggrepel) require(rstan) require(reshape2) rstan_options(auto_write = TRUE) ## ----------------------------------------------------------------------------- data("IGHV_HCV", package = "IgGeneUsage") ## ----------------------------------------------------------------------------- kable(x = head(IGHV_HCV), row.names = FALSE) ## ---- fig.height = 5, fig.width = 8------------------------------------------- # we can compute the total number of rearrangements per sample total.usage <- aggregate(gene_usage_count~sample_id+condition, FUN = sum, data = IGHV_HCV) total.usage$total <- total.usage$gene_usage_count total.usage$gene_usage_count <- NULL # merge it with the original data viz <- merge(x = IGHV_HCV, y = total.usage, by = c("sample_id", "condition"), all.x = TRUE) # compute % viz$gene_usage_pct <- viz$gene_usage_count/viz$total*100 # For this example lets only consider the top 45 genes # Hint: In real analyses you should not filter any gene top <- aggregate(gene_usage_pct~gene_name, data = viz, FUN = mean) top <- top[order(top$gene_usage_pct, decreasing = TRUE), ] IGHV_HCV <- IGHV_HCV[IGHV_HCV$gene_name %in% top$gene_name[seq_len(45)], ] viz <- viz[viz$gene_name %in% top$gene_name[seq_len(45)], ] # visualize ggplot(data = viz)+ geom_point(aes(x = gene_name, y = gene_usage_pct, fill = condition, shape = condition), position = position_dodge(width = .7), stroke = 0)+ theme_bw(base_size = 11)+ ylab(label = "Usage [%]")+ xlab(label = '')+ theme(legend.position = "top")+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_shape_manual(name = "Condition", values = c(21, 22)) ## ----------------------------------------------------------------------------- M <- DGU(usage.data = IGHV_HCV, # input data mcmc.warmup = 500, # how many MCMC warm-ups per chain (default: 500) mcmc.steps = 1500, # how many MCMC steps per chain (default: 1,500) mcmc.chains = 2, # how many MCMC chain to run (default: 4) mcmc.cores = 2, # how many PC cores to use? (e.g. parallel chains) hdi.level = 0.95, # highest density interval level (de fault: 0.95) adapt.delta = 0.95, # MCMC target acceptance rate (default: 0.95) max.treedepth = 10) # tree depth evaluated at each step (default: 12) ## ----------------------------------------------------------------------------- summary(M) ## ----------------------------------------------------------------------------- rstan::check_hmc_diagnostics(M$glm) ## ---- fig.height = 3, fig.width = 6------------------------------------------- gridExtra::grid.arrange(rstan::stan_rhat(object = M$glm), rstan::stan_ess(object = M$glm), nrow = 1) ## ---- fig.height = 10, fig.width = 8------------------------------------------ ggplot(data = M$ppc.data$ppc.repertoire)+ facet_wrap(facets = ~sample_name, ncol = 5)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_count, y = ppc_mean_count, ymin = ppc_L_count, ymax = ppc_H_count), col = "darkgray")+ geom_point(aes(x = observed_count, y = ppc_mean_count, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_x_log10(breaks = c(1, 10, 100, 1000), labels = expression(10^0, 10^1, 10^2, 10^3))+ scale_y_log10(breaks = c(1, 10, 100, 1000), labels = expression(10^0, 10^1, 10^2, 10^3))+ xlab(label = "Observed usage [counts]")+ ylab(label = "Predicted usage [counts]")+ annotation_logticks(base = 10, sides = "lb") ## ---- fig.height = 4, fig.width = 6------------------------------------------- ggplot(data = M$ppc.data$ppc.gene)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_prop*100, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100), col = "darkgray")+ geom_point(aes(x = observed_prop*100, y = ppc_mean_prop*100, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ xlab(label = "Observed usage [%]")+ ylab(label = "Predicted usage [%]")+ scale_x_log10()+ scale_y_log10()+ annotation_logticks(base = 10, sides = "lb") ## ----------------------------------------------------------------------------- kable(x = head(M$glm.summary), row.names = FALSE, digits = 3) ## ---- fig.height = 4, fig.width = 6------------------------------------------- # format data stats <- M$glm.summary stats <- stats[order(abs(stats$es_mean), decreasing = FALSE), ] stats$gene_fac <- factor(x = stats$gene_name, levels = stats$gene_name) stats <- merge(x = stats, y = M$test.summary, by = "gene_name") ggplot(data = stats)+ geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+ geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H), col = "darkgray")+ geom_point(aes(x = pmax, y = es_mean))+ geom_text_repel(data = stats[stats$pmax >= 0.9, ], aes(x = pmax, y = es_mean, label = gene_fac))+ theme_bw(base_size = 11)+ xlab(label = expression(pi)) ## ---- fig.height = 3, fig.width = 6------------------------------------------- promising.genes <- stats$gene_name[stats$pmax >= 0.9] ppc.gene <- M$ppc.data$ppc.gene ppc.gene <- ppc.gene[ppc.gene$gene_name %in% promising.genes, ] ggplot()+ geom_errorbar(data = ppc.gene, aes(x = gene_name, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100, col = condition), position = position_dodge(width = .8), width = 0.75)+ geom_point(data = viz[viz$gene_name %in% promising.genes, ], aes(x = gene_name, y = gene_usage_pct, col = condition), shape = 21, size = 1.5, fill = "black", position = position_jitterdodge(jitter.width = 0.15, jitter.height = 0, dodge.width = 0.8))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ ylab(label = "Usage [%]")+ xlab(label = '') ## ---- fig.height = 8, fig.width = 6------------------------------------------- promising.genes <- stats$gene_name[stats$pmax >= 0.9] ggplot(data = viz[viz$gene_name %in% promising.genes, ])+ facet_wrap(facets = ~gene_name, ncol = 1, scales = "free_y")+ geom_point(aes(x = sample_id, y = gene_usage_count, fill = condition, size = total/10^3), shape = 21)+ theme_bw(base_size = 11)+ ylab(label = "Usage [count]")+ xlab(label = '')+ theme(legend.position = "top")+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_size_continuous(name = expression("N ("*10^3*")"), range = c(1, 5))+ ylab(label = "Usage [count]")+ xlab(label = '')+ scale_y_log10()+ annotation_logticks(base = 10, sides = "l") ## ---- fig.height = 5, fig.width = 8------------------------------------------- ggplot()+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(data = stats, col = "red", size = 2, aes(x = pmax, y = -log10(t.test.fdr.pvalue)))+ geom_text_repel(data = stats[stats$pmax >= 0.5, ], aes(x = pmax, y = -log10(t.test.fdr.pvalue), label = gene_name), size = 4, min.segment.length = 0.1)+ xlim(0, 1)+ ylab(label = "-log10 (P-value) from t-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ---- fig.height = 3, fig.width = 6------------------------------------------- promising.genes <- c("IGHV1-58", "IGHV3-72") ppc.gene <- M$ppc.data$ppc.gene ppc.gene <- ppc.gene[ppc.gene$gene_name %in% promising.genes, ] ggplot()+ geom_errorbar(data = ppc.gene, aes(x = gene_name, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100, col = condition), position = position_dodge(width = .8), width = 0.75)+ geom_point(data = viz[viz$gene_name %in% promising.genes, ], aes(x = gene_name, y = gene_usage_pct, col = condition), shape = 21, size = 1.5, fill = "black", position = position_jitterdodge(jitter.width = 0.15, jitter.height = 0, dodge.width = 0.8))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ ylab(label = "Gene usage [%]")+ xlab(label = '') ## ---- fig.height = 5, fig.width = 6------------------------------------------- promising.genes <- c("IGHV1-58", "IGHV3-72") ggplot(data = viz[viz$gene_name %in% promising.genes, ])+ facet_wrap(facets = ~gene_name, ncol = 1, scales = "free_y")+ geom_point(aes(x = sample_id, y = gene_usage_count, fill = condition, size = total/10^3), shape = 21)+ theme_bw(base_size = 11)+ ylab(label = "Usage [count]")+ xlab(label = '')+ theme(legend.position = "top")+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_size_continuous(name = expression("N ("*10^3*")"), range = c(1, 5))+ ylab(label = "Usage [count]")+ xlab(label = '')+ scale_y_log10()+ annotation_logticks(base = 10, sides = "l") ## ---- fig.height = 6, fig.width = 8------------------------------------------- ggplot()+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(data = stats, col = "red", size = 2, aes(x = pmax, y = -log10(u.test.fdr.pvalue)))+ geom_text_repel(data = stats[stats$pmax >= 0.5, ], aes(x = pmax, y = -log10(u.test.fdr.pvalue), label = gene_name), size = 4, min.segment.length = 0.1)+ xlim(0, 1)+ ylab(label = "-log10 (P-value) from U-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ----------------------------------------------------------------------------- data(Ig, package = "IgGeneUsage") ## ----------------------------------------------------------------------------- kable(x = head(Ig), row.names = FALSE) ## ---- fig.height = 4, fig.width = 6------------------------------------------- # we can compute the total number of rearrangements per sample total.usage <- aggregate(gene_usage_count~sample_id+condition, FUN = sum, data = Ig) total.usage$total <- total.usage$gene_usage_count total.usage$gene_usage_count <- NULL # merge it with the original data viz <- merge(x = Ig, y = total.usage, by = c("sample_id", "condition"), all.x = TRUE) # compute % viz$gene_usage_pct <- viz$gene_usage_count/viz$total*100 # visualize ggplot(data = viz)+ geom_point(aes(x = gene_name, y = gene_usage_pct, fill = condition, shape = condition), stroke = 0, size = 3, position = position_jitterdodge(jitter.width = 0.25, dodge.width = 0.75))+ theme_bw(base_size = 11)+ ylab(label = "Usage [%]")+ xlab(label = '')+ theme(legend.position = "top")+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_shape_manual(name = "Condition", values = c(21, 22)) ## ----------------------------------------------------------------------------- M <- DGU(usage.data = Ig, mcmc.warmup = 500, mcmc.steps = 1500, mcmc.chains = 2, mcmc.cores = 2, hdi.level = 0.95, adapt.delta = 0.95, max.treedepth = 12) ## ----------------------------------------------------------------------------- rstan::check_hmc_diagnostics(M$glm) ## ---- fig.height = 3, fig.width = 6------------------------------------------- gridExtra::grid.arrange(rstan::stan_rhat(object = M$glm), rstan::stan_ess(object = M$glm), nrow = 1) ## ---- fig.height = 5, fig.width = 8------------------------------------------- ggplot(data = M$ppc$ppc.repertoire)+ facet_wrap(facets = ~sample_name, ncol = 4)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_count, y = ppc_mean_count, ymin = ppc_L_count, ymax = ppc_H_count), col = "darkgray")+ geom_point(aes(x = observed_count, y = ppc_mean_count, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_x_log10(breaks = c(1, 10, 100, 1000), labels = expression(10^0, 10^1, 10^2, 10^3))+ scale_y_log10(breaks = c(1, 10, 100, 1000), labels = expression(10^0, 10^1, 10^2, 10^3))+ xlab(label = "Observed usage [counts]")+ ylab(label = "Predicted usage [counts]")+ annotation_logticks(base = 10, sides = "bl") ## ---- fig.height = 4, fig.width = 6------------------------------------------- ggplot(data = M$ppc.data$ppc.gene)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_prop*100, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100), col = "darkgray")+ geom_point(aes(x = observed_prop*100, y = ppc_mean_prop*100, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ xlab(label = "Observed usage [%]")+ ylab(label = "Predicted usage [%]")+ scale_x_log10()+ scale_y_log10()+ annotation_logticks(base = 10, sides = "bl") ## ----------------------------------------------------------------------------- kable(x = M$glm.summary, row.names = FALSE, digits = 3) ## ---- fig.height = 4, fig.width = 6------------------------------------------- # format data stats <- M$glm.summary stats <- stats[order(abs(stats$es_mean), decreasing = FALSE), ] stats$gene_fac <- factor(x = stats$gene_name, levels = stats$gene_name) stats <- merge(x= stats, y = M$test.summary, by = "gene_name") ggplot(data = stats)+ geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+ geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H), col = "darkgray")+ geom_point(aes(x = pmax, y = es_mean))+ geom_text_repel(data = stats, aes(x = pmax, y = es_mean, label = gene_fac))+ theme_bw(base_size = 11)+ xlab(label = expression(pi))+ xlim(0, 1) ## ---- fig.height = 6, fig.width = 8------------------------------------------- ggplot()+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(data = stats, col = "red", size = 2, aes(x = pmax, y = -log10(t.test.fdr.pvalue)))+ geom_text_repel(data = stats, aes(x = pmax, y = -log10(t.test.fdr.pvalue), label = gene_name), size = 4, min.segment.length = 0.1)+ xlim(0, 1)+ ylab(label = "-log10 (P-value) from t-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ---- fig.height = 5, fig.width = 8------------------------------------------- promising.genes <- unique(M$usage.data$gene_names) ppc.gene <- M$ppc.data$ppc.gene ppc.gene <- ppc.gene[ppc.gene$gene_name %in% promising.genes, ] ggplot()+ geom_errorbar(data = ppc.gene, aes(x = gene_name, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100, col = condition), position = position_dodge(width = .8), width = 0.75)+ geom_point(data = viz[viz$gene_name %in% promising.genes, ], aes(x = gene_name, y = gene_usage_pct, col = condition), shape = 21, size = 1.5, fill = "black", position = position_jitterdodge(jitter.width = 0.15, jitter.height = 0, dodge.width = 0.8))+ theme_bw(base_size = 11)+ theme(legend.position = "top") ## ---- fig.height = 4, fig.width = 7------------------------------------------- promising.genes <- "IGHV5" ggplot(data = viz[viz$gene_name %in% promising.genes, ])+ facet_wrap(facets = ~gene_name, ncol = 1, scales = "free_y")+ geom_point(aes(x = sample_id, y = gene_usage_count, fill = condition, size = total/10^3), shape = 21)+ theme_bw(base_size = 11)+ ylab(label = "Usage [count]")+ xlab(label = '')+ theme(legend.position = "top")+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_size_continuous(name = expression("N ("*10^3*")"), range = c(1, 5))+ ylab(label = "Usage [count]")+ xlab(label = '')+ scale_y_log10()+ annotation_logticks(base = 10, sides = "l") ## ---- fig.height = 4.5, fig.width = 8----------------------------------------- ggplot()+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(data = stats, col = "red", size = 2, aes(x = pmax, y = -log10(u.test.fdr.pvalue)))+ geom_text_repel(data = stats, aes(x = pmax, y = -log10(u.test.fdr.pvalue), label = gene_name), size = 4, min.segment.length = 0.1)+ xlim(0, 1)+ ylab(label = "-log10 (P-value) from U-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ----------------------------------------------------------------------------- data(CDR3_Epitopes, package = "IgGeneUsage") ## ----------------------------------------------------------------------------- kable(x = head(CDR3_Epitopes), row.names = FALSE) ## ---- fig.height = 4, fig.width = 7------------------------------------------- # we can compute the total number of rearrangements per sample total.usage <- aggregate(gene_usage_count~sample_id+condition, FUN = sum, data = CDR3_Epitopes) total.usage$total <- total.usage$gene_usage_count total.usage$gene_usage_count <- NULL # merge it with the original data viz <- merge(x = CDR3_Epitopes, y = total.usage, by = c("sample_id", "condition"), all.x = TRUE) # compute % viz$gene_usage_pct <- viz$gene_usage_count/viz$total*100 # visualize ggplot(data = viz)+ geom_point(aes(x = as.numeric(gene_name), y = gene_usage_pct, fill = condition, shape = condition), stroke = 0, size = 2, alpha = 0.5, position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.5))+ theme_bw(base_size = 11)+ ylab(label = "Usage [%]")+ xlab(label = 'Net Charge')+ theme(legend.position = "top")+ scale_fill_manual(name = "Condition", values = c("orange", "#6666ff"))+ scale_shape_manual(name = "Condition", values = c(21, 22))+ scale_x_continuous(breaks = -7:7, labels = -7:7) ## ----------------------------------------------------------------------------- M <- DGU(usage.data = CDR3_Epitopes, mcmc.chains = 2, mcmc.cores = 2) # default DGU parameters: # mcmc.warmup = 500 # mcmc.steps = 1,500 # mcmc.chains = 2 # mcmc.cores = 1 # hdi.level = 0.95 # adapt.delta = 0.95 # max.treedepth = 12 ## ----------------------------------------------------------------------------- rstan::check_hmc_diagnostics(M$glm) ## ---- fig.height = 3, fig.width = 6------------------------------------------- grid.arrange(rstan::stan_rhat(object = M$glm), rstan::stan_ess(object = M$glm), nrow = 1) ## ---- fig.height = 8, fig.width = 8------------------------------------------- ggplot(data = M$ppc.data$ppc.repertoire)+ facet_wrap(facets = ~sample_name, ncol = 4)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_count, y = ppc_mean_count, ymin = ppc_L_count, ymax = ppc_H_count), col = "darkgray")+ geom_point(aes(x = observed_count, y = ppc_mean_count, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ xlab(label = "Observed usage [counts]")+ ylab(label = "Predicted usage [counts]")+ scale_x_log10()+ scale_y_log10()+ annotation_logticks(base = 10, sides = "bl") ## ---- fig.height = 4, fig.width = 6------------------------------------------- ggplot(data = M$ppc.data$ppc.gene)+ geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+ geom_errorbar(aes(x = observed_prop*100, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100), col = "darkgray")+ geom_point(aes(x = observed_prop*100, y = ppc_mean_prop*100, fill = condition), shape = 21, size = 1)+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ xlab(label = "Observed usage [%]")+ ylab(label = "Predicted usage [%]")+ scale_x_log10()+ scale_y_log10()+ annotation_logticks(base = 10, sides = "bl") ## ----------------------------------------------------------------------------- kable(x = head(M$glm.summary), row.names = FALSE, digits = 3) ## ---- fig.height = 4, fig.width = 6------------------------------------------- # format data stats <- M$glm.summary stats <- stats[order(abs(stats$es_mean), decreasing = FALSE), ] stats$gene_fac <- factor(x = stats$gene_name, levels = stats$gene_name) stats <- merge(x = stats, y = M$test.summary, by = "gene_name") ggplot(data = stats)+ geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+ geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H), col = "darkgray")+ geom_point(aes(x = pmax, y = es_mean))+ geom_text_repel(data = stats, aes(x = pmax, y = es_mean, label = gene_fac))+ theme_bw(base_size = 11)+ xlab(label = expression(pi))+ scale_x_continuous(limits = c(0, 1)) ## ---- fig.height = 4, fig.width = 6------------------------------------------- ggplot(data = stats)+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(aes(x = pmax, y = -log10(t.test.fdr.pvalue)), size = 1.5, col = "red")+ geom_text_repel(aes(x = pmax, y = -log10(t.test.fdr.pvalue), label = gene_name), size = 5)+ xlim(0, 1)+ ylab(label = "-log10 (P-value) from t-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ---- fig.height = 4, fig.width = 6------------------------------------------- ggplot(data = stats)+ geom_hline(yintercept = c(-log10(0.05), -log10(0.01)), linetype = "dashed", col = "darkgray")+ geom_point(aes(x = pmax, y = -log10(u.test.fdr.pvalue)), size = 1.5, col = "red")+ geom_text_repel(aes(x = pmax, y = -log10(u.test.fdr.pvalue), label = gene_name), size = 5)+ xlim(0, 1)+ ylab(label = "-log10(p-value) from U-test [FDR corrected]")+ xlab(label = expression(pi))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ scale_color_discrete(name = '') ## ---- fig.height = 5, fig.width = 8------------------------------------------- promising.genes <- M$usage.data$gene_names ppc.gene <- M$ppc.data$ppc.gene ppc.gene <- ppc.gene[ppc.gene$gene_name %in% promising.genes, ] ppc.gene$gene_name <- factor(x = ppc.gene$gene_name, levels = -5:6) ggplot()+ geom_errorbar(data = ppc.gene, aes(x = gene_name, ymin = ppc_L_prop*100, ymax = ppc_H_prop*100, col = condition), position = position_dodge(width = .8), width = 0.75)+ geom_point(data = viz[viz$gene_name %in% promising.genes, ], aes(x = gene_name, y = gene_usage_pct, col = condition), shape = 21, size = 1.5, fill = "black", position = position_jitterdodge(jitter.width = 0.2, jitter.height = 0, dodge.width = 0.75))+ theme_bw(base_size = 11)+ theme(legend.position = "top")+ ylab(label = "Gene usage [%]")+ xlab(label = '')