## ----Loadpackage----------------------------------------------------------- library(DEqMS) ## ----retrieveExampleData--------------------------------------------------- ### retrieve example dataset library(ExperimentHub) eh = ExperimentHub() query(eh, "DEqMS") dat.psm = eh[["EH1663"]] ## ----log2transform--------------------------------------------------------- dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) head(dat.psm.log) ## ----design---------------------------------------------------------------- cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond)) sampleTable ## ----PSMdistribution------------------------------------------------------- psm.count.table = as.data.frame(table(dat.psm$gene)) rownames(psm.count.table)=psm.count.table$Var1 plot(sort(log2(psm.count.table$Freq)),pch=".", xlab="Proteins ordered by PSM count",ylab="log2(PSM count)") ## ----boxplot--------------------------------------------------------------- dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) boxplot(dat.gene.nm,xlab="TMT 10 channels", ylab="log2 relative protein abundance") ## ----t-test---------------------------------------------------------------- pval.372 = apply(dat.gene.nm, 1, function(x) t.test(as.numeric(x[c(1,5,8)]), as.numeric(x[c(3,6,10)]))$p.value) logFC.372 = rowMeans(dat.gene.nm[,c(3,6,10)])-rowMeans(dat.gene.nm[,c(1,5,8)]) ## ----echo=TRUE------------------------------------------------------------- ttest.results = data.frame(gene=rownames(dat.gene.nm), logFC=logFC.372,P.Value = pval.372, adj.pval = p.adjust(pval.372,method = "BH")) ttest.results$PSMcount = psm.count.table[ttest.results$gene,]$Freq ttest.results = ttest.results[with(ttest.results, order(P.Value)), ] head(ttest.results) ## ----Anova----------------------------------------------------------------- library(limma) gene.matrix = as.matrix(dat.gene.nm) colnames(gene.matrix) = as.character(sampleTable$cond) design = model.matrix(~cond,sampleTable) fit1 <- lmFit(gene.matrix,design) ord.t = fit1$coefficients[, 3]/fit1$sigma/fit1$stdev.unscaled[, 3] ord.p = 2*pt(-abs(ord.t), fit1$df.residual) ord.q = p.adjust(ord.p,method = "BH") anova.results = data.frame(gene=names(fit1$sigma), logFC=fit1$coefficients[,3], t=ord.t, P.Value=ord.p, adj.P.Val = ord.q) anova.results$PSMcount = psm.count.table[anova.results$gene,]$Freq anova.results = anova.results[with(anova.results,order(P.Value)),] head(anova.results) ## ----Limma----------------------------------------------------------------- fit2 <- eBayes(lmFit(gene.matrix,design)) head(fit2$coefficients) ## ----echo=TRUE------------------------------------------------------------- limma.results = topTable(fit2,coef = 3,n= Inf) limma.results$gene = rownames(limma.results) limma.results$PSMcount = psm.count.table[limma.results$gene,]$Freq head(limma.results) ## ----Variance, echo=TRUE, fig.height=5, fig.width=10----------------------- dat.temp = data.frame(var = fit2$sigma^2, PSMcount = psm.count.table[names(fit2$sigma),]$Freq) dat.temp.filter = dat.temp[dat.temp$PSMcount<21,] op <- par(mfrow=c(1,2), mar=c(4,4,4,1), oma=c(0.5,0.5,0.5,0)) plot(log2(psm.count.table[rownames(dat.gene.nm),2]), dat.gene.nm[,1]-dat.gene.nm[,5],pch=20,cex=0.5, xlab="log2(PSM count)",ylab="log2 ratio between two ctrl replicates") boxplot(log2(var)~PSMcount,dat.temp.filter,xlab="PSMcount", ylab="log2(Variance)") ## ----DEqMS----------------------------------------------------------------- fit2$count = psm.count.table[rownames(fit2$coefficients),]$Freq fit3 = spectraCounteBayes(fit2) DEqMS.results = outputResult(fit3,coef_col = 3) head(DEqMS.results) ## ----PriorVarianceTrend---------------------------------------------------- plotFitCurve(fit3,type = "boxplot",xlab="PSM count") ## ----PriorVar-------------------------------------------------------------- x = fit3$count y = fit3$sigma^2 limma.prior = fit3$s2.prior DEqMS.prior = fit3$sca.priorvar plot(log2(x),log(y),pch=20, cex=0.5,xlab = "log2(PSMcount)", ylab="log(Variance)") abline(h = log(limma.prior),col="green",lwd=3 ) k=order(x) lines(log2(x[k]),log(DEqMS.prior[k]),col="red",lwd=3 ) legend("topright",legend=c("Limma prior variance","DEqMS prior variance"), col=c("green","red"),lwd=3) ## ----PostVar, echo=TRUE, fig.height=5, fig.width=10------------------------ x = fit3$count y = fit3$s2.post op <- par(mfrow=c(1,2), mar=c(4,4,4,1), oma=c(0.5,0.5,0.5,0)) plot(log2(x),log(y),pch=20, cex=0.5, xlab = "log2(PSMcount)", ylab="log(Variance)", main="Posterior Variance in Limma") y = fit3$sca.postvar plot(log2(x),log(y),pch=20,cex=0.5, xlab = "log2(PSMcount)", ylab="log(Variance)", main="Posterior Variance in DEqMS") ## ----pvalue---------------------------------------------------------------- plot(sort(-log10(limma.results$P.Value),decreasing = TRUE)[1:500], type="l",lty=2,lwd=2, ylab="-log10(p-value)", xlab="Proteins ranked by p-values", col="purple") lines(sort(-log10(DEqMS.results$sca.P.Value),decreasing = TRUE)[1:500], lty=1,lwd=2,col="red") lines(sort(-log10(anova.results$P.Value),decreasing = TRUE)[1:500], lty=2,lwd=2,col="blue") lines(sort(-log10(ttest.results$P.Value),decreasing = TRUE)[1:500], lty=2,lwd=2,col="orange") legend("topright",legend = c("Limma","DEqMS","Anova","t.test"), col = c("purple","red","blue","orange"),lty=c(2,1,2,2),lwd=2) ## ----PSMintensity---------------------------------------------------------- library(ggplot2) dat.psm.log.ordered = dat.psm.log[,c("Peptide","gene", sort(colnames(dat.psm.log)[3:12]))] peptideProfilePlot(dat=dat.psm.log.ordered,col=2,gene="TGFBR2") ## ----volcanoplot1---------------------------------------------------------- library(ggrepel) # to plot adjusted p-values by BH method on y-axis DEqMS.results$log.adj.pval = -log10(DEqMS.results$sca.adj.pval) ggplot(DEqMS.results, aes(x = logFC, y = log.adj.pval)) + geom_point(size=0.5 )+ theme_bw(base_size = 16) + # change theme xlab(expression("log2 miR372/ctrl")) + # x-axis label ylab(expression(" -log10 adj.pval")) + # y-axis label geom_vline(xintercept = c(-1,1), colour = "red") + # Add fold change cutoffs geom_hline(yintercept = 2, colour = "red") + # Add significance cutoffs geom_vline(xintercept = 0, colour = "black") + # Add 0 lines scale_colour_gradient(low = "black", high = "black", guide = FALSE)+ geom_text_repel(data=subset(DEqMS.results, abs(logFC)>1&log.adj.pval> 2), aes( logFC, log.adj.pval ,label=gene)) # add gene label ## ----volcanoplot2---------------------------------------------------------- fit3$p.value = fit3$sca.p volcanoplot(fit3,coef=3, style = "p-value", highlight = 20, names=rownames(fit3$coefficients)) ## ----PCAplot--------------------------------------------------------------- library( RColorBrewer ) pr <- prcomp(t(gene.matrix)) plot( pr$x[,1:2], asp=1, col=brewer.pal(4,"Set1")[sampleTable$cond], pch=17) text( pr$x[,1], pr$x[,2]-1, label=as.character(sampleTable$cond)) legend( "bottomright", legend = levels( sampleTable$cond ), col = brewer.pal(4,"Set1"), pch=17 ) ## ----heatmap1-------------------------------------------------------------- library( pheatmap ) cm <- cor( gene.matrix ) # rearrange columns so that same sample types are together cm.order = cm[order(colnames(cm)),order(colnames(cm))] pheatmap(cm.order, cluster_rows=FALSE, cluster_cols=FALSE, color = colorRampPalette(c("blue", "white", "red"))(100)) ## ----heatmap2-------------------------------------------------------------- dm <- as.matrix( dist( t( gene.matrix ) )) dm.order = dm[order(colnames(cm)),order(colnames(cm))] pheatmap(dm.order, cluster_rows=FALSE, cluster_cols=FALSE, color = colorRampPalette(c("red", "white"))(100))