## ----Loadpackage-----------------------------------------------------------
library(DEqMS)

## ----retrieveExampleData---------------------------------------------------
### retrieve example dataset
library(ExperimentHub)
eh = ExperimentHub()
query(eh, "DEqMS")
dat.psm = eh[["EH1663"]]

## ----log2transform---------------------------------------------------------
dat.psm.log = dat.psm
dat.psm.log[,3:12] =  log2(dat.psm[,3:12])
head(dat.psm.log)

## ----design----------------------------------------------------------------
cond = c("ctrl","miR191","miR372","miR519","ctrl",
"miR372","miR519","ctrl","miR191","miR372")

sampleTable <- data.frame(
row.names = colnames(dat.psm)[3:12],
cond = as.factor(cond))
sampleTable

## ----PSMdistribution-------------------------------------------------------
psm.count.table = as.data.frame(table(dat.psm$gene))
rownames(psm.count.table)=psm.count.table$Var1

plot(sort(log2(psm.count.table$Freq)),pch=".",
    xlab="Proteins ordered by PSM count",ylab="log2(PSM count)")

## ----boxplot---------------------------------------------------------------
dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2)

boxplot(dat.gene.nm,xlab="TMT 10 channels",
        ylab="log2 relative protein abundance")

## ----t-test----------------------------------------------------------------

pval.372 = apply(dat.gene.nm, 1, function(x) 
t.test(as.numeric(x[c(1,5,8)]), as.numeric(x[c(3,6,10)]))$p.value)

logFC.372 = rowMeans(dat.gene.nm[,c(3,6,10)])-rowMeans(dat.gene.nm[,c(1,5,8)])

## ----echo=TRUE-------------------------------------------------------------
ttest.results = data.frame(gene=rownames(dat.gene.nm),
                    logFC=logFC.372,P.Value = pval.372, 
                    adj.pval = p.adjust(pval.372,method = "BH")) 

ttest.results$PSMcount = psm.count.table[ttest.results$gene,]$Freq
ttest.results = ttest.results[with(ttest.results, order(P.Value)), ]
head(ttest.results)

## ----Anova-----------------------------------------------------------------
library(limma)
gene.matrix = as.matrix(dat.gene.nm)
colnames(gene.matrix) = as.character(sampleTable$cond)
design = model.matrix(~cond,sampleTable)

fit1 <- lmFit(gene.matrix,design)

ord.t = fit1$coefficients[, 3]/fit1$sigma/fit1$stdev.unscaled[, 3]
ord.p = 2*pt(-abs(ord.t), fit1$df.residual)
ord.q = p.adjust(ord.p,method = "BH")
anova.results = data.frame(gene=names(fit1$sigma),
                            logFC=fit1$coefficients[,3],
                            t=ord.t, 
                            P.Value=ord.p, 
                            adj.P.Val = ord.q)

anova.results$PSMcount = psm.count.table[anova.results$gene,]$Freq
anova.results = anova.results[with(anova.results,order(P.Value)),]

head(anova.results)

## ----Limma-----------------------------------------------------------------
fit2 <- eBayes(lmFit(gene.matrix,design))
head(fit2$coefficients)

## ----echo=TRUE-------------------------------------------------------------
limma.results = topTable(fit2,coef = 3,n= Inf)
limma.results$gene = rownames(limma.results)
limma.results$PSMcount = psm.count.table[limma.results$gene,]$Freq

head(limma.results)

## ----Variance, echo=TRUE, fig.height=5, fig.width=10-----------------------
dat.temp = data.frame(var = fit2$sigma^2,
                        PSMcount = psm.count.table[names(fit2$sigma),]$Freq)
dat.temp.filter = dat.temp[dat.temp$PSMcount<21,]

op <- par(mfrow=c(1,2), mar=c(4,4,4,1), oma=c(0.5,0.5,0.5,0))
plot(log2(psm.count.table[rownames(dat.gene.nm),2]),
    dat.gene.nm[,1]-dat.gene.nm[,5],pch=20,cex=0.5,
    xlab="log2(PSM count)",ylab="log2 ratio between two ctrl replicates")

boxplot(log2(var)~PSMcount,dat.temp.filter,xlab="PSMcount",
        ylab="log2(Variance)")


## ----DEqMS-----------------------------------------------------------------
fit2$count = psm.count.table[rownames(fit2$coefficients),]$Freq

fit3 = spectraCounteBayes(fit2)
DEqMS.results = outputResult(fit3,coef_col = 3)
head(DEqMS.results)

## ----PriorVarianceTrend----------------------------------------------------
plotFitCurve(fit3,type = "boxplot",xlab="PSM count")

## ----PriorVar--------------------------------------------------------------
x = fit3$count
y = fit3$sigma^2

limma.prior = fit3$s2.prior
DEqMS.prior = fit3$sca.priorvar

plot(log2(x),log(y),pch=20, cex=0.5,xlab = "log2(PSMcount)",
     ylab="log(Variance)")
abline(h = log(limma.prior),col="green",lwd=3 )
k=order(x)
lines(log2(x[k]),log(DEqMS.prior[k]),col="red",lwd=3 )
legend("topright",legend=c("Limma prior variance","DEqMS prior variance"),
        col=c("green","red"),lwd=3)

## ----PostVar, echo=TRUE, fig.height=5, fig.width=10------------------------
x = fit3$count
y = fit3$s2.post

op <- par(mfrow=c(1,2), mar=c(4,4,4,1), oma=c(0.5,0.5,0.5,0))
plot(log2(x),log(y),pch=20, cex=0.5, xlab = "log2(PSMcount)", 
    ylab="log(Variance)",
    main="Posterior Variance in Limma")

y = fit3$sca.postvar
plot(log2(x),log(y),pch=20,cex=0.5, xlab = "log2(PSMcount)",
    ylab="log(Variance)", 
    main="Posterior Variance in DEqMS")

## ----pvalue----------------------------------------------------------------
plot(sort(-log10(limma.results$P.Value),decreasing = TRUE)[1:500], 
    type="l",lty=2,lwd=2, ylab="-log10(p-value)",
    xlab="Proteins ranked by p-values",
    col="purple")
lines(sort(-log10(DEqMS.results$sca.P.Value),decreasing = TRUE)[1:500], 
        lty=1,lwd=2,col="red")
lines(sort(-log10(anova.results$P.Value),decreasing = TRUE)[1:500], 
        lty=2,lwd=2,col="blue")
lines(sort(-log10(ttest.results$P.Value),decreasing = TRUE)[1:500], 
        lty=2,lwd=2,col="orange")
legend("topright",legend = c("Limma","DEqMS","Anova","t.test"),
        col = c("purple","red","blue","orange"),lty=c(2,1,2,2),lwd=2)

## ----PSMintensity----------------------------------------------------------
library(ggplot2)
dat.psm.log.ordered = dat.psm.log[,c("Peptide","gene",
                                    sort(colnames(dat.psm.log)[3:12]))]
peptideProfilePlot(dat=dat.psm.log.ordered,col=2,gene="TGFBR2")

## ----volcanoplot1----------------------------------------------------------
library(ggrepel)

# to plot adjusted p-values by BH method on y-axis
DEqMS.results$log.adj.pval = -log10(DEqMS.results$sca.adj.pval) 
ggplot(DEqMS.results, aes(x = logFC, y = log.adj.pval)) + 
    geom_point(size=0.5 )+
    theme_bw(base_size = 16) + # change theme
    xlab(expression("log2 miR372/ctrl")) + # x-axis label
    ylab(expression(" -log10 adj.pval")) + # y-axis label
    geom_vline(xintercept = c(-1,1), colour = "red") + # Add fold change cutoffs
    geom_hline(yintercept = 2, colour = "red") + # Add significance cutoffs
    geom_vline(xintercept = 0, colour = "black") + # Add 0 lines
    scale_colour_gradient(low = "black", high = "black", guide = FALSE)+
    geom_text_repel(data=subset(DEqMS.results, abs(logFC)>1&log.adj.pval> 2),
                    aes( logFC, log.adj.pval ,label=gene)) # add gene label


## ----volcanoplot2----------------------------------------------------------
fit3$p.value = fit3$sca.p
volcanoplot(fit3,coef=3, style = "p-value", highlight = 20,
            names=rownames(fit3$coefficients))

## ----PCAplot---------------------------------------------------------------
library( RColorBrewer )
pr <- prcomp(t(gene.matrix)) 
plot( pr$x[,1:2], asp=1, col=brewer.pal(4,"Set1")[sampleTable$cond], pch=17)
text( pr$x[,1], pr$x[,2]-1, label=as.character(sampleTable$cond))
legend( "bottomright", legend = levels( sampleTable$cond ), 
    col = brewer.pal(4,"Set1"), pch=17 )

## ----heatmap1--------------------------------------------------------------
library( pheatmap )
cm <- cor( gene.matrix )
# rearrange columns so that same sample types are together
cm.order = cm[order(colnames(cm)),order(colnames(cm))]

pheatmap(cm.order,
    cluster_rows=FALSE, cluster_cols=FALSE,
    color = colorRampPalette(c("blue", "white", "red"))(100))

## ----heatmap2--------------------------------------------------------------
dm <- as.matrix( dist( t( gene.matrix ) ))
dm.order = dm[order(colnames(cm)),order(colnames(cm))]
pheatmap(dm.order,
    cluster_rows=FALSE, cluster_cols=FALSE,
    color = colorRampPalette(c("red", "white"))(100))