## ----load-libs, message=FALSE--------------------------------------------
library(missMethyl)
library(limma)
library(minfi)

## ----reading-data, message=FALSE-----------------------------------------
library(minfiData)
baseDir <- system.file("extdata", package = "minfiData")
targets <- read.metharray.sheet(baseDir)
targets[,1:9]
targets[,10:12]
rgSet <- read.metharray.exp(targets = targets)

## ----ppraw---------------------------------------------------------------
mSet <- preprocessRaw(rgSet)

## ----swan----------------------------------------------------------------
mSetSw <- SWAN(mSet,verbose=TRUE)

## ----betasByType,fig.height=5,fig.width=10,fig.retina=1,fig.cap="Density distributions of $\beta$ values before and after using SWAN."----
par(mfrow=c(1,2), cex=1.25)
densityByProbeType(mSet[,1], main = "Raw")
densityByProbeType(mSetSw[,1], main = "SWAN")

## ----filtering-----------------------------------------------------------
detP <- detectionP(rgSet)
keep <- rowSums(detP < 0.01) == ncol(rgSet)
mSetSw <- mSetSw[keep,]

## ----extraction----------------------------------------------------------
mset_reduced <- mSetSw[sample(1:nrow(mSetSw), 20000),]
meth <- getMeth(mset_reduced)
unmeth <- getUnmeth(mset_reduced)
Mval <- log2((meth + 100)/(unmeth + 100))
beta <- getBeta(mset_reduced)
dim(Mval)

## ----mdsplot,fig.height=5,fig.width=5,fig.cap="A multi-dimensional scaling (MDS) plot of cancer and normal samples."----
par(mfrow=c(1,1))
plotMDS(Mval, labels=targets$Sample_Name, col=as.integer(factor(targets$status)))
legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1.2,col=1:2)

## ----design--------------------------------------------------------------
group <- factor(targets$status,levels=c("normal","cancer"))
id <- factor(targets$person)
design <- model.matrix(~id + group)
design

## ----diffmeth------------------------------------------------------------
fit.reduced <- lmFit(Mval,design)
fit.reduced <- eBayes(fit.reduced)

## ----diffmeth-results----------------------------------------------------
summary(decideTests(fit.reduced))
top<-topTable(fit.reduced,coef=4)
top

## ----top4,fig.width=10,fig.height=10,fig.cap="The $\beta$ values for the top 4 differentially methylated CpGs."----
cpgs <- rownames(top)
par(mfrow=c(2,2))
for(i in 1:4){
stripchart(beta[rownames(beta)==cpgs[i],]~design[,4],method="jitter",
group.names=c("Normal","Cancer"),pch=16,cex=1.5,col=c(4,2),ylab="Beta values",
vertical=TRUE,cex.axis=1.5,cex.lab=1.5)
title(cpgs[i],cex.main=1.5)
}

## ----diffmeth2-----------------------------------------------------------
# get M-values for ALL probes
meth <- getMeth(mSet)
unmeth <- getUnmeth(mSet)
M <- log2((meth + 100)/(unmeth + 100))

grp <- factor(targets$status,levels=c("normal","cancer"))
des <- model.matrix(~grp)
des

INCs <- getINCs(rgSet)
head(INCs)

Mc <- rbind(M,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
table(ctl)

rfit1 <- RUVfit(data=Mc, design=des, coef=2, ctl=ctl) # Stage 1 analysis
rfit2 <- RUVadj(rfit1)

## ----ruv1----------------------------------------------------------------
top1 <- topRUV(rfit2, num=Inf)
head(top1)

ctl <- rownames(M) %in% rownames(top1[top1$p.ebayes.BH > 0.5,])
table(ctl)

## ----ruv2----------------------------------------------------------------
# Perform RUV adjustment and fit
rfit1 <- RUVfit(data=M, design=des, coef=2, ctl=ctl) # Stage 2 analysis
rfit2 <- RUVadj(rfit1)

# Look at table of top results
topRUV(rfit2)

## ----diffvar-------------------------------------------------------------
fitvar <- varFit(Mval, design = design, coef = c(1,4))

## ----diffvar-results-----------------------------------------------------
summary(decideTests(fitvar))
topDV <- topVar(fitvar, coef=4)
topDV

## ----alternative---------------------------------------------------------
design2 <- model.matrix(~0+group+id)
fitvar.contr <- varFit(Mval, design=design2, coef=c(1,2))
contr <- makeContrasts(groupcancer-groupnormal,levels=colnames(design2))
fitvar.contr <- contrasts.varFit(fitvar.contr,contrasts=contr)

## ----altresults----------------------------------------------------------
summary(decideTests(fitvar.contr))
topVar(fitvar.contr,coef=1)

## ----top4DV,fig.width=10,fig.height=10,fig.cap="The $\beta$ values for the top 4 differentially variable CpGs."----
cpgsDV <- rownames(topDV)
par(mfrow=c(2,2))
for(i in 1:4){
stripchart(beta[rownames(beta)==cpgsDV[i],]~design[,4],method="jitter",
group.names=c("Normal","Cancer"),pch=16,cex=1.5,col=c(4,2),ylab="Beta values",
vertical=TRUE,cex.axis=1.5,cex.lab=1.5)
title(cpgsDV[i],cex.main=1.5)
}

## ----loadingdata---------------------------------------------------------
library(tweeDEseqCountData)
data(pickrell1)
counts<-exprs(pickrell1.eset)
dim(counts)
gender <- pickrell1.eset$gender
table(gender)
rm(pickrell1.eset)
data(genderGenes)
data(annotEnsembl63)
annot <- annotEnsembl63[,c("Symbol","Chr")]
rm(annotEnsembl63)

## ----dgelist-------------------------------------------------------------
library(edgeR)
y <- DGEList(counts=counts, genes=annot[rownames(counts),])

## ----dgelist-filtering---------------------------------------------------
isexpr <- rowSums(cpm(y)>1) >= 20
hasannot <- rowSums(is.na(y$genes))==0
y <- y[isexpr & hasannot,,keep.lib.sizes=FALSE]
dim(y)
y <- calcNormFactors(y)

## ----testhapmap----------------------------------------------------------
design.hapmap <- model.matrix(~gender)
fitvar.hapmap <- varFit(y, design = design.hapmap)
fitvar.hapmap$genes <- y$genes

## ----resultshapmap-------------------------------------------------------
summary(decideTests(fitvar.hapmap))
topDV.hapmap <- topVar(fitvar.hapmap,coef=ncol(design.hapmap))
topDV.hapmap

## ----top4DVhapmap,fig.width=10,fig.height=10,fig.cap="The log counts per million for the top 4 differentially variably expressed genes."----
genesDV <- rownames(topDV.hapmap)
par(mfrow=c(2,2))
for(i in 1:4){
stripchart(cpm(y,log=TRUE)[rownames(y)==genesDV[i],]~design.hapmap[,ncol(design.hapmap)],method="jitter",
group.names=c("Female","Male"),pch=16,cex=1.5,col=c(4,2),ylab="Log counts per million",
vertical=TRUE,cex.axis=1.5,cex.lab=1.5)
title(genesDV[i],cex.main=1.5)
}

## ----gometh1-------------------------------------------------------------
topRUV(rfit2)
table(rfit2$p.ebayes.BH < 0.01)

## ----gometh2-------------------------------------------------------------
beta <- getBeta(mSet)
beta_norm <- rowMeans(beta[,des[,2]==0])
beta_can <- rowMeans(beta[,des[,2]==1])
Delta_beta <- beta_can - beta_norm
sigDM <- rfit2$p.ebayes.BH < 0.01 & abs(Delta_beta) > 0.25
table(sigDM)

## ----gometh3-------------------------------------------------------------
topCpGs<-topRUV(rfit2,number=10000)
sigCpGs <- rownames(topCpGs)
sigCpGs[1:10]

## ----gometh4-------------------------------------------------------------
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
gst <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="GO")
topGO(gst)

## ----gsameth-------------------------------------------------------------
library(org.Hs.eg.db)
genes <- toTable(org.Hs.egSYMBOL2EG)
set1 <- sample(genes$gene_id,size=80)
set2 <- sample(genes$gene_id,size=100)
set3 <- sample(genes$gene_id,size=30)
genesets <- list(set1,set2,set3)
gsa <- gsameth(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection=genesets)
topGSA(gsa)

## ----sessionInfo, eval=TRUE, results='asis'------------------------------
sessionInfo()