## ----style, eval=TRUE, echo=FALSE, results="asis"---------------------------------------
BiocStyle::latex()

## ----include=FALSE----------------------------------------------------------------------
knitr::opts_chunk$set(concordance=TRUE, 
                      resize.width="0.45\\textwidth", 
                      fig.align='center',
                      tidy = FALSE,
                      message=FALSE)

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))

## ----setup, results='hide'--------------------------------------------------------------
library("PGPC")
if(!file.exists(file.path("result","data")))
  dir.create(file.path("result","data"),recursive=TRUE)
if(!file.exists(file.path("result","Figures")))
  dir.create(file.path("result","Figures"),recursive=TRUE)

## ----load, eval=FALSE-------------------------------------------------------------------
#  data("interactions", package="PGPC")

## ----load2, eval=FALSE------------------------------------------------------------------
#  ?interactions

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result","data")))
  dir.create(file.path("result","data"),recursive=TRUE)

## ----imageProcessing, eval=FALSE--------------------------------------------------------
#  localPath = tempdir()
#  serverURL = system.file("extdata", package = "PGPC")
#  
#  imageConfFile = file.path("conf", "imageconf.txt")
#  
#  ## circumvent memory problem on 32bit windows by segementing only spot 1.
#  if(.Platform$OS.type == "windows" & R.Version()$arch == "i386")
#      imageConfFile = file.path("conf", "imageconf_windows32.txt")
#  
#  x = parseImageConf(imageConfFile, localPath=localPath, serverURL=serverURL)
#  
#  unames = getUnames(x) ## select all wells for processing
#  unames = c("045-01-C23") ## select single well for demonstration
#  
#  segmentWells(x, unames, file.path("conf", "segmentationpar.txt"))
#  
#  PGPC:::extractFeaturesWithParameter(x,
#                                      unames,
#                                      file.path("conf", "featurepar.txt"))
#  
#  summarizeWellsExtended(x, unames, file.path("conf", "featurepar.txt"))
#  
#  ## PGPC:::mergeProfiles(x)  only needed if wells were processed in parallel
#  
#  ftrs = readHTS(x, type="file",
#                 filename=file.path("data", "profiles.tab"),
#                 format="tab")

## ----convertData1,eval=FALSE------------------------------------------------------------
#  data("ftrs", package="PGPC")
#  dim(ftrs)
#  ftrnames = colnames(ftrs[,-1])

## ----convertData2, eval=FALSE-----------------------------------------------------------
#  makeArray <- function(df, nLines=12, nWells=384, nPlates=4, nRep=2){
#    ## df = list of features ordered by unames
#    if(! nLines*nWells*nPlates*nRep == nrow(df))
#      stop("Wrong dimensions!")
#  
#    ## order by replicate
#    replicate <- rep(c(rep(1, nWells), rep(2, nWells)),
#                     nrow(df)/(nWells*nRep))
#    df = df[order(replicate),]
#  
#    ## create matrix
#    D = as.matrix(df[,-1])
#  
#    ## convert Matrix: x=Gene/wells, y=Celllines, z=replicates
#    dim(D) = c(nPlates*nWells,nLines,nRep,ncol(D))
#    invisible(D)
#  }
#  
#  D = makeArray(ftrs)
#  dim(D)
#  
#  ## free space and save full data matrix
#  rm("ftrs")
#  datamatrixfull = list(D = D, anno=list(ftr=ftrnames))
#  save(datamatrixfull, file=file.path("result","data","datamatrixfull.rda"),
#       compress="xz")
#  rm("datamatrixfull")

## ----annotation1, eval=FALSE------------------------------------------------------------
#  drug = read.delim(
#    system.file("extdata",
#                "conf",
#                "Changed_LOPAC3_384_convert96er_GeneID_updated_Selectivity.txt",
#      package = "PGPC"),
#    stringsAsFactors=FALSE,
#    encoding="latin1")
#  
#  drug$Plate = NULL
#  drug = unique(drug)
#  drug$Content[drug$GeneID == "no treatment"] = "empty"
#  drug$Content[grep("ctrl", drug$GeneID)] = drug$GeneID[grep("ctrl", drug$GeneID)]
#  drug$Content[grep("neg", drug$GeneID)] = drug$GeneID[grep("neg", drug$GeneID)]

## ----annotation2, eval=FALSE------------------------------------------------------------
#  line <- data.frame(name = c('02-006', '02-031',
#                       '02-004', '104-009', 'PAR007',
#                       '104-001', '104-004', '104-007',
#                       '104-008', '02-008', '02-030', 'PAR001'),
#                     mutationDetailed =  c('AKT1-/-&AKT2-/-', 'MEK2-/-',
#                       'AKT1-/-', 'CTNNB1 mt-/wt+', 'PARENTAL007',
#                       'P53-/-', 'PTEN-/-', 'PI3KCA mt-/wt+', 'KRAS mt-/wt+',
#                       'BAX-/-', 'MEK1-/-', 'PARENTAL001'),
#                     mutation = c('AKT1/2', 'MEK2',
#                       'AKT1', 'CTNNB1 wt', 'HCT116 P2',
#                       'P53', 'PTEN', 'PI3KCA wt', 'KRAS wt',
#                       'BAX', 'MEK1', 'HCT116 P1'),
#                     startPlate=seq(1, (4*12), by = 4),
#                     stringsAsFactors=FALSE)
#  
#  anno=list(drug=drug, line=line, repl=1:2, ftr=ftrnames)
#  save(anno, file=file.path("result","data","anno.rda"), compress="bzip2")

## ----removeEmptyWells, eval=FALSE-------------------------------------------------------
#  emptyWell = anno$drug$Content == "empty"
#  
#  screenlog <- read.delim(system.file("extdata",
#                                      "conf",
#                                      "2013-01-10_LOPACScreenlog_plate.txt",
#                                      package = "PGPC"),
#                          stringsAsFactors=FALSE)
#  allPlates = screenlog$Well[screenlog$Plate=="*"]
#  flaggedWell = anno$drug$Well %in% allPlates
#  
#  singlePlate = screenlog[screenlog$Plate!="*",]
#  singlePlate$Plate = paste("P", singlePlate$Plate, sep="")
#  for (i in 1:nrow(singlePlate)){
#    flaggedWell[singlePlate$Plate[i] == anno$drug$Plate &
#                  singlePlate$Well[i] == anno$drug$Well] = TRUE
#  }
#  
#  D <- D[!(emptyWell | flaggedWell),,,]
#  
#  anno$drug <- anno$drug[!(emptyWell | flaggedWell),]
#  rownames(anno$drug) <- 1:nrow(anno$drug)

## ----removeFeature1,eval=FALSE----------------------------------------------------------
#  levels = apply(D, 4, function(x) length(unique(c(x))))
#  
#  D = D[,,,!(levels < 2000)]
#  anno$ftr = anno$ftr[-which(levels < 2000)]
#  
#  datamatrixWithAnnotation = list(D=D, anno=anno)
#  save(datamatrixWithAnnotation,
#       file=file.path("result","data","datamatrixWithAnnotation.rda"),
#       compress="xz")

## ----glog-------------------------------------------------------------------------------
glog <- function(x, c) {
  log2((x+sqrt(x^2+c^2))/2)
}

## ----transformation1, eval=FALSE--------------------------------------------------------
#  D = datamatrixWithAnnotation$D
#  quantile <- apply(D, 4, quantile, probs=0.05)
#  
#  zeroQt <- quantile == 0
#  quantile[zeroQt] <- 0.05*apply(D[,,,zeroQt], 4, max)
#  
#  for (i in seq_len(dim(D)[4])) {
#    D[,,,i] = glog(c(D[,,,i]), c=quantile[i])
#  }
#  
#  datamatrixTransformed = list(D=D, anno=datamatrixWithAnnotation$anno)
#  save(datamatrixTransformed,
#       file=file.path("result","data","datamatrixTransformed.rda"),
#       compress="xz")

## ----transformation2--------------------------------------------------------------------
if(exists("datamatrixWithAnnotation")) rm(datamatrixWithAnnotation)
if(!exists("datamatrixTransformed")){
  data("datamatrixTransformed", package="PGPC")
} else {
  PGPC:::checkConsistency("datamatrixTransformed")
}

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result","data")))
  dir.create(file.path("result","data"),recursive=TRUE)

## ----loadDatamatrixTransformed, echo=FALSE----------------------------------------------
if(!exists("datamatrixTransformed")){
  data("datamatrixTransformed", package="PGPC")
}

## ----qualityControlFeatures1, cache=TRUE, dependson = c(-1)-----------------------------
getCorrelation <- function(d){
  df <- do.call(rbind, 
             lapply(seq_along(d$anno$ftr), 
                    function(i) {
                      data.frame(name=d$anno$ftr[i], 
                                             cor=cor(c(d$D[,,1,i]), 
                                               c(d$D[,,2,i]),
                                               use="pairwise.complete.obs"),
                                             stringsAsFactors=FALSE)
                    }           
                    )
                )
  df
}
correlation = getCorrelation(datamatrixTransformed)
correlation = correlation[order(correlation$cor, decreasing=TRUE),]

## ----correlation1, fig.cap=c("Correlation of features. This figure is the basis for Figure 1B in the paper."), cache=TRUE, dependson = c(-1), dev.args=list(pointsize=16)----
selection = c("n", "nseg.dna.m.eccentricity.qt.0.01", "nseg.0.m.cx.mean")

plot(1:nrow(correlation),
     correlation$cor, 
     ylab="correlation coefficient", 
     xlab="feature",
     pch=20)
abline(h=0.7)
points(which(correlation$name %in% selection),
       correlation$cor[correlation$name %in% selection], 
       pch=19,
       col=c(2,3,4))
legend("bottomleft", legend=selection, col=c(2,3,4), pch=19, cex=0.6)

## ----singleFeatureCorrelation, fig.cap=c("Correlation of feature n, nseg.dna.m.eccentricity.qt.0.01 and nseg.0.m.cx.mean"), cache=TRUE, dependson = c(-1), fig.show='hold', resize.width="0.3\\textwidth"----
for (i in seq_along(selection)){
  plot(c(datamatrixTransformed$D[,,1,match(selection[i], 
                                           datamatrixTransformed$anno$ftr)]), 
       c(datamatrixTransformed$D[,,2,match(selection[i], 
                                           datamatrixTransformed$anno$ftr)]),
       xlab="replicate 1",
       ylab="replicate 2",
       main = selection[i],
       pch=20,
       col="#00000033",
       asp=1)
}

## ----correlation3, eval=TRUE, cache=TRUE, dependson = c(-1)-----------------------------
## remove columns with low correlation or few levels
remove = correlation$name[correlation$cor < 0.7]


D = datamatrixTransformed$D[,,, -match(remove, datamatrixTransformed$anno$ftr)]
anno = datamatrixTransformed$anno
anno$ftr = anno$ftr[-match(remove, anno$ftr)]

datamatrixFiltered = list(D=D, anno=anno)
save(datamatrixFiltered, 
     file=file.path("result","data","datamatrixFiltered.rda"), 
     compress="xz")
rm(datamatrixTransformed)

## ----stabilitySelection1, eval=FALSE, cache=TRUE----------------------------------------
#  D = apply(datamatrixFiltered$D,
#            2:4,
#            function(x) { (x - median(x,na.rm=TRUE)) / mad(x, na.rm=TRUE) } )
#  D[!is.finite(D)] = 0
#  dim(D) = c(prod(dim(D)[1:2]),2,dim(D)[4])
#  
#  forStabilitySelection = list(D=D,
#                               Sample = 1:prod(dim(D)[1:2]),
#                               phenotype = datamatrixFiltered$anno$ftr)
#  
#  preselect=c("n")
#  
#  selected <- PGPC:::selectByStability(forStabilitySelection,
#                                              preselect,
#                                              Rdim=40)
#  save(selected, file=file.path("result","data","selected.rda"), compress="xz")

## ----featureSelection1_, fig.cap=c("Correlation of selected feature after subtraction of the information containted in the previously selected features."), cache=TRUE, fig.width=6.3, fig.height=4.2, dev.args=list(pointsize=8), dependson = c(-2)----
if(!exists("selected")){
  data("selected", package="PGPC")
} else {
  PGPC:::checkConsistency("selected")
}

selectedFtrs = selected$selected
correlation = selected$correlation
ratioPositive = selected$ratioPositive

par(mfrow=c(2,1))
barplot(correlation, 
        names.arg=PGPC:::hrNames(selectedFtrs), 
        las=2,
        col = ifelse(ratioPositive > 0.5, 
                     brewer.pal(3,"Pastel1")[2], brewer.pal(3,"Pastel1")[1]),
        ylab = "information gain")


## ----featureSelection2_, fig.cap=c("Fraction of positive correlations. This figure is the basis for Figure 1C and Appendix Figure S2A in the paper."), cache=TRUE, fig.width=6.3, fig.height=4.2, dev.args=list(pointsize=8), dependson = c(-1)----
par(mfrow=c(2,1))
barplot(ratioPositive-0.5, 
        names.arg=PGPC:::hrNames(selectedFtrs), 
        las=2,
        col = ifelse(ratioPositive > 0.5, 
                     brewer.pal(3,"Pastel1")[2], brewer.pal(3,"Pastel1")[1]),
        offset=0.5,
        ylab = "fraction positive correlated")
#abline(a=0.5,0)

selectedFtrs=selectedFtrs[ratioPositive > 0.5]

D = datamatrixFiltered$D[,,,match(selectedFtrs, datamatrixFiltered$anno$ftr)]
anno = datamatrixFiltered$anno
anno$ftr = anno$ftr[match(selectedFtrs, anno$ftr)]

datamatrixSelected = list(D=D, anno=anno)
save(datamatrixSelected, 
     file=file.path("result","data","datamatrixSelected.rda"),
     compress="xz")

## ----calculateInteractions1_, eval=TRUE, cache=TRUE, results='hide', echo=TRUE, dependson = c(-1), dev.args=list(pointsize=16), fig.show='hold'----
neg = grepl("neg", datamatrixSelected$anno$drug$GeneID)
pos = grepl("Paclitaxel", datamatrixSelected$anno$drug$GeneID)

negMedians = apply(datamatrixSelected$D[neg,,,1], c(2,3), median)
posMedians = apply(datamatrixSelected$D[pos,,,1], c(2,3), median)

Nnorm = (datamatrixSelected$D[,,,1] - rep(posMedians, 
                                          each=dim(datamatrixSelected$D)[1])) / 
      (rep(negMedians, each=dim(datamatrixSelected$D)[1]) - 
         rep(posMedians, each=dim(datamatrixSelected$D)[1]))

datamatrixSelected$D[,,,1] <- Nnorm


plot(c(datamatrixSelected$D[!(neg | pos),,1,1]), 
     c(datamatrixSelected$D[!(neg | pos),,2,1]),
     xlab="replicate 1",
     ylab="replicate 2",
     main = "n",
     pch=20,
     col="grey",
     asp=1)
points(c(datamatrixSelected$D[pos,,1,1]), 
     c(datamatrixSelected$D[pos,,2,1]),
     col=2,
     pch=20)
points(c(datamatrixSelected$D[neg,,1,1]), 
     c(datamatrixSelected$D[neg,,2,1]),
     col=4,
     pch=20)

## ----calculateInteractions2_, cache=TRUE, results='hide', echo=TRUE, dependson = c(-1)----
interactions = getInteractions(datamatrixSelected, 
                               datamatrixSelected$anno$ftr, 
                               eps = 1e-4, 
                               maxiter = 100) 
save(interactions, 
     file=file.path("result","data", "interactions.rda"), compress="xz")

## check consistency
PGPC:::checkConsistency("interactions")

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result","data")))
  dir.create(file.path("result","data"),recursive=TRUE)

## ----plotPhenotypes1, cache=TRUE, fig.height=8, fig.width=8, dev.args=list(pointsize=8), fig.show='hold',fig.cap=c("Phenoprints of the two parental cell lines and the CTNNB1 WT background. This figure is the basis for Figure 1E-K, Figure 2A and Appendix Figure S2B in the paper.")----
if(!exists("interactions")) data(interactions, package="PGPC")
D = interactions$D
D2 = D
dim(D2) = c(prod(dim(D2)[1:2]),dim(D2)[3],dim(D2)[4])

SD = apply(D2, 3, function(m) apply(m, 2, mad, na.rm=TRUE))
MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } )

pAdjusted = interactions$pVal[,,,2]

bin <- function(x) (x-min(x)) / (max(x)-min(x))

## normalize by mean SD
D = apply(D, c(1, 2, 4), mean)
for (i in 1:dim(D)[3]) {
  D[,,i] = bin(D[,,i] / MSD[i])
}

dimnames(D) = list(template = paste(interactions$anno$drug$GeneID),
                   query = interactions$anno$line$mutation,
                   phenotype = PGPC:::hrNames(interactions$anno$ftr))
dimnames(pAdjusted) = dimnames(D)


drugPheno <- data.frame(name = c("DMSO", "PD98", "Vinblastin", 
                                 "Vincristine", "Ouabain", "Rottlerin", 
                                 "Etoposide", "Amsacrine", "Colchicine", 
                                 "BIX"),
                        GeneID = c("neg ctr DMSO", "79902", "80101",
                                   "80082", "79817", "79926",
                                   "79294", "79028", "79184",
                                   "80002"),
                        stringsAsFactors=FALSE)
drugPheno$annotationName = 
  interactions$anno$drug$Name[match(drugPheno$GeneID, 
                                    interactions$anno$drug$GeneID)]

drugPositions <- match(drugPheno$GeneID, dimnames(D)$template)
# define order of ftrs in plot:
#feature clustering
#4 nseg.0.m.majoraxis.mean       --> nuclear shape
#12 nseg.dna.m.eccentricity.sd   --> nuclear shape

#7 nseg.dna.h.var.s2.mean        --> nuclear texture
#13 nseg.dna.h.idm.s1.sd         --> nuclear texture
#17 nseg.dna.h.cor.s2.sd         --> nuclear texture

#1 n                             --> cell number

#6 cseg.act.h.f12.s2.sd          --> cellular texture
#15 cseg.act.h.asm.s2.mean       --> cellular texture
#18 cseg.dnaact.b.mad.mean       --> cellular texture
#16 cseg.dnaact.h.den.s2.sd      --> cellular texture
#10 cseg.dnaact.b.mean.qt.0.05   --> cellular texture
#3 cseg.act.h.cor.s1.mean        --> cellular texture
#19 cseg.act.h.idm.s2.sd         --> cellular texture
#14 cseg.dnaact.h.f13.s1.mean    --> cellular texture

#9 cseg.0.s.radius.min.qt.0.05   --> cellular shape
#5 cseg.dnaact.m.eccentricity.sd --> cellular shape
#11 cseg.act.m.eccentricity.mean --> cellular shape
#2 cseg.act.m.majoraxis.mean     --> cellular shape

#20 nseg.0.s.radius.max.qt.0.05  --> nuclear shape
#8 nseg.0.m.eccentricity.mean    --> nuclear shape

orderFtr <- c(4,12,7,13,17,1,6,15,18,16,10,3,14,19,9,5,11,2,20,8)
## select cell lines
par001 <- match("HCT116 P1", dimnames(D)$query)
Dselect <- D[drugPositions,par001,]
Dselect <- Dselect[, orderFtr]

stars(Dselect, 
      len = 0.8, 
      key.loc = c(8, 2),
      labels=drugPheno$name,
      col.stars=rainbow(dim(Dselect)[1]),
      main = "Phenotypes of HCT116 P1", 
      draw.segments = FALSE,
      scale=FALSE)

par007 <- match("HCT116 P2", dimnames(D)$query)

Dselect <- D[drugPositions,par007,]
Dselect <- Dselect[, orderFtr]

stars(Dselect, 
      len = 0.8, 
      key.loc = c(8, 2),
      labels=drugPheno$name,
      col.stars=rainbow(dim(Dselect)[1]),
      main = "Phenotypes of HCT116 P2", 
      draw.segments = FALSE,
      scale=FALSE)

mtCtnb1KO <- match("CTNNB1 wt", dimnames(D)$query)
Dselect <- D[drugPositions,mtCtnb1KO,]
Dselect <- Dselect[, orderFtr]

stars(Dselect, 
      len = 0.8, 
      key.loc = c(8, 2),
      labels=drugPheno$name,
      col.stars= rainbow(dim(Dselect)[1]),
      main = "Phenotypes of CTNNB1 wt", 
      draw.segments = FALSE,
      scale=FALSE)


## ----plotPhenotypes2, cache=TRUE, fig.height=8, fig.width=8, dev.args=list(pointsize=8), fig.show='hold', fig.cap=c("Phenoprints of the all cell lines for a DMSO control. This figure is the basis for Expanded View Figure EV2 in the paper.")----
## neg controls for all cell lines
drugPositions <- match("neg ctr DMSO", dimnames(D)$template)

Dselect <- D[drugPositions,,orderFtr]
stars(Dselect, 
      len = 0.8, 
      key.loc = NULL,
      labels=dimnames(D)$query,
      col.stars= rainbow(dim(Dselect)[1]),
      main = "Phenotypes of DMSO control for all cell lines", 
      draw.segments = FALSE,
      scale=FALSE)

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result")))
  dir.create(file.path("result"),recursive=TRUE)

## ----figureLabels, cache=TRUE, echo=FALSE-----------------------------------------------
pi1_fig.cap = c("Interaction profiles")
pi1_fig.subcap = c("Interaction profile of Bendamustine. This figure is the basis for Figure 4A in the paper.",
                   "Interaction profile of Disulfiram. This figure is the basis for Figure 4C in the paper.", 
                   "Interaction profile of Colchicine in the parental cell line and the CTNNB1 WT background. This figure is the basis for Figure 2B in the paper.", 
                   "Interaction profile of BIX01294 in the parental cell line and the CTNNB1 WT background. This figure is the basis for Figure 2B in the paper.")

pi2_fig.cap=c("Interaction profiles")
pi2_fig.subcap=c("Interaction profile of Bendamustine. This figure is the basis for Appendix Figure S7A in the paper.",
                 "Interaction profile of Disulfiram. This figure is the basis for Appendix Figure S7B in the paper.",
                 "Interaction profile of Colchicine in the parental cell line and the CTNNB1 WT background. This figure is the basis for Appendix Figure S3 in the paper.",
                 "Interaction profile of BIX01294 in the parental cell line and the CTNNB1 WT background. This figure is the basis for Appendix Figure S3 in the paper.")

ow = '.45\\textwidth'

## ----plotInteractions1_, cache=TRUE, dependson=c(-1), fig.show='hold', fig.cap=pi1_fig.cap, fig.subcap=pi1_fig.subcap, out.width=ow----
#### plot interactions for 1 drug as radar plot of all celllines
#### including ftrs as segments...
if(!exists("interactions")) data(interactions, package="PGPC")
int = interactions$res
dim(int) = c(prod(dim(int)[1:2]),dim(int)[3],dim(int)[4])
dim(int)
SD = apply(int, 3, function(m) apply(m, 2, mad, na.rm=TRUE))
MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } )

pAdjusted = interactions$pVal[,,,2]

bin <- function(x) (x-min(x)) / (max(x)-min(x))

int = apply(interactions$res, c(1, 2, 4), mean)
for (i in 1:dim(int)[3]) {
  int[,,i] = int[,,i] / MSD[i]
  int[,,i] = bin(int[,,i])
}

dimnames(int) = list(template = paste(interactions$anno$drug$GeneID),
                     query = interactions$anno$line$mutation,
                     phenotype = interactions$anno$ftr)
dimnames(pAdjusted) = dimnames(int)

drugPheno <- data.frame(name = c("Bendamustine", "Disulfiram",
                                 "Colchicine", "BIX01294"),
                        GeneID = c("79497", "80038", "79184", "80002"),
                        stringsAsFactors=FALSE)
drugPheno$annotationName = 
  interactions$anno$drug$Name[match(drugPheno$GeneID, 
                                    interactions$anno$drug$GeneID)]

##order of featurs for plot
ftrLevels=c("n",
            "nseg.dna.h.cor.s2.sd",
            "nseg.dna.h.idm.s1.sd",
            "nseg.dna.h.var.s2.mean",
            "nseg.dna.m.eccentricity.sd",
            "nseg.0.m.majoraxis.mean",
            "nseg.0.m.eccentricity.mean",
            "nseg.0.s.radius.max.qt.0.05",
            "cseg.act.m.majoraxis.mean" ,
            "cseg.act.m.eccentricity.mean",
            "cseg.dnaact.m.eccentricity.sd",
            "cseg.0.s.radius.min.qt.0.05",
            "cseg.act.h.idm.s2.sd",
            "cseg.dnaact.h.f13.s1.mean",
            "cseg.act.h.cor.s1.mean",
            "cseg.dnaact.b.mean.qt.0.05",
            "cseg.dnaact.h.den.s2.sd",
            "cseg.dnaact.b.mad.mean",
            "cseg.act.h.asm.s2.mean",
            "cseg.act.h.f12.s2.sd" )

## define background colors for phenogroups
backgroundColors = c("black", 
                     rep("grey60", 3),
                     rep("grey40", 4),
                     rep("grey20", 4),
                     rep("grey80", 8))

## order of mutations for plot
mutationOrder = c("HCT116 P1", "HCT116 P2", "PI3KCA wt", 
                  "AKT1", "AKT1/2", "PTEN", "KRAS wt", 
                  "MEK1", "MEK2", "CTNNB1 wt", "P53", "BAX") 



### plot radar for each drug showing all cell lines
for(i in seq_len(nrow(drugPheno))){

  drugPosition <- match(drugPheno$GeneID[i], dimnames(int)$template)
  
  Dselect <- int[drugPosition,,]
  pAdjustedSelect <- pAdjusted[drugPosition,,]
  
  featureDf <- data.frame(Dselect)  
  featureDf$line <- rownames(featureDf)
  featureDf = melt(featureDf, id.vars="line", variable.name="feature")
  
  pAdjustedDf <- data.frame(pAdjustedSelect)
  pAdjustedDf$line <- rownames(pAdjustedDf)
  pAdjustedDf <- melt(pAdjustedDf,
                      id.vars="line",
                      variable.name="feature",
                      value.name="pAdjusted")
  
  
  featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE)
  
  pAdjustedThresh = 0.01
  
  ## order features and cell lines
  featureDf$feature = factor(featureDf$feature, 
                             levels=ftrLevels, 
                             ordered=TRUE)  
  featureDf$line <- factor(featureDf$line, levels=mutationOrder)
  
  theme_new = theme_bw()
  #theme_new$text$size = 3
  #theme_new$axis.text$size = rel(0.2)
  theme_new$axis.text.x = element_blank()
  
  maxInt = max(featureDf$value)
  
  colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line)))
  if(i==1) colors[grep("AKT1/2", featureDf$line)[1]] = "red"
  if(i==2) colors[grep("MEK1", featureDf$line)[1]] = "red"
  
  if(i > 2) {
    colors = c("red", "black")
    featureDf <- subset(featureDf, line %in% c("HCT116 P1", "CTNNB1 wt"))
    featureDf$line = gsub("HCT116 P1", "CTNNB1 mut", featureDf$line)
  }
  
  ## remove grid
  theme_new$panel.grid.major$colour="white"
  
  if(i < 3){
    barColor = "lightblue"    
  }  
  if(i==3){
    barColor = "darkslateblue" 
  }
  if(i==4){
    barColor =  "mediumvioletred"
  }
  
  starplot <- ggplot(featureDf) + 
    facet_wrap(~line, ncol=3) +
    geom_bar(aes(feature, maxInt*1.2),
             fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)),
             stat="identity",
             width = 1)  + 
    geom_bar(aes(feature, maxInt),
             fill="white",
             stat="identity",
             width = 1)  + 
    geom_bar(aes(feature, value),
             fill=barColor,
             stat="identity") + 
    coord_polar(start=-pi/nlevels(featureDf$feature)) +
    ylim(c(0,maxInt*1.2)) +
    geom_bar(aes(feature, value),
             data = featureDf[featureDf$pAdjusted < pAdjustedThresh,],
             fill="red",
             stat="identity")  +
    geom_point(aes(feature, maxInt*1.1), 
               data = featureDf[featureDf$pAdjusted < pAdjustedThresh,],
               pch=8,
               col=2)  +
    theme_new + labs(title = paste0("Interactions of ", drugPheno$name[i]))  
  print(starplot)

}

## ----plotInteractionsAllScaled_, cache=TRUE, fig.show='hold', eval=FALSE, dependson=c(-1)----
#  #### plot interactions for 1 drug as radar plot of all celllines
#  #### including ftrs as segments...
#  
#  featureDf <- do.call(rbind,
#                       lapply(seq_len(dim(int)[1]),
#                              function(i){
#                                tmp=data.frame(int[i,,])
#                                tmp$GeneID = dimnames(int)[[1]][i]
#                                tmp$line <- rownames(tmp)
#                                tmp
#                              }))
#  featureDf = melt(featureDf, id.vars=c("GeneID", "line"),
#                   variable.name="feature")
#  
#  pAdjustedDf <- do.call(rbind,
#                       lapply(seq_len(dim(pAdjusted)[1]),
#                              function(i){
#                                tmp=data.frame(pAdjusted[i,,])
#                                tmp$GeneID = dimnames(pAdjusted)[[1]][i]
#                                tmp$line <- rownames(tmp)
#                                tmp
#                              }))
#  pAdjustedDf <- melt(pAdjustedDf,
#                      id.vars=c("GeneID", "line"),
#                      variable.name="feature",
#                      value.name="pAdjusted")
#  
#  featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE)
#  
#  ## remove controls
#  featureDf <- subset(featureDf, !grepl("ctr", GeneID))
#  featureDf$Name <- with(interactions$anno$drug,
#                         Name[match(featureDf$GeneID, GeneID)])
#  
#  
#  pAdjustedThresh = 0.01
#  ## just keep drugs with interactions
#  featureDf <- subset(featureDf,
#                      GeneID %in% unique(GeneID[pAdjusted < pAdjustedThresh]))
#  
#  ## order features and cell lines
#  featureDf$feature = factor(featureDf$feature,
#                             levels=ftrLevels,
#                             ordered=TRUE)
#  featureDf$line <- factor(featureDf$line, levels=mutationOrder)
#  
#  maxInt = max(featureDf$value)
#  
#  colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line)))
#  
#  
#  theme_new = theme_bw(base_size=5) +
#    theme(panel.grid.major = element_blank(),
#          panel.grid.minor = element_blank(),
#          axis.text.y = element_text(size=3))
#  #theme_new$text$size = 3
#  #theme_new$axis.text$size = rel(0.2)
#  theme_new$axis.text.x = element_blank()
#  
#  
#  barColor = "lightblue"
#  
#  allplots <- lapply(unique(featureDf$GeneID[order(featureDf$Name)]),
#                     function(id){
#    subset=subset(featureDf, GeneID %in% id)
#    starplot <- ggplot(subset) +
#      facet_grid(GeneID ~ line) +
#      geom_bar(aes(feature, maxInt*1.2),
#               fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)),
#               stat="identity",
#               width = 1)  +
#      geom_bar(aes(feature, maxInt),
#               fill="white",
#               stat="identity",
#               width = 1)  +
#      geom_bar(aes(feature, value),
#               fill=barColor,
#               stat="identity") +
#      coord_polar(start=-pi/nlevels(featureDf$feature)) +
#      ylim(c(0,maxInt*1.2)) +
#      geom_bar(aes(feature, value),
#               data = subset[subset$pAdjusted < pAdjustedThresh,],
#               fill="red",
#               stat="identity")  +
#      geom_point(aes(feature, maxInt*1.1),
#                 data = subset[subset$pAdjusted < pAdjustedThresh,],
#                 pch=8,
#                 col=2,
#                 size=0.3) +
#      labs(y="Interaction score", x="", title=unique(subset$Name))   +
#      theme_new
#    invisible(starplot)
#  })
#  
#  library(gridExtra)
#  ggsave(file.path("result", "all_plots.pdf"),
#         do.call(marrangeGrob,
#                 c(allplots, list(nrow=8, ncol=1, top=NULL))),
#         width=8.27, height=11.7)

## ----plotInteractions2_, cache=TRUE, fig.show='hold', dependson=c(-1), fig.cap=pi2_fig.cap, fig.subcap=pi2_fig.subcap, out.width=ow----
#### plot interactions for 1 drug as radar plot of all celllines
#### including ftrs as segments...

int = apply(interactions$res, c(1, 2, 4), mean)
for (i in 1:dim(int)[3]) {
  int[,,i] = int[,,i] / MSD[i]
}

## use abs value and replace values larger than 10 by 10
direction <- sign(int)

int = abs(int)

dimnames(int) = list(template = paste(interactions$anno$drug$GeneID),
                     query = interactions$anno$line$mutation,
                     phenotype = interactions$anno$ftr)
dimnames(direction) = dimnames(int)


### plot radar for each drug showing all cell lines
for(i in seq_len(nrow(drugPheno))){

  drugPosition <- match(drugPheno$GeneID[i], dimnames(int)$template)
  
  Dselect <- int[drugPosition,,]
  pAdjustedSelect <- pAdjusted[drugPosition,,]
  directionSelect <- direction[drugPosition,,]
  
  featureDf <- data.frame(Dselect)  
  featureDf$line <- rownames(featureDf)
  featureDf = melt(featureDf, id.vars="line", variable.name="feature")
  
  pAdjustedDf <- data.frame(pAdjustedSelect)
  pAdjustedDf$line <- rownames(pAdjustedDf)
  pAdjustedDf <- melt(pAdjustedDf,
                      id.vars="line",
                      variable.name="feature",
                      value.name="pAdjusted")
  
  directionDf <- data.frame(directionSelect)
  directionDf$line <- rownames(directionDf)
  directionDf <- melt(directionDf,
                      id.vars="line",
                      variable.name="feature",
                      value.name="direction")

  directionDf$direction =
      c("negative", "positive")[ifelse(directionDf$direction < 0, 1, 2)]
  
  featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE)
  featureDf <- merge(featureDf, directionDf, sort=FALSE)
  
  pAdjustedThresh = 0.01
  
  ## order features and cell lines
  featureDf$feature = factor(featureDf$feature, 
                             levels=ftrLevels, 
                             ordered=TRUE)
  featureDf$line <- factor(featureDf$line, levels=mutationOrder)
  
  theme_new = theme_bw()
  #theme_new$text$size = 3
  #theme_new$axis.text$size = rel(0.2)
  theme_new$axis.text.x = element_blank()
  
  maxInt = max(featureDf$value)
  
  colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line)))
  if(i==1) colors[grep("AKT1/2", featureDf$line)[1]] = "red"
  if(i==2) colors[grep("MEK1", featureDf$line)[1]] = "red"
  
  if(i > 2) {
    colors = c("red", "black")
    featureDf <- subset(featureDf, line %in% c("HCT116 P1", "CTNNB1 wt"))
    featureDf$line = gsub("HCT116 P1", "CTNNB1 mut", featureDf$line)
  }
  
  ## remove grid
  theme_new$panel.grid.major$colour="white"
  
  starplot <- ggplot(featureDf, aes(fill=direction)) + 
    facet_wrap(~line, ncol=3) +
    geom_bar(aes(feature, maxInt*1.2),
             fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)),
             stat="identity",
             width = 1)  + 
    geom_bar(aes(feature, maxInt),
             fill="white",
             stat="identity",
             width = 1)  + 
    geom_bar(aes(feature, value),
             stat="identity") + 
    coord_polar(start=-pi/nlevels(featureDf$feature)) +
    ylim(c(0,maxInt*1.2)) +
    geom_point(aes(feature, maxInt*1.1), 
               data = featureDf[featureDf$pAdjusted < pAdjustedThresh,],
               pch=8,
               col=2) +
    theme_new + labs(title = paste0("Interactions of ", drugPheno$name[i]))  
  print(starplot)  

}

## ----plotInteractionsAll_, cache=TRUE, fig.show='hold', eval=FALSE, dependson=c(-1)-----
#  #### plot interactions for 1 drug as radar plot of all celllines
#  #### including ftrs as segments...
#  
#  featureDf <- do.call(rbind,
#                       lapply(seq_len(dim(int)[1]),
#                              function(i){
#                                tmp=data.frame(int[i,,])
#                                tmp$GeneID = dimnames(int)[[1]][i]
#                                tmp$line <- rownames(tmp)
#                                tmp
#                              }))
#  featureDf = melt(featureDf, id.vars=c("GeneID", "line"),
#                   variable.name="feature")
#  
#  pAdjustedDf <- do.call(rbind,
#                       lapply(seq_len(dim(pAdjusted)[1]),
#                              function(i){
#                                tmp=data.frame(pAdjusted[i,,])
#                                tmp$GeneID = dimnames(pAdjusted)[[1]][i]
#                                tmp$line <- rownames(tmp)
#                                tmp
#                              }))
#  pAdjustedDf <- melt(pAdjustedDf,
#                      id.vars=c("GeneID", "line"),
#                      variable.name="feature",
#                      value.name="pAdjusted")
#  
#  directionDf <- do.call(rbind,
#                       lapply(seq_len(dim(pAdjusted)[1]),
#                              function(i){
#                                tmp=data.frame(direction[i,,])
#                                tmp$GeneID = dimnames(direction)[[1]][i]
#                                tmp$line <- rownames(tmp)
#                                tmp
#                              }))
#  directionDf <- melt(directionDf, id.vars=c("GeneID", "line"), variable.name="feature", value.name="direction")
#  directionDf$direction = c("negative", "positive")[ifelse(directionDf$direction < 0, 1, 2)]
#  
#  featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE)
#  featureDf <- merge(featureDf, directionDf, sort=FALSE)
#  
#  ## remove controls
#  featureDf <- subset(featureDf, !grepl("ctr", GeneID))
#  featureDf$Name <- with(interactions$anno$drug,
#                         Name[match(featureDf$GeneID, GeneID)])
#  
#  
#  pAdjustedThresh = 0.01
#  ## just keep drugs with interactions
#  featureDf <- subset(featureDf,
#                      GeneID %in% unique(GeneID[pAdjusted < pAdjustedThresh]))
#  
#  ## order features and cell lines
#  featureDf$feature = factor(featureDf$feature,
#                             levels=ftrLevels,
#                             ordered=TRUE)
#  featureDf$line <- factor(featureDf$line, levels=mutationOrder)
#  
#  maxValue=10
#  
#  colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line)))
#  
#  
#  theme_new = theme_bw(base_size=5) +
#    theme(panel.grid.major = element_blank(),
#          panel.grid.minor = element_blank(),
#          legend.key.size = unit(.3, "cm"),
#          axis.text.y = element_text(size=3))
#  #theme_new$text$size = 3
#  #theme_new$axis.text$size = rel(0.2)
#  theme_new$axis.text.x = element_blank()
#  
#  
#  barColor = "lightblue"
#  
#  allplots <- lapply(unique(featureDf$GeneID[order(featureDf$Name)]),
#                     function(id){
#    subset=subset(featureDf, GeneID %in% id)
#    subset$maxInt = ifelse(max(subset$value) < maxValue,
#                           maxValue,
#                           max(subset$value))
#  
#    starplot <- ggplot(subset, aes(fill=direction)) +
#      facet_grid(GeneID ~ line) +
#      geom_bar(aes(feature, maxInt*1.2),
#               fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)),
#               stat="identity",
#               width = 1)  +
#      geom_bar(aes(feature, maxInt),
#               fill="white",
#               stat="identity",
#               width = 1)  +
#      geom_bar(aes(feature, value),
#               stat="identity") +
#      coord_polar(start=-pi/nlevels(featureDf$feature)) +
#      #ylim(c(0,maxInt*1.2)) +
#      scale_y_continuous() +
#      geom_point(aes(feature, maxInt*1.1),
#                 data = subset[subset$pAdjusted < pAdjustedThresh,],
#                 pch=8,
#                 col=2,
#                 size=0.3,
#                 show_guide=FALSE) +
#      labs(y="Interaction score", x="", title=unique(subset$Name)) +
#      theme_new
#    invisible(starplot)
#  })
#  
#  library(gridExtra)
#  ggsave(file.path("result", "all_plots_unscaled.pdf"),
#         do.call(marrangeGrob,
#                 c(allplots, list(nrow=8, ncol=1, top=NULL))),
#         width=8.27, height=11.7)
#  

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(gplots)
if(!file.exists(file.path("result")))
  dir.create(file.path("result"),recursive=TRUE)

## ----figureLabelsCellLineClustering, cache=TRUE, echo=FALSE-----------------------------
clRaw_fig.cap = paste("Clustering of cell lines based on the raw values of the",
                      "selected features. This figure is the basis for",
                      "Appendix Figure S5B in the paper.")

clInt_fig.cap = paste("Clustering of cell lines based on the interaction",
                      "profiles of the selected features. This figure is the",
                      "basis for Appendix Figure S5C in the paper.")

## ----clusterRawFtrCelllinesSelectedControls, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', fig.cap=clRaw_fig.cap----
if(!exists("interactions"))
  data("interactions", package="PGPC")

drugAnno = interactions$anno$drug

filterFDR = function(d, pAdjusted, pAdjustedThresh = 0.1){
  select = pAdjusted <= pAdjustedThresh
  select[is.na(select)] = FALSE
  selectedRows = apply(select, 1, any)
  d[selectedRows,,]
}

D = interactions$D
D2 = D
dim(D2) = c(prod(dim(D2)[1:2]),dim(D2)[3],dim(D2)[4])

SD = apply(D2, 3, function(m) apply(m, 2, mad, na.rm=TRUE))
MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } )

D = apply(D, c(1, 2, 4), mean)
for (i in 1:dim(D)[3]) {
  D[,,i] = (D[,,i] - median(D[,,i])) / MSD[i]
}

dimnames(D) = list(template = paste(interactions$anno$drug$GeneID),
                   query = interactions$anno$line$mutation,
                   phenotype = interactions$anno$ftr)


## filter FDR
pAdjustedThresh = 0.01
pAdjusted = interactions$pVal[,,,2]
Dfilter = filterFDR(D, pAdjusted, pAdjustedThresh)

## combine controls
Dfilter = apply(Dfilter, c(2,3), 
                function(x) tapply(x, dimnames(Dfilter)$template, mean))
ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin")
Dfilter = Dfilter[!grepl("ctr", dimnames(Dfilter)[[1]]) | 
                              dimnames(Dfilter)[[1]] %in% ctrlToKeep,,]

celllineCorrelation = PGPC:::getCorr(aperm(Dfilter, c(2, 1, 3)),
                                            drugAnno)
celllineDist = PGPC:::trsf(celllineCorrelation)

hc <- hclust(as.dist(celllineDist))

heatmap.2(celllineDist, 
          Colv = as.dendrogram(hc),
          Rowv = as.dendrogram(hc),
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6),
          margin=c(9,9))

tmp = par(mar=c(5, 4, 4, 10) + 0.1)
plot(as.dendrogram(hc), horiz=TRUE)
par(tmp)

save(celllineDist, file=file.path("result", "celllineDist.rda"))

## ----clusterCelllinesSelectedControls, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dependson = c(-1), fig.cap=clInt_fig.cap----
PI = interactions$res
PI2 = PI  ##aperm(PI, c(1,3,2,4,5))
dim(PI2) = c(prod(dim(PI2)[1:2]),dim(PI2)[3],dim(PI2)[4])

SD = apply(PI2, 3, function(m) apply(m, 2, mad, na.rm=TRUE))
MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } )

## normalize by mean SD
PI = apply(interactions$res, c(1, 2, 4), mean)
for (i in 1:dim(PI)[3]) {
  PI[,,i] = PI[,,i] / MSD[i]
}

dimnames(PI) = list(template = interactions$anno$drug$GeneID,
                   query = interactions$anno$line$mutation,
                   phenotype = interactions$anno$ftr)

PIfilter = filterFDR(PI, pAdjusted, pAdjustedThresh)

## combine controls
PIfilter = apply(PIfilter, c(2,3), 
                 function(x) tapply(x, dimnames(PIfilter)$template, mean))
PIfilter = PIfilter[!grepl("ctr", dimnames(PIfilter)[[1]]) | 
                              dimnames(PIfilter)[[1]] %in% ctrlToKeep,,]


celllineCorrelation = PGPC:::getCorr(aperm(PIfilter, c(2, 1, 3)),
                                            drugAnno)
celllineDist = PGPC:::trsf(celllineCorrelation)

hccelllineDist <- as.dendrogram(hclust(as.dist(celllineDist)))


## reorder HCT116 P1 to top
lines = rownames(celllineDist)
wts = rep(0, length(lines))
wts[match("HCT116 P1", lines)] = 10
hccelllineDist= reorder(hccelllineDist, wts)


heatmap.2(celllineDist, 
          Colv = hccelllineDist,
          Rowv = hccelllineDist,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6),
          margin=c(9,9))

tmp = par(mar=c(5, 4, 4, 10) + 0.1)
plot(hccelllineDist, horiz=TRUE)
par(tmp)

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result")))
  dir.create(file.path("result"),recursive=TRUE)

## ----figureLabelsInteractionNetwork, cache=TRUE, echo=FALSE-----------------------------
totalInt_fig.cap = c("Total number of interactions per cell line. This figure is the basis for Figure 2E in the paper.")

drugInt_fig.cap = c("Number of drugs with at least one specific interaction per cell line.")

pleDeg_fig.cap = c("Pleiotropic degree per cell line. This figure is the basis for Figure 2D in the paper.")

intFtr_fig.cap = c("Distributions and overlap of interactions per feature")
intFtr_fig.subcap = c("Total interactions per feature.",
                 "Overlap of interactions in the 5 phenotypic classes. This figure is the basis for Figure 2C in the paper.",
                 "Overlap of interactions in the 5 phenotypic classes shown as bar plots.")

ow = '.45\\textwidth'

## ----drugCellLineInteractionData_, eval=TRUE, cache=TRUE--------------------------------
library(PGPC)
data(interactions, package="PGPC")
drugAnno = interactions$anno$drug

d = interactions

pAdjusted = interactions$pVal[,,,2]
dimnames(pAdjusted) = list(template = paste(interactions$anno$drug$GeneID),
                     query = interactions$anno$line$mutation,
                     phenotype = interactions$anno$ftr)
pAdjustedThresh = 0.01

result = NULL
for (ftr in seq_along(interactions$anno$ftr)){
  pAdjusted = d$pVal[,,ftr,2]

  top = pAdjusted <= pAdjustedThresh
  
  r1 = d$res[,,1,ftr][top]
  r2 = d$res[,,2,ftr][top]

  rMean = apply(d$res[,,,ftr], c(1,2), mean)[top]
  
  unames <- data.frame(well = rep(d$anno$drug$Well, 
                                  length(d$anno$line$startPlate)), 
                       plate= rep(as.numeric(gsub("P", 
                                                  "", 
                                                  d$anno$drug$PlateName)),
                                  length(d$anno$line$startPlate)),
                       startPlate = rep(d$anno$line$startPlate, 
                                        each=nrow(d$anno$drug)))
  unames$uname = paste(sprintf("%03d", unames$plate + unames$startPlate - 1),
                       unames$well,
                       sep="-")

  unames = as.matrix(unames$uname)
  dim(unames) = dim(top) 

  uname = unames[which(top)]
  
  ## add annotation  
  selTreatment = which(top) %% dim(top)[1]
  selTreatment[selTreatment == 0] = dim(top)[1]
  drug = d$anno$drug[selTreatment,]
  line = d$anno$line[which(top) %/% dim(top)[1]+1,]

  topHits = data.frame(ftr = d$anno$ftr[ftr],
                       uname = gsub("-", "-01-", uname),
                       GeneID = drug$GeneID,
                       r1,
                       r2,
                       rMean,
                       pAdjusted = pAdjusted[which(top)],
                       stringsAsFactors=FALSE)
  
  topHits = cbind(topHits, line)
  topHits = cbind(topHits,
                  drugAnno[match(topHits$GeneID, drugAnno$GeneID),
                           -match("GeneID",names(drugAnno))])
  topHits = topHits[order(topHits$pAdjusted),]
  rownames(topHits) = 1:nrow(topHits)
  result=rbind(result, topHits)
}  

## add controls to names
result$Name[grep("ctr", result$GeneID)] <- 
  result$GeneID[grep("ctr", result$GeneID)]

cat("No. of all interactions (including controls):", nrow(result))

## ----drugCellLineInteractionData3_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=totalInt_fig.cap----
#########################
## filter interactions
#########################
## 1. remove all controls
result <- subset(result, !grepl("ctr", GeneID))

cat("No. of interactions (controls removed):", nrow(result))
cat("No. of drug-cell line interactions (ftrs combined):", 
    nrow(unique(result[,c("GeneID", "mutation")])))

## genereate summary plots
## number of interactions per line vs. no of drugs / no of interactions per drugs
mutationOrder = c("HCT116 P1", "HCT116 P2", "PI3KCA wt", 
                  "AKT1", "AKT1/2", "PTEN", "KRAS wt", 
                  "MEK1", "MEK2", "CTNNB1 wt", "P53", "BAX")

## number of total interactions per cell line
noIntPerLine = table(result$mutation)
noIntPerLine = noIntPerLine[mutationOrder]

tmp = par(mar=c(6, 4, 4, 2) + 0.1)
mp <- barplot(noIntPerLine,
              ylab="total interactions per cell line",
              names.arg="")
text(mp[,1], 
     par("usr")[3], 
     labels = names(noIntPerLine), 
     srt = 45, 
     adj = c(1.1,1.1), 
     xpd = TRUE, 
     cex=.9)



## ----drugCellLineInteractionData4_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=drugInt_fig.cap----

## number of interaction drugs per line
noDrugsPerLine = sapply(names(noIntPerLine), 
                        function(line){
                          length(unique(result$GeneID[result$mutation==line]))
                        })
noDrugsPerLine
mp <- barplot(noDrugsPerLine,
              ylab="interacting drugs per cell line",
              names.arg="")
text(mp[,1], 
     par("usr")[3], 
     labels = names(noDrugsPerLine), 
     srt = 45, 
     adj = c(1.1,1.1), 
     xpd = TRUE, 
     cex=.9)

par(tmp)

## percentage of all possible interactions (removing controls)
nrow(result) / 
  ((dim(interactions$res)[1] - 
        sum(grepl("ctr", interactions$anno$drug$GeneID))) * 
     prod(dim(interactions$res)[c(2,4)]))

## percentage of drugs showing an interactions (removing controls)
length(unique(result$GeneID)) / 
    (dim(interactions$res)[1] - 
         sum(grepl("ctr", interactions$anno$drug$GeneID)))

## ----drugCellLineInteractionData5_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=pleDeg_fig.cap----
## pleiotropy degree
pleiotropicDegree = sapply(unique(result$GeneID), 
                          function(drug) 
                            length(unique(subset(result, GeneID==drug)$name)))

mp <- barplot(table(pleiotropicDegree),
              ylab="number of drugs",
              names.arg="",
              xlab="pleiotropy degree")
text(mp[,1], 
     par("usr")[3], 
     labels = 1:12,
     adj = c(1.1,1.1), 
     xpd = TRUE, 
     cex=.9)

table(pleiotropicDegree)

## ----drugCellLineInteractionData6_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.show='hold'----
## pleiotropic degree vs. drug effect
drugEffect = apply(interactions$effect$drug, c(1,3), mean)
dimnames(drugEffect) = list(interactions$anno$drug$GeneID,
                            interactions$anno$ftr)

for(ftr in colnames(drugEffect)){
  plot(pleiotropicDegree,
       drugEffect[match(names(pleiotropicDegree), rownames(drugEffect)), ftr],
       main=ftr,
       ylab ="drug effect")
}

## ----drugCellLineInteractionData7_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.show='hold', fig.cap=intFtr_fig.cap, fig.subcap=intFtr_fig.subcap, out.width=ow----

## no of interactions per feature
noIntPerFtr = table(result$ftr)[interactions$anno$ftr]

tmp = par(mar=c(9, 4, 4, 2) + 0.1)
mp <- barplot(noIntPerFtr,
              ylab="total interactions per feature",
              names.arg="")
text(mp[,1], 
     par("usr")[3], 
     labels = names(noIntPerFtr), 
     srt = 45, 
     adj = c(1.1,1.1), 
     xpd = TRUE, 
     cex=.9)

par(tmp)


result$ftrClass = PGPC:::hrClass(result$ftr)
significant = lapply(unique(result$ftrClass), 
                     function(selectedFtr) 
                       unique(subset(result, ftrClass==selectedFtr)$uname))
names(significant) = unique(result$ftrClass)

## merging the interactions for each category
plot(venn(significant))


overlap = sapply(names(significant), 
                 function(class1){
                   sapply(names(significant), function(class2){
                     length(intersect(significant[[class1]], 
                                      significant[[class2]]))
                   })
                 })
barplot(overlap, 
        beside=TRUE,
        legend=names(significant),
        args.legend=list(x="topleft"))

## ----drugCellLineInteractionData8_, eval=TRUE, cache=TRUE, dependson = c(-1)------------

## remove low confidence interactions
result <- do.call(rbind,
                  lapply(unique(result$name), 
                         function(line) {
                           tmp = subset(result, line == name)
                           noFtrPerDrug = table(tmp$GeneID)
                           selectedDrug = names(noFtrPerDrug[noFtrPerDrug > 1])
                           subset(tmp, GeneID %in% selectedDrug)
                         }))

## remove ambiguous interactions
noLinesPerDrug = sapply(unique(result$GeneID), 
                        function(drug) 
                          length(unique(subset(result, GeneID==drug)$name)))
unambigDrugs = names(noLinesPerDrug[noLinesPerDrug <= 3])
result = subset(result, GeneID %in% unambigDrugs)


## ----drugCellLineInteractionData10_, eval=FALSE, cache=TRUE, dependson = c(-1)----------
#  
#  ## number of interactions per line vs. no of drugs / no of interactions per drugs
#  noIntPerLine = table(result$name)
#  barplot(noIntPerLine)
#  
#  noDrugsPerLine = sapply(names(noIntPerLine),
#                          function(line)
#                              length(unique(result$GeneID[result$name==line])))
#  
#  plot(as.vector(noIntPerLine), noDrugsPerLine)
#  text(as.vector(noIntPerLine), noDrugsPerLine, names(noIntPerLine))
#  
#  noIntPerDrug = table(result$GeneID)
#  barplot(noIntPerDrug)
#  
#  for(line in unique(result$name)){
#    barplot(table(result$GeneID[result$name==line]), main=line)
#  }
#  
#  ## features showing interactions per drug
#  noFtrsPerDrug = sapply(names(noIntPerDrug),
#                         function(drug)
#                             length(unique(result$ftr[result$GeneID==drug])))
#  
#  plot(as.vector(noIntPerDrug), noFtrsPerDrug)
#  text(as.vector(noIntPerDrug), noFtrsPerDrug, names(noIntPerDrug))
#  
#  hist(noFtrsPerDrug, breaks=20)
#  
#  for(i in 1:5) print(sum(noFtrsPerDrug>i))

## ----drugCellLineInteractionData11_, eval=TRUE, cache=TRUE, dependson = c(-1)-----------
write.table(result, file=file.path("result", "cytoscapeExportFiltered.txt"),
            sep="\t",
            row.names=FALSE)

############################
## transform to pheno groups
############################
result$ftr = PGPC:::hrClass(result$ftr)
columnsToRemove = c("r1", "r2", "rMean", "pAdjusted")
result = unique(result[,-match(columnsToRemove, names(result))])

write.table(result, 
            file=file.path("result", "cytoscapeExportFilteredPhenoGroups.txt"),
            sep="\t",
            row.names=FALSE)

#########################
## combine features
#########################
result = do.call(rbind,
                 lapply(unique(result$uname),
                        function(u){
                          tmp = subset(result, uname==u)
                          ftr = tmp$ftr
                          tmp = unique(tmp[,-match(c("ftr", "ftrClass"),
                                                   names(tmp))])
                          tmp$cellnumber =
                              ifelse("cell number" %in% ftr, 1, 0)
                          tmp$cellshape =
                              ifelse("cell shape" %in% ftr, 1, 0)
                          tmp$celltexture =
                              ifelse("cellular texture" %in% ftr, 1, 0)
                          tmp$nuclearshape =
                              ifelse("nuclear shape" %in% ftr, 1, 0)
                          tmp$nucleartexture =
                              ifelse("nuclear texture" %in% ftr, 1, 0)
                          tmp$noFtrClasses = length(ftr)
                          tmp
                        }))

write.table(result, file=file.path("result",
                                   "cytoscapeExportFilteredFtrsCombined.txt"),
            sep="\t",
            row.names=FALSE)

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result","Figures")))
  dir.create(file.path("result","Figures"),recursive=TRUE)

## ----heatmapAll, cache=TRUE, fig.height=9, fig.width=14, resize.width="0.9\\textwidth", dev.args=list(pointsize=6), fig.cap="Heatmap of interaction profiles for all drugs"----
if(!exists("interactions")){
  data("interactions", package="PGPC")
} 
PI = interactions$res
PI2 = PI  ##aperm(PI, c(1,3,2,4,5))
dim(PI2) = c(prod(dim(PI2)[1:2]),dim(PI2)[3],dim(PI2)[4])

SD = apply(PI2, 3, function(m) apply(m, 2, mad, na.rm=TRUE))
MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } )

## normalize by mean SD
PI = apply(interactions$res, c(1, 2, 4), mean)
for (i in 1:dim(PI)[3]) {
  PI[,,i] = PI[,,i] / MSD[i]
}

dimnames(PI) = list(template = interactions$anno$drug$GeneID,
                   query = interactions$anno$line$mutation,
                   phenotype = interactions$anno$ftr)

myColors = c(`Blue`="cornflowerblue",
  `Black`="#000000",
  `Yellow`="yellow")
colBY = colorRampPalette(myColors)(513)

cuts = c(-Inf,
         seq(-6, -2, length.out=(length(colBY)-3)/2),
         0.0,
         seq(2, 6, length.out=(length(colBY)-3)/2),
         +Inf)

ppiw = .25
ppih = 1.4
fac = 2.2
d = dim(PI)

ordTempl = PGPC:::orderDim(PI, 1)
ordQuery = PGPC:::orderDim(PI, 2)
ordFeat  = PGPC:::orderDim(PI, 3)

PGPC:::myHeatmap(PI[ordTempl, ordQuery, ordFeat], 
                 cuts=cuts,
                 fontsize=10,
                 col=colBY)

## ----heatmapFiltered, cache=TRUE, fig.height=9, fig.width=14, resize.width="0.9\\textwidth", dev.args=list(pointsize=6), dependson = c(-1), fig.cap="Heatmap of interaction profiles for drugs that show at least one specific interaction"----
filterFDR = function(d, pAdjusted, pAdjustedThresh = 0.1){
  select = pAdjusted <= pAdjustedThresh
  select[is.na(select)] = FALSE
  selectedRows = apply(select, 1, any)
  d[selectedRows,,]
}

pAdjustedThreshold = 0.01
pAdjusted = interactions$pVal[,,,2]
PIfilter = filterFDR(PI, 
                    pAdjusted, 
                    pAdjustedThresh = pAdjustedThreshold)

PIfilter = apply(PIfilter, c(2,3), 
                 function(x) tapply(x, dimnames(PIfilter)$template, mean))

ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin")
### some other contrls:
# ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin", "ctrl IWP", "ctrl DAPT")
PIfilter = PIfilter[!grepl("ctr", dimnames(PIfilter)[[1]]) | 
                              dimnames(PIfilter)[[1]] %in% ctrlToKeep,,]

ordTempl = PGPC:::orderDim(PIfilter, 1)
ordQuery = PGPC:::orderDim(PIfilter, 2)
ordFeat  = PGPC:::orderDim(PIfilter, 3)

PGPC:::myHeatmap(PIfilter[ordTempl,ordQuery,ordFeat],
                 cuts=cuts,
                 fontsize=10,
                 col=colBY)

drugAnno = interactions$anno$drug
subset = drugAnno[drugAnno$compoundID %in% dimnames(PIfilter)[[1]] & 
                    !grepl("ctr", drugAnno$GeneID),]
write.table(subset[, c("Name", "GeneID", "Selectivity", "Selectivity_updated")],
            file=file.path("result", "annotation_selected_compounds.txt"),
            sep="\t",
            quote=FALSE,
            row.names=FALSE)

## ----clusterHeatmap, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of interaction profiles for drugs that show at least one interaction."----
PIdist = PGPC:::getDist(PIfilter, drugAnno=drugAnno)

hcInt <- as.dendrogram(hclust(as.dist(PIdist)))

heatmap.2(PIdist, 
          Colv = hcInt,
          Rowv = hcInt,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6),
          cexRow=.15,
          cexCol=.15)

## ----clusterHeatmapReordered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of interaction profiles for drugs that show at least one interaction. Clusters of size larger than 2 are colored. The dendrogram and coloring of clusters in this figure are the basis for Figure 5A and Appendix Figure S8 in the paper."----

## reorder dendrogram
wts = rep(0, dim(PIdist)[1])

## reorder bio cluster
inbetween = c(146, 187,  66, 170,  73, 121, 180)
wts[inbetween] = 1000

drugIds = sapply(strsplit(rownames(PIdist), " "), "[", 1)

## reorder Etoposide cluster
wts[match("79462", drugIds)] = 10

## reorder calcimycin cluster
wts[match("79471", drugIds)] = 5
wts[match("79982", drugIds)] = 10

hcInt = reorder(hcInt, wts)

cluster = cutree(as.hclust(hcInt), h=0.6)

## make color table
inCl <- table(cluster)
cl2col <- data.frame(cl=names(inCl), col="white", stringsAsFactors=FALSE)
largerCluster <- names(inCl[inCl>2])
cl2col$col[cl2col$cl %in% largerCluster] <- rainbow(length(largerCluster))

col = cl2col$col[match(cluster, cl2col$cl)]

heatmap.2(PIdist, 
          Colv = hcInt,
          Rowv = hcInt,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6),
          cexRow=.15,
          cexCol=.15,
          RowSideColors=col)

## ----chemicalSimilarity, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug similarity defined by Tanimoto distance."----

## read structure file
sdfset <- read.SDFset(system.file("extdata",
                                  "conf", 
                                  "LOPAC-regexpr-formated.sdf", 
                                  package = "PGPC"))

## map GeneIDs and library names to the compound names in the sdf file
drugs <-  data.frame(GeneID = dimnames(PIfilter)[[1]], stringsAsFactors=FALSE)
drugs$Name = interactions$anno$drug$Name[match(drugs$GeneID, 
                                               interactions$anno$drug$GeneID)]

## add SDF compound name to controls
controls <- data.frame(GeneID = c("ctrl DAPT", "ctrl IWP", "ctrl LY",  
                                  "ctrl Paclitaxel", "ctrl PD",  "ctrl RAPA",
                                  "ctrl U0126",   "ctrl Vinblastin",
                                  "ctrl Wortmannin", "ctrl Y27", 
                                  "neg ctr DMSO"),
                       Name = c("DAPT", "IWP-2", "LY-294,002 hydrochloride", 
                                "Taxol", "PD 98,059", "Sirolimus", "U0126",
                                "Vinblastine sulfate salt", 
                                "Wortmannin from Penicillium funiculosum", 
                                "Y-27632 dihydrochloride", "neg ctr DMSO"),
                       stringsAsFactors=FALSE)

findNames <- match(controls$GeneID, drugs$GeneID)
drugs$Name[findNames[!is.na(findNames)]] <- controls$Name[!is.na(findNames)]

## get drug names in sdf file and adjust format
drugsInSDF <- sapply(SDFset2SDF(sdfset), datablocktag, 5)
drugsInSDF[grep(" $", drugsInSDF)] <- 
    gsub(" *$","", drugsInSDF[grep(" $", drugsInSDF)])

## keep only selected drugs and add selected controls twice to sdf file
ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin")
selectedControls <- match(controls$Name[match(ctrlToKeep, controls$GeneID)], 
                          sapply(SDFset2SDF(sdfset), datablocktag, 5))
sdfsetControls <- sdfset[selectedControls]
cid(sdfsetControls) <- ctrlToKeep

sdfsetMerged <- c(sdfset[unique(match(drugs$Name, drugsInSDF))] , 
                  sdfsetControls)
selectedDrugsInSDF <- drugsInSDF[unique(match(drugs$Name, drugsInSDF))]

## Structure similarity searching and clustering using atom pairs
## use unique(inSDFfile) because controls could appear twice
## Generate atom pair descriptor database for searching
apset <- sdf2ap(sdfsetMerged)
dummy <- cmp.cluster(db=apset,
                     cutoff=0,
                     save.distances=file.path("result", "distmat.rda"))
load(file.path("result", "distmat.rda"))

## annotate distance matrix with GeneIDs and drugnames
dimnames(distmat) <- list(c(drugs$GeneID[match(selectedDrugsInSDF, drugs$Name)],
                            ctrlToKeep),
                          c(selectedDrugsInSDF, ctrlToKeep))

hc <- as.dendrogram(hclust(as.dist(distmat), method="single"))

heatmap.2(1-distmat,
          Rowv=hc,
          Colv=hc,
          col=colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), 
          density.info="none", 
          trace="none",
          main="Structural similarity")

## ----chemicalSimilarityReorderd, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Heatmap of drug similarity defined by Tanimoto distance ordered according to interaction profile similarity."----
## check correct ordering
drugIds = sapply(strsplit(rownames(PIdist), " "), "[", 1)
drugIds[grep("ctrl", drugIds)] = gsub(" $", "", 
                                      rownames(PIdist)[grep("ctrl", drugIds)])
stopifnot(!any(diff(match(drugIds, rownames(distmat))) != 1))

heatmap.2(1-distmat,
          Rowv=hcInt,
          Colv=hcInt,
          col=colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), 
          density.info="none", 
          trace="none",
          main="Structural similarity orderd by interaction similarity")

## ----chemicalSimilarityCombined, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles combined with chemical similarity defined by Tanimoto distance. This figure is the basis of Figure 5A in the paper."----
## plot combined cluster heatmap
chemDistOrdered = distmat[order.dendrogram(rev(hcInt)), order.dendrogram(hcInt)]

ordered  = PIdist[order.dendrogram(rev(hcInt)), order.dendrogram(hcInt)]
ordered[nrow(ordered) - row(ordered) + 2 < col(ordered) + 1] = 
    -1-chemDistOrdered[nrow(ordered) - row(ordered) + 2 < col(ordered) + 1]

heatmap.2(ordered, 
          Rowv=FALSE,
          Colv=FALSE,
          dendrogram="none",
          col=c(colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), 
                colorRampPalette(c("darkblue", "white"))(64)),
          breaks = c(seq(-2,-1,length.out=64), -0.5, seq(0,0.6,length.out=64)),
          trace="none",
          main="Structural similarity orderd by interaction similarity")


## ----chemicalSimilarityCombinedCluster, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1)----

cluster = list(C1 = c(79802, 79184, 80101, 80082, "ctrl Paclitaxel", 80075, 
                      "ctrl Vinblastin", 79615, 79607),
               C2 = c("ctrl U0126", 80091, 79902),
               C3 = c(79225, 79014),
               C4=c(79275, 79047),
               C5=c(79087, 79033),
               C6=c(78978, 78919),
               C7=c(79411, 79410),
               C8=c(79294, 79028, 79812, 79190, 79016),
               C9=c(79653, 79215, 80136, 79191),
               C10=c(79104, 79074),
               C11=c(79497, 79444, 80044, 79819, 79111),
               C12=c(79926, 79740),
               C13=c(79164, 80032, 79143),
               C13B=c(80038, 79164, 80032, 79143, 79064),
               C14=c(79817, 79229, 79038),
               C15=c(79165, 79334, 79503),
               C15B=c(79474, 79165, 79334, 79503),
               C16=c(79892, 79057, 79922),
               C17=c(80104, 79837),
               C18=c(79192, 79122, 79647),
               C18B=c(79192, 79122, 79647, 79116, 79020),
               C19=c(78909, 78908, 78894),
               C19B=c(79859, 78909, 78908, 78894))

## get reordered drug ids
drugIds = sapply(strsplit(rownames(ordered), " "), "[", 1)
drugIds[grep("ctrl", drugIds)] = gsub(" $", "",
                                      rownames(ordered)[grep("ctrl", drugIds)])

for(clName in names(cluster)){
  cl = cluster[[clName]]
  pdf(file=file.path("result", "Figures", paste0(clName, ".pdf")), 
      width=8,
      height=8)
  heatmap.2(ordered[match(cl, drugIds), rev(match(cl, rev(drugIds)))], 
            Rowv=FALSE,
            Colv=FALSE,            
            dendrogram="none",
            col=c(colorRampPalette(c("white",
                                     "antiquewhite",
                                     "darkorange2"))(64), 
                  colorRampPalette(c("darkblue", "white"))(64)),
            breaks = c(seq(-2,-1,length.out=64), 
                       -0.5, 
                       seq(0,0.6,length.out=64)),
            trace="none",
            key=FALSE,
            main=clName)
  dev.off()
}

## ----clusterHeatmapSingleCellLine, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1),  fig.cap="Clustering of drug interaction profiles using data from just one parental cell line. This figure is the basis of Appendix Figure S10 in the paper."----
## remove controls
singleCellLine = PIfilter[, match("HCT116 P2", dimnames(PIfilter)[[2]]),]

singleDist = PGPC:::getDist(singleCellLine, drugAnno=drugAnno)

hcsingleDist <- as.dendrogram(hclust(as.dist(singleDist)))

heatmap.2(singleDist, 
          Colv = hcsingleDist,
          Rowv = hcsingleDist,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6))

## ----clusterHeatmapSingleCellLineOrdered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using data from just one parental cell line ordered by the clustering of the whole data set."----
## combine controls
heatmap.2(singleDist, 
          Colv = hcInt,
          Rowv = hcInt,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6))

## ----clusterHeatmapJustCellNumber, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using just the cell number feature of all cell lines. This figure is the basis of Appendix Figure 13 in the paper."----
## combine controls
justN = PIfilter[,,match("n", dimnames(PIfilter)[[3]])]

justNDist = PGPC:::getDist(justN, drugAnno=drugAnno)

hcjustNDist <- as.dendrogram(hclust(as.dist(justNDist)))

heatmap.2(justNDist, 
          Colv = hcjustNDist,
          Rowv = hcjustNDist,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6))

## ----clusterHeatmapJustCellNumberOrdered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using just the cell number feature of all cell lines ordered by the clustering of the whole data set."----
## combine controls
heatmap.2(justNDist, 
          Colv = hcInt,
          Rowv = hcInt,
          trace="none",
          col=colorRampPalette(c("darkblue", "white"))(64),
          breaks = c(seq(0,0.5999,length.out=64),0.6))

## ----chemicalSimilarity2, results='hide', cache=TRUE, fig.keep='none', dependson = c(-1)----
###### check tanimoto for specifc compounds..
grepSDFset("BAY 11-7082", sdfset, field="datablock", mode="subset")
grepSDFset("BAY 11-7085", sdfset, field="datablock", mode="subset")
grepSDFset("Stattic", sdfset, field="datablock", mode="subset")
grepSDFset("Ouabain", sdfset, field="datablock", mode="subset")
grepSDFset("Dihydroouabain", sdfset, field="datablock", mode="subset")
grepSDFset("Brefeldin A", sdfset, field="datablock", mode="subset")

# bay7082 is CMP1018
# bay7085 is CMP183
# stattic is 1048
cmp.similarity(apset["CMP1018"], apset["CMP183"])
cmp.similarity(apset["CMP1018"], apset["CMP1048"])
cmp.similarity(apset["CMP183"], apset["CMP1048"])

#plot(sdfset[1018], print=FALSE)
#plot(sdfset[183], print=FALSE)
#plot(sdfset[1048], print=FALSE)
#plot(sdfset[c("1018","183", "1048", "943")], print=FALSE)
# ouabain is CMP943?
#79229
#79817
# Dihydroouabain is CMP355
# brefeldin is 164

cmp.similarity(apset["CMP355"], apset["CMP943"])
cmp.similarity(apset["CMP355"], apset["CMP164"])
cmp.similarity(apset["CMP943"], apset["CMP164"])

## ----sharedTargets, cache=TRUE, dev.args=list(pointsize=20), dependson = c(-1)----------
## get mean distance for drugs with shared targets
drugAnno = interactions$anno$drug
Selectivity <- drugAnno$Selectivity_updated[match(dimnames(PIfilter)[[1]],
                                                 drugAnno$GeneID)]

annoForDensity = drugAnno[match(dimnames(PIfilter)[[1]],
                                  drugAnno$GeneID),]

plotDensities <- function(d, anno, ...){
  M = PGPC:::getCorr(d, drugAnno=anno)
  sharedClass <- array(FALSE, dim=dim(M))
  for(target in unique(Selectivity)){
    if(is.na(target)) next
    targetdrugs = anno$GeneID[anno$Selectivity_updated %in% target]
    if(length(targetdrugs)>1){
      sel = dimnames(M)[[1]] %in% targetdrugs
      sharedClass[sel, sel] = TRUE
    }
  }
  diag(sharedClass) = TRUE
  notSharedDist = M[!sharedClass]
  
  diag(sharedClass) = FALSE
  sharedDist = M[sharedClass]
  
  classDist = c(sharedDist, notSharedDist)
  class = c(rep("shared selectivity", length(sharedDist)),
            rep("no shared selectivity", length(notSharedDist)))
  
  multidensity(classDist ~ class,
               xlab="Correlation between drug profiles",
               ylim=c(0,2.5),
               col=c("blue", "red"),
               ...)
  tmp <- multiecdf(classDist ~ class,
                   xlab="Correlation between drug profiles",
                   subsample=FALSE,
                   col=c("blue", "red"),
                   ...)
  
  integrals <- sapply(lapply(tmp, 
                             integrate, 
                             lower=min(classDist),
                             upper=max(classDist),
                             subdivisions=length(table(classDist))),
                      function(x) x$value)
  
  ## add class numbers
  integrals = c(integrals, N=table(class))
  
  corOrder = order(classDist, decreasing=TRUE)
  classDist = classDist[corOrder]
  class = class[corOrder]
  
  ratio = (cumsum(class=="shared selectivity"))/
    (cumsum(class=="shared selectivity") + 
         cumsum(class=="no shared selectivity"))
  
  plot(classDist, ratio,
       xlab="correlation coefficient",
       ylab="shared target fraction",
       ylim=c(0,1),
       type="l",
       ...)
  abline(h = c(.1, .2, .3), col="grey", lty=2)
  text(0, 0.3, sum(ratio >= 0.3))
  text(0, 0.2, sum(ratio >= 0.2))
  text(0, 0.1, sum(ratio >= 0.1))
  
  integrals
}



## ----figureLabelsDrugClustering, cache=TRUE, echo=FALSE---------------------------------
all_fig.cap = c("Distribution of correlation coefficients of the interaction profiles for the whole data set. Pannel (b) is the basis of Figure 5B in the paper.")
justN_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using only cell number for all cell lines. Pannel (b) is the basis of Figure 5B in the paper.")
parOnly_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using all features of only one parental cell line. Pannel (b) is the basis of Figure 5B in the paper.")
fig.subcap = c("Density distribution of correlation coefficients per class",
                   "Cumulative distribution of correlation coefficients per class.", 
                   "Fraction of drugs with shared targets plotted against the correlation coefficients.")


allStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles for the whole data set. Pannel (b) is the basis of Appendix Figure S11A in the paper.")
justNStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using only cell number for all cell lines. Pannel (b) is the basis of Appendix Figure S11A in the paper.")
parOnlyStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using all features of only one parental cell line. Pannel (b) is the basis of Appendix Figure S11A in the paper.")

integral_fig.cap = "Comparison of the area under the curve (AUC) of the cummulative distribution function from the different data sets and classes defined by the target selectivity."
integral_fig.subcap = c("Direct comparison of the AUC for each class and data set.", "Comparison of the differece in AUC between classes for the different data sets. This figure ist the basis for Figure 5C in the paper.")

integralStruct_fig.cap = "Comparison of the area under the curve (AUC) of the cummulative distribution function from the different data sets and classes defined by the structural similarity."
integralStruct_fig.subcap = c("Direct comparison of the AUC for each class and data set.", 
                        "Comparison of the differece in AUC between classes for the different data sets. This figure ist the basis for Appendix Figure S11B in the paper.",
                        "Comparison of the differece in AUC between classes for the different data sets and classes defined by target selectivity or structural similarity.")

ow = '.45\\textwidth'

## ----correlationDistributionAll, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=all_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intAll <- plotDensities(PIfilter, 
                        annoForDensity, lwd=5,
                        xlim=c(-1,1),
                        main=paste("ctrl combined all data,",
                                   "all isogenics and all features"))

## ----correlationDistributionJustN, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=justN_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intN <- plotDensities(justN, 
                      annoForDensity, lwd=5,
                      xlim=c(-1,1),
                      main="all isogenics, only cell number")

## ----correlationDistributionSingleLine, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=parOnly_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intSingle <- plotDensities(singleCellLine, 
                           annoForDensity, lwd=5,
                           xlim=c(-1,1),
                           main="all features, only parental cell line")

## ----correlationDistributionIntegral, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=integral_fig.cap, fig.subcap=integral_fig.subcap, out.width=ow----
integrals <- as.data.frame(rbind(intAll, intN, intSingle))
integrals$type = c("all data", "only cell number,\nall cell lines", 
                   "only parental cell line,\nall feature")

theme_new = theme_bw()
theme_new$text$size = 20
theme_new$axis.text$size = rel(1.2) 

theme_new = theme_new + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p = ggplot(data=melt(integrals,
                     measure.vars=c("no shared selectivity",
                                    "shared selectivity")), 
           aes(x=type, 
               y= value,
               fill=variable))
dodge <- position_dodge(.5)

p = p + geom_bar(stat="identity", position=dodge, width=.5) + 
  theme_new +
  scale_fill_manual(values = c("blue", "red"))
print(p)

## difference
integrals$difference = integrals$"no shared selectivity" -
    integrals$"shared selectivity"
integrals$similarity = "Target selectivity"

p = ggplot(data=integrals, 
           aes(x=type, 
               y=difference))
p = p + geom_bar(stat="identity", width=.5) + 
  theme_new +
  labs(title = "Target selectivity") +
  scale_y_continuous(limits = c(0,.3))
print(p)

## ----TargetsDistancesVsChemicalSimilarity, cache=TRUE, fig.keep='all', fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1)----
## get mean distance for drugs with shared targets
plotDensityTanimotoThresh <- function(profile,
                                      distmat,
                                      tanimotoThresh = 0.6,
                                      ...){

  similarStructure = distmat < tanimotoThresh 
  M = PGPC:::getCorr(profile, drugAnno=drugAnno)
  
  diag(similarStructure) = TRUE
  notSharedDist = M[!similarStructure]
  
  diag(similarStructure) = FALSE
  sharedDist = M[similarStructure]
  
  classDist = c(sharedDist, notSharedDist)
  class = c(rep("similar structure", length(sharedDist)),
            rep("different structure", length(notSharedDist)))
  
  multidensity(classDist ~ class,
               xlab="Correlation between drug profiles",
               ylim=c(0,2.5),
               col=c("blue", "red"),
               ...)
  tmp <- multiecdf(classDist ~ class,
                   xlab="Correlation between drug profiles",
                   subsample=FALSE,
                   col=c("blue", "red"),
                   ...)
  
  integrals <- sapply(lapply(tmp, 
                             integrate, 
                             lower=min(classDist),
                             upper=max(classDist),
                             subdivisions=length(table(classDist))),
                      function(x) x$value)
  
  ## add class numbers
  integrals = c(integrals, N=table(class))
  
  corOrder = order(classDist, decreasing=TRUE)
  classDist = classDist[corOrder]
  class = class[corOrder]
  
  ratio = (cumsum(class=="similar structure"))/
    (cumsum(class=="similar structure") + cumsum(class=="different structure"))
  
  plot(classDist, ratio,
       xlab="correlation coefficient",
       ylab="shared target fraction",
       ylim=c(0,1),
       type="l",
       ...)
  abline(h = c(.1, .2, .3), col="grey", lty=2)
  text(0, 0.3, sum(ratio >= 0.3))
  text(0, 0.2, sum(ratio >= 0.2))
  text(0, 0.1, sum(ratio >= 0.1))
  
  integrals
}

## reorder according to drug order in ctrlCombined
stopifnot(!any(diff(match(dimnames(PIfilter)[[1]], rownames(distmat))) != 1))

tanimotoThresh = 0.6


## ----correlationDistributionStructureAll, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=allStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intAllTanimoto <- plotDensityTanimotoThresh(PIfilter, distmat, 
                                            tanimotoThres=tanimotoThresh, lwd=5,
                                            xlim=c(-1,1),
                                            main=paste0("all data,", 
                                                        " Tanimoto dist < ", 
                                                        tanimotoThresh))

## ----correlationDistributionStructureJustN, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=justNStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intNTanimoto <- plotDensityTanimotoThresh(justN, distmat, 
                                          tanimotoThres=tanimotoThresh,lwd=5,
                                          xlim=c(-1,1),
                                          main=paste("all isogenics,",
                                                     "cellnumber only,",
                                                     "Tanimoto dist < ", 
                                                     tanimotoThresh))

## ----correlationDistributionStructureSingleLine, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=parOnlyStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow----
intSingleTanimoto <- plotDensityTanimotoThresh(singleCellLine, distmat,
                                               tanimotoThres=tanimotoThresh,
                                               lwd=5,
                                               xlim=c(-1,1),
                                               main=paste("single cell line,",
                                                          "Tanimoto dist < ", 
                                                          tanimotoThresh))

## ----correlationDistributionStructureIntegral, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=integralStruct_fig.cap, fig.subcap=integralStruct_fig.subcap, out.width=ow----
integralsTanimoto <- as.data.frame(rbind(intAllTanimoto,
                                         intNTanimoto,
                                         intSingleTanimoto))
integralsTanimoto$type = c("all data",
                           "only cell number,\nall cell lines",
                           "only parental cell line,\nall feature")

theme_new = theme_bw()
theme_new$text$size = 20
theme_new$axis.text$size = rel(1.2) 

theme_new = theme_new + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p = ggplot(data=melt(integralsTanimoto, 
                     measure.vars=c("different structure",
                                    "similar structure")), 
           aes(x=type, 
               y= value,
               fill=variable))
dodge <- position_dodge(.5)

p = p + geom_bar(stat="identity", position=dodge, width=.5) + 
  theme_new +
  scale_fill_manual(values = c("blue", "red"))
print(p)


## difference
integralsTanimoto$difference = integralsTanimoto$"different structure" -
    integralsTanimoto$"similar structure"
integralsTanimoto$similarity = "Chemical similarity"

p = ggplot(data=integralsTanimoto, 
           aes(x=type, 
               y=difference))
p = p + geom_bar(stat="identity", width=.5) + 
  theme_new +
  labs(title = "Chemical similarity") +
  scale_y_continuous(limits = c(0,.3))
print(p)

cols = c("type", "difference", "similarity")
p = ggplot(data=rbind(integrals[,cols], integralsTanimoto[,cols]), 
           aes(x=type, 
               y= difference,
               fill=similarity))
dodge <- position_dodge(.5)

p = p + geom_bar(stat="identity", position=dodge, width=.5) + 
  theme_new +
  scale_fill_manual(values = c("grey", "black"))
print(p)

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)
if(!file.exists(file.path("result")))
  dir.create(file.path("result"),recursive=TRUE)

## ----generateCellHTS2reportsDrugCombi, echo=TRUE, results='hide', cache=TRUE------------
savePath="result"
dataPath=system.file("extdata", "drug_combinations", 
                     package="PGPC")
Name="drug_combis"
LogTransform=TRUE
Annotation="2013-10-22drugcombiAnno_2plates.txt"
Plateconf="2013-10-22plateconfig_all.txt"
Description="Description.txt"
NormalizationMethod="NPI"
NormalizationScaling="multiplicative"
VarianceAdjust="none"
SummaryMethod="mean"
Score="zscore"


PlateList=c("2013-10-22platelist_HCT_72_2layout_8plates_mod.txt",
            "2013-10-22platelist_DLD_72_2layout_8plates.txt")

## include intensities
mySettings = getSettings()
mySettings$plateList$intensities$include = TRUE
setSettings(mySettings)



## ----generateCellHTS2reportsDrugCombi2, echo=TRUE, results='hide', cache=TRUE, dependson = c(-1)----
for(plate in PlateList){
  Outdir = file.path(savePath, 
                     paste(gsub("_2layout_8plates.txt|_2layout_8plates_mod.txt",
                                "", plate),
                           NormalizationMethod, 
                           VarianceAdjust, sep = "_"))
  x = readPlateList(plate,path=dataPath, name=Name)
  x = configure(x, descripFile=Description,
                confFile=Plateconf,,
                path=dataPath)
  xp = normalizePlates(x, 
                       log=LogTransform,
                       scale=NormalizationScaling,
                       method=NormalizationMethod,
                       varianceAdjust=VarianceAdjust)
  xp@state["normalized"] = TRUE
  xsc = scoreReplicates(xp, sign = "-",
                        method = Score)
  xsc = summarizeReplicates(xsc,
                            summary = SummaryMethod)
  xsc = cellHTS2::annotate(xsc, 
                           geneIDFile = file.path(dataPath,
                                                  Annotation))
  out = writeReport(raw = x,
                    normalized = xp,
                    scored = xsc,
                    outdir=Outdir,
                    force=TRUE)
}

## ----qualityControl, cache=TRUE, eval=TRUE, dependson = c(-1), fig.show='hold'----------
experiment = "2013-10-22platelist_HCT_72_NPI_none"

## get the dataframe and check the data in an explorative manner
df = read.delim(file.path("result", experiment, "in", "topTable.txt"),
                stringsAsFactors=FALSE)
df$identifier = paste(df$drug1, df$drug2, sep="_")
#  head(df)

# plot correlations
p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r2_ch1))
p = p + geom_point() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment)))  
print(p)

p = ggplot(data=df, aes(x=raw_r2_ch1, y= raw_r3_ch1))
p = p + geom_point() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) 
print(p)

p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r3_ch1))
p = p + geom_point() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment)))
print(p)

p = ggplot(data=df, aes(x=raw_r4_ch1, y= raw_r5_ch1))
p = p + geom_point() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment)))  
print(p)

p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r5_ch1))
p = p + geom_point(na.rm=TRUE) +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment)))
print(p)

# plot all
p = ggplot(data=melt(df, measure.vars=names(df)[grep("raw_r", names(df))]),
           aes(x=concdrug1.micro_M., y=value, color=variable))
p = p + facet_wrap(~identifier) +
  scale_x_log10() +  
  geom_point(na.rm=TRUE) + 
  theme_bw() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) 
print(p)

p = ggplot(data=melt(df, 
                     measure.vars=names(df)[grep("normalized_r", names(df))]),
           aes(x=concdrug1.micro_M., y=value, color=variable))
p = p + facet_wrap(~identifier) +
  scale_x_log10() +  
  geom_point(na.rm=TRUE) + 
  theme_bw() +
  labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment)))  
print(p)

## ----getExpectedValue, dependson = c(-1)------------------------------------------------
getExpectedValue <- function(df,
                             position1,
                             position2,
                             drugNames,
                             type=c("nonInteraction", "HSA")){
  normCols = c("normalized_r1_ch1", 
               "normalized_r2_ch1" , 
               "normalized_r3_ch1", 
               "normalized_r4_ch1" ,     
               "normalized_r5_ch1")
  normCols <- normCols[normCols %in% names(df)]
  
  if(type=="nonInteraction"){
    tmp <- df[position1,]
    tmp[,normCols] <- tmp[,normCols] + df[position2,normCols] - 1
    tmp$identifier = "expected"
    tmp$GeneID = gsub(drugNames[1], "expected", tmp$GeneID)
    tmp$drug1 = gsub(drugNames[1], "expected", tmp$GeneID)
    tmp$color = "white"
  } else if(type=="HSA"){
    ## test which single compound has the stronges mean effect and use this as expected
    mean1 <- tapply(unlist(df[position1,normCols]), 
                    rep(df$concdrug1.micro_M.[position1], length(normCols)), 
                    mean, 
                    na.rm=TRUE)
    mean2 <- tapply(unlist(df[position2,normCols]), 
                    rep(df$concdrug1.micro_M.[position2], length(normCols)), 
                    mean, 
                    na.rm=TRUE)
    
    ## take values of drug1 and exchange if drug2 shows a stronger effect for a conc
    tmp <- df[position1,]
    
    concToChange <- names(mean1[mean1 > mean2])
    
    for(conc in concToChange){
      concPos <- df$concdrug1.micro_M.[position2] == as.numeric(conc)
      tmp[concPos,normCols] <- df[position2[concPos],normCols]
    }
    
    tmp$identifier = "expected"
    tmp$GeneID = gsub(drugNames[1], "expected", tmp$GeneID)
    tmp$drug1 = gsub(drugNames[1], "expected", tmp$GeneID)
    tmp$color = "white"
  } else {
    stop("type must be 'nonInteraction' or 'HSA'.")
  }
  
  columnsToKeep <- c("plate", 
                     normCols,
                     "drug1",
                     "concdrug1.micro_M.",
                     "drug2",
                     "concdrug2.micro_M.",
                     "GeneID", 
                     "identifier",
                     "color")  
  
  invisible(melt(rbind(df, tmp)[,columnsToKeep], 
             measure.vars=normCols))
}   

## ----testSynergy, dependson = c(-1)-----------------------------------------------------
testSynergy <- function(df, drugNames, summarizeWells=TRUE){
  
  if(summarizeWells){
    ## summarize wells on the same plate
    meanValue <- tapply(df$value, list(df$GeneID, df$variable, df$plate), 
                        mean, na.rm=TRUE)
    for(i in dimnames(meanValue)[[1]]){
      for(j in dimnames(meanValue)[[2]]){
        for(k in dimnames(meanValue)[[3]]){
          selection <- df$GeneID == i &
            df$variable == j & df$plate == k
          if(sum(selection) > 0)
            df$meanValue[selection] = meanValue[i, j, k]
        }        
      }
    }
    df$value = NULL
    df = unique(df)
    colnames(df) <- gsub("meanValue", "value", colnames(df))
  }
  
  ## test synergy for each drug concentration
  for(conc in unique(df$concdrug1.micro_M.)){
    if(conc==0) next
    dfConc = df[df$concdrug1.micro_M. == conc, ]
    test = t.test(dfConc$value[dfConc$identifier == paste(drugNames[1],
                                                          drugNames[2],
                                                          sep="_")], 
                  dfConc$value[dfConc$identifier == "expected"],
                  alternative="less")
    df$pvalue[df$concdrug1.micro_M. == conc] = test$p.value
    df$combi_mean[df$concdrug1.micro_M. == conc] = test$estimate[1]
    df$expected_mean[df$concdrug1.micro_M. == conc] = test$estimate[2]
  }
  ## set pvalue to 1 for 0 drug concentrations
  df$pvalue[is.na(df$pvalue)] = 1
  invisible(df)
}


## ----getSummary, dependson = c(-1)------------------------------------------------------
getSummary <- function(df){
  ## calculate mean values and sem
  ids = paste(df$concdrug1.micro_M., df$identifier)
  
  sem <- function(x, ...) sd(x, ...)/sqrt(sum(!is.na((x))))
  
  summary = data.frame(concdrug1.micro_M. = 
                           as.vector(tapply(df$concdrug1.micro_M., 
                                            ids,
                                            unique)),
                       identifier = as.vector(tapply(df$identifier, 
                                                     ids,
                                                     unique)),
                       color = as.vector(tapply(df$color, 
                                                ids,
                                                unique)),
                       pvalue = as.vector(tapply(df$pvalue, 
                                                 ids,
                                                 unique)),
                       mean = as.vector(tapply(df$value, 
                                               ids, 
                                               mean, 
                                               na.rm=TRUE)),
                       sem = as.vector(tapply(df$value, 
                                              ids, 
                                              sem, 
                                              na.rm=TRUE)),
                       n = as.vector(tapply(!is.na(df$value), 
                                            ids, 
                                            sum)),
                       stringsAsFactors=FALSE)
                       
  summary$identifier = as.factor(summary$identifier)
  levels(summary$identifier) = levels(df$identifier)
  summary$color = as.factor(summary$color)
  levels(summary$color) = levels(df$color)
  invisible(summary)
}    

## ----analyseDrugCombi, dependson = c(-1)------------------------------------------------
analyseDrugCombi <- function(df, combi, plotSingle=TRUE, type="nonInteraction"){

  drugNames = unlist(strsplit(combi, "_"))
  
  ## select all wells beloning to the combination
  df_sub = subset(df, 
                  grepl(paste(c(paste(drugNames, collapse="_"), 
                                paste0(drugNames, "_DMSO")), 
                              collapse="|"), 
                        df$identifier))
  
  ## head(df_sub)
  df_sub <- df_sub[order(df_sub$GeneID),]
  
  position1 <- which(df_sub$identifier == paste0(drugNames[1], "_DMSO") & 
                       !grepl("DMSO_0$|^DMSO_0", df_sub$GeneID))
  position2 <- which(df_sub$identifier == paste0(drugNames[2], "_DMSO") & 
                       !grepl("DMSO_0$|^DMSO_0", df_sub$GeneID))
  
  if(!identical(gsub(drugNames[1], "", df_sub$GeneID[position1]),
                gsub(drugNames[2], "", df_sub$GeneID[position2])))
    stop("single drug values do not match")
  
  ## add color information for plots
  df_sub$color = "#5CBA48"
  df_sub$color[position1] = "gray60"
  df_sub$color[position2] = "gray30"    
  
  tmp <- getExpectedValue(df_sub,
                          position1,
                          position2,
                          drugNames,
                          type=type)
  
  ## manually set colors and order for plots
  labelOrder = c(paste0(drugNames, "_DMSO"),
                 "expected",
                 paste(drugNames, collapse="_"))
  tmp$identifier = factor(tmp$identifier, levels = labelOrder)
  colorForPlot = c("gray60", "gray30", "white", "#5CBA48")
  tmp$color = factor(tmp$color, levels= colorForPlot)
  
  tmp <- testSynergy(tmp, drugNames)
  if(plotSingle) PGPC:::plotSingleValues(tmp, pthresh=0.05)

  invisible(getSummary(tmp))
}

## ----drugInteractions, dependson = c(-1)------------------------------------------------
experiment = "2013-10-22platelist_HCT_72_NPI_none"

## get the dataframe and check the data in an explorative manner
df = read.delim(file.path("result", experiment, "in", "topTable.txt"), 
                stringsAsFactors=FALSE)
df$identifier = paste(df$drug1, df$drug2, sep="_")
df$concdrug1.micro_M. = round(df$concdrug1.micro_M., 4)

## remove problematic replicate 4
df <- df[, -grep("r4", names(df))]


theme_new = theme_bw()
theme_new$text$size = 20
theme_new$axis.text$size = rel(1.2) 

## ----figureLegends, echo=FALSE----------------------------------------------------------
Bendamustin_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m"
Bendamustin_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations. This figure is the basis of Figure 4B in the paper.")

BendamustinHSA_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using the HSA model in the HCT116 parental cell line. Error bars, means +- s.e.m"
BendamustinHSA_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations.")

Disulfiram_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m"
Disulfiram_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations. This figure is the basis of Figure 4D in the paper.")

DisulfiramHSA_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m"
DisulfiramHSA_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations.")
ow = '.45\\textwidth'

## ----drugInteractionsBendatmustin, dependson = c(-2), fig.show='hold', fig.cap=Bendamustin_fig.cap, fig.subcap=Bendamustin_fig.subcap, out.width=ow----
drugCombis = c("Bendamustine_AKTi VIII")
##drugCombis = c("Bendamustine_AKTi VIII", "Bendamustine_PD", "Bendamustine_U0126",
##               "Bendamustine_MK2206")

## for better overview we plot only a selection of concentrations  
concentrations = c("1.25", "2.5", "5", "10")
breaks = c(1,3,10)
limits = c(1, 11)

for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE)
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----drugInteractionsBendatmustinHSA, dependson = c(-1), fig.show='hold', fig.cap=BendamustinHSA_fig.cap, fig.subcap=BendamustinHSA_fig.subcap, out.width=ow----
for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA")
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----drugInteractionsDisulfiram, dependson = c(-1), fig.show='hold', fig.cap=Disulfiram_fig.cap, fig.subcap=Disulfiram_fig.subcap, out.width=ow----
drugCombis = c("Disulfiram_PD")
#drugCombis = c("Disulfiram_PD", "Disulfiram_U0126")

## for better overview plot only a selection of concentrations  
concentrations = c("0.0195", "0.0391","0.0781","0.1562")
breaks = c(0.01, 0.03, 0.1, 0.3)
limits=c (0.01, 0.5)

for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE)
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----drugInteractionsDisulfiramHSA, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramHSA_fig.cap, fig.subcap=DisulfiramHSA_fig.subcap, out.width=ow----
for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA")
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----figureLegends2, echo=FALSE---------------------------------------------------------
BendamustinDLD_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m"
BendamustinDLD_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations. This figure is the basis of Expanded view Figure EV3C in the paper.")

BendamustinDLDHSA_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using the HSA model in the DLD-1 cell line. Error bars, means +- s.e.m"
BendamustinDLDHSA_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations.")

DisulfiramDLD_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m"
DisulfiramDLD_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations. This figure is the basis of Expanded View Figure EV3H in the paper.")

DisulfiramDLDHSA_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m"
DisulfiramDLDHSA_fig.subcap = c("Normalized viability values for all experiments.", 
                           "Line plot of sumarized viability values for selected concentrations.",
                           "Barplot of sumarized viability values for selected concentrations.",
                           "Barplot of drug inhibition for selected concentrations.")

## ----drugInteractionsDLD, dependson = c(-2)---------------------------------------------
experiment = "2013-10-22platelist_DLD_72_NPI_none"

## get the dataframe and check the data in an explorative manner
df = read.delim(file.path("result", experiment, "in", "topTable.txt"), 
                stringsAsFactors=FALSE)
df$identifier = paste(df$drug1, df$drug2, sep="_")  
df$concdrug1.micro_M. = round(df$concdrug1.micro_M., 4)

theme_new = theme_bw()
theme_new$text$size = 20
theme_new$axis.text$size = rel(1.2) 

## ----drugInteractionsDLDBendatmustin, dependson = c(-1), fig.show='hold', fig.cap=BendamustinDLD_fig.cap, fig.subcap=BendamustinDLD_fig.subcap, out.width=ow----
drugCombis = c("Bendamustine_AKTi VIII")
## drugCombis = c("Bendamustine_AKTi VIII", "Bendamustine_PD", "Bendamustine_U0126",
##                "Bendamustine_MK2206")


## for better overview plot only a selection of concentrations  
concentrations = c("1.25", "2.5", "5", "10")
breaks = c(1,3,10)
limits = c(1, 11)

for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE)
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05, 
                                             ylimits=c(-0.05,.6))
} 

## ----drugInteractionsDLDBendatmustinHSA, dependson = c(-1), fig.show='hold', fig.cap=BendamustinDLDHSA_fig.cap, fig.subcap=BendamustinDLDHSA_fig.subcap, out.width=ow----
for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA")
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05, 
                                             ylimits=c(-0.05,.6))
} 

## ----drugInteractionsDLDDisulfiram, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramDLD_fig.cap, fig.subcap=DisulfiramDLD_fig.subcap, out.width=ow----
drugCombis = c("Disulfiram_PD")
##drugCombis = c("Disulfiram_PD", "Disulfiram_U0126")



## for better overview plot only a selection of concentrations  
concentrations = c("0.0195", "0.0391","0.0781","0.1562")
breaks = c(0.01, 0.03, 0.1, 0.3)
limits=c (0.01, 0.5)

for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE)
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----drugInteractionsDLDDisulfiramHSA, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramDLDHSA_fig.cap, fig.subcap=DisulfiramDLDHSA_fig.subcap, out.width=ow----
for (combi in drugCombis){
  summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA")
  
  PGPC:::plotSummary(summary, 
                            concentration=concentrations, 
                            pthresh=0.05, 
                            breaks=breaks, 
                            limits=limits)
  
  PGPC:::plotSummaryBarplot(summary, 
                                   concentration=concentrations, 
                                   pthresh=0.05)
  
  PGPC:::plotSummaryBarplotInhibition(summary, 
                                             concentration=concentrations, 
                                             pthresh=0.05)
} 

## ----echo=FALSE, cache=FALSE, message=FALSE---------------------------------------------
knitr::set_parent(file.path('..', 'PGPC.Rnw'))
library(PGPC)

raw_fig.cap = "Raw plate reader values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay."
raw_fig.subcap = c("Untransformed values.", "log10 transformed values.")

viab_fig.cap = "Viability normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay."
viab_fig.subcap = c("Untransformed values.", "log10 transformed values.")

prot_fig.cap = "Viability and proteasome activity normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay."
prot_fig.subcap = c("Untransformed values.", "log10 transformed values.")

sum_fig.cap = "Summary of viability and proteasome activity normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay. Error bars, means +- s.e.m."
sum_fig.subcap = c("Untransformed values. This figure is the basis of Figure 6B in the paper.
", "log10 transformed values.")

fw = 10
fh = 6

## ----readData, dependson = c(-1), fig.show="hold", fig.cap=raw_fig.cap, fig.subcap=raw_fig.subcap, fig.width=fw, fig.height=fh----
readData <- function(path, files, log10Transform=TRUE){
  do.call(rbind, lapply(files, function(file){
    tmp <- read.delim(file.path(path, file))
    tmp$plate = match(file, files)
    tmp$plateName = file
    if(log10Transform) tmp$log10Value = log10(tmp$value)
    tmp
  }))
}

path=system.file("extdata", "Proteasome_assays_follow-up",
                 package="PGPC")
files = list.files(path)

data = readData(path,files)
head(data)

## remove BTO-1 drug
data <- subset(data, drug != "BTO-1")

data$assay = factor(data$assay,
                    levels=c("CTG", "Chym","Tryp", "CASP"), 
                    ordered=TRUE)
levels(data$assay) = c("CTG", "CT-L","T-L", "C-L")
data$drug = factor(data$drug, 
                   levels=c("DMSO", "Disulfiram", "ZPCK", "AG555", "CAPE",  
                            "MG132","Bortezomib 5", "AG1478", "DAPH"), 
                   ordered=TRUE)

cbpalette = c("black", "#ee1e39", "#e51d3a", "#bb1f3b", 
              "#b21e42", "#d1d2d2", "#9b9b9b", "#3c53a4", "#4a52a3")

ggplot(data, aes(x=drug, y=value, color=drug, shape=plateName)) + 
  geom_jitter(position = position_jitter(width = .3), size=2.5) + 
  theme_bw() + 
  scale_color_manual(values=cbpalette) + 
  facet_grid(assay ~ ., scales="free_y") 


ggplot(data, aes(x=drug, y=log10Value, color=drug, shape=plateName)) + 
  geom_jitter(position = position_jitter(width = .3), size=2.5) + 
  theme_bw() + 
  scale_color_manual(values=cbpalette) + 
  facet_grid(assay ~ ., scales="free_y")

## ----plateMeanValues--------------------------------------------------------------------
mergeWells <- function(data){
  for(plate in unique(data$plate)){
    pp = data$plate == plate
    for(drug in unique(data$drug)){
      ii = data$drug == drug
      for (assay in unique(data$assay)){      
        jj = data$assay == assay
        data$mean[pp & ii & jj] = mean(data$value[pp & ii & jj])        
      }
    }  
  }
  data$log10Value = log10(data$mean)
  data$value=NULL
  names(data) <- gsub("mean", "value", names(data))
  unique(data)
}

data = mergeWells(data)

## ----normalizeViability, dependson = c(-1), fig.show="hold", fig.cap=viab_fig.cap, fig.subcap=viab_fig.subcap, fig.width=fw, fig.height=fh----
normalizeViability <- function(data){
  for(plate in unique(data$plate)){
    pp = data$plate == plate
    for(drug in unique(data$drug)){
      ii = data$drug == drug
      jj = data$assay == "CTG"
         
      data$normalized[ii & pp] = data$value[ii & pp] / data$value[ii & jj & pp]
      
      data$logNormalized[ii & pp] = 
        data$log10Value[ii & pp] - data$log10Value[ii & jj & pp]
    }  
  }
  data
}

data = normalizeViability(data)
head(data)


# Error bars represent standard error of the mean
ggplot(data, aes(x=drug, y=normalized, color=drug, shape=plateName)) + 
  geom_point(position=position_jitter(height=0), size=2.5) +
  theme_bw() +
  scale_color_manual(values=cbpalette) +
  facet_grid(assay ~ ., scales="free_y") 


ggplot(data, aes(x=drug, y=logNormalized, color=drug, shape=plateName)) + 
  geom_point(position=position_jitter(height=0), size=2.5) +
  theme_bw() +
  scale_color_manual(values=cbpalette) +
  facet_grid(assay ~ ., scales="free_y") 

## ----normalizeAssay, dependson = c(-1), fig.show="hold", fig.cap=prot_fig.cap, fig.subcap=prot_fig.subcap, fig.width=fw, fig.height=fh----
normalizeAssay <- function(data){
  for(plate in unique(data$plate)){
    pp = data$plate == plate
    for(assay in unique(data$assay)){    
      ii = data$assay == assay
      jj = data$drug == "DMSO"
      
      data$normalizedActivity[ii & pp] =
        data$normalized[ii & pp] / data$normalized[ii & jj & pp]
      
      data$logNormalizedActivity[ii & pp] =
        data$logNormalized[ii & pp] - data$logNormalized[ii & jj & pp]
    }  
  }
  data
}

data = normalizeAssay(data)
head(data)

# Error bars represent standard error of the mean
ggplot(data, aes(x=drug, y=normalizedActivity, color=drug, shape=plateName)) + 
  geom_point(position=position_jitter(height=0), size=2.5) +
  theme_bw() +
  scale_color_manual(values=cbpalette) +
  facet_grid(assay ~ ., scales="free_y") 


ggplot(data, aes(x=drug, 
                 y=logNormalizedActivity,
                 color=drug,
                 shape=plateName)) + 
  geom_point(position=position_jitter(height=0), size=2.5) +
  theme_bw() +
  scale_color_manual(values=cbpalette) +
  facet_grid(assay ~ ., scales="free_y") 

## ----summary, dependson = c(-1), fig.show="hold", fig.cap=sum_fig.cap, fig.subcap=sum_fig.subcap, fig.width=fw, fig.height=fh----
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  
  #  handle NA's: if na.rm==TRUE, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  #  summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   tTest = try(t.test(xx[[col]] - 
                                        ifelse(grepl("log", measurevar), 
                                               0,
                                               1)), 
                               silent=TRUE)
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm),
                     p.value = ifelse(class(tTest) != "try-error",
                                      tTest$p.value,
                                      1)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}

summary = summarySE(data, 
                    measurevar="normalizedActivity", 
                    groupvars=c("drug","assay"))

## remove ctg assay
summary <- subset(summary, assay!="CTG")

## remove DMSO control
summary <- subset(summary, drug!="DMSO")

pthresh = 0.05

p <- ggplot(summary,
            aes(x=assay, 
                y=100*(1-normalizedActivity), 
                ymin=100*(1-normalizedActivity-se), 
                ymax=100*(1-normalizedActivity+se), 
                fill=drug)) + 
  geom_bar(position=position_dodge(), stat="identity", color="black") +
  geom_errorbar(width=.2,
                position=position_dodge(.9)) +
  theme_bw() + theme(panel.grid = element_blank()) +
  scale_fill_manual(values=cbpalette[-1]) +
  ylab("normalized proteasome inhibition (%)")
if(any(summary$p.value < pthresh))
  p = p + geom_point(aes(y=ifelse(p.value < pthresh,
                                  100*(1-normalizedActivity + se + 0.05),
                                  NA)),
                     position=position_dodge(.9),
                     pch=8,
                     col=1,
                     show_guide = FALSE,
                     na.rm=TRUE)
print(p)


logSummary = summarySE(data,
                       measurevar="logNormalizedActivity",
                       groupvars=c("drug","assay"))

## remove ctg assay
logSummary <- subset(logSummary, assay!="CTG")

## remove DMSO control
logSummary <- subset(logSummary, drug!="DMSO")

p <- ggplot(logSummary, 
            aes(x=assay, y=-logNormalizedActivity, 
                ymin=-logNormalizedActivity-se, 
                ymax=-logNormalizedActivity+se, 
                fill=drug)) + 
  geom_bar(position=position_dodge(), stat="identity", color="black") +
  geom_errorbar(width=.2,
                position=position_dodge(.9)) + 
  theme_bw() + theme(panel.grid = element_blank()) +
  scale_fill_manual(values=cbpalette[-1])
if(any(summary$p.value < pthresh))
  p = p + geom_point(aes(y=ifelse(p.value < pthresh,
                                  -logNormalizedActivity+se+0.05,
                                  NA)),
                     position=position_dodge(width=.9),
                     pch=8,
                     col=1,
                     show_guide = FALSE,
                     na.rm=TRUE)
print(p)

## ----sessionInfo, echo=FALSE------------------------------------------------------------
sessionInfo()