## ----eval=FALSE---------------------------------------------------------------
#  
#  suppressWarnings(suppressMessages(require(netDx)))
#  suppressMessages(require(curatedTCGAData))
#  
#  # fetch data for example
#  brca <- suppressMessages(
#    curatedTCGAData("BRCA",c("mRNAArray"),FALSE, version="1.1.38"))
#  
#  # reformat for this example
#  staget <- sub("[abcd]","",
#    sub("t","",colData(brca)$pathology_T_stage))
#  staget <- suppressWarnings(as.integer(staget))
#  colData(brca)$STAGE <- staget
#  
#  pam50 <- colData(brca)$PAM50.mRNA
#  # binarize class
#  pam50[which(!pam50 %in% "Luminal A")] <- "notLumA"
#  pam50[which(pam50 %in% "Luminal A")] <- "LumA"
#  colData(brca)$pam_mod <- pam50
#  tmp <- colData(brca)$PAM50.mRNA
#  idx <- union(which(tmp %in% c("Normal-like",
#                    "Luminal B","HER2-enriched")),
#              which(is.na(staget)))
#  pID <- colData(brca)$patientID
#  tokeep <- setdiff(pID, pID[idx])
#  brca <- brca[,tokeep,]
#  # remove duplicate assays mapped to the same sample
#  smp <- sampleMap(brca)
#  samps <- smp[which(smp$assay=="BRCA_mRNAArray-20160128"),]
#  notdup <- samps[which(!duplicated(samps$primary)),"colname"]
#  brca[[1]] <- suppressMessages(brca[[1]][,notdup])
#  pam50 <- colData(brca)$pam_mod
#  pheno <- colData(brca)
#  
#  # create holdout set
#  set.seed(123) # make reproducible
#  idx_holdout <- c(
#    sample(which(pam50 == "LumA"),10,F),
#    sample(which(pam50 == "notLumA"),10,F)
#  	)
#  holdout <- brca[,rownames(pheno)[idx_holdout]]
#  colData(holdout)$ID <- as.character(colData(holdout)$patientID)
#  colData(holdout)$STATUS <- colData(holdout)$pam_mod
#  
#  # remove holdout samples from training set
#  tokeep <- setdiff(1:length(pam50),idx_holdout)
#  brca <- brca[,rownames(pheno)[tokeep]]
#  pID <- as.character(colData(brca)$patientID)
#  colData(brca)$ID <- pID
#  colData(brca)$STATUS <- colData(brca)$pam_mod
#  
#  # feature design
#  # genes in mRNA data are grouped by pathways
#  pathList <- readPathways(fetchPathwayDefinitions("January",2018))
#  
#  groupList <- list()
#  groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]
#  # clinical data is not grouped; each variable is its own feature
#  groupList[["clinical"]] <- list(
#       age="patient.age_at_initial_pathologic_diagnosis",
#  	   stage="STAGE"
#  )
#  
#  # define function to create nets
#  makeNets <- function(dataList, groupList, netDir, ...) {
#    netList <- c() # initialize before is.null() check
#    # make RNA nets (NOTE: the check for is.null() is important!)
#    # (Pearson correlation)
#    if (!is.null(groupList[["BRCA_mRNAArray-20160128"]])) {
#      netList <- makePSN_NamedMatrix(
#          dataList[["BRCA_mRNAArray-20160128"]],
#          rownames(dataList[["BRCA_mRNAArray-20160128"]]),
#          groupList[["BRCA_mRNAArray-20160128"]],
#          netDir, verbose = FALSE,
#          writeProfiles = TRUE, runSerially=TRUE, ...)
#    }
#  
#    # make clinical nets (normalized difference)
#    netList2 <- c()
#    if (!is.null(groupList[["clinical"]])) {
#      netList2 <- makePSN_NamedMatrix(dataList$clinical,
#      rownames(dataList$clinical),
#      groupList[["clinical"]], netDir,
#      simMetric = "custom",
#      customFunc = normDiff, # custom function
#      writeProfiles = FALSE,
#      sparsify = TRUE, verbose = TRUE, runSerially=TRUE, ...)
#    }
#    netList <- c(unlist(netList), unlist(netList2))
#    return(netList)
#  }
#  
#  # run predictor
#  set.seed(42) # make results reproducible
#  outDir <- paste(tempdir(),randAlphanumString(),
#  	"pred_output",sep=getFileSep())
#  if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
#  numSplits <- 2L
#  out <- suppressMessages(
#    buildPredictor(
#        dataList=brca,groupList=groupList,
#        makeNetFunc=makeNets,
#        outDir=outDir,
#        numSplits=numSplits,
#        featScoreMax=2L,
#        featSelCutoff=1L,
#        numCores=1L,debugMode=FALSE,
#        logging="none")
#  )
#  
#  # compile consistently high-scoring functions
#   featScores <- list() # feature scores per class
#   st <- unique(colData(brca)$STATUS)
#   for (k in 1:numSplits) {
#   	# feature scores
#   	for (cur in unique(st)) {
#       if (is.null(featScores[[cur]])) featScores[[cur]] <- list()
#   	   tmp <- out[[sprintf("Split%i",k)]]
#       tmp2 <- tmp[["featureScores"]][[cur]]
#   	   colnames(tmp2) <- c("PATHWAY_NAME","SCORE")
#   	   featScores[[cur]][[sprintf("Split%i",k)]] <- tmp2
#   	}
#   }
#   featScores2 <- lapply(featScores, getNetConsensus)
#   featSelNet <- lapply(featScores2, function(x) {
#       callFeatSel(x, fsCutoff=1, fsPctPass=0)
#  })
#  
#  # predict performance on holdout set
#  outDir <- paste(tempdir(), randAlphanumString(),
#    sep = getFileSep())
#  if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
#  dir.create(outDir)
#  predModel <- suppressMessages(
#    predict(brca, holdout, groupList,
#      featSelNet, makeNets,
#      outDir, verbose = FALSE)
#  )
#  
#  # compute performance for prediction
#  perf <- getPerformance(predModel, c("LumA", "notLumA"))
#  message(sprintf("AUROC=%1.2f", perf$auroc))
#  message(sprintf("AUPR=%1.2f", perf$aupr))
#  message(sprintf("Accuracy=%1.1f%%", perf$acc))
#  
#  # plot ROC and PR curve
#  plotPerf_multi(list(perf$rocCurve),
#    plotTitle = sprintf(
#      "BRCA Validation: %i samples",
#      nrow(colData(holdout))))
#  plotPerf_multi(list(perf$prCurve),
#    plotType = "PR",
#    plotTitle = sprintf(
#      "BRCA Validation: %i samples",
#      nrow(colData(holdout))))
#  

## ----eval=TRUE----------------------------------------------------------------
suppressWarnings(suppressMessages(require(netDx)))
suppressMessages(require(curatedTCGAData))

brca <- suppressMessages(
  curatedTCGAData("BRCA",c("mRNAArray"),FALSE, version="1.1.38"))

staget <- sub("[abcd]","",
  sub("t","",colData(brca)$pathology_T_stage))
staget <- suppressWarnings(as.integer(staget))
colData(brca)$STAGE <- staget

pam50 <- colData(brca)$PAM50.mRNA
pam50[which(!pam50 %in% "Luminal A")] <- "notLumA"
pam50[which(pam50 %in% "Luminal A")] <- "LumA"
colData(brca)$pam_mod <- pam50

tmp <- colData(brca)$PAM50.mRNA
idx <- union(which(tmp %in% c("Normal-like",
                  "Luminal B","HER2-enriched")),
            which(is.na(staget)))
pID <- colData(brca)$patientID
tokeep <- setdiff(pID, pID[idx])
brca <- brca[,tokeep,]

# remove duplicate assays mapped to the same sample
smp <- sampleMap(brca)
samps <- smp[which(smp$assay=="BRCA_mRNAArray-20160128"),]
notdup <- samps[which(!duplicated(samps$primary)),"colname"]
brca[[1]] <- suppressMessages(brca[[1]][,notdup])

pam50 <- colData(brca)$pam_mod
pheno <- colData(brca)

## ----eval=TRUE----------------------------------------------------------------
set.seed(123) # make reproducible
idx_holdout <- c(
  sample(which(pam50 == "LumA"),10,F),
  sample(which(pam50 == "notLumA"),10,F)
	)
holdout <- brca[,rownames(pheno)[idx_holdout]]
colData(holdout)$ID <- as.character(colData(holdout)$patientID)
colData(holdout)$STATUS <- colData(holdout)$pam_mod
tokeep <- setdiff(1:length(pam50),idx_holdout)
brca <- brca[,rownames(pheno)[tokeep]]

pID <- as.character(colData(brca)$patientID)
colData(brca)$ID <- pID
colData(brca)$STATUS <- colData(brca)$pam_mod

## ----eval=TRUE----------------------------------------------------------------
# genes in mRNA data are grouped by pathways
pathList <- readPathways(fetchPathwayDefinitions("January",2018))

groupList <- list()
groupList[["BRCA_mRNAArray-20160128"]] <- pathList[1:3]

# clinical data is not grouped; each variable is its own feature
groupList[["clinical"]] <- list(
     age="patient.age_at_initial_pathologic_diagnosis",
	   stage="STAGE"
)

## ----eval=TRUE----------------------------------------------------------------
makeNets <- function(dataList, groupList, netDir, ...) {
  netList <- c() # initialize before is.null() check

  # make RNA nets (NOTE: the check for is.null() is important!)
  # (Pearson correlation)
  if (!is.null(groupList[["BRCA_mRNAArray-20160128"]])) {
    netList <- makePSN_NamedMatrix(
        dataList[["BRCA_mRNAArray-20160128"]],
        rownames(dataList[["BRCA_mRNAArray-20160128"]]),
        groupList[["BRCA_mRNAArray-20160128"]],
        netDir, verbose = FALSE,
        writeProfiles = TRUE, runSerially=TRUE, ...)
  }

  # make clinical nets (normalized difference)
  netList2 <- c()
  if (!is.null(groupList[["clinical"]])) {
    netList2 <- makePSN_NamedMatrix(dataList$clinical,
    rownames(dataList$clinical),
    groupList[["clinical"]], netDir,
    simMetric = "custom", 
    customFunc = normDiff, # custom function
    writeProfiles = FALSE,
    sparsify = TRUE, verbose = TRUE, runSerially=TRUE, ...)
  }
  netList <- c(unlist(netList), unlist(netList2))
  return(netList)
}

## ----eval=TRUE----------------------------------------------------------------
set.seed(42) # make results reproducible
outDir <- paste(tempdir(),randAlphanumString(),
	"pred_output",sep=getFileSep())
if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
numSplits <- 2L
out <- suppressMessages(
  buildPredictor(
      dataList=brca,groupList=groupList,
      makeNetFunc=makeNets,
      outDir=outDir, 
      numSplits=numSplits,
      featScoreMax=2L,
      featSelCutoff=1L,
      numCores=1L,debugMode=FALSE,
      logging="none")
)

## ----eval=TRUE----------------------------------------------------------------
 featScores <- list() # feature scores per class
 st <- unique(colData(brca)$STATUS)
 for (k in 1:numSplits) { 
 	# feature scores
 	for (cur in unique(st)) {
     if (is.null(featScores[[cur]])) featScores[[cur]] <- list()
 	   tmp <- out[[sprintf("Split%i",k)]]
     tmp2 <- tmp[["featureScores"]][[cur]]
 	   colnames(tmp2) <- c("PATHWAY_NAME","SCORE")
 	   featScores[[cur]][[sprintf("Split%i",k)]] <- tmp2
 	}
 }
 # compile scores across runs
 featScores2 <- lapply(featScores, getNetConsensus)

## ----eval=TRUE----------------------------------------------------------------
 featSelNet <- lapply(featScores2, function(x) {
     callFeatSel(x, fsCutoff=1, fsPctPass=0)
})

## ----eval=TRUE----------------------------------------------------------------
outDir <- paste(tempdir(), randAlphanumString(), 
  sep = getFileSep())
if (file.exists(outDir)) unlink(outDir,recursive=TRUE)
dir.create(outDir)

predModel <- suppressMessages(
  predict(brca, holdout, groupList, 
    featSelNet, makeNets,
    outDir, verbose = FALSE)
)

## ----eval=TRUE----------------------------------------------------------------
perf <- getPerformance(predModel, c("LumA", "notLumA"))

message(sprintf("AUROC=%1.2f", perf$auroc))
message(sprintf("AUPR=%1.2f", perf$aupr))
message(sprintf("Accuracy=%1.1f%%", perf$acc))

## ----eval=TRUE----------------------------------------------------------------
plotPerf_multi(list(perf$rocCurve),
  plotTitle = sprintf(
    "BRCA Validation: %i samples", 
    nrow(colData(holdout))))
plotPerf_multi(list(perf$prCurve), 
  plotType = "PR",
  plotTitle = sprintf(
    "BRCA Validation: %i samples", 
    nrow(colData(holdout))))

## ----eval=TRUE----------------------------------------------------------------
sessionInfo()