## ----eval=FALSE----------------------------------------------------------
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("RTCGAToolbox")

## ------------------------------------------------------------------------
library(RTCGAToolbox)

# List the dataset aliases accepted by the package.
getFirehoseDatasets()

## ------------------------------------------------------------------------
# Fetch the valid stddata run dates, keep them in `stddata`, and show them.
stddata <- getFirehoseRunningDates()
stddata

## ------------------------------------------------------------------------
# Fetch the valid analysis run dates, limited to the 3 most recent
# (last = 3), keep them in `gisticDate`, and show them.
gisticDate <- getFirehoseAnalyzeDates(last = 3)
gisticDate

## ----eval=TRUE,message=FALSE---------------------------------------------
# Request mutation and clinical data for the "READ" dataset alias from the
# 20150402 run, forcing a download.
# NOTE(review): the result is stored in `brcaData` although the requested
# dataset is "READ"; the name is left unchanged here -- confirm whether a
# rename is wanted.
brcaData <- getFirehoseData(
  dataset = "READ",
  runDate = "20150402",
  forceDownload = TRUE,
  Clinic = TRUE,
  Mutation = TRUE
)

## ------------------------------------------------------------------------
# Load the bundled sample object and display it.
data("RTCGASample")
RTCGASample

## ------------------------------------------------------------------------
# Differential gene expression analysis on the sample object (gene level
# RNA data), with plots enabled.
diffGeneExprs <- getDiffExpressedGenes(
  dataObject = RTCGASample,
  DrawPlots = TRUE,
  adj.method = "BH",
  adj.pval = 0.05,
  raw.pval = 0.05,
  logFC = 2,
  hmTopUpN = 10,
  hmTopDownN = 10
)
# Display the returned object, show its first element, then keep whatever
# showResults() returns for that element in `toptableOut`.
diffGeneExprs
showResults(diffGeneExprs[[1]])
toptableOut <- showResults(diffGeneExprs[[1]])

## ------------------------------------------------------------------------
# Correlation between gene expression values and copy number for the
# sample object.
corrGECN <- getCNGECorrelation(
  dataObject = RTCGASample,
  adj.method = "BH",
  adj.pval = 0.9,
  raw.pval = 0.05
)
# Display the returned object, show its first element, then keep whatever
# showResults() returns for that element in `corRes`.
corrGECN
showResults(corrGECN[[1]])
corRes <- showResults(corrGECN[[1]])

## ------------------------------------------------------------------------
# Mutation frequencies for the sample object; show the first rows after
# sorting by the second column, largest values first.
mutFrq <- getMutationRate(dataObject = RTCGASample)
head(mutFrq[order(mutFrq[, 2], decreasing = TRUE), ])

## ----fig.width=6,fig.height=6,fig.align='center'-------------------------
# Build the survival data frame and run the analysis for FCGBP, which is
# one of the most frequently mutated genes in the toy data.
# Running this chunk produces the KM plot.
clinicData <- getData(RTCGASample, "Clinical")
head(clinicData)
# Keep columns 3 to 5 only; from here on the indices 1-3 refer to this subset.
# NOTE(review): columns are selected by position -- confirm the clinical
# table layout these indices assume.
clinicData <- clinicData[, 3:5]
# Where column 3 is missing, fall back to the value in column 2.
clinicData[is.na(clinicData[, 3]), 3] <- clinicData[is.na(clinicData[, 3]), 2]
survData <- data.frame(
  Samples = rownames(clinicData),
  Time = as.numeric(clinicData[, 3]),
  Censor = as.numeric(clinicData[, 1])
)
getSurvival(
  dataObject = RTCGASample,
  geneSymbols = c("FCGBP"),
  sampleTimeCensor = survData
)

## ------------------------------------------------------------------------
# Note: the function discussed at this point is provided for testing with a
# real dataset, since the toy dataset is small.
# NOTE(review): the only statement in this chunk displays the toy object;
# which function the note refers to is not visible here -- confirm.
RTCGASample

## ----message=FALSE-------------------------------------------------------
# Pull individual data types out of the sample object with getData().
RTCGASampleClinical <- getData(RTCGASample, "Clinical")
RTCGASampleRNAseqCounts <- getData(RTCGASample, "RNASeqGene")
RTCGASampleCN <- getData(RTCGASample, "GISTIC")

## ----eval=FALSE----------------------------------------------------------
#  # BRCA data with mRNA (Both array and RNASeq), GISTIC processed copy number data
#  # mutation data and clinical data
#  # (Depending on bandwidth, this process may take a long time)
#  brcaData = getFirehoseData (dataset="BRCA", runDate="20140416", gistic2_Date="20140115",
#                 Clinic=TRUE, RNAseq_Gene=TRUE, mRNA_Array=TRUE, Mutation=TRUE)
#  
#  # Differential gene expression analysis for gene level RNA data.
#  # Heatmaps are given below.
#  diffGeneExprs = getDiffExpressedGenes(dataObject=brcaData,DrawPlots=TRUE,
#                                     adj.method="BH",adj.pval=0.05,raw.pval=0.05,
#                                     logFC=2,hmTopUpN=100,hmTopDownN=100)
#  # Show head for expression outputs
#  diffGeneExprs
#  showResults(diffGeneExprs[[1]])
#  toptableOut = showResults(diffGeneExprs[[1]])
#  
#  # Correlation between expression profiles and copy number data
#  corrGECN = getCNGECorrelation(dataObject=brcaData,adj.method="BH",
#                                adj.pval=0.05,raw.pval=0.05)
#  
#  corrGECN
#  showResults(corrGECN[[1]])
#  corRes = showResults(corrGECN[[1]])
#  
#  # Gene mutation frequencies in BRCA dataset
#  mutFrq = getMutationRate(dataObject=brcaData)
#  head(mutFrq[order(mutFrq[,2],decreasing=TRUE),])
#  
#  # PIK3CA, which is one of the most frequently mutated genes in the BRCA dataset
#  # KM plot is given below.
#  clinicData <- getData(brcaData,"Clinical")
#  head(clinicData)
#  clinicData = clinicData[,3:5]
#  clinicData[is.na(clinicData[,3]),3] = clinicData[is.na(clinicData[,3]),2]
#  survData <- data.frame(Samples=rownames(clinicData),
#                          Time=as.numeric(clinicData[,3]),
#                          Censor=as.numeric(clinicData[,1]))
#  getSurvival(dataObject=brcaData,geneSymbols=c("PIK3CA"),sampleTimeCensor=survData)

## ----eval=FALSE----------------------------------------------------------
#  # Creating dataset analysis summary figure with getReport.
#  # Figure will be saved as PDF file.
#  library("Homo.sapiens")
#  locations = genes(Homo.sapiens,columns="SYMBOL")
#  locations = as.data.frame(locations)
#  locations <- locations[,c(6,1,5,2:3)]
#  locations <- locations[!is.na(locations[,1]),]
#  locations <- locations[!duplicated(locations[,1]),]
#  rownames(locations) <- locations[,1]
#  getReport(dataObject=brcaData,DGEResult1=diffGeneExprs[[1]],
#  DGEResult2=diffGeneExprs[[2]],geneLocations=locations)

## ------------------------------------------------------------------------
# Load the bundled sample object again.
data("RTCGASample")

## ------------------------------------------------------------------------
# Record the R session details for this run.
utils::sessionInfo()