## ----results='hide', message=FALSE---------------------------------------
# Attach maftools. library() stops with an error when the package is missing,
# whereas require() would only return FALSE and let the calls below fail later.
library(maftools)

# Path to the TCGA LAML MAF file shipped in the package's extdata directory.
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
# Clinical information containing survival information and histology.
# This input is optional.
laml.clin <- system.file("extdata", "tcga_laml_annot.tsv", package = "maftools")

# Read the MAF file together with the clinical annotations.
laml <- read.maf(maf = laml.maf, clinicalData = laml.clin)

## ------------------------------------------------------------------------
# Printing the object shows a basic summary of the MAF file.
laml
# Sample summary.
getSampleSummary(laml)
# Gene summary.
getGeneSummary(laml)
# Clinical data associated with the samples.
getClinicalData(laml)
# All fields present in the MAF.
getFields(laml)
# Write the MAF summary to output files that use the basename "laml".
write.mafSummary(
  maf = laml,
  basename = "laml"
)

## ----fig.height=5, fig.width=6-------------------------------------------
# Summary plot of the LAML cohort (dashboard = TRUE, addStat = "median").
plotmafSummary(
  maf = laml,
  rmOutlier = TRUE,
  addStat = "median",
  dashboard = TRUE,
  titvRaw = FALSE
)

## ---- fig.align='left',fig.height=5,fig.width=10, fig.align='left'-------
# Oncoplot restricted to the ten most frequently mutated genes (top = 10).
oncoplot(
  maf = laml,
  top = 10,
  fontSize = 12
)

## ----results='hide', message=FALSE, fig.height=5,fig.width=10, fig.align='left'----
# Read the TCGA LAML MAF file again, this time together with the four GISTIC
# result files found in the maftools extdata directory.
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
all.lesions <- system.file(
  "extdata", "all_lesions.conf_99.txt",
  package = "maftools"
)
amp.genes <- system.file(
  "extdata", "amp_genes.conf_99.txt",
  package = "maftools"
)
del.genes <- system.file(
  "extdata", "del_genes.conf_99.txt",
  package = "maftools"
)
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")

laml.plus.gistic <- read.maf(
  maf = laml.maf,
  gisticAllLesionsFile = all.lesions,
  gisticAmpGenesFile = amp.genes,
  gisticDelGenesFile = del.genes,
  gisticScoresFile = scores.gis,
  isTCGA = TRUE
)

## ---- fig.align='left',fig.height=5,fig.width=10, eval=T, fig.align='left'----
# Oncoplot for the top ten mutated genes of the MAF + GISTIC object.
# NOTE(review): the original comment says non-mutated samples are removed for
# better visualization; this call does not set that option explicitly, so it
# presumably relies on an oncoplot() default -- confirm.
oncoplot(
  maf = laml.plus.gistic,
  top = 10,
  fontSize = 12
)

## ---- fig.height=7,fig.width=10, eval=T, fig.align='left'----------------
# Custom colors for the variant classifications. Any colors can be used; this
# example takes the "Paired" palette from RColorBrewer and names each color
# after the classification it should represent.
col <- setNames(
  RColorBrewer::brewer.pal(n = 8, name = "Paired"),
  c(
    "Frame_Shift_Del", "Missense_Mutation", "Nonsense_Mutation", "Multi_Hit",
    "Frame_Shift_Ins", "In_Frame_Ins", "Splice_Site", "In_Frame_Del"
  )
)

# Color coding for the FAB classification, wrapped in a list keyed by the
# annotation name. Try getAnnotations(x = laml) to see available annotations.
fabcolors <- list(
  FAB_classification = setNames(
    RColorBrewer::brewer.pal(n = 8, name = "Spectral"),
    c("M0", "M1", "M2", "M3", "M4", "M5", "M6", "M7")
  )
)

# MutSig results
laml.mutsig <- system.file(
  "extdata", "LAML_sig_genes.txt.gz",
  package = "maftools"
)

oncoplot(
  maf = laml,
  colors = col,
  mutsig = laml.mutsig,
  mutsigQval = 0.01,
  clinicalFeatures = "FAB_classification",
  sortByAnnotation = TRUE,
  annotationColor = fabcolors
)

## ---- fig.height=2,fig.width=8,fig.align='left'--------------------------
# Oncostrip for three selected genes.
oncostrip(
  maf = laml,
  genes = c("DNMT3A", "NPM1", "RUNX1")
)

## ---- fig.height=5, fig.width=6, eval = T, fig.align='left'--------------
# Compute the titv summary without plotting it straight away (plot = FALSE).
laml.titv <- titv(
  maf = laml,
  plot = FALSE,
  useSyn = TRUE
)
# Plot the titv summary from the stored result.
plotTiTv(res = laml.titv)

## ----fig.height=3,fig.width=7,fig.align='left'---------------------------
# Lollipop plot for DNMT3A, one of the most frequently mutated genes in
# leukemia. Amino-acid changes are read from the "Protein_Change" column.
dnmt3a.lpop <- lollipopPlot(
  maf = laml,
  gene = "DNMT3A",
  AACol = "Protein_Change",
  showMutationRate = TRUE,
  domainLabelSize = 3,
  defaultYaxis = FALSE
)

## ----fig.height=3,fig.width=7,fig.align='left'---------------------------
# Lollipop plot of mutations on the KIT gene, without the repel option.
# Positions 416 and 418 are passed as labelPos.
kit.lpop <- lollipopPlot(
  maf = laml,
  gene = "KIT",
  AACol = "Protein_Change",
  labelPos = c(416, 418),
  refSeqID = "NM_000222",
  domainLabelSize = 3,
  labPosSize = 3
)

## ---- warning=FALSE, message=FALSE, fig.height=3,fig.width=7,fig.align='left'----
# Lollipop plot for DNMT3A on transcript NM_175629, passing position 882 as
# labelPos with collapsePosLabel = TRUE and cBioPortal = TRUE.
laml.dnmt3a <- lollipopPlot(
  maf = laml,
  gene = "DNMT3A",
  AACol = "Protein_Change",
  refSeqID = "NM_175629",
  labelPos = 882,
  collapsePosLabel = TRUE,
  cBioPortal = TRUE,
  domainLabelSize = 3,
  defaultYaxis = FALSE,
  labPosSize = 3
)

## ---- fig.height=3,fig.width=8,fig.align='center'------------------------
# Segment file for sample TCGA.AB.3009 from the maftools extdata directory.
# NOTE(review): the variable name reads "009" although the file is for 3009.
tcga.ab.009.seg <- system.file(
  "extdata", "TCGA.AB.3009.hg19.seg.txt",
  package = "maftools"
)
# Plot the CBS segments together with the LAML MAF (labelAll = TRUE).
plotCBSsegments(
  cbsFile = tcga.ab.009.seg,
  maf = laml,
  labelAll = TRUE
)

## ---- results='hide', message=FALSE--------------------------------------
# Read the COAD MAF file shipped with maftools; `coad` ends up holding the
# object returned by read.maf().
coad <- read.maf(
  maf = system.file("extdata", "coad.maf.gz", package = "maftools")
)

## ---- fig.height=4.5,fig.width=8,fig.align='center'----------------------
 # Rainfall plot for the COAD cohort with detectChangePoints switched on.
 rainfallPlot(
   maf = coad,
   detectChangePoints = TRUE,
   fontSize = 12,
   pointSize = 0.6
 )

## ---- fig.align='left', fig.height=4.5, fig.width=8, message=FALSE, results='hide'----
# Run tcgaCompare() on the LAML MAF, labelling this cohort "Example-LAML".
laml.mutload <- tcgaCompare(
  maf = laml,
  cohortName = "Example-LAML"
)

## ---- fig.align='left', fig.width=4, fig.height=4------------------------
# VAF plot; the allele frequencies are taken from column "i_TumorVAF_WU".
vafPlot <- plotVaf(
  maf = laml,
  vafCol = "i_TumorVAF_WU",
  flip = FALSE
)

## ---- fig.align='left', eval=T-------------------------------------------
# Gene cloud for the LAML cohort, called with minMut = 3.
geneCloud(
  input = laml,
  minMut = 3
)

## ------------------------------------------------------------------------
# Locate the four GISTIC result files in the maftools extdata directory.
all.lesions <- system.file(
  "extdata", "all_lesions.conf_99.txt",
  package = "maftools"
)
amp.genes <- system.file(
  "extdata", "amp_genes.conf_99.txt",
  package = "maftools"
)
del.genes <- system.file(
  "extdata", "del_genes.conf_99.txt",
  package = "maftools"
)
scores.gis <- system.file("extdata", "scores.gistic", package = "maftools")

# Read them into a GISTIC object.
laml.gistic <- readGistic(
  gisticAllLesionsFile = all.lesions,
  gisticAmpGenesFile = amp.genes,
  gisticDelGenesFile = del.genes,
  gisticScoresFile = scores.gis,
  isTCGA = TRUE
)

# Print the GISTIC object.
laml.gistic

## ---- fig.width=6, fig.height=4.5, fig.align='left'----------------------
# Chromosome plot of the GISTIC results, called with markBands = "all".
gcp <- gisticChromPlot(
  gistic = laml.gistic,
  markBands = "all"
)

## ---- fig.width=6, fig.height=4.5, fig.align='left'----------------------
# Bubble plot of the GISTIC results.
gbp <- gisticBubblePlot(gistic = laml.gistic)

## ---- fig.align='left',fig.width=7, fig.height=5, eval=T-----------------
# Oncoplot of the GISTIC results (top = 10), annotated with the FAB
# classification taken from the clinical data of the LAML MAF.
gisticOncoPlot(
  gistic = laml.gistic,
  clinicalData = getClinicalData(x = laml),
  clinicalFeatures = "FAB_classification",
  sortByAnnotation = TRUE,
  top = 10
)

## ---- message=FALSE------------------------------------------------------
# Run somaticInteractions() on the top 25 mutated genes (top = 25), passing
# 0.05 and 0.1 as the pvalue argument.
somaticInteractions(
  maf = laml,
  top = 25,
  pvalue = c(0.05, 0.1)
)

## ---- fig.height=2,fig.width=8,fig.align='center'------------------------
# Oncostrip for TP53, FLT3 and RUNX1.
oncostrip(
  maf = laml,
  genes = c("TP53", "FLT3", "RUNX1")
)

## ---- fig.align='default', fig.width=5, message=F,results='hide', eval=T----
# Run oncodrive() with minMut = 5 and pvalMethod = "zscore"; amino-acid
# changes are read from the "Protein_Change" column.
laml.sig <- oncodrive(
  maf = laml,
  AACol = "Protein_Change",
  minMut = 5,
  pvalMethod = "zscore"
)
# Show the first rows of the result.
head(laml.sig)

## ---- fig.align='left', fig.width=5, fig.height=4, eval= T---------------
# Plot the oncodrive() result with fdrCutOff = 0.1.
plotOncodrive(
  res = laml.sig,
  fdrCutOff = 0.1,
  useFraction = TRUE
)

## ---- fig.align='left',fig.width=5, fig.height=4-------------------------
# Run pfamDomains() with top = 10; amino-acid changes come from the
# "Protein_Change" column.
laml.pfam <- pfamDomains(
  maf = laml,
  AACol = "Protein_Change",
  top = 10
)
# Protein summary (only the first 7 columns are printed for display convenience).
laml.pfam$proteinSummary[, 1:7, with = FALSE]
# Domain summary (only the first 3 columns are printed for display convenience).
laml.pfam$domainSummary[, 1:3, with = FALSE]

## ---- fig.width=5, fig.height=4------------------------------------------
# MutSigCV results for TCGA-AML, shipped in the maftools extdata directory.
laml.mutsig <- system.file(
  "extdata", "LAML_sig_genes.txt.gz",
  package = "maftools"
)
# Run pancanComparison() on those results with qval = 0.1 and an input sample
# size of 200.
laml.pancan <- pancanComparison(
  mutsigResults = laml.mutsig,
  qval = 0.1,
  cohortName = "LAML",
  inputSampleSize = 200,
  label = 1,
  normSampleSize = TRUE
)

## ---- fig.width=5, fig.height=5------------------------------------------
# Survival analysis with samples grouped by DNMT3A mutation status. Time and
# status are read from the clinical columns named below.
laml.dnmt3a.surv <- mafSurvival(
  maf = laml,
  genes = "DNMT3A",
  time = "days_to_last_followup",
  Status = "Overall_Survival_Status",
  isTCGA = TRUE
)

## ----results='hide', message=FALSE---------------------------------------
# Primary APL MAF, read straight from the maftools extdata directory.
primary.apl <- read.maf(
  maf = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
)
# Relapse APL MAF, read the same way.
relapse.apl <- read.maf(
  maf = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
)

## ---- fig.align='left'---------------------------------------------------
# Only genes mutated in at least 5 samples in one of the cohorts are
# considered (minMut = 5), to avoid bias due to singly mutated genes.
pt.vs.rt <- mafCompare(
  m1 = primary.apl,
  m2 = relapse.apl,
  m1Name = "PrimaryAPL",
  m2Name = "RelapseAPL",
  minMut = 5
)
print(pt.vs.rt)

## ---- fig.width=5, fig.height=4, fig.align='left'------------------------
# Forest plot of the mafCompare() result, called with pVal = 0.05.
apl.pt.vs.rt.fp <- forestPlot(
  mafCompareRes = pt.vs.rt,
  pVal = 0.05,
  show = "stat",
  color = c("royalblue", "maroon")
)

## ---- fig.height=3,fig.width=11, eval=T, fig.align='left'----------------
# Genes to show side by side for the two APL cohorts.
genes <- c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(
  m1 = primary.apl,
  m2 = relapse.apl,
  m1Name = "PrimaryAPL",
  m2Name = "RelapseAPL",
  genes = genes,
  removeNonMutated = TRUE
)

## ---- echo = TRUE, fig.align='left', fig.width=5, eval=T-----------------
# Run inferHeterogeneity() for sample TCGA-AB-2972, reading allele frequencies
# from column "i_TumorVAF_WU".
tcga.ab.2972.het <- inferHeterogeneity(
  maf = laml,
  tsb = "TCGA-AB-2972",
  vafCol = "i_TumorVAF_WU"
)
print(tcga.ab.2972.het$clusterMeans)
# Visualize the result.
plotClusters(clusters = tcga.ab.2972.het)

## ---- fig.align='left',fig.width=5, eval=T-------------------------------
# Segment file for sample TCGA.AB.3009 from the maftools extdata directory.
seg <- system.file(
  "extdata", "TCGA.AB.3009.hg19.seg.txt",
  package = "maftools"
)
# Same analysis as for the previous sample, now also passing the segment file.
tcga.ab.3009.het <- inferHeterogeneity(
  maf = laml,
  tsb = "TCGA-AB-3009",
  segFile = seg,
  vafCol = "i_TumorVAF_WU"
)
# Visualize the result, highlighting variants in copy-number altered regions.
plotClusters(
  clusters = tcga.ab.3009.het,
  genes = "CN_altered",
  showCNvars = TRUE
)

## ---- eval=FALSE---------------------------------------------------------
#  #First we extract adjacent bases to the mutated locus and classify them into 96 substitution classes. This also estimates APOBEC enrichment per sample.
#  laml.tnm = trinucleotideMatrix(maf = laml, ref_genome = '/path/to/hg19.fa',
#                                 prefix = 'chr', add = TRUE, ignoreChr = 'chr23', useSyn = TRUE)
#  # reading /path/to/hg19.fa (this might take few minutes)..
#  # Extracting 5' and 3' adjacent bases..
#  # Extracting +/- 20bp around mutated bases for background estimation..
#  # Estimating APOBEC enrichment scores..
#  # Performing one-way Fisher's test for APOBEC enrichment..
#  # APOBEC related mutations are enriched in 2.674% of samples (APOBEC enrichment score > 2 ; 5 of 187 samples)
#  # Creating mutation matrix..
#  # matrix of dimension 193x96

## ---- echo=FALSE---------------------------------------------------------
# Hard-coded APOBEC score table. The trinucleotideMatrix() call that would
# assign laml.tnm is commented out earlier in this script, so a deparsed table
# is embedded here instead. It is a data.table/data.frame with 187 rows and
# the columns Tumor_Sample_Barcode (factor), n_mutations, APOBEC_Enriched
# ("yes"/"no"/NA) and fraction_APOBEC_mutations.
APOBEC_scores = structure(list(Tumor_Sample_Barcode = structure(c(136L, 10L, 
132L, 90L, 192L, 101L, 189L, 39L, 188L, 140L, 72L, 94L, 110L, 
180L, 70L, 176L, 61L, 69L, 129L, 80L, 96L, 112L, 1L, 64L, 85L, 
128L, 117L, 92L, 179L, 160L, 134L, 11L, 53L, 139L, 111L, 18L, 
43L, 177L, 9L, 81L, 106L, 35L, 17L, 120L, 159L, 147L, 152L, 143L, 
76L, 95L, 59L, 19L, 148L, 99L, 97L, 103L, 24L, 146L, 55L, 118L, 
71L, 6L, 2L, 3L, 4L, 5L, 7L, 8L, 12L, 13L, 14L, 15L, 16L, 21L, 
22L, 23L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 37L, 
38L, 40L, 41L, 42L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 54L, 
56L, 57L, 58L, 60L, 62L, 63L, 65L, 66L, 67L, 68L, 73L, 74L, 75L, 
78L, 79L, 82L, 83L, 84L, 86L, 87L, 88L, 89L, 91L, 93L, 98L, 102L, 
104L, 105L, 107L, 108L, 109L, 113L, 114L, 115L, 116L, 119L, 121L, 
122L, 123L, 124L, 125L, 126L, 127L, 130L, 131L, 135L, 137L, 138L, 
141L, 142L, 144L, 145L, 149L, 150L, 151L, 153L, 154L, 155L, 156L, 
157L, 158L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 
170L, 171L, 172L, 173L, 174L, 175L, 178L, 181L, 182L, 183L, 184L, 
185L, 186L, 187L, 190L, 191L, 193L), .Label = c("TCGA-AB-2802", 
"TCGA-AB-2803", "TCGA-AB-2804", "TCGA-AB-2805", "TCGA-AB-2806", 
"TCGA-AB-2807", "TCGA-AB-2808", "TCGA-AB-2809", "TCGA-AB-2810", 
"TCGA-AB-2812", "TCGA-AB-2813", "TCGA-AB-2814", "TCGA-AB-2816", 
"TCGA-AB-2817", "TCGA-AB-2818", "TCGA-AB-2819", "TCGA-AB-2820", 
"TCGA-AB-2821", "TCGA-AB-2822", "TCGA-AB-2823", "TCGA-AB-2824", 
"TCGA-AB-2825", "TCGA-AB-2826", "TCGA-AB-2827", "TCGA-AB-2828", 
"TCGA-AB-2829", "TCGA-AB-2830", "TCGA-AB-2831", "TCGA-AB-2832", 
"TCGA-AB-2833", "TCGA-AB-2834", "TCGA-AB-2835", "TCGA-AB-2836", 
"TCGA-AB-2838", "TCGA-AB-2839", "TCGA-AB-2840", "TCGA-AB-2841", 
"TCGA-AB-2842", "TCGA-AB-2843", "TCGA-AB-2844", "TCGA-AB-2845", 
"TCGA-AB-2846", "TCGA-AB-2847", "TCGA-AB-2848", "TCGA-AB-2849", 
"TCGA-AB-2850", "TCGA-AB-2851", "TCGA-AB-2853", "TCGA-AB-2854", 
"TCGA-AB-2855", "TCGA-AB-2857", "TCGA-AB-2858", "TCGA-AB-2859", 
"TCGA-AB-2860", "TCGA-AB-2861", "TCGA-AB-2862", "TCGA-AB-2863", 
"TCGA-AB-2864", "TCGA-AB-2865", "TCGA-AB-2866", "TCGA-AB-2867", 
"TCGA-AB-2868", "TCGA-AB-2869", "TCGA-AB-2870", "TCGA-AB-2871", 
"TCGA-AB-2872", "TCGA-AB-2873", "TCGA-AB-2874", "TCGA-AB-2875", 
"TCGA-AB-2876", "TCGA-AB-2877", "TCGA-AB-2878", "TCGA-AB-2879", 
"TCGA-AB-2880", "TCGA-AB-2881", "TCGA-AB-2882", "TCGA-AB-2883", 
"TCGA-AB-2884", "TCGA-AB-2885", "TCGA-AB-2886", "TCGA-AB-2887", 
"TCGA-AB-2888", "TCGA-AB-2889", "TCGA-AB-2890", "TCGA-AB-2891", 
"TCGA-AB-2892", "TCGA-AB-2894", "TCGA-AB-2895", "TCGA-AB-2896", 
"TCGA-AB-2897", "TCGA-AB-2898", "TCGA-AB-2899", "TCGA-AB-2900", 
"TCGA-AB-2901", "TCGA-AB-2904", "TCGA-AB-2905", "TCGA-AB-2906", 
"TCGA-AB-2907", "TCGA-AB-2908", "TCGA-AB-2909", "TCGA-AB-2910", 
"TCGA-AB-2911", "TCGA-AB-2912", "TCGA-AB-2913", "TCGA-AB-2914", 
"TCGA-AB-2915", "TCGA-AB-2916", "TCGA-AB-2917", "TCGA-AB-2918", 
"TCGA-AB-2919", "TCGA-AB-2920", "TCGA-AB-2921", "TCGA-AB-2922", 
"TCGA-AB-2923", "TCGA-AB-2924", "TCGA-AB-2925", "TCGA-AB-2926", 
"TCGA-AB-2927", "TCGA-AB-2928", "TCGA-AB-2929", "TCGA-AB-2930", 
"TCGA-AB-2931", "TCGA-AB-2932", "TCGA-AB-2933", "TCGA-AB-2934", 
"TCGA-AB-2935", "TCGA-AB-2936", "TCGA-AB-2937", "TCGA-AB-2938", 
"TCGA-AB-2939", "TCGA-AB-2940", "TCGA-AB-2941", "TCGA-AB-2942", 
"TCGA-AB-2943", "TCGA-AB-2945", "TCGA-AB-2946", "TCGA-AB-2947", 
"TCGA-AB-2948", "TCGA-AB-2949", "TCGA-AB-2950", "TCGA-AB-2952", 
"TCGA-AB-2954", "TCGA-AB-2955", "TCGA-AB-2956", "TCGA-AB-2957", 
"TCGA-AB-2959", "TCGA-AB-2963", "TCGA-AB-2964", "TCGA-AB-2965", 
"TCGA-AB-2966", "TCGA-AB-2967", "TCGA-AB-2968", "TCGA-AB-2970", 
"TCGA-AB-2971", "TCGA-AB-2972", "TCGA-AB-2973", "TCGA-AB-2974", 
"TCGA-AB-2975", "TCGA-AB-2976", "TCGA-AB-2977", "TCGA-AB-2978", 
"TCGA-AB-2979", "TCGA-AB-2980", "TCGA-AB-2981", "TCGA-AB-2982", 
"TCGA-AB-2983", "TCGA-AB-2984", "TCGA-AB-2985", "TCGA-AB-2986", 
"TCGA-AB-2987", "TCGA-AB-2988", "TCGA-AB-2989", "TCGA-AB-2990", 
"TCGA-AB-2991", "TCGA-AB-2992", "TCGA-AB-2993", "TCGA-AB-2994", 
"TCGA-AB-2995", "TCGA-AB-2996", "TCGA-AB-2997", "TCGA-AB-2998", 
"TCGA-AB-2999", "TCGA-AB-3000", "TCGA-AB-3001", "TCGA-AB-3002", 
"TCGA-AB-3005", "TCGA-AB-3006", "TCGA-AB-3007", "TCGA-AB-3008", 
"TCGA-AB-3009", "TCGA-AB-3011", "TCGA-AB-3012", "TCGA-AB-2903"
), class = "factor"), n_mutations = c(3, 8, 4, 6, 6, 10, 4, 11, 
7, 11, 12, 8, 10, 11, 10, 9, 10, 9, 18, 11, 20, 8, 10, 10, 15, 
9, 12, 13, 17, 9, 15, 14, 10, 13, 9, 14, 10, 11, 13, 14, 19, 
18, 14, 13, 16, 15, 16, 15, 16, 22, 17, 19, 14, 18, 14, 20, 14, 
25, 19, 23, 17, 26, 14, 5, 14, 13, 9, 4, 8, 7, 10, 10, 14, 3, 
1, 4, 12, 10, 9, 2, 10, 8, 1, 2, 2, 20, 4, 2, 11, 3, 11, 24, 
3, 6, 8, 11, 2, 14, 12, 10, 10, 17, 15, 1, 13, 10, 12, 10, 1, 
15, 2, 1, 9, 4, 11, 6, 4, 7, 1, 5, 15, 1, 17, 17, 16, 2, 13, 
16, 11, 14, 1, 12, 20, 9, 14, 9, 5, 10, 5, 1, 8, 9, 8, 14, 2, 
11, 2, 1, 13, 1, 4, 2, 7, 15, 7, 6, 9, 21, 4, 7, 2, 14, 6, 5, 
4, 2, 14, 10, 5, 9, 6, 13, 9, 8, 8, 7, 6, 8, 11, 3, 10, 26, 17, 
17, 35, 5, 1), APOBEC_Enriched = c("yes", "yes", "yes", "no", 
"no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", NA, "no", "no", "no", "no", "no", "no", NA, NA, "no", 
"no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", 
"no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", 
"no", NA, "no", NA, "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
"no", "no", "no", "no", "no", "no", "no", "no"), fraction_APOBEC_mutations = c(0.333, 
0.125, 0.25, 0.167, 0.167, 0.2, 0.25, 0.182, 0.143, 0.182, 0.167, 
0.125, 0.2, 0.091, 0.1, 0.111, 0.1, 0.111, 0.111, 0.091, 0.2, 
0.125, 0.1, 0.1, 0.067, 0.111, 0.083, 0.077, 0.118, 0.111, 0.067, 
0.071, 0.1, 0.077, 0.111, 0.071, 0.1, 0.091, 0.077, 0.071, 0.105, 
0.056, 0.071, 0.077, 0.125, 0.067, 0.062, 0.067, 0.062, 0.091, 
0.118, 0.105, 0.071, 0.056, 0.071, 0.1, 0.071, 0.04, 0.053, 0.043, 
0.059, 0.038, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0)), .Names = c("Tumor_Sample_Barcode", "n_mutations", 
"APOBEC_Enriched", "fraction_APOBEC_mutations"), row.names = c(NA, 
-187L), class = c("data.table", "data.frame"))

# Assemble a minimal laml.tnm list: nmf_matrix is left as NA and only the
# APOBEC score table is filled in.
laml.tnm = list(nmf_matrix = NA, APOBEC_scores = APOBEC_scores)
    

## ---- eval=TRUE, fig.width=6, fig.height=5, fig.align='left'-------------
# Call plotApobecDiff() with the laml.tnm list and the LAML MAF.
plotApobecDiff(
  tnm = laml.tnm,
  maf = laml
)

## ---- fig.height=5, fig.width=5, eval=F, message=FALSE-------------------
#  #Run main function with maximum 6 signatures.
#  require('NMF')
#  laml.sign = extractSignatures(mat = laml.tnm, nTry = 6, plotBestFitRes = FALSE)
#  # Warning : Found zero mutations for conversions A[T>G]C
#  #Comparing against experimentally validated 30 signatures.. (See http://cancer.sanger.ac.uk/cosmic/signatures for details.)
#  #Found Signature_1 most similar to validated Signature_1. Aetiology: spontaneous deamination of 5-methylcytosine [cosine-similarity: 0.767]
#  #Found Signature_2 most similar to validated Signature_1. Aetiology: spontaneous deamination of 5-methylcytosine [cosine-similarity: 0.757]

## ---- echo=F-------------------------------------------------------------
# Hard-coded laml.sign object. The extractSignatures() call that would assign
# it is commented out earlier in this script, so a deparsed list is embedded
# here instead. Its elements are:
#   signatures    - 96 x 2 matrix; rows are named by substitution context
#                   (e.g. "A[C>A]A"), columns are Signature_1 and Signature_2
#   coSineSimMat  - 2 x 30 matrix; rows Signature_1/Signature_2, columns
#                   Signature_1 ... Signature_30
#   nmfObj        - NULL
#   contributions - 2 x 187 matrix; rows Signature_1/Signature_2, no column names
laml.sign = structure(list(signatures = structure(c(1.08748973712201e-18, 
0.00839574301030403, 1.08748973712201e-18, 1.08748973712201e-18, 
0.00240385240062567, 0.0077139799108362, 1.08748973712201e-18, 
1.08748973712201e-18, 0.0162307404236774, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00375682293486133, 0.0149882188409168, 
0.0166193873319139, 1.08748973712201e-18, 0.0340641337293563, 
0.0177133495392653, 1.08748973712201e-18, 1.08748973712201e-18, 
0.010900522793394, 1.08748973712201e-18, 1.08748973712201e-18, 
0.0204384802376138, 1.08748973712201e-18, 0.016350784190091, 
0.00817539209504552, 0.00817539209504552, 0.00510268819463668, 
4.64672034428952e-06, 1.08748973712201e-18, 0.00226222985404151, 
0.0136256534917425, 0.0305962431704599, 1.08748973712201e-18, 
0.0644710774331736, 0.00640610796992356, 0.0940170090930234, 
0.0580805923991668, 0.0849820094561238, 1.08748973712201e-18, 
0.0397128561572591, 0.0074963059243485, 0.0626642796173211, 0.0351236523951451, 
0.0201728180626018, 0.014196017550947, 0.0643882909888946, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00455330562748865, 0.00408769604752276, 
1.08748973712201e-18, 0.00272513069834851, 0.00650167581744194, 
0.016350784190091, 1.08748973712201e-18, 1.08748973712201e-18, 
0.0122630881425683, 1.08748973712201e-18, 0.00334016875229922, 
0.00136256534917425, 1.08748973712201e-18, 0.00545026139669701, 
0.00545026139669701, 4.42324620317284e-15, 1.08748973712201e-18, 
0.00737413388604955, 0.0269789492628818, 0.00988463958213221, 
0.0100520699314415, 1.08748973712201e-18, 0.00316631020385699, 
1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 
0.010900522793394, 0.00272513069834851, 1.08748973712201e-18, 
0.00394072268462079, 0.00852966039575841, 7.59566702198884e-17, 
0.0115340943434354, 1.08748973712201e-18, 0.00237636221356833, 
1.08748973712201e-18, 1.08748973712201e-18, 0.0136256534917425, 
0.00181567829546557, 0.00408769604752276, 1.08748973712201e-18, 
1.08748973712201e-18, 0.00545026139669701, 0.00408769604752276, 
0.00499540259887787, 0.00272513069834851, 0.00353514720450814, 
0.0134931114940269, 0.0108267734159331, 0.00758987521539011, 
0.00674655574701343, 0.0094753598393619, 0.0070321331529894, 
0.0109631530888968, 0.0109631530888968, 0.00344757540744538, 
0.00674655574701343, 0.00505991681026007, 0.0128545761394095, 
9.04499029878464e-19, 0.00742363125585919, 0.0101198336205201, 
9.04499029878464e-19, 9.04499029878464e-19, 0.00505991681026007, 
0.00505991681026007, 9.04499029878464e-19, 0.0118064725572735, 
0.00421659734188339, 9.04499029878464e-19, 0.00758987521539011, 
9.04499029878464e-19, 9.04499029878464e-19, 9.04499029878464e-19, 
0.00190175907624695, 0.00421372139194615, 0.00590323627863675, 
0.00703305451506787, 9.04499029878464e-19, 0.0131095012433463, 
0.0480692096974707, 0.0857521368222207, 0.0171181159082965, 9.04499029878464e-19, 
0.0526012816612792, 0.103417003771369, 0.108788211420591, 0.00746704363785783, 
0.0358397179449789, 0.051450983147391, 0.0119940343267555, 0.020404090976265, 
0.0173566988936855, 0.0225544149118068, 0.0312028203299371, 0.00337327787350671, 
0.00645838048724634, 9.04499029878464e-19, 0.00252995840513004, 
9.04499029878464e-19, 0.00187921658868539, 9.04499029878464e-19, 
0.00590323627863675, 0.00337327787350671, 9.04499029878464e-19, 
0.00421659734188339, 0.002149298816947, 9.04499029878464e-19, 
0.00168663893675336, 9.04499029878464e-19, 9.04499029878464e-19, 
0.0227696256461676, 0.0118064725572735, 0.0123023875952219, 0.00691512349188281, 
0.00147207029502386, 0.00642836104928948, 0.0160230698991569, 
0.0132200565053105, 0.00758987521539011, 9.04499029878464e-19, 
0.0109631530888968, 9.04499029878464e-19, 9.04499029878464e-19, 
0.00337327787350671, 0.00430756215200655, 0.0115872086847547, 
0.00337327787350667, 0.0086313879649405, 0.00168663893675336, 
0.00105917939270956, 0.000843319468376679, 0.00758987521539011, 
9.04499029878464e-19, 0.00309283703504524, 9.04499029878464e-19, 
0.00252995840513004, 0.00421659734188339, 9.04499029878464e-19, 
9.04499029878464e-19, 0.00112484084993519, 9.04499029878464e-19, 
0.00287194214691947), .Dim = c(96L, 2L), .Dimnames = list(c("A[C>A]A", 
"A[C>A]C", "A[C>A]G", "A[C>A]T", "C[C>A]A", "C[C>A]C", "C[C>A]G", 
"C[C>A]T", "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "T[C>A]A", 
"T[C>A]C", "T[C>A]G", "T[C>A]T", "A[C>G]A", "A[C>G]C", "A[C>G]G", 
"A[C>G]T", "C[C>G]A", "C[C>G]C", "C[C>G]G", "C[C>G]T", "G[C>G]A", 
"G[C>G]C", "G[C>G]G", "G[C>G]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", 
"T[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", "C[C>T]A", 
"C[C>T]C", "C[C>T]G", "C[C>T]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", 
"G[C>T]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", "A[T>A]A", 
"A[T>A]C", "A[T>A]G", "A[T>A]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", 
"C[T>A]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "T[T>A]A", 
"T[T>A]C", "T[T>A]G", "T[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", 
"A[T>C]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", "G[T>C]A", 
"G[T>C]C", "G[T>C]G", "G[T>C]T", "T[T>C]A", "T[T>C]C", "T[T>C]G", 
"T[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", "C[T>G]A", 
"C[T>G]C", "C[T>G]G", "C[T>G]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", 
"G[T>G]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T"), c("Signature_1", 
"Signature_2"))), coSineSimMat = structure(c(0.76789895507522, 
0.757733629596811, 0.171803248681684, 0.199391522195904, 0.407671912943102, 
0.372979035914154, 0.344078922420868, 0.319857408370786, 0.573357292983596, 
0.562412460243176, 0.685700701802704, 0.686217358302521, 0.377725890462418, 
0.386689478887272, 0.382312659188403, 0.407516946456442, 0.339149804914427, 
0.305305965796845, 0.386629499233586, 0.15685755480318, 0.350678506033931, 
0.562433289508901, 0.268840367435164, 0.322933955777266, 0.108666524311962, 
0.0628339785033974, 0.49126593617209, 0.527932757462746, 0.47172512923794, 
0.461711639647726, 0.362590079921887, 0.387794528034913, 0.154909499589746, 
0.154613800740969, 0.303423806321064, 0.204479833568232, 0.570031076792535, 
0.740784602225925, 0.445644443725404, 0.510768207280784, 0.248807908838572, 
0.28784224944225, 0.140154287925718, 0.0725523826114571, 0.407829024906403, 
0.610157444568381, 0.256945337078229, 0.227615984891259, 0.43572741633734, 
0.391864627867027, 0.296855754287958, 0.345602091793204, 0.105681572106723, 
0.0918011629446175, 0.0955192240249301, 0.0892005087879189, 0.35734783741945, 
0.351111836488432, 0.570462074592721, 0.532907409369077), .Dim = c(2L, 
30L), .Dimnames = list(c("Signature_1", "Signature_2"), c("Signature_1", 
"Signature_2", "Signature_3", "Signature_4", "Signature_5", "Signature_6", 
"Signature_7", "Signature_8", "Signature_9", "Signature_10", 
"Signature_11", "Signature_12", "Signature_13", "Signature_14", 
"Signature_15", "Signature_16", "Signature_17", "Signature_18", 
"Signature_19", "Signature_20", "Signature_21", "Signature_22", 
"Signature_23", "Signature_24", "Signature_25", "Signature_26", 
"Signature_27", "Signature_28", "Signature_29", "Signature_30"
))), nmfObj = NULL, contributions = structure(c(0.50337879623466, 
0.49662120376534, 0.735184687577203, 0.264815312422796, 0.926454506999779, 
0.0735454930002212, 6.84562518316801e-15, 0.999999999999993, 
0.63514205992338, 0.36485794007662, 0.405196139226508, 0.594803860773492, 
1.0648750284928e-14, 0.999999999999989, 2.3959688141088e-14, 
0.999999999999976, 0.388932338884403, 0.611067661115597, 0.789353253877505, 
0.210646746122495, 0.70558528036746, 0.294414719632539, 0.425275906358389, 
0.574724093641611, 1.36912503663359e-14, 0.999999999999986, 0.335618602334815, 
0.664381397665185, 0.854637052669362, 0.145362947330638, 0.999999999999993, 
7.12839115795779e-15, 0.451109064541674, 0.548890935458326, 0.999999999999993, 
7.12839115795777e-15, 0.231557676487435, 0.768442323512564, 3.19462508547833e-14, 
0.999999999999968, 0.9999999999999, 9.97974762114e-14, 2.39596881410876e-14, 
0.999999999999976, 0.707339986758224, 0.292660013241776, 0.623888274173047, 
0.376111725826953, 0.761349287087994, 0.238650712912006, 0.185110462876877, 
0.814889537123123, 0.99999999999995, 4.98987381057022e-14, 0.99999999999999, 
9.97974762114085e-15, 1.1979844070544e-14, 0.999999999999988, 
0.9999999999999, 9.97974762113995e-14, 0.923471904280283, 0.076528095719717, 
4.79193762821742e-14, 0.999999999999952, 0.417499523255935, 0.582500476744065, 
0.427651278437266, 0.572348721562734, 0.999999999999975, 2.49493690528517e-14, 
4.79193762821741e-14, 0.999999999999952, 8.71261386948656e-15, 
0.999999999999991, 0.901731575615889, 0.098268424384111, 0.526348275461158, 
0.473651724538842, 0.383123830992873, 0.616876169007127, 0.676458226167224, 
0.323541773832776, 0.62617579781792, 0.37382420218208, 3.19462508547836e-14, 
0.999999999999968, 0.571318817161484, 0.428681182838516, 0.539484231563079, 
0.460515768436921, 0.999999999999991, 9.07249783740083e-15, 0.663750621983501, 
0.336249378016499, 0.340128808961781, 0.659871191038219, 0.999999999999992, 
8.31645635095072e-15, 0.99999999999999, 9.97974762114085e-15, 
9.5838752564352e-15, 0.99999999999999, 0.123828233985777, 0.876171766014223, 
0.99999999999999, 9.97974762114086e-15, 0.999999999999994, 5.8704397771417e-15, 
0.795422962132652, 0.204577037867348, 0.502029549198604, 0.497970450801396, 
9.58387525643452e-14, 0.999999999999904, 0.147092909392627, 0.852907090607373, 
7.3722117357194e-15, 0.999999999999993, 0.99999999999999, 9.97974762114086e-15, 
0.508037326891693, 0.491962673108307, 0.535750610709636, 0.464249389290364, 
0.99999999999999, 9.97974762114087e-15, 0.9999999999999, 9.97974762114e-14, 
0.999999999999993, 6.65316508076059e-15, 0.422991976717691, 0.577008023282309, 
0.161518855160448, 0.838481144839552, 0.95649107631202, 0.0435089236879803, 
0.252788998249348, 0.747211001750652, 0.99999999999995, 4.98987381057023e-14, 
0.9999999999999, 9.97974762113995e-14, 0.464185141122002, 0.535814858877999, 
0.505653558249352, 0.494346441750648, 0.444394111481323, 0.555605888518677, 
0.357951030231189, 0.642048969768811, 8.71261386948657e-15, 0.999999999999991, 
0.470221885688493, 0.529778114311507, 0.557673480791699, 0.442326519208301, 
1.84111243164613e-05, 0.999981588875683, 0.999999999999986, 1.42567823159154e-14, 
6.38925017095682e-15, 0.999999999999994, 0.999999712408211, 2.87591789023604e-07, 
0.99999999999998, 1.99594952422816e-14, 0.29671503347543, 0.70328496652457, 
0.9999999999999, 9.97974762114e-14, 1.59731254273919e-14, 0.999999999999984, 
0.637601867611109, 0.362398132388891, 0.609548816453349, 0.390451183546651, 
5.63757368025602e-15, 0.999999999999994, 0.99999997290847, 2.70915302413065e-08, 
0.501204783088238, 0.498795216911762, 0.461243728880635, 0.538756271119365, 
0.999999999999993, 7.12839115795777e-15, 0.615608327489882, 0.384391672510118, 
0.999999999999994, 5.54430423396717e-15, 0.569928863637076, 0.430071136362924, 
4.79193762821743e-14, 0.999999999999952, 0.100851461315826, 0.899148538684174, 
0.999999999999992, 7.67672893933913e-15, 0.141416581853325, 0.858583418146675, 
0.704078506817208, 0.295921493182792, 0.256820491175602, 0.743179508824398, 
6.84562518316802e-15, 0.999999999999993, 9.58387525643438e-14, 
0.999999999999904, 0.308965325142456, 0.691034674857544, 0.54420855115639, 
0.45579144884361, 1.1979844070544e-14, 0.999999999999988, 0.390887402815504, 
0.609112597184496, 0.403491456621558, 0.596508543378441, 0.498611242993035, 
0.501388757006965, 0.999999999999993, 7.12839115795777e-15, 7.98656271369601e-15, 
0.999999999999992, 0.35276002632962, 0.64723997367038, 0.251306887425971, 
0.748693112574029, 0.572879892841475, 0.427120107158525, 1.91677505128702e-14, 
0.999999999999981, 9.5838752564352e-15, 0.99999999999999, 0.607255141365351, 
0.392744858634649, 9.58387525643437e-14, 0.999999999999904, 0.999999999999988, 
1.2474684526426e-14, 0.999999999999989, 1.10886084679343e-14, 
1.1979844070544e-14, 0.999999999999988, 0.744287626983694, 0.255712373016306, 
0.390812458637443, 0.609187541362557, 0.999999999999993, 7.12839115795777e-15, 
0.99999999999995, 4.98987381057022e-14, 0.311861999705299, 0.688138000294701, 
6.38925017095682e-15, 0.999999999999994, 0.67213229360714, 0.327867706392859, 
3.19462508547833e-14, 0.999999999999968, 4.79193762821742e-14, 
0.999999999999952, 9.58387525643452e-14, 0.999999999999904, 0.28017165708634, 
0.71982834291366, 8.71261386948655e-15, 0.999999999999991, 0.395003268601784, 
0.604996731398216, 9.58387525643437e-14, 0.999999999999904, 6.38925017095682e-15, 
0.999999999999994, 2.39596881410877e-14, 0.999999999999976, 4.79193762821741e-14, 
0.999999999999952, 0.214731893915975, 0.785268106084026, 0.365495605625826, 
0.634504394374174, 0.434278914613828, 0.565721085386172, 0.999999999999986, 
1.42567823159154e-14, 6.38925017095682e-15, 0.999999999999994, 
0.999999999999986, 1.42567823159155e-14, 5.98992203527202e-15, 
0.999999999999994, 0.61728491590254, 0.38271508409746, 0.622547952361923, 
0.377452047638077, 4.56375012211202e-15, 0.999999999999995, 2.39596881410877e-14, 
0.999999999999976, 0.651162583867872, 0.348837416132128, 4.79193762821742e-14, 
0.999999999999952, 0.548809595398619, 0.451190404601381, 0.350410016472764, 
0.649589983527236, 0.999999999999993, 7.12839115795778e-15, 0.999999999999983, 
1.66329127019013e-14, 0.734514125942381, 0.265485874057619, 2.39596881410876e-14, 
0.999999999999976, 0.99999999999995, 4.98987381057022e-14, 0.142044376495145, 
0.857955623504855, 0.364173961579135, 0.635826038420865, 1.91677505128702e-14, 
0.999999999999981, 0.999999999999989, 1.10886084679343e-14, 0.626999624911337, 
0.373000375088663, 0.999999999999992, 7.67672893933913e-15, 0.999999999999989, 
1.10886084679343e-14, 0.746961249019172, 0.253038750980829, 0.59777540188719, 
0.40222459811281, 1.36912503663359e-14, 0.999999999999986, 0.999999999999989, 
1.10886084679343e-14, 8.71261386948656e-15, 0.999999999999991, 
0.999999999999983, 1.66329127019013e-14, 5.63757368025602e-15, 
0.999999999999994, 8.71261386948655e-15, 0.999999999999991, 0.999999999999988, 
1.2474684526426e-14, 0.999999999999991, 9.07249783740084e-15, 
0.999999999999967, 3.32658254038021e-14, 0.99999999999999, 9.97974762114085e-15, 
0.569329445983671, 0.43067055401633, 0.972356604437691, 0.027643395562309, 
0.999999999999994, 5.8704397771417e-15, 1.36912503663359e-14, 
0.999999999999986, 0.311861999705299, 0.688138000294701, 0.294158858972731, 
0.705841141027269, 0.99999999999998, 1.99594952422817e-14, 0.367632671587342, 
0.632367328412658, 9.58387525643437e-14, 0.999999999999904), .Dim = c(2L, 
187L), .Dimnames = list(c("Signature_1", "Signature_2"), NULL))), .Names = c("signatures", 
"coSineSimMat", "nmfObj", "contributions"))

## ---- fig.width=7, fig.align='left', eval = T----------------------------
# Plot the signatures stored in laml.sign.
plotSignatures(
  laml.sign
)

## ---- fig.width=7, fig.height=2.5, fig.align='left'----------------------
# Attach pheatmap. library() fails immediately if the package is missing,
# whereas require() would only return FALSE and defer the failure to the call
# below.
library(pheatmap)

# Heatmap of the coSineSimMat element of laml.sign; rows are not clustered.
pheatmap::pheatmap(
  mat = laml.sign$coSineSimMat,
  cluster_rows = FALSE,
  main = "cosine similarity against validated signatures"
)

## ---- fig.width=7, fig.height=4, fig.align='left'------------------------
# Same plotting function as before, now called with contributions = TRUE.
plotSignatures(
  nmfRes = laml.sign,
  contributions = TRUE
)

## ---- fig.width=7, fig.height=4, fig.align='center'----------------------
# Number of signatures identified (columns of the signatures matrix).
nsig <- ncol(laml.sign$signatures)
# Contribution of the signatures in every sample; transposed so that samples
# are in rows and signatures in columns.
contrib <- t(laml.sign$contributions)

# Attach ggfortify for the autoplot() call below. library() stops with an
# error if the package is missing, whereas require() would only return FALSE.
library(ggfortify)

# kmeans() chooses its starting centres at random, so without a fixed seed the
# cluster assignment (and therefore the figure) can differ between runs.
set.seed(1)

# Cluster the samples into `nsig` groups and plot the result with frames and
# loadings, styled with the cowplot theme and a background grid.
contrib.gg <- autoplot(
  kmeans(contrib, nsig),
  data = contrib,
  frame = TRUE,
  frame.type = "norm",
  loadings = TRUE,
  loadings.label = TRUE,
  loadings.label.size = 4,
  loadings.colour = "blue"
) +
  cowplot::theme_cowplot(line_size = 1) +
  cowplot::background_grid(major = "xy")

print(contrib.gg)

## ------------------------------------------------------------------------
# Example variants table shipped in the maftools extdata directory.
var.file <- system.file("extdata", "variants.tsv", package = "maftools")
# This is what the input looks like (tab-separated; first rows only).
var <- read.delim(
  var.file,
  sep = "\t"
)
head(var)

## ---- results='hide', eval=F, message=F----------------------------------
#  #Annotate
#  var.maf = oncotate(maflite = var.file, header = TRUE)

## ---- eval = F-----------------------------------------------------------
#  #Results from oncotate. First 20 columns.
#  var.maf[1:10, 1:20, with = FALSE]

## ---- eval=T-------------------------------------------------------------
# Convert the bundled ANNOVAR annotation table to MAF.
var.annovar <- system.file(
  "extdata", "variants.hg19_multianno.txt",
  package = "maftools"
)
var.annovar.maf <- annovarToMaf(
  annovar = var.annovar,
  Center = "CSI-NUS",
  refBuild = "hg19",
  tsbCol = "Tumor_Sample_Barcode",
  table = "ensGene"
)


## ------------------------------------------------------------------------
# Convert the sample ICGC simple somatic mutation file for ESCA to MAF.
esca.icgc <- system.file(
  "extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz",
  package = "maftools"
)
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
# Show rows 1-5 and only the first 16 columns to keep the output readable.
print(esca.maf[1:5, 1:16, with = FALSE])

## ---- eval=FALSE---------------------------------------------------------
#  laml.mutsig.corrected = prepareMutSig(maf = laml)
#  # Converting gene names for 1 variants from 1 genes
#  #    Hugo_Symbol MutSig_Synonym N
#  # 1:    ARHGAP35          GRLF1 1
#  # Original symbols are preserved under column OG_Hugo_Symbol.

## ------------------------------------------------------------------------
## Extract data for samples 'TCGA-AB-3009' and 'TCGA-AB-2933'
## (only the first 5 rows are shown for display convenience)
subsetMaf(
  maf = laml,
  tsb = c("TCGA-AB-3009", "TCGA-AB-2933")
)[1:5]
## Same subset, but returned as an MAF object
subsetMaf(
  maf = laml,
  tsb = c("TCGA-AB-3009", "TCGA-AB-2933"),
  mafObj = TRUE
)

## ------------------------------------------------------------------------
## Select all Splice_Site mutations from DNMT3A and NPM1
subsetMaf(
  maf = laml,
  genes = c("DNMT3A", "NPM1"),
  query = "Variant_Classification == 'Splice_Site'"
)

## Same selection, with the fields argument set to 'i_transcript_name'
subsetMaf(
  maf = laml,
  genes = c("DNMT3A", "NPM1"),
  query = "Variant_Classification == 'Splice_Site'",
  fields = "i_transcript_name"
)

## ---- eval=FALSE---------------------------------------------------------
#  devtools::install_github(repo = "PoisonAlien/TCGAmutations")

## ------------------------------------------------------------------------
# Print session information (R version, platform, attached packages) so the
# environment used to run this script is recorded alongside its output.
sessionInfo()