## ----style, echo = FALSE, results = 'asis', warning=FALSE, message=FALSE------ BiocStyle::markdown() suppressPackageStartupMessages({ library(knitr) library(GenomicRanges) library(CNVMetrics) }) set.seed(121444) ## ----graphCNVpro, echo = FALSE, fig.align="center", fig.cap="Copy number profiles of two mouse metastatic pancreatic organoids (M10 and M30).", out.width = '95%', results='asis'---- knitr::include_graphics("CNV_mM30_mM10_v03_Feb_08_2021_small.jpeg") ## ----installDemo01, eval=FALSE------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # The following initializes usage of Bioc devel # BiocManager::install(version='devel') # # BiocManager::install("CNVMetrics") ## ----figureCNVFile, echo = FALSE, fig.align="center", fig.cap="Example of a copy number file containing CNV calls.", out.width = '100%'---- knitr::include_graphics("Input_CNV_call_300ppi_v02_low_quality.jpg") ## ----demoImport01------------------------------------------------------------- ## Load required libraries library(GenomicRanges) library(CNVMetrics) ## Load file containing CNV calls for 10 mouse organoids data.dir <- system.file("extdata", package="CNVMetrics") cnv.file <- file.path(data.dir, "mousePairedOrganoids.txt") calls <- read.table(cnv.file, header=TRUE, sep="\t") ## The CNV status calls for all samples are present in one file ## The 'state' column is required ## The chromosome Y has been removed head(calls) ## The ID column identifies the 10 samples unique(calls[,"ID"]) ## The ID column is used to split the samples into different GRanges ## inside a GRangesList ## The 'keep.extra.column=TRUE' parameter is needed to retained the extra ## column 'state' that is needed for the calculation of the metrics grl <- GenomicRanges::makeGRangesListFromDataFrame(calls, split.field="ID", keep.extra.columns=TRUE) grl ## ----demoCalculateMetric01---------------------------------------------------- ## In this case, the default states (AMPLIFICATION, DELETION) are used. ## So, the 'states' parameter doesn't have to be specified ## The 'states' parameter needs to be adjusted for user-specific states ## Ex: states=c("LOH", "gain") metric <- calculateOverlapMetric(segmentData=grl, method="sorensen", nJobs=1) metric ## ----demoPlot01, fig.align="center", fig.height=4.5, fig.width=4.5------------ ## Create graph for the metrics related to amplified regions plotMetric(metric, type="AMPLIFICATION") ## ----demoPlot02, fig.align="center", fig.height=4.8, fig.width=4.8------------ ## Create graph for the metrics related to deleted regions ## Metric values are printed as 'display_numbers' and 'number_format' are ## arguments recognized by pheatmap() function plotMetric(metric, type="DELETION", colorRange=c("white", "darkorange"), show_colnames=TRUE, display_numbers=TRUE, number_format="%.2f") ## ----demoPlot03, fig.align="center", fig.height=4.8, fig.width=6-------------- ## Load file containing annotations for the mouse organoids ## The mouse ID identifying the source of the sample ## The stage identifying the state (tumor vs metastasis) of the sample data.dir <- system.file("extdata", package="CNVMetrics") annotation.file <- file.path(data.dir, "mousePairedOrganoidsInfo.txt") annotOrg <- read.table(annotation.file, header=TRUE, sep="\t") ## The row names must correspond to the names assigned to the rows/columns ## in the CNVMetric object rownames(annotOrg) <- annotOrg$ID annotOrg$ID <- NULL all(rownames(annotOrg) == rownames(metric$AMPLIFICATION)) ## Create graph for the metrics related to amplified regions ## Rows are annotated with the stage and mouse information plotMetric(metric, type="AMPLIFICATION", colorRange=c("white", "steelblue"), annotation_row=annotOrg) ## ----figureCNVFileLog2Ratio, echo = FALSE, fig.align="center", fig.cap="Example of a copy number file containing log2ratio values.", out.width = '100%'---- knitr::include_graphics("Input_CNV_log2ratio_v01_low_quality.jpg") ## ----demoImport02------------------------------------------------------------- ## Load required libraries library(GenomicRanges) library(CNVMetrics) ## Load file containing CNV calls for 10 mouse organoids data.dir <- system.file("extdata", package="CNVMetrics") cnv.file <- file.path(data.dir, "mousePairedOrganoids.txt") calls <- read.table(cnv.file, header=TRUE, sep="\t") ## The CNV status calls for all samples are present in one file ## The 'log2ratio' column is required ## The chromosome Y has been removed head(calls) ## The ID column identifies the 10 samples unique(calls[,"ID"]) ## The ID column is used to split the samples into different GRanges ## inside a GRangesList ## The 'keep.extra.column=TRUE' parameter is needed to retained the extra ## column 'state' that is needed for the calculation of the metrics grlog <- GenomicRanges::makeGRangesListFromDataFrame(df=calls, split.field="ID", keep.extra.columns=TRUE) grlog ## ----demoCalculateMetricLog--------------------------------------------------- metricLog <- calculateLog2ratioMetric(segmentData=grlog, method="weightedEuclideanDistance", nJobs=1) metricLog ## ----demoPlotLog01, fig.align="center", fig.height=4.5, fig.width=4.5--------- ## Create graph for the metrics related to weighted Euclidean distance-based plotMetric(metricLog) ## ----demoPlotLog02, fig.align="center", fig.height=4.8, fig.width=4.8--------- ## Create graph for the weighted Euclidean distance-based metrics ## Remove title (argument main="") ## Metric values are printed as 'display_numbers' and 'number_format' are ## arguments recognized by pheatmap() function plotMetric(metricLog, colorRange=c("white", "darkorange"), show_colnames=TRUE, display_numbers=TRUE, number_format="%.2f", main="") ## ----demoGR------------------------------------------------------------------- ## First, create the GRanges objects; one per sample gr1 <- GRanges(seqnames="chr2", ranges=IRanges(3, 6000), strand="+", state="AMPLIFICATION", log2ratio=0.45) gr2 <- GRanges(seqnames=c("chr1", "chr2"), ranges=IRanges(c(7,5555), width=c(1200, 40)), strand=c("+", "-"), state=c("NEUTRAL", "AMPLIFICATION"), log2ratio=c(0.034, 0.5)) gr3 <- GRanges(seqnames=c("chr1", "chr2"), ranges=IRanges(c(1, 5577), c(3, 5666)), strand=c("-", "-"), state=c("NEUTRAL", "AMPLIFICATION"), log2ratio=c(0.04, 0.31)) ## Then, construct a GRangesList() using all the GRanges objects grl <- GRangesList("sample01"=gr1, "sample02"=gr2, "sample03"=gr3) ## ----demoSeed, eval=FALSE----------------------------------------------------- # ## First, fixe the seed value # set.seed(121234) # # ## Run the method to calculated the desired metrics # ## The number of jobs (*nJobs* parameter) can be higher than one but # ## have to remain the same then the calculation is redone to ensure # ## reproducitble results # metricLog <- calculateLog2ratioMetric(segmentData=grlog, # method="weightedEuclideanDistance", nJobs=1) # ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()