%\VignetteIndexEntry{CGHnormaliter}
%\VignettePackage{CGHnormaliter}


\documentclass{article}

\bibliographystyle{plain}

\begin{document}


<<CGHnormaliterPackage, include=FALSE, echo=FALSE>>=
options(keep.source=TRUE)
CGHnormaliterPackage <- packageDescription("CGHnormaliter")
@ 

\title{CGHnormaliter Package (Version \Sexpr{CGHnormaliterPackage$Version})} 

\author{Bart P.P. van Houte, Thomas W. Binsl, Hannes Hettling}
\maketitle 

\section{Introduction}
This package contains an implementation of the CGHnormaliter strategy for
normalization of two-channel array Comparative Genomic Hybridization (aCGH)
data displaying many copy number imbalances. The key idea of our method is
that temporary exclusion of aberrations from the aCGH data allows for a more
appropriate calculation of the LOWESS regression curve. As a result, after
normalization, the log$_2$ intensity ratios of the normals will generally
be closer to zero and better reflect the biological reality. We coined this
normalization strategy `local-LOWESS' since only a subset of the log$_2$
ratios is considered in the LOWESS regression.

The strategy can be summarized as follows (see Figure~\ref{fig:method}).
Initially the log$_{2}$ intensity ratios are segmented using
DNAcopy~\cite{venkatraman2007}. The segmented data are then given as input
to a calling tool named CGHcall~\cite{wiel2007} to discriminate the normals
from gains and losses. These normals are subsequently used for normalization
based on LOWESS. These steps are then iterated to refine the normalization.
For more detailed information we refer to the publications of the
method~\cite{vanhoute2009,vanhoute2010}.

\begin{figure}[h]
\begin{center}
\resizebox{6.6cm}{!}{\includegraphics{CGHnormaliter-method}}
\caption{Overview of the CGHnormaliter method.\label{fig:method}}
\end{center}
\end{figure}

\section{Data format}
The input should be either a \texttt{data.frame} or the file name
of a tab-separated text file (text files must contain a header). The first
four columns should describe the clone and its position on the genome:
\begin{enumerate}
    \item ID : The unique identifiers of array elements.
    \item Chromosome : Chromosome number of each array element.
    \item Start : Chromosomal start position in bp of each array element.
    \item End : Chromosomal end position in bp of each array element.
\end{enumerate}
The start and end positions must be numeric. The next columns hold the
actual data. For each sample in the experiment, there must be two adjacent
columns with the \emph{test} and \emph{reference} intensities, respectively.
All entries must be delimited by tabs, and missing entries must be denoted
with \textit{NA} or by an empty value. Below, an example is given of a
correctly formatted data file or \texttt{data.frame} containing measurements on 7
clones in 2 samples.

\begin{footnotesize}
\begin{verbatim}
ID           Chromosome  Start   End     Case1.test  Case1.ref  Case2.test  Case2.ref
RP11-34P13   1           1       254479  279         294        NA          NA
RP11-379K15  1           95421   244136  1815        2269       2793        3996
RP11-776O18  1           357737  465038  387         349        429         362
RP11-45C18   1           579118  696613  786         734        900         735
RP11-242B5   1           606617  711982  2955        4158       4478        5229
RP13-586C17  1           619355  783174  NA          NA         823         841
RP11-414L23  1           658751  846904  630         937        959         744
\end{verbatim}
\end{footnotesize}


\section{Example}
First, we load the example acute lymphoblastic leukemia
dataset~\cite{paulsson2006} which comes with the CGHnormaliter package:
<<loadData>>=
library(CGHnormaliter)
data(Leukemia)
@

\vspace{0.5cm}
\noindent Next, we run the CGHnormaliter routine on the first four
chromosomes of the \texttt{Leukemia} data:
<<runCGHnormaliter>>=
result <- CGHnormaliter(Leukemia, nchrom=4, cellularity=0.9)
@

\vspace{0.5cm}
\noindent To enable a visual assessment of the bias reduction, MA-plots are
(by default) automatically generated before and after normalization of each
sample. These plots are stored in a PDF file, usually named
\texttt{MAplot.pdf} (the exact file name is reported at the end of
each CGHnormaliter run). See Figure~\ref{fig:ma-plot} for the MA-plots of
the second \texttt{Leukemia} sample.

\begin{figure}[h]
\begin{center}
\resizebox{11cm}{!}{\includegraphics[page=2]{MAplot}}
\end{center}
\vspace*{-0.5cm}
\caption{MA-plot for the second leukemia sample before and after normalization. Note
that the normalization is based on the normals only, represented by the black dots.}
\label{fig:ma-plot}
\end{figure}


\vspace{0.5cm}
\noindent Now, several fields of the \texttt{result} object can be
accessed, for example:
<<accessResults>>=
normalized.data <- copynumber(result)  # log2 ratios
segmented.data <- segmented(result)
called.data <- calls(result)
@


\vspace{2cm}
\noindent Plotting the normalized log$_{2}$ ratios in a density plot provides
another means (besides MA-plots) to inspect whether or not the centralization
has been successful. Figure~\ref{fig:density-plot} shows such a density plot
for sample 2:
<<label=densityPlotCommand, term=FALSE>>=
plot(density(normalized.data[, 2]), col=1, xlab="log2 ratio",
                                    main="Density plot")
abline(v=0, lty=2)

@
\begin{figure}
\begin{center}
\resizebox{8.5cm}{!}{
<<label=densityPlotFigure, fig=TRUE, eps=FALSE, echo=FALSE, results=hide>>=
<<densityPlotCommand>>
@
}
\end{center}
\vspace*{-0.7cm}
\caption{Density plot after CGHnormaliter normalization for the second leukemia
sample. The data are adequately centralized around the peak at the left, which
corresponds to the normals. The peak at the right corresponds to the gains.}
\label{fig:density-plot}
\end{figure}


\vspace{0.5cm}
\noindent The results, including segments and calls, can be visualized using the
\texttt{plot} function. In Figure~\ref{fig:calls-plot} the results of
sample 2 are plotted in full resolution:
<<label=callPlotCommand, term=FALSE>>=
plot(result[,2], ylimit=c(-2,2), dotres=1)
@
\begin{figure}
\begin{center}
\resizebox{8.5cm}{!}{
<<label=callPlotFigure, fig=TRUE, eps=FALSE, echo=FALSE, results=hide>>=
<<callPlotCommand>>
@
}
\end{center}
\vspace*{-1.4cm}
\caption{Results of the CGHnormaliter normalization for the second leukemia
sample.}
\label{fig:calls-plot}
\end{figure}


\vspace{0.5cm}
\noindent Finally, the package provides the function
\texttt{CGHnormaliter.write.table} to save the normalized data into
a tab-delimited plain text file:
<<saveNormalizedData>>=
CGHnormaliter.write.table(result)
@

\noindent The segmented and called data from the \texttt{result}
object can be saved to a file as well using this function:

<<saveOtherData>>=
CGHnormaliter.write.table(result, data.type="segmented")
CGHnormaliter.write.table(result, data.type="called")
@

\bibliography{CGHnormaliter}

\end{document}