%\VignetteIndexEntry{Family Based Association Tests Using the fbat package}
%\VignetteDepends{MASS}
%\VignetteKeywords{}
%\VignettePackage{fbat}

\documentclass[12pt]{article}
\usepackage{amsmath, amssymb, amsfonts}

\usepackage{hyperref}

% Page layout: text-block dimensions and side margins are set by hand.
\textwidth=6.2in
\textheight=8.5in
%\parskip=.3cm
\oddsidemargin=.1in
\evensidemargin=.1in
% NOTE(review): the negative \headheight presumably serves to pull the text
% block upward -- confirm; the geometry package would state this layout
% more directly.
\headheight=-.3in

% Semantic markup for R entities.  Use these in the body so that the
% styling can be changed here in one place.
%   \Rfunction, \Robject, \Rmethod, \Rfunarg, \code : typewriter
%   \Rpackage                                       : sans serif
%   \Rclass, \classdef                              : italic / emphasised
\newcommand{\Rfunction}[1]{\texttt{#1}}
\newcommand{\Robject}[1]{\texttt{#1}}
\newcommand{\Rpackage}[1]{\textsf{#1}}
\newcommand{\Rmethod}[1]{\texttt{#1}}
\newcommand{\Rfunarg}[1]{\texttt{#1}}
\newcommand{\Rclass}[1]{\textit{#1}}
\newcommand{\code}[1]{\texttt{#1}}

% Emphasised name of a class being defined.  Written on one line so that no
% end-of-line inside the definition leaks a spurious space after the name.
\newcommand{\classdef}[1]{\emph{#1}}

% ---------------------------------------------------------------------------
% Upright operators and labels for use in formulas.
% The macros in the first group append a thin space (\,); the macros in the
% second group are padded with a thin space on both sides.
%
% \det, \arg and \Pr are deliberately NOT redefined here: LaTeX/amsmath
% already provide them as proper math operators, and a \def would silently
% overwrite those definitions.
% ---------------------------------------------------------------------------
\def\Err{{\rm Err}\,}
\def\err{{\rm err}\,}
\def\CV{{\rm CV}\,}
% Bold math symbol (\boldsymbol is provided through amsmath).
\newcommand{\mb}[1]{{\boldsymbol{#1}}}
% First partial derivative of #1 with respect to #2.
\newcommand{\mypartial}[2]{\protect{\frac{\partial #1}{\partial #2}}}
% Second partial derivative of #1 with respect to #2 and its transpose.
\newcommand{\mypdtwo}[2]{\protect{\frac{\partial\protect{^2} #1}{\partial #2 \partial #2\protect{^T}}}}
% Calligraphic letter followed by a thin space.
\newcommand\myC[1]{{\cal #1}\,}
\def\half{\protect{{1\over 2}}}
\def\from {\protect{\leftarrow}}
% use \gets, not \from (\gets is already in Tex)
\def\fromto {\protect{\Leftrightarrow}}
\def\Uniform {\textrm{Uniform}}
\def\N{{\rm N}\,}
\def\MSD{{\rm MSD}\,}
\def\org{{\rm org}\,}
\def\TN{{\rm TN}\,}
\def\E{{\rm E}\,}
\def\Var{{\rm Var}\,}
\def\MSE{{\rm MSE}\,}
\def\bias{{\rm bias}\,}
\def\low{{\rm low}\,}
\def\upp{{\rm upp}\,}
\def\step{{\rm step}\,}
\def\Multinomial{{\rm Multinomial}\,}
\def\member{{\rm member}\,}
\def\Cov{{\rm Cov}\,}
\def\wCov{\widehat{\rm Cov}\,}
% Second group: thin space on both sides.
\def\tr{\,{\rm tr}\,}
\def\rank{\,{\rm rank}\,}
\def\diag{\,{\rm diag}\,}
\def\mysep{\,{\rm mysep}\,}
% \old is defined once only; an earlier duplicate (without the leading thin
% space) was always overridden by this definition and has been dropped.
\def\old{\,{\rm old}\,}
\def\tg{\,{\rm tg}\,}
\def\arctg{\,{\rm arctg}\,}
\def\MAD{\,{\rm MAD}\,}
\def\median{\,{\rm median}\,}
\def\pre{\,{\rm pre}\,}
\def\cur{\,{\rm cur}\,}
\def\opt{\,{\rm opt}\,}
\def\sep{\,{\rm sep}\,}
\def\new{\,{\rm new}\,}
\def\flag{\,{\rm flag}\,}
\def\sign{\,{\rm sign}\,}
\def\myin{\,{\rm in}\,}
\def\out{\,{\rm out}\,}
% Calligraphic M applied to #1.
\newcommand{\M}[1]{{\cal M}(#1)}
% Labels with subscript #1.  The subscript is braced so that a multi-token
% argument such as \be{ij} is subscripted as a whole, not just its first token.
\newcommand{\be}[1]{{\rm between}_{#1}}
\newcommand{\wi}[1]{{\rm within}_{#1}}
\newcommand{\de}[1]{{\rm dependence}_{#1}}

% Shorthand for bold vectors/matrices, Greek letters and a few text labels.
% NOTE(review): \a, \b, \v, \H and \S are predefined LaTeX text commands
% (accent commands and the section sign).  The \def's below silently replace
% them, so e.g. accented names typed with \v{...} or \H{...} will not work in
% this document.  TODO: confirm none of the originals are needed, or rename.
% \S and \mbS have identical definitions, as do \thmu and \mbmu.
\def\z{{\bf z}}
\def\Z{\mb{Z}}
\def\ww{{\bf w}}
\def\bb{{\bf b}}
\def\a{\alpha}
\def\abf{\mb{\alpha}}
\def\b{\beta}
\def\ee{\mb{e}}
\def\bbf{\mb{\beta}}
\def\th{\theta}
\def\S{\mb{\Sigma}}
\def\mbS{\mb{\Sigma}}
\def\Q{\mb{Q}}
\def\I{\mb{I}}
\def\V{\mb{V}}
\def\v{\mb{v}}
\def\B{\mb{B}}
\def\W{\mb{W}}
\def\A{\mb{A}}
\def\H{\mb{H}}
\def\Y{{\bf Y}}
\def\y{{\bf y}}
\def\thmu{\mb{\mu}}
\def\mbmu{\mb{\mu}}
% \oo is shorthand for the infinity symbol.
\newcommand{\oo}{\infty}
% Upright labels padded with a thin space on both sides.
\def\diam{\,{\rm diam}\,}
\def\chs{\,{\rm chs}\,}
\def\join{\,{\rm join}\,}
\def\x{{\bf x}}
\def\X{{\bf X}}

%* labelling of equations, and theorems, lemmas, corollary:
%  Number equations within sections, e.g. (2.1).  \numberwithin (amsmath)
%  sets \theequation to \thesection.\arabic{equation} AND resets the
%  equation counter at every \section, so no manual
%  \setcounter{equation}{0} is needed at the beginning of each section.
\numberwithin{equation}{section}
%   Theorems, lemmas, corollaries, definitions and remarks share a single
%   counter that is numbered by section:
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

\title{Family Based Association Tests Using the \Rpackage{fbat} package}
\author{Weiliang Qiu \\
        email: \code{stwxq@channing.harvard.edu} \\
        Ross Lazarus \\
        email: \code{ross.lazarus@channing.harvard.edu} \\
	Gregory Warnes \\
        email: \code{warnes@bst.rochester.edu} \\
	Nitin Jain \\
        email: \code{nitin.jain@pfizer.com}
}
%

\maketitle

\hrule

\tableofcontents

\hrule

\section{Introduction}
The \texttt{R} package \Rpackage{fbat} can be used to test the
following null hypotheses for each marker based on family pedigrees:
\begin{eqnarray*}
H_{01}:& \mbox{the marker has no association and no linkage with the trait;}\\
H_{02}:& \mbox{the marker has no
  association  with the trait in the presence of linkage.}
\end{eqnarray*}

We assume that
    \begin{itemize}
      \item the families are \textbf{nuclear} families
      \item there are no missing genotypes and phenotypes for children
      \item markers are bi-allelic.
    \end{itemize}

A more general software package, \texttt{FBAT}, is available as a stand-alone
executable with documentation and example files from
\url{http://www.biostat.harvard.edu/~fbat/fbat.htm}. While this
\texttt{R} package has some important limitations at present, these
will be addressed in future versions.


\section{Pedigree data file format}
All fields are separated by whitespace (e.g. one or more spaces).

\begin{description}
  \item[First line:] names of all markers in the sequence of the
  genotype data. For example, marker$_1$, marker$_2$, $\ldots$,
  marker$_m$.

  \item[Remaining lines:]\quad
The remaining lines contain only non-negative integers and have the
same format: {\footnotesize
\begin{center}
\begin{tabular}{cccccc|ccccccc}
\hline family & pid & father& mother& sex & affection &
marker$_{1.1}$ & marker$_{1.2}$ &$\cdots$&
marker$_{m.1}$ & marker$_{m.2}$\\
\hline
\end{tabular}
\end{center}
} where
\begin{description}
  \item[family:] family id
  \item[pid:] patient id
  \item[father:] father id.

  Use $0$ (zero) for founders or marry-ins (parents not specified)
  in a pedigree. A \textbf{founder} in a pedigree is an individual who is
  not a child of any individuals in the pedigree.

  \item[mother:] mother id.

  Use $0$ (zero) for founders or marry-ins (parents not specified)
  in a pedigree. A \textbf{founder} in a pedigree is an individual who is
  not a child of any individuals in the pedigree.

  \item[sex:] $1$ -- male; $2$ -- female;

  \item[affection:] affection status (i.e., trait)

  $2$ -- affected; $1$ -- unaffected; $0$ -- unknown

  \item[marker$_{i.j}$:] allele $j$ of marker $i$, $j=1, 2$; $i=1,
  2,\ldots, m$.

  Non-missing alleles are represented by positive integers. Missing alleles are
  represented by zero ($0$).

\end{description}
\end{description}

\section{Data quality control}
The \texttt{R} package \Rpackage{fbat} also provides some basic QC functions.

The function \Rfunction{missGFreq} checks the completeness of 
genotypes. This function outputs counts of missing genotypes per marker and per subject.

The function \Rfunction{pedHardyWeinberg} checks the assumption of the Hardy-Weinberg equilibrium for markers.

The function \Rfunction{checkMendelian} checks the following possible Mendelian-related errors:
\begin{enumerate}
  \item father id $=$ subject id;  
  \item mother id $=$ subject id;  
  \item could not determine if an individual is a parent or a child in a family;
  \item inconsistent parental sex in a family;
  \item parental genotypes are not compatible with children's genotypes in a family;
  \item all children's genotypes are missing in a family;
  \item inconsistent sib genotypes in a family.
\end{enumerate}

\section{Examples}

To call the functions in the \texttt{R} package \Rpackage{fbat}, we
first need to load it into \texttt{R}: 
% The preceding sentence announces this command, so the chunk is echoed;
% results=hide still suppresses anything the call prints.
<<loadPackage, echo=TRUE, results=hide>>=
library(fbat)
@

To read the pedigree file \verb+CAMP.ped+ into \texttt{R}, we use
the function \Rfunction{readGenes} in the \texttt{R} package
\Rpackage{GeneticsBase}:
\begin{verbatim}
gSet<-readGenes(gfile="CAMP.ped", gformat="fbat")
\end{verbatim}
The function \Rfunction{readGenes} returns an object of the \texttt{R} class \Rclass{geneSet}.

Before we apply family based association tests, it would be good practice to check Hardy-Weinberg equilibrium for each marker based on parental data. We can use the function \Rfunction{pedHardyWeinberg} to do this. 
% Load the CAMP example data and run the Hardy-Weinberg check on it; the
% result is kept in `ch' for the viewHW() call in a later chunk.
<<>>=
data(CAMP)
ch<-pedHardyWeinberg(CAMP)
@
The column \code{nInfoInd} means the number of informative individuals, i.e. individuals whose genotypes contain no missing alleles for the specified marker;
the column \code{nGenotype} means number of possible genotypes; the column
\code{nHET} means number of heterozygous genotypes; the column \code{nHOM} means number of homozygous genotypes; the column \code{nAllele} means number of alleles; the column \code{nMissing} means number of missing alleles; the column \code{chi2} means the chi-square test statistic; the column \code{df} means the degrees of freedom of the chi-square test statistic under the null hypothesis that the Hardy-Weinberg condition holds; and the column \code{p-value} means the $p$-value of the test.

To view the statistics for individual markers, we can use the function \Rfunction{viewHW}. For example,
% Show the Hardy-Weinberg statistics stored in `ch' for the marker named "p79".
<<>>=
viewHW(ch, "p79")
@

To check Mendelian-related errors, we can use the function \Rfunction{checkMendelian}. For example,
% Run the Mendelian checks quietly, then print the per-marker counts and the
% first ten entries of the two per-family counts returned in `tmp'.
<<>>=
tmp <- checkMendelian(CAMP, quiet=TRUE)
cat("For each marker, how many families contain Mendelian errors?\n")
print(tmp$nMerrMarker)
cat("For each family, how many markers contain Mendelian errors?\n")
cat("tmp$nMerrFamily[1:10]>>\n")
print(tmp$nMerrFamily[1:10])
cat("For each family, how many times\n")
cat("'father id = subject id' or 'mother id = subject id'?\n")
cat("tmp$nErrFamilySample[1:10]>>\n")
print(tmp$nErrFamilySample[1:10])
@

To count the number of missing genotypes for a marker or for a subject, we can
use the function \Rfunction{missGFreq}. For example,
% Count missing genotypes over all subjects (founderOnly=FALSE) and print the
% per-marker counts and the first ten rows of the per-subject counts.
% Each header line ends with a newline so the printed values start on their
% own line, matching the headers in the checkMendelian chunk.
<<>>=
res <- missGFreq(CAMP, founderOnly=FALSE, quiet=TRUE)
cat("The number of missing genotypes for markers>>\n")
print(res$nMissMarkers)

cat("The number of missing genotypes for the first 10 subjects>>\n")
print(res$nMissSubjects[1:10,])
@

To get the family based association test statistics, we use the function \Rfunction{fbat}:
% Run fbat() on CAMP with its default arguments.  This replaces the earlier
% value of `res'; the new value is used by the summaryPvalue(), p.adjust()
% and viewstat() chunks that follow.
<<>>=
res<-fbat(CAMP)
@


The usage of the function \Rfunction{fbat} is
\begin{verbatim}
fbat(geneSetObject, model="a", traitMethod=3, traitOffset=0, quiet=TRUE)
\end{verbatim}

The function argument \Rfunarg{model} specifies the genotype codings.

By default, we use the additive model (\Rfunarg{model}=``a''). Other available models include dominant (\Rfunarg{model}=``d''), recessive (\Rfunarg{model}=``r''), and
genotype (\Rfunarg{model}=``g'') models.

The function argument \Rfunarg{traitMethod} indicates the trait
coding method. If \Rfunarg{traitMethod} is equal to $1$, then the
trait is represented by \code{trait-offset} where \code{trait} is
the sixth column (i.e., \code{affection} status) of the pedigree
matrix and the value of \code{offset} is provided by the argument
\Rfunarg{traitOffset}. If the argument \Rfunarg{traitMethod} takes
value other than $1$, then the trait is set to be $1$ if the sixth
column of the pedigree matrix takes value $2$ and the trait is set
to be $0$ if the sixth column of the pedigree matrix takes value
$1$.

The function \Rfunction{fbat} returns a list. To summarize the
values, degrees of freedom, and $p$-values of the test statistics
for the markers, we can use the function \Rfunction{summaryPvalue}:
% Summarise the fbat() result held in `res'.
<<>>= 
summaryPvalue(res) 
@

To adjust for multiple comparisons, we can use the function \Rfunction{p.adjust} in the R package \Rpackage{stats} to adjust the $p$-values. For example,
% Take the third column of res$statPvalue (presumably the p-values -- confirm
% against the fbat() return value), adjust it with every method listed in
% p.adjust.methods, and print the formatted results one column per method.
<<>>=
pvals <- res$statPvalue[, 3]
p.adjust.M <- p.adjust.methods
p.adj <- sapply(p.adjust.M, function(m) p.adjust(pvals, method = m))
noquote(apply(p.adj, MARGIN = 2, FUN = format.pval, digits = 3))
@

To view summary statistics of an individual marker, we can use the function 
\Rfunction{viewstat}. For example,
% Show the fbat() statistics stored in `res' for the marker named "p79".
<<>>=
viewstat(res, "p79")
@
Note that if the covariance matrix of the S score vector is singular, the Moore-Penrose generalized inverse is used.

Sometimes the user might want to know whether a genotype is homozygous or heterozygous. The function \Rfunction{pedFlagHomo} can provide that information. For example,
% Apply pedFlagHomo() to CAMP; the result is stored in `res.f'.
<<>>=
res.f<-pedFlagHomo(CAMP)
@

The function \Rfunction{pedGFreq} gets genotype frequencies and percentages. For example,
% Apply pedGFreq() to CAMP.  Note that this reuses the name `res', replacing
% the fbat() result assigned earlier.
<<>>=
  res<-pedGFreq(CAMP)
@

The function \Rfunction{pedAFreq} gets allele frequencies and percentages. For example,
% Apply pedAFreq() to CAMP.  Note that this reuses the name `res' again.
<<>>=
  res<-pedAFreq(CAMP)
@

The functions \Rfunction{fbat}, \Rfunction{pedHardyWeinberg}, \Rfunction{pedFlagHomo}, \Rfunction{pedGFreq}, and \Rfunction{pedAFreq} have default forms (\Rfunction{fbat.default}, \Rfunction{pedHardyWeinberg.default}, \Rfunction{pedFlagHomo.default}, \Rfunction{pedGFreq.default}, and \Rfunction{pedAFreq.default}) that use a pedigree matrix as input.

\appendix

\begin{center}
{\large\bf Appendix}
\end{center}
  

\section{Notation}
For a given marker,
\begin{itemize}
  \item $Y_{ij}$ --- Observed trait of the $j$-th offspring in family
  $i$.

  \item $T_{ij}$ --- A function of $Y_{ij}$.
  \begin{equation*}
    T_{ij}=T(Y_{ij}).
  \end{equation*}
  For example
  \begin{equation*}
    T_{ij}=T(Y_{ij})=Y_{ij}-\mu_{ij},
  \end{equation*}
  where $\mu_{ij}$ is an offset.

  \item $g_{ij}$ --- Genotype of the $j$-th offspring in family $i$;

  \item $X_{ij}$ --- A function of $g_{ij}$.
  \begin{equation*}
    X_{ij}=X(g_{ij}).
  \end{equation*}

  \item $S$ score:
  \begin{equation*}
    S=\sum_{ij}T_{ij}X_{ij}=\sum_{ij}T(Y_{ij})X(g_{ij}).
  \end{equation*}

  \item test statistic:
  \begin{equation*}
    U=S-\mbox{E}[S|H_0, {\cal C}],
  \end{equation*}
where ${\cal C}$ is a condition set. When parental genotypes are
complete, the condition set is ${\cal C}={\cal T}\cup{\cal G}$, where
${\cal T}$ is the observed traits in all family members and ${\cal
G}$ is the parental genotypes. When parental genotypes are
incomplete, the condition set is ${\cal C}={\cal T}\cup{\cal
G}^*\cup{\cal G}_{\mbox{offspring}}$, where ${\cal G}^*$ is the partially
observed parental genotypes and ${\cal G}_{\mbox{offspring}}$ is the
set of offspring genotypes (i.e., the offspring genotype
configuration).

  \item $V$ -- variance or covariance matrix of $U$ under the null hypothesis $H_0$. I.e.,
  \begin{equation*}
    V=\mbox{Cov}(U|H_0, {\cal C})=\Cov(S|H_0, {\cal C}).
  \end{equation*}

  \item For the univariate case,
  \begin{equation*}
  Z=\left.\frac{U}{\sqrt{V}}\right| H_0, {\cal C}\stackrel{\cdot}{\rightarrow} \N(0,
  1).
  \end{equation*}

  \item For the multivariate case,
  \begin{equation*}
  \chi^2=\left.U'V^{-}U\right| H_0, {\cal C}\stackrel{\cdot}{\rightarrow}
  \chi^2_r,
  \end{equation*}
  where $r=\rank(V)$ and $V^{-}$ denotes the inverse of $V$ (the
  Moore-Penrose generalized inverse when $V$ is singular).
\end{itemize}


%\section{Calculation of $\E(S|H_0, {\cal C})$ and $\Cov(S|H_0, {\cal C})$}
%Denote
%\begin{equation*}
%S_i=\sum_{j}X_{ij}T_{ij}.
%\end{equation*}
%Then
%\begin{equation*}
%S=\sum_{i}S_i
%\end{equation*}
%and
%\begin{equation*}
%\begin{aligned}
%\E(S|H_0, {\cal C})=&\sum_{i}\E(S_i|H_0, {\cal C})\\
%\Cov(S|H_0, {\cal C})=&\sum_{i}\Cov(S_i|H_0, {\cal C}).
%\end{aligned}
%\end{equation*}
%Note that we assume family $i$ and $j$ are independent for $i\neq
%j$.
%
%For family $i$,
%\begin{equation*}
%\begin{aligned}
%\E(S_i|H_0, {\cal C})=&\sum_{j}\E(X_{ij}|H_0, {\cal C})T_{ij},\\
%\Cov(S_i|H_0, {\cal C})=&\sum_{j}T_{ij}^2\Var(X_{ij}|H_0, {\cal
%C})+\sum_{j}\sum_{k\neq j}T_{ij}T_{ik}\Cov(X_{ij},\; X_{ik}|H_0,
%{\cal C}).
%\end{aligned}
%\end{equation*}
%
%It is not difficult to get
%\begin{equation*}
%\begin{aligned}
%\E(X_{ij} | H_0, {\cal C})=&\sum_{g_i}X(g_i)Pr(g_{ij}=g_i|H_0, {\cal C}),\\
%\Var(X_{ij} | H_0, {\cal C})=&\sum_{g_i}X(g_i)X'(g_i)Pr(g_{ij}=g_i|
%H_0, {\cal C})\\
%&-\sum_{g_i}\sum_{\tilde{g}_i}X(g_i)X'(g_i)Pr(g_{ij}=g_i | H_0,
%{\cal C})
%Pr(g_{ij}=\tilde{g}_i| H_0, {\cal C})\\
%\Cov(X_{ij},\; X_{ik} | H_0, {\cal
%C})=&\sum_{g_i}\sum_{\tilde{g}_i}X(g_i)X'(\tilde{g}_i)Pr(g_{ij}=g_i,
%g_{ik}=\tilde{g}_i|H_0, {\cal C})\\
%&-\sum_{g_i}\sum_{\tilde{g}_i}X(g_i)X'(\tilde{g}_i)Pr(g_{ij}=g_i|
%H_0, {\cal C})Pr(g_{ij}=\tilde{g}_i| H_0, {\cal C}).
%\end{aligned}
%\end{equation*}
%where $\sum_{g_i}$ denotes the sum over all offspring genotypes that
%are possible in the $i$th family.
%
%%\begin{equation*}
%%\begin{aligned}
%%\E(S_i|H_0, {\cal C})=&\sum_{j}T_{ij}\left[\sum_{g_i}X(g_i)Pr(g_{ij}=g_i|H_0, {\cal C})\right],\\\\
%%\Var(S_{i}|{\cal
%%C})=&\left(\sum_{j}T_{ij}\right)^2\sum_{g_i}\sum_{\tilde{g}_i}\left\{X(g_i)
%%X(\tilde{g}_i)'\left[Pr(g_{ij}=g_i, g_{ik}=\tilde{g}_i|{\cal
%%C})-Pr(g_{ij}=g_i|H_0, {\cal C})Pr(g_{ik}=\tilde{g}_i)
%%\right]\right\}\\
%%&+\sum_{j}T_{ij}^2\left[\sum_{g_i}X(g_i)X'(g_i)Pr(g_{ij}=g_i|{\cal
%%C})-\sum_{g_i}\sum_{\tilde{g}_i}X(g_i)X'(\tilde{g}_i)Pr(g_{ij}=g_i,
%%g_{ik}=\tilde{g}_i|H_0, {\cal C})\right]
%%\end{aligned}
%%\end{equation*}
%
%
%\section{Example}
%one parental genotype $AB$, $2$ affected children with genotypes
%$AA$ and $AC$ and one unaffected child with genotype $BC$.
%
%Assume that $T_{ij}=1$ if the $j$-th child is affected and
%$T_{ij}=0$ otherwise, $i=1$, $j=1, 2, 3$. Assume that the marker has
%$3$ alleles $A, B, C$ and that a multi-allelic, recessive marker
%coding (GREC, see table 4) is used.
%
%Possible genotypes and corresponding coding in children of this
%family are listed in Table \ref{Table: genotype and coding Xij by
%GREC}.
%\begin{table}[ht]
%\centering \caption{Genotype $g_1^{(i)}$ and coding
%$X\left(g_1^{(i)}\right)$ by GREC} \label{Table: genotype and coding
%Xij by GREC}
%\begin{tabular}{l|c}
%\hline Genotype $g_1^{(i)}$ & Coding $X\left(g_1^{(i)}\right)$\\
%\hline $g_1^{(1)}=AA$ & $X\left(g_1^{(1)}\right)=X(AA)=\left(\begin{array}{c}1\\0\\0\end{array}\right)$\\
%\hline $g_1^{(2)}=AB$ & $X\left(g_1^{(2)}\right)=X(AB)=\left(\begin{array}{c}0\\0\\0\end{array}\right)$\\
%\hline $g_1^{(3)}=AC$ & $X\left(g_1^{(3)}\right)=X(AC)=\left(\begin{array}{c}0\\0\\0\end{array}\right)$\\
%\hline $g_1^{(4)}=BB$ & $X\left(g_1^{(4)}\right)=X(BB)=\left(\begin{array}{c}0\\1\\0\end{array}\right)$\\
%\hline $g_1^{(5)}=BC$ & $X\left(g_1^{(5)}\right)=X(BC)=\left(\begin{array}{c}0\\0\\0\end{array}\right)$\\
%\hline $g_1^{(6)}=CC$ & $X\left(g_1^{(6)}\right)=X(CC)=\left(\begin{array}{c}0\\0\\1\end{array}\right)$\\
%\hline
%\end{tabular}
%\end{table}
%
%The conditional probabilities for family $1$ is given in Table
%\ref{Table: conditional probabilities for family 1}.
%\begin{table}[ht]
%\begin{center}
%\caption{conditional probabilities for family $1$} \label{Table:
%conditional probabilities for family 1}
%\begin{tabular}{l|l}
%\hline Children's genotype & Conditional
%probabilities\\
%configuration& \\
%\hline$\{AA, AC, BC\}$& $Pr(g_{1j}=AA|H_0,{\cal C})=0.4, Pr(g_{1j}=AB|H_0,{\cal C})=0.133$\\
%&\\
%&$Pr(g_{1j}=AC|H_0,{\cal C})=Pr(g_{1j}=BC|H_0,{\cal C})=0.233$\\
%&$Pr(g_{1j}=AA, g_{1k}=AA|H_0,{\cal C})$\\
%&$=Pr(g_{1j}=AA, g_{1k}=AB|H_0,{\cal C})=0.067$\\
%&\\
%&$Pr(g_{1j}=AA, g_{1k}=AC|H_0,{\cal C})$\\
%&$=Pr(g_{1j}=AA, g_{1k}=BC|H_0,{\cal C})=0.133$\\
%&\\
%&$Pr(g_{1j}=AB, g_{1k}=AB|H_0,{\cal C})=0$\\
%&$Pr(g_{1j}=AC, g_{1k}=AC|H_0,{\cal C})=Pr(g_{1j}=BC, g_{1k}=BC|H_0,{\cal C})$\\
%&$=Pr(g_{1j}=AB, g_{1k}=AC|H_0,{\cal C})
%=Pr(g_{1j}=AB, g_{1k}=BC|H_0,{\cal C})$\\
%&$=Pr(g_{1j}=AC, g_{1k}=BC|H_0,{\cal C})=0.033$\\
% \hline
%\end{tabular}
%\end{center}
%${\cal C}$ $=$ $\{\mbox{one parental genotype is $AB$}$, $T_{11}=1,
%T_{12}=1, T_{13}=0$, and the offspring genotype configuration is
%$\{AA, AC, BC\}\}$.
%\end{table}
%
%  Given the one parental genotype $AB$ and the offspring genotype
%configuration ($AA$, $AC$ and $BC$), we know that the other parental
%genotype has to be $AC$. And all possible offspring genotypes are in
%the set
%\begin{equation*}
%G=\{AA, AB, AC, BC\}.
%\end{equation*}
%
%We can get
%\begin{equation*}
%\begin{aligned}
%S_1=&X_{11}T_{11}+X_{12}T_{12}+X_{13}T_{13}\\
%=&X(g_{11})\cdot 1+X(g_{12})\cdot 1 + X(g_{13})\cdot 0\\
%=&X(g_1^{(1)})+X(g_1^{(3)})\\
%=&X(AA)+X(AC)\\
% =&\left(\begin{array}{c}1\\0\\0\end{array}\right)
%\end{aligned}
%\end{equation*}
%and
%\begin{equation*}
%\begin{aligned}
%E(S_1|H_0, {\cal
%C})=&T_{11}\E(X_{11})+T_{12}\E(X_{12})+T_{13}\E(X_{13})\\
%=&\E(X_{11})+\E(X_{12}),
%\end{aligned}
%\end{equation*}
%where
%\begin{equation*}
%\begin{aligned}
%\E(X_{1j}|H_0, {\cal C})=&X\left(AA\right)Pr\left(g_{11}=AA|H_0,
%{\cal C}\right)+X\left(AC\right)Pr\left(g_{11}=AC|H_0, {\cal
%C}\right)\\
%&+X\left(BC\right)Pr\left(g_{11}=BC|H_0, {\cal C}\right)
%+X\left(AB\right)Pr\left(g_{11}=AB|H_0, {\cal
%C}\right)\\
%=&\left(\begin{array}{c}0.4\\0\\0\end{array}\right).
%\end{aligned}
%\end{equation*}
%Note that $Pr(g_{11}=AA|H_0, {\cal C})+Pr(g_{11}=AB|H_0, {\cal
%C})+Pr(g_{11}=AC|H_0, {\cal C})+Pr(g_{11}=BC|H_0, {\cal
%C})=2/5+4/30+7/30+7/30=1$.
%
%Thus,
%\begin{equation*}
%\E(S_1|H_0, {\cal
%C})=\left(\begin{array}{c}0.8\\0\\0\end{array}\right)
%\end{equation*}
%
%Now we calculate the covariance matrix of $S_1 | H_0, {\cal C}$.
%\begin{equation*}
%\begin{aligned}
%\Var(X_{1j}|H_0, {\cal C})=&\sum_{g_1\in
%G}X(g_1)X'(g_1)Pr(g_{1j}=g_1|H_0, {\cal C})\\
%&-\sum_{g_1\in G}X(g_1)Pr(g_{1j}=g_1|H_0, {\cal
%C})\sum_{\tilde{g}_1\in
%G}X'(\tilde{g}_1)Pr(g_{1j}=\tilde{g}_1|H_0, {\cal C})\\
%=&X(AA)X'(AA)Pr(g_{1j}=AA|H_0, {\cal C})\\
%&-X(AA)Pr(g_{1j}=AA|H_0, {\cal
%C})X'(AA)Pr(g_{1j}=AA|H_0, {\cal C})\\
%=&\left(\begin{array}{ccc}0.4&0&0\\0&0&0\\0&0&0\end{array}\right)
%-\left(\begin{array}{ccc}0.16&0&0\\0&0&0\\0&0&0\end{array}\right)\\
%=&\left(\begin{array}{ccc}0.24&0&0\\0&0&0\\0&0&0\end{array}\right).
%\end{aligned}
%\end{equation*}
%\begin{equation*}
%\begin{aligned}
%\Cov(X_{1j},\; X_{1k}|H_0, {\cal C})=&\sum_{g_1\in
%G}\sum_{\tilde{g}_1\in G}X(g_1)X'(\tilde{g}_1)Pr(g_{1j}=g_1, g_{1k}=\tilde{g}_1|H_0, {\cal C})\\
%&-\sum_{g_1\in G}X(g_1)Pr(g_{1j}=g_1|H_0, {\cal
%C})\sum_{\tilde{g}_1\in
%G}X'(\tilde{g}_1)Pr(g_{1j}=\tilde{g}_1|H_0, {\cal C})\\
%=&X(AA)X'(AA)Pr(g_{1j}=AA, g_{1k}=AA|H_0, {\cal
%C})\\
%&-X(AA)Pr(g_{1j}=AA|H_0, {\cal C})X'(AA)Pr(g_{1j}=AA|H_0, {\cal C})\\
%=&\left(\begin{array}{ccc}0.067&0&0\\0&0&0\\0&0&0\end{array}\right)
%-\left(\begin{array}{ccc}0.16&0&0\\0&0&0\\0&0&0\end{array}\right)\\
%=&\left(\begin{array}{ccc}-0.093&0&0\\0&0&0\\0&0&0\end{array}\right).
%\end{aligned}
%\end{equation*}
%Thus,
%\begin{equation*}
%\begin{aligned}
%\Var(S_1|H_0, {\cal C})=&\sum_{j=1}^{3}T_{1j}^2\Var(X_{1j}|H_0,
%{\cal C})+\sum_{j=1}^{3}\sum_{k\neq j}T_{1j}T_{1k}\Cov(X_{1j},
%X_{1k}|H_0,
%{\cal C})\\
%=&2\Var(X_{1j})+\Cov(X_{11}, X_{12}|H_0, {\cal C})+\Cov(X_{12},
%X_{11}|H_0,
%{\cal C})\\
%=&\left(\begin{array}{ccc}0.48&0&0\\0&0&0\\0&0&0\end{array}\right)+
%\left(\begin{array}{ccc}-0.186&0&0\\0&0&0\\0&0&0\end{array}\right)\\
%=&\left(\begin{array}{ccc}0.294&0&0\\0&0&0\\0&0&0\end{array}\right)
%\end{aligned}
%\end{equation*}
%
%Therefore
%\begin{equation*}
%\begin{aligned}
%U=S_1-\E(S_1|H_0, {\cal
%C})=\left(\begin{array}{c}1\\0\\0\end{array}\right)-\left(\begin{array}{c}0.8\\0\\0\end{array}\right)
%=\left(\begin{array}{c}0.2\\0\\0\end{array}\right)\\
%V^{-}=\left(\begin{array}{ccc}0.294&0&0\\0&0&0\\0&0&0\end{array}\right)^{-}=
%\left(\begin{array}{ccc}1/0.294&0&0\\0&0&0\\0&0&0\end{array}\right)
%\end{aligned}
%\end{equation*}
%and
%\begin{equation*}
%\chi^2=U'V^{-1}U|H_0, {\cal C}=\frac{0.04}{0.294}=0.136.
%\end{equation*}
%
%The asymptotic $p$-value is
%\begin{equation*}
%Pr(\chi^2_1>0.136|H_0, {\cal C})=0.712.
%\end{equation*}
%So there is no enough evidence to reject the null hypothesis $H_0:
%\mbox{no association and no linkage}$. Note that there is only one
%family. Hence the asymptotic $p$-value is not reliable.

\section{Genotype coding methods}

Denote $K$ as the number of all possible different alleles for the
locus and $X$ as the vector of genotype coding.

\begin{description}
  \item[GEN] $X$ is a vector with length equal to the
number of genotypes that are possible given the parental genotypes
in the sample, a maximum of $K(K+1)/2$ genotypes, and with elements
equal to $1$ or $0$ to indicate which of the possible genotypes is
equal to the genotype $g$.

  \item[GDOM] codes the $j$th element of the vector $X$ as $x_j=1$
  if genotype $g$ has one or two alleles of type $j$, otherwise
  $x_j=0$. $X$ is a vector of length $K$.

  \item[GREC] codes the $j$th element of the vector $X$ as $x_j=1$
  if genotype $g$ has two alleles of type $j$, otherwise
  $x_j=0$. $X$ is a vector of length $K$.

  \item[GTDT] scores the number of alleles of a particular type by coding
$x_j$ equal to the number of alleles of type $j$ in the genotype $g$
(i.e., $x_j=0, 1$, or $2$ if $g$ has $0$, $1$ or $2$ alleles of type
$j$). $X$ is a vector of length $K$.
\end{description}


\begin{description}

  \item[2-allele case]\quad
%\begin{center}
%\begin{tabular}{l|ccc}
%\hline & $AA$ &$Aa$&$aa$\\
%\hline Additive& 0 & 1 & 2\\
%Recessive & 0 & 0 & 1\\
%Dominant & 0 & 1 & 1\\
%\hline
%\end{tabular}
%\end{center}

%  \begin{table}[ht]
%  \centering
\begin{center}
  %\caption{
  Example of different marker codings for a marker with
  $K=2$
  alleles, see Schaid (1996)
  %}

  \begin{tabular}{c|cccc}
  \hline
  genotype& \multicolumn{4}{c}{$X(g)$}\\
  \hline
  $g$&GEN&GDOM&GREC&GTDT\\
  \hline
  & & ($A$, $a$)& ($A$, $a$)&($A$, $a$)\\
  \hline
  $AA$&(0,0,0)&(1,0)&(1,0)&(2,0)\\
  $Aa$&(1,0,0)&(1,1)&(0,0)&(1,1)\\
  $aa$&(0,1,0)&(0,1)&(0,1)&(0,2)\\
  \hline
  \end{tabular}
  \end{center}
%  \end{table}



  \item[3-allele case]\quad
%  \begin{table}[ht]
  \begin{center}
%  \centering
%  \caption{
Example of different marker codings for a marker with
  $K=3$
  alleles, see Schaid (1996) (This table is Table 4 of Horvath et al.'s report for FBAT software)
  %}

  \begin{tabular}{c|cccc}
  \hline
  genotype& \multicolumn{4}{c}{$X(g)$}\\
  \hline
  $g$&GEN&GDOM&GREC&GTDT\\
  \hline
  & & ($A$, $B$, $C$)& ($A$, $B$, $C$)& ($A$, $B$, $C$)\\
  \hline
  $AA$&(0,0,0,0,0)&(1,0,0)&(1,0,0)&(2,0,0)\\
  $AB$&(1,0,0,0,0)&(1,1,0)&(0,0,0)&(1,1,0)\\
  $AC$&(0,1,0,0,0)&(1,0,1)&(0,0,0)&(1,0,1)\\
  $BB$&(0,0,1,0,0)&(0,1,0)&(0,1,0)&(0,2,0)\\
  $BC$&(0,0,0,1,0)&(0,1,1)&(0,0,0)&(0,1,1)\\
  $CC$&(0,0,0,0,1)&(0,0,1)&(0,0,1)&(0,0,2)\\
  \hline
  \end{tabular}
  \end{center}
%  \end{table}



 % How to specify which allele is $A$, which allele is $B$,
 % and which allele is $C$?

%  \item[4-allele case]
\end{description}

\section{Trait coding methods} Denote $Y_{ij}$ as the trait of the
$j$-th child of the $i$-th nuclear family. $Y_{ij}$ can be
dichotomous, measured (i.e., continuous), or time-to-onset (i.e.,
censored).

The trait coding methods ($T_{ij}=T(Y_{ij})$) are listed below:
\begin{itemize}
  \item $T_{ij}=1$ if the $j$th child is affected; $T_{ij}=0$
  otherwise.
  \item $T_{ij}=Y_{ij}-\mu_{ij}$, where $\mu_{ij}$ is an offset.
  \item $T_{ij}=Y_{ij}-\mu_{ij}(\mb{x}'\mb{\beta})$, where
  $\E(Y_{ij}|\mb{x})=\mu_{ij}(\mb{x}'\mb{\beta})$, $\mb{x}$
  is the design matrix of covariates, and $\mb{\beta}$ is the vector of
  unknown parameters.
\end{itemize}



\end{document}