%\VignetteIndexEntry{iCARE Vignette Model Validation}
%\VignettePackage{iCARE}
%\VigetteDepends{iCARE}

\documentclass[a4paper]{article}
\begin{document}
\SweaveOpts{concordance=TRUE}

\title{iCARE (Individualized Coherent Absolute Risk Estimation) Package}
\maketitle

%Install the iCARE package 
%<<start>>=
%if (!requireNamespace("BiocManager", quietly=TRUE))
%    install.packages("BiocManager")
%BiocManager::install("iCARE", version = 3.8)
%@

Load the iCARE library
<<start>>=
library(iCARE)
@

Load the breast cancer data and set the seed.
<<load-data>>=
data("bc_data", package="iCARE")
set.seed(50)
@

\section*{Example 1: SNP-only model}
In this example, we will estimate the risk of breast cancer in ages 50-80.
A SNP-only model is fit, with no specific genotypes supplied for estimation. 
The population disease rates are from SEER.
<<model-1a>>=
res_snps_miss = computeAbsoluteRisk(model.snp.info = bc_72_snps,
                       model.disease.incidence.rates = bc_inc,
                       model.competing.incidence.rates = mort_inc,
                       apply.age.start = 50, apply.age.interval.length = 30,
                       return.refs.risk = TRUE)
@
Compute a summary of the risks.
<<summary-1a>>=
summary(res_snps_miss$refs.risk)
@

Next, suppose we want to predict risk for three specific women whom we have genotyped; we can then call:
<<model 1b>>=
res_snps_dat = computeAbsoluteRisk(model.snp.info = bc_72_snps,
                     model.disease.incidence.rates = bc_inc,
                     model.competing.incidence.rates = mort_inc,
                     apply.age.start = 50, apply.age.interval.length = 30,
                     apply.snp.profile = new_snp_prof,
                     return.refs.risk = TRUE)
names(res_snps_dat)
@
These results allow us to create a useful plot showing the distribution of risks in our reference dataset 
and to add the risks of the three women to see where they fall on the population distribution.
<<plot-1b, fig=TRUE>>=
plot(density(res_snps_dat$refs.risk),
           xlim = c(0.04,0.18), xlab = "Absolute Risk of Breast Cancer",
           main = "Referent SNP-only Risk Distribution: Ages 50-80 years")
abline(v = res_snps_dat$risk, col = "red")
legend("topright", legend = "New profiles", col = "red", lwd = 1)
@

\section*{Example 2: Breast cancer risk model with risk-factors and SNPs}

In this example, we will estimate the risk of breast cancer in ages 50-80 by fitting a model with classical risk factors and 72 SNPs, 
with three specific covariate profiles supplied for estimation (with some missing data). 
More details on risk factors are available in the manual.

<<ex-2>>=
res_covs_snps = computeAbsoluteRisk(model.formula = bc_model_formula,
                                model.cov.info = bc_model_cov_info,
                                model.snp.info = bc_72_snps,
                                model.log.RR = bc_model_log_or,
                                model.ref.dataset = ref_cov_dat,
                                model.disease.incidence.rates = bc_inc,
                                model.competing.incidence.rates = mort_inc,
                                model.bin.fh.name = "famhist",
                                apply.age.start = 50,
                                apply.age.interval.length = 30,
                                apply.cov.profile = new_cov_prof,
                                apply.snp.profile = new_snp_prof,
                                return.refs.risk = TRUE)
@
In addition to summarizing and plotting the risk estimates, iCARE includes an option to view more detailed output, by calling:
<<fit-2>>=
print(res_covs_snps$details)
@
\section*{Illustration of the validation component}
We want to validate a model for predicting absolute risk of disease based on a combined model of classical risk 
 factors and 72 SNPs using the nested case-control dataset. 

The first step is to compute sampling weights. We fit a logistic regression model of inclusion depending on the case/control status, 
 age of study entry and observed followup using the R function \textbf{glm}, as shown below:
<<sampling-weights>>=
validation.cohort.data$inclusion = 0
subjects_included = intersect(validation.cohort.data$id,
                            validation.nested.case.control.data$id)
validation.cohort.data$inclusion[subjects_included] = 1

validation.cohort.data$observed.followup =
                        validation.cohort.data$study.exit.age -
                            validation.cohort.data$study.entry.age

selection.model = glm(inclusion ~ observed.outcome
                       * (study.entry.age + observed.followup),
                              data = validation.cohort.data,
                              family = binomial(link = "logit"))
validation.nested.case.control.data$sampling.weights =
        selection.model$fitted.values[validation.cohort.data$inclusion == 1]
@
The next step is to call the \textbf{ModelValidation} function to implement the validation analysis.
<<model-validation>>=
data = validation.nested.case.control.data
risk.model = list(model.formula = bc_model_formula,
                   model.cov.info = bc_model_cov_info,
                   model.snp.info = bc_72_snps,
                   model.log.RR = bc_model_log_or,
                   model.ref.dataset = ref_cov_dat,
                   model.ref.dataset.weights = NULL,
                   model.disease.incidence.rates = bc_inc,
                   model.competing.incidence.rates = mort_inc,
                   model.bin.fh.name = "famhist",
                   apply.cov.profile = data[,all.vars(bc_model_formula)[-1]],
                   apply.snp.profile = data[,bc_72_snps$snp.name],
                   n.imp = 5, use.c.code = 1, return.lp = TRUE,
                   return.refs.risk = TRUE)

output = ModelValidation(study.data = data,
                         total.followup.validation = TRUE,
                         predicted.risk.interval = NULL,
                         iCARE.model.object = risk.model,
                         number.of.percentiles = 10)
@
\setkeys{Gin}{width=1.3\textwidth}
We can also produce a set of useful plots showing the results of the validation analysis.
<<plot-model-validation, fig=TRUE, height = 12, width = 9>>=
plotModelValidation(study.data = data,validation.results = output)
@
\section*{Session Information}
<<sessionInfo>>=
sessionInfo()
@ 

\end{document}