\documentclass[12pt]{article}
%% vignette index specifications need to be *after* \documentclass{}
%\VignetteEngine{knitr::knitr}
%\VignetteIndexEntry{Model evaluation}
%\VignettePackage{glmmTMB}
%\VignetteDepends{ggplot2}
%\VignetteDepends{car}
%\VignetteDepends{emmeans}
%\VignetteDepends{effects}
%\VignetteDepends{multcomp}
%\VignetteDepends{MuMIn}
%\VignetteDepends{DHARMa}
%\VignetteDepends{broom}
%\VignetteDepends{broom.mixed}
%\VignetteDepends{dotwhisker}
%\VignetteDepends{texreg}
%\VignetteDepends{xtable}
%\VignetteDepends{huxtable}
%\usepackage{lineno}
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage[american]{babel}
%% for huxtable
\usepackage{array}
\usepackage{caption}
%% siunitx is needed for *some* huxtable functions,
%% but messes up Solaris tests
%% \usepackage{siunitx}
\usepackage{colortbl}
\usepackage{multirow}
\usepackage{hhline}
\usepackage{calc}
\usepackage{tabularx}
\usepackage{threeparttable} % maybe not available elsewhere?
\usepackage{wrapfig}
\usepackage{adjustbox}
\newcommand{\R}{{\sf R}}
\newcommand{\fixme}[1]{\textbf{\color{red} fixme: #1}}
\newcommand{\notimpl}[1]{\emph{\color{magenta} #1}}
\usepackage{url}
\usepackage{hyperref}
\usepackage{fancyvrb}
\usepackage{natbib}
%% \code{} below is not safe with \section{} etc.
\newcommand{\tcode}[1]{{\tt #1}}
\VerbatimFootnotes
\bibliographystyle{chicago}
%% need this for output of citation() below ...
\newcommand{\bold}[1]{\textbf{#1}}
%% code formatting
%% https://tex.stackexchange.com/questions/273843/inline-verbatim-with-line-breaks-colored-font-and-highlighting/280212
% \usepackage{xcolor} %% see knit_hooks$set(...) below
\newcommand\code[1]{\mytokenshelp#1 \relax\relax}
\def\mytokenshelp#1 #2\relax{\allowbreak\grayspace\tokenscolor{#1}\ifx\relax#2\else \mytokenshelp#2\relax\fi}
%\newcommand\tokenscolor[1]{\colorbox{gray!20}{\textcolor{blue}{%
%    \ttfamily\mystrut\smash{\detokenize{#1}}}}}
\newcommand\tokenscolor[1]{\colorbox{gray!0}{\textcolor{black}{%
    \ttfamily\mystrut\smash{\detokenize{#1}}}}}
\def\mystrut{\rule[\dimexpr-\dp\strutbox+\fboxsep]{0pt}{%
    \dimexpr\normalbaselineskip-2\fboxsep}}
\def\grayspace{\hspace{0pt minus \fboxsep}}

\title{Post-model-fitting procedures with \tcode{glmmTMB} models:
  diagnostics, inference, and model output}
\date{\today}
\author{}

\begin{document}
\maketitle
%\linenumbers

%% FIXME: pipeline for re-running stored objects
%% FIXME: improve owls fit so DHARMa looks OK?
%% FIXME: fix broom.mixed:tidy method
%% FIXME: huxtable issues

<<>>=
library("knitr")
opts_chunk$set(fig.width = 5, fig.height = 5,
               error = FALSE,
               out.width = "0.8\\textwidth", echo = TRUE)
## https://tex.stackexchange.com/questions/148188/knitr-xcolor-incompatible-color-definition/254482
knit_hooks$set(document = function(x) {
  sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)
})
Rver <- paste(R.version$major, R.version$minor, sep = ".")
used.pkgs <- c("glmmTMB", "bbmle")  ## packages to report below
@

<<>>=
## https://stackoverflow.com/questions/23840523/check-if-os-is-solaris
is.solaris <- function() {
  grepl('SunOS', Sys.info()['sysname'])
}
is.windows <- function() {
  .Platform$OS.type == "windows"
}
is.cran <- function() {
  !identical(Sys.getenv("NOT_CRAN"), "true")
}
huxtable_OK <- (!is.solaris()) && !(is.windows() && is.cran())
@

The purpose of this vignette is to describe (and test) the functions
in various downstream packages that are available for summarizing and
otherwise interpreting \code{glmmTMB} fits.
Some of the packages/functions discussed below may not be suitable
for inference on the parameters of the zero-inflation or dispersion
models; they may be restricted to the conditional-mean model.

<<>>=
library(glmmTMB)
library(car)
library(emmeans)
library(effects)
library(multcomp)
library(MuMIn)
require(DHARMa, quietly = TRUE) ## may be missing ...
library(broom)
library(broom.mixed)
require(dotwhisker, quietly = TRUE)
library(ggplot2); theme_set(theme_bw())
library(texreg)
library(xtable)
if (huxtable_OK) library(huxtable)
## retrieve slow stuff
L <- gt_load("vignette_data/model_evaluation.rda")
@

A couple of example models:

% don't need to evaluate this since we have loaded owls_nb1 from model_evaluation.rda
<<eval=FALSE>>=
owls_nb1 <- glmmTMB(SiblingNegotiation ~ FoodTreatment*SexParent +
                        (1|Nest) + offset(log(BroodSize)),
                    contrasts = list(FoodTreatment = "contr.sum",
                                     SexParent = "contr.sum"),
                    family = nbinom1,
                    zi = ~1,
                    data = Owls)
@

<<>>=
data("cbpp", package = "lme4")
cbpp_b1 <- glmmTMB(incidence/size ~ period + (1|herd),
                   weights = size, family = binomial,
                   data = cbpp)
## simulated three-term Beta example
set.seed(1001)
dd <- data.frame(z = rbeta(1000, shape1 = 2, shape2 = 3),
                 a = rnorm(1000), b = rnorm(1000), c = rnorm(1000))
simex_b1 <- glmmTMB(z ~ a*b*c, family = beta_family, data = dd)
@

\section{Model checking and diagnostics}

\subsection{\tcode{DHARMa}}

The \code{DHARMa} package provides diagnostics for hierarchical
models. After running

% set to eval=FALSE since we have this stored in model_evaluation.rda
<<eval=FALSE>>=
owls_nb1_simres <- simulateResiduals(owls_nb1)
@

you can plot the results:

<<eval=FALSE>>=
plot(owls_nb1_simres)
@

<<echo=FALSE>>=
if (require(DHARMa, quietly = TRUE)) plot(owls_nb1_simres)
@

\code{DHARMa} provides many other methods based on the simulated
residuals: see \tcode{vignette("DHARMa", package="DHARMa")}.
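For example, the stored simulated residuals can be passed to
\code{DHARMa}'s dispersion and zero-inflation tests (a minimal
sketch, not evaluated here since \code{DHARMa} may be missing; see
the \code{DHARMa} vignette for the full set of available tests):

<<eval=FALSE>>=
## simulation-based tests on the DHARMa residuals computed above
testDispersion(owls_nb1_simres)    ## over/underdispersion
testZeroInflation(owls_nb1_simres) ## more zeros than expected?
@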
\subsubsection{issues}

\begin{itemize}
\item \code{DHARMa} will only work for models using families for
  which a simulate method has been implemented (in \code{TMB}, and
  appropriately reflected in \code{glmmTMB})
\end{itemize}

\section{Inference}

\subsection{\tcode{car::Anova}}

We can use \code{car::Anova()} to get traditional ANOVA-style tables
from \code{glmmTMB} fits. A few limitations/reminders:

\begin{itemize}
\item these tables use Wald $\chi^2$ statistics for comparisons
  (neither likelihood ratio tests nor $F$ tests)
\item they apply to the fixed effects of the conditional component of
  the model only (other components \emph{might} work, but haven't
  been tested at all)
\item as always, if you want to do type 3 tests, you should probably
  set sum-to-zero contrasts on factors and center numerical
  covariates (see the \code{contrasts} argument above)
\end{itemize}

<<>>=
if (requireNamespace("car") && getRversion() >= "3.6.0") {
    Anova(owls_nb1)  ## default type II
    Anova(owls_nb1, type = "III")
}
@

\subsection{\tcode{effects}}

<<>>=
effects_ok <- (requireNamespace("effects") && getRversion() >= "3.6.0")
if (effects_ok) {
    (ae <- allEffects(owls_nb1))
    plot(ae)
}
@

(the error can probably be ignored)

<<>>=
if (effects_ok) {
    plot(allEffects(simex_b1))
}
@

\subsection{\tcode{emmeans}}

<<>>=
emmeans(owls_nb1, poly ~ FoodTreatment | SexParent)
@

Let us also consider a corresponding hurdle model:

<<>>=
owls_hnb1 <- update(owls_nb1,
                    family = truncated_nbinom1,
                    ziformula = ~.)
@

On the response scale, this model estimates the means of the
component distribution as follows:

<<>>=
emmeans(owls_hnb1, ~ FoodTreatment * SexParent,
        component = "cond", type = "response")
# --- or ---
emmeans(owls_hnb1, ~ FoodTreatment * SexParent,
        component = "cmean")
@

These estimates differ because the first ones are back-transformed
from the linear predictor, which is based on the \emph{un-truncated}
component distribution, while the second ones are estimates of the
means of the \emph{truncated} distribution (with zero omitted). This
discrepancy occurs only with hurdle models.

The response means combine both the conditional and the
zero-inflation model:

<<>>=
emmeans(owls_hnb1, ~ FoodTreatment * SexParent,
        component = "response")
@

\subsection{\tcode{drop1}}

\code{stats::drop1} is a built-in R function that refits the model
with various terms dropped. In its default mode it respects
marginality (i.e., it will only drop the top-level interactions, not
the main effects):

<<>>=
system.time(owls_nb1_d1 <- drop1(owls_nb1, test = "Chisq"))
@

<<>>=
print(owls_nb1_d1)
@

In principle, using \code{scope = . ~ . - (1|Nest)} should execute a
``type-3-like'' series of tests, dropping the main effects one at a
time while leaving the interaction in (we have to use
\code{- (1|Nest)} to exclude the random effects, because \code{drop1}
can't handle them). However, due to the way that R handles formulas,
dropping main effects from an interaction of \emph{factors} has no
effect on the overall model. (It would work if we were testing the
interaction of continuous variables.)
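For reference, here is the form of such a call (a sketch only, not
evaluated here; as just noted, dropping the factor main effects will
not actually change the fitted model):

<<eval=FALSE>>=
## attempted "type-3-like" tests; the random-effects term is
## excluded from the scope because drop1 can't handle it
drop1(owls_nb1, scope = . ~ . - (1|Nest), test = "Chisq")
@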
\subsubsection{issues}

The \code{mixed} package implements a true ``type-3-like''
parameter-dropping mechanism for \code{[g]lmer} models. Something
like that could in principle be applied here.

\subsection{Model selection and averaging with \tcode{MuMIn}}

We can run \code{MuMIn::dredge(owls_nb1)} on the model to fit all
possible submodels.
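A sketch of that call follows (not evaluated here; depending on your
settings, \code{dredge} may require the model to be refitted with
\code{na.action = "na.fail"} so that all submodels use the same data
set):

<<eval=FALSE>>=
## fit all submodels of owls_nb1 (slow);
## the results are stored in vignette_data/model_evaluation.rda
owls_nb1_dredge <- MuMIn::dredge(owls_nb1)
@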
Since this takes a little while (45 seconds or so), we've instead
loaded some previously computed results:

% stored in vignette_data/model_evaluation.rda ...
<<>>=
print(owls_nb1_dredge)
@

<<>>=
op <- par(mar = c(2, 5, 14, 3))
plot(owls_nb1_dredge)
par(op) ## restore graphics parameters
@

Model averaging:

<<>>=
model.avg(owls_nb1_dredge)
@

\subsubsection{issues}

\begin{itemize}
\item may not work for Beta models, because the \code{family}
  component (``beta'') is not identical to the name of the family
  function (\code{beta_family()})? (Kamil Bartoń, pers. comm.)
\end{itemize}

\subsection{\tcode{multcomp} for multiple comparisons and \emph{post hoc} tests}

<<>>=
g1 <- glht(cbpp_b1, linfct = mcp(period = "Tukey"))
summary(g1)
@

\section{Extracting coefficients, coefficient plots and tables}

\subsection{\tcode{broom} and friends}

The \code{broom} and \code{broom.mixed} packages are designed to
extract information from a broad range of models in a convenient
(tidy) format; the \code{dotwhisker} package builds on this platform
to draw elegant coefficient plots.

<<>>=
if (requireNamespace("broom.mixed") && requireNamespace("dotwhisker")) {
  t1 <- broom.mixed::tidy(owls_nb1, conf.int = TRUE)
  t1 <- transform(t1,
                  term = sprintf("%s.%s", component, term))
  if (packageVersion("dotwhisker") > "0.4.1") {
    dw <- dwplot(t1)
  } else {
    owls_nb1$coefficients <- TRUE ## hack!
    dw <- dwplot(owls_nb1, by_2sd = FALSE)
  }
  print(dw + geom_vline(xintercept = 0, lty = 2))
}
@

\subsubsection{issues}

(these are more general \code{dwplot} issues)

\begin{itemize}
\item use black rather than \code{color(1)} when there's only a
  single model, i.e.\ only add \code{aes(colour=model)}
  conditionally?
\item draw points even if the standard errors/confidence intervals
  are \code{NA} (draw \code{geom_point()} as well as
  \code{geom_pointrange()}? would need to apply all aesthetics,
  dodging, etc.\ to both \ldots)
\item for \code{glmmTMB} models, allow labeling by component? or
  should this be done by manipulating the tidied frame first?
  (i.e.: \verb+tidy(.) %>% tidyr::unite(term, c(component, term))+)
\end{itemize}

\subsection{coefficient tables with \tcode{xtable}}

The \code{xtable} package can output data frames as \LaTeX\ tables;
this isn't quite as elegant as \code{stargazer} etc., but it is not a
bad start. I've sprinkled lots of hard line-breaks, spaces, and
newlines in below; someone better at \TeX\ could certainly do a
better job. (\code{xtable} can also produce HTML output.)

<<>>=
ss <- summary(owls_nb1)
## print a table, adding a title and some vertical space
pxt <- function(x, title) {
  cat(sprintf("{\n\n\\textbf{%s}\n\\ \\\\\\vspace{2pt}\\ \\\\\n", title))
  print(xtable(x), floating = FALSE); cat("\n\n")
  cat("\\ \\\\\\vspace{5pt}\\ \\\\\n")
}
@

<<eval=FALSE>>=
pxt(lme4::formatVC(ss$varcor$cond), "random effects variances")
pxt(coef(ss)$cond, "conditional fixed effects")
pxt(coef(ss)$zi, "conditional zero-inflation effects")
@

<<echo=FALSE,results="asis">>=
if (requireNamespace("xtable")) {
  pxt(lme4::formatVC(ss$varcor$cond), "random effects variances")
  pxt(coef(ss)$cond, "conditional fixed effects")
  pxt(coef(ss)$zi, "conditional zero-inflation effects")
}
@

\subsection{coefficient tables with \tcode{texreg}}

For now, to avoid needing to import the \tcode{texreg} package, we
provide the required \tcode{extract.glmmTMB} method in a separate R
file that you can load with \tcode{source()}, as follows:

<<results="asis">>=
source(system.file("other_methods", "extract.R", package = "glmmTMB"))
texreg(owls_nb1, caption = "Owls model", label = "tab:owls")
@

See output in Table~\ref{tab:owls}.

\subsection{coefficient tables with \tcode{huxtable}}

The \code{huxtable} package allows output in either \LaTeX\ or HTML;
this example is tuned for \LaTeX.

<<results="asis">>=
if (!huxtable_OK) {
  cat("Sorry, huxtable+LaTeX is unreliable on this platform; skipping\n")
} else {
  cc <- c("intercept (mean)" = "(Intercept)",
          "food treatment (starvation)" = "FoodTreatment1",
          "parental sex (M)" = "SexParent1",
          "food $\\times$ sex" = "FoodTreatment1:SexParent1")
  h0 <- huxreg(" " = owls_nb1, # give model a blank name so we don't get '(1)'
               tidy_args = list(effects = "fixed"),
               coefs = cc,
               error_pos = "right",
               statistics = "nobs" # don't include logLik and AIC
               )
  names(h0)[2:3] <- c("estimate", "std. err.")
  ## allow use of math notation in name
  h1 <- set_cell_properties(h0, row = 5, col = 1, escape_contents = FALSE)
  cat(to_latex(h1, tabular_only = TRUE))
}
@

\subsubsection{issues}

\begin{itemize}
\item \code{huxtable} needs quite a few additional \LaTeX\ packages:
  use \code{report_latex_dependencies()} to see what they are.
\end{itemize}

\section{Influence measures}

\emph{Influence measures} quantify the effects of particular
observations, or groups of observations, on the results of a
statistical model; \emph{leverage} and \emph{Cook's distance} are the
two most common formats for influence measures. If a
\href{https://en.wikipedia.org/wiki/Projection_matrix}{projection
  matrix} (or ``hat matrix'') is available, influence measures can be
computed efficiently; otherwise, the same quantities can be estimated
by brute-force methods, refitting the model with each group or
observation successively left out.

We've adapted the \tcode{car::influence.merMod} function to handle
\tcode{glmmTMB} models; because it uses brute force, it can be slow,
especially when evaluating the influence of individual observations.
For now, it is included as a separate source file rather than
exported as a method (see below), although it may be included in the
package (or incorporated in the \tcode{car} package) in the future.

<<>>=
source(system.file("other_methods", "influence_mixed.R", package = "glmmTMB"))
@

% set to eval=FALSE since we have this stored in model_evaluation.rda
<<eval=FALSE>>=
owls_nb1_influence_time <- system.time(
  owls_nb1_influence <- influence_mixed(owls_nb1, groups = "Nest")
)
@

Re-fitting the model with each of the
\Sexpr{length(unique(Owls$Nest))} nests excluded takes
\Sexpr{round(owls_nb1_influence_time[["elapsed"]])} seconds (on an
old MacBook Pro). The \tcode{car::infIndexPlot()} function is one way
of displaying the results:

<<>>=
car::infIndexPlot(owls_nb1_influence)
@

Or, you can transform the results and plot them however you like:

<<>>=
inf <- as.data.frame(owls_nb1_influence[["fixed.effects[-Nest]"]])
inf <- transform(inf,
                 nest = rownames(inf),
                 cooks = cooks.distance(owls_nb1_influence))
inf$ord <- rank(inf$cooks)
if (require(reshape2)) {
  inf_long <- melt(inf, id.vars = c("ord", "nest"))
  gg_infl <- (ggplot(inf_long, aes(ord, value))
    + geom_point()
    + facet_wrap(~variable, scales = "free_y")
    ## n.b. may need expand_scale() in older ggplot versions?
    + scale_x_reverse(expand = expansion(mult = 0.15))
    + scale_y_continuous(expand = expansion(mult = 0.15))
    + geom_text(data = subset(inf_long, ord > 24),
                aes(label = nest), vjust = -1.05)
  )
  print(gg_infl)
}
@

\section{Robust sandwich standard errors}

Sandwich estimators provide robust standard errors for a wide range
of models. We have added the appropriate methods for \code{glmmTMB}
models, following the \code{sandwich} package's functions. The
\code{vcovHC()} function computes the sandwich estimator of the
variance-covariance matrix of the model coefficients:

<<>>=
vcovHC(owls_nb1)
@

This robust covariance matrix estimate can also be used when
calculating the standard errors and corresponding $p$-values for the
model coefficients:

<<>>=
summary(owls_nb1, sandwich = TRUE)
@

Finally, the robust covariance matrix estimate can also be used for
the least-squares means:

<<>>=
emmeans(owls_nb1, ~ FoodTreatment * SexParent, vcov = vcovHC(owls_nb1))
@

A few notes on the definition and scope of the sandwich estimator:
the estimator implemented here is defined as
\[
\hat{S} = \hat{B} \hat{M} \hat{B}
\]
where $\hat{B}$ is the ``bread'' matrix (the inverse Hessian) and
$\hat{M}$ is the ``meat'' matrix, defined as
\[
\hat{M} = \sum_{k=1}^{K} \hat{u}_k \hat{u}_k^\top
\]
where $\hat{u}_k$ is the score vector of the log-likelihood
contribution of the $k$-th cluster (or subject) in the data,
$k=1,\dots,K$. The clustering is defined conveniently by the
\code{getGroups} function, which returns the random-effect grouping:

<<>>=
head(getGroups(owls_nb1))
@

and may be modified (if really needed) by the user via the
\code{cluster} argument of \code{vcovHC} etc.
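For example, the nest-level clustering used above could be specified
explicitly (a sketch, not evaluated here; this should be equivalent
to the default grouping for this model):

<<eval=FALSE>>=
## explicit cluster specification, matching the default grouping
## returned by getGroups(owls_nb1)
vcovHC(owls_nb1, cluster = Owls$Nest)
@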
Note that the parameter vector here includes both the fixed effects
and the variance parameters, so the ``full'' sandwich estimator is
always calculated first. By default the variance-parameter part is
then excluded from the output, but it can be included by setting
\code{full = TRUE}:

<<>>=
vcovHC(owls_nb1, full = TRUE)
@

Because the fixed effects must be included in the parameter vector,
the sandwich estimator only works for models fitted by maximum
likelihood (ML), not for models fitted by restricted maximum
likelihood (REML):

<<>>=
try(vcovHC(update(owls_nb1, REML = TRUE)))
@

In addition, the code checks whether the sum of the cluster-wise
log-likelihoods equals the overall log-likelihood, and issues a
warning otherwise:

<<>>=
tryCatch(
  vcovHC(owls_nb1, cluster = Owls$SexParent),
  warning = function(w) print(w)
)
@

This is the case, in particular, when non-nested random effects are
present in the model. The sandwich estimator should therefore only be
used with a single random-effect term, or with nested random effects.

\section{To do}

\begin{itemize}
\item more plotting methods (\code{sjPlot})
\item output with \code{memisc}
\item AUC etc.\ with \code{ModelMetrics}
\end{itemize}

<<>>=
## store time-consuming stuff
save("owls_nb1",
     "owls_nb1_simres",
     "owls_nb1_dredge",
     "owls_nb1_influence",
     "owls_nb1_influence_time",
     file = "../inst/vignette_data/model_evaluation.rda",
     version = 2  ## for compatibility with R < 3.6.0
     )
@

\end{document}