\documentclass[article,nojss]{jss} %% NOTA BENE: More definitions --> further down %%%%%%%%%%%% % \author{Martin M\"achler \\ ETH Zurich% \\ May 2011 -- July 2012 {\tiny (\LaTeX'ed \today)}%---- for now } \title{Numerically Stable Frank Copula Functions via Multiprecision: \proglang{R} Package \pkg{Rmpfr}} % \def\mythanks{a version of this paper, for \pkg{copula} 0.4\_4, has been published % in JSS, \url{http://www.jstatsoft.org/v39/i09}.} %% for pretty printing and a nice hypersummary also set: \Plainauthor{Martin M\"achler} %% comma-separated \Plaintitle{Numerically Stable Frank Copula Functions via Multiprecision: R Package 'Rmpfr'} \Shorttitle{Numerically Stable Frank via Multiprecision in R} % %\VignetteIndexEntry{Numerically stable Frank Copulas via Multiprecision (Rmpfr)} %\VignetteDepends{copula} %\VignetteDepends{Rmpfr} \SweaveOpts{engine=R,eps=FALSE,pdf=TRUE,width=7,height=5,strip.white=true,keep.source=TRUE} %% an abstract and keywords \Abstract{ The package \pkg{copula} (formerly \texttt{nacopula}) has provided functionality for Archimedean copulas, one of them the ``Frank copula''. Recently, explicit formulas for the density of those copulas have allowed for maximum likelihood estimation in high (e.g., $d=150)$) dimensions, (\cite{hofertmaechlermcneil2012a}). However for non-small dimensions, the evaluation of these densities is numerically challenging, and in some cases difficult. Here, we use high precision arithmetic from MPFR, via R package \pkg{Rmpfr} in order to get accurate values for the diagonal density of the Frank copula. Subsequently, judiciously analysing and circumventing the numerical infelicities of the ``traditional'' evaluation, we gain accurate values even with traditional (double precision) arithmetic. } \Keywords{Archimedean copulas, Frank Copula, Multiple Precision, R, MPFR, Rmpfr} %% at least one keyword must be supplied %% publication information %% NOTE: Typically, this can be left commented and will be filled out by the technical editor %% \Volume{13} %% \Issue{9} %% \Month{September} %% \Year{2004} %% \Submitdate{2004-09-29} %% \Acceptdate{2004-09-29} %% The address of (at least) one author should be given %% in the following format: \Address{ Martin M\"achler\\ Seminar f\"ur Statistik, HG G~16\\ ETH Zurich\\ 8092 Zurich, Switzerland\\ E-mail: \email{maechler@stat.math.ethz.ch}\\ URL: \url{http://stat.ethz.ch/people/maechler} } %% It is also possible to add a telephone and fax number %% before the e-mail in the following format: %% Telephone: +43/1/31336-5053 %% Fax: +43/1/31336-734 %% for those who use Sweave please include the following line (with % symbols): %% MM: this is "substituted" by jss.cls: %% need no \usepackage{Sweave.sty} \usepackage[american]{babel}%for American English \usepackage{amsmath}%sophisticated mathematical formulas with amstex (includes \text{}) \usepackage{mathtools}%fix amsmath deficiencies \usepackage{amssymb}%sophisticated mathematical symbols with amstex (includes \mathbb{}) % \usepackage{amsthm}%theorem environments \usepackage{bm}%for bold math symbols: \bm (= bold math) % \usepackage{enumitem}%for automatic numbering of new enumerate environments % This is already in jss above -- but withOUT the fontsize=\small part !! \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl} \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small} \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl} %% makes space between Sinput and Soutput smaller: \fvset{listparameters={\setlength{\topsep}{0pt}}}% !! quite an effect! %% ??? FIXME but it also influences all other lists, itemize, ... ???? FIXME %% \setkeys{Gin}{width=\textwidth}% Sweave.sty has {width=0.8\textwidth} \newcommand*{\R}{\proglang{R}}%{\textsf{R}} %% Marius' commands \newcommand*{\eps}{\varepsilon} %NON-STANDARD{bbm}:\newcommand*{\I}{\mathbbm{1}} % \newcommand*{\I}{\mathbf{1}} % \newcommand*{\IN}{\mathbb{N}} % \newcommand*{\IK}{\mathbb{K}} % \newcommand*{\IR}{\mathbb{R}} % \newcommand*{\IC}{\mathbb{C}} % \newcommand*{\IP}{\Prob} % \newcommand*{\IE}{\E} % \newcommand*{\V}{\operatorname*{V}} %- \abs{ab} --> | ab | ``absolut Betrag'' \newcommand{\abs}[1] {\left| #1 \right|} \newcommand*{\Li}[1]{\mathrm{Li}_{#1}}% polylog / polylogarithm % \renewcommand*{\S}{\operatorname*{S}} % \newcommand*{\tS}{\operatorname*{\tilde{S}}} % \newcommand*{\ran}{\operatorname*{ran}} %\newcommand*{\sgn}{\operatorname*{sgn}} \DeclareMathOperator{\sign}{sign} \newcommand*{\psiD}{\psi^\prime} \newcommand*{\psii}{{\psi^{-1}}} \newcommand*{\psiis}[1]{{\psi_{#1}^{-1}}} % \renewcommand*{\L}{\mathcal{L}} % \newcommand*{\Li}{\mathcal{L}^{-1}} % \newcommand*{\LS}{\mathcal{LS}} % \newcommand*{\LSi}{\LS^{-1}} \newcommand{\tr}{\ensuremath{^\mathsf{T}}}% or ^{\intercal} \renewcommand*{\O}{\mathcal{O}} % \newcommand*{\Geo}{\operatorname*{Geo}} % \newcommand*{\Exp}{\operatorname*{Exp}} % \newcommand*{\Sibuya}{\operatorname*{Sibuya}} % \newcommand*{\Log}{\operatorname*{Log}} % \newcommand*{\U}{\operatorname*{U}} % \newcommand*{\B}{\operatorname*{B}} % \newcommand*{\NB}{\operatorname*{NB}} % \newcommand*{\N}{\operatorname*{N}} \DeclareMathOperator{\var}{var} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Cov}{Cov} \DeclareMathOperator{\cov}{cov} \DeclareMathOperator{\Cor}{Corr} \DeclareMathOperator{\cor}{corr} % \newcommand*{\Var}{\operatorname*{Var}} % \newcommand*{\Cov}{\operatorname*{Cov}} % \newcommand*{\Cor}{\operatorname*{Cor}} \hyphenation{Ar-chi-me-dean} %% journal specific aliases \newcommand*{\setcapwidth}[1]{} %% end of declarations %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} %% include your article here, just as usual %% Note that you should use the \pkg{}, \proglang{} and \code{} commands. % \section[About Java]{About \proglang{Java}} %% Note: If there is markup in \(sub)section, then it has to be escape as above. %% Note: These are explained in '?RweaveLatex' : <>= op.orig <- options(width = 75, SweaveHooks= list(fig=function() par(mar=c(5.1, 4.1, 1.1, 2.1))), useFancyQuotes = FALSE, ## for JSS, but otherwise MM does not like it: ## prompt="R> ", continue=" ")# 2 (or 3) blanks: use same length as 'prompt' copDDir <- system.file('doc', package='copula') Sys.setenv(LANGUAGE = "en") if(.Platform$OS.type != "windows") Sys.setlocale("LC_MESSAGES","C") @ %\section[Introduction]{Introduction \small~\footnote{\mythanks}} %\section{Introduction} \section{The diagonal density of Frank's copula} The ``diagonal density'' of a copula $C(\cdot)$ is the density of $\max(U_1,U_2,\dots,U_d)$, where \linebreak[4] $\bm U = (U_1,U_2,\dots,U_d)\tr \sim C$. The (cumulative) distribution function, by definition, \begin{equation} F^D(u) := P\left[ \max(U_1,U_2,\dots,U_d) \le u \right] = P\left[U_1\le u, U_2,\le u, \dots, U_d\le u\right] = C(u,u,\dots,u), \label{eq:diagF} \end{equation} evaluates the copula only on the diagonal and is therefore called \emph{``the diagonal of $C$''}. Its density $f^D(u) := \frac{d}{du}F^D(u)$, is therefore called the diagonal density of $C$. For Archimedean copulas, i.e., where \begin{align} C(\bm{u})=C(\bm{u};\psi)=\psi(\psii(u_1)+\dots+\psii(u_d)),\ \bm{u}\in[0,1]^d,\label{ac} \end{align} the diagonal density is \begin{eqnarray} f^D(u) &=& \frac{d}{du}F^D(u) = \frac{d}{du}\psi\left(\sum\nolimits_{j=1}^d \psii(u)\right) = \frac{d}{du}\psi(d\cdot \psii(u)) = \nonumber\\ &=& \psi^\prime\!\left(d\cdot \psii(u)\right) \cdot d \cdot \frac{d}{du} \psii(u)\nonumber\\ &=& d\cdot \psi^\prime\!\left(d\cdot \psii(u)\right) \cdot \left[\psii\right]^\prime(u). \label{eq:fD} \end{eqnarray} For this reason, the \pkg{copula} package's \code{dDiag()} function for computing the diagonal density $f^D(u)$ makes use of the following %% ==> lines 261 ff from ~/R/Pkgs/copula/R/estimation.R %% ================================ <>= copula:::dDiagA <>= writeLines(head(capture.output(print( <> )), -1)) @ where the three functions \begin{align} \mathtt{absdPsi(t,thet)} &= \bigl|\psi_\theta^\prime(t)\bigr|,\label{def:absdPsi}\\ \mathtt{iPsi(u,thet)} &= \psi_\theta^{-1}(u), \ \mathrm{and}\label{def:iPsi}\\ \mathtt{absdiPsi(u,thet)} &= \bigl|[\psi_\theta^{-1}]^\prime(u) \bigr| \label{def:absdiPsi} \end{align} are all provided by the slots of the corresponding Archimedean copula family. For the following explorations, we need a definition of \code{dDiagA} which is more flexible as it does not work with the copula family object but gets the three functions as arguments, <>= dDiagA <- function(u, th, d, iPsi, absdPsi, absdiPsi, log = FALSE) { stopifnot(is.finite(th), d > 0, is.function(iPsi), is.function(absdPsi), is.function(absdiPsi)) if(log) { log(d) + absdPsi(d*iPsi(u,th), th, log = TRUE) + absdiPsi(u, th, log = TRUE) } else { d* absdPsi(d*iPsi(u,th), th) * absdiPsi(u,th) } } @ Now, for the Frank copula (\cite{Hofert:Maechler:2010:JSSOBK:v39i09}), \begin{eqnarray} \psi_\theta(t) &=& - \frac{1}{\theta}\log\left(1 - (1 - e^{-\theta})e^{-t}\right), \label{eq:psiFrank} \ \ \ \theta > 0, \ \ \mathrm{\ \ hence,} \\ \psi_\theta^{-1}(u) &=& - \log\left(\frac{e^{-u\theta} - 1}{e^{-\theta} - 1}\right), \label{eq:iPsiFrank} \ \ \mathrm{\ \ and} \\ %% absdPsi(t,thet) (-1)^k {\psi_\theta}^{(k)}(t) = \bigl|{\psi_\theta}^{(k)}(t)\bigr| &=& \frac{1}{\theta} \Li{k-1}((1-e^{-\theta})e^{-t}), \label{eq:absdPsi} \ \ \ \ \ \ \mathrm{and} \\ %% absdiPsi.1(u,theta) \bigl|[\psi_\theta^{-1}]^\prime(u) \bigr| &=& \theta/(e^{u\cdot\theta} - 1), \label{eq:iPsiD1} \end{eqnarray} where $\Li{s}(z)$ is the \emph{polylogarithm of order} $s$ at $z$, defined as (analytic continuation of) $\sum_{k=1}^\infty \frac{z^k}{k^s}$. When $s = -n$, $n\in\mathbb{N}$, one has \begin{equation} \label{eq:Li-n} \Li{-n}(z) = \left(z \cdot \frac{\partial}{\partial z} \right)^n \frac{z}{1-z}, \end{equation} and we note that here, only the first derivative is needed, $ - {\psi_\theta}^\prime(t) = \left|{\psi_\theta}^\prime(t)\right|$, and hence only \begin{equation} \mathtt{polylog(z,\: s=0)} = \Li{0}(z) = z / (1 - z). \label{eq:polylog-0} \end{equation} First note that numerically, $e^{-a} - 1$ suffers from cancellation when $0 < a \ll 1 $, and the \R\ (and C) function \code{expm1(-a)} is advisably used instead of \code{exp(-a) - 1}. For this reason, in \pkg{copula}, I had replaced the original \code{iPsi.0(u, th)} for $\psi_\theta^{-1}(u)$ by \code{iPsi.1()}, making use of \code{expm1()}. These and the derivative (\ref{eq:iPsiD1}) originally were <>= iPsi.0 <- function(u,theta) -log( (exp(-theta*u)-1) / (exp(-theta)-1) ) iPsi.1 <- function(u,theta) -log(expm1(-u*theta) / expm1(-theta)) absdiPsi.1 <- function(u, theta, log = FALSE) if(log) log(theta)-log(expm1(u*theta)) else theta/expm1(u*theta) @ and the general $k$-th derivative (\ref{eq:absdPsi}), simplified, for $k= 1$ (\code{degree = 1}), <>= require("copula")# for polylog() absdPsi.1 <- function(t, theta, log=FALSE) { p <- -expm1(-theta) Li. <- polylog(log(p) - t, s = 0, log=log, method="negI-s-Eulerian", is.log.z=TRUE) if(log) Li. - log(theta) else Li. / theta } @ where we however now assume that the \code{polylog()} function for \code{s=0} would basically use the direct formula (\ref{eq:polylog-0}), such that we use <>= absdPsi.2 <- function(t, theta, log=FALSE) { w <- log(-expm1(-theta)) - t Li. <- if(log) w - log(-expm1(w)) else -exp(w)/expm1(w) if(log) Li. - log(theta) else Li. / theta } @ \section{Computing the ``diagonal MLE''} The most important use of the diagonal density is to compute the ``diagonal maximum likelihood estimator'', \code{dmle}, ${\hat\theta}^D$ which for a sample of observations $\bm u_1,\bm u_2,\ldots, \bm u_n$, ($\bm u_i \in [0,1]^d$) is defined as minimizer of the negative log-likelihood, \begin{align} {\hat\theta}^D &= \arg\min_\theta - l(\theta; \bm u_1,\ldots,\bm u_n),\ \ \mathrm{where}\\ l(\theta; \bm u_1,\ldots,\bm u_n) &= \sum_{i=1}^n \log f^D(\tilde u_i)\ \ \mathrm{and}\\ \tilde{u_i} &= \max_{j=1,\dots,d} u_{i,j}. \label{eq:dmle} \end{align} In our exploration of the \code{dmle} estimator, we found cases with numerical problems, at first already in evaluating the logarithm of the diagonal density $\log f^D = \log f^D_\theta(u)$\code{= dDiag(u, theta, *, log=TRUE)} for non-small $\theta$ and ``large'' $u$, i.e., $u \approx 1$: <>= curve(dDiagA(x, th = 38, d = 2, iPsi = iPsi.0, absdPsi=absdPsi.1, absdiPsi=absdiPsi.1, log = TRUE), ylab = "dDiagA(x, theta= 38, *, log=TRUE)", 0, 1, col = 4, n = 1000) ## and using the slightly better iPsi.1 does not help here: curve(dDiagA(x, th = 38, d = 2, iPsi = iPsi.1, absdPsi=absdPsi.2, absdiPsi=absdiPsi.1, log = TRUE), add = TRUE, col = 2, n=1000) legend("bottom", c("iPsi.0()","iPsi.1()"),col=c(4,2), lty=1, bty="n") @ However, it's not hard to see that indeed our initial computation of Frank's $\psiis{\theta}$, i.e, (\ref{eq:iPsiFrank}), $\psiis{\theta}(u) = -\log\left(\frac{1-e^{-u\theta}}{1-e^{-\theta}}\right)$, suffers from ``division cancellation'' for ``large'' $\theta$ ($\theta=38$ in ex.) when computed directly with \code{iPsi.0()}, see above, and that the improvement of using \code{expm1(-t)} instead of \code{exp(-t) - 1}, as used in \code{iPsi.1()}, see above, helps only for $t\approx 0$. However, we can rewrite $\psiis{\theta}$ as \begin{equation} \label{eq:psii:log:1} \psiis{\theta}(u) = -\log\left(1 -\frac{e^{-u\theta}-e^{-\theta}}{1-e^{-\theta}}\right), \end{equation} which when using \code{log1p(e)} for $\log(1+e)$ is much better numerically: <>= iPsi.2 <- function(u,theta) -log1p((exp(-u*theta)-exp(-theta)) / expm1(-theta)) curve(dDiagA(x, th = 38, d = 2, iPsi = iPsi.2, absdPsi=absdPsi.2, absdiPsi=absdiPsi.1, log = TRUE), ylab = "dDiagA(x, theta= 38, *, log=TRUE)", 0, 1, col = 4, n = 1000) ## previously curve(dDiagA(x, th = 38, d = 2, iPsi = iPsi.1, absdPsi=absdPsi.2, absdiPsi=absdiPsi.1, log = TRUE), add = TRUE, col = "darkgray", lwd=2, lty=3, n=1000) @ Unfortunately, this is not enough to get numerically stable evaluations of the negative log-likelihood $l()$: <>= d <- 5 (theta <- copFrank@iTau(tau = 0.75)) cop <- onacopulaL("Frank", list(theta, 1:d)) set.seed(1); for(l in 1:4) U <- rnacopula(n = 100, cop) U. <- sort(apply(U, 1, max)) # build the max <>= mlogL <- function(theta) -sum(dDiagA(U., theta, d=d, iPsi = iPsi.2, absdPsi=absdPsi.2, absdiPsi=absdiPsi.1, log = TRUE)) @ Now, plot this negative log likelihood function in an interval $\theta$, close to what is proposed \pkg{copula}'s \code{initOpt()} function, defining a utility function that we'll reuse later: <>= p.mlogL <- function(th, mlogL, col= "red2", lwd = 1, lty = 1, add= FALSE) { stopifnot(is.numeric(th), is.function(mlogL)) nll <- vapply(th, mlogL, 0.) if(add) lines(nll ~ th, col=col, lwd=lwd, lty=lty) else plot(nll ~ th, xlab=expression(theta), ylab = expression(- logLik(theta, .)), type = "l", col=col, lwd=lwd, lty=lty) invisible(nll) # return invisibly } thet <- seq(11, 99, by = 1/4) p.mlogL(thet, mlogL) require("Rmpfr")## compute the same with *high* accuracy ... ## using three different precisions: MPrecBits <- c(160, 128, 96) mkNm <- function(bits) sprintf("%03d.bits", bits) ## As it takes a while, cache the result: fnam <- sprintf("mlogL_mpfr_%s.rds", Sys.info()[["machine"]]) if(file.exists(fn <- file.path(copDDir, fnam))) { nllMP <- readRDS(fn) } else { print(system.time( nllMP <- lapply(MPrecBits, function(pBit) { nlM <- thM <- mpfr(thet, precBits = pBit) ## (vapply() does not work for "Rmpfr":) for(i in seq_along(thet)) nlM[i] <- mlogL(thM[i]) nlM }) )) ## ~ 18 sec [R 3.5.1 nb-mm4 core I7vPro]; was 91 sec [2011-06-14 nb-mm icore 5] names(nllMP) <- mkNm(MPrecBits) copSrcDDir <- if(Sys.getenv("USER") == "maechler") '~/R/Pkgs/copula/inst/doc' else "" if(file.exists(copSrcDDir))# <<- only for certain users; not on CRAN etc saveRDS(nllMP, file = file.path(copSrcDDir, fnam), version = 2) } colB <- c("blue3","violetred4","tan3") ltyB <- c(5:3) lwdB <- c(2,2,2) for(i in seq_along(nllMP)) { lines(thet, as.numeric(nllMP[[i]]), col=colB[i], lty = ltyB[i], lwd = lwdB[i]) } leg <- c("double prec.", sprintf("mpfr(*, precBits = %d)", MPrecBits)) legend("top", leg, col= c("red3",colB), lty=c(1, ltyB), lwd=c(1,lwdB), bty="n") @ So, clearly, high-precision computations can solve the numerical problems, if the precision is high enough. E.g., for $\theta = 100$, it needs more than 128 bits precision. Let us look at the phenomenon in more details now. The flesh in the \code{mlogL()} computation is (up to the constant $\log(d)$, $d=5$), only the sum of the two terms <>= op <- options(prompt = " ") <>= absdPsi(d*iPsi(u,th), th, log = TRUE) + absdiPsi(u, th, log = TRUE) @ currently, with the three functions <<3f, eval=false>>= iPsi = iPsi.2; absdPsi = absdPsi.2; absdiPsi = absdiPsi.1 <>= options(op) @ where we have already tried to ensure that the \code{iPsi()} function is ok, but now can confirm it, e.g., for $\theta = 50$. Further note, that using high-precision arithmetic, we can also ``partially afford'' to use the simplistic \code{iPsi.0()} function instead of the more stable \code{iPsi.2()} one: <>= stopifnot(all.equal(iPsi.2(U., 50 ), iPsi.2(U., mpfr(50, 96))), all.equal(iPsi.0(U., mpfr(50, 200)), pI.U <- iPsi.2(U., mpfr(50, 200)), tol=1e-50) ) @ However, we can observe dramatic differences in \code{absdPsi.2()} ($= |\psi^\prime(.)|$): <>= psD.n <- absdPsi.2(as.numeric(pI.U), 40) psD.M <- as.numeric(absdPsi.2(pI.U, mpfr(40, 200))) matplot(U., cbind(psD.n, psD.M), type="l", log="y") legend("top", c("double prec.", "mpfr(*, precBits = 200)"), col= 1:2, lty=1:2, bty="n") @ where we see a very large difference (note the \emph{$\log$} scale!) for $u \approx 1$, i.e., for very small \linebreak[3] \code{pI.U= iPsi.2(U., theta)}$ = \psi_\theta^{-1}(u)$. <>= u0 <- 2^-(100:1) psD.n <- absdPsi.2(u0, 40) psD.M <- as.numeric(absdPsi.2(u0, mpfr(40, 200))) matplot(u0, cbind(psD.n, psD.M), type="l", log="xy") legend("top", c("double prec.", "mpfr(*, precBits = 200)"), col= 1:2, lty=1:2, bty="n") @ Further investigation shows that the culprit is really the use of \code{log(-expm1(-theta))} inside \code{absdPsi.2()} which underflows for large theta, and hence should be replaced by the generally accurate \code{log1mexp()}, see \citet{maechler2012}. %% M\"achler, M. (2012) Accurately Computing $\log(1-\exp(-\lvert a\rvert))$, %% https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf %% (MM: ~/R/Pkgs/Rmpfr/inst/doc/log1mexp-note.Rnw + ~/R/MM/NUMERICS/log1-exp.R) <>= log1mexp <- function(a) # accurately compute log(1-exp(-a)) { stopifnot(a >= 0) r <- a tst <- a <= log(2) r[ tst] <- log(-expm1(-a[ tst])) r[!tst] <- log1p(-exp(-a[!tst])) r } @ so that we rather compute $|\psi^\prime(.)|$ via <>= absdPsi.3 <- function(t, theta, log=FALSE) { w <- log1mexp(theta) - t Li. <- if(log) w - log1mexp(-w) else -exp(w)/expm1(w) if(log) Li. - log(theta) else Li. / theta } @ Does this already solve the ``diagonal likelihood'' problem? We investigate via the graphic <>= p.mlogL(th = seq(11, 99, by = 1/4), mlogL = (mlogL2 <- function(theta) -sum(dDiagA(U., theta, d=d, iPsi = iPsi.2, absdPsi = absdPsi.3, absdiPsi=absdiPsi.1, log = TRUE))), lwd = 2) @ Yes, indeed, using a numerically stable version for \code{absdPsi()} did solve the numerical problem of computing, the ``diagonal likelihood'', and hence the \code{dmle()} (``diagonal MLE''). Well, if we really insist, there are more problems, but probably not really practical: <>= thet <- 9:1000 nll <- p.mlogL(thet, mlogL = mlogL2, lwd=2) (th0 <- thet[i0 <- max(which(is.finite(nll)))]) abline(v = th0, col="red2", lty="15", lwd=2) @ where we see that for $\theta > \Sexpr{th0}$, \code{mlogL()} is not finite, e.g., <>= dDiagA(0.999, 715, d = d, iPsi = iPsi.2, absdPsi = absdPsi.3, absdiPsi = absdiPsi.1, log = TRUE) @ which after closer inspection is from the \code{absdiPsi(u, th, log = TRUE)} part of \code{dDiagA()} and that, see (\ref{def:absdiPsi}), uses \code{log(theta)-log(expm1(u*theta))}, where clearly already \code{expm1(u*theta)) = expm1(0.999 * 715)} numerically overflows to \code{Inf}: <>= absdiPsi.1(0.999, th = 715, log=TRUE) @ However, as $\log(\mathrm{expm1}(y)) = \log(e^y -1) = \log(e^y(1 - e^{-y})) = y + \log(1 - e^{-y})$, the ``numerical stable'' solution is to replace \code{log(expm1(y))} by \code{y + log1mexp(y)}, such that we will use <>= absdiPsi.2 <- function(u, theta, log = FALSE) if(log) log(theta)- {y <- u*theta; y + log1mexp(y)} else theta/expm1(u*theta) @ Unfortunately, this improves the situation for large $\theta$ only slightly: <>= plot(nll ~ thet, xlab=expression(theta), ylab = expression(- logLik(theta, .)), type = "l", col="red2", lwd=2) abline(v = th0, col="red2", lty="15", lwd=2) nll3 <- p.mlogL(thet, mlogL = function(theta) -sum(dDiagA(U., theta, d=d, iPsi= iPsi.2, absdPsi= absdPsi.3, absdiPsi = absdiPsi.2, log = TRUE)), col = "blue2", lwd=3, lty=2, add = TRUE) nll3[thet == 800] @ where we can see, that this time, the numerical overflow to $-\infty$ happens in \code{absdPsi(d*iPsi(u,th), th, log = TRUE)}, as \code{iPsi(u,th) = iPsi(0.999, th=800)}$=$\Sexpr{format(iPsi.2(u=0.999,th=800))} underflows to zero --- by necessity: the correct value is smaller than the smallest representable double precision number: <>= pI <- iPsi.2(u=0.999, th= mpfr(800, 200)) cat(sapply(list(pI, .Machine$double.xmin), format, digits = 7), "\n") @ The only solution here is to allow passing the \emph{logarithm} $\log(t)$ instead of $t$ to \code{absdPsi()}, i.e., we start computing \code{log(iPsi(.))} directly (evading the underflow to 0 there). \dots \dots to be continued. \section{Session Information} <>= toLatex(sessionInfo()) <>= options(op.orig) @ \section{Conclusion} \bibliography{nacopula}% Rmpfr \end{document}