Introduction to the multiocc package

library(interp)
library(MCMCpack)
#> Loading required package: coda
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:interp':
#> 
#>     area
#> ##
#> ## Markov Chain Monte Carlo Package (MCMCpack)
#> ## Copyright (C) 2003-2023 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
#> ##
#> ## Support provided by the U.S. National Science Foundation
#> ## (Grants SES-0350646 and SES-0350613)
#> ##
library(tmvtnorm)
#> Loading required package: mvtnorm
#> Loading required package: Matrix
#> Loading required package: stats4
#> Loading required package: gmm
#> Loading required package: sandwich
library(truncnorm)
library(multiocc)
library(MASS)
library(corrplot)
#> corrplot 0.92 loaded
library(fields)
#> Loading required package: spam
#> Spam version 2.9-1 (2022-08-07) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction 
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#> 
#> Attaching package: 'spam'
#> The following object is masked from 'package:stats4':
#> 
#>     mle
#> The following object is masked from 'package:Matrix':
#> 
#>     det
#> The following objects are masked from 'package:mvtnorm':
#> 
#>     rmvnorm, rmvt
#> The following objects are masked from 'package:base':
#> 
#>     backsolve, forwardsolve
#> Loading required package: viridis
#> Loading required package: viridisLite
#> 
#> Try help(fields) to get started.
data(detection)
data(occupancy)
data(coords)
DataNames <- list("species"=colnames(detection)[4:9],
             "detection"=c("duration"),"occupancy"=c("forest","elev"))
model.input <- multioccbuild(detection, occupancy, coords, DataNames, threshold = 15000)
#> Warning: Rows in detection with missing covariates have been removed for purposes of fitting the model, but the site/season combination is retained in occupancy and therefore predictions will be outputted.

Perform some exploratory data analysis

par(mfrow=c(1,3))
hist(occupancy$forest, main="", xlab="Forest")
hist(occupancy$elev, main="", xlab="Elevation")
hist(detection$duration, main="", xlab="Duration")


par(mfrow=c(3,2), mar=c(3,3,3,1))
quilt.plot(coords[,2:3], occupancy$forest[1:267], main="Forest Cover", zlim=c(-1.5,3))
fit <- Tps(coords[,2:3], occupancy$forest[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Forest Cover (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], occupancy$elev[1:267], main="Elevation", zlim=c(-1.5,3.5))
fit <- Tps(coords[,2:3], occupancy$elev[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Elevation (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], detection$duration[1:267], main="Duration", zlim=c(-2.5,3))
fit <- Tps(coords[,2:3], detection$duration[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Duration (Survey 1)", zlim=c(-2.5,2.5))

A short run for demonstration purposes

## Shorter run for demonstration purposes.
## library(tmvtnorm)
mcmc.out <- GibbsSampler(M.iter=10, M.burn=1, M.thin=1, model.input, q=10, sv=FALSE)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |======================================================================| 100%

A longer run (not executed) for scientific results

mcmc.out <- GibbsSampler(M.iter=50000, M.burn=20000, M.thin=1, model.input, q=10, sv=FALSE)

Can summarize output

summary(mcmc.out$samples$alpha)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                         Mean      SD Naive SE Time-series SE
#> Great.tit Int       0.698707 0.09326 0.031088       0.069273
#> Great.tit forest   -0.089256 0.02151 0.007170       0.013382
#> Great.tit elev     -0.192767 0.04426 0.014753       0.029396
#> Blue.tit Int        0.439971 0.08167 0.027222       0.051716
#> Blue.tit forest    -0.090960 0.01842 0.006140       0.006140
#> Blue.tit elev      -0.188308 0.03656 0.012188       0.022614
#> Coal.tit Int        0.853628 0.12596 0.041988       0.099619
#> Coal.tit forest     0.044023 0.03265 0.010882       0.010882
#> Coal.tit elev      -0.102762 0.06223 0.020744       0.051960
#> Crested.tit Int     0.582261 0.11157 0.037189       0.073593
#> Crested.tit forest  0.001666 0.02451 0.008169       0.008169
#> Crested.tit elev   -0.100175 0.04213 0.014043       0.024750
#> Marsh.tit Int       0.344407 0.10376 0.034586       0.080931
#> Marsh.tit forest   -0.067825 0.02550 0.008501       0.017780
#> Marsh.tit elev     -0.172450 0.02585 0.008615       0.008615
#> Willow.tit Int      0.099657 0.04912 0.016372       0.037068
#> Willow.tit forest   0.088219 0.02913 0.009710       0.008112
#> Willow.tit elev     0.009839 0.04008 0.013361       0.012051
#> 
#> 2. Quantiles for each variable:
#> 
#>                         2.5%      25%       50%      75%    97.5%
#> Great.tit Int       0.550420  0.65055  0.722540  0.77391  0.80607
#> Great.tit forest   -0.121357 -0.10107 -0.094432 -0.07166 -0.05957
#> Great.tit elev     -0.241750 -0.22941 -0.211867 -0.14620 -0.12877
#> Blue.tit Int        0.283328  0.41013  0.466847  0.50294  0.50622
#> Blue.tit forest    -0.116787 -0.10813 -0.087694 -0.07958 -0.06541
#> Blue.tit elev      -0.228103 -0.20680 -0.204454 -0.17534 -0.12272
#> Coal.tit Int        0.646843  0.76095  0.910108  0.95214  0.96928
#> Coal.tit forest    -0.006413  0.02529  0.047015  0.05359  0.09255
#> Coal.tit elev      -0.184399 -0.17130 -0.077050 -0.04792 -0.03211
#> Crested.tit Int     0.371065  0.56690  0.627426  0.64852  0.68221
#> Crested.tit forest -0.028672 -0.02192  0.002133  0.01831  0.03800
#> Crested.tit elev   -0.165211 -0.10889 -0.100382 -0.08258 -0.03330
#> Marsh.tit Int       0.175768  0.24558  0.394543  0.42080  0.43173
#> Marsh.tit forest   -0.111344 -0.07860 -0.063362 -0.05343 -0.03398
#> Marsh.tit elev     -0.207261 -0.18494 -0.170340 -0.16749 -0.12584
#> Willow.tit Int      0.033253  0.06739  0.095719  0.13542  0.17105
#> Willow.tit forest   0.044381  0.06722  0.096488  0.10410  0.12945
#> Willow.tit elev    -0.031720 -0.01973 -0.009290  0.05071  0.07027
summary(mcmc.out$samples$rho)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                   Mean      SD Naive SE Time-series SE
#> Great.tit rho   0.7674 0.11043  0.03681        0.03681
#> Blue.tit rho    0.8295 0.08158  0.02719        0.02719
#> Coal.tit rho    0.8358 0.06809  0.02270        0.03943
#> Crested.tit rho 0.8931 0.15861  0.05287        0.05287
#> Marsh.tit rho   0.8240 0.06263  0.02088        0.02088
#> Willow.tit rho  0.9390 0.06069  0.02023        0.04083
#> 
#> 2. Quantiles for each variable:
#> 
#>                   2.5%    25%    50%    75%  97.5%
#> Great.tit rho   0.5628 0.7286 0.7775 0.8437 0.8690
#> Blue.tit rho    0.7195 0.7491 0.8334 0.8866 0.9272
#> Coal.tit rho    0.7329 0.8044 0.8400 0.8548 0.9476
#> Crested.tit rho 0.5651 0.8656 0.9603 0.9781 0.9962
#> Marsh.tit rho   0.7461 0.7852 0.8295 0.8545 0.9266
#> Willow.tit rho  0.8326 0.9211 0.9650 0.9790 0.9964

Visualize correlation matrix

par(mfrow=c(1,1), mar=c(3,3,3,1))
sigout <- mcmc.out$samples$sig
Sig <- matrix(colMeans(sigout),6,6)
SpeciesCor <- cov2cor(Sig)
rownames(SpeciesCor) <- DataNames$species
colnames(SpeciesCor) <- DataNames$species
corrplot::corrplot(SpeciesCor)

Make predictions from fitted model

y.agg1 <-  aggregate(model.input$y[,1], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot1 <- 1*(y.agg1$x>0)

y.agg2 <- aggregate(model.input$y[,2], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot2 <- 1*(y.agg2$x>0)

y.agg3 <- aggregate(model.input$y[,3], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot3 <- 1*(y.agg3$x>0)

y.agg4 <- aggregate(model.input$y[,4], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot4 <- 1*(y.agg4$x>0)

y.agg5 <- aggregate(model.input$y[,5], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot5 <- 1*(y.agg5$x>0)

y.agg6 <- aggregate(model.input$y[,6], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot6 <- 1*(y.agg6$x>0)

for (yr in c(1,4,7,10)){
  print(yr)

  range <- which(model.input$occupancy.info$season == yr)

  psiout <- mcmc.out$samples$psi
  #pout <- mcmc.out$p
  dim(psiout)

  psi1 <- apply(psiout[,0*2670+range],2,mean)
  psi2 <- apply(psiout[,1*2670+range],2,mean)
  psi3 <- apply(psiout[,2*2670+range],2,mean)
  psi4 <- apply(psiout[,3*2670+range],2,mean)
  psi5 <- apply(psiout[,4*2670+range],2,mean)
  psi6 <- apply(psiout[,5*2670+range],2,mean)

  par(mfrow=c(3,2), mar=c(1,3,3,1))
  fit <- Tps(coords[1:267,2:3], psi1)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Great Tit", zlim=c(-0.01,1.01))
  mtext(paste("Year",yr), side=3, line=-2, outer=TRUE)

  y.plot1.in <- y.plot1[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot1.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi2)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Blue Tit", zlim=c(-0.01,1.01))

  y.plot2.in <- y.plot2[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot2.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi3)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Coal Tit", zlim=c(-0.01,1.01))

  y.plot3.in <- y.plot3[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot3.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi4)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Crested Tit", zlim=c(-0.01,1.01))

  y.plot4.in <- y.plot4[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot4.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi5)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Marsh Tit", zlim=c(-0.01,1.01))

  y.plot5.in <- y.plot5[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot5.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi6)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Willow Tit", zlim=c(-0.01,1.01))

  y.plot6.in <- y.plot6[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot6.in==1),2:3])
}
#> [1] 1

#> [1] 4

#> [1] 7

#> [1] 10