## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", warning = FALSE, message = FALSE ) options(rmarkdown.html_vignette.check_title = FALSE) ## ----GHRmodel_structure,fig.width=10, fig.height=10, fig.align='center', out.width='100%', echo= FALSE, fig.cap="**GHRmodel** functions to streamline INLA-compatible model formula development."---- knitr::include_graphics("https://gitlab.earth.bsc.es/ghr/ghrmodel/-/raw/510e4737684359badce248d83a9113be8a3b976f/inst/images/GHRmodel_structure.png") ## ----cov function table, echo = FALSE----------------------------------------- knitr::kable( data.frame( Function = c("`extract_names()`", "`cov_uni()`", "`cov_multi()`", "`cov_nl()`", "`cov_interact()`", "`cov_varying()`"), Purpose = c( "Selects covariate names from a dataset", "Prepares covariates for univariable INLA models", "Generates combinations of covariates for multivariable models.", "Convert covariates to nonlinear effect terms, with optional replication.", "Creates interaction terms between 2 or 3 covariates (e.g., var1:var2).", "Creates spatially or temporally varying effect terms using INLA's f() structure." ), `Input` = c( "Data frame", "Character vector", "Character vector or a list of character vectors", "Character vector or a list of character vectors", "List of character vectors", "Character vector or a list of character vectors" ), `Output` = c( "Character vector", "List of linear covariates", "List with INLA nonlinear effect terms", "List of multivariable covariate sets", "List including interaction terms", "List including spatially/temporally varying terms" ) ), format = "markdown", caption = "Overview of helper functions to prepare covariate lists for INLA-compatible formulas." ) ## ----Load required libraries, eval=FALSE-------------------------------------- # # Load necessary package dependencies # library(dplyr) # Data manipulation # library(tidyr) # Data tidying # library(tidyselect) # Helpers for selecting variables programmatically # library(rlang) # Tools for tidy evaluation and non-standard evaluation in tidyverse code # library(ggplot2) # Data visualization: creating plots and graphs # library(cowplot) # Combining and arranging multiple ggplot2 plots into a single figure # library(grDevices) # Base R graphics device functions (e.g., color palettes, saving plots) # library(RColorBrewer) # Predefined color palettes for plots # library(colorspace) # Advanced color space manipulation and palettes # library(sf) # Handling spatial vector data (simple features) # library(spdep) # Spatial dependence and autocorrelation analysis # library(sn) # Skew-normal and skew-t distributions (for modeling skewed data) # library(INLA) # Integrated Nested Laplace Approximation for Bayesian models # library(GHRexplore) # Exploratory analysis of health data # # # Load GHRmodel # library(GHRmodel) ## ----Load required libraries invisible, echo=FALSE---------------------------- # Load necessary package dependencies library(dplyr) # Data manipulation library(tidyr) # Data tidying library(tidyselect) # Helpers for selecting variables programmatically library(rlang) # Tools for tidy evaluation and non-standard evaluation in tidyverse code library(ggplot2) # Data visualization: creating plots and graphs library(cowplot) # Combining and arranging multiple ggplot2 plots into a single figure library(grDevices) # Base R graphics device functions (e.g., color palettes, saving plots) library(RColorBrewer) # Predefined color palettes for plots library(colorspace) # Advanced color space manipulation and palettes library(sf) # Handling spatial vector data (simple features) library(spdep) # Spatial dependence and autocorrelation analysis library(sn) # Skew-normal and skew-t distributions (for modeling skewed data) library(GHRexplore) # Exploratory analysis of health data # Load GHRmodel library(GHRmodel) ## ----Process the data set----------------------------------------------------- #Load data data("dengue_MS") df <- dengue_MS # Create ID variables df <- df |> # Filter out the year 2000 filter(year > 2000) |> # Create numeric IDs for year, month and various spatial units. mutate( year_id = as.numeric(as.factor(year)), # year numeric ID year_id2 = as.numeric(as.factor(year)), # year second numeric ID month_id = as.numeric(as.factor(month)), # month numeric ID spat_id = as.numeric(as.factor(micro_code)), # microregion numeric ID spat_meso_id = as.numeric(as.factor(meso_code)), # meso-region numeric ID main_climate_f = as.numeric(as.factor(main_climate)) # climate zone numeric ID ) ## ----Load map_MS-------------------------------------------------------------- # Load map included in package data("map_MS") # Create adjacency Matrix nb <- spdep::poly2nb(map_MS) g <- spdep::nb2mat(nb, style = "B") ## ----lag_cov lags 1 to 6------------------------------------------------------ data <- lag_cov(data = df, name = c("tmin", "pdsi"), # variables to lag time = "date", # time variable lag = c(1:6), # 1 to 6-month lags group = "micro_code", # identify spatial units with independent time series add = TRUE) # lagged variables appended to original data # Visualize lagged variables head(data[34:39]) ## ----Define monthly and yearly random effects priors-------------------------- # Define Gamma priors for the precision of temporal random effects prior_t <- list(prec = list(prior = 'loggamma', param = c(0.01, 0.01))) ## ----Define spatial random effect priors-------------------------------------- # Define penalized complexity (PC) priors for spatial random effects using BYM2 prior_sp <- list( prec = list(prior = 'pc.prec', param = c(0.5 / 0.31, 0.01)), # Precision of spatial effect phi = list(prior = 'pc', param = c(0.5, 2 / 3)) # Mixing parameter: structured vs unstructured ) ## ----Extracts variable names-------------------------------------------------- # Extract variable names matching the specified patterns cov_names <- extract_names(data = data, pattern = c("tmin.", "pdsi.", "urban", "main_climate_f")) # Visualize output: character vector of covariate names glimpse(cov_names) ## ----univariable models------------------------------------------------------- # Generate list of single linear covariate names uni_cov_lin <- cov_uni(covariates = cov_names, # Input character vector of covariate names pattern = c("pdsi.l", # Select lagged pdsi and tmin from the vector of covariate names "tmin.l")) # Visualize output: list of single linear covariate names head(uni_cov_lin,2) ## ----Non linear univariable--------------------------------------------------- # Generate list of single nonlinear covariate names uni_cov_nl <- cov_nl(covariates = cov_names, method = "quantile", model = "rw2", pattern = c("pdsi", "tmin"), n = 10, add =FALSE) # Visualize output: list of single nonlinear covariate names head(uni_cov_nl,2) ## ----Replicated non linear univariable---------------------------------------- # Generate list of replicated nonlinear covariate names uni_cov_nl_rep <- cov_nl(covariates = uni_cov_lin, method = "quantile", pattern = c("pdsi"), n = 10, replicate = "main_climate_f", add = TRUE) # Visualize output: list of replicated nonlinear covariate names head(uni_cov_nl_rep[13:14]) ## ----Combine linear and nonlinear replicated covariates----------------------- # Create a list of combined predictors, some non linear and replicated multi_cov_nl_rep <- cov_multi(covariates = uni_cov_nl_rep, pattern = c("pdsi","tmin")) # Visualize output: list of replicated nonlinear pdsi lagged terms combined with linear tmin lagged terms head(multi_cov_nl_rep[7:8]) ## ----multi_cov_lin------------------------------------------------------------ # Generate list of combinations of linear covariate names multi_cov_lin <- cov_multi(covariates = uni_cov_lin, pattern = c("pdsi","tmin"), add = FALSE) # Visualize output: list of combinations of linear covariate names head(multi_cov_lin,2) ## ----triple_cov_lin----------------------------------------------------------- # Add urban to each element of the bivariate covariate list triple_cov_lin <- cov_add(covariates = multi_cov_lin, name = "urban") # Visualize output: list of combinations of 3 linear covariate names head(triple_cov_lin,2) ## ----interacting_cov---------------------------------------------------------- # Create a list of interacting linear predictors interacting_cov <- cov_interact(covariates = triple_cov_lin, pattern = c("pdsi","tmin", "urban")) # Visualize output: list of interactions between linear pdsi terms and linear tmin terms head(interacting_cov, 2) ## ----Varying covariates------------------------------------------------------- # Create a list of varying univariable predictors varying_cov <- cov_varying(covariates = multi_cov_lin, pattern = c("pdsi"), unit = "main_climate_f") # Visualize output: list of combinations of lagged pdsi varying by main_climate_f and linear tmin terms head(varying_cov,2) ## ----Varying vs. Replicated Effects in INLA table, echo = FALSE--------------- knitr::kable( data.frame( Feature = c( "Applied to", "Purpose", "INLA syntax", "Structure assumption", "Example use" ), `Varying Effect` = c( "Linear terms", "Group-specific linear slopes", "`f(group, covariate, model = 'iid')`", "Unstructured (`iid`)", "Region-specific slopes of the linear effect of rainfall" ), `Replicate Effect` = c( "Nonlinear terms", "Group-specific nonlinear functions", "`f(covariate, model = ..., replicate = group)`", "Same structure (e.g., `rw2`), different curves", "Region-specific nonlinear effect of rainfall" ) , check.names = FALSE ), caption = "Comparison of Replicate and Varying Effects in INLA" ) ## ----List of covariate combinations------------------------------------------- # Build a combined list of various transformations and combinations of pdsi.l1 cov_list <- c( list( uni_cov_lin[[1]], # [1] pdsi.l1 (linear) uni_cov_nl_rep[[13]], # [2] pdsi.l1 (nonlinear replicated by 'main_climate_f') multi_cov_lin[[1]], # [3] pdsi.l1 (linear) + tmin.l1 (linear) triple_cov_lin[[1]], # [4] pdsi.l1 (linear) + tmin.l1 (linear) + urban (linear) multi_cov_nl_rep[[7]], # [5] pdsi.l1 (nonlinear replicated by 'main_climate_f') + tmin.l1 (linear) interacting_cov[[1]], # [6] pdsi.l1, tmin.l1, urban (linear main effects + 2-way & 3-way interactions) varying_cov[[1]] # [7] pdsi.l1 (linear varying by 'main_climate_f') + tmin.l1 (linear) ), uni_cov_nl[7:12] # [8–13] pdsi.l1 through pdsi.l6 (nonlinear) ) ## ----formulas_cov_list-------------------------------------------------------- formulas_cov_list <- write_inla_formulas( outcome = "dengue_cases", covariates = cov_list, re1 = list(id ="month_id", model ="rw1", cyclic = TRUE, hyper = "prior_t", replicate = "spat_meso_id" ), re2 = list(id = "year_id", model = "iid", hyper = "prior_t"), re3 = list(id = "spat_id", model = "bym2", graph = "g", hyper = "prior_sp"), baseline = TRUE) # Example of INLA formula generated formulas_cov_list[1] class(formulas_cov_list) ## ----formulas_cov_list_ghr---------------------------------------------------- formulas_cov_list_ghr <- as_GHRformulas(formulas = formulas_cov_list) class(formulas_cov_list_ghr) ## ----fit_models model_list, eval = FALSE-------------------------------------- # # model_cov_list <- fit_models( # formulas = formulas_cov_list_ghr, # data = data, # family = "nbinomial", # Negative binomial likelihood # name = "mod", # Label prefix for each model # offset = "population", # Offset variable to account for population size # control_compute = list( # config = FALSE, # Do not posterior predictive distribution # vcov = FALSE # Do not return variance-covariance matrix # ), # pb = TRUE, # Display progress bar # nthreads = 8 # Use 8 threads for parallel computation # ) # # class(model_cov_list) # # model_cov_list_gof <- model_cov_list$mod_gof ## ----fit_models model_list readrds, echo = FALSE------------------------------ model_cov_list <- readRDS( system.file("examples", "model_cov_list.rds", package = "GHRmodel") ) model_cov_list_gof <- model_cov_list$mod_gof ## ----plot interactions, fig.width = 6, fig.height = 4------------------------ # Plot linear effects and their interactions plot_coef_lin( model = model_cov_list, # A list of fitted INLA models mod_id = c("mod2", "mod4", "mod5", "mod7"), # Select models with linear effects to be plotted # Custom labels for variables (applied only to non-interacting fixed effects) var_label = c( "tmin.l1" = "Min. temp lag 1", # Rename 'tmin.l1' to a descriptive label "pdsi.l1" = "PDSI lag 1", # Rename 'pdsi.l1' to a descriptive label "urban" = "Prop. urban population" # Rename 'urban' to a descriptive label ), title = "Effects of linear and interacting covariates" # Title for the plot summarizing what is being visualized ) ## ----Köppen-Geiger climate regimes, echo = FALSE------------------------------ knitr::kable( data.frame( `main_climate_f` = c("1", "2", "3", "4"), `main_climate` = c("AF", "AM", "AW", "CFA"), Description = c( "Tropical Rainforest Climate", "Tropical Monsoon Climate", "Tropical Savanna Climate with Dry Winter", "Humid Subtropical Climate" ) ), col.names = c("main_climate_f", "main_climate", "Climate Zone Description"), caption = "Main Köppen-Geiger Climate Regimes used in the varying coefficient analysis" ) ## ----Varying coefficients, fig.width = 8, fig.height = 3, out.width='100%'---- # Plot linear slopes varying by climate zone. plot_coef_varying( models = model_cov_list, # A list of fitted INLA model objects mod_id = "mod8", # Select the model with varying slopes palette = "Blues", # Color palette for the plot (from RColorBrewer) name = "main_climate_f", # The grouping variable (factor) title = "Effect of PDSI at one-month lag for each climate zone", # Plot title ylab = "Main climate zones", # Label for the y-axis (groups/climate zones) unit_label = c( # Map factor levels to descriptive names "1" = "Tropical Rainforest Climate", "2" = "Tropical Monsoon Climate", "3" = "Tropical Savanna Climate with Dry Winter", "4" = "Humid Subtropical Climate" ) ) ## ----plot_coef_nl grid replicated, fig.width = 8, fig.height = 8, out.width='100%'---- # PLot replicated nonlinear effects plot_coef_nl( models = model_cov_list, # List of fitted INLA model objects mod_id = c("mod3", "mod6"), # Select which models to include in the plot mod_label = c( # Custom display labels for the selected models "mod3" = "pdsi.l1_rep_clim", "mod6" = "pdsi.l1_rep_clim + tmin.l1" ), var_label = c( # Rename variables for clearer axis/legend labels "pdsi.l1" = "PDSI lag 1" ), name = "pdsi.l1", # Variable to plot: nonlinear effect of pdsi.l1 title = "Nonlinear effect of PDSI at one-month lag replicated by main climate", xlab = "PDSI", # X-axis label palette = "IDE2", # Color palette for plotting the nonlinear curves collapse = FALSE, # Display results in a grid (one plot per covariate-model pair) rug = FALSE, # Do not show rug plot for data density along the x-axis histogram = TRUE, # Show histogram of covariate distribution instead of rug plot legend = "Climate zone" # Add legend title ) ## ----formulas_user_ghr-------------------------------------------------------- # Convert list of user-defined INLA formulas into a GHRformulas object formulas_user_ghr <- as_GHRformulas(c( # Model 1: random effects only, where monthly random effect is replicated by meso region and the spatial random effect is replicated by year "dengue_cases ~ 1 + f(month_id, model = 'rw1', replicate = spat_meso_id, cyclic = TRUE, constr = TRUE, hyper = prior_t) + f(year_id, model = 'iid', constr = TRUE, hyper = prior_t) + f(spat_id, model = 'bym2', graph = g, constr = TRUE, hyper = prior_sp, replicate = year_id2)", # Model 2: random effects and a varying effect for pdsi lag 1 by climate zone "dengue_cases ~ 1 + f(main_climate_f, pdsi.l1, model = 'iid') + f(month_id, model = 'rw1', replicate = spat_meso_id, cyclic = TRUE, constr = TRUE, hyper = prior_t) + f(year_id, model = 'iid', constr = TRUE, hyper = prior_t) + f(spat_id, model = 'bym2', graph = g, constr = TRUE, hyper = prior_sp, replicate = year_id2)", # Model 3: random effects and a 3-way interaction between different pdsi and tmin lags "dengue_cases ~ 1 + pdsi.l1 + tmin.l3 + pdsi.l6 + pdsi.l1:tmin.l3:pdsi.l6 + f(month_id, model = 'rw1', replicate = spat_meso_id, cyclic = TRUE, constr = TRUE, hyper = prior_t) + f(year_id, model = 'iid', constr = TRUE, hyper = prior_t) + f(spat_id, model = 'bym2', graph = g, constr = TRUE, hyper = prior_sp, replicate = year_id2)" )) # Visualize output: GHRformulas object class(formulas_user_ghr) ## ----fit_models model_user, eval = FALSE-------------------------------------- # # User-defined INLA-compatible formulas can be passed into fit_models() as a GHRformulas object # model_user <- fit_models( # formulas = formulas_user_ghr, # data = data, # family = "nbinomial", # Negative binomial likelihood # name = "mod", # Label prefix for each model # offset = "population", # Offset variable to account for population size # control_compute = list( # config = FALSE, # Do not posterior predictive distribution # vcov = FALSE # Do not return variance-covariance matrix # ), # pb = TRUE, # Display progress bar # nthreads = 8 # Use 8 threads for parallel computation # ) ## ----fit_models model_user readrds, echo=FALSE-------------------------------- model_user<- readRDS(system.file("examples", "model_user.rds", package = "GHRmodel")) model_user_gof <- model_user$mod_gof ## ----plot user model,fig.width = 6, fig.height = 3---------------------------- # Plot any linear coefficients found in the fitted model results. plot_coef_lin( model = model_user, # Provide fitted model GHRmodels object exp = TRUE, # Exponentiate coefficients to relative risk scale title = "Relative Risk (RR)" # Plot title )