params <- list(EVAL = TRUE) ## ----echo = FALSE---------------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE, eval = if (isTRUE(exists("params"))) params$EVAL else FALSE ) ## ----setup, include=FALSE-------------------------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", fig.align = "center") library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) ## -------------------------------------------------------------------------------------------- set.seed(122) simdat <- sim_mvgam(trend_model = AR(), prop_trend = 0.6, mu = c(0, 1, 2), family = poisson()) trend_map <- data.frame(series = unique(simdat$data_train$series), trend = c(1, 1, 2)) trend_map ## -------------------------------------------------------------------------------------------- all.equal(levels(trend_map$series), levels(simdat$data_train$series)) ## -------------------------------------------------------------------------------------------- fake_mod <- mvgam(y ~ # observation model formula, which has a # different intercept per series series - 1, # process model formula, which has a shared seasonal smooth # (each latent process model shares the SAME smooth) trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) # dynamics; processes are estimated using the noncentred # parameterisation for improved efficiency trend_model = AR(), noncentred = TRUE, # supplied trend_map trend_map = trend_map, # data and observation family family = poisson(), data = simdat$data_train, run_model = FALSE) ## -------------------------------------------------------------------------------------------- code(fake_mod) ## -------------------------------------------------------------------------------------------- fake_mod$model_data$Z ## ----full_mod, include = FALSE, results='hide'----------------------------------------------- full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), trend_model = AR(), noncentred = TRUE, trend_map = trend_map, family = poisson(), data = simdat$data_train, silent = 2) ## ----eval=FALSE------------------------------------------------------------------------------ # full_mod <- mvgam(y ~ series - 1, # trend_formula = ~ s(season, bs = 'cc', k = 6), # trend_model = AR(), # noncentred = TRUE, # trend_map = trend_map, # family = poisson(), # data = simdat$data_train, # silent = 2) ## -------------------------------------------------------------------------------------------- summary(full_mod) ## -------------------------------------------------------------------------------------------- plot(full_mod, type = 'trend', series = 1) plot(full_mod, type = 'trend', series = 2) plot(full_mod, type = 'trend', series = 3) ## -------------------------------------------------------------------------------------------- set.seed(0) # simulate a nonlinear relationship using the mgcv function gamSim signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1) # productivity is one of the variables in the simulated data productivity <- signal_dat$x2 # simulate the true signal, which already has a nonlinear relationship # with productivity; we will add in a fairly strong AR1 process to # contribute to the signal true_signal <- as.vector(scale(signal_dat$y) + arima.sim(100, model = list(ar = 0.8, sd = 0.1))) ## -------------------------------------------------------------------------------------------- plot(true_signal, type = 'l', bty = 'l', lwd = 2, ylab = 'True signal', xlab = 'Time') ## -------------------------------------------------------------------------------------------- sim_series = function(n_series = 3, true_signal){ temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1) temperature <- temp_effects$y alphas <- rnorm(n_series, sd = 2) do.call(rbind, lapply(seq_len(n_series), function(series){ data.frame(observed = rnorm(length(true_signal), mean = alphas[series] + 1.5*as.vector(scale(temp_effects[, series + 1])) + true_signal, sd = runif(1, 1, 2)), series = paste0('sensor_', series), time = 1:length(true_signal), temperature = temperature, productivity = productivity, true_signal = true_signal) })) } model_dat <- sim_series(true_signal = true_signal) %>% dplyr::mutate(series = factor(series)) ## -------------------------------------------------------------------------------------------- plot_mvgam_series(data = model_dat, y = 'observed', series = 'all') ## -------------------------------------------------------------------------------------------- plot(observed ~ temperature, data = model_dat %>% dplyr::filter(series == 'sensor_1'), pch = 16, bty = 'l', ylab = 'Sensor 1', xlab = 'Temperature') plot(observed ~ temperature, data = model_dat %>% dplyr::filter(series == 'sensor_2'), pch = 16, bty = 'l', ylab = 'Sensor 2', xlab = 'Temperature') plot(observed ~ temperature, data = model_dat %>% dplyr::filter(series == 'sensor_3'), pch = 16, bty = 'l', ylab = 'Sensor 3', xlab = 'Temperature') ## ----sensor_mod, include = FALSE, results='hide'--------------------------------------------- mod <- mvgam(formula = # formula for observations, allowing for different # intercepts and smooth effects of temperature observed ~ series + s(temperature, k = 10) + s(series, temperature, bs = 'sz', k = 8), trend_formula = # formula for the latent signal, which can depend # nonlinearly on productivity ~ s(productivity, k = 8) - 1, trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation AR(), noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same # latent signal data.frame(series = unique(model_dat$series), trend = c(1, 1, 1)), # informative priors on process error # and observation error will help with convergence priors = c(prior(normal(2, 0.5), class = sigma), prior(normal(1, 0.5), class = sigma_obs)), # Gaussian observations family = gaussian(), burnin = 600, adapt_delta = 0.95, data = model_dat, silent = 2) ## ----eval=FALSE------------------------------------------------------------------------------ # mod <- mvgam(formula = # # formula for observations, allowing for different # # intercepts and hierarchical smooth effects of temperature # observed ~ series + # s(temperature, k = 10) + # s(series, temperature, bs = 'sz', k = 8), # # trend_formula = # # formula for the latent signal, which can depend # # nonlinearly on productivity # ~ s(productivity, k = 8) - 1, # # trend_model = # # in addition to productivity effects, the signal is # # assumed to exhibit temporal autocorrelation # AR(), # noncentred = TRUE, # # trend_map = # # trend_map forces all sensors to track the same # # latent signal # data.frame(series = unique(model_dat$series), # trend = c(1, 1, 1)), # # # informative priors on process error # # and observation error will help with convergence # priors = c(prior(normal(2, 0.5), class = sigma), # prior(normal(1, 0.5), class = sigma_obs)), # # # Gaussian observations # family = gaussian(), # data = model_dat, # silent = 2) ## -------------------------------------------------------------------------------------------- summary(mod, include_betas = FALSE) ## -------------------------------------------------------------------------------------------- conditional_effects(mod, type = 'link') ## -------------------------------------------------------------------------------------------- require(marginaleffects) plot_predictions(mod, condition = c('temperature', 'series', 'series'), points = 0.5) + theme(legend.position = 'none') ## -------------------------------------------------------------------------------------------- plot(mod, type = 'trend') # Overlay the true simulated signal points(true_signal, pch = 16, cex = 1, col = 'white') points(true_signal, pch = 16, cex = 0.8)