## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(SBMTrees)
library(mitml)
library(lme4)

## ----prediction_sim------------------------------------------------------------
# Simulate data
data <- simulation_prediction_conti(
  train_prop = 0.5,
  n_subject = 20,
  n_obs_per_sub = 5,
  nonlinear = TRUE,
  residual = "normal",
  randeff = "skewed_MVN",
  seed = 123
)

## ----prediction-----------------------------------------------------------------
# Fit the predictive model
# (nburn, npost and skip are kept very small here so the vignette builds
#  quickly; use substantially larger values in a real analysis)
model <- BMTrees_prediction(
  X_train = data$X_train,
  Y_train = data$Y_train,
  Z_train = data$Z_train,
  subject_id_train = data$subject_id_train,
  X_test = data$X_test,
  Z_test = data$Z_test,
  subject_id_test = data$subject_id_test,
  model = "BMTrees",
  binary = FALSE,
  nburn = 1L,
  npost = 1L,
  skip = 1L,
  verbose = FALSE,
  seed = 1234
)

# Posterior predictive draws for the testing dataset; column means give
# the posterior expectation of each test observation
posterior_predictions <- model$post_predictive_y_test
head(colMeans(posterior_predictions))

## ----prediction_evaluation------------------------------------------------------
point_predictions <- colMeans(posterior_predictions)

# Compute MAE
mae <- mean(abs(point_predictions - data$Y_test))
cat("Mean Absolute Error (MAE):", mae, "\n")

# Compute MSE
mse <- mean((point_predictions - data$Y_test)^2)
cat("Mean Squared Error (MSE):", mse, "\n")

# Compute 95% credible intervals
lower_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.025)
upper_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.975)

# Check if true values fall within the intervals
coverage <- mean(data$Y_test >= lower_bounds & data$Y_test <= upper_bounds)
cat("95% Posterior Predictive Interval Coverage:", coverage * 100, "%\n")

plot(data$Y_test, point_predictions,
     xlab = "True Values", ylab = "Predicted Values",
     main = "True vs Predicted Values")
abline(0, 1, col = "red")  # Add a 45-degree reference line

## ----imputation_sim--------------------------------------------------------------
# Simulate data with missing values
data <- simulation_imputation(NNY = TRUE, NNX = TRUE, n_subject = 20, seed = 123)

## ----imputation-------------------------------------------------------------------
# Run the sequential imputation (again with deliberately small MCMC settings
# so the vignette builds quickly)
imputed_model <- sequential_imputation(
  X = data$data_M[, 3:14],
  Y = data$data_M$Y,
  Z = data$Z,
  subject_id = data$data_M$subject_id,
  type = c(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1),
  outcome_model = "BMLM",
  binary_outcome = FALSE,
  model = "BMTrees",
  nburn = 0,
  npost = 2,
  skip = 1,
  verbose = FALSE,
  seed = 123
)

# Extract imputed data
imputed_data <- imputed_model$imputed_data
dim(imputed_data)  # Dimensions: posterior samples x observations x variables

## ----imputation_evaluation--------------------------------------------------------
# Create the list structure used by mitml: one completed dataset per
# retained posterior (imputation) sample
MI_data <- list()
for (i in 1:dim(imputed_data)[1]) {
  MI_data[[i]] <- cbind(as.data.frame(imputed_data[i, , ]),
                        data$Z,
                        data$data_M$subject_id)
  colnames(MI_data[[i]]) <- c(colnames(data$data_M[, 3:14]),
                              "Y", "Z1", "Z2", "subject_id")
}
MI_data <- as.mitml.list(MI_data)  # Convert to a mitml.list for analysis and pooling

# Fit the linear mixed model on each imputed dataset
lmm_results <- with(MI_data, lmer(Y ~ X_1 + X_2 + X_3 + X_4 + X_5 + X_6 +
                                    X_7 + X_8 + X_9 + X_10 + X_11 + X_12 +
                                    (0 + Z1 + Z2 | subject_id)))

# Pool fixed effects using Rubin's Rules
pooled_results <- testEstimates(lmm_results)

# Print pooled results
print(pooled_results)
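
## ----pooled_confint-------------------------------------------------------------
# Illustrative sketch, not part of the original vignette: mitml also provides a
# confint() method for pooled estimates, which gives confidence intervals for
# the fixed effects combined across the imputed datasets via Rubin's rules.
confint(pooled_results)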
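
## ----complete_case_comparison----------------------------------------------------
# Illustrative sketch, not part of the original vignette: a complete-case
# (listwise-deletion) fit on the incomplete data for comparison with the pooled
# results above. It assumes the covariate columns of data$data_M are named
# X_1, ..., X_12 (as used in the pooled model) and that data$Z has two columns;
# lmer() drops rows containing NA under the default na.action.
cc_data <- cbind(data$data_M, as.data.frame(data$Z))
colnames(cc_data)[(ncol(cc_data) - 1):ncol(cc_data)] <- c("Z1", "Z2")
cc_fit <- lmer(Y ~ X_1 + X_2 + X_3 + X_4 + X_5 + X_6 +
                 X_7 + X_8 + X_9 + X_10 + X_11 + X_12 +
                 (0 + Z1 + Z2 | subject_id), data = cc_data)
summary(cc_fit)$coefficients  # Compare with the pooled fixed effects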