## ----setup, message=FALSE, warning=FALSE-------------------------------------- library(haldensify) library(data.table) library(ggplot2) set.seed(75681) ## ----make_example_data, message=FALSE, warning=FALSE-------------------------- make_example_data <- function(n_obs) { W <- runif(n_obs, -4, 4) A <- rnorm(n_obs, mean = W, sd = 0.25) dat <- as.data.table(list(A = A, W = W)) return(dat) } ## ----example_data, message=FALSE, warning=FALSE------------------------------- # number of observations in our simulated dataset n_obs <- 200 (example_data <- make_example_data(n_obs)) ## ----fit_haldensify, message=FALSE, warning=FALSE----------------------------- haldensify_fit <- haldensify( A = example_data$A, W = example_data$W, n_bins = c(4, 6, 8), grid_type = "equal_range", lambda_seq = exp(seq(-0.1, -10, length = 300)), # the following are passed to hal9001::fit_hal() internally max_degree = 1, reduce_basis = 1 / sqrt(n_obs) ) ## ----plot_risk_haldensify, fig.alt = "CV-risk of regularized conditional density estimators", message=FALSE, warning=FALSE---- p_risk <- plot(haldensify_fit) p_risk ## ----plot_haldensify, fig.alt = "Estimated conditional density of A|W for several choices of W", message=FALSE, warning=FALSE---- # predictions to recover conditional density of A|W new_a <- seq(-4, 4, by = 0.05) new_dat <- as.data.table(list( a = new_a, w_neg = rep(-2, length(new_a)), w_zero = rep(0, length(new_a)), w_pos = rep(2, length(new_a)) )) new_dat[, pred_w_neg := predict(haldensify_fit, new_A = new_dat$a, new_W = new_dat$w_neg )] new_dat[, pred_w_zero := predict(haldensify_fit, new_A = new_dat$a, new_W = new_dat$w_zero )] new_dat[, pred_w_pos := predict(haldensify_fit, new_A = new_dat$a, new_W = new_dat$w_pos )] # visualize results dens_dat <- melt(new_dat, id = c("a"), measure.vars = c("pred_w_pos", "pred_w_zero", "pred_w_neg") ) p_dens <- ggplot(dens_dat, aes(x = a, y = value, colour = variable)) + geom_point() + geom_line() + stat_function( fun = dnorm, args = list(mean = -2, sd = 0.25), colour = "blue", linetype = "dashed" ) + stat_function( fun = dnorm, args = list(mean = 0, sd = 0.25), colour = "darkgreen", linetype = "dashed" ) + stat_function( fun = dnorm, args = list(mean = 2, sd = 0.25), colour = "red", linetype = "dashed" ) + labs( x = "Observed value of W", y = "Estimated conditional density", title = "Conditional density estimates g(A|W)" ) + theme_bw() + theme(legend.position = "none") p_dens ## ----make_example_data_ipw, message=FALSE, warning=FALSE---------------------- # set up data-generating process make_example_data <- function(n_obs) { W <- runif(n_obs, 1, 4) A <- rpois(n_obs, 2 * W + 1) Y <- rbinom(n_obs, 1, plogis(2 - A + W + 3)) dat <- as.data.table(list(Y = Y, A = A, W = W)) return(dat) } # generate data and take a look (dat_obs <- make_example_data(n_obs = 200)) ## ----estimate_ipw, message=FALSE, warning=FALSE------------------------------- est_ipw <- ipw_shift( W = dat_obs$W, A = dat_obs$A, Y = dat_obs$Y, delta = 2, cv_folds = 5L, n_bins = 4L, bin_type = "equal_range", selector_type = "all", lambda_seq = exp(seq(-1, -10, length = 500L)), # arguments passed to hal9001::fit_hal() max_degree = 1, reduce_basis = 1 / sqrt(n_obs) ) confint(est_ipw)