## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(haldensify)
library(data.table)
library(ggplot2)
set.seed(75681)

## ----make_example_data, message=FALSE, warning=FALSE--------------------------
make_example_data <- function(n_obs) {
  W <- runif(n_obs, -4, 4)
  A <- rnorm(n_obs, mean = W, sd = 0.25)
  dat <- as.data.table(list(A = A, W = W))
  return(dat)
}

## ----example_data, message=FALSE, warning=FALSE-------------------------------
# number of observations in our simulated dataset
n_obs <- 200
(example_data <- make_example_data(n_obs))

## ----fit_haldensify, message=FALSE, warning=FALSE-----------------------------
haldensify_fit <- haldensify(
  A = example_data$A,
  W = example_data$W,
  n_bins = c(4, 6, 8),
  grid_type = "equal_range",
  lambda_seq = exp(seq(-0.1, -10, length = 300)),
  # the following are passed to hal9001::fit_hal() internally
  max_degree = 1,
  reduce_basis = 1 / sqrt(n_obs)
)

## ----plot_risk_haldensify, fig.alt = "CV-risk of regularized conditional density estimators", message=FALSE, warning=FALSE----
p_risk <- plot(haldensify_fit)
p_risk

## ----plot_haldensify, fig.alt = "Estimated conditional density of A|W for several choices of W", message=FALSE, warning=FALSE----
# predictions to recover conditional density of A|W
new_a <- seq(-4, 4, by = 0.05)
new_dat <- as.data.table(list(
  a = new_a,
  w_neg = rep(-2, length(new_a)),
  w_zero = rep(0, length(new_a)),
  w_pos = rep(2, length(new_a))
))
new_dat[, pred_w_neg := predict(haldensify_fit,
  new_A = new_dat$a,
  new_W = new_dat$w_neg
)]
new_dat[, pred_w_zero := predict(haldensify_fit,
  new_A = new_dat$a,
  new_W = new_dat$w_zero
)]
new_dat[, pred_w_pos := predict(haldensify_fit,
  new_A = new_dat$a,
  new_W = new_dat$w_pos
)]
# visualize results
dens_dat <- melt(new_dat,
  id = c("a"),
  measure.vars = c("pred_w_pos", "pred_w_zero", "pred_w_neg")
)
p_dens <- ggplot(dens_dat, aes(x = a, y = value, colour = variable)) +
  geom_point() +
  geom_line() +
  stat_function(
    fun = dnorm, args = list(mean = -2, sd = 0.25),
    colour = "blue", linetype = "dashed"
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 0.25),
    colour = "darkgreen", linetype = "dashed"
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 2, sd = 0.25),
    colour = "red", linetype = "dashed"
  ) +
  labs(
    x = "Observed value of W",
    y = "Estimated conditional density",
    title = "Conditional density estimates g(A|W)"
  ) +
  theme_bw() +
  theme(legend.position = "none")
p_dens

## ----make_example_data_ipw, message=FALSE, warning=FALSE----------------------
# set up data-generating process
make_example_data <- function(n_obs) {
  W <- runif(n_obs, 1, 4)
  A <- rpois(n_obs, 2 * W + 1)
  Y <- rbinom(n_obs, 1, plogis(2 - A + W + 3))
  dat <- as.data.table(list(Y = Y, A = A, W = W))
  return(dat)
}

# generate data and take a look
(dat_obs <- make_example_data(n_obs = 200))

## ----estimate_ipw, message=FALSE, warning=FALSE-------------------------------
est_ipw <- ipw_shift(
  W = dat_obs$W, A = dat_obs$A, Y = dat_obs$Y,
  delta = 2,
  cv_folds = 5L,
  n_bins = 4L,
  bin_type = "equal_range",
  selector_type = "all",
  lambda_seq = exp(seq(-1, -10, length = 500L)),
  # arguments passed to hal9001::fit_hal()
  max_degree = 1,
  reduce_basis = 1 / sqrt(n_obs)
)
confint(est_ipw)