## ----setup, include = FALSE--------------------------------------------------- library(eVCGsampler) library(knitr) ## ----echo=TRUE, warning=FALSE, fig.dim = c(6, 6)------------------------------ # Generate data POOL (n=100) to sample from # Generate baseline data (n=30), for three treatment groups n=10 (all treated groups taken together) set.seed(3453) dat <- data.frame( treat = rep(0:1, c(100, 30)), weight = c(rlnorm(100, 2, 1), rnorm(30, 8.5, 1.7)) ) # Check whether the POOL has a different covariate distribution than the TG energy_test(treat ~ weight, data=dat) # Sample a smaller subset that is balanced to TG (it balance group=0 to group=1) out <- VCG_sampler(treat ~ weight, data=dat, n=10) dat2 <- out[[1]] # data frame with balance information, with new variable VCG, if 1, mean the observation was selected to be in VCG. out[[2]] # Balance target plot (optional) # Permutation ellipses are generated by randomly permuting the pool and treat groups to estimate usual (random) variability. # Only the X and Y axes are computed directly; the ellipse is interpolated between the axes. # This method is intended as a visual approximation rather than a precise statistical test. vcg <- dat2[which(dat2$VCG==1), ] # select the VCG that was sampled by VCG_sampler # Test if the balancing was successful (variable "treat_balanced", was created by the VCG_sampler function) energy_test(treat_balanced ~ weight, data=dat2) # Check covariate balance visual for the 'weight' covariate. plot_var(dat2, what='weight') ## ----echo=TRUE, warning=FALSE, fig.dim = c(6, 6)------------------------------ set.seed(2244) # Generate baseline data for treatment groups (all treated groups taken together) d_tg <- data.frame( age = rnorm(20, 10, 1), weight= rnorm(20, 7, 1), class = rbinom(20, 1, 0.3) ) # Generate data lake (pool) data (to sample from) d_pool <- data.frame( age = rnorm(50, 12, 2), weight= rnorm(50, 6, 2), class = rbinom(50, 1, 0.6) ) # Combine the data to balance the covariates between the two data sets (creates the variable treat POOL vs TG) dat <- combine_data(d_pool, d_tg, indicator_name='treat') # Calculate current energy distance (optional) energy_distance(treat ~ age + weight + class, data=dat) # Check whether the POOL has a different covariate distribution than the TG energy_test(treat ~ age + weight + class, data=dat) # If you are unsure about the size of VCG, try the BestVCGsize function to determine the optimal size. # This is a purely exploratory approach! BestVCGsize(treat ~ age + weight + class, data=dat) # It is only intended for exploratory purposes, as the VCG size is normally given. # Sample a smaller subset that is balanced to TG (it balance treat=0 to treat=1) out <- VCG_sampler(treat ~ age + weight + class, data=dat, n=10) dat2 <- out[[1]] out[[2]] # Balance target plot (optional) # Test if the balancing was successful energy_test(treat_balanced ~ age + weight + class, data=dat2) # Check covariate balance one by one plot_var(dat2, what='age') plot_var(dat2, what='weight') plot_var(dat2, what='class') ## ----echo=TRUE, warning=FALSE, fig.dim = c(6, 6)------------------------------ # Generate data POOL (n=100) to sample from # Generate baseline data (n=30), for all treatment groups together. # And a stratum factor variable sex set.seed(14495) dat <- data.frame( treat = rep(0:1, c(100, 30)), cov1 = c(rnorm(100, 11, 2), rnorm(30, 10, 1)), cov2 = c(rnorm(100, 11, 2), rnorm(30, 10, 1)), cov3 = c(rnorm(100, 9, 2), rnorm(30, 10, 1)), sex = c(rbinom(100, 1, 0.5), rbinom(30, 1, 0.5))) dat$sex <- factor(dat$sex, labels=c('M', 'F')) # Sample a VCG of size 7 for male and 5 for female animals. out <- VCG_sampler(treat ~ cov1 + cov2 + cov3 | sex, data=dat, n=c(7, 5), plot=TRUE) dat2 <- out[[1]] out[[2]] # Test if the balancing was successful energy_test(treat_balanced ~ cov1 + cov2 + cov3 | sex, data=dat2, plot=F) # plot cov1 plot_var(dat2, what='cov1') ## ----echo=TRUE, warning=FALSE, fig.dim = c(6, 6)------------------------------ # Create some bimodal sample data for the TG, the POOL data is normal distributed set.seed(5435) dat <- data.frame( treat = rep(0:1, c(100, 50)), cov_1 = c(rnorm(100, 7, 2), c(rnorm(25, 5, 1), rnorm(25, 9, 1)))) # Balance cov_1 covariate dat2 <- VCG_sampler(treat ~ cov_1, data=dat, n=25, plot=F) # Create histogram plot to show the results data <- data.frame( value = c(dat$cov_1[which(dat$treat==0)], dat$cov_1[which(dat$treat==1)], dat2$cov_1[which(dat2$VCG==1)]), group = factor(rep(c("POOL", "TG", "VCG"), c(100, 50, 25))) ) custom_colors <- c("POOL" = "#00A091", "TG" = "#7A00E6", "VCG" = "#FE7500") sample_sizes <- data.frame(label=c('n=100', 'n=50', 'n=25'),group=c("POOL", "TG", "VCG")) ggplot(data, aes(x = value, fill = group)) + geom_density(alpha = 0.6, color = NA) + scale_fill_manual(values = custom_colors) + facet_wrap(~ group, ncol = 1) + labs(title = "", x = "", y = "Density") + geom_text(data = sample_sizes, aes(x = Inf, y = Inf, label = label), hjust = 1.1, vjust = 1.5, inherit.aes = F) + theme_minimal() + theme(legend.position = "none",strip.text = element_text(size = 14, face = "bold"), plot.title = element_text(size = 16, face = "bold" ) ) ## ----echo=TRUE, warning=FALSE, fig.dim = c(6, 6)------------------------------ # Create data dat <- data.frame( treat = rep(0:1, c(50, 30)), cov_1 = c(rnorm(50, 5, 2), rnorm(30, 5, 1)), cov_2 = c(rnorm(50, 11, 2), rnorm(30, 10, 1)) ) # Generate 10 VCGs of size 5 out <- multiSampler(treat ~ cov_1 + cov_2, data = dat, n = 5, Nsamples = 10) out[[2]] str(out[[1]])