## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(ci)
library(dplyr)

## ----eval=FALSE---------------------------------------------------------------
# # Binomial CI: two categories (e.g., mutant/wild-type)
# ci_binom(x = 54, n = 80)
# 
# # Multinomial CI: 3+ categories (e.g., blood types)
# ci_multinom(c("A" = 20, "B" = 35, "AB" = 25, "O" = 15))
# 
# # Mean CI based on data: average with groups
# data |>
#   group_by(group_var) |>
#   ci_mean_t(numeric_var)
# 
# # Mean CI based on summary statistics (mean, SD, n): e.g., literature values
# ci_mean_t_stat(mean_ = 75, sd_ = 10, n = 25)
# 
# # Any statistic (median, correlation, etc.)
# data |> ci_boot(var1, var2, FUN = cor, R = 1000)

## -----------------------------------------------------------------------------
# 54 out of 80 Drosophila are mutation-free
ci_binom(x = 54, n = 80)

## ----paged.print=FALSE--------------------------------------------------------
ci_n1 <- ci_binom(x = 54, n = 80) # Small sample
ci_n2 <- ci_binom(x = 135, n = 200) # Larger sample
ci_n3 <- ci_binom(x = 675, n = 1000) # Even larger sample

## -----------------------------------------------------------------------------
bind_rows(ci_n1, ci_n2, ci_n3) |>
  mutate(diff_upr_lwr = round(upr.ci - lwr.ci, digits = 3))

## -----------------------------------------------------------------------------
ci_q1 <- ci_binom(x = 54, n = 80, conf.level = 0.90) # Less confident, narrower
ci_q2 <- ci_binom(x = 54, n = 80, conf.level = 0.95) # Standard
ci_q3 <- ci_binom(x = 54, n = 80, conf.level = 0.99) # More confident, wider

## -----------------------------------------------------------------------------
bind_rows(ci_q1, ci_q2, ci_q3) |>
  mutate(diff_upr_lwr = round(upr.ci - lwr.ci, digits = 3))

## -----------------------------------------------------------------------------
# Blood type distribution
blood_types <- c("A" = 20, "B" = 35, "AB" = 25, "O" = 15)
ci_multinom(blood_types)

## -----------------------------------------------------------------------------
# Transportation preferences
c("Car" = 20, "Bus" = 45, "Bike" = 15, "Walk" = 20) |>
  ci_multinom(method = "goodman")

## -----------------------------------------------------------------------------
# Using built-in dataset as example
data(npk, package = "datasets")
head(npk, n = 3)

## -----------------------------------------------------------------------------
# Average crop yield (overall)
npk |>
  ci_mean_t(yield)

## -----------------------------------------------------------------------------
# Compare yields with and without nitrogen fertilizer
npk |>
  group_by(N) |>
  ci_mean_t(yield)

## -----------------------------------------------------------------------------
# Multiple grouping factors
npk |>
  group_by(N, P, K) |>
  ci_mean_t(yield)

## -----------------------------------------------------------------------------
data(iris, package = "datasets")
head(iris, n = 3)

## -----------------------------------------------------------------------------
# Petal length by flower species
iris |>
  group_by(Species) |>
  ci_mean_t(Petal.Length)

## -----------------------------------------------------------------------------
# Calculate CI from reported statistics
ci_mean_t_stat(mean_ = 75, sd_ = 10, n = 25)

## -----------------------------------------------------------------------------
# Three enzyme activity measurements
mean_val <- c(78, 82, 75)
std_dev  <- c(8,   7,  9)
n        <- c(30, 28, 32)
group <- c("Enzyme A", "Enzyme B", "Enzyme C")

ci_mean_t_stat(mean_val, std_dev, n, group)

## -----------------------------------------------------------------------------
# Three different treatment conditions from literature
treatments <-
  tibble(
    treatment = c("Control", "Low dose", "High dose"),
    mean_response = c(78, 82, 75),
    sd = c(8, 7, 9),
    n = c(30, 28, 32)
  )

treatments |>
  with(ci_mean_t_stat(mean_response, sd, n, treatment))

## -----------------------------------------------------------------------------
# Same mean and SD but four different sample sizes
ci_mean_t_stat(mean_ = 75, sd_ = 10, n = c(10, 25, 50, 100))

## -----------------------------------------------------------------------------
set.seed(123) # For reproducible results

# Median petal length for all flowers
iris |>
  ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "bca")

## -----------------------------------------------------------------------------
set.seed(456)

# Median by species
iris |>
  group_by(Species) |>
  ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "bca")

## -----------------------------------------------------------------------------
set.seed(789)

# How variable is petal length?
iris |>
  ci_boot(Petal.Length, FUN = sd, R = 1000, bci.method = "bca")

## -----------------------------------------------------------------------------
set.seed(101)

# Relationship between petal length and width for each species
iris |>
  group_by(Species) |>
  ci_boot(
    Petal.Length, Petal.Width,
    FUN = cor, method = "pearson",
    R = 1000, bci.method = "bca"
  )

## -----------------------------------------------------------------------------
set.seed(101)

# Correlation between petal length and width
iris |>
  group_by(Species) |>
  ci_boot(
    Petal.Length, Petal.Width,
    FUN = cor, method = "spearman",
    R = 1000, bci.method = "bca"
  )

## -----------------------------------------------------------------------------
set.seed(222)

# Percentile method (simpler, more robust)
iris |> ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "perc")

# BCa method (often more accurate)
iris |> ci_boot(Petal.Length, FUN = median, R = 1000, bci.method = "bca")

## -----------------------------------------------------------------------------
# Same data, different confidence levels
ci_mean_t_stat(mean_ = 75, sd_ = 10, n = 25, conf.level = 0.90)
ci_mean_t_stat(mean_ = 75, sd_ = 10, n = 25, conf.level = 0.95)
ci_mean_t_stat(mean_ = 75, sd_ = 10, n = 25, conf.level = 0.99)

## -----------------------------------------------------------------------------
# Effect of sample size on precision
ci_mean_t_stat(mean_ = 85, sd_ = 15, n = c(10, 25, 50, 100, 400))

## -----------------------------------------------------------------------------
# Two treatment methods
ci_mean_t_stat(
  mean_ = c(78, 82),
  sd_ = c(8, 7),
  n = c(30, 28),
  group = c("Treatment A", "Treatment B")
)

## -----------------------------------------------------------------------------
# Function to run bootstrap CI and measure computation time
simulate_ci <- function(R) {
  # system.time() tracks how long the code takes to run
  time_s <- system.time(
    result <- iris |> ci_boot(Petal.Length, FUN = mean, R = R, bci.method = "bca")
  )

  # Add useful metrics to the results:
  # - diff: CI width (difference between upper and lower bounds)
  # - R: number of resamples used
  # - time_s: elapsed computation time in seconds
  result <- result |>
    mutate(
      diff = (upr.ci - lwr.ci) |> round(digits = 3),
      upr.ci = upr.ci |> round(digits = 3),
      lwr.ci = lwr.ci |> round(digits = 3),
      time_s = time_s[3], # [3] extracts elapsed time (wall-clock time),
      R = R,
    )

  return(result)
}

## -----------------------------------------------------------------------------
# Run simulation with different numbers of bootstrap resamples
# Each R value is tested 5 times to assess variability

set.seed(307)
results <- bind_rows(
  # 500 resamples
  simulate_ci(R = 500),
  simulate_ci(R = 500),
  simulate_ci(R = 500),
  simulate_ci(R = 500),
  simulate_ci(R = 500),
  # 1000 resamples
  simulate_ci(R = 1000),
  simulate_ci(R = 1000),
  simulate_ci(R = 1000),
  simulate_ci(R = 1000),
  simulate_ci(R = 1000),
  # 3000 resamples
  simulate_ci(R = 3000),
  simulate_ci(R = 3000),
  simulate_ci(R = 3000),
  simulate_ci(R = 3000),
  simulate_ci(R = 3000),
  # 5000 resamples
  simulate_ci(R = 5000),
  simulate_ci(R = 5000),
  simulate_ci(R = 5000),
  simulate_ci(R = 5000),
  simulate_ci(R = 5000),
  # 9999 resamples
  simulate_ci(R = 9999),
  simulate_ci(R = 9999),
  simulate_ci(R = 9999),
  simulate_ci(R = 9999),
  simulate_ci(R = 9999),
)

print(results, n = Inf)

## -----------------------------------------------------------------------------
# Summary by number of replications (R)
summary_of_results <-
  results |>
  group_by(R) |>
  summarize(
    diff_mean =   mean(diff) |> round(digits = 3),
    diff_sd   =     sd(diff) |> round(digits = 3),
    time_mean = mean(time_s) |> round(digits = 2),
    time_sd   =   sd(time_s) |> round(digits = 2)
  )

summary_of_results