## ----include = FALSE---------------------------------------------------------- clean_output <- function(x, options) { x <- gsub("0x[0-9a-f]+", "0xdeadbeef", x) x <- gsub("dataframe_[0-9]*_[0-9]*", " dataframe_42_42 ", x) x <- gsub("[0-9]*\\.___row_number ASC", "42.___row_number ASC", x) x <- gsub("─", "-", x) x } local({ hook_source <- knitr::knit_hooks$get("document") knitr::knit_hooks$set(document = clean_output) }) knitr::opts_chunk$set( collapse = TRUE, eval = identical(Sys.getenv("IN_PKGDOWN"), "true") || (getRversion() >= "4.1" && rlang::is_installed(c("conflicted", "nycflights13"))), comment = "#>" ) Sys.setenv(DUCKPLYR_FALLBACK_COLLECT = 0) ## ----attach------------------------------------------------------------------- library(conflicted) library(dplyr) conflict_prefer("filter", "dplyr") ## ----------------------------------------------------------------------------- df <- duckplyr::duckdb_tibble(x = 1:3) |> mutate(y = x + 1) df class(df) df$y nrow(df) ## ----------------------------------------------------------------------------- flights <- duckplyr::flights_df() flights_duckdb <- flights |> duckplyr::as_duckdb_tibble() system.time( mean_arr_delay_ewr <- flights_duckdb |> filter(origin == "EWR", !is.na(arr_delay)) |> summarize( .by = month, mean_arr_delay = mean(arr_delay), min_arr_delay = min(arr_delay), max_arr_delay = max(arr_delay), median_arr_delay = median(arr_delay), ) ) ## ----------------------------------------------------------------------------- mean_arr_delay_ewr |> explain() ## ----------------------------------------------------------------------------- system.time(mean_arr_delay_ewr$mean_arr_delay[[1]]) ## ----------------------------------------------------------------------------- system.time( flights |> filter(origin == "EWR", !is.na(arr_delay)) |> summarize( .by = c(month, day), mean_arr_delay = mean(arr_delay), min_arr_delay = min(arr_delay), max_arr_delay = max(arr_delay), median_arr_delay = median(arr_delay), ) ) ## ----------------------------------------------------------------------------- flights_stingy <- flights |> duckplyr::as_duckdb_tibble(prudence = "stingy") ## ----------------------------------------------------------------------------- flights_stingy names(flights_stingy)[1:10] class(flights_stingy) class(flights_stingy[[1]]) ## ----error = TRUE------------------------------------------------------------- try({ nrow(flights_stingy) flights_stingy[[1]] }) ## ----error = TRUE------------------------------------------------------------- try({ flights_stingy |> group_by(origin) |> summarize(n = n()) |> ungroup() }) ## ----------------------------------------------------------------------------- flights_stingy |> duckplyr::as_duckdb_tibble(prudence = "lavish") |> group_by(origin) |> summarize(n = n()) |> ungroup() ## ----------------------------------------------------------------------------- flights_stingy |> duckplyr::as_duckdb_tibble(prudence = "lavish") |> class() flights_stingy |> collect() |> class() ## ----------------------------------------------------------------------------- flights_stingy |> as_tibble() |> class() flights_stingy |> as.data.frame() |> class() ## ----------------------------------------------------------------------------- nrow(flights) flights_partial <- flights |> duckplyr::as_duckdb_tibble(prudence = "thrifty") ## ----error = TRUE------------------------------------------------------------- try({ flights_partial |> select(origin, dest, dep_delay, arr_delay) |> nrow() }) ## ----------------------------------------------------------------------------- flights_partial |> count(origin) |> nrow()