# NOTE(review): this looks like code extracted from a vignette (knitr chunk
# markers) -- confirm. Only the first chunk is live code; it sets
# `eval = FALSE`, and every later chunk is kept commented out as a usage
# example. Inside those chunks, `# code` is example code and `# # text` is a
# comment that belongs to the example.

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup--------------------------------------------------------------------
# library(gooseR)
# library(dplyr)

## -----------------------------------------------------------------------------
# # Save any R object
# my_model <- lm(mpg ~ wt + cyl, data = mtcars)
#
# goose_save(
#   my_model,
#   category = "models",
#   tags = c("mtcars", "regression", "fuel_efficiency")
# )
#
# # Save data frames
# clean_data <- mtcars %>%
#   filter(mpg > 20) %>%
#   select(mpg, wt, cyl, hp)
#
# goose_save(
#   clean_data,
#   category = "datasets",
#   tags = c("filtered", "mtcars", "high_mpg")
# )
#
# # Save multiple objects at once
# results <- list(
#   model = my_model,
#   data = clean_data,
#   summary = summary(my_model)
# )
#
# goose_save(
#   results,
#   category = "analysis",
#   tags = c("complete_analysis", "2024_q4")
# )

## -----------------------------------------------------------------------------
# # List what's saved
# goose_list()
#
# # List by category
# goose_list(category = "models")
#
# # List by tags
# goose_list(tags = "mtcars")
#
# # Load an object
# my_saved_model <- goose_load("my_model")
#
# # Load with full metadata
# obj_with_meta <- goose_load("my_model", include_metadata = TRUE)
# print(obj_with_meta$metadata$created_at)

## -----------------------------------------------------------------------------
# # Categories: Broad classifications
# # - "models", "datasets", "plots", "reports", "temp"
#
# # Tags: Specific descriptors
# # - "production", "test", "client_a", "2024_q4", "regression"
#
# # Example: Organizing a project
# goose_save(raw_data, category = "datasets", tags = c("raw", "client_a", "2024"))
# goose_save(clean_data, category = "datasets", tags = c("clean", "client_a", "2024"))
# goose_save(model_v1, category = "models", tags = c("v1", "client_a", "baseline"))
# goose_save(model_v2, category = "models", tags = c("v2", "client_a", "improved"))
# goose_save(final_plot, category = "plots", tags = c("final", "client_a", "presentation"))

## -----------------------------------------------------------------------------
# # Backup everything before major changes
# goose_backup()
# # Creates timestamped backup: backup_20241204_143022
#
# # Work on your analysis...
# # If something goes wrong:
#
# # List available backups
# list.files(path = "~/.config/goose/memory/backups")
#
# # Restore from backup
# goose_restore("backup_20241204_143022")

## -----------------------------------------------------------------------------
# # Remove all test objects
# goose_clear_tags(c("test", "temp"))
#
# # Remove draft versions
# goose_clear_tags("draft")
#
# # Clean up after experimentation
# goose_clear_tags(c("experiment", "sandbox"))

## -----------------------------------------------------------------------------
# # Work in a temporary session that auto-cleans
# with_goose_session({
#   # Experimental work
#   test_data <- mtcars %>%
#     mutate(mpg_squared = mpg^2)
#
#   test_model <- lm(mpg_squared ~ wt + cyl, data = test_data)
#
#   # Save temporarily
#   goose_save(test_model, category = "session_temp", tags = "experiment")
#
#   # Do analysis
#   print(summary(test_model))
# }, cleanup = TRUE) # Everything in session_temp is deleted after
#
# # For persistent session work
# with_goose_session({
#   # Production work
#   final_model <- lm(mpg ~ wt + cyl + hp, data = mtcars)
#   goose_save(final_model, category = "production", tags = "final")
# }, cleanup = FALSE) # Keeps everything

## -----------------------------------------------------------------------------
# # Load your data
# my_data <- read.csv("complex_dataset.csv")
#
# # Share a sample with goose for context
# goose_give_sample(my_data, n = 10)
#
# # Now goose understands your data structure
# advice <- goose_ask("What's the best way to handle the missing values in this dataset?")

## -----------------------------------------------------------------------------
# # Share your data first
# goose_give_sample(my_data)
#
# # Get an exploratory analysis plan
# exploratory_plan <- goose_make_a_plan("exploratory")
# cat(exploratory_plan)
#
# # Output:
# # Based on your data structure, here's an exploratory analysis plan:
# #
# # 1. Data Overview
# #    - Check dimensions: 1000 rows × 15 columns
# #    - Examine variable types
# #    - Missing value analysis
# #
# # 2. Univariate Analysis
# #    - Distribution of continuous variables
# #    - Frequency tables for categorical
# # ...
#
# # Get a predictive modeling plan
# predictive_plan <- goose_make_a_plan("predictive")
#
# # Get a diagnostic plan
# diagnostic_plan <- goose_make_a_plan("diagnostic")

## -----------------------------------------------------------------------------
# # After completing analysis
# results <- list(
#   model = final_model,
#   performance = model_metrics,
#   plots = list(residual_plot, prediction_plot)
# )
#
# # Create comprehensive handoff documentation
# handoff <- goose_handoff()
#
# # This generates:
# # - Summary of work completed
# # - Key findings
# # - Code snippets for reproduction
# # - List of saved objects
# # - Next steps recommendations
#
# # Save the handoff
# writeLines(handoff, "project_handoff.md")

## -----------------------------------------------------------------------------
# # At the end of your work session
# goose_continuation_prompt()
#
# # This creates a prompt you can use tomorrow:
# # "Continue analysis of customer churn model. Last session:
# # - Completed data cleaning (saved as 'clean_data')
# # - Built baseline model (saved as 'baseline_model', AUC=0.72)
# # - Identified class imbalance issue
# # Next: Try SMOTE for balancing, feature engineering on date fields"
#
# # Save it
# prompt <- goose_continuation_prompt()
# writeLines(prompt, paste0("continue_", Sys.Date(), ".txt"))

## -----------------------------------------------------------------------------
# # Summarize what you've done
# # (named session_summary so the base summary() function is not masked)
# session_summary <- goose_summarize_session()
# cat(session_summary)
#
# # Output:
# # Session Summary - 2024-12-04
# #
# # Objects Created:
# # - clean_data (datasets): 5000 rows × 12 columns
# # - model_v1 (models): Linear regression, R² = 0.84
# # - model_v2 (models): Random forest, R² = 0.91
# # - comparison_plot (plots): Model comparison visualization
# #
# # Key Activities:
# # - Data cleaning and preprocessing
# # - Feature engineering (3 new features)
# # - Model comparison (linear vs. tree-based)
# #
# # Recommendations:
# # - model_v2 shows better performance
# # - Consider ensemble approach
# # - Validate on holdout set

## -----------------------------------------------------------------------------
# # Morning: Continue from yesterday
# yesterday_prompt <- readLines("continue_2024-12-03.txt")
# # readLines() returns one element per line; print them on separate lines
# cat(yesterday_prompt, sep = "\n")
#
# # Load saved objects
# my_data <- goose_load("clean_data")
# my_model <- goose_load("baseline_model")
#
# # Work on improvements
# improved_model <- improve_model(my_model, my_data)
#
# # Get feedback
# goose_honk(severity = "moderate")
#
# # Save progress
# goose_save(improved_model, category = "models", tags = c("improved", "day2"))
#
# # End of day
# goose_continuation_prompt()
# goose_summarize_session()

## -----------------------------------------------------------------------------
# # Team member A completes initial analysis
# model_a <- build_model(data)
# goose_save(model_a, category = "models", tags = c("team_a", "initial"))
#
# # Create handoff for Team member B
# handoff <- goose_handoff()
# writeLines(handoff, "handoff_to_team_b.md")
#
# # Team member B picks up work
# goose_list(tags = "team_a")
# model_a <- goose_load("model_a")
#
# # Continue work
# model_b <- enhance_model(model_a)
# goose_save(model_b, category = "models", tags = c("team_b", "enhanced"))

## -----------------------------------------------------------------------------
# # Structure for a complete project
# # Note: tags use format(Sys.Date()) because c() on a character vector and a
# # Date would turn the date into its underlying day count (e.g. "20061").
# project_setup <- function(project_name) {
#   # Save raw data
#   goose_save(
#     raw_data,
#     category = "data_raw",
#     tags = c(project_name, "raw", format(Sys.Date()))
#   )
#
#   # Save cleaned data
#   goose_save(
#     clean_data,
#     category = "data_clean",
#     tags = c(project_name, "clean", format(Sys.Date()))
#   )
#
#   # Save models with versioning
#   goose_save(
#     model_v1,
#     category = "models",
#     tags = c(project_name, "v1", "baseline")
#   )
#
#   # Save visualizations
#   goose_save(
#     plots,
#     category = "visualizations",
#     tags = c(project_name, "final")
#   )
#
#   # Save reports
#   goose_save(
#     report,
#     category = "reports",
#     tags = c(project_name, "final", format(Sys.Date()))
#   )
# }
#
# # List everything for a project
# # (project_name must hold the same value that was passed to project_setup())
# goose_list(tags = project_name)

## -----------------------------------------------------------------------------
# # Get memory usage statistics
# stats <- goose_memory_stats()
# print(stats)
#
# # Output:
# # Memory Statistics:
# # Total objects: 47
# # Total size: 15.3 MB
# # By category:
# # - models: 12 objects (8.1 MB)
# # - datasets: 20 objects (5.2 MB)
# # - plots: 15 objects (2.0 MB)

## -----------------------------------------------------------------------------
# # Delete specific objects
# goose_delete("old_model")
#
# # Delete by pattern
# objects <- goose_list()
# old_objects <- objects %>%
#   filter(grepl("^test_", name))
#
# for (obj in old_objects$name) {
#   goose_delete(obj)
# }

## -----------------------------------------------------------------------------
# # Export memory for sharing
# goose_export_memory("project_memory.zip")
#
# # On another machine
# goose_import_memory("project_memory.zip")

## -----------------------------------------------------------------------------
# # Check if it exists
# all_objects <- goose_list()
# grep("my_object", all_objects$name, value = TRUE)
#
# # Check with different category
# goose_list(category = "models")
#
# # Check metadata
# meta <- goose_get_metadata("object_name")

## -----------------------------------------------------------------------------
# # Check what's taking space
# stats <- goose_memory_stats()
#
# # Clean old backups
# goose_clean_backups(keep_last = 3)
#
# # Remove by tags
# goose_clear_tags(c("old", "deprecated", "test"))