## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## -----------------------------------------------------------------------------
# # Install from CRAN
# install.packages("localLLM")

## -----------------------------------------------------------------------------
# library(localLLM)
# install_localLLM()

## -----------------------------------------------------------------------------
# library(localLLM)
#
# response <- quick_llama("What is the capital of France?")
# cat(response)

## -----------------------------------------------------------------------------
# response <- quick_llama(
#   'Classify the sentiment of the following tweet into one of two
#    categories: Positive or Negative.
#
#    Tweet: "This paper is amazing! I really like it."'
# )
#
# cat(response)

## -----------------------------------------------------------------------------
# # Process multiple prompts at once
# prompts <- c(
#   "What is 2 + 2?",
#   "Name one planet in our solar system.",
#   "What color is the sky?"
# )
#
# responses <- quick_llama(prompts)
# print(responses)

## -----------------------------------------------------------------------------
# # From Hugging Face URL
# response <- quick_llama(
#   "Explain quantum physics simply",
#   model = "https://huggingface.co/unsloth/gemma-3-4b-it-qat-GGUF/resolve/main/gemma-3-4b-it-qat-Q5_K_M.gguf"
# )
#
# # From local file
# response <- quick_llama(
#   "Explain quantum physics simply",
#   model = "/path/to/your/model.gguf"
# )
#
# # From cache (name fragment)
# response <- quick_llama(
#   "Explain quantum physics simply",
#   model = "Llama-3.2"
# )

## -----------------------------------------------------------------------------
# # List all cached models
# cached <- list_cached_models()
# print(cached)

## -----------------------------------------------------------------------------
# response <- quick_llama(
#   prompt = "Write a haiku about programming",
#   temperature = 0.8,   # Higher = more creative (default: 0)
#   max_tokens = 100,    # Maximum response length
#   seed = 42,           # For reproducibility
#   n_gpu_layers = 999   # Use GPU if available
# )
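
## -----------------------------------------------------------------------------
# # A minimal sketch, not taken from the original vignette: it combines the
# # batch-prompt and parameter-tuning ideas shown above for a small sentiment
# # classification run. Only functions and arguments already used in this script
# # (quick_llama, temperature, max_tokens, seed) appear; the example tweets and
# # the prompt wording are illustrative assumptions.
# tweets <- c(
#   "This paper is amazing! I really like it.",
#   "The results are disappointing and hard to reproduce."
# )
#
# # paste0() is vectorised, so this builds one prompt per tweet
# prompts <- paste0(
#   "Classify the sentiment of the following tweet into one of two categories:",
#   " Positive or Negative.\n\nTweet: \"", tweets, "\""
# )
#
# labels <- quick_llama(
#   prompts,
#   temperature = 0,   # deterministic output for classification
#   max_tokens = 5,    # the label needs only a few tokens
#   seed = 42          # reproducible runs
# )
# print(labels)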