## ----include = FALSE----------------------------------------------------------
# Chunk options applied when this script is rendered through knitr.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(quickOutlier)

## ----univariate---------------------------------------------------------------
# Create dummy data with one obvious outlier (500)
df <- data.frame(
  id = 1:10,
  revenue = c(10, 12, 11, 10, 12, 11, 13, 10, 500, 11)
)

# Detect using Interquartile Range (IQR)
outliers <- detect_outliers(df, column = "revenue", method = "iqr")
print(outliers)

## ----plot, fig.width=6, fig.height=4------------------------------------------
plot_outliers(df, column = "revenue", method = "iqr")

## ----treat--------------------------------------------------------------------
# Cap the outliers based on IQR limits
df_clean <- treat_outliers(df, column = "revenue", method = "iqr")

# The value 500 has been replaced by the upper bound
print(df_clean$revenue)

## ----multivariate-------------------------------------------------------------
# Fix the RNG seed so the simulated data is identical on every run.
set.seed(123)

# Generate data: y correlates with x (y = 2 * x plus Gaussian noise)
df_multi <- data.frame(x = rnorm(50))
df_multi$y <- df_multi$x * 2 + rnorm(50, sd = 0.5)

# Add an anomaly: normal x, but impossible y given x
anomaly <- data.frame(x = 0, y = 10)
df_multi <- rbind(df_multi, anomaly)

# Detect using Mahalanobis Distance
detect_multivariate(df_multi, columns = c("x", "y"))

## ----lof----------------------------------------------------------------------
# Seed this chunk as well so it is reproducible when run on its own.
set.seed(456)

# Create a dense cluster and one distant point
df_density <- data.frame(
  x = c(rnorm(50), 10),
  y = c(rnorm(50), 10)
)

# Run LOF detection
detect_density(df_density, k = 5)

## ----scan---------------------------------------------------------------------
# Scan the univariate example data frame using the IQR method
scan_data(df, method = "iqr")