---
title: "Examples of joint grid discretization"
author: "Jiandong Wang, Sajal Kumar, and Joe Song"
date: 2025-12-12
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Examples of joint grid discretization}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

### History

Updated: 2025-12-12; 2022-01-27; 2022-01-17; 2020-09-13; 2020-08-01

Created: 2020-03-17

## Example 1. Nonlinear curves using kmeans+silhouette and Ball+BIC clustering with a fixed number of clusters

```{r clustering nonlinear patterns by fixed numbers of clusters, out.width="40%", fig.show="hold", fig.cap="Example 1. Nonlinear curves using kmeans+silhouette and Ball+BIC clustering with a fixed number of clusters."}
# library() stops with an error if the package is missing, whereas require()
# would only return FALSE and let the chunk fail later with a confusing error.
library(GridOnClusters)

# 500 points on the curve (x, sin(x), cos(x)). The noise terms use sd = 0, so
# they contribute nothing; raise sd to perturb the curve.
x <- rnorm(500)
y <- sin(x) + rnorm(500, sd = 0)
z <- cos(x) + rnorm(500, sd = 0)
data <- cbind(x, y, z)

# Candidate numbers of clusters passed to discretize.jointly().
ks <- 2:20

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split"
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP exact likelihood"
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way"
)
plot(res)
```

## Example 2. Nonlinear curves and patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r clustering nonlinear patterns by varying numbers of clusters, out.width="40%", fig.show="hold", fig.cap="Example 2. Using a range for the number of kmeans+silhouette and Ball+BIC clusters"}
# y is a smooth nonlinear function of x; z is a noisy step pattern that is
# about 0 for x in [-0.5, 0.5] and about 1 elsewhere (Gaussian noise, sd 0.1).
x <- rnorm(100)
y <- log1p(abs(x))
z <- ifelse(x >= -0.5 & x <= 0.5, 0, 1) + rnorm(100, 0, 0.1)
data <- cbind(x, y, z)
ks <- 2:5

#res = discretize.jointly(data, k=ks, cluster_method = "Ball+BIC",
#                         grid_method = "Sort+split", min_level = 1)
#plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette",
#                         grid_method = "DP exact likelihood", min_level = 1)
#plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)
```

## Example 3. Using the partition around medoids clustering method

```{r Example 3 using PAM for clustering, out.width="40%", fig.show="hold", fig.cap="Example 3. Using the partition around medoids clustering method."}
# using a clustering method other than kmeans+silhouette
x <- rnorm(100)
y <- log1p(abs(x))
z <- sin(x)
data <- cbind(x, y, z)

# pre-cluster the data using partition around medoids (PAM)
cluster_label <- cluster::pam(
  x = data,
  diss = FALSE,
  metric = "euclidean",
  k = 4
)$clustering

# Plot each grid method under its own title. Previously the Sort+split result
# was overwritten before plotting, so the DP exact likelihood grid was shown
# with a "Sort+split" title.
res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "Sort+split",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & Sort+split"
)

res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & DP exact likelihood"
)
```

## Example 4. Random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r Example 4, out.width="40%", fig.show="hold", fig.cap="Example 4. Random patterns using kmeans+silhouette and Ball+BIC clustering with a range."}
ks <- 2:20
n <- 40 * 10
# Named `sigma` rather than `sd` so that it does not shadow stats::sd().
sigma <- 60 * 4

# Two overlaid Gaussian clouds of 2 * n points each: a wide one centered at
# (0, 0) and a narrower one (a third of the spread) centered at (0, 200).
x <- rnorm(2 * n, sd = sigma)
y <- rnorm(2 * n, sd = sigma)
x <- c(x, rnorm(2 * n, sd = sigma / 3))
y <- c(y, rnorm(2 * n, sd = sigma / 3) + 200)
data <- cbind(x, y)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
```

## Example 5. Multi-cluster random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r Example 5 bivariate, out.width="40%", fig.show="hold", fig.cap="Example 5. Multi-cluster random patterns using kmeans+silhouette and Ball+BIC clustering with a range."}
n <- 50 * 8
ks <- 2:20

# Three bivariate Gaussian clusters of n points each (column 1, column 2).
X.C1 <- matrix(
  c(rnorm(n, 5, sd = 2), rnorm(n, 0, sd = 40)),
  ncol = 2, byrow = FALSE
)
X.C2 <- matrix(
  c(rnorm(n, 70, sd = 1), rnorm(n, 0, sd = 1)),
  ncol = 2, byrow = FALSE
)
X.C3 <- matrix(
  c(rnorm(n, 150, sd = 30), rnorm(n, 0, sd = 30)),
  ncol = 2, byrow = FALSE
)

# NOTE(review): X.C2 is generated but not included in `data`; only X.C1 and
# X.C3 are analyzed. Confirm whether rbind(X.C1, X.C2, X.C3) was intended.
data <- rbind(X.C1, X.C3)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1,
  cutoff = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
```

## Example 6. Exclusive or.

```{r Example 6. Exclusive or, out.width="40%", fig.show="hold", fig.cap="Example 6. Exclusive or. Dim1 ⊕ Dim2, Dim3 and Dim4 are random"}
n <- 100
ks <- 2:10

# Four labeled groups of n points whose (X1, X2) means are
# (0, 10), (0, 0), (10, 10) and (10, 0); X3 and X4 are pure noise.
label <- c(rep(1, n), rep(2, n), rep(3, n), rep(4, n))
X1 <- c(
  rnorm(n, 0, sd = 2), rnorm(n, 0, sd = 2),
  rnorm(n, 10, sd = 2), rnorm(n, 10, sd = 2)
)
X2 <- c(
  rnorm(n, 10, sd = 2), rnorm(n, 0, sd = 2),
  rnorm(n, 10, sd = 2), rnorm(n, 0, sd = 2)
)
X3 <- rnorm(4 * n, 20, sd = 10)
X4 <- rnorm(4 * n, 3, sd = 20)
data <- cbind(X1, X2, X3, X4)

#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette",
#                         grid_method = "DP approx likelihood", min_level = 1)
#plot(res)
#
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette",
#                         grid_method = "DP Compressed majority", min_level = 1)
#plot(res)

#res = discretize.jointly(data, k=ks, cluster_method = "Ball+BIC",
#                         grid_method = "DP exact likelihood", min_level = 1)
#plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Same grid method, but using the known group labels.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

#res = discretize.jointly(data, k=ks, cluster_label = label,
#                         grid_method = "Sort+split", min_level = 1)
#plot(res)
#
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette",
#                         grid_method = "Sort+split", min_level = 1)
#plot(res)
```

## Example 7. Three well separated clusters

```{r Example 7. Three well separated rounds., out.width="40%", fig.show="hold", fig.cap="Example 7. Three rounds well separated on x axis"}
n <- 20

# Three labeled groups of sizes n, 5 * n and 5 * n, centered at X1 = 0, 15
# and 35 and all centered at X2 = 0.
label <- c(rep(1, n), rep(2, 5 * n), rep(3, 5 * n))
X1 <- c(rnorm(n, 0, sd = 1), rnorm(5 * n, 15, sd = 3), rnorm(5 * n, 35, sd = 3))
X2 <- c(rnorm(n, 0, sd = 1), rnorm(5 * n, 0, sd = 3), rnorm(5 * n, 0, sd = 3))
data <- cbind(X1, X2)

res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

res.entropy <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)
```

## Example 8. Four spheres with varying centers and radii

```{r Example 8. Four spheres with varying centers and radius, out.width="40%", fig.show="hold", fig.cap="Example 8. Four spheres with different centers and radii"}
n <- 200
ks <- 2:10

# Four groups of sizes n, 5 * n, n and 3 * n. `label` records the generating
# group; the calls below cluster with Ball+BIC instead of using it.
label <- c(rep(1, n), rep(2, 5 * n), rep(3, n), rep(4, 3 * n))
X1 <- c(
  rnorm(n, 0, sd = 3), rnorm(5 * n, 15, sd = 3),
  rnorm(n, 5, sd = 2), rnorm(3 * n, 15, sd = 4)
)
X2 <- c(
  rnorm(n, 0, sd = 4), rnorm(5 * n, 15, sd = 3),
  rnorm(n, 10, sd = 2), rnorm(3 * n, 0, sd = 4)
)
data <- cbind(X1, X2)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res)
```

## Example 9. A small dense sphere overlapping a large sphere

Here we evaluate whether a method can recognize the two spheres.

```{r Example 9. Two overlapping spheres, out.width="40%", fig.show="hold", fig.cap="Example 9. A small dense sphere overlapping a large sphere."}
n <- 200
ks <- 2:10

# A tight group of n points (sd 1) at (0, 0) and a wider group of 5 * n
# points (sd 3) at (6, 0).
label <- c(rep(1, n), rep(2, 5 * n))
X1 <- c(rnorm(n, 0, sd = 1), rnorm(5 * n, 6, sd = 3))
X2 <- c(rnorm(n, 0, sd = 1), rnorm(5 * n, 0, sd = 3))
data <- cbind(X1, X2)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

res.entropy <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)
```