dataprep: data preprocessing and plots

library(dataprep)
library(ggplot2)
library(scales)

Figure 1. Line plots for variables with names that are essentially numeric and logarithmic

# Descriptive statistics of `data` (no selection of statistics is passed)
descplot(data, 5, 65)
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Time used by descdata: 0.0449 secs 
#> Time used by descplot: 0.103 secs
#> Warning: Removed 3 row(s) containing missing values (geom_path).

Figure 2. Line plots for variables whose names are essentially numeric and logarithmic

# Selected descriptive statistics, equal to descplot(data,5,65,c('na','min','max','IQR'))
# c(2, 7:9) expands to the indices 2, 7, 8, 9
descplot(data, 5, 65, c(2, 7:9))
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Time used by descdata: 0.0369 secs 
#> Time used by descplot: 0.0399 secs
#> Warning: Removed 3 row(s) containing missing values (geom_path).

Figure 3. Bar charts for variables whose names are of character type

# Descriptive statistics of `data1`, with the x-axis text rotated by 30 degrees
descplot(data1, 3, 7) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00399 secs 
#> Time used by descplot: 0.00798 secs

Figure 4. Bar charts for variables whose names are of character type

# Selected descriptive statistics; the same as
# descplot(data1, 3, 7, c("min", "max", "IQR")).
# The x-axis text is rotated by 30 degrees.
descplot(data1, 3, 7, 7:9) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by descdata: 0.00399 secs 
#> Time used by descplot: 0.00599 secs

Figure 5. Particle number size distributions in logarithmic scales

# Both the top and the bottom percentiles (no `part` argument is passed)
percplot(data, 5, 65, 4)
#> Time used by percdata: 0.0529 secs 
#> Time used by percplot: 0.0688 secs
#> Warning: Removed 144 row(s) containing missing values (geom_path).

Figure 6. Particle number size distributions in logarithmic scales with only one part

# Top percentiles only, selected with part = 1
percplot(data, 5, 65, 4, part = 1)
#> Time used by percdata: 0.0469 secs 
#> Time used by percplot: 0.0608 secs
#> Warning: Removed 72 row(s) containing missing values (geom_path).

Figure 7. Particle number size distributions in logarithmic scales with only one part

# Bottom percentiles only, selected with part = 0
percplot(data, 5, 65, 4, part = 0)
#> Time used by percdata: 0.0439 secs 
#> Time used by percplot: 0.0588 secs
#> Warning: Removed 72 row(s) containing missing values (geom_path).

Figure 8. Percentiles of modes in linear scales

# Both the top and the bottom percentiles of `data1`, with the x-axis text
# rotated by 30 degrees
percplot(data1, 3, 7, 2) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00898 secs 
#> Time used by percplot: 0.0199 secs

Figure 9. Percentiles of modes in linear scales with only one part

# Top percentiles of `data1` only (part = 1), with the x-axis text rotated
# by 30 degrees
percplot(data1, 3, 7, 2, part = 1) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00598 secs 
#> Time used by percplot: 0.015 secs

Figure 10. Percentiles of modes in linear scales with only one part

# Bottom percentiles of `data1` only (part = 0), with the x-axis text rotated
# by 30 degrees
percplot(data1, 3, 7, 2, part = 0) +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 30, hjust = 1, vjust = 1.1)
  )
#> Time used by percdata: 0.00598 secs 
#> Time used by percplot: 0.015 secs