# DAL ToolBox
# version 1.0.777
source("https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R")
#loading DAL
load_library("daltoolbox")
Loading required package: daltoolbox Registered S3 method overwritten by 'quantmod': method from as.zoo.data.frame zoo Attaching package: ‘daltoolbox’ The following object is masked from ‘package:base’: transform
Discretization is the process of transferring continuous functions, models, variables, and equations into discrete counterparts.
Smoothing is a technique that creates an approximating function that attempts to capture important patterns in the data while leaving out noise or other fine-scale structures/rapid phenomena.
An important part of the discretization/smoothing is to set up bins for proceeding the approximation.
iris <- datasets::iris
head(iris)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
# smoothing using clustering
obj <- smoothing_cluster(n = 2)
obj <- fit(obj, iris$Sepal.Length)
sl.bi <- transform(obj, iris$Sepal.Length)
print(table(sl.bi))
obj$interval
sl.bi 5.22409638554217 6.61044776119403 83 67
entro <- evaluate(obj, as.factor(names(sl.bi)), iris$Species)
print(entro$entropy)
[1] 1.12088
opt_obj <- smoothing_cluster(n=1:20)
obj <- fit(opt_obj, iris$Sepal.Length)
obj$n
obj <- fit(obj, iris$Sepal.Length)
sl.bi <- transform(obj, iris$Sepal.Length)
print(table(sl.bi))
sl.bi 4.52727272727273 5.00294117647059 5.49 5.88333333333333 11 34 20 30 6.352 6.75294117647059 7.2 7.71666666666667 25 17 7 6