# DAL ToolBox
# version 1.0.777
# Fetch and evaluate the helper script published in the daltoolbox GitHub
# repository (this needs network access at run time).
# NOTE(review): load_library() is not a base R function; presumably it is
# defined by the sourced jupyter.R -- confirm.
source("https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R")
# Load the daltoolbox package (DAL ToolBox)
load_library("daltoolbox")
Loading required package: daltoolbox
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo
Attaching package: ‘daltoolbox’
The following object is masked from ‘package:base’:
    transform
# Work on a local copy of the built-in iris data set
iris <- datasets::iris
# Preview the first six rows
head(iris, n = 6L)
# Count the observations per species (class balance)
table(iris[["Species"]])
| | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|---|
| | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
    setosa versicolor  virginica
        50         50         50
# Hold-out split using stratified sampling on the class label (Species)
# NOTE(review): the split proportion is left at train_test()'s default; the
# recorded output (40 train / 10 test per species) suggests 80/20 -- confirm.
tt <- train_test(sample_stratified("Species"), iris)
# Class distribution of the training partition
print(table(tt[["train"]][["Species"]]))
# Class distribution of the test partition
print(table(tt[["test"]][["Species"]]))
    setosa versicolor  virginica
        40         40         40

    setosa versicolor  virginica
        10         10         10
# using stratified sampling
# Split the dataset into four folds, stratified by Species.
# NOTE: `sample` shadows base::sample as a variable. Function calls such as
# sample(x) still resolve to base::sample, and the name is kept in case later
# code refers to it.
sample <- sample_stratified("Species")
folds <- k_fold(sample, iris, 4)
# Distribution of folds: one row per fold, one column per species.
# Every fold is tabulated first and the rows are bound once, instead of
# growing the matrix with rbind() inside a loop. unname() keeps the rows
# unlabeled, as the incremental rbind() did; an empty `folds` still gives NULL.
tbl <- do.call(rbind, lapply(unname(folds), function(f) table(f$Species)))
print(tbl)
     setosa versicolor virginica
[1,]     13         13        13
[2,]     13         13        13
[3,]     12         12        12
[4,]     12         12        12