# run this cell to install pycaret in Google Colab
# !pip install pycaret
# If you are using Jupyter Notebook, install pycaret either from a notebook cell or from the command line:
# pip install pycaret
# Report which pycaret release is installed; the line of output that follows
# this cell is the version the notebook was run against.
from pycaret.utils import version
version()
1.0.0
# only run this cell if you are using google colab
# from pycaret.utils import enable_colab
# enable_colab()
# Fetch the 'country-data' sample dataset by name. The preview that follows
# shows a 'country' label column plus numeric indicator columns
# (child_mort, exports, health, imports, income, inflation, life_expec,
# total_fer, gdpp).
from pycaret.datasets import get_data
data = get_data('country-data')
| | country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp |
---|---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 90.2 | 10.0 | 7.58 | 44.9 | 1610 | 9.44 | 56.2 | 5.82 | 553 |
1 | Albania | 16.6 | 28.0 | 6.55 | 48.6 | 9930 | 4.49 | 76.3 | 1.65 | 4090 |
2 | Algeria | 27.3 | 38.4 | 4.17 | 31.4 | 12900 | 16.10 | 76.5 | 2.89 | 4460 |
3 | Angola | 119.0 | 62.3 | 2.85 | 42.9 | 5900 | 22.40 | 60.1 | 6.16 | 3530 |
4 | Antigua and Barbuda | 10.3 | 45.5 | 6.03 | 58.9 | 19100 | 1.44 | 76.8 | 2.13 | 12200 |
# NOTE: the wildcard import is kept on purpose -- the cells that follow call
# setup / create_model / plot_model / assign_model / predict_model unqualified.
from pycaret.clustering import *

# Initialise the clustering experiment on `data`.
#   normalize=True        -> the setup summary below reports the zscore method
#   session_id=786        -> seed; it reappears as random_state=786 in the
#                            k-means estimator printed further down
#   ignore_features=[...] -> 'country' is left out, so the transformed data in
#                            the summary has 9 columns instead of 10
clu1 = setup(
    data,
    normalize=True,
    session_id=786,
    ignore_features=['country'],
)
Setup Succesfully Completed!
| | Description | Value |
---|---|---|
0 | session_id | 786 |
1 | Original Data | (167, 10) |
2 | Missing Values | False |
3 | Numeric Features | 9 |
4 | Categorical Features | 1 |
5 | Ordinal Features | False |
6 | High Cardinality Features | False |
7 | Transformed Data | (167, 9) |
8 | Numeric Imputer | mean |
9 | Categorical Imputer | constant |
10 | Normalize | True |
11 | Normalize Method | zscore |
12 | Transformation | False |
13 | Transformation Method | None |
14 | PCA | False |
15 | PCA Method | None |
16 | PCA components | None |
17 | Ignore Low Variance | False |
18 | Combine Rare Levels | False |
19 | Rare Level Threshold | None |
20 | Numeric Binning | False |
21 | Remove Multicollinearity | False |
22 | Multicollinearity Threshold | None |
23 | Group Features | False |
# Create a k-means model by its string id and print it; the printed repr that
# follows shows the underlying estimator and the parameters it was built with
# (n_clusters=4, random_state=786).
kmeans = create_model('kmeans')
print(kmeans)
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300, n_clusters=4, n_init=10, n_jobs=None, precompute_distances='auto', random_state=786, tol=0.0001, verbose=0)
# Create a hierarchical clustering model by its string id and print it; the
# printed repr that follows is an AgglomerativeClustering estimator with
# ward linkage and n_clusters=4.
hclust = create_model('hclust')
print(hclust)
AgglomerativeClustering(affinity='euclidean', compute_full_tree='auto', connectivity=None, distance_threshold=None, linkage='ward', memory=None, n_clusters=4)
# Plot both models with no `plot` argument, i.e. whatever plot type
# plot_model uses by default.
# NOTE(review): label=True presumably switches on data labels in the figure --
# confirm against the plot_model documentation.
plot_model(kmeans, label=True)
plot_model(hclust, label=True)

# Further named plot types for the k-means model only.
plot_model(kmeans, plot='tsne', label=True)
plot_model(kmeans, plot='elbow')
plot_model(kmeans, plot='silhouette')

# Distribution plot with the feature explicitly set to 'income'.
plot_model(kmeans, plot='distribution', feature='income')
# Attach the k-means cluster assignment to the dataset and preview it; the
# table that follows shows the original columns plus a new 'Cluster' column
# holding labels such as 'Cluster 0' and 'Cluster 2'.
results = assign_model(kmeans)
results.head()
| | country | child_mort | exports | health | imports | income | inflation | life_expec | total_fer | gdpp | Cluster |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 90.2 | 10.0 | 7.58 | 44.9 | 1610 | 9.44 | 56.2 | 5.82 | 553 | Cluster 2 |
1 | Albania | 16.6 | 28.0 | 6.55 | 48.6 | 9930 | 4.49 | 76.3 | 1.65 | 4090 | Cluster 0 |
2 | Algeria | 27.3 | 38.4 | 4.17 | 31.4 | 12900 | 16.10 | 76.5 | 2.89 | 4460 | Cluster 0 |
3 | Angola | 119.0 | 62.3 | 2.85 | 42.9 | 5900 | 22.40 | 60.1 | 6.16 | 3530 | Cluster 2 |
4 | Antigua and Barbuda | 10.3 | 45.5 | 6.03 | 58.9 | 19100 | 1.44 | 76.8 | 2.13 | 12200 | Cluster 0 |
# Run the k-means model on a dataset passed explicitly via `data=` (here the
# same table that was given to setup) and preview the first rows.
# NOTE(review): the output of this cell is not visible here; presumably it
# carries a cluster-label column like the assign_model result -- confirm.
predictions = predict_model(kmeans, data=data)
predictions.head()