!pip install catboost
!pip install xgboost
!pip install lightgbm
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
import lightgbm as lgb
# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.data"
names = ['animal_name', 'hair', 'feathers', 'eggs', 'milk', 'airborne', 'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous', 'fins', 'legs', 'tail', 'domestic', 'catsize', 'class_type']
df = pd.read_csv(url, names=names)
df.head()
| | animal_name | hair | feathers | eggs | milk | airborne | aquatic | predator | toothed | backbone | breathes | venomous | fins | legs | tail | domestic | catsize | class_type |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | aardvark | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 4 | 0 | 0 | 1 | 1 |
| 1 | antelope | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 4 | 1 | 0 | 1 | 1 |
| 2 | bass | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 4 |
| 3 | bear | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 4 | 0 | 0 | 1 | 1 |
| 4 | boar | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 4 | 1 | 0 | 1 | 1 |
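The seven class_type labels are far from balanced: mammals dominate the dataset while some classes contain only a handful of animals. It is worth checking the distribution before splitting, since a small test set can easily end up with one or zero examples of the rarer classes (this is what produces the ill-defined precision warnings later). A quick look, assuming df is loaded as above:

# Check how many animals fall into each of the seven classes
print(df['class_type'].value_counts().sort_index())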
# Prepare the dataset
X = df.iloc[:, 1:-1] # Features
y = df.iloc[:, -1] # Labels
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
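Given the imbalance, a stratified split is a reasonable variant: it keeps the class proportions roughly the same in the training and test sets, which is possible here because every class has at least two members. A sketch of the same call with stratify added (the outputs below were produced with the plain split above):

# Stratified variant of the split; preserves class proportions in both sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)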
# Train the model
model = CatBoostClassifier()
model.fit(X_train, y_train, verbose=0)
<catboost.core.CatBoostClassifier at 0x7fd12419fdc0>
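Pool was imported above but never used. It is CatBoost's native data container and becomes useful once you want to declare categorical feature columns or monitor an evaluation set during training. A minimal sketch of the same fit through Pool (all features in this dataset are already numeric, so no cat_features are declared):

# Wrap the data in CatBoost Pool objects and monitor the held-out set while training
train_pool = Pool(X_train, y_train)
test_pool = Pool(X_test, y_test)

model = CatBoostClassifier(verbose=0)
model.fit(train_pool, eval_set=test_pool)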
# Make predictions
y_pred = model.predict(X_test)
# Create confusion matrix
confusion_mat = confusion_matrix(y_test, y_pred)
# Visualize the confusion matrix using seaborn heatmap
plt.figure(figsize=(10, 7))
sns.heatmap(confusion_mat, annot=True, fmt='d', cmap='YlGnBu')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
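By default the heatmap axes show the row and column indices of the matrix rather than the actual class_type values, which is confusing when a class is missing from the test set. A sketch that puts the real class labels on the axes (confusion_matrix orders labels the same way np.unique does):

import numpy as np

# Label the heatmap axes with the actual classes present in the test set
labels = np.unique(y_test)
confusion_mat = confusion_matrix(y_test, y_pred, labels=labels)

plt.figure(figsize=(10, 7))
sns.heatmap(confusion_mat, annot=True, fmt='d', cmap='YlGnBu',
            xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()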
# Print classification report
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         1
           4       0.67      1.00      0.80         2
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         1

    accuracy                           0.95        21
   macro avg       0.78      0.83      0.80        21
weighted avg       0.92      0.95      0.93        21
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
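The warning comes from class 3: it appears in the test set but the model never predicts it, so its precision is 0/0. As the message suggests, passing zero_division makes the handling explicit and silences the warning:

# Report again with zero_division set explicitly, so undefined precision for
# never-predicted classes is reported as 0 without a warning
print(classification_report(y_test, y_pred, zero_division=0))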
# XGBoost expects class labels in the range 0..num_class-1,
# while class_type runs from 1 to 7, so shift the labels down by one
y_train_xgb = y_train - 1
y_test_xgb = y_test - 1
# Train the model
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train_xgb)
# Make predictions
y_pred_xgb = xgb_model.predict(X_test)
# Shift predictions back to the original 1..7 class values
y_pred_xgb = y_pred_xgb + 1
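The manual shift works because class_type is a contiguous 1..7 range; a LabelEncoder performs the same 0-based re-mapping without relying on that, and its inverse_transform removes the need to remember the offset. A sketch of the equivalent flow:

from sklearn.preprocessing import LabelEncoder

# Encode labels to 0..n_classes-1 for XGBoost, then map predictions back
le = LabelEncoder()
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, le.fit_transform(y_train))
y_pred_xgb = le.inverse_transform(xgb_model.predict(X_test))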
# Create confusion matrix
confusion_mat_xgb = confusion_matrix(y_test, y_pred_xgb)
# Visualize the confusion matrix using seaborn heatmap
plt.figure(figsize=(10, 7))
sns.heatmap(confusion_mat_xgb, annot=True, fmt='d', cmap='YlGnBu')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
# Print classification report
print(classification_report(y_test, y_pred_xgb))
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         1
           4       0.67      1.00      0.80         2
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         1

    accuracy                           0.95        21
   macro avg       0.78      0.83      0.80        21
weighted avg       0.92      0.95      0.93        21
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
# Train the model
lgb_model = lgb.LGBMClassifier()
lgb_model.fit(X_train, y_train)
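Like the other two libraries, LightGBM exposes per-feature importances through the scikit-learn style feature_importances_ attribute, which is a quick sanity check on what the model actually splits on. A sketch:

# Rank features by how often LightGBM split on them
importances = pd.Series(lgb_model.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))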
# Make predictions
y_pred_lgb = lgb_model.predict(X_test)
# Create confusion matrix
confusion_mat_lgb = confusion_matrix(y_test, y_pred_lgb)
# Visualize the confusion matrix using seaborn heatmap
plt.figure(figsize=(10, 7))
sns.heatmap(confusion_mat_lgb, annot=True, fmt='d', cmap='YlGnBu')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
# Print classification report
print(classification_report(y_test, y_pred_lgb))
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00         2
           3       0.00      0.00      0.00         1
           4       0.67      1.00      0.80         2
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         1

    accuracy                           0.95        21
   macro avg       0.78      0.83      0.80        21
weighted avg       0.92      0.95      0.93        21
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
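All three reports come out identical on this small test set, but an explicit side-by-side accuracy check is still a convenient way to close the comparison. A sketch that reuses the predictions computed above (np.ravel flattens CatBoost's column-shaped prediction array):

import numpy as np
from sklearn.metrics import accuracy_score

# Compare the three boosted models on the same held-out test set
for name, pred in [('CatBoost', y_pred), ('XGBoost', y_pred_xgb), ('LightGBM', y_pred_lgb)]:
    print(f'{name}: accuracy = {accuracy_score(y_test, np.ravel(pred)):.3f}')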