import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
!wget https://raw.githubusercontent.com/susanli2016/Machine-Learning-with-Python/master/diabetes.csv
dataset = pd.read_csv("diabetes.csv")
dataset.head()
dataset.shape
features = dataset.drop(["Outcome"], axis=1)
X = np.array(features)
y = np.array(dataset["Outcome"])
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0, test_size=0.20)
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)
tree.tree_.max_depth
validation_prediction = tree.predict(X_val)
training_prediction = tree.predict(X_train)
print('Exactitud training data: ', accuracy_score(y_true=y_train, y_pred=training_prediction))
print('Exactitud validation data: ', accuracy_score(y_true=y_val, y_pred=validation_prediction))
!apt-get install graphviz
!pip install graphviz
import graphviz
from sklearn.tree import export_graphviz
feature_names = features.columns
dot_data = export_graphviz(tree, out_file=None,
feature_names=feature_names,
class_names=True,
filled=True, rounded=True,
special_characters=True)
graph = graphviz.Source(dot_data)
graph
# Second model: the same classifier with growth limits (depth cap, minimum
# samples per leaf and per split). Seeded so that repeated runs give the same
# tree on the same training data.
tree = DecisionTreeClassifier(
    min_samples_leaf=10,
    max_depth=8,
    min_samples_split=50,
    random_state=0,
)
tree.fit(X_train, y_train)

# Same training-versus-validation accuracy report as for the first tree.
# accuracy_score is imported from sklearn.metrics at the top of the file.
validation_prediction = tree.predict(X_val)
training_prediction = tree.predict(X_train)
print('Exactitud training data: ', accuracy_score(y_true=y_train, y_pred=training_prediction))
print('Exactitud validation data: ', accuracy_score(y_true=y_val, y_pred=validation_prediction))
# Render the most recently fitted tree. list(...) makes sure `feature_names`
# reaches scikit-learn as a plain list, since some releases reject a pandas
# Index for this parameter.
dot_data = export_graphviz(
    tree,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=list(feature_names),
    class_names=True,  # label classes symbolically
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
graph