#!/usr/bin/env python
# coding: utf-8
"""Train a decision tree on the diabetes CSV and render the fitted tree.

Notebook export: the ``# In[...]`` markers below are the original cell
boundaries. The script reads ``../data/diabetes.csv``, fits a
``DecisionTreeClassifier`` on a 70/30 train/test split, prints the test
accuracy, and writes the tree visualisation to ``diabetes.png``.

NOTE(review): the ``get_ipython()`` calls and the trailing ``Image(...)``
expression only have an effect inside an IPython/Jupyter session.
"""

# In[185]:


import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set(color_codes=True)
sns.set_style('white')
sns.set_context('paper', font_scale=2)
from mpl_toolkits import mplot3d
get_ipython().run_line_magic('config', "InlineBackend.print_figure_kwargs = {'bbox_inches':None}")
get_ipython().run_line_magic('matplotlib', 'inline')
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics


# In[179]:


# Load the dataset.
pima = pd.read_csv('../data/diabetes.csv')


# In[178]:


pima.head()


# In[182]:


# Split the dataset into feature columns and the target variable.
feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age', 'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']
X = pima[feature_cols]  # Features
y = pima['Outcome']  # Target variable


# In[186]:


# 70% training and 30% test; random_state fixes the split between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)


# In[193]:


help(DecisionTreeClassifier)


# In[187]:


# Create the decision tree classifier object.
clf = DecisionTreeClassifier()

# Train the decision tree classifier.
clf = clf.fit(X_train, y_train)

# Predict the response for the test dataset.
y_pred = clf.predict(X_test)


# In[188]:


print("Accuracy:", metrics.accuracy_score(y_test, y_pred))


# In[194]:


from sklearn.tree import export_graphviz
# ``sklearn.externals.six`` is no longer shipped with scikit-learn; the
# standard-library StringIO is the supported in-memory text buffer here.
from io import StringIO
from IPython.display import Image
import pydotplus


# In[195]:


# Export the fitted tree as DOT text into an in-memory buffer, then let
# pydotplus turn that DOT source into a PNG image.
dot_data = StringIO()
export_graphviz(clf, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True, feature_names=feature_cols, class_names=['0', '1'])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('diabetes.png')
Image(graph.create_png())