#!/usr/bin/env python
# coding: utf-8
"""Train and evaluate a support-vector classifier on the Iris dataset.

The script (exported from a notebook; the ``# In[n]:`` markers are the
original cell boundaries) performs these steps and prints the
intermediate results of each one:

1. Load the Iris dataset bundled with scikit-learn.
2. Wrap the features in a ``pandas.DataFrame`` and the labels in a
   ``pandas.Series``.
3. Split the data into a training set and a hold-out test set.
4. Fit ``sklearn.svm.SVC`` with its default parameters and predict the
   hold-out labels.
5. Report the hold-out accuracy and the 3-fold cross-validation scores.
"""

# In[1]:

import pandas as pd
from sklearn import datasets, metrics, model_selection, svm

# Seed handed to train_test_split. Without it the split is reshuffled on
# every run, so the printed predictions and accuracy are not reproducible.
RANDOM_STATE = 0


# In[2]:

# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html
# https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
iris = datasets.load_iris()


# In[3]:

# Inspect the container returned by load_iris and the fields it exposes.
print(type(iris))
print(iris.keys())


# In[4]:

# Feature matrix as a DataFrame, with the dataset's own feature names as
# column labels.
iris_data = pd.DataFrame(data=iris.data, columns=iris.feature_names)
print(iris_data.head())


# In[5]:

# Class labels as a Series; its default index lines up with iris_data.
iris_label = pd.Series(data=iris.target)
print(iris_label.head())


# In[6]:

# Total number of samples.
print(len(iris_data))


# In[7]:

# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
data_train, data_test, label_train, label_test = model_selection.train_test_split(
    iris_data,
    iris_label,
    random_state=RANDOM_STATE,
)


# In[8]:

print(data_train.head())


# In[9]:

print(label_train.head())


# In[10]:

# default value of test_size = 0.25
print(len(data_train), len(data_test))


# In[11]:

# Fit an SVC with default hyper-parameters on the training portion, then
# predict the labels of the hold-out portion.
clf = svm.SVC()
clf.fit(data_train, label_train)
pre = clf.predict(data_test)


# In[12]:

print(type(pre))
print(pre)


# In[13]:

# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
# Fraction of hold-out samples whose predicted label equals the true label.
ac_score = metrics.accuracy_score(label_test, pre)


# In[14]:

print(ac_score)


# In[15]:

# 3-fold cross-validation over the full dataset. cross_val_score fits
# clones of the estimator, so the model fitted above is not modified.
scores = model_selection.cross_val_score(clf, iris_data, iris_label, cv=3)
print(scores)
print(scores.mean())