# %matplotlib inline  (IPython magic — only meaningful inside Jupyter)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the iris dataset, keeping only the first two features
# (sepal length and sepal width) for this 2-D experiment.
iris = datasets.load_iris()
x = iris.data[:, :2]
y = iris.target

# Hold out 20% of the samples for testing; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)
# 欣賞一下三個類型的樣子。(Take a look at what the three classes look like.)
# Scatter the training samples (sepal length vs. sepal width),
# colored by their true class label.
plt.scatter(x_train[:,0], x_train[:,1], c=y_train);
# Train a support-vector classifier (default RBF kernel) on the
# two-feature training set. `fit` returns the estimator itself,
# so the chained call binds the same fitted object.
clf = SVC().fit(x_train, y_train)
# [cell output] SVC()
# Predict the held-out samples and report the fraction labelled correctly.
y_pred = clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
# Message reads "accuracy rate: ...%".
print(f'正確率: {accuracy*100}%')
# [cell output] 正確率: 90.0%
# 圖示化結果, 和最大宗點顏色不同就是錯的。(Visualize the result: points whose color differs from the majority are misclassified.)
# Color test points by (predicted - true) label: a value of 0 means
# the prediction matched, so off-color points are the errors.
plt.scatter(x_test[:,0], x_test[:,1], c=y_pred-y_test);
# Switch to the full four-feature iris data for the PCA experiment.
x, y = iris.data, iris.target
# 切訓練資料、測試資料。(Split into training and test data.)
# Same 80/20 split with the same seed as before, now on all four features.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42)
# PCA 降維其實也是 3 部曲。(PCA dimensionality reduction is also a three-step recipe.)
# Project the four features onto the two leading principal components.
# Fit the projection on the training data only, then apply the *same*
# learned transform to the test data (avoids information leakage).
pca = PCA(n_components=2)
x_train_reduced = pca.fit_transform(x_train)
x_test_reduced = pca.transform(x_test)

# Show the training set in the reduced 2-D space, colored by class.
plt.scatter(x_train_reduced[:, 0], x_train_reduced[:, 1], c=y_train)
# Train a fresh SVC on the PCA-reduced training data; `fit` returns
# the estimator itself, so the chained call binds the fitted model.
clf_pca = SVC().fit(x_train_reduced, y_train)
# [cell output] SVC()
# Score the PCA-based model on the projected test set.
y_pred = clf_pca.predict(x_test_reduced)
accuracy = accuracy_score(y_test, y_pred)
# Message reads "accuracy rate: ...%".
print(f'正確率: {accuracy*100}%')
# [cell output] 正確率: 100.0%
# 是不是真的變強了!! 最後再畫差異圖, 可以發現所有點顏色都相同 (也就是完全正確)。(It really did get better! The final difference plot shows every point in the same color, i.e. all correct.)
# Plot the test points in the original first-two-feature space
# (x_test here holds all four features; only columns 0-1 are drawn),
# colored by (predicted - true): a single color means no errors.
plt.scatter(x_test[:,0], x_test[:,1], c=y_pred-y_test);