GridSearchCV 查找最优的条件:参数
PCA降维,图片的数据比较大,维度 28*28 = 784
437*640 = 279680
1.导入相应模块工具:GridSearchCV、fetch_lfw_people、PCA、logging等
2.设置logging,控制台显示程序处理数据的进度
3.使用fetch_lfw_people导入数据,如果本地没有会从网络下载,如果本地有数据,加载本地
4.查看人脸数据结构
5.从人脸数据中提取进行机器学习的关键数据
6.对数据进行分割,获取训练数据和测试数据
7.数据太复杂了,使用PCA对数据进行降维处理,去除一些不重要的数据
8.使用GridSearchCV查询最佳的机器学习模型
9.定义方法获取预测人名和真实人名
10.定义方法绘制人脸识别结果图形
11.调用方法进行数据展示
导入相应模块工具:GridSearchCV、fetch_lfw_people、PCA、logging等
# Standard library
import logging
import time

# Third-party
import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA
# GridSearchCV is imported from sklearn.model_selection: the old
# sklearn.grid_search module has been deprecated since 0.18 and is
# scheduled for removal in 0.20.
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning) /usr/local/lib/python3.5/dist-packages/sklearn/grid_search.py:43: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. This module will be removed in 0.20. DeprecationWarning)
设置logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
控制台输出记录数据,显示程序处理数据的进度
# Emit timestamped INFO-level records so processing progress shows up on
# the console.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    level=logging.INFO,
)
使用fetch_lfw_people导入数据,如果本地没有会从网络下载,如果本地有数据,加载本地
# Load the LFW faces (downloaded on first use, read from the local cache
# afterwards). resize=1 together with a 0..250 slice on both axes keeps
# each face at 250x250 pixels.
lfw_people = fetch_lfw_people(
    min_faces_per_person=70,
    resize=1,
    slice_=(slice(0, 250), slice(0, 250)),
)
# Flattened pixel vectors and their integer class labels.
x_data, y_target = lfw_people.data, lfw_people.target
x_data.shape
(1288, 62500)
y_name = lfw_people.target_names
y_name
array(['Ariel Sharon', 'Colin Powell', 'Donald Rumsfeld', 'George W Bush', 'Gerhard Schroeder', 'Hugo Chavez', 'Tony Blair'], dtype='<U17')
x_image = lfw_people.images
x_image.shape
(1288, 250, 250)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(x_data[150].reshape((250,250)),cmap = 'gray')
<matplotlib.image.AxesImage at 0x7f6e5a9e89e8>
# plt.figure(figsize=(2,2))
plt.imshow(x_image[150],cmap = 'gray')
<matplotlib.image.AxesImage at 0x7f64095ba518>
查看人脸数据结构
从人脸数据中提取进行机器学习的关键数据
分割训练数据和预测数据
# Split the features (x_data) and labels (y_target) into training and
# test sets.
from sklearn.model_selection import train_test_split
# test_size = 0.05 holds out 5% of the samples for testing; no random_state
# is passed, so the split is not reproducible between runs.
X_train,x_test,y_train,y_test = train_test_split(x_data,y_target,test_size = 0.05)
X_train.shape
(1223, 62500)
# Baseline: fit a default SVC directly on the raw 62500-pixel vectors,
# before any dimensionality reduction.
svc = SVC()
svc.fit(X_train,y_train)
使用PCA进行降维
# Reduce every face to 150 principal components. whiten=True rescales the
# projected components so that their standard deviations are equalised.
pca = PCA(
    svd_solver='randomized',
    whiten=True,
    n_components=150,
).fit(X_train)
X_train_pca = pca.transform(X_train)
X_train_pca.shape
(1223, 150)
# Fit a default SVC on the PCA-reduced data. The two printed timestamps
# (seconds since the epoch) bracket the fit, so their difference is the
# training time.
svc = SVC()
print(time.time())
svc.fit(X_train_pca,y_train)
print(time.time())
1511939146.5530653 1511939147.4628181
使用GridSearchCV我们找到了最佳的支持向量机方法
estimator = SVC()
#If gamma is 'auto' then 1/n_features will be used instead.
# Candidate SVC hyper-parameter values to search over.
c = [0.5,1.0,2,5,10]
gamma = [0.0001,0.001,0.006,0.06,0.1,1]
param_grid = {'C' : c,'gamma':gamma}
# Evaluate every C/gamma combination with cross-validation on the
# PCA-reduced training data.
gv = GridSearchCV(estimator,param_grid)
gv.fit(X_train_pca,y_train)
GridSearchCV(cv=None, error_score='raise', estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape=None, degree=3, gamma='auto', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False), fit_params={}, iid=True, n_jobs=1, param_grid={'C': [0.5, 1.0, 2, 5, 10], 'gamma': [0.0001, 0.001, 0.006, 0.06, 0.1, 1]}, pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)
# Use the estimator that achieved the best cross-validation score.
svc = gv.best_estimator_
svc
SVC(C=5, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
gv.best_score_
0.76778413736713
# NOTE(review): the grid search repr above reports refit=True, so
# best_estimator_ should already be fitted on X_train_pca; this call
# looks redundant — confirm before removing.
svc.fit(X_train_pca,y_train)
SVC(C=5, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
进行数据预测
x_test.shape
(65, 62500)
# Project the test samples into the PCA space fitted on the training data.
x_test_pca = pca.transform(x_test)
x_test_pca.shape
(65, 150)
y_ = svc.predict(x_test_pca)
y_[:20]
array([2, 1, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 1, 3, 2, 3, 4, 3])
y_test[:20]
array([2, 0, 3, 4, 3, 4, 3, 1, 3, 3, 3, 3, 3, 3, 1, 3, 2, 3, 4, 3])
定义方法获取预测人名和真实人名
y_name
array(['Ariel Sharon', 'Colin Powell', 'Donald Rumsfeld', 'George W Bush', 'Gerhard Schroeder', 'Hugo Chavez', 'Tony Blair'], dtype='<U17')
def titles(y_, y_test, num, target_names=None):
    """Build "true / predicted" surname captions for the first ``num`` samples.

    Args:
        y_: Predicted class indices.
        y_test: True class indices, aligned position by position with ``y_``.
        num: Number of leading samples to caption.
        target_names: Sequence mapping a class index to a full name. When
            omitted, the module-level ``y_name`` is used.

    Returns:
        A list of ``num`` caption strings. Each caption holds the true
        surname on the first line and the predicted surname on the second,
        where the surname is the last space-separated word of the full name.

    Raises:
        IndexError: If ``num`` exceeds the length of ``y_`` or ``y_test``.
    """
    if target_names is None:
        # Fall back to the label names loaded with the dataset.
        target_names = y_name

    def surname(label):
        # Keep only the last word of the full name to keep captions short.
        return target_names[label].split(' ')[-1]

    return [
        'true:%s \n predict:%s' % (surname(y_test[i]), surname(y_[i]))
        for i in range(num)
    ]
titles(y_,y_test,10)
['true:Rumsfeld \n predict:Rumsfeld', 'true:Sharon \n predict:Powell', 'true:Bush \n predict:Bush', 'true:Schroeder \n predict:Bush', 'true:Bush \n predict:Bush', 'true:Schroeder \n predict:Bush', 'true:Bush \n predict:Bush', 'true:Powell \n predict:Powell', 'true:Bush \n predict:Bush', 'true:Bush \n predict:Bush']
调用方法获取人脸预测的名字,以及人脸的真实名字
定义方法绘制人脸识别结果图形
x_test.shape
(65, 62500)
def show_face_recognize(names, row, cols, x_test, image_shape=(250, 250)):
    """Plot a grid of faces, each titled with the matching entry of ``names``.

    Args:
        names: Captions, one per plotted face (for example the list returned
            by ``titles``).
        row: Number of rows in the subplot grid.
        cols: Number of columns in the subplot grid.
        x_test: Indexable collection of flattened images; item ``i`` is
            reshaped to ``image_shape`` before being drawn.
        image_shape: ``(height, width)`` of one image. Defaults to
            ``(250, 250)``, the size this function originally hard-coded.

    At most ``row * cols`` faces are drawn; fewer when ``names`` or
    ``x_test`` is shorter than the grid, so the loop never indexes past the
    available data.
    """
    plt.figure(figsize=(cols*2, row*2.8))
    shown = min(row*cols, len(names), len(x_test))
    for i in range(shown):
        axes = plt.subplot(row, cols, i+1)
        axes.imshow(x_test[i].reshape(image_shape), cmap='gray')
        axes.set_title(names[i])
        # Hide ticks and frame on this subplot; the caption carries the info.
        axes.axis('off')
调用方法,显示数据
# Boolean mask marking the samples whose label is 3.
y_target == 3
array([False, False, True, ..., False, True, False], dtype=bool)
y_target.shape
(1288,)
(y_target == 3).sum()
530
#George W Bush == 3
y_name
array(['Ariel Sharon', 'Colin Powell', 'Donald Rumsfeld', 'George W Bush', 'Gerhard Schroeder', 'Hugo Chavez', 'Tony Blair'], dtype='<U17')
# Show the first 20 test faces in a 4-row by 5-column grid.
names = titles(y_,y_test,20)
show_face_recognize(names,row= 4,cols=5,x_test = x_test)
网络上查找图片使用机器学习模型进行预测
bush = plt.imread('./bush.jpg')
bush.shape
(500, 396, 3)
# Collapse the colour channels (axis 2) into a single grayscale channel by
# taking their unweighted mean.
gray_bush = bush.mean(axis = 2)
gray_bush.shape
(500, 396)
plt.imshow(gray_bush,cmap = 'gray')
<matplotlib.image.AxesImage at 0x7f6e4037f518>
# Crop a 250x250 window (rows and columns 50..299) so the image matches
# the 250x250 faces the model was trained on.
deal_bush = gray_bush[50:300,50:300]
plt.imshow(deal_bush,cmap = 'gray')
<matplotlib.image.AxesImage at 0x7f6e403aaa90>
# Flatten the crop to a single row vector, project it with the fitted PCA
# and classify it with the tuned SVC.
bush_pca = pca.transform(deal_bush.reshape((1,-1)))
svc.predict(bush_pca)
array([3])
y_name[3]
'George W Bush'
import scipy.ndimage as ndimage
chavez = plt.imread('./Chavez.jpg')
plt.imshow(chavez)
chavez.shape
(332, 500, 3)
# 332,500,3
gray_chavez = chavez.mean(axis = 2)
gray_chavez.shape
(332, 500)
# Rescale each axis independently so the result is 250x250; because the
# two zoom factors differ, the aspect ratio of the photo is not preserved.
handle_chavez = ndimage.zoom(gray_chavez,zoom = [250/gray_chavez.shape[0],250/gray_chavez.shape[1]])
handle_chavez.shape
(250, 250)
plt.imshow(handle_chavez,cmap = 'gray')
<matplotlib.image.AxesImage at 0x7f6e3b06d828>
# Flatten to one row vector, project with the fitted PCA and classify.
# NOTE(review): `chavez` is rebound here from the loaded image to the
# flattened vector, and `hcavez_pca` looks like a typo for `chavez_pca`.
chavez = handle_chavez.reshape((1,-1))
hcavez_pca = pca.transform(chavez)
svc.predict(hcavez_pca)
array([3])
y_name
array(['Ariel Sharon', 'Colin Powell', 'Donald Rumsfeld', 'George W Bush', 'Gerhard Schroeder', 'Hugo Chavez', 'Tony Blair'], dtype='<U17')
(y_target == 5).sum()
71
读取网络数据进行灰度处理gray = [0.299,0.587,0.114]
如果是jpg图片进行归一化操作
如果图片尺寸不符合要求,那么修改尺寸