ruby ../bin/pick_works_from_ans.rb gw3_ex_ans.ipynb -1 '' '14'
train_A.dataには特徴量が,train_b.dataには悪性(-1)か良性(1)かを示す数値が入っている. 訓練(train(ing))データを読み込み, 仮説ベクトル$\boldsymbol{w}$の初期値を全て0.0001として, 最初の30データの正誤を表示せよ. テキストのshow_accuracyを少し改良すればできる.
import numpy as np
# Load the training set:
#   train_A.data - 300 samples x 30 features (whitespace-separated floats)
#   train_b.data - 300 labels: malignant (-1) or benign (1)
tmp = np.fromfile('./train_A.data',
                  np.float64, -1, " ")
A = tmp.reshape(300, 30)
tmp = np.fromfile('./train_b.data',
                  np.float64, -1, " ")
b = tmp.reshape(300, 1)
# Initial hypothesis vector: every component 0.0001.
# np.full replaces the original zeros-then-fill loop.
w = np.full((30, 1), 0.0001)
def print_w(w):
    """Print the 30-component hypothesis vector as a 10x3 table.

    Each breast-cancer feature contributes three columns (mean, stderr,
    worst), so component i*3+j belongs to feature i, column j.

    w: indexable of 30 numbers (e.g. a (30,1) numpy array or a list).
    """
    params = ["radius", "texture","perimeter","area",
              "smoothness","compactness","concavity",
              "concave points",
              "symmetry","fractal dimension"]
    print(" (params) : ",end="")
    print(" (mean) (stderr) (worst)")
    for i, param in enumerate(params):
        print("%18s:" %param, end="")
        for j in range(3):
            # %14.9f (was %13.9f): a value such as -27.841944367 is
            # exactly 13 characters, so width 13 left no space between
            # columns and adjacent numbers ran together in the output.
            print("%14.9f" % w[i*3+j], end="")
        print()
# Display the initial hypothesis vector (all components 0.0001).
print_w(w)
(params) : (mean) (stderr) (worst) radius: 0.000100000 0.000100000 0.000100000 texture: 0.000100000 0.000100000 0.000100000 perimeter: 0.000100000 0.000100000 0.000100000 area: 0.000100000 0.000100000 0.000100000 smoothness: 0.000100000 0.000100000 0.000100000 compactness: 0.000100000 0.000100000 0.000100000 concavity: 0.000100000 0.000100000 0.000100000 concave points: 0.000100000 0.000100000 0.000100000 symmetry: 0.000100000 0.000100000 0.000100000 fractal dimension: 0.000100000 0.000100000 0.000100000
def show_accuracy(mA, vb, vw):
    """Report how well hypothesis vw classifies data mA with labels vb.

    Labels: M = malignant (-1), B = benign (1).  A prediction is correct
    when its sign matches the label.  A benign case predicted malignant
    is a "safe error" (triggers a re-examination); a malignant case
    predicted benign is a "critical error" (a missed cancer).
    """
    scores = mA.dot(vw)
    total = vb.size
    correct = safe_error = critical_error = 0
    for k in range(total):
        score, label = scores[k], vb[k]
        if score * label > 0:
            correct += 1
        elif score < 0 and label > 0:   # benign predicted malignant: re-test
            safe_error += 1
        elif score > 0 and label < 0:   # malignant predicted benign: missed
            critical_error += 1
    print(" correct: %4d/%4d" % (correct, total))
    print(" safe error: %4d" % safe_error)
    print("critical error: %4d" % critical_error)
def show_first_n_data(mA, vb, vw, nn):
    """Print prediction vs. label for the first nn rows of mA.

    Labels: M = malignant (-1), B = benign (1); a prediction counts as
    correct when its sign matches the label.
    """
    scores = mA.dot(vw)
    hits = 0
    for idx in range(nn):
        print("%4d predict: %10.5f" % (idx, scores[idx]))
        print("M(-1)or B(1): %4d" % vb[idx])
        if scores[idx] * vb[idx] > 0:
            hits += 1
    print(" correct: %4d/%4d" % (hits, nn))
# Check the (still untrained) hypothesis on the first 30 samples.
show_first_n_data(A, b, w, 30)
0 predict: 0.35662 M(-1)or B(1): -1 1 predict: 0.37409 M(-1)or B(1): -1 2 predict: 0.33874 M(-1)or B(1): -1 3 predict: 0.12400 M(-1)or B(1): -1 4 predict: 0.33367 M(-1)or B(1): -1 5 predict: 0.15055 M(-1)or B(1): -1 6 predict: 0.30679 M(-1)or B(1): -1 7 predict: 0.18144 M(-1)or B(1): -1 8 predict: 0.15649 M(-1)or B(1): -1 9 predict: 0.14934 M(-1)or B(1): -1 10 predict: 0.23125 M(-1)or B(1): -1 11 predict: 0.24632 M(-1)or B(1): -1 12 predict: 0.29685 M(-1)or B(1): -1 13 predict: 0.20019 M(-1)or B(1): -1 14 predict: 0.15879 M(-1)or B(1): -1 15 predict: 0.19595 M(-1)or B(1): -1 16 predict: 0.21775 M(-1)or B(1): -1 17 predict: 0.25104 M(-1)or B(1): -1 18 predict: 0.41973 M(-1)or B(1): -1 19 predict: 0.15553 M(-1)or B(1): 1 20 predict: 0.14147 M(-1)or B(1): 1 21 predict: 0.07822 M(-1)or B(1): 1 22 predict: 0.20324 M(-1)or B(1): -1 23 predict: 0.45552 M(-1)or B(1): -1 24 predict: 0.36149 M(-1)or B(1): -1 25 predict: 0.28429 M(-1)or B(1): -1 26 predict: 0.18760 M(-1)or B(1): -1 27 predict: 0.29502 M(-1)or B(1): -1 28 predict: 0.24021 M(-1)or B(1): -1 29 predict: 0.25732 M(-1)or B(1): -1 correct: 3/ 30
# Sanity-check the dimensions of the data, hypothesis, labels and
# prediction, then evaluate the squared loss ||A.w - b||^2.
for mat in (A, w, b, np.dot(A, w)):
    print(mat.shape)
LL = np.dot(A, w) - b
print(np.dot(LL.transpose(), LL))
(300, 30) (30, 1) (300, 1) (300, 1) [[354.73973502]]
A[i]にはデータが入っている.またwは分類器である.これらの積$A[i] \cdot w$は悪性か良性かを判断する数値を与える.従って残差ベクトル$A \cdot w - b$のノルムの2乗は,全てのデータに対する予測誤差の2乗和となる.
最急降下法による仮説ベクトル$\boldsymbol{w}$の最適化を試みる. 最急降下法の概念図を以下に示した.損失関数の値$L(\boldsymbol{w})$ をz軸にとって(x,y)平面を$\boldsymbol{w}$と見立てて, その勾配$dL/dw$に従って極小値を求めるステップを 刻んでいく様子を示している. 単なるイメージ図なんで,コードの中身は無視してください.
%matplotlib notebook
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

def f(x,y):
    """Toy convex loss surface used only for this illustration."""
    return x**2+y**2+x*y

# Grid over which the surface is drawn.
x = np.arange(-np.pi, np.pi, 0.02)
y = np.arange(-np.pi, np.pi, 0.02)
X, Y = np.meshgrid(x, y)
Z1 = f(X,Y)
# Three successive descent-like steps toward the minimum at the origin.
x_p = [2, 1, 0.5]
y_p = [2, 1, 0.5]
z_p = [f(x_p[0], y_p[0]),f(x_p[1], y_p[1]),
       f(x_p[2], y_p[2])]
fig = plt.figure()
# Axes3D(fig) is deprecated since matplotlib 3.4 (this cell used to emit
# a MatplotlibDeprecationWarning); add_subplot(projection="3d") is the
# supported way to create a 3D axes.
plot3d = fig.add_subplot(projection="3d")
plot3d.plot(x_p, y_p, z_p, "o-", color="red")
plot3d.plot_surface(X,Y,Z1,alpha=0.6)
plt.show()
<ipython-input-8-6a9f96ea6a91>:20: MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False and use fig.add_axes(ax) to suppress this warning. The default value of auto_add_to_figure will change to False in mpl3.5 and True values will no longer work in 3.6. This is consistent with other Axes classes. plot3d = Axes3D(fig)
損失関数の偏微分 $$ \begin{aligned} \frac{\partial L}{\partial w_j} &= \sum_{i=1}^n \frac{\partial}{\partial w_j}(A_i \cdot w -b_i)^2 \\ &= \sum_{i=1}^n 2(A_i \cdot w -b_i) A_{ij} \end{aligned} $$ の最後の式の$A_{ij}$の係数ベクトルをdLwとして求めよ.その次元をshapeで確かめよ.
# Gradient coefficient vector: the residual A.w - b, one entry per sample.
dLw = np.dot(A, w) - b
print(dLw.shape)
(300, 1)
係数ベクトルdLwとAのdot積が勾配ベクトルとなる(定数係数2はステップ幅$\sigma$に吸収させる).
$$
w = w - \sigma(dLw^t \cdot A)^t
$$
として仮説ベクトル$w$を勾配に従って進めたベクトルを求めよ.
ここで$\sigma$はステップ幅と呼ばれ,勾配に従ってどの程度進むかを調整するパラメータで,大きすぎると最適値を通り越し,小さすぎると最適値にたどり着くまでに繰り返し(iteration)が多くなる.ここでは,3.0*10**(-9)
程度とせよ.
# One gradient-descent step: w <- w - sigma * (dLw^T . A)^T.
loop, sigma = 300, 3.0*10**(-9)
# Hypothesis and squared loss before the step.
print_w(w)
LL = np.dot(A, w) - b
print(np.dot(LL.transpose(), LL))
# Move w along the negative gradient direction.
w = w - sigma * dLw.transpose().dot(A).transpose()
# Hypothesis and squared loss after the step.
print_w(w)
LL = np.dot(A, w) - b
print(np.dot(LL.transpose(), LL))
(params) : (mean) (stderr) (worst) radius: 0.000100000 0.000100000 0.000100000 texture: 0.000100000 0.000100000 0.000100000 perimeter: 0.000100000 0.000100000 0.000100000 area: 0.000100000 0.000100000 0.000100000 smoothness: 0.000100000 0.000100000 0.000100000 compactness: 0.000100000 0.000100000 0.000100000 concavity: 0.000100000 0.000100000 0.000100000 concave points: 0.000100000 0.000100000 0.000100000 symmetry: 0.000100000 0.000100000 0.000100000 fractal dimension: 0.000100000 0.000100000 0.000100000 [[354.73973502]] (params) : (mean) (stderr) (worst) radius: 0.000095254 0.000095018 0.000067803 texture: -0.000248336 0.000099981 0.000099951 perimeter: 0.000099933 0.000099963 0.000099964 area: 0.000099991 0.000099783 0.000099833 smoothness: 0.000098420 0.000069150 0.000099999 compactness: 0.000099991 0.000099989 0.000099996 concavity: 0.000099997 0.000099999 0.000093656 concave points: 0.000092865 0.000056598 -0.000459621 symmetry: 0.000099970 0.000099861 0.000099823 fractal dimension: 0.000099930 0.000099927 0.000099981 [[300.68126776]]
先ほどの漸近操作を300回程度繰り返し,その前後でwを表示してみよ. 最初の30データの予測値を比較せよ.
# Iterate the descent step `loop` times, then inspect the trained w:
# its components, overall accuracy, and the first 30 predictions.
loop, sigma = 300, 3.0*10**(-9)
for i in range(loop):
    dLw = A.dot(w) - b                                   # residual
    w = w - sigma * dLw.transpose().dot(A).transpose()   # descend
LL = np.dot(A, w) - b
print_w(w)
show_accuracy(A, b, w)
show_first_n_data(A, b, w, 30)
(params) : (mean) (stderr) (worst) radius: 0.000521147 0.000830565 0.002610776 texture: 0.001656688 0.000104646 0.000100058 perimeter: 0.000095983 0.000097902 0.000108838 area: 0.000103554 0.000102348 0.000169429 smoothness: 0.000099522 -0.000795121 0.000100425 compactness: 0.000100420 0.000100697 0.000100259 concavity: 0.000101184 0.000100187 0.000505184 concave points: 0.001006849 0.002453114 -0.001933241 symmetry: 0.000105845 0.000096075 0.000091685 fractal dimension: 0.000097602 0.000111376 0.000103466 correct: 274/ 300 safe error: 5 critical error: 21 0 predict: -1.54364 M(-1)or B(1): -1 1 predict: -0.84466 M(-1)or B(1): -1 2 predict: -0.60572 M(-1)or B(1): -1 3 predict: 0.02368 M(-1)or B(1): -1 4 predict: -0.19352 M(-1)or B(1): -1 5 predict: -0.14373 M(-1)or B(1): -1 6 predict: -0.67041 M(-1)or B(1): -1 7 predict: -0.24813 M(-1)or B(1): -1 8 predict: -0.03406 M(-1)or B(1): -1 9 predict: -0.07117 M(-1)or B(1): -1 10 predict: -0.28983 M(-1)or B(1): -1 11 predict: -0.59345 M(-1)or B(1): -1 12 predict: -0.01581 M(-1)or B(1): -1 13 predict: 0.18380 M(-1)or B(1): -1 14 predict: 0.17176 M(-1)or B(1): -1 15 predict: -0.12344 M(-1)or B(1): -1 16 predict: -0.48622 M(-1)or B(1): -1 17 predict: -0.57535 M(-1)or B(1): -1 18 predict: -1.76553 M(-1)or B(1): -1 19 predict: 0.06399 M(-1)or B(1): 1 20 predict: 0.13846 M(-1)or B(1): 1 21 predict: 0.18655 M(-1)or B(1): 1 22 predict: -0.14155 M(-1)or B(1): -1 23 predict: -1.90316 M(-1)or B(1): -1 24 predict: -2.07107 M(-1)or B(1): -1 25 predict: -0.66764 M(-1)or B(1): -1 26 predict: -0.05932 M(-1)or B(1): -1 27 predict: -0.24644 M(-1)or B(1): -1 28 predict: -0.56389 M(-1)or B(1): -1 29 predict: -0.15493 M(-1)or B(1): -1 correct: 27/ 30
行列$A$のQR分解を行い,Q, R行列の次元をshapeで確かめよ.
# QR-decompose A: q (300x30) has orthonormal columns, r (30x30) is
# upper triangular.
q, r = np.linalg.qr(A)
for mat in (q, r):
    print(mat.shape)
(300, 30) (30, 30)
仮説ベクトル$\boldsymbol{w}$の最適値 $$ ww = R^{-1}.Q^t.b $$ を求めよ.その値と精度を確かめよ.また距離の2乗 $$ ||A.w -b ||^2 $$ が下がっていることを確かめよ.
# Least-squares optimum from the QR factors:  ww = R^{-1} . Q^T . b.
# Solving the triangular system R.ww = Q^T.b with np.linalg.solve is
# numerically preferable to forming the explicit inverse of R.
ww = np.linalg.solve(r, np.transpose(q).dot(b))
print(ww.shape)
print_w(ww)
show_accuracy(A, b, ww)
(30, 1) (params) : (mean) (stderr) (worst) radius: 0.869921844 -0.024313948 -0.062679561 texture: -0.003274619 -8.790300861 1.747147500 perimeter: -0.202849407 -6.506451098 5.061760446 area: 49.167541566 -0.956591421 -0.082052658 smoothness: -0.007943157 0.004976908-27.841944367 compactness: 3.301527110 4.985959134-16.318886295 concavity: 10.316289081-21.332232171 -0.408605816 concave points: -0.003345722 -0.000677873 0.002510735 symmetry: 4.531369718 0.590110016 -0.719368704 fractal dimension: -2.158965299 -3.803467225-12.298417038 correct: 286/ 300 safe error: 1 critical error: 13
# Squared loss at the QR optimum: ||A.ww - b||^2 (much lower than after
# plain gradient descent).
LL = A.dot(ww) - b
print(LL.transpose().dot(LL))
[[77.50051002]]
# Compare the first 30 predictions using the optimized hypothesis ww.
show_first_n_data(A, b, ww, 30)
0 predict: -0.96687 M(-1)or B(1): -1 1 predict: -0.51795 M(-1)or B(1): -1 2 predict: -1.35431 M(-1)or B(1): -1 3 predict: -1.20772 M(-1)or B(1): -1 4 predict: -0.67853 M(-1)or B(1): -1 5 predict: -0.42976 M(-1)or B(1): -1 6 predict: -0.80193 M(-1)or B(1): -1 7 predict: -0.26553 M(-1)or B(1): -1 8 predict: -0.58596 M(-1)or B(1): -1 9 predict: -1.46669 M(-1)or B(1): -1 10 predict: -0.28045 M(-1)or B(1): -1 11 predict: -0.92474 M(-1)or B(1): -1 12 predict: -0.44804 M(-1)or B(1): -1 13 predict: -0.01676 M(-1)or B(1): -1 14 predict: -0.33535 M(-1)or B(1): -1 15 predict: -0.96765 M(-1)or B(1): -1 16 predict: -0.80508 M(-1)or B(1): -1 17 predict: -1.00873 M(-1)or B(1): -1 18 predict: -1.24782 M(-1)or B(1): -1 19 predict: 0.25212 M(-1)or B(1): 1 20 predict: 0.98083 M(-1)or B(1): 1 21 predict: 1.27821 M(-1)or B(1): 1 22 predict: -0.35908 M(-1)or B(1): -1 23 predict: -0.98992 M(-1)or B(1): -1 24 predict: -1.35707 M(-1)or B(1): -1 25 predict: -1.31556 M(-1)or B(1): -1 26 predict: -1.21891 M(-1)or B(1): -1 27 predict: -0.73490 M(-1)or B(1): -1 28 predict: -1.28252 M(-1)or B(1): -1 29 predict: -0.11118 M(-1)or B(1): -1 correct: 30/ 30