import numpy as np
import matplotlib.pyplot as plt

# --- Sigmoid curve ---------------------------------------------------------
# Evaluate the logistic function on a grid over [-10, 10) and plot it.
a = np.arange(-10, 10, 0.1)
s = 1.0 / (np.exp(-a) + 1.0)
plt.plot(a, s)
plt.show()

# --- Two-class toy data ----------------------------------------------------
# N points per class in d dimensions: x1 is a standard-normal cloud around
# the origin, x2 is the same kind of cloud shifted by (5, 5).
d, N = 2, 1000
x1 = np.random.randn(N, d)
x2 = np.array([5, 5]) + np.random.randn(N, d)

for points, colour in ((x1, 'r'), (x2, 'b')):
    plt.scatter(points[:, 0], points[:, 1], c=colour)
plt.show()

# Stack both classes into one sample matrix and build the matching targets:
# 0 for every row that came from x1, 1 for every row that came from x2.
x = np.concatenate((x1, x2), axis=0)
label1, label2 = np.zeros(N), np.ones(N)
label = np.concatenate((label1, label2))

def sigmoid(a):
    """Return the logistic sigmoid ``1 / (1 + exp(-a))`` element-wise.

    The textbook formula evaluates ``exp(-a)``, which overflows (and emits a
    RuntimeWarning) for large negative ``a``. This version only ever takes
    the exponential of a non-positive number, so it cannot overflow.

    Args:
        a: Scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as ``a``; every value lies in [0, 1].
    """
    a = np.asarray(a)
    z = np.exp(-np.abs(a))  # always in [0, 1]
    # a >= 0:  1 / (1 + e^-a)
    # a <  0:  e^a / (1 + e^a)   (algebraically the same value)
    return np.where(a >= 0, 1.0, z) / (1.0 + z)

def p_y_given_x(x):
    """Return the model's predicted probability of label 1 for each sample.

    Applies ``sigmoid`` to the affine score ``x . w + b``, reading the
    module-level weights ``w`` and bias ``b`` at call time.
    """
    activation = np.dot(x, w) + b
    return sigmoid(activation)

def grad(x, label):
    """Return ``(w_grad, b_grad)`` averaged over the given batch.

    With ``residual = label - p_y_given_x(x)``, the weight gradient is the
    negated per-feature mean of ``x * residual`` and the bias gradient is
    the negated mean residual, i.e. the descent direction is obtained by
    subtracting these values from the parameters.
    """
    residual = label - p_y_given_x(x)
    # x.T has one row per feature, so the mean along axis 1 averages over
    # the samples in the batch.
    w_grad = -(x.T * residual).mean(axis=1)
    b_grad = -residual.mean()
    return w_grad, b_grad

# Shuffle samples and targets together: append the targets as a last column,
# shuffle the rows in place, then split the columns apart again.
dataset = np.column_stack((x, label))
np.random.shuffle(dataset)
x, label = dataset[:, :2], dataset[:, 2]

# Random initial parameters: d weights and one bias, each drawn from [0, 1).
w, b = np.random.rand(d), np.random.random()

# Step size and number of samples per gradient update.
eta, minibatch_size = 0.1, 10

# Mini-batch gradient descent for 10 passes over the data. After every
# parameter update the mean absolute error over the *whole* data set is
# recorded so the learning curve can be plotted afterwards.
errors = []
for _ in range(10):
    for index in range(0, x.shape[0], minibatch_size):
        _x = x[index:index + minibatch_size]
        _label = label[index:index + minibatch_size]
        w_grad, b_grad = grad(_x, _label)
        w -= eta * w_grad
        b -= eta * b_grad
        errors.append(np.mean(np.abs(label - p_y_given_x(x))))

# Final mean absolute error. The call form print(...) with a single argument
# produces the same output on Python 2 and Python 3, whereas the bare
# `print expr` statement is a SyntaxError on Python 3.
print(np.mean(np.abs(label - p_y_given_x(x))))
plt.plot(errors)
plt.show()

# Decision boundary of the trained model: the line w[0]*x + w[1]*y + b = 0,
# solved for y and evaluated on a grid of x values.
bx = np.arange(-6, 10, 0.1)
by = -b / w[1] - (w[0] / w[1]) * bx

# Fix the viewport first, then draw the boundary on top of both classes.
plt.xlim([-5, 10])
plt.ylim([-5, 9])
plt.plot(bx, by)
for points, colour in ((x1, 'r'), (x2, 'b')):
    plt.scatter(points[:, 0], points[:, 1], c=colour)
plt.show()

# K random scores in [0, 1) used as input for the softmax demonstration.
K = 10
a = np.random.rand(K)

def softmax(a):
    """Return the softmax of ``a``, normalised over all of its elements.

    Softmax is unchanged when a constant is added to every score, so the
    maximum is subtracted before exponentiating. Every exponent is then
    <= 0, which keeps ``exp`` from overflowing to ``inf`` (and the result
    from becoming ``nan``) for large scores.

    Args:
        a: Array-like of real-valued scores.

    Returns:
        Array of the same shape as ``a`` with non-negative entries that sum
        to 1. An empty input yields an empty array.
    """
    a = np.asarray(a)
    if a.size == 0:
        # Nothing to normalise, and np.max would raise on an empty array.
        return np.exp(a)
    exp_a = np.exp(a - np.max(a))
    return exp_a / np.sum(exp_a)

# Turn the K random scores into a probability vector and show it as bars.
y = softmax(a)

plt.xlim([-1, K])
plt.bar(np.arange(K), y, width=0.1)
plt.show()

# Print the sum of the elements. The text is formatted into one string so
# that print(...) emits the same line on Python 2 and Python 3 (the bare
# `print a, b` statement is a SyntaxError on Python 3).
print('summation: %s' % np.sum(y))

D = 2      # input dimension
N = 1500   # total number of samples (split evenly over the three classes)
K = 3      # number of classes

# Floor division keeps the per-class count an integer on Python 3 as well;
# with true division it becomes a float, which NumPy rejects as a sample
# count / array shape.
n_per_class = N // 3

# Prepare the data points
mean1 = [-2, 2]   # mean of class 1
mean2 = [0, 0]    # mean of class 2
mean3 = [2, -2]   # mean of class 3
cov = [[1.0, 0.8], [0.8, 1.0]]  # covariance matrix (shared by all classes)

x1 = np.random.multivariate_normal(mean1, cov, n_per_class)
x2 = np.random.multivariate_normal(mean2, cov, n_per_class)
x3 = np.random.multivariate_normal(mean3, cov, n_per_class)
x = np.vstack((x1, x2, x3))

# Prepare the teacher vectors: one one-hot row per sample
label1 = np.zeros((n_per_class, 3), dtype=np.int32) + np.array([1, 0, 0])
label2 = np.zeros((n_per_class, 3), dtype=np.int32) + np.array([0, 1, 0])
label3 = np.zeros((n_per_class, 3), dtype=np.int32) + np.array([0, 0, 1])
label = np.vstack((label1, label2, label3))

# Visualise the three classes, one colour each
plt.xlim((-6, 6))
plt.ylim((-6, 6))
for cloud, colour in ((x1, 'r'), (x2, 'g'), (x3, 'b')):
    plt.scatter(cloud[:, 0], cloud[:, 1], c=colour)
plt.show()

# Shuffle the order of the data points. Samples and their one-hot targets
# are joined column-wise first so that each row stays paired with its label.
dataset = np.column_stack((x, label))
np.random.shuffle(dataset)
x, label = dataset[:, :2], dataset[:, 2:]

def softmax(x):
    """Return the row-wise softmax of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiating. This does
    not change the result mathematically, but it keeps ``exp`` from
    overflowing for large scores and guarantees a non-zero denominator when
    all scores in a row are very negative (both cases otherwise give
    ``nan``).

    Args:
        x: Array-like of shape (n_samples, n_classes).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
        An input without elements yields an empty array of the same shape.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise, and np.max raises on a zero-length axis.
        return np.exp(x)
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def p_y_given_x(x):
    """Return the predicted class probabilities for each row of ``x``.

    Applies ``softmax`` to the affine scores ``x . w^T + b``, reading the
    module-level weight matrix ``w`` (one row per class) and bias vector
    ``b`` at call time.
    """
    scores = np.dot(x, w.T) + b
    return softmax(scores)

def grad(x, label):
    """Return ``(w_grad, b_grad, mean_abs_error)`` for one batch.

    ``w_grad`` and ``b_grad`` have the shapes of the module-level ``w`` and
    ``b``; row/entry ``j`` is the batch mean of ``error_j * x`` and of
    ``error_j`` respectively, where ``error = p_y_given_x(x) - label``.
    The third value is the per-class mean absolute error on the batch.
    """
    error = p_y_given_x(x) - label
    w_grad, b_grad = np.zeros_like(w), np.zeros_like(b)

    # One pass per class (one row of w / one entry of b).
    for j in range(len(w)):
        class_error = error[:, j]
        # x.T has one row per feature; averaging along axis 1 averages
        # over the samples in the batch.
        w_grad[j] = (class_error * x.T).mean(axis=1)
        b_grad[j] = class_error.mean()

    return w_grad, b_grad, np.abs(error).mean(axis=0)

# Random initial parameters in [0, 1): one weight row and one bias per class.
w = np.random.rand(K, D)
b = np.random.rand(K)

# Step size and number of samples per gradient update.
eta, minibatch_size = 0.1, 500

# 100 passes of mini-batch gradient descent. The per-class mean absolute
# error that grad() reports for each batch (measured before that batch's
# update is applied) is collected for the learning-curve plot.
errors = []
for _ in range(100):
    for start in range(0, N, minibatch_size):
        stop = start + minibatch_size
        w_grad, b_grad, error = grad(x[start:stop], label[start:stop])
        w -= eta * w_grad
        b -= eta * b_grad
        errors.append(error)
errors = np.asarray(errors)

# One learning curve per class.
for k in (0, 1, 2):
    plt.plot(errors[:, k])
plt.show()

bx = np.arange(-10, 10, 0.1)


def _boundary(i, j):
    """Return y over ``bx`` along the line where classes i and j score equally.

    Equal scores mean (w[i] - w[j]) . (x, y) + (b[i] - b[j]) = 0, which is
    solved here for y.
    """
    dw = w[i] - w[j]
    return -dw[0] / dw[1] * bx - (b[i] - b[j]) / dw[1]


# Boundaries between classes 0/1 and between classes 1/2.
by0 = _boundary(0, 1)
by1 = _boundary(1, 2)

for line in (by0, by1):
    plt.plot(bx, line)

# Clip the view to the data region and overlay the three classes.
plt.xlim((-6, 6))
plt.ylim((-6, 6))
for cloud, colour in ((x1, 'r'), (x2, 'g'), (x3, 'b')):
    plt.scatter(cloud[:, 0], cloud[:, 1], c=colour)
plt.show()