In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

6.1 Example: Learning $XOR$

In [2]:
# The four XOR input points, one example per row; columns are (x_1, x_2).
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

# XOR targets as a column vector, row-aligned with X.
y = np.array([[0],
              [1],
              [1],
              [0]])

# Column of ones that is stacked next to X so the linear model gets an
# intercept term. Sized from X (not a hard-coded 4) so it always matches
# the number of examples.
bias = np.ones((X.shape[0], 1), dtype=int)

Linear approach

Attempt to fit a linear model $f(x; \, \theta)$ by solving the normal equations: $\theta = (X^{\intercal} X)^{-1} \, X^{\intercal} y$

In [3]:
# Design matrix: the inputs with the column of ones appended, so the last
# coefficient of the solution plays the role of the intercept.
Xb = np.hstack((X, bias))

# Left- and right-hand sides of the normal equations (Xb^T Xb) theta = Xb^T y.
XbTXb = Xb.T.dot(Xb)

Xb_y = Xb.T.dot(y)  # despite the name, this is Xb^T y

# Solve the linear system directly rather than forming the explicit inverse:
# same solution, but cheaper and numerically better behaved.
sol = np.linalg.solve(XbTXb, Xb_y)
print(sol)
[[1.66533454e-16]
 [1.11022302e-16]
 [5.00000000e-01]]

The solution turns out to be

\begin{align} \text{linear solution} &= \begin{bmatrix} 0 \\ 0 \\ \frac{1}{2} \end{bmatrix} \end{align}

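which ignores the inputs and simply yields $\frac{1}{2}$ for every example, so a linear model cannot represent $XOR$. The cells below instead build a small network with one hidden layer, $h = \max\{0, \, XW + c\}$, followed by a linear output $h w + b$.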

In [4]:
def _relu(x):
    """ Return max{0, x} """
    return x * (x > 0)
In [5]:
# Hidden-layer parameters: X is multiplied by W, then c is added to every
# row before the ReLU is applied.
W = np.ones((2, 2), dtype=int)

c = np.array([0, -1]).reshape(1, 2)
In [6]:
# One colour/marker per row of X: 'o' for the points whose XOR is 0 and 'x'
# for the points whose XOR is 1. Reused by the later plotting cells.
colors = ['#7570b3', '#d95f02', '#d95f02', '#7570b3']
markers = list('oxxo')

fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [1, 3]})

# Left panel: the raw inputs as a table.
ax1.table(cellText=X, colLabels=[r'$x_1$', r'$x_2$'], loc='center')
ax1.axis('off')

# Right panel: the same points in the input plane.
# The loop variables are deliberately not named `x, y`: unpacking into `y`
# would overwrite the notebook-level target vector `y`.
for (x1, x2), col, m in zip(X, colors, markers):
    ax2.scatter(x1, x2, color=col, marker=m)
ax2.set_xlabel(r'$x_1$')
ax2.set_ylabel(r'$x_2$')
fig.subplots_adjust(wspace=0.5, left=-.5, right=0.5)
In [7]:
# First step of the hidden layer: apply the weight matrix to every input row.
XW = X.dot(W)

fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [1, 3]})

# The columns are now the coordinates of XW, not the raw inputs, so the table
# and axes are labelled accordingly.
ax1.table(cellText=XW, colLabels=[r'$(XW)_1$', r'$(XW)_2$'], loc='center')
ax1.axis('off')

# The loop variables are deliberately not named `x, y`: unpacking into `y`
# would overwrite the notebook-level target vector `y`.
for (u1, u2), col, m in zip(XW, colors, markers):
    ax2.scatter(u1, u2, color=col, marker=m)
ax2.set_xlabel(r'$(XW)_1$')
ax2.set_ylabel(r'$(XW)_2$')
fig.subplots_adjust(wspace=0.5, hspace=1.2, left=-.5, right=1.)
In [8]:
# Second step of the hidden layer: add the offset c to every row of XW
# (c has a single row, so it broadcasts across the examples).
XW_c = XW + c

fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [1, 3]})

# The columns are the coordinates of XW + c, not the raw inputs, so the table
# and axes are labelled accordingly.
ax1.table(cellText=XW_c, colLabels=[r'$(XW + c)_1$', r'$(XW + c)_2$'], loc='center')
ax1.axis('off')

# The loop variables are deliberately not named `x, y`: unpacking into `y`
# would overwrite the notebook-level target vector `y`.
for (u1, u2), col, m in zip(XW_c, colors, markers):
    ax2.scatter(u1, u2, color=col, marker=m)
ax2.set_xlabel(r'$(XW + c)_1$')
ax2.set_ylabel(r'$(XW + c)_2$')
fig.subplots_adjust(wspace=0.5, left=-.5, right=1.)
In [9]:
# Hidden representation: the ReLU applied element-wise to XW + c.
h = _relu(XW_c)

fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [1, 3]})

# The columns are the hidden features h, not the raw inputs, so the table and
# axes are labelled accordingly.
ax1.table(cellText=h, colLabels=[r'$h_1$', r'$h_2$'], loc='center')
ax1.axis('off')

# The loop variables are deliberately not named `x, y`: unpacking into `y`
# would overwrite the notebook-level target vector `y`.
for (h1, h2), col, m in zip(h, colors, markers):
    ax2.scatter(h1, h2, color=col, marker=m)
ax2.set_xlabel(r'$h_1$')
ax2.set_ylabel(r'$h_2$')
fig.subplots_adjust(wspace=0.5, left=-.5, right=1.)
In [10]:
# Output-layer parameters: weights applied to the hidden features h, plus an
# offset.
w = np.array([[1],
              [-2]])

b = 0

# Keep the prediction under its own name: assigning it to `y` would overwrite
# the XOR target vector defined at the top of the notebook.
y_pred = h.dot(w) + b
print(y_pred)

# The network is expected to reproduce XOR exactly on all four inputs.
assert np.array_equal(y_pred, [[0], [1], [1], [0]])
[[0]
 [1]
 [1]
 [0]]
In [ ]: