Here I will attempt to demonstrate how Marr and Poggio's (1976) cooperative algorithm computes stereo disparity. As usual, we want to solve the stereo correspondence problem - that is, we want to reject false matches to recover the correct disparity.
To do so, Marr and Poggio used a recurrent network that incorporates two constraints:
These constraints were implemented by, respectively:
The algorithm can be described by the equation below. The activity $C$ of a unit at the 3-dimensional location $(x,y,d)$ at iteration $n+1$ can be described as:
$$ C_{(x,y,d)}^{n+1} = \sigma \left( \sum C_{(x,y,d) \in E}^{n} - \epsilon \sum C_{(x,y,d) \in I}^{n} + C_{(x,y,d)}^{0} \right) $$where $E$ and $I$ are the excitatory and inhibitory intputs to the unit, and epsilon is an inhibitory constant. Below is an implementation of the algorithm (highly inefficient, by the way).
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
Populating the interactive namespace from numpy and matplotlib
def coop_stereo(limg, rimg, max_iter=5, plot_flag=0, n_layers=10):
im_h = np.shape(limg)[0]
im_w = np.shape(limg)[1]
# initialization
c = np.zeros((im_h, im_w, im_w, max_iter))
for i in np.arange(im_h):
c[i,:,:,0] = np.dot(limg[i,:].reshape(-1,1),rimg[i,:].reshape(1,-1))
c[c<0] = 0.
# depth layers
d_layers = np.arange(n_layers+1)-np.floor(n_layers/2.)
# layer mask
l_mask = np.zeros((im_w,im_w))
for d in d_layers:
for i in np.arange(im_w):
if (i+d>=0) & (i+d<im_w):
l_mask[i, i+d] = 1.
d_map = np.zeros((im_h,im_w,max_iter))
# model parameters
theta = 3.
epsilon = 2.
m = 5.
n = 1
while n < max_iter:
for r in np.arange(np.shape(c)[0]):
# current state at row r
c_s = c[r,:,:,n-1] * l_mask
# inhibitory input
c_i = np.tile(np.sum(c_s, axis=0).reshape(-1,1),(1,im_w)) + np.tile(np.sum(c_s, axis=1),(im_w,1))
c_i *= l_mask
# excitatory input - loop over diagonals
c_e = np.zeros(np.shape(c_s))
# loop over depth layers
for d in d_layers:
n_nodes = im_w-np.absolute(d)
# loop over nodes at depth layer d
for i in np.arange(n_nodes):
# loop over y-dimension
for h in (np.arange(m)-np.floor(m/2)):
if (((r+h)>=0) & ((r+h)<im_h)):
# loop over x-dimension
for xe in (np.arange(m)-np.floor(m/2)):
if (((i+xe)>=0) & ((i+xe)<im_w) & ((i+d+xe)>=0)
& ((i+d+xe)<im_w) & ((i+d)<im_w) & ((i+d)>=0)):
# integrate the excitatory input
c_e[i,i+d] += c[r+h, i+xe, i+d+xe, n-1]
### update activity
act = c_e - epsilon*c_i + c[r,:,:,0]
# threshold function (we could also use a sigmoid activation function here)
c[r,:,:,n] = (act>=theta)
d_map[r,:,n] = np.arange(im_w) - np.argmax(act, axis=1)
d_map[d_map<np.amin(d_layers)] = 0
d_map[d_map>np.amax(d_layers)] = 0
if plot_flag == 1:
plt.imshow(d_map[:,:,n], interpolation='nearest', cmap='gray', origin='lower')
plt.colorbar()
plt.title('Iteration ' + str(n))
plt.show()
# next iteration
n += 1
Let us demonstrate the algorithm on a simple random dot stereogram depicting a central square in depth.
# 2d image
d = 4. # px
im_size = 80.
sq_size = 40.
sq_start = (im_size - sq_size)/2.
# generate 2d image
cimg = np.random.randint(0,2,(im_size,im_size))*2-1
limg = np.array(cimg[:,:])
rimg = np.array(cimg[:,:])
sq_img = np.random.randint(0,2,(sq_size,sq_size))*2-1
limg[sq_start:(sq_start+sq_size), (sq_start-d/2):(sq_start+sq_size-d/2)] = sq_img
rimg[sq_start:(sq_start+sq_size), (sq_start+d/2):(sq_start+sq_size+d/2)] = sq_img
plt.subplot(2,2,3)
plt.imshow(rimg, interpolation='nearest', cmap='gray')
plt.axis('off')
plt.subplot(2,2,4)
plt.imshow(limg, interpolation='nearest', cmap='gray')
plt.axis('off')
plt.show()
coop_stereo(limg, rimg, max_iter=5, plot_flag=1, n_layers=10)
Now let us try a more complex stereogram, similar to those used in Marr and Poggio's original paper.
# 2d image
im_size = 80.
sq_size = [50., 30.]
d = [2., 4.] # px
# generate 2d image
cimg = np.random.randint(0,2,(im_size,im_size))*2-1
limg = np.array(cimg[:,:])
rimg = np.array(cimg[:,:])
for ss in np.arange(np.shape(sq_size)[0]):
sq_start = (im_size - sq_size[ss])/2.
sq_img = np.random.randint(0,2,(sq_size[ss],sq_size[ss]))*2-1
limg[sq_start:(sq_start+sq_size[ss]), (sq_start-d[ss]/2):(sq_start+sq_size[ss]-d[ss]/2)] = sq_img
rimg[sq_start:(sq_start+sq_size[ss]), (sq_start+d[ss]/2):(sq_start+sq_size[ss]+d[ss]/2)] = sq_img
plt.subplot(2,2,3)
plt.imshow(rimg, interpolation='nearest', cmap='gray')
plt.subplot(2,2,4)
plt.imshow(limg, interpolation='nearest', cmap='gray')
plt.show()
coop_stereo(limg, rimg, max_iter=5, plot_flag=1, n_layers=10)