This notebook is an interactive version of the companion webpage for the article "Deep Probabilistic Programming" (Tran et al., 2017). See Edward's API documentation for details on how to interact with data, models, inference, and criticism.
The code snippets assume the following package versions:
pip install -e "git+https://github.com/blei-lab/edward.git#egg=edward"
pip install tensorflow==1.0.0 # alternatively, tensorflow-gpu==1.0.0
# Beta-Bernoulli model: a sequence of 50 coin flips sharing one latent bias.
import tensorflow as tf
from edward.models import Bernoulli, Beta

# Prior on the coin bias: Beta(1, 1), i.e. uniform on [0, 1].
theta = Beta(a=1.0, b=1.0)
# 50 conditionally independent flips; broadcasting ties every flip to theta.
x = Bernoulli(p=tf.ones(50) * theta)
# Variational auto-encoder for 28x28 binary images (Edward model + Keras layers).
import tensorflow as tf
from edward.models import Bernoulli, Normal
from keras.layers import Dense

N = 55000  # number of data points
d = 50  # latent dimension

# Probabilistic model: standard normal prior on z, neural-network decoder to x.
z = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
h = Dense(256, activation='relu')(z)
# Pixel-wise Bernoulli likelihood over the 784 image pixels.
x = Bernoulli(logits=Dense(28 * 28, activation=None)(h))

# Variational model q(z | x): an inference (encoder) network fed observed images.
qx = tf.placeholder(tf.float32, [N, 28 * 28])
qh = Dense(256, activation='relu')(qx)
qz = Normal(mu=Dense(d, activation=None)(qh),
            sigma=Dense(d, activation='softplus')(qh))  # softplus keeps sigma > 0
# Bayesian recurrent neural network: normal priors on all weights and biases.
import edward as ed
import tensorflow as tf
from edward.models import Normal

H = 50  # number of hidden units
D = 10  # number of features

def rnn_cell(hprev, xt):
    # One recurrence step. Wh, Wx, bh are module-level names resolved at call
    # time (Python late binding), so defining them below is fine.
    return tf.tanh(ed.dot(hprev, Wh) + ed.dot(xt, Wx) + bh)

# Priors: hidden-to-hidden, input-to-hidden, hidden-to-output weights + biases.
Wh = Normal(mu=tf.zeros([H, H]), sigma=tf.ones([H, H]))
Wx = Normal(mu=tf.zeros([D, H]), sigma=tf.ones([D, H]))
Wy = Normal(mu=tf.zeros([H, 1]), sigma=tf.ones([H, 1]))
bh = Normal(mu=tf.zeros(H), sigma=tf.ones(H))
by = Normal(mu=tf.zeros(1), sigma=tf.ones(1))

x = tf.placeholder(tf.float32, [None, D])  # input sequence, one row per step
# Unroll the recurrence over time steps with tf.scan.
h = tf.scan(rnn_cell, x, initializer=tf.zeros(H))
y = Normal(mu=tf.matmul(h, Wy) + by, sigma=1.0)  # Gaussian output per step
# Mixture of Gaussians: K clusters in D dimensions over N data points.
import tensorflow as tf
from edward.models import Categorical, Normal

N = 10000  # number of data points
D = 2  # data dimension
K = 5  # number of clusters

# Cluster means with a standard normal prior.
beta = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
# Uniform cluster assignment per data point (zero logits = uniform).
z = Categorical(logits=tf.zeros([N, K]))
# Each point is drawn around its assigned cluster's mean.
x = Normal(mu=tf.gather(beta, z), sigma=tf.ones([N, D]))
# Variational inference for the mixture model above (uses beta, z, x, N, D, K
# from the preceding snippet).
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Categorical, Normal

x_train = np.zeros([N, D], dtype=np.float32)  # placeholder data; substitute real data

# Variational approximations: a Normal over cluster means and a Categorical
# over assignments, each parameterized by free tf.Variables.
qbeta = Normal(mu=tf.Variable(tf.zeros([K, D])),
               sigma=tf.exp(tf.Variable(tf.zeros([K, D]))))  # exp keeps sigma > 0
qz = Categorical(logits=tf.Variable(tf.zeros([N, K])))

# NOTE(review): ed.VariationalInference is the abstract base class; in practice
# a concrete subclass (e.g. ed.KLqp) is instantiated — shown generically here.
inference = ed.VariationalInference({beta: qbeta, z: qz}, data={x: x_train})
# Monte Carlo approximations for the mixture model: posteriors represented by
# collections of samples (uses N, D, K from the earlier mixture snippet).
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Empirical

x_train = np.zeros([N, D], dtype=np.float32)  # placeholder data

T = 10000  # number of samples
# Empirical distributions store T samples each, filled in by a sampler.
qbeta = Empirical(params=tf.Variable(tf.zeros([T, K, D])))
qz = Empirical(params=tf.Variable(tf.zeros([T, N])))
# Generative adversarial network for 28x28 images, trained with GAN inference.
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Normal
from keras.layers import Dense

N = 55000  # number of data points
d = 50  # latent dimension

def generative_network(eps):
    # Maps latent noise to 784-dimensional image logits/values.
    h = Dense(256, activation='relu')(eps)
    return Dense(28 * 28, activation=None)(h)

def discriminative_network(x):
    # Scores an image batch; one real-valued logit per example.
    h = Dense(28 * 28, activation='relu')(x)
    return Dense(1, activation=None)(h)

# DATA (placeholder zeros; substitute the real dataset here)
x_train = np.zeros([N, 28 * 28], dtype=np.float32)

# MODEL: implicit generative model — x is a deterministic transform of noise.
eps = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
x = generative_network(eps)

# INFERENCE: adversarial training against the discriminator.
inference = ed.GANInference(data={x: x_train},
                            discriminator=discriminative_network)
# Variational EM for the mixture model: alternate an E-step (update q(z)) with
# an M-step (point estimate of beta). Uses beta, z, x, N, D, K from the earlier
# mixture-model snippet.
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Categorical, PointMass

# DATA (placeholder zeros; substitute the real dataset here)
x_train = np.zeros([N, D], dtype=np.float32)

# INFERENCE
# PointMass gives a delta-function "posterior" — i.e. a point estimate of beta.
qbeta = PointMass(params=tf.Variable(tf.zeros([K, D])))
qz = Categorical(logits=tf.Variable(tf.zeros([N, K])))
# E-step: infer assignments with beta fixed at its current point estimate.
inference_e = ed.VariationalInference({z: qz}, data={x: x_train, beta: qbeta})
# M-step: MAP-estimate beta with the assignments fixed.
inference_m = ed.MAP({beta: qbeta}, data={x: x_train, z: qz})

inference_e.initialize()
inference_m.initialize()
# tf.initialize_all_variables() is deprecated as of TF 0.12/1.0; the modern,
# behaviorally identical op is global_variables_initializer. The .run() call
# assumes a default session is active (e.g. inside tf.InteractiveSession).
tf.global_variables_initializer().run()
for _ in range(10000):
    inference_e.update()
    inference_m.update()
# Minibatch (data-subsampling) variational inference for the mixture model.
import edward as ed
import tensorflow as tf
from edward.models import Categorical, Normal

N = 10000  # number of data points
M = 128  # batch size during training
D = 2  # data dimension
K = 5  # number of clusters

# DATA: a placeholder fed one minibatch of M points at a time.
x_batch = tf.placeholder(tf.float32, [M, D])

# MODEL: same mixture as before, but defined over a minibatch of size M.
beta = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
z = Categorical(logits=tf.zeros([M, K]))
x = Normal(mu=tf.gather(beta, z), sigma=tf.ones([M, D]))

# INFERENCE
qbeta = Normal(mu=tf.Variable(tf.zeros([K, D])),
               sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
# BUG FIX: logits must range over the K clusters, not the D data dimensions
# (was tf.zeros([M, D]); compare z above and the full-data qz, which use K).
qz = Categorical(logits=tf.Variable(tf.zeros([M, K])))

inference = ed.VariationalInference({beta: qbeta, z: qz}, data={x: x_batch})
# Scale minibatch log-likelihood terms by N/M so they estimate the full-data sum.
inference.initialize(scale={x: float(N) / M, z: float(N) / M})
# Bayesian logistic regression (covertype-sized data), inferred with HMC.
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Bernoulli, Empirical, Normal

N = 581012  # number of data points
D = 54  # number of features
T = 100  # number of empirical samples

# DATA (placeholder zeros; substitute the real dataset here)
x_data = np.zeros([N, D], dtype=np.float32)
y_data = np.zeros([N], dtype=np.float32)

# MODEL
# Store the (fixed) features in the graph as a non-trainable variable.
x = tf.Variable(x_data, trainable=False)
beta = Normal(mu=tf.zeros(D), sigma=tf.ones(D))  # prior on regression weights
y = Bernoulli(logits=ed.dot(x, beta))

# INFERENCE
# Empirical holds T posterior samples of beta, to be filled in by HMC.
qbeta = Empirical(params=tf.Variable(tf.zeros([T, D])))
inference = ed.HMC({beta: qbeta}, data={y: y_data})
inference.run(step_size=0.5 / N, n_steps=10)  # leapfrog step size scaled by N
# Two-layer Bayesian neural network for binary classification.
import tensorflow as tf
from edward.models import Bernoulli, Normal

N = 1000  # number of data points
D = 528  # number of features
H = 256  # hidden layer size

# Standard normal priors on both weight matrices and both bias vectors.
W_0 = Normal(mu=tf.zeros([D, H]), sigma=tf.ones([D, H]))
W_1 = Normal(mu=tf.zeros([H, 1]), sigma=tf.ones([H, 1]))
b_0 = Normal(mu=tf.zeros(H), sigma=tf.ones(H))
b_1 = Normal(mu=tf.zeros(1), sigma=tf.ones(1))

x = tf.placeholder(tf.float32, [N, D])
# Forward pass: tanh hidden layer, then a single logit per example.
y = Bernoulli(logits=tf.matmul(tf.nn.tanh(tf.matmul(x, W_0) + b_0), W_1) + b_1)
# Latent Dirichlet allocation: D documents, K topics, vocabulary of size V.
import tensorflow as tf
from edward.models import Categorical, Dirichlet

D = 4  # number of documents
N = [11502, 213, 1523, 1351]  # words per doc
K = 10  # number of topics
V = 100000  # vocabulary size

theta = Dirichlet(alpha=tf.zeros([D, K]) + 0.1)  # per-document topic proportions
phi = Dirichlet(alpha=tf.zeros([K, V]) + 0.05)  # per-topic word distributions

# Pre-allocate one slot per word in each document.
# BUG FIX: the original `[[0] * N] * D` raises TypeError (N is a list, not an
# int) and, even with an int, would alias a single row D times. Build an
# independent, correctly-sized list per document instead.
z = [[0] * N[d] for d in range(D)]
w = [[0] * N[d] for d in range(D)]
for d in range(D):
    for n in range(N[d]):
        # Topic assignment for word n of document d, then the word itself
        # drawn from that topic's word distribution.
        z[d][n] = Categorical(pi=theta[d, :])
        w[d][n] = Categorical(pi=phi[z[d][n], :])
# Probabilistic matrix factorization: Y (N x M) = U V^T + noise.
import tensorflow as tf
from edward.models import Normal

N = 10
M = 10
K = 5  # latent dimension

# Latent factor matrices with standard normal priors.
U = Normal(mu=tf.zeros([M, K]), sigma=tf.ones([M, K]))
V = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))
# Observed matrix: inner products of factors, with unit observation noise.
Y = Normal(mu=tf.matmul(U, V, transpose_b=True), sigma=tf.ones([N, M]))
# Dirichlet process mixture model of Gaussians.
import tensorflow as tf
from edward.models import Normal

# NOTE(review): DirichletProcess, D, and N are not defined in this snippet —
# they come from the accompanying script, as the comment says. Presumably it
# draws N cluster means from a DP with concentration 0.1 and a Normal base.
mu = DirichletProcess( # see script for implementation
    alpha=0.1, base_cls=Normal, mu=tf.zeros(D), sigma=tf.ones(D), sample_n=N)
# Each data point is drawn around its (shared or new) cluster mean.
x = Normal(mu=mu, sigma=tf.ones([N, D]))