%%capture
%load_ext autoreload
%load_ext tikzmagic
%autoreload 2
import sys
sys.path.append("..")
import numpy as np
#reveal configuration
from notebook.services.config import ConfigManager
cm = ConfigManager()
cm.update('livereveal', {
    'theme': 'white',
    'transition': 'none',
    'controls': 'false',
    'progress': 'true',
})
%%html
<style>
.red { color: #E41A1C; }
.orange { color: #FF7F00 }
.yellow { color: #FFC020 }
.green { color: #4DAF4A }
.blue { color: #377EB8; }
.purple { color: #984EA3 }
h1 {
color: #377EB8;
}
ctb_global_show div.ctb_hideshow.ctb_show {
display: inline;
}
div.tabContent {
padding: 0px;
background: #ffffff;
border: 0px;
}
.left {
float: left;
width: 50%;
vertical-align: text-top;
}
.right {
margin-left: 50%;
vertical-align: text-top;
}
.small {
zoom: 0.9;
-ms-zoom: 0.9;
-webkit-zoom: 0.9;
-moz-transform: scale(0.9,0.9);
-moz-transform-origin: left center;
}
.verysmall {
zoom: 0.75;
-ms-zoom: 0.75;
-webkit-zoom: 0.75;
-moz-transform: scale(0.75,0.75);
-moz-transform-origin: left center;
}
.tiny {
zoom: 0.6;
-ms-zoom: 0.6;
-webkit-zoom: 0.6;
-moz-transform: scale(0.6,0.6);
-moz-transform-origin: left center;
}
.rendered_html blockquote {
border-left-width: 0px;
padding: 15px;
margin: 0px;
width: 100%;
}
.rendered_html th {
padding: 0.5em;
border: 0px;
}
.rendered_html td {
padding: 0.25em;
border: 0px;
}
/* for reveal */
.aside .controls, .reveal .controls {
display: none !important;
width: 0px !important;
height: 0px !important;
}
.rise-enabled .reveal .slide-number {
right: 25px;
bottom: 25px;
font-size: 200%;
color: #377EB8;
}
.rise-enabled .reveal .progress span {
background: #377EB8;
}
.present .top {
position: fixed !important;
top: 0 !important;
}
.present .rendered_html * + p, .present .rendered_html p, .present .rendered_html * + br, .present .rendered_html br {
margin: 0.5em 0;
}
.present tr, .present td {
border: 0px;
padding: 0.35em;
}
.present th {
border: 1px;
}
.present .prompt {
min-width: 0px !important;
transition-duration: 0s !important;
}
.prompt {
min-width: 0px !important;
transition-duration: 0s !important;
}
.rise-enabled .cell li {
line-height: 135%;
}
</style>
Graphical Models, Structured Prediction, Probabilistic Inference, Feature Engineering
Relation Extraction, Matrix Factorization, Representation Learning
Representation Learning, Deep Learning
Change of notation: $$ s_\params(\x,y) \in \mathbb{R} $$ becomes $$ f_\params(\x)_y \in \mathbb{R} $$
where $f_\params(\x) \in \mathbb{R}^{|\Ys|}$ represents the scores for each possible solution $y$
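Concretely (a made-up three-label example, purely for illustration): the model now returns one vector of scores, and the score of any particular solution $y$ is read off by indexing into it.
labels = ["positive", "negative", "neutral"]  # a made-up solution space Y
scores = [1.3, -0.2, 0.7]                     # f_theta(x): one score per candidate y
y = 2
score_of_y = scores[y]                        # f_theta(x)_y, a single real number
y_hat = scores.index(max(scores))             # prediction: the highest-scoring y
print(labels[y_hat], score_of_y)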
A function $\mathcal{L}$ that, given a model $f_\theta$, an input $x$ and a gold output $y$, measures how far the prediction is from the truth, for example:
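One standard choice, and the one used for training further below (`tf.square(target_z - mlp_z)`), is the squared error:
$$ \mathcal{L}(f_\theta, x, y) = \left(f_\theta(x) - y\right)^2 $$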
Goal: find parameters $\theta$ of the model $f_\theta$ that minimize the loss function $\mathcal{L}$
$f_\theta: \mathbb{R}^4 \to \mathbb{R}^2$
import tensorflow as tf
seed = 0
#input
input_sz = 3
output_sz = 1
x = tf.placeholder(tf.float32, shape=[input_sz, 1])  # column input vector
#parameters
W = tf.Variable(tf.random_uniform([output_sz,input_sz], -0.1, 0.1, seed=seed))
b = tf.Variable(tf.zeros(output_sz))
#f_theta
z = tf.nn.sigmoid(tf.matmul(W,x) + b) #sigmoid(Wx + b)
sess = tf.Session()
sess.run(tf.global_variables_initializer()) #initialize W and b
sess.run(W)
array([[-0.07982747, 0.09403337, 0.06975283]], dtype=float32)
sess.run(b)
array([ 0.], dtype=float32)
Forward: $\mathbf{z} = f_\theta(\mathbf{x})$
sess.run(z, feed_dict={x: [[-5.5],[2.0],[-0.5]]})
array([[ 0.64387923]], dtype=float32)
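To make explicit what this run computes, here is a small numpy re-implementation of the same forward pass; it is only a sketch, with `W_np` and `b_np` copied from the values printed by `sess.run(W)` and `sess.run(b)` above.
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

W_np = np.array([[-0.07982747, 0.09403337, 0.06975283]])  # copied from sess.run(W)
b_np = np.array([[0.0]])                                  # copied from sess.run(b)
x_np = np.array([[-5.5], [2.0], [-0.5]])                  # same input as fed above

z_np = sigmoid(W_np @ x_np + b_np)  # sigmoid(Wx + b), exactly as in the TF graph
print(z_np)                         # roughly 0.6439, matching the output above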
Backward: $\partial\mathbf{W},\partial\mathbf{b},\partial\mathbf{x}$ given upstream gradient $\partial\mathbf{z}$
sess.run(tf.global_variables_initializer())
gradz = [[0.1]]
grad = tf.gradients(z,[W, b, x], grad_ys=gradz)
sess.run(grad, feed_dict={x: [[-5.5],[2.0],[-0.5]]})
[array([[-0.13708647,  0.04984963, -0.01246241]], dtype=float32),
 array([ 0.02492481], dtype=float32),
 array([[ 0.00034354],
        [-0.00093437],
        [-0.00204336]], dtype=float32)]
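For intuition, the same backward pass can be written out with the chain rule: with $\mathbf{a} = \mathbf{W}\mathbf{x} + \mathbf{b}$ and $\mathbf{z} = \sigma(\mathbf{a})$, the upstream gradient is first multiplied by the local sigmoid derivative $\mathbf{z}(1-\mathbf{z})$ and then distributed to $\mathbf{W}$, $\mathbf{b}$ and $\mathbf{x}$. Continuing the numpy sketch from the forward pass (the exact numbers differ from the output above, since re-running the initializer yields new parameter values):
grad_z = np.array([[0.1]])           # upstream gradient, as in grad_ys above
grad_a = grad_z * z_np * (1 - z_np)  # push through the sigmoid: dL/da
grad_W = grad_a @ x_np.T             # dL/dW, shape (1, 3)
grad_b = grad_a                      # dL/db
grad_x = W_np.T @ grad_a             # dL/dx, shape (3, 1)
print(grad_W, grad_b, grad_x, sep="\n")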
#input
x = tf.placeholder(tf.float32, shape=[5,1])
#parameters
W1 = tf.Variable(tf.random_uniform([3,5], seed=seed))
b1 = tf.Variable(tf.zeros([3,1]))
W2 = tf.Variable(tf.random_uniform([3,3], seed=seed))
b2 = tf.Variable(tf.zeros([3,1]))
W3 = tf.Variable(tf.random_uniform([1,3], seed=seed))
b3 = tf.Variable(tf.zeros([1,1]))
#model
h1 = tf.nn.sigmoid(tf.matmul(W1,x) + b1)
h2 = tf.nn.sigmoid(tf.matmul(W2,h1) + b2)
mlp_z = tf.matmul(W3,h2) + b3
sess.run(tf.global_variables_initializer())
x_value = [[-5.5], [2.0], [-0.5], [2.0], [4.0]]
sess.run(mlp_z, feed_dict={x: x_value})
array([[ 1.35592151]], dtype=float32)
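Written out, the model evaluated above composes two sigmoid layers with a final linear layer:
$$ \mathbf{h}_1 = \sigma(\mathbf{W}_1\mathbf{x} + \mathbf{b}_1), \quad \mathbf{h}_2 = \sigma(\mathbf{W}_2\mathbf{h}_1 + \mathbf{b}_2), \quad z = \mathbf{W}_3\mathbf{h}_2 + \mathbf{b}_3 $$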
target_z = tf.constant([[1.0]]) # what the output should be
loss = tf.square(target_z - mlp_z) # the loss function
optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
opt_op = optimizer.minimize(loss) # the TF operation that performs optimisation steps
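For reference, Adagrad scales each parameter's step by its accumulated squared gradients (this is the textbook update rule, not something defined in this notebook):
$$ \theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{\sum_{\tau \le t} g_\tau^2} + \epsilon}\, g_t $$
where $g_t$ is the gradient of the loss at step $t$ and $\eta$ is the learning rate (0.1 above).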
sess.run(tf.global_variables_initializer())
for epoch in range(5):
    _, loss_value = sess.run([opt_op, loss], feed_dict={x: x_value})
    print(loss_value)  # loss after each update step
[[ 0.07483862]]
[[ 0.00129254]]
[[ 7.06945139e-06]]
[[ 3.01882075e-08]]
[[ 1.25567112e-10]]
It learned!
sess.run(mlp_z, feed_dict={x: x_value})
array([[ 0.99999923]], dtype=float32)