Welcome! If you are new to Google Colab/Jupyter notebooks, you might take a look at this notebook first.
I recommend you run the first code cell of this notebook immediately, to start provisioning drake on the cloud machine, then you can leave this window open as you read the textbook.
The following cell will:
/opt/drake
, install Drake's prerequisites via apt
, and add pydrake to sys.path
. This will take approximately two minutes on the first time it runs (to provision the machine), but should only need to reinstall once every 12 hours. If you navigate between notebooks using Colab's "File->Open" menu, then you can avoid provisioning a separate machine for each notebook.You will need to rerun this cell if you restart the kernel, but it should be fast (even on Colab) because the machine will already have drake installed.
import importlib
import sys
from urllib.request import urlretrieve
# Install drake (and underactuated).
if 'google.colab' in sys.modules and importlib.util.find_spec('underactuated') is None:
urlretrieve(f"http://underactuated.csail.mit.edu/scripts/setup/setup_underactuated_colab.py",
"setup_underactuated_colab.py")
from setup_underactuated_colab import setup_underactuated
setup_underactuated(underactuated_sha='ffe2b28ed89637889c04405e5d7d2d98be3df5b6', drake_version='0.27.0', drake_build='release')
server_args = []
if 'google.colab' in sys.modules:
server_args = ['--ngrok_http_tunnel']
# Start a single meshcat server instance to use for the remainder of this notebook.
from meshcat.servers.zmqserver import start_zmq_server_as_subprocess
proc, zmq_url, web_url = start_zmq_server_as_subprocess(server_args=server_args)
# Imports.
import numpy as np
import meshcat
from ipywidgets import interact, FloatSlider, ToggleButton
from IPython.display import display
import pydrake.all
from pydrake.all import (AddMultibodyPlantSceneGraph, DiagramBuilder, PlanarSceneGraphVisualizer, SceneGraph, Simulator)
from pydrake.systems.jupyter_widgets import WidgetSystem
from pydrake.examples.pendulum import PendulumGeometry, PendulumPlant
import underactuated
from underactuated.jupyter import AdvanceToAndVisualize, SetupMatplotlibBackend, running_as_notebook
import underactuated.meshcat_utils as mutil
I find it extremely useful to use simulation to get physical intuition about these systems. Let's make sure we understand how the simple pendulum moves when it is exposed to a torque.
For the duration of this notebook, we'll use the equations of motion $$ml^2 \ddot\theta + b\dot\theta + mgl \sin\theta = u,$$ where $u$ is our torque input.
The pendulum is a core example in Drake. We could certainly load it from a .urdf file, but Drake offers a Pendulum implementation that makes it convenient to manipulate the parameters (and visualize the system with different parameters).
builder = DiagramBuilder()
pendulum = builder.AddSystem(PendulumPlant())
# Setup visualization
scene_graph = builder.AddSystem(SceneGraph())
PendulumGeometry.AddToBuilder(builder, pendulum.get_state_output_port(), scene_graph)
visualizer = pydrake.systems.meshcat_visualizer.ConnectMeshcatVisualizer(
builder,
scene_graph=scene_graph,
zmq_url=zmq_url)
visualizer.set_planar_viewpoint()
visualizer.vis.delete()
# Setup slider input
slider = FloatSlider(value=0.0, min=-5., max=5., step=0.1, description='u', continuous_update=True)
torque_system = builder.AddSystem(WidgetSystem([slider]))
builder.Connect(torque_system.get_output_port(0), pendulum.get_input_port(0))
diagram = builder.Build()
# Set up a simulator to run this diagram
simulator = Simulator(diagram)
context = simulator.get_mutable_context()
stop_button = ToggleButton(value=False, description='Stop Simulation')
display(stop_button)
# Set the initial conditions
context.SetContinuousState([0.5, 0]) # theta, thetadot
context.SetTime(0.0)
if running_as_notebook: # Then we're not just running as a test on CI.
simulator.set_target_realtime_rate(1.0)
while not stop_button.value:
simulator.AdvanceTo(simulator.get_context().get_time() + 1.0)
stop_button.value = False
else:
simulator.AdvanceTo(0.1)
After running the cell above, you should see an interactive slider that will control the torque, and be prompted to open a window for the visualizer (you'll have to open the new window if you run that cell again). Press the "Stop Simulation" button to regain control and continue in the notebook.
Note: I've used a different visualizer here so that you can interact with the simulation as it runs, even on Google Colab. But the matplotlib-based visualizer will serve us well for most of the course.
The simplest recurrent neural network model. $$\dot{x} = -x + \tanh(wx + u)$$
def autapse(x, w=1, u=0):
"""Args:
w is feedback weight
u is input
"""
return -x + np.tanh(w * x + u)
Autapse = np.vectorize(autapse)
xmax = 2.
ymax = 1.
x = np.arange(-xmax, xmax, 0.01)
vis = meshcat.Visualizer(zmq_url=zmq_url, server_args=server_args)
mutil.set_planar_viewpoint(vis, xmax=xmax, xmin=-xmax, ymin=-ymax, ymax=ymax)
vis.delete()
vis['/Grid'].set_property("visible", True)
vis['/Axes'].set_property("visible", True)
def update(w=1, u=0):
vertices = np.vstack([x,0*x, Autapse(x, w=w, u=u)])
vis["autpase"].set_object(meshcat.geometry.Line(meshcat.geometry.PointsGeometry(vertices),meshcat.geometry.LineBasicMaterial(color=0x000000)))
interact(update, w=(0,3,0.1), u=(-1.5,1.5,0.1));
A recurrent neural network component.. Wikipedia
We'll look at the "gated recurrent unit" version... \begin{align} i[n] &= \sigma_g(A_{i} x[n] + B_{i} u[n] + c_i) & \text{input gate} \\ o[n] &= \sigma_g(A_{o} x[n] + B_{o} u[n] + c_o) & \text{output gate} \\ f[n] &= \sigma_g(A_{f} x[n] + B_{f} u[n] + c_f) & \text{forget gate} \\ x[n+1] &= f[n] \circ x[n] + i[n] \circ \tanh(B_{x} u[n] + c_c) \\ y[n] &= \tanh(o[n] \circ x[n]) \end{align}
where the operator $\circ$ denotes the Hadamard product (element-wise product). $\sigma _{g}(x) = \frac{1}{1+e^{-x}} \in (0,1)$ is the sigmoid function and recall that $\tanh(x) \in (-1,1)$.
In the example below, we will plot the continuous-time version of this (to stay consistent with the rest of the analysis in the chapter), $\dot{x}$ vs $x$. To keep the number of parameters reasonable, I've assumed that there are three inputs -- with $$u = \begin{bmatrix} u_{input} \\ u_{forget} \\ u_{x} \end{bmatrix}, B_i = [ 1, 0 0 ], B_f = [ 0, 1, 0 ], B_x = [ 0, 0, 1].$$ Note that the output $y$ has no impact on the stability and is not represented below.
def sigma(x):
return 1./(1+np.exp(-x))
def lstm(x, uf=0, ui=0, ux=0, af=1, bf=1, cf=0, ai=1, bi=1, ci=0, bx=1, cx=0):
return - x + sigma(af*x + bf*uf + cf)*x + sigma(ai*x + bi*ui + ci) * np.tanh(bx*ux+cx)
Lstm = np.vectorize(lstm)
xmax = 10.
ymax = 4.
x = np.arange(-xmax, xmax, 0.01)
vis = meshcat.Visualizer(zmq_url=zmq_url, server_args=server_args)
mutil.set_planar_viewpoint(vis, xmax=xmax, xmin=-xmax, ymin=-ymax, ymax=ymax)
vis.delete()
vis['/Grid'].set_property("visible", True)
vis['/Axes'].set_property("visible", True)
def update(u_forget, u_input, u_x,
a_forget, b_forget, c_forget,
a_input, b_input, c_input,
b_x, c_x):
vertices = np.vstack([x,0*x, Lstm(x,
uf=u_forget, ui=u_input, ux=u_x,
af=a_forget, bf=b_forget, cf=c_forget,
ai=a_input, bi=b_input, ci=c_input,
bx=b_x, cx=c_x)])
vis["autpase"].set_object(meshcat.geometry.Line(meshcat.geometry.PointsGeometry(vertices),meshcat.geometry.LineBasicMaterial(color=0x000000)))
interact(update,
u_forget=(-10,10,0.1),
u_input=(-10,10,0.1),
u_x=(-10,10,0.1),
a_forget=(0,2,0.1),
b_forget=(0,2,0.1),
c_forget=(-1,1,.1),
a_input=(0,2,0.1),
b_input=(0,2,0.1),
c_input=(-1,1,.1),
b_x=(0,2,0.1),
c_x=(-1,1,.1),
);
I will make a few observations. First, I'll admit that the behavior here looks pretty strange to me! When "forget gate" is on (corresponding to a large negative input $u_{forget}$) and the "input gate" is off (corresponding to a large negative input $u_{input}$), the system has a stable fixed point close to zero. When the forget gate is off (large $u_{forget}$ -- yes, it feels backwards to me, too) then the system becomes an integrator which will accumulate the input $u_x$. Yes -- it's unstable! And no, it's not an artifact of the continuous-time implementation. The total output of the system is limited by the output $\tanh$, but the internal state is unstable.
The design of these units does not appear to have been made by someone who knew dynamics (nor graphical analysis). That they seem to work somewhat well in practice is fairly remarkable to me. Shouldn't we be able to do better?
Note: The parameter space here is large and the interpretations subtle. If you find a better parameter regime and/or different interpretation, then I'd love to hear about it.
First we will design the energy shaping controller (only), and plot the closed-loop phase portrait. Remember, this system is not actually stable at the upright. It is only attractive!
import numpy as np
import matplotlib.pyplot as plt
from copy import copy
from pydrake.all import (DiagramBuilder, Saturation, SignalLogger,
Simulator, wrap_to, VectorSystem)
from pydrake.examples.pendulum import (PendulumParams, PendulumPlant)
class EnergyShapingController(VectorSystem):
def __init__(self, pendulum):
VectorSystem.__init__(self, 2, 1)
self.pendulum = pendulum
self.pendulum_context = pendulum.CreateDefaultContext()
self.SetPendulumParams(PendulumParams())
def SetPendulumParams(self, params):
self.pendulum_context.get_mutable_numeric_parameter(0).SetFromVector(params.CopyToVector())
self.pendulum_context.SetContinuousState([np.pi, 0])
self.desired_energy = self.pendulum.EvalPotentialEnergy(self.pendulum_context)
def DoCalcVectorOutput(self, context, pendulum_state, unused, output):
self.pendulum_context.SetContinuousState(pendulum_state)
params = self.pendulum_context.get_numeric_parameter(0)
theta = pendulum_state[0]
thetadot = pendulum_state[1]
total_energy = self.pendulum.EvalPotentialEnergy(self.pendulum_context) + self.pendulum.EvalKineticEnergy(self.pendulum_context)
output[:] = (params.damping() * thetadot - .1 * thetadot *
(total_energy - self.desired_energy))
def PhasePlot(pendulum):
phase_plot = plt.figure()
ax = phase_plot.gca()
theta_lim = [-np.pi, 3. * np.pi]
ax.set_xlim(theta_lim)
ax.set_ylim(-10., 10.)
theta = np.linspace(theta_lim[0], theta_lim[1], 601) # 4*k + 1
thetadot = np.zeros(theta.shape)
context = pendulum.CreateDefaultContext()
params = context.get_numeric_parameter(0)
context.SetContinuousState([np.pi, 0])
E_upright = pendulum.EvalPotentialEnergy(context)
E = [E_upright, .1 * E_upright, 1.5 * E_upright]
for e in E:
for i in range(theta.size):
v = ((e + params.mass() * params.gravity() * params.length() *
np.cos(theta[i])) /
(.5 * params.mass() * params.length() * params.length()))
if (v >= 0):
thetadot[i] = np.sqrt(v)
else:
thetadot[i] = float("nan")
ax.plot(theta, thetadot, color=[.6, .6, .6])
ax.plot(theta, -thetadot, color=[.6, .6, .6])
return ax
builder = DiagramBuilder()
pendulum = builder.AddSystem(PendulumPlant())
ax = PhasePlot(pendulum)
saturation = builder.AddSystem(Saturation(min_value=[-3], max_value=[3]))
builder.Connect(saturation.get_output_port(0), pendulum.get_input_port(0))
controller = builder.AddSystem(EnergyShapingController(pendulum))
builder.Connect(pendulum.get_output_port(0), controller.get_input_port(0))
builder.Connect(controller.get_output_port(0), saturation.get_input_port(0))
#visualizer = pydrake.systems.meshcat_visualizer.ConnectMeshcatVisualizer(
# builder,
# scene_graph=scene_graph,
# zmq_url=zmq_url)
#visualizer.vis.delete()
#visualizer.set_planar_viewpoint(xmin=-3.0, xmax=3.0, ymin=-3.0, ymax=4.0)
logger = builder.AddSystem(SignalLogger(2))
builder.Connect(pendulum.get_output_port(0), logger.get_input_port(0))
diagram = builder.Build()
simulator = Simulator(diagram)
context = simulator.get_mutable_context()
for i in range(5):
context.SetTime(0.)
simulator.Initialize()
logger.reset()
context.SetContinuousState(np.random.randn(2,))
simulator.AdvanceTo(4)
ax.plot(logger.data()[0, :], logger.data()[1, :])
Now we will combine our simple energy shaping controller with a linear controller that stabilizes the upright fixed point once we get close enough. We'll read more about this approach in the Acrobot and Cart-Pole notes.
from pydrake.all import Linearize, LinearQuadraticRegulator
def BalancingLQR(pendulum):
context = pendulum.CreateDefaultContext()
pendulum.get_input_port(0).FixValue(context, [0])
context.SetContinuousState([np.pi, 0])
Q = np.diag((10., 1.))
R = [1]
linearized_pendulum = Linearize(pendulum, context)
(K, S) = LinearQuadraticRegulator(linearized_pendulum.A(),
linearized_pendulum.B(), Q, R)
return (K, S)
class SwingUpAndBalanceController(VectorSystem):
def __init__(self, pendulum):
VectorSystem.__init__(self, 2, 1)
(self.K, self.S) = BalancingLQR(pendulum)
self.energy_shaping = EnergyShapingController(pendulum)
self.energy_shaping_context = self.energy_shaping.CreateDefaultContext()
# TODO(russt): Add a witness function to tell the simulator about the
# discontinuity when switching to LQR.
def DoCalcVectorOutput(self, context, pendulum_state, unused, output):
xbar = copy(pendulum_state)
xbar[0] = wrap_to(xbar[0], 0, 2. * np.pi) - np.pi
# If x'Sx <= 2, then use the LQR controller
if (xbar.dot(self.S.dot(xbar)) < 2.):
output[:] = -self.K.dot(xbar)
else:
self.get_input_port(0).FixValue(self.energy_shaping_context, pendulum_state)
output[:] = self.energy_shaping.get_output_port(0).Eval(self.energy_shaping_context)
builder = DiagramBuilder()
pendulum = builder.AddSystem(PendulumPlant())
ax = PhasePlot(pendulum)
saturation = builder.AddSystem(Saturation(min_value=[-3], max_value=[3]))
builder.Connect(saturation.get_output_port(0), pendulum.get_input_port(0))
controller = builder.AddSystem(SwingUpAndBalanceController(pendulum))
builder.Connect(pendulum.get_output_port(0), controller.get_input_port(0))
builder.Connect(controller.get_output_port(0), saturation.get_input_port(0))
logger = builder.AddSystem(SignalLogger(2))
builder.Connect(pendulum.get_output_port(0), logger.get_input_port(0))
diagram = builder.Build()
simulator = Simulator(diagram)
context = simulator.get_mutable_context()
for i in range(5):
context.SetTime(0.)
simulator.Initialize()
logger.reset()
context.SetContinuousState(np.random.randn(2,))
simulator.AdvanceTo(4)
ax.plot(logger.data()[0, :], logger.data()[1, :])
ax.set_xlim(np.pi - 3., np.pi + 3.)
ax.set_ylim(-5., 5.)