In [1]:

# set up matplotlib and rendering
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from IPython import display

OpenAI Gym¶

https://gym.openai.com/docs/ simple tutorial

https://gym.openai.com/envs/#robotics Robotics environments

https://www.gymlibrary.ml/environments/mujoco/ Mujoco environments

https://www.gymlibrary.ml/ documentation

https://www.gymlibrary.ml/content/environment_creation/ Creating custom environments

In [2]:

import gym
display.HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/PGLzm-Gy0dQ" frameborder="0" allowfullscreen></iframe>')

/home/yasu/.local/lib/python3.10/site-packages/gym/wrappers/monitoring/video_recorder.py:9: DeprecationWarning: The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives
  import distutils.spawn
/home/yasu/.local/lib/python3.10/site-packages/IPython/core/display.py:419: UserWarning: Consider using IPython.display.IFrame instead
  warnings.warn("Consider using IPython.display.IFrame instead")

Out[2]:

MuJoCo environment¶

mujoco environment for gym is probably already prepared if you have mujoco-py

But for some weird reason, if the other environments are run before this, notebook kernel fails...

In [3]:

env = gym.make('Ant-v2')
env.reset()

fig = plt.figure()
ims = []
for _ in range(20):
    ims.append([plt.imshow(env.render(mode='rgb_array', width=256, height=256))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

/home/yasu/.local/lib/python3.10/site-packages/gym/envs/registration.py:505: UserWarning: WARN: The environment Ant-v2 is out of date. You should consider upgrading to version `v3` with the environment ID `Ant-v3`.
  logger.warn(
/home/yasu/.local/lib/python3.10/site-packages/mujoco_py/builder.py:9: DeprecationWarning: The distutils.sysconfig module is deprecated, use sysconfig instead
  from distutils.sysconfig import customize_compiler
<frozen importlib._bootstrap>:283: DeprecationWarning: the load_module() method is deprecated and slated for removal in Python 3.12; use exec_module() instead

Robotics environment¶

pip3 install gym-robotics

This must also be run before the other environments...

In [4]:

env = gym.make('HandManipulateBlock-v0')
env.reset()

fig = plt.figure()
ims = []
for _ in range(20):
    ims.append([plt.imshow(env.render(mode='rgb_array', width=256, height=256))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

Box2D environments¶

https://www.gymlibrary.ml/environments/box2d/

to get the Box2D environment...

sudo apt install swig
pip3 install box2d
pip3 install box2d-py

In [5]:

env = gym.make('CarRacing-v1')
env.reset()

fig = plt.figure()
ims = []
for _ in range(20):
    ims.append([plt.imshow(env.render(mode='rgb_array'))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

Track generation: 1021..1288 -> 267-tiles track

In [6]:

env = gym.make('BipedalWalker-v3')
env.reset()

fig = plt.figure()
ims = []
for _ in range(20):
    ims.append([plt.imshow(env.render(mode='rgb_array'))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

In [7]:

env = gym.make('CartPole-v1')
env.reset()

fig = plt.figure()
ims = []
for _ in range(50):
    ims.append([plt.imshow(env.render(mode='rgb_array'))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

/home/yasu/.local/lib/python3.10/site-packages/gym/envs/classic_control/cartpole.py:163: UserWarning: WARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.
  logger.warn(

In [8]:

env = gym.make('MountainCar-v0')
env.reset()

fig = plt.figure()
ims = []
for _ in range(50):
    ims.append([plt.imshow(env.render(mode='rgb_array'))])
    env.step(env.action_space.sample()) # take a random action

env.close()
ani = animation.ArtistAnimation(fig, ims, interval=50)
html = display.HTML(ani.to_jshtml())
display.display(html)
plt.close()

let's look at the action / observation space, and what env.step returns...

In [9]:

env = gym.make('CartPole-v1')
env.reset()
observation, info = env.reset(return_info=True)
print(f"info:{info}")
print(f"action space:{env.action_space}")
# one action pushes cart to right and the other to the left
print(f"observation space:{env.observation_space}")
print(f"reward range:{env.reward_range}")

for i in range(3):
    observation, reward, done, info = env.step(env.action_space.sample()) # take a random action
    print("-"*10)
    print(f"observation:{observation}")
    print(f"reward:{reward}")
    print(f"done:{done}")
    print(f"info:{info}")

env.close()

info:{}
action space:Discrete(2)
observation space:Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)
reward range:(-inf, inf)
----------
observation:[-0.00195471 -0.17760813  0.0406442   0.35323688]
reward:1.0
done:False
info:{}
----------
observation:[-0.00550687 -0.37328374  0.04770894  0.65845406]
reward:1.0
done:False
info:{}
----------
observation:[-0.01297255 -0.5690361   0.06087802  0.9657696 ]
reward:1.0
done:False
info:{}

In [10]:

from gym import spaces
space = spaces.Discrete(8)
print(space)
print(space.contains(2))
print(space.contains(8))
print(space.n)

Discrete(8)
True
False
8

Control environment with keyboard input

In [11]:

import pygame
from gym.utils.play import play
mapping = {(pygame.K_UP,): 2, (pygame.K_DOWN,): 3}
env = gym.make("Pong-ram-v0")
print(env.unwrapped.get_action_meanings())
play(env, keys_to_action=mapping)

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

/home/yasu/.local/lib/python3.10/site-packages/gym/utils/play.py:11: UserWarning: WARN: failed to set matplotlib backend, plotting will not work: No module named 'tkinter'
  logger.warn(f"failed to set matplotlib backend, plotting will not work: {str(e)}")
/home/yasu/.local/lib/python3.10/site-packages/gym/envs/registration.py:505: UserWarning: WARN: The environment Pong-ram-v0 is out of date. You should consider upgrading to version `v5` with the environment ID `ALE/Pong-ram-v5`.
  logger.warn(
A.L.E: Arcade Learning Environment (version 0.7.5+db37282)
[Powered by Stella]