In notebook 2.2ma I hit the "first structure error" (a nested-structure ValueError);
here I re-run the original example to pin down what exactly was wrong. :)
Example showing how one can create a multi-agent env, in which the different agents
have different observation and action spaces.
These spaces do NOT necessarily have to be specified manually by the user. Instead,
RLlib will try to automatically infer them from the env provided spaces dicts
(agentID -> obs/act space) and the policy mapping fn (mapping agent IDs to policy IDs).
---
Run this example with defaults (using Tune):
$ python multi_agent_different_spaces_for_agents.py
%reload_ext autoreload
%autoreload 2
import argparse
import gym
import os
import ray
from ray import tune # air,
from ray.rllib.env.multi_agent_env import MultiAgentEnv
C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\tqdm\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
from ray.rllib.agents import ppo
from ray.tune.registry import register_env
class BasicMultiAgentMultiSpaces(MultiAgentEnv):
    """A simple multi-agent example environment where agents have different spaces.

    agent0: obs=Box(10,), act=Discrete(2)
    agent1: obs=Box(20,), act=Discrete(3)

    The logic of the env doesn't really matter for this example (random
    observations, constant reward). The point is to show how one can use
    multi-agent envs in which the different agents utilize different obs-
    and action spaces, letting RLlib infer the per-agent spaces from the
    Dict spaces keyed by agent ID.
    """

    def __init__(self, config=None):
        self.agents = {"agent0", "agent1"}
        self._agent_ids = set(self.agents)
        self.dones = set()
        # FIX for the "first structure" ValueError seen in the traceback:
        # `_spaces_in_preferred_format` is NOT an attribute RLlib looks at.
        # MultiAgentEnv checks the two flags below; without them the whole
        # Dict space was treated as a single agent's space, and the per-agent
        # preprocessor then compared one agent's ndarray observation against
        # a sample of the full Dict (ndarray vs OrderedDict mismatch).
        self._obs_space_in_preferred_format = True
        self._action_space_in_preferred_format = True
        # Provide full (preferred format) observation- and action-spaces as
        # Dicts mapping agent IDs to the individual agents' spaces.
        self.observation_space = gym.spaces.Dict(
            {
                "agent0": gym.spaces.Box(low=-1.0, high=1.0, shape=(10,)),
                "agent1": gym.spaces.Box(low=-1.0, high=1.0, shape=(20,)),
            }
        )
        self.action_space = gym.spaces.Dict(
            {"agent0": gym.spaces.Discrete(2), "agent1": gym.spaces.Discrete(3)}
        )
        super().__init__()

    def reset(self):
        """Reset episode state and return an initial obs dict {agent_id: obs}."""
        self.dones = set()
        return {i: self.observation_space[i].sample() for i in self.agents}

    def step(self, action_dict):
        """Take one env step for every agent present in `action_dict`.

        Returns the standard multi-agent 4-tuple of dicts (obs, rew, done,
        info), each keyed by agent ID. Agents are never marked done here
        (`self.dones` is never filled), so episode termination is expected
        to come from the trainer's horizon / stop criteria.
        """
        obs, rew, done, info = {}, {}, {}, {}
        for i, action in action_dict.items():
            obs[i] = self.observation_space[i].sample()
            rew[i] = 1.0
            done[i] = False
            info[i] = {}
        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info
# Trainer config overrides (merged into PPO's defaults further below).
env_config = {}
config = {
    "env": BasicMultiAgentMultiSpaces,
    # "env_config": env_config,
    # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    "num_workers": 1,
    "multiagent": {
        # Use a simple set of policy IDs. Spaces for the individual policies
        # will be inferred automatically using reverse lookup via the
        # `policy_mapping_fn` and the env provided spaces for the different
        # agents. Alternatively, you could use:
        # policies: {main0: PolicySpec(...), main1: PolicySpec}
        "policies": {"main0", "main1"},
        # Simple mapping fn, mapping agent0 to main0 and agent1 to main1.
        "policy_mapping_fn": (
            lambda aid, episode, worker, **kw: f"main{aid[-1]}"
        ),
        # Only train main0.
        "policies_to_train": ["main0"],
    },
    "framework": "torch",  # torch | tf | tf2
    # BUG FIX: was the *string* "store_true" (an argparse leftover), which is
    # truthy. `eager_tracing` only applies to framework="tf2"; with torch it
    # should simply be False (a boolean, not a string).
    "eager_tracing": False,
}
# Build the final trainer config by overlaying our overrides on PPO defaults
# (shallow merge, same effect as copy() + update()).
agent_config = {**ppo.DEFAULT_CONFIG, **config}

# Make sure we start from a fresh Ray runtime.
if ray.is_initialized():
    ray.shutdown()
ray.init(include_dashboard=False, ignore_reinit_error=True)
RayContext(dashboard_url=None, python_version='3.8.12', ray_version='1.12.1', ray_commit='4863e33856b54ccf8add5cbe75e41558850a1b75', address_info={'node_ip_address': '127.0.0.1', 'raylet_ip_address': '127.0.0.1', 'redis_address': None, 'object_store_address': 'tcp://127.0.0.1:64403', 'raylet_socket_name': 'tcp://127.0.0.1:57948', 'webui_url': None, 'session_dir': 'C:\\Users\\milos\\AppData\\Local\\Temp\\ray\\session_2022-08-03_09-43-00_258476_15284', 'metrics_export_port': 56566, 'gcs_address': '127.0.0.1:57990', 'address': '127.0.0.1:57990', 'node_id': '605602b8041dae54fb0356d4e3ef35120be096c1fdd1e69b98f68f85'})
%%time
analysis = tune.run(
ppo.PPOTrainer,
stop={
"training_iteration": 10,
"timesteps_total": 10_000,
"episode_reward_mean": 80.0,
},
config=agent_config,
# Milos
verbose = 0,
fail_fast = "raise", # for debugging!
)
C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trial_runner.py:321: UserWarning: fail_fast='raise' detected. Be careful when using this mode as resources (such as Ray processes, file descriptors, and temporary files) may not be cleaned up properly. To use a safer mode, use fail_fast=True. warnings.warn( 2022-08-03 09:43:04,761 INFO trial_runner.py:803 -- starting PPOTrainer_BasicMultiAgentMultiSpaces_e890c_00000 2022-08-03 09:43:04,818 ERROR syncer.py:119 -- Log sync requires rsync to be installed. (PPOTrainer pid=2020) 2022-08-03 09:43:09,263 INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you. (PPOTrainer pid=2020) 2022-08-03 09:43:09,264 INFO trainer.py:864 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags. (RolloutWorker pid=13316) 2022-08-03 09:43:13,216 WARNING rollout_worker.py:498 -- We've added a module for checking environments that are used in experiments. It will cause your environment to fail if your environment is not set upcorrectly. You can disable check env by setting `disable_env_checking` to True in your experiment config dictionary. You can run the environment checking module standalone by calling ray.rllib.utils.check_env(env). 2022-08-03 09:43:14,479 ERROR trial_runner.py:872 -- Trial PPOTrainer_BasicMultiAgentMultiSpaces_e890c_00000: Error processing event. (PPOTrainer pid=2020) 2022-08-03 09:43:14,471 WARNING trainer.py:1083 -- Worker crashed during call to `step_attempt()`. To try to continue training without the failed worker, set `ignore_worker_failures=True`.
--------------------------------------------------------------------------- RayTaskError(ValueError) Traceback (most recent call last) File <timed exec>:1, in <module> File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\tune.py:672, in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, reuse_actors, trial_executor, raise_on_failed_trial, callbacks, max_concurrent_trials, _experiment_checkpoint_dir, queue_trials, loggers, _remote) 670 progress_reporter.set_start_time(tune_start) 671 while not runner.is_finished() and not state[signal.SIGINT]: --> 672 runner.step() 673 if has_verbosity(Verbosity.V1_EXPERIMENT): 674 _report_progress(runner, progress_reporter) File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trial_runner.py:767, in TrialRunner.step(self) 761 self._callbacks.on_step_begin( 762 iteration=self._iteration, trials=self._trials 763 ) 765 next_trial = self._update_trial_queue_and_get_next_trial() --> 767 self._wait_and_handle_event(next_trial) 769 self._stop_experiment_if_needed() 771 try: File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trial_runner.py:745, in TrialRunner._wait_and_handle_event(self, next_trial) 743 except Exception as e: 744 if e is TuneError or self._fail_fast == TrialRunner.RAISE: --> 745 raise e 746 else: 747 raise TuneError(traceback.format_exc()) File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trial_runner.py:730, in TrialRunner._wait_and_handle_event(self, next_trial) 728 result = future_result.result 729 if future_result.type == ExecutorEventType.ERROR: --> 730 self._on_executor_error(trial, result) 731 elif future_result.type == ExecutorEventType.RESTORING_RESULT: 732 
self._on_restoring_result(trial) File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trial_runner.py:874, in TrialRunner._on_executor_error(self, trial, result) 872 logger.error(error_msg) 873 assert isinstance(result[0], Exception) --> 874 raise result[0] 875 else: 876 logger.exception(error_msg) File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\ray_trial_executor.py:901, in RayTrialExecutor.get_next_executor_event(self, live_trials, next_trial_exists) 899 assert isinstance(trial, Trial) 900 try: --> 901 future_result = ray.get(ready_future) 902 # For local mode 903 if isinstance(future_result, _LocalWrapper): File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\_private\client_mode_hook.py:105, in client_mode_hook.<locals>.wrapper(*args, **kwargs) 103 if func.__name__ != "init" or is_client_mode_enabled_by_default: 104 return getattr(ray, func.__name__)(*args, **kwargs) --> 105 return func(*args, **kwargs) File ~\Anaconda3\envs\EnvRL\lib\site-packages\ray\worker.py:1809, in get(object_refs, timeout) 1807 worker.core_worker.dump_object_store_memory_usage() 1808 if isinstance(value, RayTaskError): -> 1809 raise value.as_instanceof_cause() 1810 else: 1811 raise value RayTaskError(ValueError): ray::PPOTrainer.train() (pid=2020, ip=127.0.0.1, repr=PPOTrainer) File "python\ray\_raylet.pyx", line 663, in ray._raylet.execute_task File "python\ray\_raylet.pyx", line 667, in ray._raylet.execute_task File "python\ray\_raylet.pyx", line 614, in ray._raylet.execute_task.function_executor File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\_private\function_manager.py", line 701, in actor_method_executor return method(__ray_actor, *args, **kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\tune\trainable.py", line 349, in train result = self.step() File 
"C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\agents\trainer.py", line 1088, in step raise e File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\agents\trainer.py", line 1074, in step step_attempt_results = self.step_attempt() File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\agents\trainer.py", line 1155, in step_attempt step_results = self._exec_plan_or_training_iteration_fn() File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\agents\trainer.py", line 2174, in _exec_plan_or_training_iteration_fn results = next(self.train_exec_impl) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 779, in __next__ return next(self.built_iterator) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 869, in apply_filter for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 869, in apply_filter for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File 
"C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: [Previous line repeated 1 more time] File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 904, in apply_flatten for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 807, in apply_foreach for item in it: [Previous line repeated 1 more time] File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 492, in base_iterator yield ray.get(futures, timeout=timeout) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper return func(*args, **kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\worker.py", line 1809, in get raise value.as_instanceof_cause() ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.par_iter_next() (pid=13316, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001D6484670A0>) ValueError: The two structures don't have the same nested structure. 
First structure: type=ndarray str=[-0.6484811 -0.24129443 0.08446018 -0.60363895 -0.2602198 -0.5603006 -0.61620265 0.35957032 0.97200704 -0.9894121 0.8001538 0.54838014 0.485548 0.69472945 -0.7663002 0.21971236 0.01151593 0.59717155 0.69245756 -0.2534561 ] Second structure: type=OrderedDict str=OrderedDict([('agent0', array([ 0.665456 , 0.8671211 , -0.80171853, 0.930409 , -0.9449052 , -0.21203883, -0.2598595 , 0.6611393 , -0.6856699 , 0.7166635 ], dtype=float32)), ('agent1', array([ 0.19255306, 0.8727036 , -0.09591831, -0.44907162, -0.47003892, 0.24288067, 0.03934164, 0.1409452 , -0.2482065 , 0.4656972 , -0.62981224, -0.955172 , 0.5294034 , 0.34777784, -0.840635 , -0.9268064 , 0.8727926 , 0.16473597, -0.2449859 , 0.0885732 ], dtype=float32))]) More specifically: Substructure "type=OrderedDict str=OrderedDict([('agent0', array([ 0.665456 , 0.8671211 , -0.80171853, 0.930409 , -0.9449052 , -0.21203883, -0.2598595 , 0.6611393 , -0.6856699 , 0.7166635 ], dtype=float32)), ('agent1', array([ 0.19255306, 0.8727036 , -0.09591831, -0.44907162, -0.47003892, 0.24288067, 0.03934164, 0.1409452 , -0.2482065 , 0.4656972 , -0.62981224, -0.955172 , 0.5294034 , 0.34777784, -0.840635 , -0.9268064 , 0.8727926 , 0.16473597, -0.2449859 , 0.0885732 ], dtype=float32))])" is a sequence, while substructure "type=ndarray str=[-0.6484811 -0.24129443 0.08446018 -0.60363895 -0.2602198 -0.5603006 -0.61620265 0.35957032 0.97200704 -0.9894121 0.8001538 0.54838014 0.485548 0.69472945 -0.7663002 0.21971236 0.01151593 0.59717155 0.69245756 -0.2534561 ]" is not During handling of the above exception, another exception occurred: ray::RolloutWorker.par_iter_next() (pid=13316, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001D6484670A0>) File "python\ray\_raylet.pyx", line 656, in ray._raylet.execute_task File "python\ray\_raylet.pyx", line 697, in ray._raylet.execute_task File "python\ray\_raylet.pyx", line 663, in ray._raylet.execute_task File 
"python\ray\_raylet.pyx", line 667, in ray._raylet.execute_task File "python\ray\_raylet.pyx", line 614, in ray._raylet.execute_task.function_executor File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\_private\function_manager.py", line 701, in actor_method_executor return method(__ray_actor, *args, **kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\iter.py", line 1186, in par_iter_next return next(self.local_it) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 404, in gen_rollouts yield self.sample() File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\util\tracing\tracing_helper.py", line 462, in _resume_span return method(self, *_args, **_kwargs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 815, in sample batches = [self.input_reader.next()] File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\sampler.py", line 116, in next batches = [self.get_data()] File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\sampler.py", line 289, in get_data item = next(self._env_runner) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\sampler.py", line 679, in _env_runner active_envs, to_eval, outputs = _process_observations( File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\evaluation\sampler.py", line 906, in _process_observations prep_obs = preprocessor.transform(raw_obs) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\models\preprocessors.py", line 282, in transform self.check_shape(observation) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\models\preprocessors.py", line 69, in check_shape 
observation = convert_element_to_space_type( File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\ray\rllib\utils\spaces\space_utils.py", line 344, in convert_element_to_space_type return tree.map_structure(map_, element, sampled_element, check_types=False) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\tree\__init__.py", line 428, in map_structure assert_same_structure(structures[0], other, check_types=check_types) File "C:\Users\milos\Anaconda3\envs\EnvRL\lib\site-packages\tree\__init__.py", line 284, in assert_same_structure raise type(e)("%s\n" ValueError: The two structures don't have the same nested structure. First structure: type=ndarray str=[-0.6484811 -0.24129443 0.08446018 -0.60363895 -0.2602198 -0.5603006 -0.61620265 0.35957032 0.97200704 -0.9894121 0.8001538 0.54838014 0.485548 0.69472945 -0.7663002 0.21971236 0.01151593 0.59717155 0.69245756 -0.2534561 ] Second structure: type=OrderedDict str=OrderedDict([('agent0', array([ 0.665456 , 0.8671211 , -0.80171853, 0.930409 , -0.9449052 , -0.21203883, -0.2598595 , 0.6611393 , -0.6856699 , 0.7166635 ], dtype=float32)), ('agent1', array([ 0.19255306, 0.8727036 , -0.09591831, -0.44907162, -0.47003892, 0.24288067, 0.03934164, 0.1409452 , -0.2482065 , 0.4656972 , -0.62981224, -0.955172 , 0.5294034 , 0.34777784, -0.840635 , -0.9268064 , 0.8727926 , 0.16473597, -0.2449859 , 0.0885732 ], dtype=float32))]) More specifically: Substructure "type=OrderedDict str=OrderedDict([('agent0', array([ 0.665456 , 0.8671211 , -0.80171853, 0.930409 , -0.9449052 , -0.21203883, -0.2598595 , 0.6611393 , -0.6856699 , 0.7166635 ], dtype=float32)), ('agent1', array([ 0.19255306, 0.8727036 , -0.09591831, -0.44907162, -0.47003892, 0.24288067, 0.03934164, 0.1409452 , -0.2482065 , 0.4656972 , -0.62981224, -0.955172 , 0.5294034 , 0.34777784, -0.840635 , -0.9268064 , 0.8727926 , 0.16473597, -0.2449859 , 0.0885732 ], dtype=float32))])" is a sequence, while substructure "type=ndarray str=[-0.6484811 -0.24129443 0.08446018 
-0.60363895 -0.2602198 -0.5603006 -0.61620265 0.35957032 0.97200704 -0.9894121 0.8001538 0.54838014 0.485548 0.69472945 -0.7663002 0.21971236 0.01151593 0.59717155 0.69245756 -0.2534561 ]" is not Entire first structure: . Entire second structure: OrderedDict([('agent0', .), ('agent1', .)])