ray/rllib/tuned_examples/pg/multi-agent-cartpole-crashing-with-remote-envs-pg.yaml

multi-agent-cartpole-crashing-with-remote-envs-pg:
    env: ray.rllib.examples.env.cartpole_crashing.MultiAgentCartPoleCrashing
    run: PG
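    # Stop as soon as the (non-crashing) evaluation envs reach 300.0 mean episode
    # reward (2 agents x 200-ts episodes -> 400.0 max possible), or after 300k
    # sampled env steps.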
    stop:
        evaluation/episode_reward_mean: 300.0
        num_env_steps_sampled: 300000
    config:
        # Works for both torch and tf.
        framework: tf
        env_config:
            config:
                num_agents: 2
                # Crash roughly every 300 ts. This should be ok to measure 300+
                # reward (episodes are 200 ts long).
                p_crash: 0.0025  # prob to crash during step()
                p_crash_reset: 0.01  # prob to crash during reset()
                # Time for the env to initialize when newly created.
                # Every time a remote sub-environment crashes, a new env is created
                # in its place and will take this long (sleep) to "initialize".
                init_time_s: 1.0
        horizon: 200
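        # Sampling setup: 4 rollout workers x 3 remote sub-envs each = 12
        # environments stepped in parallel.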
        num_workers: 4
        num_envs_per_worker: 3
        # Use parallel remote envs.
        remote_worker_envs: true
        # Disable env checking. The env checker doesn't handle exceptions from
        # user envs and would crash the rollout worker.
        disable_env_checking: true
        # Switch on resiliency for failed sub-environments (within a vectorized stack).
        restart_failed_sub_environments: true
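
        # Evaluate on one extra worker, in parallel to training, using non-crashing
        # env copies (see the env_config overrides below).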
        evaluation_num_workers: 1
        evaluation_interval: 1
        evaluation_duration: 10
        evaluation_duration_unit: episodes
        evaluation_parallel_to_training: true
        evaluation_config:
            explore: false
            env_config:
                config:
                    # Make eval workers solid.
                    # This test is to prove that we can learn with a crashing env,
                    # not evaluate with one.
                    p_crash: 0.0
                    p_crash_reset: 0.0
                    init_time_s: 0.0
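
For reference, here is a minimal sketch of launching this tuned example programmatically rather than via the `rllib train file <path-to-yaml>` CLI (`rllib train -f <path-to-yaml>` on older versions). The file path and the exact Tune calls below are assumptions for a Ray/RLlib version that still ships the PG algorithm; `tune.run_experiments()` accepts the same name-to-spec dict structure these YAML files use, except that the top-level `env` key has to be folded into the config first (the CLI normally does this for you).

import yaml

import ray
from ray import tune

# Assumption: the YAML file lives in the current working directory.
with open("multi-agent-cartpole-crashing-with-remote-envs-pg.yaml") as f:
    experiments = yaml.safe_load(f)

# Tune's experiment spec has no top-level `env` key, so move it into the
# algorithm config (mirroring what RLlib's `rllib train` CLI does).
for spec in experiments.values():
    spec["config"]["env"] = spec.pop("env")

ray.init()
tune.run_experiments(experiments)
ray.shutdown()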