---
# Tuned example: PG on the crashing CartPole env, with failed sub-environments
# being restarted. The run stops once either criterion under `stop` is met.
cartpole-crashing-pg:
  env: ray.rllib.examples.env.cartpole_crashing.CartPoleCrashing
  run: PG
  stop:
    evaluation/episode_reward_mean: 180.0
    num_env_steps_sampled: 150000
  config:
    # Works for both torch and tf.
    framework: tf

    env_config:
      config:
        # Crash roughly every 300 ts. This should be ok to measure 180.0
        # reward (episodes are 200 ts long).
        p_crash: 0.0025  # prob to crash during step()
        p_crash_reset: 0.01  # prob to crash during reset()
        # Time for the env to initialize when newly created.
        # Every time a remote sub-environment crashes, a new env is created
        # in its place and will take this long (sleep) to "initialize".
        init_time_s: 1.0

    # NOTE(review): nesting level of `horizon` was reconstructed from a
    # flattened source; it is placed at the algorithm-config level here.
    # Confirm it is not meant to live under `env_config`.
    horizon: 200
    num_workers: 2
    num_envs_per_worker: 5

    # Switch on resiliency for failed sub environments (within a vectorized
    # stack).
    restart_failed_sub_environments: true

    # Disable env checking. Otherwise, RolloutWorkers will crash during
    # initialization, which is not covered by the
    # `restart_failed_sub_environments=True` failure tolerance mode.
    disable_env_checking: true

    evaluation_num_workers: 2
    evaluation_interval: 1
    evaluation_duration: 20
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    evaluation_config:
      explore: false