# ray/rllib/tuned_examples/compact-regression-test.yaml

# This file runs on a single g3.16xl or p3.16xl node. We suggest
# running these experiments in a DLAMI / tensorflow_p36 env. Note that
# RL runs are inherently high variance, so you will need to check that
# the rewards reached are reasonably in line with previous results.
#
# You can find the reference results here:
# https://github.com/ray-project/ray/tree/master/doc/dev/release_logs
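#
# A minimal sketch of how you might launch this file, assuming the
# `rllib` CLI from the matching Ray installation is on your PATH:
#
#   rllib train -f compact-regression-test.yaml
#
# Each experiment below trains num_samples: 4 trials, each stopped
# after one hour of wall-clock training time (time_total_s: 3600).

# IMPALA: 10 workers x 5 envs each; the learning rate anneals linearly
# from 5e-4 toward ~0 over the first 20M timesteps.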
atari-impala:
    env: BreakoutNoFrameskip-v4
    run: IMPALA
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        sample_batch_size: 50
        train_batch_size: 500
        num_workers: 10
        num_envs_per_worker: 5
        clip_rewards: True
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]
        num_gpus: 1
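
# PPO with tuned Atari settings: clipped rewards, a shared policy/value
# trunk (vf_share_layers), and 10 SGD iterations over each 5000-sample
# train batch in minibatches of 500.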
atari-ppo:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        sample_batch_size: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
        num_gpus: 1
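
# Ape-X DQN stripped down to its distributed core: double Q, dueling,
# and noisy nets are all disabled, leaving 3-step returns and
# prioritized replay (beta annealed to 1.0) across 8 workers x 8 envs.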
apex:
    env: BreakoutNoFrameskip-v4
    run: APEX
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: .0001
        adam_epsilon: .00015
        hiddens: [512]
        buffer_size: 1000000
        schedule_max_timesteps: 2000000
        exploration_final_eps: 0.01
        exploration_fraction: .1
        prioritized_replay_alpha: 0.5
        beta_annealing_fraction: 1.0
        final_prioritized_replay_beta: 1.0
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        sample_batch_size: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
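
# A2C: 5 workers x 5 envs with the same linear LR annealing scheme as
# the IMPALA run above (7e-4 down toward ~0 over 20M timesteps).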
atari-a2c:
    env: BreakoutNoFrameskip-v4
    run: A2C
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        sample_batch_size: 20
        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        num_gpus: 1
        lr_schedule: [
            [0, 0.0007],
            [20000000, 0.000000000001],
        ]
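
# Basic DQN: every enhancement (double Q, dueling, noisy nets,
# prioritized replay, n-step returns) is switched off. Note
# num_gpus: 0.2, a fractional allocation that lets all four trials
# of this experiment share a single GPU.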
atari-basic-dqn:
    env: BreakoutNoFrameskip-v4
    run: DQN
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        prioritized_replay: false
        n_step: 1
        target_network_update_freq: 8000
        lr: .0000625
        adam_epsilon: .00015
        hiddens: [512]
        learning_starts: 20000
        buffer_size: 1000000
        sample_batch_size: 4
        train_batch_size: 32
        schedule_max_timesteps: 2000000
        exploration_final_eps: 0.01
        exploration_fraction: .1
        prioritized_replay_alpha: 0.5
        beta_annealing_fraction: 1.0
        final_prioritized_replay_beta: 1.0
        num_gpus: 0.2
        timesteps_per_iteration: 10000