From 2cfd6c2e977c955bd87ab160415531dd9437e689 Mon Sep 17 00:00:00 2001
From: Avnish Narayan <38871737+avnishn@users.noreply.github.com>
Date: Sat, 23 Jul 2022 04:53:03 -0700
Subject: [PATCH] [RLlib] Fix apex breakout release test performance. (#26867)

---
NOTE(review): text in this section (between the "---" separator and the first
"diff --git") is ignored when the patch is applied.

* Fixed: the release_tests.yaml hunk referenced "1gpus_24cpus.yaml", but the
  compute config created by this patch is "1gpu_24cpus.yaml". The reference
  now matches the created file.
* Line breaks and YAML indentation in the hunks below were reconstructed from
  a whitespace-collapsed copy of this patch. Hunk line counts match the "@@"
  headers, but verify indentation against the target files before applying.
* The first apex hunk removes the "stop:" key while keeping "time_total_s" as
  context. Confirm the resulting nesting of "time_total_s" is intended.

 release/release_tests.yaml                    |  2 +-
 release/rllib_tests/1gpu_24cpus.yaml          | 21 +++++++++++++++++++
 .../apex/apex-breakoutnoframeskip-v4.yaml     |  8 +++----
 3 files changed, 26 insertions(+), 5 deletions(-)
 create mode 100644 release/rllib_tests/1gpu_24cpus.yaml

diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index 4c9c002b9..7fa4da8b1 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -2689,7 +2689,7 @@
 
   cluster:
     cluster_env: app_config.yaml
-    cluster_compute: 2gpus_32cpus.yaml
+    cluster_compute: 1gpu_24cpus.yaml
 
   run:
     timeout: 18000
diff --git a/release/rllib_tests/1gpu_24cpus.yaml b/release/rllib_tests/1gpu_24cpus.yaml
new file mode 100644
index 000000000..9da227f46
--- /dev/null
+++ b/release/rllib_tests/1gpu_24cpus.yaml
@@ -0,0 +1,21 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west-2
+
+max_workers: 0
+
+head_node_type:
+    name: head_node
+    instance_type: g3.4xlarge
+
+worker_node_types:
+    - name: worker_node
+      instance_type: m5.2xlarge
+      min_workers: 1
+      max_workers: 1
+      use_spot: false
+
+aws:
+    BlockDeviceMappings:
+        - DeviceName: /dev/sda1
+          Ebs:
+            VolumeSize: 500
diff --git a/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v4.yaml
index 4e4d9580e..3216a4b02 100644
--- a/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v4.yaml
+++ b/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v4.yaml
@@ -4,8 +4,7 @@ apex-breakoutnoframeskip-v4:
     # Minimum reward and total ts (in given time_total_s) to pass this test.
     pass_criteria:
         episode_reward_mean: 100.0
-        timesteps_total: 7000000
-    stop:
+        timesteps_total: 12000000
         time_total_s: 7200
     config:
         double_q: false
@@ -13,7 +12,8 @@ apex-breakoutnoframeskip-v4:
         num_atoms: 1
         noisy: false
         n_step: 3
-        lr: .0001
+        lr: 0.001
+        grad_clip: 40.0
         adam_epsilon: .00015
         hiddens: [512]
         replay_buffer_config:
@@ -23,7 +23,7 @@ apex-breakoutnoframeskip-v4:
           epsilon_timesteps: 200000
           final_epsilon: 0.01
         num_gpus: 1
-        num_workers: 8
+        num_workers: 16
         num_envs_per_worker: 8
         rollout_fragment_length: 20
         train_batch_size: 512