ray/release/rllib_tests/performance_tests/performance_tests.yaml
gjoliver d8a61f801f
[RLlib] Create a set of performance benchmark tests to run nightly. (#19945)
* Create a core set of algorithms tests to run nightly.

* Run release tests under tf, tf2, and torch frameworks.

* Fix

* Add eager_tracing option for tf2 framework.

* make sure core tests can run in parallel.

* cql

* Report progress while running nightly/weekly tests.

* Include SAC in nightly lineup.

* Revert changes to learning_tests

* rebrand to performance test.

* update build_pipeline.py with new performance_tests name.

* Record stats.

* bug fix, need to populate experiments dict.

* Alphabetize yaml files.

* Allow specifying frameworks. And do not run tf2 by default.

* remove some debugging code.

* fix

* Undo testing changes.

* Do not run CQL regression for now.

* LINT.

Co-authored-by: sven1977 <svenmika1977@gmail.com>
2021-11-08 18:15:13 +01:00
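
The commit above describes running each experiment under several frameworks, in parallel, as nightly performance benchmarks. The sketch below is illustrative only and is not the actual release-test harness: it assumes the harness reads this YAML, pops the non-standard `frameworks` key from each experiment, and expands it into one Tune experiment per framework, enabling `eager_tracing` for tf2 as the commit notes describe. The helper name `expand_frameworks` is hypothetical.

# Illustrative sketch only (not the actual release-test runner).
import copy

import yaml

import ray
from ray import tune


def expand_frameworks(yaml_path):
    """Expand each experiment's `frameworks` list into per-framework experiments."""
    with open(yaml_path) as f:
        experiments = yaml.safe_load(f)

    expanded = {}
    for name, exp in experiments.items():
        # `frameworks` is not a standard RLlib config key; it is consumed here.
        frameworks = exp.pop("frameworks", ["tf"])
        for fw in frameworks:
            fw_exp = copy.deepcopy(exp)
            config = fw_exp.setdefault("config", {})
            # Tune's experiment spec has no top-level `env`; move it into the config.
            if "env" in fw_exp:
                config["env"] = fw_exp.pop("env")
            config["framework"] = fw
            if fw == "tf2":
                # Assumption based on the commit message: tf2 runs with eager tracing.
                config["eager_tracing"] = True
            expanded[f"{name}-{fw}"] = fw_exp
    return expanded


if __name__ == "__main__":
    ray.init()
    # Run all expanded experiments in parallel under a single Tune session.
    tune.run_experiments(expand_frameworks("performance_tests.yaml"))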


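# APEX (distributed DQN) on BreakoutNoFrameskip-v4: 8 rollout workers with
# 8 envs each on 1 GPU, run for a fixed one-hour budget (time_total_s: 3600)
# under each framework listed in the entry.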
apex-breakoutnoframeskip-v4:
    env: BreakoutNoFrameskip-v4
    run: APEX
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: .0001
        adam_epsilon: .00015
        hiddens: [512]
        buffer_size: 1000000
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        rollout_fragment_length: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
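
# APPO (asynchronous PPO with V-trace) on PongNoFrameskip-v4: 31 rollout
# workers with 8 envs each on 1 GPU, stopped after time_total_s: 2000
# (roughly 33 minutes).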
appo-pongnoframeskip-v4:
    env: PongNoFrameskip-v4
    run: APPO
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 2000
    config:
        vtrace: True
        use_kl_loss: False
        rollout_fragment_length: 50
        train_batch_size: 750
        num_workers: 31
        broadcast_interval: 1
        max_sample_requests_in_flight_per_worker: 1
        num_multi_gpu_tower_stacks: 1
        num_envs_per_worker: 8
        num_sgd_iter: 2
        vf_loss_coeff: 1.0
        clip_param: 0.3
        num_gpus: 1
        grad_clip: 10
        model:
            dim: 42
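
# CQL (offline RL on expert SAC data) for HalfCheetahBulletEnv-v0 is kept below
# as a commented-out reference; the commit skips it ("Do not run CQL regression
# for now").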
# Bring cql test back after we make sure it learns.
#cql-halfcheetahbulletenv-v0:
#    env: HalfCheetahBulletEnv-v0
#    run: CQL
#    frameworks: [ "tf", "tf2", "torch" ]
#    stop:
#        time_total_s: 1800
#    config:
#        # Use input produced by expert SAC algo.
#        input: ["~/halfcheetah_expert_sac.zip"]
#        actions_in_input_normalized: true
#
#        soft_horizon: False
#        horizon: 1000
#        Q_model:
#            fcnet_activation: relu
#            fcnet_hiddens: [256, 256, 256]
#        policy_model:
#            fcnet_activation: relu
#            fcnet_hiddens: [256, 256, 256]
#        tau: 0.005
#        target_entropy: auto
#        no_done_at_end: false
#        n_step: 3
#        rollout_fragment_length: 1
#        prioritized_replay: false
#        train_batch_size: 256
#        target_network_update_freq: 0
#        timesteps_per_iteration: 1000
#        learning_starts: 256
#        optimization:
#            actor_learning_rate: 0.0001
#            critic_learning_rate: 0.0003
#            entropy_learning_rate: 0.0001
#        num_workers: 0
#        num_gpus: 1
#        metrics_smoothing_episodes: 5
#
#        # CQL Configs
#        min_q_weight: 5.0
#        bc_iters: 20000
#        temperature: 1.0
#        num_actions: 10
#        lagrangian: False
#
#        # Switch on online evaluation.
#        evaluation_interval: 3
#        evaluation_config:
#            input: sampler
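
# SAC on HalfCheetahBulletEnv-v0: single local worker (num_workers: 0) on 1 GPU
# with prioritized replay and [256, 256] Q/policy networks, run for a fixed
# one-hour budget (time_total_s: 3600).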
sac-halfcheetahbulletenv-v0:
    env: HalfCheetahBulletEnv-v0
    run: SAC
    frameworks: [ "tf", "tf2", "torch" ]
    stop:
        time_total_s: 3600
    config:
        horizon: 1000
        soft_horizon: false
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: false
        n_step: 3
        rollout_fragment_length: 1
        prioritized_replay: true
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 10000
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 1
        metrics_smoothing_episodes: 5