# ray/rllib/tuned_examples/halfcheetah-appo.yaml

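# This can reach 9k reward in 2 hours on a Titan XP GPU
# with 16 workers and 8 envs per worker.
# NOTE(review): the config below sets num_envs_per_worker to 32, not 8 --
# confirm which value the benchmark above was measured with.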
halfcheetah-appo:
  # Everything below is nested under the experiment name above:
  # environment id, algorithm to run, stop criteria, and algorithm config.
  env: HalfCheetah-v2
  run: APPO
  stop:
    # 10800 s = 3 hours of total run time.
    time_total_s: 10800
  config:
    vtrace: true
    gamma: 0.99
    lambda: 0.95
    rollout_fragment_length: 512
    train_batch_size: 4096
    num_workers: 16
    num_gpus: 1
    broadcast_interval: 1
    max_sample_requests_in_flight_per_worker: 1
    num_data_loader_buffers: 1
    num_envs_per_worker: 32
    minibatch_buffer_size: 16
    num_sgd_iter: 32
    clip_param: 0.2
    # Pairs of [timestep, learning rate] (see RLlib's lr_schedule format):
    # 0.0005 at timestep 0, 0.000001 at timestep 150000000.
    lr_schedule:
      - [0, 0.0005]
      - [150000000, 0.000001]
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    grad_clip: 0.5
    batch_mode: truncate_episodes
    use_kl_loss: true
    kl_coeff: 1.0
    kl_target: 0.04
    observation_filter: MeanStdFilter