ray/rllib/tuned_examples/halfcheetah-appo.yaml
Eric Liang dd70720578
[rllib] Rename sample_batch_size => rollout_fragment_length (#7503)
* bulk rename

* deprecation warn

* update doc

* update fig

* line length

* rename

* make pytest compatible

* fix test

* fi sys

* rename

* wip

* fix more

* lint

* update svg

* comments

* lint

* fix use of batch steps
2020-03-14 12:05:04 -07:00

35 lines
950 B
YAML

# Tuned APPO experiment for the HalfCheetah-v2 environment.
#
# This can reach 9k reward in 2 hours on a Titan XP GPU
# with 16 workers and 8 envs per worker.
# NOTE(review): the config below sets num_envs_per_worker to 32, not 8 --
# confirm which value produced the reported result and align the two.
halfcheetah-appo:
  env: HalfCheetah-v2
  run: APPO
  stop:
    # 10800 s = 3 h, i.e. a larger budget than the 2 h quoted above.
    time_total_s: 10800
  config:
    vtrace: true
    gamma: 0.99
    lambda: 0.95
    rollout_fragment_length: 512
    train_batch_size: 4096
    num_workers: 16
    num_gpus: 1
    broadcast_interval: 1
    max_sample_requests_in_flight_per_worker: 1
    num_data_loader_buffers: 1
    num_envs_per_worker: 32
    minibatch_buffer_size: 16
    num_sgd_iter: 32
    clip_param: 0.2
    # Presumably [timestep, learning_rate] breakpoints -- confirm against
    # the RLlib documentation for lr_schedule.
    lr_schedule:
      - [0, 0.0005]
      - [150000000, 0.000001]
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    grad_clip: 0.5
    batch_mode: truncate_episodes
    use_kl_loss: true
    kl_coeff: 1.0
    kl_target: 0.04
    observation_filter: MeanStdFilter