ray/rllib/tuned_examples/ppo/pendulum-ddppo.yaml

20 lines
498 B
YAML

# Experiment definition: run DDPPO on Pendulum-v1 with the torch framework.
# Layout: <experiment name> -> env / run / stop / config.
# NOTE(review): nesting below was reconstructed from key order (the empty-valued
# keys `pendulum-ddppo`, `stop` and `config` are the parents of the keys that
# follow them) — confirm against the consumer's expected schema.
pendulum-ddppo:
  env: Pendulum-v1
  run: DDPPO
  # Stopping criteria. Presumably the run ends as soon as either threshold is
  # reached — verify against the runner's `stop` semantics.
  stop:
    episode_reward_mean: -300
    timesteps_total: 1500000
  # Algorithm settings handed to the `run` algorithm above.
  config:
    framework: torch
    # Sampling / parallelism.
    rollout_fragment_length: 250
    num_gpus_per_worker: 0
    num_workers: 4
    num_envs_per_worker: 10
    observation_filter: MeanStdFilter
    # Optimisation hyperparameters.
    gamma: 0.95
    sgd_minibatch_size: 50
    num_sgd_iter: 5
    clip_param: 0.3
    vf_clip_param: 10.0
    lambda: 0.1
    lr: 0.00015