# ray/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml
#
# 22 lines
# 526 B
# YAML
# Raw Normal View History

# Regression-test experiment: PPO on Pendulum-v0 with PyTorch disabled
# (the "tf" variant, per the experiment name).
# NOTE(review): nesting was reconstructed after the indentation was lost;
# confirm against the consumer's schema before relying on it.
pendulum-ppo-tf:
  env: Pendulum-v0
  run: PPO
  # Stopping thresholds for the run.
  stop:
    episode_reward_mean: -500
    timesteps_total: 400000
  # Algorithm settings for the `run` algorithm above.
  config:
    use_pytorch: false
    train_batch_size: 2048
    vf_clip_param: 10.0
    num_workers: 0
    num_envs_per_worker: 10
    lambda: 0.1
    gamma: 0.95
    lr: 0.0003
    sgd_minibatch_size: 64
    num_sgd_iter: 10
    # Network settings; only the hidden-layer sizes are set here.
    model:
      fcnet_hiddens: [64, 64]
    # These two are config-level keys, so they sit beside `model`, not in it.
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter