# Source: ray/rllib/tuned_examples/regression_tests/cartpole-ppo-tf.yaml
# (17 lines, 404 B, YAML)

# Experiment spec for the `cartpole-ppo-tf` regression test: runs PPO on
# CartPole-v0. The top-level key is the experiment name; everything below
# it belongs to that single experiment.
cartpole-ppo-tf:
  env: CartPole-v0
  run: PPO
  # Stopping criteria for the run.
  # NOTE(review): presumably the trial ends as soon as either threshold is
  # reached -- confirm against the Tune `stop` documentation.
  stop:
    episode_reward_mean: 150
    timesteps_total: 100000
  # Settings passed to the algorithm named in `run`.
  config:
    gamma: 0.99
    lr: 0.0003
    num_workers: 1
    observation_filter: MeanStdFilter
    num_sgd_iter: 6
    vf_share_layers: true
    vf_loss_coeff: 0.01
    # Network settings: one hidden layer of 32 units, linear activation.
    model:
      fcnet_hiddens: [32]
      fcnet_activation: linear