# ray/rllib/tuned_examples/ppo/cartpole-ppo.yaml
#
# 19 lines
# 470 B
# YAML

# Tuned example: PPO on CartPole-v0.
# The top-level key is the experiment name; everything below is nested under it.
cartpole-ppo:
  env: CartPole-v0
  run: PPO
  # Stopping criteria for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached -- confirm against the Ray Tune `stop` documentation.
  stop:
    episode_reward_mean: 150
    timesteps_total: 100000
  # Algorithm settings handed to the PPO trainer.
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.99
    lr: 0.0003
    num_workers: 1
    observation_filter: MeanStdFilter
    num_sgd_iter: 6
    vf_loss_coeff: 0.01
    # Network settings: a single hidden layer of 32 units with linear
    # activation.
    # NOTE(review): `vf_share_layers` presumably makes the value function
    # share layers with the policy -- confirm against the RLlib model docs.
    model:
      fcnet_hiddens: [32]
      fcnet_activation: linear
      vf_share_layers: true