# ray/rllib/tuned_examples/ppo/cartpole-ppo-hyperband.yaml
#
# 16 lines
# 397 B
# YAML

# Experiment definition: PPO on CartPole-v0 with a small hyperparameter sweep.
# NOTE(review): the file path mentions "hyperband", but no scheduler is
# configured in this file — confirm it is supplied by the caller.
cartpole-ppo:
  env: CartPole-v0
  run: PPO
  # Number of times the search space below is sampled.
  num_samples: 3
  # Stopping criteria: a mean episode reward of 200, or 180 seconds of total
  # run time (presumably whichever is reached first — confirm against the
  # Tune documentation).
  stop:
    episode_reward_mean: 200
    time_total_s: 180
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 1
    # Each `grid_search` must stay nested under the hyperparameter it sweeps;
    # at the same level the two would collide as duplicate keys.
    # 2 x 3 = 6 combinations of the values listed below.
    num_sgd_iter:
      grid_search: [1, 4]
    sgd_minibatch_size:
      grid_search: [128, 256, 512]
    observation_filter: MeanStdFilter