# ray/rllib/tuned_examples/ppo/cartpole-ppo-hyperband.yaml

# Experiment definition: PPO on CartPole-v0 with a small grid sweep over two
# SGD settings (num_sgd_iter and sgd_minibatch_size).
# NOTE(review): no scheduler is configured in this file; presumably HyperBand
# is selected by whatever launches the experiment -- confirm.
cartpole-ppo:
    env: CartPole-v0
    run: PPO
    # Sampling count handed to the tuner. Presumably each grid point below is
    # repeated this many times -- verify against the Ray Tune docs.
    num_samples: 3
    # Stopping thresholds: mean episode reward and total wall-clock seconds.
    stop:
        episode_reward_mean: 200
        time_total_s: 180
    config:
        # Either framework (tf or torch) can be used here.
        framework: tf
        num_workers: 1
        # Swept values: 2 choices here x 3 choices below = 6 grid points.
        num_sgd_iter:
            grid_search:
                - 1
                - 4
        sgd_minibatch_size:
            grid_search:
                - 128
                - 256
                - 512
        observation_filter: MeanStdFilter