# ray/rllib/tuned_examples/ppo/cartpole-appo-vtrace.yaml

# Tuned example: APPO on CartPole-v0 with vtrace enabled.
cartpole-appo-vtrace:
  env: CartPole-v0
  run: APPO
  # Stopping criteria.
  stop:
    episode_reward_mean: 150
    timesteps_total: 200000
  # Algorithm settings.
  config:
    # Either framework (tf or torch) works with this config.
    framework: tf
    # Sampling and batch sizes.
    rollout_fragment_length: 10
    train_batch_size: 10
    num_envs_per_worker: 5
    # Worker and GPU counts.
    num_workers: 1
    num_gpus: 0
    vtrace: true