# ray/rllib/tuned_examples/alpha_zero/cartpole-sparse-rewards-alpha-zero.yaml

# Experiment definition: runs AlphaZero on the CartPoleSparseRewards env.
cartpole-sparse-rewards-alpha-zero:
  env: ray.rllib.examples.env.cartpole_sparse_rewards.CartPoleSparseRewards
  run: AlphaZero

  # Stopping thresholds for the experiment.
  # NOTE(review): presumably the run ends as soon as either one is reached;
  # confirm against the Tune stop-criteria semantics.
  stop:
    episode_reward_mean: 30.0
    timesteps_total: 100000

  config:
    # Only supported for torch right now.
    framework: torch

    # Sampling and optimization settings.
    num_workers: 6
    rollout_fragment_length: 50
    train_batch_size: 500
    sgd_minibatch_size: 64
    lr: 0.0001
    num_sgd_iter: 1

    # Tree-search settings handed to the algorithm under `mcts_config`.
    mcts_config:
      puct_coefficient: 1.5
      num_simulations: 100
      temperature: 1.0
      dirichlet_epsilon: 0.20
      dirichlet_noise: 0.03
      argmax_tree_policy: false
      add_dirichlet_noise: true

    ranked_rewards:
      enable: true

    # Custom model, referenced by its fully qualified import path.
    model:
      custom_model: ray.rllib.algorithms.alpha_zero.models.custom_torch_models.DenseModel