# Source: ray/rllib/tuned_examples/alpha_zero/cartpole-sparse-rewards-alpha-zero.yaml
# (file-viewer listing metadata for the upstream copy: 27 lines, 869 B, YAML)

# Experiment spec: runs AlphaZero on the CartPoleSparseRewards example env.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened; grouping follows the usual RLlib/Tune
# experiment layout (env / run / stop / config) - confirm against upstream.
cartpole-sparse-rewards-alpha-zero:
  env: ray.rllib.examples.env.cartpole_sparse_rewards.CartPoleSparseRewards
  run: AlphaZero
  # Stopping thresholds (presumably the run ends once either one is reached;
  # verify against the Tune stop-criteria semantics).
  stop:
    episode_reward_mean: 30.0
    timesteps_total: 100000
  config:
    # Only supported for torch right now.
    framework: torch
    num_workers: 6
    rollout_fragment_length: 50
    train_batch_size: 500
    sgd_minibatch_size: 64
    lr: 0.0001
    num_sgd_iter: 1
    # Tree-search settings, including the Dirichlet exploration-noise knobs.
    mcts_config:
      puct_coefficient: 1.5
      num_simulations: 100
      temperature: 1.0
      dirichlet_epsilon: 0.20
      dirichlet_noise: 0.03
      argmax_tree_policy: false
      add_dirichlet_noise: true
    ranked_rewards:
      enable: true
    model:
      custom_model: ray.rllib.algorithms.alpha_zero.models.custom_torch_models.DenseModel