mirror of
https://github.com/vale981/ray
synced 2025-03-10 05:16:49 -04:00
28 lines
869 B
YAML
28 lines
869 B
YAML
![]() |
# Experiment definition: runs the AlphaZero algorithm on the
# CartPoleSparseRewards example environment.
cartpole-sparse-rewards-alpha-zero:
  env: ray.rllib.examples.env.cartpole_sparse_rewards.CartPoleSparseRewards
  run: AlphaZero
  # Stopping criteria for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached -- confirm against the experiment runner's `stop` semantics.
  stop:
    episode_reward_mean: 30.0
    timesteps_total: 100000
  config:
    # Only supported for torch right now.
    framework: torch
    num_workers: 6
    rollout_fragment_length: 50
    train_batch_size: 500
    sgd_minibatch_size: 64
    lr: 0.0001
    num_sgd_iter: 1
    # Settings for the tree search.
    # NOTE(review): nesting of the seven keys below under `mcts_config` was
    # reconstructed from a flattened source -- verify against the algorithm's
    # default config.
    mcts_config:
      puct_coefficient: 1.5
      num_simulations: 100
      temperature: 1.0
      dirichlet_epsilon: 0.20
      dirichlet_noise: 0.03
      argmax_tree_policy: false
      add_dirichlet_noise: true
    ranked_rewards:
      enable: true
    model:
      custom_model: ray.rllib.algorithms.alpha_zero.models.custom_torch_models.DenseModel