# ray/rllib/tuned_examples/crr/cartpole-v0-crr_expectation.yaml
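# Can be run via the RLlib CLI, e.g.: rllib train -f cartpole-v0-crr_expectation.yaml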

cartpole_crr:
    env: 'CartPole-v0'
    run: CRR
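    # Stop once the evaluation reward hits CartPole-v0's maximum episode
    # return (200), or after 100 training iterations, whichever comes first.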
    stop:
        evaluation/episode_reward_mean: 200
        training_iteration: 100
    config:
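        # CRR is an offline RL algorithm: training reads exclusively from this
        # pre-recorded CartPole dataset, never from live environment rollouts.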
        input:
            - 'tests/data/cartpole/large.json'
        framework: torch
        gamma: 0.99
        train_batch_size: 2048
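        # Actor and critic are both 3x128 MLPs with tanh activations,
        # each trained with a 3e-4 learning rate.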
        critic_hidden_activation: 'tanh'
        critic_hiddens: [128, 128, 128]
        critic_lr: 0.0003
        actor_hidden_activation: 'tanh'
        actor_hiddens: [128, 128, 128]
        actor_lr: 0.0003
        actions_in_input_normalized: True
        clip_actions: True
        # Q function update setting
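        # Twin Q-networks reduce value overestimation; target networks are
        # soft-updated (Polyak averaging with coefficient tau) after every
        # gradient step (interval of 1).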
        twin_q: True
        target_update_grad_intervals: 1
        tau: 0.0005
        # evaluation
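        # Evaluation overrides the offline input with a live env sampler and
        # disables exploration: 10 greedy episodes per iteration, run on one
        # dedicated worker in parallel with training.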
        evaluation_config:
            explore: False
            input: sampler
        evaluation_duration: 10
        evaluation_duration_unit: episodes
        evaluation_interval: 1
        evaluation_num_workers: 1
        evaluation_parallel_to_training: True
        # replay buffer
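        # Offline batches pass through this buffer before being sampled for
        # updates; learning_starts: 0 means there is no warm-up phase, which
        # fits pre-collected data.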
        replay_buffer_config:
            type: ray.rllib.utils.replay_buffers.MultiAgentReplayBuffer
            learning_starts: 0
            capacity: 100000
        # specific to CRR
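        # CRR weights its behavior-cloning loss by the estimated advantage.
        # weight_type: bin -> binary weight 1[A(s, a) > 0]; temperature and
        #   max_weight only apply to the exponential ("exp") variant.
        # advantage_type: expectation -> A(s, a) = Q(s, a) - mean Q(s, a'),
        #   estimated over n_action_sample actions a' drawn from the policy.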
        temperature: 1.0
        weight_type: bin
        advantage_type: expectation
        max_weight: 20.0
        n_action_sample: 4