cartpole_crr:
    env: 'CartPole-v0'
    run: CRR
    stop:
        evaluation/episode_reward_mean: 200
        training_iteration: 100
    config:
        input:
            - 'tests/data/cartpole/large.json'
        framework: torch
        gamma: 0.99
        train_batch_size: 2048
        critic_hidden_activation: 'tanh'
        critic_hiddens: [128, 128, 128]
        critic_lr: 0.0003
        actor_hidden_activation: 'tanh'
        actor_hiddens: [128, 128, 128]
        actor_lr: 0.0003
        actions_in_input_normalized: True
        clip_actions: True
        # Q function update setting
        twin_q: True
        target_update_grad_intervals: 1
        tau: 0.0005
        # evaluation
        evaluation_config:
            explore: False
            input: sampler
        evaluation_duration: 10
        evaluation_duration_unit: episodes
        evaluation_interval: 1
        evaluation_num_workers: 1
        evaluation_parallel_to_training: True
        # replay buffer
        replay_buffer_config:
            type: ray.rllib.utils.replay_buffers.MultiAgentReplayBuffer
            learning_starts: 0
            capacity: 100000
        # specific to CRR
        temperature: 1.0
        # 'bin' weights policy regression by a binary advantage indicator;
        # 'exp' would use exp(advantage / temperature), clipped at max_weight.
        weight_type: bin
        # estimate the advantage against the mean Q over sampled actions
        advantage_type: mean
        max_weight: 20.0
        # number of actions sampled per state for the advantage estimate
        n_action_sample: 4
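
# Usage note (a sketch, not part of the experiment spec): a tuned-example file
# like this can be launched with the RLlib CLI, e.g.
#     rllib train -f cartpole-crr.yaml
# The filename here is an assumption, and the relative offline 'input' path in
# the config assumes it can be resolved against an RLlib source checkout that
# contains 'tests/data/cartpole/large.json'.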