# ray/rllib/tuned_examples/crr/cartpole-v0-crr_expectation.yaml
# 2022-07-20 12:08:57 -07:00
#
# 42 lines
# 1.2 KiB
# YAML

# Tuned example: CRR on CartPole-v0, trained from the JSON dataset at
# tests/data/cartpole/large.json with advantage_type 'expectation'.
cartpole_crr:
  env: 'CartPole-v0'
  run: CRR
  # Stopping criteria for the run.
  # NOTE(review): Ray Tune documents `stop` as "whichever is reached first";
  # confirm against the Tune version in use.
  stop:
    evaluation/episode_reward_mean: 200
    training_iteration: 100
  config:
    # Training input: a dataset, described by input_config below.
    input: 'dataset'
    input_config:
      paths: 'tests/data/cartpole/large.json'
      format: 'json'
    framework: torch
    num_workers: 3
    gamma: 0.99
    train_batch_size: 2048
    # Critic and actor use the same hidden sizes, activation and learning rate.
    critic_hidden_activation: 'tanh'
    critic_hiddens: [128, 128, 128]
    critic_lr: 0.0003
    actor_hidden_activation: 'tanh'
    actor_hiddens: [128, 128, 128]
    actor_lr: 0.0003
    actions_in_input_normalized: true
    clip_actions: true
    # Q function update setting
    twin_q: true
    target_network_update_freq: 1
    tau: 0.0005
    # Evaluation. The nested evaluation_config turns exploration off and sets
    # `input` to 'sampler' instead of 'dataset'.
    # NOTE(review): presumably these override the training values only for
    # evaluation workers, so evaluation uses environment rollouts rather than
    # the offline data; confirm against the RLlib docs.
    evaluation_config:
      explore: false
      input: sampler
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    evaluation_interval: 1
    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    # specific to CRR
    temperature: 1.0
    weight_type: exp
    advantage_type: expectation
    max_weight: 20.0
    n_action_sample: 4