# Source: ray/rllib/tuned_examples/crr/pendulum-v1-crr.yaml
# File-viewer metadata: 44 lines, 1.3 KiB, YAML

# Experiment `pendulum_crr`: runs CRR on the Pendulum-v1 environment with
# `input: 'dataset'` pointing at a Pendulum replay archive.
# NOTE(review): the nesting below is reconstructed. Each parent key without an
# inline value (`stop`, `config`, `input_config`, `evaluation_config`) owns the
# keys grouped under it. In particular, `input: sampler` has to live under
# `evaluation_config`; at the `config` level it would duplicate `config.input`.
pendulum_crr:
  env: 'Pendulum-v1'
  run: CRR
  stop:
    # We could make this -200, but given that we have 4 CPUs for our tests,
    # we will have to settle for -300.
    evaluation/episode_reward_mean: -300
    timesteps_total: 2000000
  config:
    input: 'dataset'
    input_config:
      paths: 'tests/data/pendulum/pendulum_replay_v1.1.0.zip'
      format: 'json'
    framework: torch
    num_workers: 3
    gamma: 0.99
    train_batch_size: 1024
    critic_hidden_activation: 'relu'
    critic_hiddens: [256, 256]
    critic_lr: 0.0003
    actor_hidden_activation: 'relu'
    actor_hiddens: [256, 256]
    actor_lr: 0.0003
    actions_in_input_normalized: false
    clip_actions: true
    # Q function update setting
    twin_q: true
    target_network_update_freq: 1
    tau: 0.0001
    # evaluation
    # NOTE(review): `evaluation_config` presumably overrides the settings
    # above for evaluation runs only (no exploration, live sampler instead of
    # the offline dataset) -- confirm against the RLlib documentation.
    evaluation_config:
      explore: false
      input: sampler
    evaluation_duration: 10
    evaluation_duration_unit: episodes
    evaluation_interval: 1
    evaluation_num_workers: 1
    evaluation_parallel_to_training: true
    # replay buffer (no overrides are set in this file)
    # specific to CRR
    temperature: 1.0
    weight_type: exp
    advantage_type: max
    max_weight: 20.0
    n_action_sample: 4