# ray/release/rllib_tests/learning_tests/yaml_files/cql-halfcheetahbulletenv-v0.yaml

cql-halfcheetahbulletenv-v0:
    env: HalfCheetahBulletEnv-v0
    run: CQL
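    # Thresholds the run must reach before hitting the stop condition
    # for this release test to count as passed.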
    pass_criteria:
        evaluation/episode_reward_mean: 400.0
        timesteps_total: 10000000
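    # Hard wall-clock limit: terminate the run after one hour.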
    stop:
        time_total_s: 3600
    config:
        # Use input produced by expert SAC algo.
        input: ["~/halfcheetah_expert_sac.zip"]
        actions_in_input_normalized: true
        soft_horizon: False
        horizon: 1000
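        # Q-network and policy network: 3 hidden layers of 256 ReLU units each.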
        q_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
        policy_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: false
        n_step: 3
        rollout_fragment_length: 1
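        # Start SGD as soon as 256 samples (one train batch) are in the buffer.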
        replay_buffer_config:
            type: MultiAgentReplayBuffer
            learning_starts: 256
        train_batch_size: 256
        target_network_update_freq: 0
        min_train_timesteps_per_reporting: 1000
        optimization:
            actor_learning_rate: 0.0001
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0001
        num_workers: 0
        num_gpus: 1
        metrics_smoothing_episodes: 5

        # CQL Configs
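        # min_q_weight: weight of the conservative (CQL) regularizer added to
        #   the standard Bellman error.
        # bc_iters: number of initial training iterations in which the actor is
        #   updated with a pure behavior-cloning loss before switching to the
        #   SAC/CQL actor objective.
        # temperature / num_actions: temperature and number of sampled actions
        #   used for the logsumexp term in the CQL loss.
        # lagrangian: keep min_q_weight fixed rather than tuning it via a
        #   Lagrangian multiplier.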
        min_q_weight: 5.0
        bc_iters: 20000
        temperature: 1.0
        num_actions: 10
        lagrangian: False

        # Switch on online evaluation.
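        # Training is purely offline, so every 3rd training iteration run 10
        # evaluation episodes against the real environment (in parallel with
        # training, on a single evaluation worker) to measure the learned
        # policy's actual return. `input: sampler` in evaluation_config
        # overrides the offline input so evaluation uses live env rollouts.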
        evaluation_interval: 3
        evaluation_parallel_to_training: true
        evaluation_num_workers: 1
        evaluation_duration: 10
        evaluation_duration_unit: episodes
        evaluation_config:
            input: sampler
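        # Attach the latest evaluation metrics to every result dict so that
        # `evaluation/episode_reward_mean` (see pass_criteria above) is
        # reported on every iteration.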
        always_attach_evaluation_results: True
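
# Note: top-level keys other than `config` (env, run, pass_criteria, stop) are
# read by the release-test / Tune harness; everything under `config` is
# forwarded to the CQL algorithm.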