# ray/rllib/tuned_examples/cql/hopper-bc.yaml
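# A sketch of how a tuned example like this is typically launched (assumes
# the `d4rl` package and its MuJoCo dependencies are installed):
#   rllib train -f hopper-bc.yaml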

hopper_bc:
  env:
    grid_search:
      - ray.rllib.examples.env.d4rl_env.hopper_random
      #- ray.rllib.examples.env.d4rl_env.hopper_medium
      #- ray.rllib.examples.env.d4rl_env.hopper_expert
      #- ray.rllib.examples.env.d4rl_env.hopper_medium_replay
  run: CQL
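  # Each env entry above pairs with the matching `input` dataset below
  # (e.g. hopper_random <-> d4rl.hopper-random-v0); toggle them together.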
  config:
    # SAC Configs
    input: d4rl.hopper-random-v0
    #input: d4rl.hopper-medium-v0
    #input: d4rl.hopper-expert-v0
    #input: d4rl.hopper-medium-replay-v0
    framework: torch
    soft_horizon: False
    horizon: 1000
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256, 256]
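    # Q-network and policy are both 3-layer, 256-unit MLPs, the SAC-style
    # architecture that CQL builds on.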
    tau: 0.005
    target_entropy: auto
    no_done_at_end: false
    n_step: 1
    rollout_fragment_length: 1
    replay_buffer_config:
      type: MultiAgentReplayBuffer
      learning_starts: 10
    train_batch_size: 256
    target_network_update_freq: 0
    min_train_timesteps_per_reporting: 1000
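    # SAC-style per-component learning rates (actor, critic, entropy coefficient).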
    optimization:
      actor_learning_rate: 0.0001
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0001
    num_workers: 0
    num_gpus: 1
    clip_actions: false
    normalize_actions: true
    evaluation_interval: 1
    metrics_smoothing_episodes: 5
    # CQL Configs
    min_q_weight: 5.0
    # With bc_iters set this high, the behavior-cloning warm-up phase never
    # ends, so this run is effectively pure BC (hence "hopper-bc").
    bc_iters: 200000000
    temperature: 1.0
    num_actions: 10
    lagrangian: False
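    # Evaluate with live environment rollouts ("sampler") rather than the
    # offline dataset, so eval rewards reflect true policy performance.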
    evaluation_config:
      input: sampler