---
# Given a SAC-generated offline file generated via:
# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui

# Pendulum CQL can attain ~ -300 reward in 10k from that file.
pendulum-cql:
  env: Pendulum-v1
  run: CQL
  stop:
    evaluation/episode_reward_mean: -700
    timesteps_total: 800000
  config:
    # Works for both torch and tf.
    framework: tf

    # Use one or more offline files or "input: sampler" for online learning.
    input: 'dataset'
    input_config:
      paths: ["tests/data/pendulum/enormous.zip"]
      format: 'json'
    # Our input file above comes from an SAC run. Actions in there
    # are already normalized (produced by SquashedGaussian).
    actions_in_input_normalized: true
    clip_actions: true

    twin_q: true
    train_batch_size: 2000
    bc_iters: 100
    num_workers: 2
    min_time_s_per_iteration: 10

    metrics_smoothing_episodes: 5

    # Evaluate in an actual environment.
    evaluation_interval: 1
    evaluation_num_workers: 2
    evaluation_duration: 10
    evaluation_parallel_to_training: true
    evaluation_config:
      input: sampler
      # Deterministic evaluation: canonical lowercase boolean (was `False`).
      explore: false