ray/rllib/tuned_examples/cql/pendulum-cql.yaml

# Given an offline file generated by a SAC run via:
# rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
# Pendulum CQL can attain ~ -300 reward in 10k from that file.
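# To run this example from the rllib/ directory (analogous to the SAC command
# above): rllib train -f tuned_examples/cql/pendulum-cql.yaml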
pendulum-cql:
    env: Pendulum-v1
    run: CQL
    stop:
        evaluation/episode_reward_mean: -700
        timesteps_total: 200000
    config:
        # Works for both torch and tf.
        framework: tf
        # Use one or more offline files or "input: sampler" for online learning.
        input: ["tests/data/pendulum/enormous.zip"]
        # Our input file above comes from an SAC run. Actions in there
        # are already normalized (produced by SquashedGaussian).
        actions_in_input_normalized: true
        clip_actions: true
        twin_q: true
        train_batch_size: 2000
        learning_starts: 0
        bc_iters: 100
        metrics_smoothing_episodes: 5

        # Evaluate in an actual environment.
        evaluation_interval: 1
        evaluation_num_workers: 2
        evaluation_num_episodes: 10
        evaluation_parallel_to_training: true
        evaluation_config:
            input: sampler
            explore: False
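
The same experiment can also be launched programmatically instead of via the
`rllib train -f` CLI. Below is a minimal sketch, assuming a Ray 1.x-era API
where `ray.tune.run` accepts the registered trainer name "CQL"; the stop
criteria and config dict simply mirror the YAML above, and the offline input
path may need to be made absolute when run outside the rllib/ directory.

# Minimal sketch: programmatic equivalent of this tuned example (assumes a
# Ray 1.x-era install with RLlib and TensorFlow available).
import ray
from ray import tune

if __name__ == "__main__":
    ray.init()
    tune.run(
        "CQL",
        name="pendulum-cql",
        stop={
            "evaluation/episode_reward_mean": -700,
            "timesteps_total": 200000,
        },
        config={
            "env": "Pendulum-v1",
            "framework": "tf",
            # Offline data produced by a previous SAC run; actions in it are
            # already normalized (SquashedGaussian). Path is relative to the
            # rllib package in the tuned example; adjust as needed here.
            "input": ["tests/data/pendulum/enormous.zip"],
            "actions_in_input_normalized": True,
            "clip_actions": True,
            "twin_q": True,
            "train_batch_size": 2000,
            "learning_starts": 0,
            "bc_iters": 100,
            "metrics_smoothing_episodes": 5,
            # Evaluate in an actual environment, in parallel to training.
            "evaluation_interval": 1,
            "evaluation_num_workers": 2,
            "evaluation_num_episodes": 10,
            "evaluation_parallel_to_training": True,
            "evaluation_config": {"input": "sampler", "explore": False},
        },
    )
    ray.shutdown()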