
* Unpin gym and deprecate Pendulum-v0

Many tests in RLlib depended on Pendulum-v0; however, in gym 0.21, Pendulum-v0 was deprecated in favor of Pendulum-v1. This may change reward thresholds, so all of the Pendulum-v1 benchmarks may have to be rerun, or another environment used instead. The same applies to FrozenLake-v0 and FrozenLake-v1. Lastly, all of the RLlib tests and Tune tests have been moved to Python 3.7.

* Fix tune test_sampler::testSampleBoundsAx

* Fix re-install of Ray for py3.7 tests

Co-authored-by: avnishn <avnishn@uw.edu>
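As a minimal sketch of the rename this commit works around (the try/except fallback is illustrative and not part of the change; it assumes gym's `gym.error.Error` base exception):

import gym

# In gym 0.21, Pendulum-v0 was deprecated in favor of Pendulum-v1
# (likewise FrozenLake-v0 -> FrozenLake-v1). Prefer the v1 ID and
# fall back to v0 on older gym releases.
try:
    env = gym.make("Pendulum-v1")
except gym.error.Error:
    env = gym.make("Pendulum-v0")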
cartpole-appo-vtrace-separate-losses:
    env: CartPole-v0
    run: APPO
    stop:
        episode_reward_mean: 150
        timesteps_total: 200000
    config:
        # Only works for tf|tf2 so far.
        framework: tf
        # Switch on >1 loss/optimizer API for TFPolicy and EagerTFPolicy.
        _tf_policy_handles_more_than_one_loss: true
        # APPO will produce two separate loss terms:
        # policy loss + value function loss.
        _separate_vf_optimizer: true
        # Separate learning rate for the value function branch.
        _lr_vf: 0.00075

        num_envs_per_worker: 5
        num_workers: 1
        num_gpus: 0
        observation_filter: MeanStdFilter
        num_sgd_iter: 6
        vf_loss_coeff: 0.01
        vtrace: true
        model:
            fcnet_hiddens: [32]
            fcnet_activation: linear
            # Make sure we really have completely separate branches.
            vf_share_layers: false
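Tuned-example YAML files like this one are typically launched with RLlib's `rllib train -f <this-file>` CLI. A rough Python equivalent using Ray Tune is sketched below; the dict simply mirrors the stop criteria and `config:` section of the YAML above, and the sketch assumes the Ray 1.x-era `tune.run` API.

import ray
from ray import tune

ray.init()

# Mirrors the YAML above: APPO on CartPole-v0 with a separate
# value-function optimizer and learning rate.
tune.run(
    "APPO",
    stop={
        "episode_reward_mean": 150,
        "timesteps_total": 200000,
    },
    config={
        "env": "CartPole-v0",
        # Only works for tf|tf2 so far.
        "framework": "tf",
        "_tf_policy_handles_more_than_one_loss": True,
        "_separate_vf_optimizer": True,
        "_lr_vf": 0.00075,
        "num_envs_per_worker": 5,
        "num_workers": 1,
        "num_gpus": 0,
        "observation_filter": "MeanStdFilter",
        "num_sgd_iter": 6,
        "vf_loss_coeff": 0.01,
        "vtrace": True,
        "model": {
            "fcnet_hiddens": [32],
            "fcnet_activation": "linear",
            # Keep policy and value branches completely separate.
            "vf_share_layers": False,
        },
    },
)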