ray/rllib/tuned_examples/appo/cartpole-appo-vtrace-separate-losses.yaml

cartpole-appo-vtrace-separate-losses:
    env: CartPole-v0
    run: APPO
    stop:
        episode_reward_mean: 150
        timesteps_total: 200000
    config:
        # Only works for tf|tf2 so far.
        framework: tf
        # Switch on the >1 loss/optimizer API for TFPolicy and EagerTFPolicy.
        _tf_policy_handles_more_than_one_loss: true
        # APPO will produce two separate loss terms:
        # policy loss + value function loss.
        _separate_vf_optimizer: true
        # Separate learning rate for the value function branch.
        _lr_vf: 0.00075

        num_envs_per_worker: 5
        num_workers: 1
        num_gpus: 0
        observation_filter: MeanStdFilter
        num_sgd_iter: 6
        vf_loss_coeff: 0.01
        vtrace: true
        model:
            fcnet_hiddens: [32]
            fcnet_activation: linear
            # Make sure we really have completely separate branches.
            vf_share_layers: false
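
For reference, below is a minimal sketch of launching the same experiment programmatically with ray.tune.run instead of the rllib train -f <file> CLI. It is not part of the tuned-example file; it simply mirrors the YAML settings above and assumes an RLlib version from the same era as this config (the old trainer/config-dict API).

# Minimal sketch (assumption: an RLlib/Ray version matching this YAML's
# old config-dict API) of running the same APPO separate-losses experiment.
import ray
from ray import tune

if __name__ == "__main__":
    ray.init()
    tune.run(
        "APPO",
        stop={"episode_reward_mean": 150, "timesteps_total": 200000},
        config={
            "env": "CartPole-v0",
            # Only works for tf|tf2 so far.
            "framework": "tf",
            # Two loss terms/optimizers: policy loss + value function loss.
            "_tf_policy_handles_more_than_one_loss": True,
            "_separate_vf_optimizer": True,
            "_lr_vf": 0.00075,
            "num_envs_per_worker": 5,
            "num_workers": 1,
            "num_gpus": 0,
            "observation_filter": "MeanStdFilter",
            "num_sgd_iter": 6,
            "vf_loss_coeff": 0.01,
            "vtrace": True,
            "model": {
                "fcnet_hiddens": [32],
                "fcnet_activation": "linear",
                # Keep policy and value branches completely separate.
                "vf_share_layers": False,
            },
        },
    )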