# ray/rllib/tuned_examples/regression_tests/pendulum-appo-vtrace.yaml
#
# Listing metadata captured from the source viewer: 20 lines, 516 B, YAML.

# Regression test: APPO on Pendulum-v0.
# Layout: experiment name -> env / run / stop / config.
pendulum-appo-vt:
  env: Pendulum-v0
  run: APPO
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -1200  # just check it learns a bit
    timesteps_total: 500000
  # Algorithm settings handed to the APPO trainer.
  config:
    # Both the experiment name ("-vt") and the file name ("-vtrace") mark
    # this as the V-trace variant, so V-trace has to be enabled here.
    vtrace: true
    num_gpus: 0
    num_workers: 1
    lambda: 0.1
    gamma: 0.95
    lr: 0.0003
    train_batch_size: 100
    minibatch_buffer_size: 16
    num_sgd_iter: 10
    # Policy network: two fully connected hidden layers of 64 units each.
    model:
      fcnet_hiddens: [64, 64]
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter