# Tuned APPO experiment for the HalfCheetah-v2 environment.
#
# Reported result: this can reach 9k reward in 2 hours on a Titan XP GPU
# with 16 workers and 8 envs per worker.
# NOTE(review): the config below sets num_envs_per_worker to 32, not 8.
# Confirm which value the reported result was obtained with and reconcile
# this comment and the config.
halfcheetah-appo:
  env: HalfCheetah-v2
  run: APPO

  stop:
    # 10800 s = 3 hours.
    time_total_s: 10800

  config:
    # Works for both torch and tf.
    framework: tf

    vtrace: true
    gamma: 0.99
    lambda: 0.95

    # Sampling / batching.
    rollout_fragment_length: 512
    train_batch_size: 4096
    num_workers: 16
    num_gpus: 1
    broadcast_interval: 1
    max_sample_requests_in_flight_per_worker: 1
    num_multi_gpu_tower_stacks: 1
    num_envs_per_worker: 32
    minibatch_buffer_size: 16
    num_sgd_iter: 32

    # Optimisation.
    clip_param: 0.2
    # Two schedule points: 0.0005 at 0 and 0.000001 at 150000000.
    # Presumably [timestep, learning rate] pairs; verify against the
    # RLlib documentation for lr_schedule.
    lr_schedule:
      - [0, 0.0005]
      - [150000000, 0.000001]
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    grad_clip: 0.5
    batch_mode: truncate_episodes

    # KL loss settings.
    use_kl_loss: true
    kl_coeff: 1.0
    kl_target: 0.04

    observation_filter: MeanStdFilter