# ray/rllib/tuned_examples/halfcheetah-appo.yaml

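# This can reach 9k reward in 2 hours on a Titan XP GPU
# with 16 workers and 8 envs per worker.
# NOTE(review): the config below sets num_envs_per_worker: 32 (not 8) and
# stops after time_total_s: 10800 (3 hours, not 2) -- confirm which setup
# the benchmark figure above actually refers to.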
# Experiment spec: APPO on the HalfCheetah-v2 environment.
halfcheetah-appo:
    env: HalfCheetah-v2
    run: APPO

    stop:
        # 10800 s = 3 hours.
        time_total_s: 10800

    config:
        vtrace: true
        gamma: 0.99
        lambda: 0.95
        sample_batch_size: 512
        train_batch_size: 4096
        num_workers: 16
        num_gpus: 1
        broadcast_interval: 1
        max_sample_requests_in_flight_per_worker: 1
        num_data_loader_buffers: 1
        num_envs_per_worker: 32
        minibatch_buffer_size: 16
        num_sgd_iter: 32
        clip_param: 0.2

        # Two schedule points, each a two-element pair.
        # NOTE(review): presumably [timestep, learning_rate] -- confirm
        # against the RLlib lr_schedule documentation.
        lr_schedule:
            - [0, 0.0005]
            - [150000000, 0.000001]

        vf_loss_coeff: 0.5
        entropy_coeff: 0.01
        grad_clip: 0.5
        batch_mode: truncate_episodes

        # KL-loss settings.
        use_kl_loss: true
        kl_coeff: 1.0
        kl_target: 0.04

        observation_filter: MeanStdFilter
# Last change: "[rllib] Importance Sampling and KL Loss for APPO (#5051)",
# 2019-07-29 15:02:32 -07:00