---
# Experiment definition keyed by experiment name. The layout (env / run /
# stop / config) looks like a Ray Tune / RLlib experiment spec -- confirm
# against the loader that consumes this file.
halfcheetah-mb-mpo:
  # Dotted path of the environment wrapper class to train on.
  env: ray.rllib.examples.env.halfcheetah.HalfCheetahWrapper
  # Name of the algorithm/trainer to run.
  run: MBMPO
  # Stopping criteria for the run.
  stop:
    training_iteration: 500
  # Algorithm settings for the run.
  config:
    # Only supported in torch right now
    framework: torch
    horizon: 200
    num_envs_per_worker: 20
    inner_adaptation_steps: 1
    maml_optimizer_steps: 8
    gamma: 0.99
    lambda: 1.0
    lr: 0.001
    clip_param: 0.5
    kl_target: 0.01
    # Kept in plain decimal notation (= 1e-10): some YAML 1.1 loaders read
    # dot-less exponent forms such as `1e-10` as strings instead of floats.
    kl_coeff: 0.0000000001
    num_workers: 20
    num_gpus: 1
    inner_lr: 0.001
    clip_actions: false
    # Model sub-section of the config.
    model:
      # Short leaf list, so flow style is kept.
      fcnet_hiddens: [32, 32]
      free_log_std: true