multi-agent-cartpole-impala:
    env: ray.rllib.examples.env.multi_agent.MultiAgentCartPole
    run: IMPALA
    stop:
        episode_reward_mean: 600  # 600 / 4 (==num_agents) = 150
        timesteps_total: 200000
    config:
        # Works for both torch and tf.
        framework: tf
        # 4-agent MA cartpole.
        env_config:
            config:
                num_agents: 4
        num_envs_per_worker: 5
        num_workers: 4
        num_gpus: 1
        _fake_gpus: true
        observation_filter: MeanStdFilter
        num_sgd_iter: 1
        vf_loss_coeff: 0.005
        vtrace: true
        vtrace_drop_last_ts: false
        model:
            fcnet_hiddens: [32]
            fcnet_activation: linear
            vf_share_layers: true
        replay_ratio: 0.0
        multiagent:
            policies: ["p0", "p1", "p2", "p3"]
            # YAML-capable policy_mapping_fn definition via providing a
            # callable class here.
            policy_mapping_fn:
                type: ray.rllib.examples.multi_agent_and_self_play.policy_mapping_fn.PolicyMappingFn
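
# The `policy_mapping_fn` entry above references a callable class so that the
# mapping can be declared directly in YAML. A minimal sketch of what such a
# class might look like (an assumption for illustration, not the actual
# contents of the module referenced above; it assumes MultiAgentCartPole's
# integer agent IDs 0-3 map onto policies "p0"-"p3"):
#
#   class PolicyMappingFn:
#       """Callable class usable as a YAML `policy_mapping_fn`."""
#
#       def __call__(self, agent_id, episode, worker, **kwargs):
#           # Map agent 0 -> "p0", agent 1 -> "p1", etc.
#           return f"p{agent_id}"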