# Experiment definition: runs MADDPG on the TwoStepGame example environment
# with two policies (p0, p1), each configured with its own agent_id.
two-step-game-maddpg:
  env: ray.rllib.examples.env.two_step_game.TwoStepGame
  run: MADDPG
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: 7.2
    timesteps_total: 20000
  config:
    # MADDPG only supports tf for now.
    framework: tf

    # NOTE(review): `env_config` is nested inside `env_config` here; this is
    # kept exactly as in the original -- confirm the environment really
    # expects the inner level.
    env_config:
      env_config:
        actions_are_logits: true

    num_steps_sampled_before_learning_starts: 200

    multiagent:
      # One four-element spec per policy; the first three entries are left as
      # null and the last one carries the per-policy settings.
      # NOTE(review): presumably (policy class, observation space, action
      # space, config overrides) -- verify against the consumer's schema.
      policies:
        p0:
          - null
          - null
          - null
          - {agent_id: 0}
        p1:
          - null
          - null
          - null
          - {agent_id: 1}
      # YAML-capable policy_mapping_fn definition via providing a callable
      # class here.
      policy_mapping_fn:
        type: ray.rllib.examples.multi_agent_and_self_play.policy_mapping_fn.PolicyMappingFn