# NOTE: This example will not run w/o a proper config.multiagent setup,
# which currently cannot be done in yaml (see the commented Python sketch
# at the end of this file).
# This setup should learn a decent (not perfect) policy within 100k timesteps
# on a single-GPU machine (16 CPUs), using 10 workers (collecting data from
# 10 compiled game binaries in parallel).
# Reported rewards are the sum of both strikers' rewards (+1 if goal) plus the
# goalie's reward (-1 if goal) across all within-scene parallelized playing
# fields (8 fields, each with 2 strikers + 1 goalie, for the soccer env).
unity3d-soccer-strikers-vs-goalie-ppo:
    env: ray.rllib.env.wrappers.unity3d_env.Unity3DEnv
    run: PPO
    stop:
        timesteps_total: 1000000
    config:
        # NOTE: This example will not run w/o the following multiagent setup
        # for the SoccerStrikersVsGoalie Unity3D env:
        # multiagent:
        #   policies: [policies list]
        #   policy_mapping_fn: [agent-to-policy mapping function]
        # Works for both torch and tf.
        framework: tf
        env_config:
            # Put the path to your compiled game executable here.
            file_name: /home/ubuntu/soccer_strikers_vs_goalie_linux.x86_64
            # Number of timesteps after which a hard reset happens (for all agents).
            episode_horizon: 3000
        lr: 0.0003
        lambda: 0.95
        gamma: 0.99
        sgd_minibatch_size: 256
        train_batch_size: 4000
        clip_param: 0.2
        # When running in the editor, use only one worker (there is only
        # one Unity instance to connect to)!
        num_workers: 10
        num_sgd_iter: 20
        rollout_fragment_length: 200
        no_done_at_end: true
        model:
            fcnet_hiddens: [512, 512]
        # If no executable is provided (i.e. the Unity3D editor is used),
        # do not evaluate, b/c the editor allows only one connection at a time.
        evaluation_interval: 0
        evaluation_num_episodes: 1
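
# ---------------------------------------------------------------------------
# The multiagent setup referenced above cannot be expressed in this yaml, so
# below is a minimal, commented-out Python sketch of one way to provide it
# (kept as comments so this file remains valid yaml). It assumes your Ray
# version ships Unity3DEnv.get_policy_configs_for_game(); if it does not,
# build the policies dict and the policy_mapping_fn by hand instead.
#
# import ray
# from ray import tune
# from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
#
# # Derive the per-game policies dict and the agent-to-policy mapping
# # function for the SoccerStrikersVsGoalie game.
# policies, policy_mapping_fn = Unity3DEnv.get_policy_configs_for_game(
#     "SoccerStrikersVsGoalie")
#
# config = {
#     "env": Unity3DEnv,
#     "env_config": {
#         "file_name": "/home/ubuntu/soccer_strikers_vs_goalie_linux.x86_64",
#         "episode_horizon": 3000,
#     },
#     # The piece that cannot be specified in the yaml above:
#     "multiagent": {
#         "policies": policies,
#         "policy_mapping_fn": policy_mapping_fn,
#     },
#     # ... plus the PPO hyperparameters from the config section above ...
# }
#
# ray.init()
# tune.run("PPO", config=config, stop={"timesteps_total": 1000000})
# ---------------------------------------------------------------------------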