ray/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml

# NOTE: This example will not run w/o a proper config.multiagent setup,
# which currently cannot be done in yaml.
# This setup should learn a decent (not perfect) policy within 100k timesteps
# on a single GPU machine (16 CPUs) using 10 workers (collecting data from
# 10 compiled game binaries in parallel).
# Reported rewards will be the sum of both strikers' rewards (+1 if goal)
# plus the goalie's reward (-1 if goal) across all within-scene parallelized
# playing fields (8 fields, each with 2 strikers + 1 goalie, for the soccer env).
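# Below is a minimal sketch of the missing Python-side multi-agent setup.
# It assumes Unity3DEnv.get_policy_configs_for_game() is available in your
# RLlib version and that this yaml file sits next to the launching script;
# adapt names and paths as needed:
#
#   import yaml
#   from ray import tune
#   from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
#
#   # Build per-policy specs and the agent->policy mapping fn for this game.
#   policies, policy_mapping_fn = Unity3DEnv.get_policy_configs_for_game(
#       "SoccerStrikersVsGoalie")
#
#   # Load this yaml experiment and patch in the multiagent dict (assumed
#   # filename; point this at wherever the yaml actually lives).
#   with open("unity3d-soccer-strikers-vs-goalie-ppo.yaml") as f:
#       experiment = next(iter(yaml.safe_load(f).values()))
#   config = experiment["config"]
#   config["env"] = Unity3DEnv
#   config["multiagent"] = {
#       "policies": policies,
#       "policy_mapping_fn": policy_mapping_fn,
#   }
#   tune.run("PPO", config=config, stop=experiment["stop"])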
unity3d-soccer-strikers-vs-goalie-ppo:
    env: ray.rllib.env.wrappers.unity3d_env.Unity3DEnv
    run: PPO
    stop:
        timesteps_total: 1000000
    config:
        # NOTE: This example will not run w/o the following multi-agent setup
        # for the SoccerStrikersVsGoalie Unity3D env (see the commented Python
        # sketch at the top of this file):
        # multiagent:
        #   policies: [policies list]
        #   policy_mapping_fn: [agent-to-policy mapping function]
        # Works for both torch and tf.
        framework: tf
        env_config:
            # Put the path to your compiled game executable here.
            file_name: /home/ubuntu/soccer_strikers_vs_goalie_linux.x86_64
            # Timesteps after which a hard-reset will happen (all agents).
            episode_horizon: 3000
        lr: 0.0003
        lambda: 0.95
        gamma: 0.99
        sgd_minibatch_size: 256
        train_batch_size: 4000
        clip_param: 0.2
        # When running inside the Unity editor, use only one worker (there is
        # only a single Unity instance to collect data from)!
        num_workers: 10
        num_sgd_iter: 20
        rollout_fragment_length: 200
        no_done_at_end: true
        model:
            fcnet_hiddens: [512, 512]
        # If no executable is provided (i.e. the env runs inside the Unity3D
        # editor), do not evaluate, b/c the editor only allows one connection
        # at a time.
        evaluation_interval: 0
        evaluation_duration: 1