"""Multi-agent parameter-sharing example on the PettingZoo Waterworld env.

When executed as a script this module:

1. registers the PettingZoo ``waterworld_v3`` environment, wrapped in
   RLlib's ``PettingZooEnv``, under the name ``"waterworld"``;
2. launches a Tune run of the ``"APEX_DDPG"`` trainer in which every agent
   is mapped to one and the same policy, ``"shared_policy"``.

Importing the module has no side effects beyond the imports themselves.
"""

from ray import tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from pettingzoo.sisl import waterworld_v3

# Based on code from github.com/parametersharingmadrl/parametersharingmadrl

if __name__ == "__main__":
    # Abbreviation legend kept from the original script. Note that neither
    # entry names the trainer actually run below, which is "APEX_DDPG".
    # RDQN - Rainbow DQN
    # ADQN - Apex DQN

    # Make the env available to RLlib under the name used in `config["env"]`.
    # The creator ignores the env-config argument it is called with.
    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))

    tune.run(
        "APEX_DDPG",
        # Stop criterion: total number of episodes reported by the trial.
        stop={"episodes_total": 60000},
        checkpoint_freq=10,
        config={
            # Environment specific.
            "env": "waterworld",
            # General
            "num_gpus": 1,
            "num_workers": 2,
            "num_envs_per_worker": 8,
            "replay_buffer_config": {
                "learning_starts": 1000,
                "capacity": int(1e5),
                "prioritized_replay_alpha": 0.5,
            },
            "compress_observations": True,
            "rollout_fragment_length": 20,
            "train_batch_size": 512,
            "gamma": 0.99,
            "n_step": 3,
            "lr": 0.0001,
            "target_network_update_freq": 50000,
            "min_sample_timesteps_per_iteration": 25000,
            # Method specific.
            "multiagent": {
                # We only have one policy (calling it "shared_policy").
                # Class, obs/act-spaces, and config will be derived
                # automatically.
                "policies": {"shared_policy"},
                # Always use the "shared_policy" policy, whichever agent
                # is asking.
                "policy_mapping_fn": (
                    lambda agent_id, episode, **kwargs: "shared_policy"
                ),
            },
        },
    )