ray/python/ray/rllib/tests/test_supported_spaces.py

import unittest
import traceback

import gym
from gym.spaces import Box, Discrete, Tuple, Dict
from gym.envs.registration import EnvSpec
import numpy as np
import sys

import ray
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.tests.test_multi_agent_env import (MultiCartpole,
                                                  MultiMountainCar)
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.tune.registry import register_env

# Action spaces that check_support() pairs with every observation space.
# The keys are short labels that end up in the per-combination result report.
ACTION_SPACES_TO_TEST = dict(
    discrete=Discrete(5),
    vector=Box(-1.0, 1.0, (5, ), dtype=np.float32),
    # Mixed discrete/continuous composite action.
    tuple=Tuple([
        Discrete(2),
        Discrete(3),
        Box(-1.0, 1.0, (5, ), dtype=np.float32),
    ]),
)

# Observation spaces that check_support() pairs with every action space.
# The keys are short labels that end up in the per-combination result report.
OBSERVATION_SPACES_TO_TEST = dict(
    discrete=Discrete(5),
    vector=Box(-1.0, 1.0, (5, ), dtype=np.float32),
    image=Box(-1.0, 1.0, (84, 84, 1), dtype=np.float32),
    atari=Box(-1.0, 1.0, (210, 160, 3), dtype=np.float32),
    tuple=Tuple([
        Discrete(10),
        Box(-1.0, 1.0, (5, ), dtype=np.float32),
    ]),
    dict=Dict({
        "task": Discrete(10),
        "position": Box(-1.0, 1.0, (5, ), dtype=np.float32),
    }),
)


def make_stub_env(action_space, obs_space, check_action_bounds):
    """Build a minimal gym.Env class bound to the given spaces.

    Args:
        action_space: Space assigned to the env's ``action_space``.
        obs_space: Space assigned to the env's ``observation_space``;
            observations are drawn from it with ``sample()``.
        check_action_bounds: If truthy, ``step()`` raises ValueError for
            any action that ``action_space.contains()`` rejects.

    Returns:
        The StubEnv class itself (not an instance).
    """

    def illegal_action(space, action):
        # Both validation failures in step() report the same message.
        return ValueError("Illegal action for {}: {}".format(space, action))

    class StubEnv(gym.Env):
        def __init__(self):
            self.action_space = action_space
            self.observation_space = obs_space
            self.spec = EnvSpec("StubEnv-v0")

        def reset(self):
            # The initial observation is just a random sample.
            return self.observation_space.sample()

        def step(self, action):
            space = self.action_space
            if check_action_bounds and not space.contains(action):
                raise illegal_action(space, action)
            # Tuple actions must always carry one entry per sub-space,
            # whether or not bounds checking is enabled.
            if (isinstance(space, Tuple)
                    and len(action) != len(space.spaces)):
                raise illegal_action(space, action)
            # Reward 1 and done=True: every episode lasts a single step.
            return self.observation_space.sample(), 1, True, {}

    return StubEnv


def _train_once(alg, config, env_name):
    """Train agent `alg` on `env_name` for one iteration; return a status.

    Returns:
        "ok" if construction and one ``train()`` call succeed,
        "unsupported" if UnsupportedSpaceException is raised, and
        "ERROR" (after printing the exception and traceback) for any
        other exception.
    """
    agent = None
    try:
        agent = get_agent_class(alg)(config=config, env=env_name)
        agent.train()
    except UnsupportedSpaceException:
        return "unsupported"
    except Exception as e:
        print(e)
        print(traceback.format_exc())
        return "ERROR"
    finally:
        # Compare against None explicitly so that cleanup never depends
        # on the truthiness of the agent object.
        if agent is not None:
            try:
                agent.stop()
            except Exception as e:
                # Best effort: a failing stop() must not mask the result.
                print("Ignoring error stopping agent", e)
    return "ok"


def check_support(alg, config, stats, check_bounds=False):
    """Run `alg` against every action/observation space combination.

    For each pair from ACTION_SPACES_TO_TEST x OBSERVATION_SPACES_TO_TEST
    a stub env is registered under the name "stub_env" and the agent is
    trained for one iteration.

    Args:
        alg: Agent name passed to ``get_agent_class``.
        config: Agent config dict, passed unchanged to every agent.
        stats: Dict updated in place; ``stats[alg, a_name, o_name]`` is
            set to "ok", "unsupported" or "ERROR".
        check_bounds: Forwarded to ``make_stub_env`` so that the env
            rejects actions outside its action space.
    """
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing", alg, action_space, obs_space, "===")
            stub_env = make_stub_env(action_space, obs_space, check_bounds)
            # Re-registering the same name replaces the previous stub.
            register_env("stub_env", lambda c: stub_env())
            stat = _train_once(alg, config, "stub_env")
            print(stat)
            print()
            stats[alg, a_name, o_name] = stat


def check_support_multiagent(alg, config):
    """Train agent `alg` for one iteration on a two-agent environment.

    Algorithms whose name contains "DDPG" run on "multi_mountaincar";
    all others run on "multi_cartpole". Exceptions from training
    propagate to the caller; the agent is stopped either way.

    Args:
        alg: Agent name passed to ``get_agent_class``.
        config: Agent config dict.
    """
    register_env("multi_mountaincar", lambda _: MultiMountainCar(2))
    register_env("multi_cartpole", lambda _: MultiCartpole(2))
    # NOTE(review): presumably DDPG needs a continuous action space,
    # hence the separate env -- confirm against MultiMountainCar.
    env_name = "multi_mountaincar" if "DDPG" in alg else "multi_cartpole"
    agent = get_agent_class(alg)(config=config, env=env_name)
    try:
        agent.train()
    finally:
        agent.stop()


class ModelSupportedSpaces(unittest.TestCase):
    """Checks that each agent trains on the supported space combinations.

    Ray is started with 4 CPUs before every test method and shut down
    afterwards.
    """

    def setUp(self):
        ray.init(num_cpus=4)

    def tearDown(self):
        ray.shutdown()

    def testAll(self):
        """Run every single-agent algorithm over the full space grid.

        The test fails if any combination ends in a status other than
        "ok" or "unsupported".
        """
        # (algorithm, config, check_bounds), in execution order.
        cases = [
            ("IMPALA", {"num_gpus": 0}, False),
            ("APPO", {"num_gpus": 0, "vtrace": False}, False),
            ("DDPG", {
                "noise_scale": 100.0,
                "timesteps_per_iteration": 1
            }, True),
            ("DQN", {"timesteps_per_iteration": 1}, False),
            ("A3C", {
                "num_workers": 1,
                "optimizer": {
                    "grads_per_step": 1
                }
            }, True),
            ("PPO", {
                "num_workers": 1,
                "num_sgd_iter": 1,
                "train_batch_size": 10,
                "sample_batch_size": 10,
                "sgd_minibatch_size": 1,
            }, True),
            ("ES", {
                "num_workers": 1,
                "noise_size": 10000000,
                "episodes_per_batch": 1,
                "train_batch_size": 1
            }, False),
            ("ARS", {
                "num_workers": 1,
                "noise_size": 10000000,
                "num_rollouts": 1,
                "rollouts_used": 1
            }, False),
            ("PG", {
                "num_workers": 1,
                "optimizer": {}
            }, True),
        ]

        stats = {}
        for alg, config, bounds in cases:
            check_support(alg, config, stats, check_bounds=bounds)

        # Print the full report before asserting so that every failing
        # combination is visible in the log.
        acceptable = ["ok", "unsupported"]
        num_unexpected_errors = 0
        for key in sorted(stats):
            alg, a_name, o_name = key
            stat = stats[key]
            print(alg, "action_space", a_name, "obs_space", o_name, "result",
                  stat)
            if stat not in acceptable:
                num_unexpected_errors += 1
        self.assertEqual(num_unexpected_errors, 0)

    def testMultiAgent(self):
        """Run the multi-agent check for each listed algorithm."""

        def apex_config():
            # APEX and APEX_DDPG use identical settings; hand each call
            # its own dict object.
            return {
                "num_workers": 2,
                "timesteps_per_iteration": 1000,
                "num_gpus": 0,
                "min_iter_time_s": 1,
                "learning_starts": 1000,
                "target_network_update_freq": 100,
            }

        # (algorithm, config), in execution order.
        cases = [
            ("APEX", apex_config()),
            ("APEX_DDPG", apex_config()),
            ("IMPALA", {"num_gpus": 0}),
            ("DQN", {"timesteps_per_iteration": 1}),
            ("A3C", {
                "num_workers": 1,
                "optimizer": {
                    "grads_per_step": 1
                }
            }),
            ("PPO", {
                "num_workers": 1,
                "num_sgd_iter": 1,
                "train_batch_size": 10,
                "sample_batch_size": 10,
                "sgd_minibatch_size": 1,
            }),
            ("PG", {"num_workers": 1, "optimizer": {}}),
            ("DDPG", {"timesteps_per_iteration": 1}),
        ]
        for alg, config in cases:
            check_support_multiagent(alg, config)


if __name__ == "__main__":
    # `--smoke` as the first argument shrinks the grid to a single action
    # space and two observation spaces for a quick run.
    if len(sys.argv) > 1 and sys.argv[1] == "--smoke":
        ACTION_SPACES_TO_TEST = {
            "discrete": Discrete(5),
        }
        OBSERVATION_SPACES_TO_TEST = {
            "vector": Box(0.0, 1.0, (5, ), dtype=np.float32),
            "atari": Box(0.0, 1.0, (210, 160, 3), dtype=np.float32),
        }
        # unittest.main() parses sys.argv itself and exits with an error
        # on options it does not know, so strip our private flag first.
        del sys.argv[1]
    unittest.main(verbosity=2)
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`import unittest`
			`import traceback`

			`import gym`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`from gym.spaces import Box, Discrete, Tuple, Dict`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`from gym.envs.registration import EnvSpec`
[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601) 2018-03-06 08:31:02 +00:00			`import numpy as np`
[rllib] Fix stats collection and some docs bugs since the refactoring (#2361) * fix * fix pbt example * fix * fix * single thread by default * vec * fix * fix 2018-07-07 13:29:20 -07:00			`import sys`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`import ray`
[rllib] [rfc] add contrib module and guideline for merging (#3565) This adds guidelines for merging code into `rllib/contrib` vs `rllib/agents`. Also, clean up the agent import code to make registration easier. 2018-12-21 03:44:34 +09:00			`from ray.rllib.agents.registry import get_agent_class`
Move test folders under rllib/tune from test -> tests. (#4214) 2019-03-02 13:37:16 -08:00			`from ray.rllib.tests.test_multi_agent_env import (MultiCartpole,`
			`MultiMountainCar)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`from ray.rllib.utils.error import UnsupportedSpaceException`
			`from ray.tune.registry import register_env`

			`ACTION_SPACES_TO_TEST = {`
			`"discrete": Discrete(5),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"vector": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
			`"tuple": Tuple(`
[rllib] Fix support for mixed discrete and continuous action spaces, add to regression test (#2655) * fix * lint * fix 2018-08-15 10:19:41 -07:00			`[Discrete(2),`
			`Discrete(3),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`Box(-1.0, 1.0, (5, ), dtype=np.float32)]),`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`}`

			`OBSERVATION_SPACES_TO_TEST = {`
			`"discrete": Discrete(5),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"vector": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
			`"image": Box(-1.0, 1.0, (84, 84, 1), dtype=np.float32),`
			`"atari": Box(-1.0, 1.0, (210, 160, 3), dtype=np.float32),`
			`"tuple": Tuple([Discrete(10),`
			`Box(-1.0, 1.0, (5, ), dtype=np.float32)]),`
			`"dict": Dict({`
			`"task": Discrete(10),`
			`"position": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
			`}),`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`}`


[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`def make_stub_env(action_space, obs_space, check_action_bounds):`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`class StubEnv(gym.Env):`
			`def __init__(self):`
			`self.action_space = action_space`
			`self.observation_space = obs_space`
[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601) 2018-03-06 08:31:02 +00:00			`self.spec = EnvSpec("StubEnv-v0")`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`def reset(self):`
			`sample = self.observation_space.sample()`
			`return sample`

			`def step(self, action):`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`if check_action_bounds and not self.action_space.contains(action):`
			`raise ValueError("Illegal action for {}: {}".format(`
			`self.action_space, action))`
			`if (isinstance(self.action_space, Tuple)`
			`and len(action) != len(self.action_space.spaces)):`
			`raise ValueError("Illegal action for {}: {}".format(`
			`self.action_space, action))`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`return self.observation_space.sample(), 1, True, {}`

			`return StubEnv`


[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`def check_support(alg, config, stats, check_bounds=False):`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`for a_name, action_space in ACTION_SPACES_TO_TEST.items():`
			`for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():`
			`print("=== Testing", alg, action_space, obs_space, "===")`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`stub_env = make_stub_env(action_space, obs_space, check_bounds)`
[rllib] Remove need to pass around registry (#2250) * remove registry * fix * too many _ * fix * cloudpickle * Update registry.py * yapf * fix test * fix kv check 2018-06-19 22:47:00 -07:00			`register_env("stub_env", lambda c: stub_env())`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`stat = "ok"`
			`a = None`
			`try:`
			`a = get_agent_class(alg)(config=config, env="stub_env")`
			`a.train()`
Fix linting errors. (#3127) 2018-10-24 16:30:00 -07:00			`except UnsupportedSpaceException:`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`stat = "unsupported"`
			`except Exception as e:`
			`stat = "ERROR"`
			`print(e)`
			`print(traceback.format_exc())`
			`finally:`
			`if a:`
			`try:`
			`a.stop()`
			`except Exception as e:`
			`print("Ignoring error stopping agent", e)`
			`pass`
			`print(stat)`
			`print()`
			`stats[alg, a_name, o_name] = stat`


[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`def check_support_multiagent(alg, config):`
			`register_env("multi_mountaincar", lambda _: MultiMountainCar(2))`
			`register_env("multi_cartpole", lambda _: MultiCartpole(2))`
[rllib] Misc fixes: set lr for PG, better error message for LSTM/PPO, fix multi-agent/APEX (#3697) * fix * update test * better error * compute * eps fix * add get_policy() api * Update agent.py * better err msg * fix * pass in rew 2019-01-06 19:37:35 -08:00			`if "DDPG" in alg:`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`a = get_agent_class(alg)(config=config, env="multi_mountaincar")`
			`else:`
			`a = get_agent_class(alg)(config=config, env="multi_cartpole")`
			`try:`
			`a.train()`
			`finally:`
			`a.stop()`


[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`class ModelSupportedSpaces(unittest.TestCase):`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`def setUp(self):`
Revert [rllib] Reserve CPUs for replay actors in apex (#4404) * Revert "[rllib] Reserve CPUs for replay actors in apex (#4217)" This reverts commit 2781d74680bba8dd1a490514161d4e779280c893. * comment 2019-03-19 09:58:45 -07:00			`ray.init(num_cpus=4)`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00
			`def tearDown(self):`
			`ray.shutdown()`

[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`def testAll(self):`
			`stats = {}`
[rllib] Parallel-data loading and multi-gpu support for IMPALA (#2766) 2018-10-15 11:02:50 -07:00			`check_support("IMPALA", {"num_gpus": 0}, stats)`
Appo (#3779) * Deleted old fork, updated new ray and moved PPO-impala to APPO in ppo folder * Deleted unneccesary vtrace.py file * Update pong-impala.yaml * Cleaned PPO Code * Update pong-impala.yaml * Update pong-impala.yaml * wip * new ifle * refactor * add vtrace off option * revert * support any space * docs * fix comment * remove kl * Update cartpole-appo-vtrace.yaml 2019-01-18 13:40:26 -08:00			`check_support("APPO", {"num_gpus": 0, "vtrace": False}, stats)`
[rllib] Clip DDPG ou-noise to avoid exceeding action bounds (#3386) Closes #2965 2018-11-24 00:56:50 -08:00			`check_support(`
			`"DDPG", {`
			`"noise_scale": 100.0,`
			`"timesteps_per_iteration": 1`
			`},`
			`stats,`
			`check_bounds=True)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`check_support("DQN", {"timesteps_per_iteration": 1}, stats)`
[rllib] Auto clip actions to Box space range; deprecate squash_to_range (#3426) * fix clip * tweak wording * remove squash entirely * Update rllib-models.rst * fix argument order * Apply suggestions from code review Co-Authored-By: ericl <ekhliang@gmail.com> 2018-12-03 19:55:25 -08:00			`check_support(`
			`"A3C", {`
			`"num_workers": 1,`
			`"optimizer": {`
			`"grads_per_step": 1`
			`}`
			`},`
			`stats,`
			`check_bounds=True)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`check_support(`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"PPO", {`
			`"num_workers": 1,`
			`"num_sgd_iter": 1,`
[rllib] clarify train batch size for PPO (#2793) It's possible to configure PPO in a way that ends up discarding most of the samples (they are treated as "stragglers"). Add a warning when this happens, and raise an exception if the waste is particularly egregious. 2018-09-05 12:06:13 -07:00			`"train_batch_size": 10,`
			`"sample_batch_size": 10,`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"sgd_minibatch_size": 1,`
			`},`
			`stats,`
			`check_bounds=True)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`check_support(`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"ES", {`
			`"num_workers": 1,`
			`"noise_size": 10000000,`
			`"episodes_per_batch": 1,`
[rllib] clarify train batch size for PPO (#2793) It's possible to configure PPO in a way that ends up discarding most of the samples (they are treated as "stragglers"). Add a warning when this happens, and raise an exception if the waste is particularly egregious. 2018-09-05 12:06:13 -07:00			`"train_batch_size": 1`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`}, stats)`
[rllib] add augmented random search (#2714) * added ars * functioning ars with regression test * added regression tests for ARs * fixed default config for ARS * ARS code runs, now time to test * ARS working and tested, changed std deviation of meanstd filter to initialize to 1 * ARS working and tested, changed std deviation of meanstd filter to initialize to 1 * pep8 fixes * removed unused linear model * address comments * more fixing comments * post yapf * fixed support failure * Update LICENSE * Update policies.py * Update test_supported_spaces.py * Update policies.py * Update LICENSE * Update test_supported_spaces.py * Update policies.py * Update policies.py * Update filter.py 2018-08-24 22:20:02 -07:00			`check_support(`
			`"ARS", {`
			`"num_workers": 1,`
			`"noise_size": 10000000,`
[rllib] Use SGD optimizer for ARS (#2916) 2018-09-26 22:32:26 -07:00			`"num_rollouts": 1,`
			`"rollouts_used": 1`
[rllib] add augmented random search (#2714) * added ars * functioning ars with regression test * added regression tests for ARs * fixed default config for ARS * ARS code runs, now time to test * ARS working and tested, changed std deviation of meanstd filter to initialize to 1 * ARS working and tested, changed std deviation of meanstd filter to initialize to 1 * pep8 fixes * removed unused linear model * address comments * more fixing comments * post yapf * fixed support failure * Update LICENSE * Update policies.py * Update test_supported_spaces.py * Update policies.py * Update LICENSE * Update test_supported_spaces.py * Update policies.py * Update policies.py * Update filter.py 2018-08-24 22:20:02 -07:00			`}, stats)`
[rllib] Auto clip actions to Box space range; deprecate squash_to_range (#3426) * fix clip * tweak wording * remove squash entirely * Update rllib-models.rst * fix argument order * Apply suggestions from code review Co-Authored-By: ericl <ekhliang@gmail.com> 2018-12-03 19:55:25 -08:00			`check_support(`
			`"PG", {`
			`"num_workers": 1,`
			`"optimizer": {}`
			`},`
			`stats,`
			`check_bounds=True)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`num_unexpected_errors = 0`
			`for (alg, a_name, o_name), stat in sorted(stats.items()):`
[rllib] Refactor rllib to have a common sample collection pathway (#2149) 2018-06-09 00:21:35 -07:00			`if stat not in ["ok", "unsupported"]:`
			`num_unexpected_errors += 1`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`print(alg, "action_space", a_name, "obs_space", o_name, "result",`
			`stat)`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`self.assertEqual(num_unexpected_errors, 0)`

[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`def testMultiAgent(self):`
[rllib] Misc fixes: set lr for PG, better error message for LSTM/PPO, fix multi-agent/APEX (#3697) * fix * update test * better error * compute * eps fix * add get_policy() api * Update agent.py * better err msg * fix * pass in rew 2019-01-06 19:37:35 -08:00			`check_support_multiagent(`
			`"APEX", {`
			`"num_workers": 2,`
			`"timesteps_per_iteration": 1000,`
			`"num_gpus": 0,`
			`"min_iter_time_s": 1,`
			`"learning_starts": 1000,`
			`"target_network_update_freq": 100,`
			`})`
			`check_support_multiagent(`
			`"APEX_DDPG", {`
			`"num_workers": 2,`
			`"timesteps_per_iteration": 1000,`
			`"num_gpus": 0,`
			`"min_iter_time_s": 1,`
			`"learning_starts": 1000,`
			`"target_network_update_freq": 100,`
			`})`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`check_support_multiagent("IMPALA", {"num_gpus": 0})`
			`check_support_multiagent("DQN", {"timesteps_per_iteration": 1})`
			`check_support_multiagent("A3C", {`
			`"num_workers": 1,`
			`"optimizer": {`
			`"grads_per_step": 1`
			`}`
			`})`
			`check_support_multiagent(`
			`"PPO", {`
			`"num_workers": 1,`
			`"num_sgd_iter": 1,`
			`"train_batch_size": 10,`
			`"sample_batch_size": 10,`
			`"sgd_minibatch_size": 1,`
			`})`
			`check_support_multiagent("PG", {"num_workers": 1, "optimizer": {}})`
			`check_support_multiagent("DDPG", {"timesteps_per_iteration": 1})`

[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`if __name__ == "__main__":`
[rllib] Fix stats collection and some docs bugs since the refactoring (#2361) * fix * fix pbt example * fix * fix * single thread by default * vec * fix * fix 2018-07-07 13:29:20 -07:00			`if len(sys.argv) > 1 and sys.argv[1] == "--smoke":`
			`ACTION_SPACES_TO_TEST = {`
			`"discrete": Discrete(5),`
			`}`
			`OBSERVATION_SPACES_TO_TEST = {`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"vector": Box(0.0, 1.0, (5, ), dtype=np.float32),`
[rllib] Fix stats collection and some docs bugs since the refactoring (#2361) * fix * fix pbt example * fix * fix * single thread by default * vec * fix * fix 2018-07-07 13:29:20 -07:00			`"atari": Box(0.0, 1.0, (210, 160, 3), dtype=np.float32),`
			`}`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`unittest.main(verbosity=2)`