# ray/rllib/tests/test_supported_spaces.py

from gym.spaces import Box, Dict, Discrete, Tuple, MultiDiscrete
import numpy as np
import unittest

import ray
from ray.rllib.agents.registry import get_trainer_class
from ray.rllib.examples.env.random_env import RandomEnv
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as FCNetV2
from ray.rllib.models.tf.visionnet import VisionNetwork as VisionNetV2
from ray.rllib.models.torch.visionnet import VisionNetwork as TorchVisionNetV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNetV2
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.rllib.utils.test_utils import framework_iterator

# Action spaces exercised by `check_support`, keyed by a short name.
# NOTE: Insertion order matters: `check_support` zips these entries with
# OBSERVATION_SPACES_TO_TEST, so the i-th action space is paired with the
# i-th observation space.
ACTION_SPACES_TO_TEST = {
    "discrete": Discrete(5),
    # 1D and 2D float boxes.
    "vector": Box(low=-1.0, high=1.0, shape=(5, ), dtype=np.float32),
    "vector2": Box(low=-1.0, high=1.0, shape=(5, 5), dtype=np.float32),
    # Integer-valued box.
    "int_actions": Box(low=0, high=3, shape=(2, 3), dtype=np.int32),
    "multidiscrete": MultiDiscrete([1, 2, 3, 4]),
    # Mixed discrete/continuous tuple.
    "tuple": Tuple([
        Discrete(2),
        Discrete(3),
        Box(low=-1.0, high=1.0, shape=(5, ), dtype=np.float32),
    ]),
    # Nested structure: Dict -> Dict -> Tuple.
    "dict": Dict({
        "action_choice": Discrete(3),
        "parameters": Box(
            low=-1.0, high=1.0, shape=(1, ), dtype=np.float32),
        "yet_another_nested_dict": Dict({
            "a": Tuple([Discrete(2), Discrete(3)])
        })
    }),
}

# Observation spaces exercised by `check_support`, keyed by a short name.
# NOTE: Insertion order matters (entries are zipped with
# ACTION_SPACES_TO_TEST), and the names "image"/"atari" and
# "vector"/"vector2" are used by `check_support` to pick the model class
# it asserts on.
OBSERVATION_SPACES_TO_TEST = {
    "discrete": Discrete(5),
    # 1D and 2D float boxes.
    "vector": Box(low=-1.0, high=1.0, shape=(5, ), dtype=np.float32),
    "vector2": Box(low=-1.0, high=1.0, shape=(5, 5), dtype=np.float32),
    # 3D (image-like) float boxes.
    "image": Box(low=-1.0, high=1.0, shape=(84, 84, 1), dtype=np.float32),
    "atari": Box(
        low=-1.0, high=1.0, shape=(210, 160, 3), dtype=np.float32),
    # Composite spaces.
    "tuple": Tuple([
        Discrete(10),
        Box(low=-1.0, high=1.0, shape=(5, ), dtype=np.float32),
    ]),
    "dict": Dict({
        "task": Discrete(10),
        "position": Box(
            low=-1.0, high=1.0, shape=(5, ), dtype=np.float32),
    }),
}


def check_support(alg, config, train=True, check_bounds=False, tfe=False):
    """Build (and optionally train) `alg` on many action/obs space combos.

    For each tested framework, the entries of ACTION_SPACES_TO_TEST and
    OBSERVATION_SPACES_TO_TEST are paired up in insertion order and a
    trainer is constructed on a `RandomEnv` using that pair. Any
    observation spaces left over are tested with the "discrete" action
    space. An `UnsupportedSpaceException` raised by the trainer
    constructor is tolerated (reported as "unsupported"); any other error
    propagates.

    Args:
        alg: Name of the algorithm, resolved via `get_trainer_class`.
        config: Trainer config dict. It is modified in place:
            "log_level", "train_batch_size" and "rollout_fragment_length"
            are overwritten (even if the caller set them) and
            "env_config" is replaced for every combination.
        train: Whether to run one `train()` call on each built trainer.
        check_bounds: Forwarded to the env as `check_action_bounds`.
        tfe: Whether to test "tf2" and "tfe" in addition to "tf" and
            "torch".
    """
    config["log_level"] = "ERROR"
    config["train_batch_size"] = 10
    config["rollout_fragment_length"] = 10

    # The pairing below uses zip(), which silently truncates to the shorter
    # input. Make sure no action space would be dropped that way.
    assert len(OBSERVATION_SPACES_TO_TEST) >= len(ACTION_SPACES_TO_TEST)

    def _do_check(alg, config, a_name, o_name):
        """Builds one trainer for the given space pair and checks it."""
        fw = config["framework"]
        action_space = ACTION_SPACES_TO_TEST[a_name]
        obs_space = OBSERVATION_SPACES_TO_TEST[o_name]
        print("=== Testing {} (fw={}) A={} S={} ===".format(
            alg, fw, action_space, obs_space))
        config.update(
            dict(
                env_config=dict(
                    action_space=action_space,
                    observation_space=obs_space,
                    reward_space=Box(1.0, 1.0, shape=(), dtype=np.float32),
                    p_done=1.0,
                    check_action_bounds=check_bounds)))
        stat = "ok"

        try:
            trainer = get_trainer_class(alg)(config=config, env=RandomEnv)
        except UnsupportedSpaceException:
            stat = "unsupported"
        else:
            # Always stop the trainer, even if an assertion or train()
            # fails, so it does not leak into the Ray instance shared by
            # the remaining checks.
            try:
                # These algos are excluded from the default-model check.
                if alg not in ["DDPG", "ES", "ARS", "SAC"]:
                    is_torch = fw == "torch"
                    if o_name in ["atari", "image"]:
                        expected = (TorchVisionNetV2
                                    if is_torch else VisionNetV2)
                    elif o_name in ["vector", "vector2"]:
                        expected = TorchFCNetV2 if is_torch else FCNetV2
                    else:
                        expected = None
                    if expected is not None:
                        assert isinstance(trainer.get_policy().model,
                                          expected)
                if train:
                    trainer.train()
            finally:
                trainer.stop()
        print(stat)

    frameworks = ("tf", "torch")
    if tfe:
        frameworks += ("tf2", "tfe")
    num_action_spaces = len(ACTION_SPACES_TO_TEST)
    # `_do_check` reads config["framework"]; presumably the iterator sets
    # it for each framework it yields.
    for _ in framework_iterator(config, frameworks=frameworks):
        # Zip through action- and obs-spaces.
        for a_name, o_name in zip(ACTION_SPACES_TO_TEST,
                                  OBSERVATION_SPACES_TO_TEST):
            _do_check(alg, config, a_name, o_name)
        # Do the remaining obs spaces (those without an action-space
        # partner) using the "discrete" action space.
        for o_name in list(OBSERVATION_SPACES_TO_TEST)[num_action_spaces:]:
            _do_check(alg, config, "discrete", o_name)


class TestSupportedSpacesPG(unittest.TestCase):
    """Space-support checks for A3C, APPO, IMPALA, PPO and PG."""

    @classmethod
    def setUpClass(cls) -> None:
        # One Ray instance is shared by all tests in this class.
        ray.init(num_cpus=6)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_a3c(self):
        """A3C with action-bounds checking enabled."""
        a3c_config = dict(num_workers=1, optimizer=dict(grads_per_step=1))
        check_support("A3C", a3c_config, check_bounds=True)

    def test_appo(self):
        """APPO: build only without v-trace, build and train with it."""
        for vtrace, do_train in ((False, False), (True, True)):
            appo_config = {"num_gpus": 0, "vtrace": vtrace}
            check_support("APPO", appo_config, train=do_train)

    def test_impala(self):
        """IMPALA on CPU only."""
        check_support("IMPALA", {"num_gpus": 0})

    def test_ppo(self):
        """PPO with bounds checking, including the tf2/tfe frameworks."""
        ppo_config = dict(
            num_workers=0,
            train_batch_size=100,
            rollout_fragment_length=10,
            num_sgd_iter=1,
            sgd_minibatch_size=10,
        )
        check_support("PPO", ppo_config, check_bounds=True, tfe=True)

    def test_pg(self):
        """PG: build only (no training), including tf2/tfe frameworks."""
        check_support(
            "PG", {"num_workers": 1, "optimizer": {}},
            train=False,
            check_bounds=True,
            tfe=True)


class TestSupportedSpacesOffPolicy(unittest.TestCase):
    """Space-support checks for DDPG, DQN and SAC."""

    @classmethod
    def setUpClass(cls) -> None:
        # One Ray instance is shared by all tests in this class.
        ray.init(num_cpus=4)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_ddpg(self):
        """DDPG with a large OU-noise base scale and bounds checking."""
        ddpg_config = {
            "exploration_config": {
                "ou_base_scale": 100.0
            },
            "timesteps_per_iteration": 1,
            "buffer_size": 1000,
            "use_state_preprocessor": True,
        }
        check_support("DDPG", ddpg_config, check_bounds=True)

    def test_dqn(self):
        """DQN, including the tf2/tfe frameworks."""
        check_support(
            "DQN", {"timesteps_per_iteration": 1, "buffer_size": 1000},
            tfe=True)

    def test_sac(self):
        """SAC with bounds checking."""
        sac_config = {"buffer_size": 1000}
        check_support("SAC", sac_config, check_bounds=True)


class TestSupportedSpacesEvolutionAlgos(unittest.TestCase):
    """Space-support checks for ARS and ES."""

    @classmethod
    def setUpClass(cls) -> None:
        # One Ray instance is shared by all tests in this class.
        ray.init(num_cpus=4)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_ars(self):
        """ARS with a single worker and a single rollout."""
        ars_config = dict(
            num_workers=1,
            noise_size=1500000,
            num_rollouts=1,
            rollouts_used=1,
        )
        check_support("ARS", ars_config)

    def test_es(self):
        """ES with a single worker and one episode per batch."""
        es_config = dict(
            num_workers=1,
            noise_size=1500000,
            episodes_per_batch=1,
            train_batch_size=1,
        )
        check_support("ES", es_config)


if __name__ == "__main__":
    import sys

    import pytest

    # An optional first CLI argument names a single TestCase class to run;
    # without it, every unittest.TestCase class in this file is run.
    target = __file__
    if len(sys.argv) > 1:
        target += "::" + sys.argv[1]
    sys.exit(pytest.main(["-v", target]))
[RLlib] Remove TupleActions and support arbitrarily nested action spaces. (#8143) Deprecate TupleActions and support arbitrarily nested action spaces. Closes issue #8143. 2020-04-28 14:59:16 +02:00			`from gym.spaces import Box, Dict, Discrete, Tuple, MultiDiscrete`
[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601) 2018-03-06 08:31:02 +00:00			`import numpy as np`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00			`import unittest`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`import ray`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`from ray.rllib.agents.registry import get_trainer_class`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`from ray.rllib.examples.env.random_env import RandomEnv`
[RLlib] Attention Net integration into ModelV2 and learning RL example. (#8371) 2020-05-18 17:26:40 +02:00			`from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as FCNetV2`
			`from ray.rllib.models.tf.visionnet import VisionNetwork as VisionNetV2`
Fix issue with torch PPO not handling action spaces of shape=(>1,). (#7398) 2020-03-02 19:53:19 +01:00			`from ray.rllib.models.torch.visionnet import VisionNetwork as TorchVisionNetV2`
			`from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNetV2`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`from ray.rllib.utils.error import UnsupportedSpaceException`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`from ray.rllib.utils.test_utils import framework_iterator`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`ACTION_SPACES_TO_TEST = {`
			`"discrete": Discrete(5),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"vector": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
[RLlib] Allow SAC to use custom models as Q- or policy nets and deprecate "state-preprocessor" for image spaces. (#13522) 2021-02-02 13:05:58 +01:00			`"vector2": Box(-1.0, 1.0, (5, 5), dtype=np.float32),`
[RLlib] Add support for Int-Box action spaces. (#15012) 2021-04-11 13:16:01 +02:00			`"int_actions": Box(0, 3, (2, 3), dtype=np.int32),`
[rllib] Fix Multidiscrete support (#4869) 2019-05-29 20:41:02 -07:00			`"multidiscrete": MultiDiscrete([1, 2, 3, 4]),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"tuple": Tuple(`
[rllib] Fix support for mixed discrete and continuous action spaces, add to regression test (#2655) * fix * lint * fix 2018-08-15 10:19:41 -07:00			`[Discrete(2),`
			`Discrete(3),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`Box(-1.0, 1.0, (5, ), dtype=np.float32)]),`
[RLlib] Remove TupleActions and support arbitrarily nested action spaces. (#8143) Deprecate TupleActions and support arbitrarily nested action spaces. Closes issue #8143. 2020-04-28 14:59:16 +02:00			`"dict": Dict({`
			`"action_choice": Discrete(3),`
			`"parameters": Box(-1.0, 1.0, (1, ), dtype=np.float32),`
			`"yet_another_nested_dict": Dict({`
			`"a": Tuple([Discrete(2), Discrete(3)])`
			`})`
			`}),`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`}`

			`OBSERVATION_SPACES_TO_TEST = {`
			`"discrete": Discrete(5),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"vector": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
[rllib] Properly flatten 2-d observations as input to FCnet (#5733) 2019-09-19 12:10:31 -07:00			`"vector2": Box(-1.0, 1.0, (5, 5), dtype=np.float32),`
[rllib] Native support for Dict and Tuple spaces; fix Tuple action spaces; add prev a, r to LSTM (#3051) 2018-10-20 15:21:22 -07:00			`"image": Box(-1.0, 1.0, (84, 84, 1), dtype=np.float32),`
			`"atari": Box(-1.0, 1.0, (210, 160, 3), dtype=np.float32),`
			`"tuple": Tuple([Discrete(10),`
			`Box(-1.0, 1.0, (5, ), dtype=np.float32)]),`
			`"dict": Dict({`
			`"task": Discrete(10),`
			`"position": Box(-1.0, 1.0, (5, ), dtype=np.float32),`
			`}),`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00			`}`


[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`def check_support(alg, config, train=True, check_bounds=False, tfe=False):`
[rllib] Adds eager support with a generic `TFEagerPolicy` class (#5436) 2019-08-23 02:21:11 -04:00			`config["log_level"] = "ERROR"`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`config["train_batch_size"] = 10`
			`config["rollout_fragment_length"] = 10`
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`def _do_check(alg, config, a_name, o_name):`
			`fw = config["framework"]`
			`action_space = ACTION_SPACES_TO_TEST[a_name]`
			`obs_space = OBSERVATION_SPACES_TO_TEST[o_name]`
			`print("=== Testing {} (fw={}) A={} S={} ===".format(`
			`alg, fw, action_space, obs_space))`
			`config.update(`
			`dict(`
			`env_config=dict(`
			`action_space=action_space,`
			`observation_space=obs_space,`
			`reward_space=Box(1.0, 1.0, shape=(), dtype=np.float32),`
			`p_done=1.0,`
			`check_action_bounds=check_bounds)))`
			`stat = "ok"`
[RLlib] Make sure torch and tf behave the same wrt conv2d nets. (#8785) 2020-06-20 00:05:19 +02:00
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`try:`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`a = get_trainer_class(alg)(config=config, env=RandomEnv)`
[RLlib] Make sure torch and tf behave the same wrt conv2d nets. (#8785) 2020-06-20 00:05:19 +02:00			`except UnsupportedSpaceException:`
			`stat = "unsupported"`
			`else:`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`if alg not in ["DDPG", "ES", "ARS", "SAC"]:`
			`if o_name in ["atari", "image"]:`
			`if fw == "torch":`
			`assert isinstance(a.get_policy().model,`
			`TorchVisionNetV2)`
			`else:`
			`assert isinstance(a.get_policy().model, VisionNetV2)`
			`elif o_name in ["vector", "vector2"]:`
			`if fw == "torch":`
			`assert isinstance(a.get_policy().model, TorchFCNetV2)`
			`else:`
			`assert isinstance(a.get_policy().model, FCNetV2)`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`if train:`
			`a.train()`
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`a.stop()`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`print(stat)`

[RLlib] PPO, APPO, and DD-PPO code cleanup. (#10420) 2020-09-02 14:03:01 +02:00			`frameworks = ("tf", "torch")`
Fix dict/tuple hybrid action space for tensorflow eager execution (#8781) 2020-06-04 22:28:46 +02:00			`if tfe:`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`frameworks += ("tf2", "tfe")`
Fix dict/tuple hybrid action space for tensorflow eager execution (#8781) 2020-06-04 22:28:46 +02:00			`for _ in framework_iterator(config, frameworks=frameworks):`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`# Zip through action- and obs-spaces.`
			`for a_name, o_name in zip(ACTION_SPACES_TO_TEST.keys(),`
			`OBSERVATION_SPACES_TO_TEST.keys()):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`_do_check(alg, config, a_name, o_name)`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`# Do the remaining obs spaces.`
			`assert len(OBSERVATION_SPACES_TO_TEST) >= len(ACTION_SPACES_TO_TEST)`
			`for i, o_name in enumerate(OBSERVATION_SPACES_TO_TEST.keys()):`
			`if i < len(ACTION_SPACES_TO_TEST):`
			`continue`
			`_do_check(alg, config, "discrete", o_name)`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00

[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`class TestSupportedSpacesPG(unittest.TestCase):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`@classmethod`
			`def setUpClass(cls) -> None:`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`ray.init(num_cpus=6)`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`@classmethod`
			`def tearDownClass(cls) -> None:`
[rllib] Add test for multi-agent support and fix IMPALA multi-agent (#3289) IMPALA support for multiagent was broken since IMPALA has a requirement that batch sizes be of a certain length. However multi-agent envs can create variable-length batches. Fix this by adding zero-padding as needed (similar to the RNN case). 2018-11-14 14:14:07 -08:00			`ray.shutdown()`

[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00			`def test_a3c(self):`
[rllib] First pass at pipeline implementation of DQN (#7433) * wip iters * add test * speed up * update docs * document it * support serial sampling * add test * spacing * annotate it * update * rename to pipeline * comment * iter2 wip * update * update * context test * update * fix * fix * a3c pipeline * doc * update * move timer * comment * add piepline test * fix * clean up * document * iter s * wip dqn * wip * wip * metrics * metrics rename * metrics ctx * wip * constants * add todo * suppport .union * wip * support union * remove prints * add todo * remove auto timer * fix up * fix pipeline test * typing * fix breakage * remove bad assert * wip * fix multiagent example * fixapply * update a3c * remove a2c pl * 0 workers * wip * wip * share metrics * wip * wip * doc * fix weight sync and global var updates * mode * fix * fix * doc * fix 2020-03-07 14:47:58 -08:00			`config = {"num_workers": 1, "optimizer": {"grads_per_step": 1}}`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`check_support("A3C", config, check_bounds=True)`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
			`def test_appo(self):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`check_support("APPO", {"num_gpus": 0, "vtrace": False}, train=False)`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`check_support("APPO", {"num_gpus": 0, "vtrace": True})`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`def test_impala(self):`
			`check_support("IMPALA", {"num_gpus": 0})`

			`def test_ppo(self):`
			`config = {`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`"num_workers": 0,`
			`"train_batch_size": 100,`
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`"rollout_fragment_length": 10,`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`"num_sgd_iter": 1,`
			`"sgd_minibatch_size": 10,`
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`}`
			`check_support("PPO", config, check_bounds=True, tfe=True)`

			`def test_pg(self):`
			`config = {"num_workers": 1, "optimizer": {}}`
			`check_support("PG", config, train=False, check_bounds=True, tfe=True)`


			`class TestSupportedSpacesOffPolicy(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls) -> None:`
			`ray.init(num_cpus=4)`

			`@classmethod`
			`def tearDownClass(cls) -> None:`
			`ray.shutdown()`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
			`def test_ddpg(self):`
[rllib] Clip DDPG ou-noise to avoid exceeding action bounds (#3386) Closes #2965 2018-11-24 00:56:50 -08:00			`check_support(`
			`"DDPG", {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"exploration_config": {`
			`"ou_base_scale": 100.0`
			`},`
[rllib] TD3/DDPG improvements and MuJoCo benchmarks (#4694) * [rllib] Separate optimisers for DDPG actor & crit. * [rllib] Better names for DDPG variables & options Config changes: - noise_scale -> exploration_ou_noise_scale - exploration_theta -> exploration_ou_theta - exploration_sigma -> exploration_ou_sigma - act_noise -> exploration_gaussian_sigma - noise_clip -> target_noise_clip * [rllib] Make DDPG less class-y Used functions to replace three classes with only an __init__ method & a handful of unrelated attributes. * [rllib] Refactor DDPG noise * [rllib] Unify DDPG exploration annealing Added option "exploration_should_anneal" to enable linear annealing of exploration noise. By default this is off, for consistency with DDPG & TD3 papers. Also renamed "exploration_final_eps" to "exploration_final_scale" (that name seems to have been carried over from DQN, and doesn't really make sense here). Finally, tried to rename "eps" to "noise_scale" wherever possible. 2019-04-26 17:49:53 -07:00			`"timesteps_per_iteration": 1,`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`"buffer_size": 1000,`
[rllib] TD3/DDPG improvements and MuJoCo benchmarks (#4694) * [rllib] Separate optimisers for DDPG actor & crit. * [rllib] Better names for DDPG variables & options Config changes: - noise_scale -> exploration_ou_noise_scale - exploration_theta -> exploration_ou_theta - exploration_sigma -> exploration_ou_sigma - act_noise -> exploration_gaussian_sigma - noise_clip -> target_noise_clip * [rllib] Make DDPG less class-y Used functions to replace three classes with only an __init__ method & a handful of unrelated attributes. * [rllib] Refactor DDPG noise * [rllib] Unify DDPG exploration annealing Added option "exploration_should_anneal" to enable linear annealing of exploration noise. By default this is off, for consistency with DDPG & TD3 papers. Also renamed "exploration_final_eps" to "exploration_final_scale" (that name seems to have been carried over from DQN, and doesn't really make sense here). Finally, tried to rename "eps" to "noise_scale" wherever possible. 2019-04-26 17:49:53 -07:00			`"use_state_preprocessor": True,`
[rllib] Clip DDPG ou-noise to avoid exceeding action bounds (#3386) Closes #2965 2018-11-24 00:56:50 -08:00			`},`
			`check_bounds=True)`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
			`def test_dqn(self):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`config = {"timesteps_per_iteration": 1, "buffer_size": 1000}`
Fix dict/tuple hybrid action space for tensorflow eager execution (#8781) 2020-06-04 22:28:46 +02:00			`check_support("DQN", config, tfe=True)`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`def test_sac(self):`
			`check_support("SAC", {"buffer_size": 1000}, check_bounds=True)`


			`class TestSupportedSpacesEvolutionAlgos(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls) -> None:`
			`ray.init(num_cpus=4)`

			`@classmethod`
			`def tearDownClass(cls) -> None:`
			`ray.shutdown()`

			`def test_ars(self):`
			`check_support(`
			`"ARS", {`
			`"num_workers": 1,`
			`"noise_size": 1500000,`
			`"num_rollouts": 1,`
			`"rollouts_used": 1`
			`})`

[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00			`def test_es(self):`
[rllib] Auto clip actions to Box space range; deprecate squash_to_range (#3426) * fix clip * tweak wording * remove squash entirely * Update rllib-models.rst * fix argument order * Apply suggestions from code review Co-Authored-By: ericl <ekhliang@gmail.com> 2018-12-03 19:55:25 -08:00			`check_support(`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00			`"ES", {`
[rllib] Auto clip actions to Box space range; deprecate squash_to_range (#3426) * fix clip * tweak wording * remove squash entirely * Update rllib-models.rst * fix argument order * Apply suggestions from code review Co-Authored-By: ericl <ekhliang@gmail.com> 2018-12-03 19:55:25 -08:00			`"num_workers": 1,`
[RLlib] Fix broken tune tests in master due to framework=auto errors. (#8672) 2020-05-29 11:55:47 +02:00			`"noise_size": 1500000,`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00			`"episodes_per_batch": 1,`
			`"train_batch_size": 1`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`})`
[RLlib] Exploration API: merge deterministic flag with exploration classes (SoftQ and StochasticSampling). (#7155) 2020-02-19 21:18:45 +01:00
[rllib] test all combinations of {obs_space} x {action_space} (#1449) 2018-01-24 11:03:43 -08:00
			`if __name__ == "__main__":`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`import pytest`
			`import sys`

[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`# One can specify the specific TestCase class to run.`
			`# None for all unittest.TestCase classes in this file.`
[RLlib] Tf2.x native. (#8752) 2020-07-11 22:06:35 +02:00			`class_ = sys.argv[1] if len(sys.argv) > 1 else None`
ci: Redo `format.sh --all` script & backfill lint fixes (#9956) 2020-08-07 16:49:49 -07:00			`sys.exit(`
			`pytest.main(`
			`["-v", __file__ + ("" if class_ is None else "::" + class_)]))`