diff --git a/.travis.yml b/.travis.yml
index d23cac22c..80f0c0774 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -303,7 +303,7 @@ script:
   # ray serve tests
   - if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci --test_tag_filters=-jenkins_only python/ray/serve/...; fi
-
+
   # ray dashboard tests
   - if [ "$RAY_CI_DASHBOARD_AFFECTED" == "1" ]; then ./ci/keep_alive bazel test python/ray/dashboard/...; fi
diff --git a/rllib/BUILD b/rllib/BUILD
index 834093114..653d3b6a4 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -1174,6 +1174,20 @@ py_test(
     srcs = ["tests/test_evaluators.py"]
 )
 
+py_test(
+    name = "tests/test_exec_api",
+    tags = ["tests_dir", "tests_dir_E"],
+    size = "medium",
+    srcs = ["tests/test_exec_api.py"]
+)
+
+py_test(
+    name = "tests/test_execution",
+    tags = ["tests_dir", "tests_dir_E"],
+    size = "medium",
+    srcs = ["tests/test_execution.py"]
+)
+
 py_test(
     name = "tests/test_export",
     tags = ["tests_dir", "tests_dir_E"],
@@ -1216,13 +1230,6 @@ py_test(
     srcs = ["tests/test_io.py"]
 )
 
-py_test(
-    name = "tests/test_execution",
-    tags = ["tests_dir", "tests_dir_E"],
-    size = "medium",
-    srcs = ["tests/test_execution.py"]
-)
-
 py_test(
     name = "tests/test_local",
     tags = ["tests_dir", "tests_dir_L"],
@@ -1248,7 +1255,7 @@ py_test(
 py_test(
     name = "tests/test_multi_agent_env",
     tags = ["tests_dir", "tests_dir_M"],
-    size = "large",
+    size = "medium",
     srcs = ["tests/test_multi_agent_env.py"]
 )
 
@@ -1267,17 +1274,10 @@ py_test(
     srcs = ["tests/test_nested_observation_spaces.py"]
 )
 
-py_test(
-    name = "tests/test_exec_api",
-    tags = ["tests_dir", "tests_dir_E"],
-    size = "medium",
-    srcs = ["tests/test_exec_api.py"]
-)
-
 py_test(
     name = "tests/test_reproducibility",
     tags = ["tests_dir", "tests_dir_R"],
-    size = "large",
+    size = "medium",
     srcs = ["tests/test_reproducibility.py"]
 )
@@ -1293,7 +1293,7 @@ py_test(
 py_test(
     name = "tests/test_rollout_worker",
     tags = ["tests_dir", "tests_dir_R"],
-    size = "large",
+    size = "medium",
     srcs = ["tests/test_rollout_worker.py"]
 )
@@ -1307,7 +1307,7 @@ py_test(
 py_test(
     name = "tests/test_supported_spaces",
     tags = ["tests_dir", "tests_dir_S"],
-    size = "large",
+    size = "enormous",
     srcs = ["tests/test_supported_spaces.py"]
 )
diff --git a/rllib/agents/a3c/a3c.py b/rllib/agents/a3c/a3c.py
index eb9fb0449..19c6e589f 100644
--- a/rllib/agents/a3c/a3c.py
+++ b/rllib/agents/a3c/a3c.py
@@ -54,6 +54,10 @@ def get_policy_class(config):
 def validate_config(config):
     if config["entropy_coeff"] < 0:
         raise DeprecationWarning("entropy_coeff must be >= 0")
+    if config["sample_async"] and config["framework"] == "torch":
+        config["sample_async"] = False
+        logger.warning("`sample_async=True` is not supported for PyTorch! "
+                       "Multithreading can lead to crashes.")
 
 
 def execution_plan(workers, config):
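
NOTE: The a3c.py hunk above makes `validate_config` correct the config in place rather than fail: under the torch framework it flips `sample_async` off and logs a warning, since multithreaded sampling can crash the torch policy. A minimal standalone sketch of that behavior (plain-dict config and logger setup are illustrative; the real function lives in rllib/agents/a3c/a3c.py):

    import logging

    logger = logging.getLogger(__name__)

    def validate_config(config):
        # Entropy coefficient must be non-negative (as in the hunk above).
        if config["entropy_coeff"] < 0:
            raise DeprecationWarning("entropy_coeff must be >= 0")
        # Async sampling is force-disabled for torch: per the warning above,
        # multithreading can lead to crashes.
        if config["sample_async"] and config["framework"] == "torch":
            config["sample_async"] = False
            logger.warning("`sample_async=True` is not supported for PyTorch! "
                           "Multithreading can lead to crashes.")

    config = {"entropy_coeff": 0.01, "sample_async": True, "framework": "torch"}
    validate_config(config)
    assert config["sample_async"] is False  # silently corrected, warning logged
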
" + "Multithreading can lead to crashes.") def execution_plan(workers, config): diff --git a/rllib/agents/ars/ars.py b/rllib/agents/ars/ars.py index 1f2e91f24..87c4b7cfc 100644 --- a/rllib/agents/ars/ars.py +++ b/rllib/agents/ars/ars.py @@ -11,8 +11,8 @@ import ray from ray.rllib.agents import Trainer, with_common_config from ray.rllib.agents.ars.ars_tf_policy import ARSTFPolicy -from ray.rllib.agents.es import optimizers -from ray.rllib.agents.es import utils +from ray.rllib.agents.es import optimizers, utils +from ray.rllib.agents.es.es import validate_config from ray.rllib.agents.es.es_tf_policy import rollout from ray.rllib.env.env_context import EnvContext from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID @@ -179,6 +179,7 @@ class ARSTrainer(Trainer): @override(Trainer) def _init(self, config, env_creator): + validate_config(config) env_context = EnvContext(config["env_config"] or {}, worker_index=0) env = env_creator(env_context) diff --git a/rllib/agents/es/es.py b/rllib/agents/es/es.py index 5acc0caaa..7ab320656 100644 --- a/rllib/agents/es/es.py +++ b/rllib/agents/es/es.py @@ -169,6 +169,11 @@ def get_policy_class(config): return policy_cls +def validate_config(config): + if config["num_workers"] <= 0: + raise ValueError("`num_workers` must be > 0 for ES!") + + class ESTrainer(Trainer): """Large-scale implementation of Evolution Strategies in Ray.""" @@ -177,6 +182,7 @@ class ESTrainer(Trainer): @override(Trainer) def _init(self, config, env_creator): + validate_config(config) env_context = EnvContext(config["env_config"] or {}, worker_index=0) env = env_creator(env_context) policy_cls = get_policy_class(config) diff --git a/rllib/tests/test_rollout.py b/rllib/tests/test_rollout.py index 04c370e3e..e6433c50d 100644 --- a/rllib/tests/test_rollout.py +++ b/rllib/tests/test_rollout.py @@ -6,9 +6,13 @@ import unittest from ray.rllib.utils.test_utils import framework_iterator -def rollout_test(algo, env="CartPole-v0"): +def rollout_test(algo, env="CartPole-v0", test_episode_rollout=False): + extra_config = "" + if algo == "ES": + extra_config = ",\"episodes_per_batch\": 1,\"train_batch_size\": 10, "\ + "\"noise_size\": 250000" - for fw in framework_iterator(frameworks=("torch", "tf")): + for fw in framework_iterator(frameworks=("tf", "torch")): fw_ = ", \"framework\": \"{}\"".format(fw) tmp_dir = os.popen("mktemp -d").read()[:-1] @@ -22,8 +26,8 @@ def rollout_test(algo, env="CartPole-v0"): os.path.exists(rllib_dir))) os.system("python {}/train.py --local-dir={} --run={} " "--checkpoint-freq=1 ".format(rllib_dir, tmp_dir, algo) + - "--config='{" + - "\"num_workers\": 0, \"num_gpus\": 0{}".format(fw_) + + "--config='{" + "\"num_workers\": 1, \"num_gpus\": 0{}{}". + format(fw_, extra_config) + ", \"model\": {\"fcnet_hiddens\": [10]}" "}' --stop='{\"training_iteration\": 1, " "\"timesteps_per_iter\": 5, " @@ -44,12 +48,13 @@ def rollout_test(algo, env="CartPole-v0"): print("rollout output (10 steps) exists!".format(checkpoint_path)) # Test rolling out 1 episode. 
- os.popen("python {}/rollout.py --run={} \"{}\" --episodes=1 " - "--out=\"{}/rollouts_1episode.pkl\" --no-render".format( - rllib_dir, algo, checkpoint_path, tmp_dir)).read() - if not os.path.exists(tmp_dir + "/rollouts_1episode.pkl"): - sys.exit(1) - print("rollout output (1 ep) exists!".format(checkpoint_path)) + if test_episode_rollout: + os.popen("python {}/rollout.py --run={} \"{}\" --episodes=1 " + "--out=\"{}/rollouts_1episode.pkl\" --no-render".format( + rllib_dir, algo, checkpoint_path, tmp_dir)).read() + if not os.path.exists(tmp_dir + "/rollouts_1episode.pkl"): + sys.exit(1) + print("rollout output (1 ep) exists!".format(checkpoint_path)) # Cleanup. os.popen("rm -rf \"{}\"".format(tmp_dir)).read() @@ -72,13 +77,10 @@ class TestRollout(unittest.TestCase): rollout_test("ES") def test_impala(self): - rollout_test("IMPALA", env="Pong-ram-v4") - - def test_pg(self): - rollout_test("PG") + rollout_test("IMPALA", env="CartPole-v0") def test_ppo(self): - rollout_test("PPO", env="Pendulum-v0") + rollout_test("PPO", env="CartPole-v0", test_episode_rollout=True) def test_sac(self): rollout_test("SAC", env="Pendulum-v0") diff --git a/rllib/tests/test_supported_multi_agent.py b/rllib/tests/test_supported_multi_agent.py index f40e28292..0899d6f36 100644 --- a/rllib/tests/test_supported_multi_agent.py +++ b/rllib/tests/test_supported_multi_agent.py @@ -14,23 +14,25 @@ def check_support_multiagent(alg, config): register_env("multi_agent_cartpole", lambda _: MultiAgentCartPole({"num_agents": 2})) config["log_level"] = "ERROR" - for _ in framework_iterator(config, frameworks=("tf", "torch")): + for _ in framework_iterator(config, frameworks=("torch", "tf")): if alg in ["DDPG", "APEX_DDPG", "SAC"]: a = get_agent_class(alg)( config=config, env="multi_agent_mountaincar") else: a = get_agent_class(alg)(config=config, env="multi_agent_cartpole") try: - a.train() + print(a.train()) finally: a.stop() -class ModelSupportedSpaces(unittest.TestCase): - def setUp(self): - ray.init(num_cpus=4, ignore_reinit_error=True) +class TestSupportedMultiAgent(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + ray.init(num_cpus=4) - def tearDown(self): + @classmethod + def tearDownClass(cls) -> None: ray.shutdown() def test_a3c_multiagent(self): @@ -45,10 +47,11 @@ class ModelSupportedSpaces(unittest.TestCase): check_support_multiagent( "APEX", { "num_workers": 2, - "timesteps_per_iteration": 1000, + "timesteps_per_iteration": 100, "num_gpus": 0, + "buffer_size": 1000, "min_iter_time_s": 1, - "learning_starts": 1000, + "learning_starts": 10, "target_network_update_freq": 100, }) @@ -56,10 +59,11 @@ class ModelSupportedSpaces(unittest.TestCase): check_support_multiagent( "APEX_DDPG", { "num_workers": 2, - "timesteps_per_iteration": 1000, + "timesteps_per_iteration": 100, + "buffer_size": 1000, "num_gpus": 0, "min_iter_time_s": 1, - "learning_starts": 1000, + "learning_starts": 10, "target_network_update_freq": 100, "use_state_preprocessor": True, }) @@ -68,12 +72,16 @@ class ModelSupportedSpaces(unittest.TestCase): check_support_multiagent( "DDPG", { "timesteps_per_iteration": 1, + "buffer_size": 1000, "use_state_preprocessor": True, "learning_starts": 500, }) def test_dqn_multiagent(self): - check_support_multiagent("DQN", {"timesteps_per_iteration": 1}) + check_support_multiagent("DQN", { + "timesteps_per_iteration": 1, + "buffer_size": 1000, + }) def test_impala_multiagent(self): check_support_multiagent("IMPALA", {"num_gpus": 0}) @@ -94,6 +102,7 @@ class ModelSupportedSpaces(unittest.TestCase): def 
diff --git a/rllib/tests/test_supported_multi_agent.py b/rllib/tests/test_supported_multi_agent.py
index f40e28292..0899d6f36 100644
--- a/rllib/tests/test_supported_multi_agent.py
+++ b/rllib/tests/test_supported_multi_agent.py
@@ -14,23 +14,25 @@ def check_support_multiagent(alg, config):
     register_env("multi_agent_cartpole",
                  lambda _: MultiAgentCartPole({"num_agents": 2}))
     config["log_level"] = "ERROR"
-    for _ in framework_iterator(config, frameworks=("tf", "torch")):
+    for _ in framework_iterator(config, frameworks=("torch", "tf")):
         if alg in ["DDPG", "APEX_DDPG", "SAC"]:
             a = get_agent_class(alg)(
                 config=config, env="multi_agent_mountaincar")
         else:
             a = get_agent_class(alg)(config=config, env="multi_agent_cartpole")
         try:
-            a.train()
+            print(a.train())
         finally:
             a.stop()
 
 
-class ModelSupportedSpaces(unittest.TestCase):
-    def setUp(self):
-        ray.init(num_cpus=4, ignore_reinit_error=True)
+class TestSupportedMultiAgent(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        ray.init(num_cpus=4)
 
-    def tearDown(self):
+    @classmethod
+    def tearDownClass(cls) -> None:
         ray.shutdown()
 
     def test_a3c_multiagent(self):
@@ -45,10 +47,11 @@ class ModelSupportedSpaces(unittest.TestCase):
         check_support_multiagent(
             "APEX", {
                 "num_workers": 2,
-                "timesteps_per_iteration": 1000,
+                "timesteps_per_iteration": 100,
                 "num_gpus": 0,
+                "buffer_size": 1000,
                 "min_iter_time_s": 1,
-                "learning_starts": 1000,
+                "learning_starts": 10,
                 "target_network_update_freq": 100,
             })
 
@@ -56,10 +59,11 @@ class ModelSupportedSpaces(unittest.TestCase):
         check_support_multiagent(
             "APEX_DDPG", {
                 "num_workers": 2,
-                "timesteps_per_iteration": 1000,
+                "timesteps_per_iteration": 100,
+                "buffer_size": 1000,
                 "num_gpus": 0,
                 "min_iter_time_s": 1,
-                "learning_starts": 1000,
+                "learning_starts": 10,
                 "target_network_update_freq": 100,
                 "use_state_preprocessor": True,
             })
@@ -68,12 +72,16 @@ class ModelSupportedSpaces(unittest.TestCase):
         check_support_multiagent(
             "DDPG", {
                 "timesteps_per_iteration": 1,
+                "buffer_size": 1000,
                 "use_state_preprocessor": True,
                 "learning_starts": 500,
             })
 
     def test_dqn_multiagent(self):
-        check_support_multiagent("DQN", {"timesteps_per_iteration": 1})
+        check_support_multiagent("DQN", {
+            "timesteps_per_iteration": 1,
+            "buffer_size": 1000,
+        })
 
     def test_impala_multiagent(self):
         check_support_multiagent("IMPALA", {"num_gpus": 0})
@@ -94,6 +102,7 @@ class ModelSupportedSpaces(unittest.TestCase):
     def test_sac_multiagent(self):
         check_support_multiagent("SAC", {
             "num_workers": 0,
+            "buffer_size": 1000,
             "normalize_actions": False,
         })
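
NOTE: Both test files in this PR move `ray.init()`/`ray.shutdown()` from per-test `setUp`/`tearDown` into class-level fixtures, so one Ray instance serves the whole class and `ignore_reinit_error=True` is no longer needed. The pattern in isolation (the class name here is a hypothetical example, not from the PR):

    import unittest

    import ray

    class ExampleRayTest(unittest.TestCase):
        @classmethod
        def setUpClass(cls) -> None:
            ray.init(num_cpus=4)  # started once, before the first test runs

        @classmethod
        def tearDownClass(cls) -> None:
            ray.shutdown()  # stopped once, after the last test finishes

        def test_ray_is_up(self):
            self.assertTrue(ray.is_initialized())
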
diff --git a/rllib/tests/test_supported_spaces.py b/rllib/tests/test_supported_spaces.py
index 59d30b79a..b7e128924 100644
--- a/rllib/tests/test_supported_spaces.py
+++ b/rllib/tests/test_supported_spaces.py
@@ -48,7 +48,7 @@ OBSERVATION_SPACES_TO_TEST = {
 }
 
 
-def check_support(alg, config, check_bounds=False, tfe=False):
+def check_support(alg, config, train=True, check_bounds=False, tfe=False):
     config["log_level"] = "ERROR"
 
     def _do_check(alg, config, a_name, o_name):
@@ -83,7 +83,8 @@ def check_support(alg, config, check_bounds=False, tfe=False):
                     assert isinstance(a.get_policy().model, TorchFCNetV2)
                 else:
                     assert isinstance(a.get_policy().model, FCNetV2)
-            a.train()
+            if train:
+                a.train()
         except UnsupportedSpaceException:
             stat = "unsupported"
         finally:
@@ -99,19 +100,22 @@ def check_support(alg, config, check_bounds=False, tfe=False):
     if tfe:
         frameworks += ("tfe", )
     for _ in framework_iterator(config, frameworks=frameworks):
-        # Check all action spaces.
+        # Check all action spaces (using a discrete obs-space).
         for a_name, action_space in ACTION_SPACES_TO_TEST.items():
             _do_check(alg, config, a_name, "discrete")
-        # Check all obs spaces.
+        # Check all obs spaces (using a supported action-space).
         for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
-            _do_check(alg, config, "discrete", o_name)
+            a_name = "discrete" if alg not in ["DDPG", "SAC"] else "vector"
+            _do_check(alg, config, a_name, o_name)
 
 
-class ModelSupportedSpaces(unittest.TestCase):
-    def setUp(self):
-        ray.init(num_cpus=4, ignore_reinit_error=True, local_mode=True)
+class TestSupportedSpaces(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        ray.init(num_cpus=4)
 
-    def tearDown(self):
+    @classmethod
+    def tearDownClass(cls) -> None:
         ray.shutdown()
 
     def test_a3c(self):
@@ -119,7 +123,7 @@ class ModelSupportedSpaces(unittest.TestCase):
         check_support("A3C", config, check_bounds=True)
 
     def test_appo(self):
-        check_support("APPO", {"num_gpus": 0, "vtrace": False})
+        check_support("APPO", {"num_gpus": 0, "vtrace": False}, train=False)
         check_support("APPO", {"num_gpus": 0, "vtrace": True})
 
     def test_ars(self):
@@ -138,12 +142,13 @@ class ModelSupportedSpaces(unittest.TestCase):
                     "ou_base_scale": 100.0
                 },
                 "timesteps_per_iteration": 1,
+                "buffer_size": 1000,
                 "use_state_preprocessor": True,
             },
             check_bounds=True)
 
     def test_dqn(self):
-        config = {"timesteps_per_iteration": 1}
+        config = {"timesteps_per_iteration": 1, "buffer_size": 1000}
         check_support("DQN", config, tfe=True)
 
     def test_es(self):
@@ -170,10 +175,10 @@ class ModelSupportedSpaces(unittest.TestCase):
 
     def test_pg(self):
         config = {"num_workers": 1, "optimizer": {}}
-        check_support("PG", config, check_bounds=True, tfe=True)
+        check_support("PG", config, train=False, check_bounds=True, tfe=True)
 
     def test_sac(self):
-        check_support("SAC", {}, check_bounds=True)
+        check_support("SAC", {"buffer_size": 1000}, check_bounds=True)
 
 
 if __name__ == "__main__":
diff --git a/rllib/tuned_examples/ppo/pendulum-ppo.yaml b/rllib/tuned_examples/ppo/pendulum-ppo.yaml
index 81a0388cd..3941dda47 100644
--- a/rllib/tuned_examples/ppo/pendulum-ppo.yaml
+++ b/rllib/tuned_examples/ppo/pendulum-ppo.yaml
@@ -8,16 +8,15 @@ pendulum-ppo:
     config:
         # Works for both torch and tf.
         framework: tf
-        train_batch_size: 2048
+        train_batch_size: 512
        vf_clip_param: 10.0
         num_workers: 0
-        num_envs_per_worker: 10
+        num_envs_per_worker: 20
         lambda: 0.1
         gamma: 0.95
         lr: 0.0003
         sgd_minibatch_size: 64
-        num_sgd_iter: 10
+        num_sgd_iter: 6
         model:
-            fcnet_hiddens: [64, 64]
-        batch_mode: complete_episodes
+            fcnet_hiddens: [256, 256]
         observation_filter: MeanStdFilter
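
NOTE: A quick way to verify the retuned pendulum-ppo.yaml after applying the hunk above (the file path and PyYAML usage are illustrative assumptions, not part of the PR):

    import yaml  # assumes PyYAML is installed

    with open("rllib/tuned_examples/ppo/pendulum-ppo.yaml") as f:
        cfg = yaml.safe_load(f)["pendulum-ppo"]["config"]

    assert cfg["train_batch_size"] == 512
    assert cfg["num_envs_per_worker"] == 20
    assert cfg["num_sgd_iter"] == 6
    assert cfg["model"]["fcnet_hiddens"] == [256, 256]
    assert "batch_mode" not in cfg  # complete_episodes requirement was dropped
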