[RLlib] Fix broken RLlib tests in master. (#7894)

2025-03-05 18:11:42 -05:00 · 2020-04-05 18:34:23 +02:00 · 2020-04-05 18:34:23 +02:00 · 82c2d9faba
commit 82c2d9faba
parent 38fad274aa
6 changed files with 22 additions and 54 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -192,7 +192,7 @@ matrix:
        - ./ci/suppress_output ./ci/travis/install-ray.sh
      script:
        - if [ $RAY_CI_RLLIB_AFFECTED != "1" ]; then exit; fi
-        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
+        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...

    # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
    # Requested by Edi (MS): Test all learning capabilities with tf1.x
@@ -213,7 +213,7 @@ matrix:
        - ./ci/suppress_output ./ci/travis/install-ray.sh
      script:
        - if [ $RAY_CI_RLLIB_FULL_AFFECTED != "1" ]; then exit; fi
-        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
+        - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...

    # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
    # Agent single tests (compilation, loss-funcs, etc..).
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -41,12 +41,23 @@
 # --------------------------------------------------------------------

 py_test(
-    name = "run_regression_tests",
+    name = "run_regression_tests_cartpole",
    main = "tests/run_regression_tests.py",
-    tags = ["learning_tests"],
+    tags = ["learning_tests", "learning_tests_cartpole"],
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
-    data = glob(["tuned_examples/regression_tests/*.yaml"]),
+    data = glob(["tuned_examples/regression_tests/cartpole*.yaml"]),
+    # Pass `BAZEL` option and the path to look for yaml regression files.
+    args = ["BAZEL", "tuned_examples/regression_tests"]
+)
+
+py_test(
+    name = "run_regression_tests_pendulum",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests", "learning_tests_pendulum"],
+    size = "enormous",  # = 60min timeout
+    srcs = ["tests/run_regression_tests.py"],
+    data = glob(["tuned_examples/regression_tests/pendulum*.yaml"]),
    # Pass `BAZEL` option and the path to look for yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"]
 )
--- a/rllib/agents/dqn/tests/test_dqn.py
+++ b/rllib/agents/dqn/tests/test_dqn.py
@@ -97,37 +97,17 @@ class TestDQN(unittest.TestCase):
                actions.append(trainer.compute_action(obs))
            check(np.std(actions), 0.0, false=True)

-            if eager_mode_ctx:
-                eager_mode_ctx.__exit__(None, None, None)
-
    def test_dqn_parameter_noise_exploration(self):
        """Tests, whether a DQN Agent works with ParameterNoise."""
        obs = np.array(0)
+        core_config = dqn.DEFAULT_CONFIG.copy()
+        core_config["num_workers"] = 0  # Run locally.
+        core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"}

-        for fw in ["eager", "tf", "torch"]:
-            if fw == "torch":
-                continue
-            print("framework={}".format(fw))
-
-            core_config = dqn.DEFAULT_CONFIG.copy()
-            core_config["num_workers"] = 0  # Run locally.
-            core_config["env_config"] = {
-                "is_slippery": False,
-                "map_name": "4x4"
-            }
-            core_config["eager"] = fw == "eager"
-            core_config["use_pytorch"] = fw == "torch"
+        for fw in framework_iterator(core_config, ["tf", "eager"]):

            config = core_config.copy()

-            eager_mode_ctx = None
-            if fw == "tf":
-                assert not tf.executing_eagerly()
-            elif fw == "eager":
-                eager_mode_ctx = eager_mode()
-                eager_mode_ctx.__enter__()
-                assert tf.executing_eagerly()
-
            # DQN with ParameterNoise exploration (config["explore"]=True).
            # ----
            config["exploration_config"] = {"type": "ParameterNoise"}
@@ -258,9 +238,6 @@ class TestDQN(unittest.TestCase):
                a = trainer.compute_action(obs, explore=True)
                check(a, a_)

-            if eager_mode_ctx:
-                eager_mode_ctx.__exit__(None, None, None)
-
    def _get_current_noise(self, policy, fw):
        # If noise not even created yet, return 0.0.
        if policy.exploration.noise is None:
--- a/rllib/tests/run_regression_tests.py
+++ b/rllib/tests/run_regression_tests.py
@@ -51,7 +51,7 @@ if __name__ == "__main__":

        passed = False
        for i in range(3):
-            trials = run_experiments(experiments, resume=False)
+            trials = run_experiments(experiments, resume=False, verbose=1)

            for t in trials:
                if (t.last_result["episode_reward_mean"] >=
--- a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml
@@ -1,20 +0,0 @@
-cartpole-ppo-tf-multi-gpu:
-    env: CartPole-v0
-    run: PPO
-    stop:
-        episode_reward_mean: 150
-        timesteps_total: 100000
-    config:
-        gamma: 0.99
-        lr: 0.0003
-        num_workers: 1
-        observation_filter: MeanStdFilter
-        num_sgd_iter: 6
-        vf_share_layers: true
-        vf_loss_coeff: 0.01
-        model:
-          fcnet_hiddens: [32]
-          fcnet_activation: linear
-        # Use fake-GPU setup to prove towers are working and learning.
-        num_gpus: 6
-        _fake_gpus: true
--- a/rllib/utils/exploration/tests/test_explorations.py
+++ b/rllib/utils/exploration/tests/test_explorations.py
@@ -68,7 +68,7 @@ def do_test_explorations(run,
            # Make sure actions drawn are different
            # (around some mean value), given constant observations.
            actions = []
-            for _ in range(50):
+            for _ in range(100):
                actions.append(
                    trainer.compute_action(
                        observation=dummy_obs,