[RLlib] Stabilize Pendulum-v0 regression test cases. (#8232)

Stabilize Pendulum regression test cases.
2025-03-05 18:11:42 -05:00 · 2020-04-30 15:48:11 +02:00 · 2020-04-30 15:48:11 +02:00 · b23b6addfc
commit b23b6addfc
parent 05df80afad
12 changed files with 82 additions and 17 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -141,7 +141,7 @@ matrix:
      install:
        - . ./ci/travis/ci.sh build
      script:
-        - travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
+        - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...

    # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
    # Requested by Edi (MS): Test all learning capabilities with tf1.x
@@ -159,7 +159,24 @@ matrix:
      install:
        - . ./ci/travis/ci.sh build
      script:
-        - travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
+        - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...
+
+    # RLlib: Learning tests with torch (from rllib/tuned_examples/regression_tests/*.yaml).
+    - os: linux
+      env:
+        - RLLIB_TESTING=1
+        - RLLIB_REGRESSION_TESTS_TORCH=1
+        - TF_VERSION=2.0.0b1
+        - TFP_VERSION=0.8
+        - TORCH_VERSION=1.4
+        - PYTHON=3.6
+        - PYTHONWARNINGS=ignore
+      before_install:
+        - . ./ci/travis/ci.sh init RAY_CI_RLLIB_FULL_AFFECTED
+      install:
+        - . ./ci/travis/ci.sh build
+      script:
+        - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_torch rllib/...

    # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
    # Agent single tests (compilation, loss-funcs, etc..).
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -41,23 +41,45 @@
 # --------------------------------------------------------------------

 py_test(
-    name = "run_regression_tests_cartpole",
+    name = "run_regression_tests_cartpole_tf",
    main = "tests/run_regression_tests.py",
-    tags = ["learning_tests", "learning_tests_cartpole"],
+    tags = ["learning_tests_tf", "learning_tests_cartpole"],
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
-    data = glob(["tuned_examples/regression_tests/cartpole-*.yaml"]),
+    data = glob(["tuned_examples/regression_tests/cartpole-*-tf.yaml"]),
    # Pass `BAZEL` option and the path to look for yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"]
 )

 py_test(
-    name = "run_regression_tests_pendulum",
+    name = "run_regression_tests_cartpole_torch",
    main = "tests/run_regression_tests.py",
-    tags = ["learning_tests", "learning_tests_pendulum"],
+    tags = ["learning_tests_torch", "learning_tests_cartpole"],
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
-    data = glob(["tuned_examples/regression_tests/pendulum-*.yaml"]),
+    data = glob(["tuned_examples/regression_tests/cartpole-*-torch.yaml"]),
+    # Pass `BAZEL` option and the path to look for yaml regression files.
+    args = ["BAZEL", "tuned_examples/regression_tests"]
+)
+
+py_test(
+    name = "run_regression_tests_pendulum_tf",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests_tf", "learning_tests_pendulum"],
+    size = "enormous",  # = 60min timeout
+    srcs = ["tests/run_regression_tests.py"],
+    data = glob(["tuned_examples/regression_tests/pendulum-*-tf.yaml"]),
+    # Pass `BAZEL` option and the path to look for yaml regression files.
+    args = ["BAZEL", "tuned_examples/regression_tests"]
+)
+
+py_test(
+    name = "run_regression_tests_pendulum_torch",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests_torch", "learning_tests_pendulum"],
+    size = "enormous",  # = 60min timeout
+    srcs = ["tests/run_regression_tests.py"],
+    data = glob(["tuned_examples/regression_tests/pendulum-*-torch.yaml"]),
    # Pass `BAZEL` option and the path to look for yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"]
 )
--- a/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml
@@ -1,10 +1,11 @@
-cartpole-a2c-microbatch:
+cartpole-a2c-microbatch-tf:
    env: CartPole-v0
    run: A2C
    stop:
        episode_reward_mean: 100
        timesteps_total: 100000
    config:
+        use_pytorch: false
        num_workers: 1
        gamma: 0.95
        microbatch_size: 50
--- a/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml
@@ -1,9 +1,10 @@
-cartpole-a3c:
+cartpole-a3c-tf:
    env: CartPole-v0
    run: A3C
    stop:
        episode_reward_mean: 100
        timesteps_total: 100000
    config:
+        use_pytorch: false
        num_workers: 1
        gamma: 0.95
--- a/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml
@@ -1,8 +1,9 @@
-cartpole-ddppo:
+cartpole-ddppo-torch:
    env: CartPole-v0
    run: DDPPO
    stop:
        episode_reward_mean: 100
        timesteps_total: 100000
    config:
+        use_pytorch: true
        num_gpus_per_worker: 0
--- a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml
--- a/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml
+++ b/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml
--- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml
@@ -2,7 +2,7 @@ pendulum-ddpg-tf:
    env: Pendulum-v0
    run: DDPG
    stop:
-        episode_reward_mean: -900
+        episode_reward_mean: -700
        timesteps_total: 100000
    config:
        use_pytorch: false
--- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml
@@ -2,7 +2,7 @@ pendulum-ddpg-torch:
    env: Pendulum-v0
    run: DDPG
    stop:
-        episode_reward_mean: -900
+        episode_reward_mean: -700
        timesteps_total: 100000
    config:
        use_pytorch: true
--- a/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml
@@ -1,10 +1,11 @@
-pendulum-ppo:
+pendulum-ppo-tf:
    env: Pendulum-v0
    run: PPO
    stop:
-        episode_reward_mean: -200
-        timesteps_total: 500000
+        episode_reward_mean: -500
+        timesteps_total: 400000
    config:
+        use_pytorch: false
        train_batch_size: 2048
        vf_clip_param: 10.0
        num_workers: 0
--- a/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml
@@ -0,0 +1,21 @@
+pendulum-ppo-torch:
+    env: Pendulum-v0
+    run: PPO
+    stop:
+        episode_reward_mean: -500
+        timesteps_total: 400000
+    config:
+        use_pytorch: true
+        train_batch_size: 2048
+        vf_clip_param: 10.0
+        num_workers: 0
+        num_envs_per_worker: 10
+        lambda: 0.1
+        gamma: 0.95
+        lr: 0.0003
+        sgd_minibatch_size: 64
+        num_sgd_iter: 10
+        model:
+            fcnet_hiddens: [64, 64]
+        batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
--- a/rllib/tuned_examples/regression_tests/pendulum-td3.yaml
+++ b/rllib/tuned_examples/regression_tests/pendulum-td3.yaml
@@ -1,6 +1,7 @@
-pendulum-td3:
+pendulum-td3-tf:
    env: Pendulum-v0
    run: TD3
    stop:
+        use_pytorch: false
        episode_reward_mean: -900
        timesteps_total: 100000