[RLlib] Stabilize Pendulum-v0 regression test cases. (#8232)

Stabilize Pendulum regression test cases.
This commit is contained in:
Sven Mika 2020-04-30 15:48:11 +02:00 committed by GitHub
parent 05df80afad
commit b23b6addfc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 82 additions and 17 deletions

View file

@ -141,7 +141,7 @@ matrix:
install:
- . ./ci/travis/ci.sh build
script:
- travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
- travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...
# RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
# Requested by Edi (MS): Test all learning capabilities with tf1.x
@ -159,7 +159,24 @@ matrix:
install:
- . ./ci/travis/ci.sh build
script:
- travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
- travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...
# RLlib: Learning tests with torch (from rllib/tuned_examples/regression_tests/*.yaml).
- os: linux
  env:
    - RLLIB_TESTING=1
    # Marks this job as the torch flavour of the regression tests.
    - RLLIB_REGRESSION_TESTS_TORCH=1
    - TF_VERSION=2.0.0b1
    - TFP_VERSION=0.8
    - TORCH_VERSION=1.4
    - PYTHON=3.6
    - PYTHONWARNINGS=ignore
  before_install:
    - . ./ci/travis/ci.sh init RAY_CI_RLLIB_FULL_AFFECTED
  install:
    - . ./ci/travis/ci.sh build
  script:
    # Runs only the Bazel targets tagged `learning_tests_torch`.
    - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_torch rllib/...
# RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
# Agent single tests (compilation, loss-funcs, etc.).

View file

@ -41,23 +41,45 @@
# --------------------------------------------------------------------
# CartPole learning regression tests, TensorFlow yaml configs only
# (selected in CI via --test_tag_filters=learning_tests_tf).
py_test(
    name = "run_regression_tests_cartpole_tf",
    main = "tests/run_regression_tests.py",
    tags = ["learning_tests_tf", "learning_tests_cartpole"],
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
    # Only the `-tf` CartPole yaml files are declared as data for this target.
    data = glob(["tuned_examples/regression_tests/cartpole-*-tf.yaml"]),
    # Pass `BAZEL` option and the path to look for yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"]
)
# CartPole learning regression tests, PyTorch yaml configs only
# (selected in CI via --test_tag_filters=learning_tests_torch).
py_test(
    name = "run_regression_tests_cartpole_torch",
    main = "tests/run_regression_tests.py",
    tags = ["learning_tests_torch", "learning_tests_cartpole"],
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
    # Only the `-torch` CartPole yaml files are declared as data for this target.
    data = glob(["tuned_examples/regression_tests/cartpole-*-torch.yaml"]),
    # Pass `BAZEL` option and the path to look for yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"]
)
# Pendulum learning regression tests driven by the `-tf` yaml configs.
py_test(
    name = "run_regression_tests_pendulum_tf",
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
    main = "tests/run_regression_tests.py",
    data = glob(["tuned_examples/regression_tests/pendulum-*-tf.yaml"]),
    # First arg is the `BAZEL` option, second the path searched for
    # yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"],
    tags = ["learning_tests_tf", "learning_tests_pendulum"],
)
# Pendulum learning regression tests driven by the `-torch` yaml configs.
py_test(
    name = "run_regression_tests_pendulum_torch",
    size = "enormous",  # = 60min timeout
    srcs = ["tests/run_regression_tests.py"],
    main = "tests/run_regression_tests.py",
    data = glob(["tuned_examples/regression_tests/pendulum-*-torch.yaml"]),
    # First arg is the `BAZEL` option, second the path searched for
    # yaml regression files.
    args = ["BAZEL", "tuned_examples/regression_tests"],
    tags = ["learning_tests_torch", "learning_tests_pendulum"],
)

View file

@ -1,10 +1,11 @@
cartpole-a2c-microbatch:
cartpole-a2c-microbatch-tf:
env: CartPole-v0
run: A2C
stop:
episode_reward_mean: 100
timesteps_total: 100000
config:
use_pytorch: false
num_workers: 1
gamma: 0.95
microbatch_size: 50

View file

@ -1,9 +1,10 @@
cartpole-a3c:
cartpole-a3c-tf:
env: CartPole-v0
run: A3C
stop:
episode_reward_mean: 100
timesteps_total: 100000
config:
use_pytorch: false
num_workers: 1
gamma: 0.95

View file

@ -1,8 +1,9 @@
cartpole-ddppo:
cartpole-ddppo-torch:
env: CartPole-v0
run: DDPPO
stop:
episode_reward_mean: 100
timesteps_total: 100000
config:
use_pytorch: true
num_gpus_per_worker: 0

View file

@ -2,7 +2,7 @@ pendulum-ddpg-tf:
env: Pendulum-v0
run: DDPG
stop:
episode_reward_mean: -900
episode_reward_mean: -700
timesteps_total: 100000
config:
use_pytorch: false

View file

@ -2,7 +2,7 @@ pendulum-ddpg-torch:
env: Pendulum-v0
run: DDPG
stop:
episode_reward_mean: -900
episode_reward_mean: -700
timesteps_total: 100000
config:
use_pytorch: true

View file

@ -1,10 +1,11 @@
pendulum-ppo:
pendulum-ppo-tf:
env: Pendulum-v0
run: PPO
stop:
episode_reward_mean: -200
timesteps_total: 500000
episode_reward_mean: -500
timesteps_total: 400000
config:
use_pytorch: false
train_batch_size: 2048
vf_clip_param: 10.0
num_workers: 0

View file

@ -0,0 +1,21 @@
# PPO on Pendulum-v0, PyTorch variant (use_pytorch: true).
pendulum-ppo-torch:
  env: Pendulum-v0
  run: PPO
  stop:
    episode_reward_mean: -500
    timesteps_total: 400000
  config:
    use_pytorch: true
    train_batch_size: 2048
    vf_clip_param: 10.0
    num_workers: 0
    num_envs_per_worker: 10
    lambda: 0.1
    gamma: 0.95
    lr: 0.0003
    sgd_minibatch_size: 64
    num_sgd_iter: 10
    model:
      fcnet_hiddens: [64, 64]
    # NOTE(review): the two keys below are placed at config level rather than
    # under `model:` — confirm against the trainer's config schema.
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter

View file

@ -1,6 +1,7 @@
pendulum-td3:
pendulum-td3-tf:
env: Pendulum-v0
run: TD3
# NOTE(review): `use_pytorch` is listed under `stop:` here, whereas every other
# experiment fragment in this change lists it directly after `config:`.
# It looks like a trainer setting rather than a stopping criterion — confirm
# and move it under this experiment's `config:` section.
stop:
use_pytorch: false
episode_reward_mean: -900
timesteps_total: 100000