mirror of
https://github.com/vale981/ray
synced 2025-03-05 18:11:42 -05:00
[RLlib] Stabilize Pendulum-v0 regression test cases. (#8232)
Stabilize Pendulum regression test cases.
This commit is contained in:
parent
05df80afad
commit
b23b6addfc
12 changed files with 82 additions and 17 deletions
21
.travis.yml
21
.travis.yml
|
@ -141,7 +141,7 @@ matrix:
|
|||
install:
|
||||
- . ./ci/travis/ci.sh build
|
||||
script:
|
||||
- travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
|
||||
- travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...
|
||||
|
||||
# RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
|
||||
# Requested by Edi (MS): Test all learning capabilities with tf1.x
|
||||
|
@ -159,7 +159,24 @@ matrix:
|
|||
install:
|
||||
- . ./ci/travis/ci.sh build
|
||||
script:
|
||||
- travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/...
|
||||
- travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/...
|
||||
|
||||
# RLlib: Learning tests with torch (from rllib/tuned_examples/regression_tests/*.yaml).
|
||||
- os: linux
|
||||
env:
|
||||
- RLLIB_TESTING=1
|
||||
- RLLIB_REGRESSION_TESTS_TORCH=1
|
||||
- TF_VERSION=2.0.0b1
|
||||
- TFP_VERSION=0.8
|
||||
- TORCH_VERSION=1.4
|
||||
- PYTHON=3.6
|
||||
- PYTHONWARNINGS=ignore
|
||||
before_install:
|
||||
- . ./ci/travis/ci.sh init RAY_CI_RLLIB_FULL_AFFECTED
|
||||
install:
|
||||
- . ./ci/travis/ci.sh build
|
||||
script:
|
||||
- travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_torch rllib/...
|
||||
|
||||
# RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
|
||||
# Agent single tests (compilation, loss-funcs, etc..).
|
||||
|
|
34
rllib/BUILD
34
rllib/BUILD
|
@ -41,23 +41,45 @@
|
|||
# --------------------------------------------------------------------
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests_cartpole",
|
||||
name = "run_regression_tests_cartpole_tf",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests", "learning_tests_cartpole"],
|
||||
tags = ["learning_tests_tf", "learning_tests_cartpole"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/cartpole-*.yaml"]),
|
||||
data = glob(["tuned_examples/regression_tests/cartpole-*-tf.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests_pendulum",
|
||||
name = "run_regression_tests_cartpole_torch",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests", "learning_tests_pendulum"],
|
||||
tags = ["learning_tests_torch", "learning_tests_cartpole"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/pendulum-*.yaml"]),
|
||||
data = glob(["tuned_examples/regression_tests/cartpole-*-torch.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests_pendulum_tf",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests_tf", "learning_tests_pendulum"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/pendulum-*-tf.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests_pendulum_torch",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests_torch", "learning_tests_pendulum"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/pendulum-*-torch.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
cartpole-a2c-microbatch:
|
||||
cartpole-a2c-microbatch-tf:
|
||||
env: CartPole-v0
|
||||
run: A2C
|
||||
stop:
|
||||
episode_reward_mean: 100
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_pytorch: false
|
||||
num_workers: 1
|
||||
gamma: 0.95
|
||||
microbatch_size: 50
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
cartpole-a3c:
|
||||
cartpole-a3c-tf:
|
||||
env: CartPole-v0
|
||||
run: A3C
|
||||
stop:
|
||||
episode_reward_mean: 100
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_pytorch: false
|
||||
num_workers: 1
|
||||
gamma: 0.95
|
|
@ -1,8 +1,9 @@
|
|||
cartpole-ddppo:
|
||||
cartpole-ddppo-torch:
|
||||
env: CartPole-v0
|
||||
run: DDPPO
|
||||
stop:
|
||||
episode_reward_mean: 100
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_pytorch: true
|
||||
num_gpus_per_worker: 0
|
||||
|
|
|
@ -2,7 +2,7 @@ pendulum-ddpg-tf:
|
|||
env: Pendulum-v0
|
||||
run: DDPG
|
||||
stop:
|
||||
episode_reward_mean: -900
|
||||
episode_reward_mean: -700
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_pytorch: false
|
||||
|
|
|
@ -2,7 +2,7 @@ pendulum-ddpg-torch:
|
|||
env: Pendulum-v0
|
||||
run: DDPG
|
||||
stop:
|
||||
episode_reward_mean: -900
|
||||
episode_reward_mean: -700
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_pytorch: true
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
pendulum-ppo:
|
||||
pendulum-ppo-tf:
|
||||
env: Pendulum-v0
|
||||
run: PPO
|
||||
stop:
|
||||
episode_reward_mean: -200
|
||||
timesteps_total: 500000
|
||||
episode_reward_mean: -500
|
||||
timesteps_total: 400000
|
||||
config:
|
||||
use_pytorch: false
|
||||
train_batch_size: 2048
|
||||
vf_clip_param: 10.0
|
||||
num_workers: 0
|
|
@ -0,0 +1,21 @@
|
|||
pendulum-ppo-torch:
|
||||
env: Pendulum-v0
|
||||
run: PPO
|
||||
stop:
|
||||
episode_reward_mean: -500
|
||||
timesteps_total: 400000
|
||||
config:
|
||||
use_pytorch: true
|
||||
train_batch_size: 2048
|
||||
vf_clip_param: 10.0
|
||||
num_workers: 0
|
||||
num_envs_per_worker: 10
|
||||
lambda: 0.1
|
||||
gamma: 0.95
|
||||
lr: 0.0003
|
||||
sgd_minibatch_size: 64
|
||||
num_sgd_iter: 10
|
||||
model:
|
||||
fcnet_hiddens: [64, 64]
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
|
@ -1,6 +1,7 @@
|
|||
pendulum-td3:
|
||||
pendulum-td3-tf:
|
||||
env: Pendulum-v0
|
||||
run: TD3
|
||||
stop:
|
||||
use_pytorch: false
|
||||
episode_reward_mean: -900
|
||||
timesteps_total: 100000
|
||||
|
|
Loading…
Add table
Reference in a new issue