[RLlib] Fix broken RLlib tests in master. (#7894)

Sven Mika · 2020-04-05 18:34:23 +02:00 · committed by GitHub
parent 38fad274aa
commit 82c2d9faba
6 changed files with 22 additions and 54 deletions


@@ -192,7 +192,7 @@ matrix:
- ./ci/suppress_output ./ci/travis/install-ray.sh
script:
- if [ $RAY_CI_RLLIB_AFFECTED != "1" ]; then exit; fi
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
# RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
# Requested by Edi (MS): Test all learning capabilities with tf1.x
@@ -213,7 +213,7 @@ matrix:
- ./ci/suppress_output ./ci/travis/install-ray.sh
script:
- if [ $RAY_CI_RLLIB_FULL_AFFECTED != "1" ]; then exit; fi
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
# RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
# Agent single tests (compilation, loss-funcs, etc..).


@@ -41,12 +41,23 @@
# --------------------------------------------------------------------
py_test(
name = "run_regression_tests",
name = "run_regression_tests_cartpole",
main = "tests/run_regression_tests.py",
tags = ["learning_tests"],
tags = ["learning_tests", "learning_tests_cartpole"],
size = "enormous", # = 60min timeout
srcs = ["tests/run_regression_tests.py"],
data = glob(["tuned_examples/regression_tests/*.yaml"]),
data = glob(["tuned_examples/regression_tests/cartpole*.yaml"]),
# Pass `BAZEL` option and the path to look for yaml regression files.
args = ["BAZEL", "tuned_examples/regression_tests"]
)
py_test(
name = "run_regression_tests_pendulum",
main = "tests/run_regression_tests.py",
tags = ["learning_tests", "learning_tests_pendulum"],
size = "enormous", # = 60min timeout
srcs = ["tests/run_regression_tests.py"],
data = glob(["tuned_examples/regression_tests/pendulum*.yaml"]),
# Pass `BAZEL` option and the path to look for yaml regression files.
args = ["BAZEL", "tuned_examples/regression_tests"]
)
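
The split above relies on Bazel's `data` globs to partition the suites: each target ships only its matching yaml files, while `main`, `srcs`, and `args` stay identical. Below is a minimal sketch of how a runner invoked this way might pick up those files; the helper names are illustrative and this is not the actual `rllib/tests/run_regression_tests.py`.

# Illustrative only -- not the real rllib/tests/run_regression_tests.py.
import glob
import os
import sys

import yaml


def collect_experiments(yaml_dir):
    """Load every experiment spec from the yaml files Bazel bundled as `data`."""
    experiments = {}
    for path in sorted(glob.glob(os.path.join(yaml_dir, "*.yaml"))):
        with open(path) as f:
            experiments.update(yaml.safe_load(f))
    return experiments


if __name__ == "__main__":
    # The BUILD rules pass ["BAZEL", "tuned_examples/regression_tests"]: a
    # marker that the script runs under Bazel plus the directory to scan.
    # Because each target's `data` glob bundles only cartpole*.yaml or
    # pendulum*.yaml, the same script and args run a different subset per target.
    yaml_dir = sys.argv[2] if len(sys.argv) > 2 and sys.argv[1] == "BAZEL" else "."
    print(list(collect_experiments(yaml_dir)))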


@@ -97,37 +97,17 @@ class TestDQN(unittest.TestCase):
actions.append(trainer.compute_action(obs))
check(np.std(actions), 0.0, false=True)
if eager_mode_ctx:
eager_mode_ctx.__exit__(None, None, None)
def test_dqn_parameter_noise_exploration(self):
"""Tests, whether a DQN Agent works with ParameterNoise."""
obs = np.array(0)
core_config = dqn.DEFAULT_CONFIG.copy()
core_config["num_workers"] = 0 # Run locally.
core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"}
for fw in ["eager", "tf", "torch"]:
if fw == "torch":
continue
print("framework={}".format(fw))
core_config = dqn.DEFAULT_CONFIG.copy()
core_config["num_workers"] = 0 # Run locally.
core_config["env_config"] = {
"is_slippery": False,
"map_name": "4x4"
}
core_config["eager"] = fw == "eager"
core_config["use_pytorch"] = fw == "torch"
for fw in framework_iterator(core_config, ["tf", "eager"]):
config = core_config.copy()
eager_mode_ctx = None
if fw == "tf":
assert not tf.executing_eagerly()
elif fw == "eager":
eager_mode_ctx = eager_mode()
eager_mode_ctx.__enter__()
assert tf.executing_eagerly()
# DQN with ParameterNoise exploration (config["explore"]=True).
# ----
config["exploration_config"] = {"type": "ParameterNoise"}
@@ -258,9 +238,6 @@ class TestDQN(unittest.TestCase):
a = trainer.compute_action(obs, explore=True)
check(a, a_)
if eager_mode_ctx:
eager_mode_ctx.__exit__(None, None, None)
def _get_current_noise(self, policy, fw):
# If noise not even created yet, return 0.0.
if policy.exploration.noise is None:
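
The deleted boilerplate above (per-framework config flags plus manual `eager_mode()` enter/exit) is what `framework_iterator` now takes care of. A rough sketch of that pattern follows, assuming TF's `eager_mode` context manager; it is an illustration, not RLlib's actual `framework_iterator` implementation.

# Rough illustration of a framework_iterator-style helper -- not RLlib's
# actual implementation.
from tensorflow.python.eager.context import eager_mode


def framework_iterator_sketch(config, frameworks=("tf", "eager")):
    """Yield one framework string per pass, setting the matching config flags
    and handling eager mode so test bodies need no per-framework branching."""
    for fw in frameworks:
        config["eager"] = fw == "eager"
        config["use_pytorch"] = fw == "torch"
        if fw == "eager":
            # Enter eager mode only for this iteration, then leave it again.
            with eager_mode():
                yield fw
        else:
            yield fw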


@@ -51,7 +51,7 @@ if __name__ == "__main__":
passed = False
for i in range(3):
trials = run_experiments(experiments, resume=False)
trials = run_experiments(experiments, resume=False, verbose=1)
for t in trials:
if (t.last_result["episode_reward_mean"] >=
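
For context, the script retries the whole experiment set up to three times and compares each trial's final `episode_reward_mean` against a reward threshold, while `verbose=1` trims per-iteration output in the CI logs. A condensed sketch of that pattern, with a hypothetical experiment spec and a hard-coded threshold standing in for the yaml's `stop` value:

# Condensed sketch of the retry loop around run_experiments(); the experiment
# spec and the reward threshold here are placeholders.
import ray
from ray.tune import run_experiments

experiments = {
    "cartpole-ppo": {
        "run": "PPO",
        "env": "CartPole-v0",
        "stop": {"episode_reward_mean": 150, "timesteps_total": 100000},
        "config": {"num_workers": 1},
    }
}

ray.init()
passed = False
for _ in range(3):
    # verbose=1 keeps per-training-iteration output out of the CI logs.
    trials = run_experiments(experiments, resume=False, verbose=1)
    # Pass once every trial's final mean reward clears the threshold.
    if all(t.last_result["episode_reward_mean"] >= 150 for t in trials):
        passed = True
        break
ray.shutdown()
assert passed, "Regression test did not reach the target reward."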


@@ -1,20 +0,0 @@
cartpole-ppo-tf-multi-gpu:
env: CartPole-v0
run: PPO
stop:
episode_reward_mean: 150
timesteps_total: 100000
config:
gamma: 0.99
lr: 0.0003
num_workers: 1
observation_filter: MeanStdFilter
num_sgd_iter: 6
vf_share_layers: true
vf_loss_coeff: 0.01
model:
fcnet_hiddens: [32]
fcnet_activation: linear
# Use fake-GPU setup to prove towers are working and learning.
num_gpus: 6
_fake_gpus: true
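
The deleted yaml above corresponds roughly to the following programmatic call (a sketch, not code from the repo); `_fake_gpus: true` together with `num_gpus: 6` builds the multi-GPU towers on CPU so the data-parallel code path is exercised without physical GPUs.

# Programmatic equivalent of the deleted yaml above (a sketch).
from ray import tune

tune.run(
    "PPO",
    stop={"episode_reward_mean": 150, "timesteps_total": 100000},
    config={
        "env": "CartPole-v0",
        "gamma": 0.99,
        "lr": 0.0003,
        "num_workers": 1,
        "observation_filter": "MeanStdFilter",
        "num_sgd_iter": 6,
        "vf_share_layers": True,
        "vf_loss_coeff": 0.01,
        "model": {"fcnet_hiddens": [32], "fcnet_activation": "linear"},
        # Fake-GPU setup: exercise the multi-GPU towers without real GPUs.
        "num_gpus": 6,
        "_fake_gpus": True,
    },
)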


@@ -68,7 +68,7 @@ def do_test_explorations(run,
# Make sure actions drawn are different
# (around some mean value), given constant observations.
actions = []
for _ in range(50):
for _ in range(100):
actions.append(
trainer.compute_action(
observation=dummy_obs,
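
Raising the sample count from 50 to 100 tightens this statistical check: with exploration enabled, repeated `compute_action()` calls on the same observation should vary around some mean, and more samples make the assertion less flaky. In sketch form, with `trainer` and `dummy_obs` standing in for the objects built earlier in the test:

# Sketch of the stochasticity check performed here: sample many actions for a
# fixed observation and require a nonzero spread. `trainer` and `dummy_obs`
# stand in for the objects built earlier in the test.
import numpy as np


def assert_actions_are_stochastic(trainer, dummy_obs, num_samples=100):
    actions = [
        trainer.compute_action(observation=dummy_obs, explore=True)
        for _ in range(num_samples)
    ]
    # With 100 samples instead of 50, a spuriously tight spread (and hence a
    # flaky result) is less likely.
    assert np.std(actions) > 0.0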