mirror of
https://github.com/vale981/ray
synced 2025-03-05 18:11:42 -05:00
[RLlib] Fix broken RLlib tests in master. (#7894)
This commit is contained in:
parent
38fad274aa
commit
82c2d9faba
6 changed files with 22 additions and 54 deletions
|
@ -192,7 +192,7 @@ matrix:
|
|||
- ./ci/suppress_output ./ci/travis/install-ray.sh
|
||||
script:
|
||||
- if [ $RAY_CI_RLLIB_AFFECTED != "1" ]; then exit; fi
|
||||
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
|
||||
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
|
||||
|
||||
# RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml).
|
||||
# Requested by Edi (MS): Test all learning capabilities with tf1.x
|
||||
|
@ -213,7 +213,7 @@ matrix:
|
|||
- ./ci/suppress_output ./ci/travis/install-ray.sh
|
||||
script:
|
||||
- if [ $RAY_CI_RLLIB_FULL_AFFECTED != "1" ]; then exit; fi
|
||||
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/...
|
||||
- travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/...
|
||||
|
||||
# RLlib: Quick Agent train.py runs (compilation & running, no(!) learning).
|
||||
# Agent single tests (compilation, loss-funcs, etc.).
|
||||
|
|
17
rllib/BUILD
17
rllib/BUILD
|
@ -41,12 +41,23 @@
|
|||
# --------------------------------------------------------------------
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests",
|
||||
name = "run_regression_tests_cartpole",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests"],
|
||||
tags = ["learning_tests", "learning_tests_cartpole"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/*.yaml"]),
|
||||
data = glob(["tuned_examples/regression_tests/cartpole*.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "run_regression_tests_pendulum",
|
||||
main = "tests/run_regression_tests.py",
|
||||
tags = ["learning_tests", "learning_tests_pendulum"],
|
||||
size = "enormous", # = 60min timeout
|
||||
srcs = ["tests/run_regression_tests.py"],
|
||||
data = glob(["tuned_examples/regression_tests/pendulum*.yaml"]),
|
||||
# Pass `BAZEL` option and the path to look for yaml regression files.
|
||||
args = ["BAZEL", "tuned_examples/regression_tests"]
|
||||
)
|
||||
|
|
|
@ -97,37 +97,17 @@ class TestDQN(unittest.TestCase):
|
|||
actions.append(trainer.compute_action(obs))
|
||||
check(np.std(actions), 0.0, false=True)
|
||||
|
||||
if eager_mode_ctx:
|
||||
eager_mode_ctx.__exit__(None, None, None)
|
||||
|
||||
def test_dqn_parameter_noise_exploration(self):
|
||||
"""Tests whether a DQN Agent works with ParameterNoise."""
|
||||
obs = np.array(0)
|
||||
core_config = dqn.DEFAULT_CONFIG.copy()
|
||||
core_config["num_workers"] = 0 # Run locally.
|
||||
core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"}
|
||||
|
||||
for fw in ["eager", "tf", "torch"]:
|
||||
if fw == "torch":
|
||||
continue
|
||||
print("framework={}".format(fw))
|
||||
|
||||
core_config = dqn.DEFAULT_CONFIG.copy()
|
||||
core_config["num_workers"] = 0 # Run locally.
|
||||
core_config["env_config"] = {
|
||||
"is_slippery": False,
|
||||
"map_name": "4x4"
|
||||
}
|
||||
core_config["eager"] = fw == "eager"
|
||||
core_config["use_pytorch"] = fw == "torch"
|
||||
for fw in framework_iterator(core_config, ["tf", "eager"]):
|
||||
|
||||
config = core_config.copy()
|
||||
|
||||
eager_mode_ctx = None
|
||||
if fw == "tf":
|
||||
assert not tf.executing_eagerly()
|
||||
elif fw == "eager":
|
||||
eager_mode_ctx = eager_mode()
|
||||
eager_mode_ctx.__enter__()
|
||||
assert tf.executing_eagerly()
|
||||
|
||||
# DQN with ParameterNoise exploration (config["explore"]=True).
|
||||
# ----
|
||||
config["exploration_config"] = {"type": "ParameterNoise"}
|
||||
|
@ -258,9 +238,6 @@ class TestDQN(unittest.TestCase):
|
|||
a = trainer.compute_action(obs, explore=True)
|
||||
check(a, a_)
|
||||
|
||||
if eager_mode_ctx:
|
||||
eager_mode_ctx.__exit__(None, None, None)
|
||||
|
||||
def _get_current_noise(self, policy, fw):
|
||||
# If the noise has not been created yet, return 0.0.
|
||||
if policy.exploration.noise is None:
|
||||
|
|
|
@ -51,7 +51,7 @@ if __name__ == "__main__":
|
|||
|
||||
passed = False
|
||||
for i in range(3):
|
||||
trials = run_experiments(experiments, resume=False)
|
||||
trials = run_experiments(experiments, resume=False, verbose=1)
|
||||
|
||||
for t in trials:
|
||||
if (t.last_result["episode_reward_mean"] >=
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
cartpole-ppo-tf-multi-gpu:
|
||||
env: CartPole-v0
|
||||
run: PPO
|
||||
stop:
|
||||
episode_reward_mean: 150
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
gamma: 0.99
|
||||
lr: 0.0003
|
||||
num_workers: 1
|
||||
observation_filter: MeanStdFilter
|
||||
num_sgd_iter: 6
|
||||
vf_share_layers: true
|
||||
vf_loss_coeff: 0.01
|
||||
model:
|
||||
fcnet_hiddens: [32]
|
||||
fcnet_activation: linear
|
||||
# Use fake-GPU setup to prove towers are working and learning.
|
||||
num_gpus: 6
|
||||
_fake_gpus: true
|
|
@ -68,7 +68,7 @@ def do_test_explorations(run,
|
|||
# Make sure actions drawn are different
|
||||
# (around some mean value), given constant observations.
|
||||
actions = []
|
||||
for _ in range(50):
|
||||
for _ in range(100):
|
||||
actions.append(
|
||||
trainer.compute_action(
|
||||
observation=dummy_obs,
|
||||
|
|
Loading…
Add table
Reference in a new issue