ray/rllib/tests/run_regression_tests.py
Sven Mika 2d24ef0d32
[RLlib] Add all simple learning tests as framework=tf2. (#19273)
* Unpin gym and deprecate pendulum v0

Many tests in RLlib depended on Pendulum-v0; however, in
gym 0.21, Pendulum-v0 was deprecated in favor of
Pendulum-v1. This may change reward thresholds, so we will
potentially have to rerun all of the Pendulum-v1 benchmarks
or switch to another environment. The same applies to
FrozenLake-v0 and FrozenLake-v1.

Lastly, all of the RLlib tests and Tune tests have
been moved to Python 3.7.

* fix tune test_sampler::testSampleBoundsAx

* fix re-install ray for py3.7 tests

Co-authored-by: avnishn <avnishn@uw.edu>
2021-11-02 12:10:17 +01:00
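
For context, the rename above is only a version bump of the gym environment
id; a minimal sketch of what affected tests switch to (assuming gym >= 0.21
is installed):

    import gym

    # No longer registered as of gym 0.21:
    # env = gym.make("Pendulum-v0")

    # Its replacement (reward thresholds may differ):
    env = gym.make("Pendulum-v1")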


#!/usr/bin/env python
# Runs one or more regression tests. Retries tests up to 3 times.
#
# Example usage:
# $ python run_regression_tests.py \
#       --yaml-dir=regression-tests/cartpole-es-[tf|torch].yaml
#
# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD:
# py_test(
#     name = "run_regression_tests",
#     main = "tests/run_regression_tests.py",
#     tags = ["learning_tests"],
#     size = "medium",  # 5min timeout
#     srcs = ["tests/run_regression_tests.py"],
#     data = glob(["tuned_examples/regression_tests/*.yaml"]),
#     # Pass the path in which to look for yaml regression files.
#     args = ["--yaml-dir=tuned_examples/regression_tests"]
# )
import argparse
import os
from pathlib import Path
import sys
import yaml

import ray
from ray.tune import run_experiments
from ray.rllib import _register_all
from ray.rllib.utils.deprecation import deprecation_warning

parser = argparse.ArgumentParser()
parser.add_argument(
    "--framework",
    choices=["jax", "tf2", "tf", "tfe", "torch"],
    default="tf",
    help="The deep learning framework to use.")
parser.add_argument(
    "--yaml-dir",
    type=str,
    required=True,
    help="The directory in which to find all yamls to test.")
parser.add_argument("--num-cpus", type=int, default=6)
parser.add_argument(
    "--local-mode",
    action="store_true",
    help="Run ray in local mode for easier debugging.")

# Obsoleted arg, use --framework=torch instead.
parser.add_argument(
    "--torch",
    action="store_true",
    help="Runs all tests with PyTorch enabled.")
if __name__ == "__main__":
    args = parser.parse_args()

    # Bazel regression test mode: Get path to look for yaml files.
    # Get the path or single file to use.
    rllib_dir = Path(__file__).parent.parent
    print("rllib dir={}".format(rllib_dir))

    abs_yaml_path = os.path.join(rllib_dir, args.yaml_dir)
    # Single file given.
    if os.path.isfile(abs_yaml_path):
        yaml_files = [abs_yaml_path]
    # Given path/file does not exist.
    elif not os.path.isdir(abs_yaml_path):
        raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))
    # Path given -> Get all yaml files in there via rglob.
    else:
        yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
        yaml_files = sorted(
            map(lambda path: str(path.absolute()), yaml_files),
            reverse=True)

    print("Will run the following regression tests:")
    for yaml_file in yaml_files:
        print("->", yaml_file)
    # Loop through all collected files.
    for yaml_file in yaml_files:
        experiments = yaml.safe_load(open(yaml_file).read())
        assert len(experiments) == 1, \
            "Error, can only run a single experiment per yaml file!"

        # Override the experiment's `framework` setting from the command
        # line (the obsolete --torch flag maps onto --framework=torch).
        exp = list(experiments.values())[0]
        exp["config"]["framework"] = args.framework
        if args.torch:
            deprecation_warning(old="--torch", new="--framework=torch")
            exp["config"]["framework"] = "torch"
            args.framework = "torch"

        # Always run with eager-tracing when framework=tf2.
        if args.framework in ["tf2", "tfe"]:
            exp["config"]["eager_tracing"] = True

        # Print out the actual config.
        print("== Test config ==")
        print(yaml.dump(experiments))
        # Try running each test 3 times and make sure it reaches the given
        # reward.
        passed = False
        for i in range(3):
            # Try starting a new ray cluster.
            try:
                ray.init(num_cpus=args.num_cpus, local_mode=args.local_mode)
            # Allow running this script on existing cluster as well.
            except ConnectionError:
                ray.init()
            else:
                try:
                    trials = run_experiments(
                        experiments, resume=False, verbose=2)
                finally:
                    ray.shutdown()
                    # Re-register RLlib's trainables with Tune; the
                    # registrations do not survive the shutdown above.
                    _register_all()
                for t in trials:
                    # If we have evaluation workers, use their rewards.
                    # This is useful for offline learning tests, where
                    # we evaluate against an actual environment.
                    check_eval = exp["config"].get(
                        "evaluation_interval", None) is not None
                    reward_mean = \
                        t.last_result["evaluation"]["episode_reward_mean"] \
                        if check_eval else \
                        t.last_result["episode_reward_mean"]

                    # If we are using evaluation workers, we may have
                    # a stopping criterion under the "evaluation/" scope.
                    # If not, use `episode_reward_mean`.
                    if check_eval:
                        min_reward = t.stopping_criterion.get(
                            "evaluation/episode_reward_mean",
                            t.stopping_criterion.get("episode_reward_mean"))
                    # Otherwise, expect `episode_reward_mean` to be set.
                    else:
                        min_reward = t.stopping_criterion.get(
                            "episode_reward_mean")

                    # If min reward not defined, always pass.
                    if min_reward is None or reward_mean >= min_reward:
                        passed = True
                        break
            if passed:
                print("Regression test PASSED")
                break
            else:
                print("Regression test FAILED on attempt {}".format(i + 1))

        if not passed:
            print("Overall regression FAILED: Exiting with Error.")
            sys.exit(1)
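
For reference, a minimal sketch of the kind of YAML file this script consumes
(the experiment name, environment, and thresholds below are hypothetical, not
taken from the repo). Each file must hold exactly one top-level experiment,
and the `stop` block supplies the `min_reward` the checker reads above:

    import yaml

    EXAMPLE_YAML = """
    cartpole-ppo:                     # hypothetical experiment name
        env: CartPole-v0
        run: PPO
        stop:
            episode_reward_mean: 150  # read as `min_reward` by the script
            timesteps_total: 100000
        config:
            num_workers: 1
    """

    experiments = yaml.safe_load(EXAMPLE_YAML)
    assert len(experiments) == 1
    print(list(experiments.keys()))  # -> ['cartpole-ppo']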