#!/usr/bin/env python
# Runs one or more regression tests. Retries each test up to 3 times.
#
# Example usage:
# $ python run_regression_tests.py --yaml-dir=regression-tests/cartpole-es-[tf|torch].yaml
#
# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD:
# py_test(
#     name = "run_regression_tests",
#     main = "tests/run_regression_tests.py",
#     tags = ["learning_tests"],
#     size = "medium",  # 5min timeout
#     srcs = ["tests/run_regression_tests.py"],
#     data = glob(["tuned_examples/regression_tests/*.yaml"]),
#     # Pass the path to look for yaml regression files via --yaml-dir.
#     args = ["--yaml-dir=tuned_examples/regression_tests"]
# )

import argparse
import os
from pathlib import Path
import sys

import yaml

import ray
from ray.tune import run_experiments
from ray.rllib import _register_all

parser = argparse.ArgumentParser()
parser.add_argument(
    "--framework",
    choices=["jax", "tf2", "tf", "tfe", "torch"],
    default="tf",
    help="The deep learning framework to use.",
)
parser.add_argument(
    "--yaml-dir",
    type=str,
    required=True,
    help="The directory in which to find all yamls to test.",
)
parser.add_argument("--num-cpus", type=int, default=6)
parser.add_argument(
    "--local-mode",
    action="store_true",
    help="Run ray in local mode for easier debugging.",
)
parser.add_argument(
    "--override-mean-reward",
    type=float,
    default=0.0,
    help=(
        "Override the mean reward specified by the yaml file in the stopping "
        "criteria. This is particularly useful for timed tests."
    ),
)
# Obsoleted arg, use --framework=torch instead.
parser.add_argument(
    "--torch", action="store_true", help="Runs all tests with PyTorch enabled."
)

if __name__ == "__main__":
    args = parser.parse_args()

    # Backward compatibility: Map the obsoleted --torch flag onto
    # --framework=torch (previously, the flag was parsed but ignored).
    if args.torch:
        args.framework = "torch"

    # Get the path or single file to use.
    rllib_dir = Path(__file__).parent.parent
    print("rllib dir={}".format(rllib_dir))

    abs_yaml_path = os.path.join(rllib_dir, args.yaml_dir)
    # Single file given.
    if os.path.isfile(abs_yaml_path):
        yaml_files = [abs_yaml_path]
    # Given path/file does not exist.
    elif not os.path.isdir(abs_yaml_path):
        raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))
    # Path given -> Get all yaml files in there via rglob.
    else:
        yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
        yaml_files = sorted(
            map(lambda path: str(path.absolute()), yaml_files), reverse=True
        )

    print("Will run the following regression tests:")
    for yaml_file in yaml_files:
        print("->", yaml_file)

    # Loop through all collected files.
    for yaml_file in yaml_files:
        with open(yaml_file) as f:
            experiments = yaml.safe_load(f)
        assert (
            len(experiments) == 1
        ), "Error, can only run a single experiment per yaml file!"

        exp = list(experiments.values())[0]
        exp["config"]["framework"] = args.framework

        # Override the mean reward if specified. This is used by the Ray CI
        # to adjust the episode reward mean of long-learning, off-policy
        # tests (e.g. SAC and DDPG on Pendulum) when run with tf2.
        if args.override_mean_reward != 0.0:
            exp["stop"]["episode_reward_mean"] = args.override_mean_reward

        # QMIX does not support tf yet -> skip.
        if exp["run"] == "QMIX" and args.framework != "torch":
            print(f"Skipping framework='{args.framework}' for QMIX.")
            continue

        # Always run with eager-tracing when framework=tf2, unless in
        # local-mode.
        if args.framework in ["tf2", "tfe"] and not args.local_mode:
            exp["config"]["eager_tracing"] = True

        # Print out the actual config.
        print("== Test config ==")
        print(yaml.dump(experiments))

        # Try running each test 3 times and make sure it reaches the given
        # reward.
        passed = False
        for i in range(3):
            # Try starting a new ray cluster.
            try:
                ray.init(num_cpus=args.num_cpus, local_mode=args.local_mode)
            # Allow running this script on an existing cluster as well.
            except ConnectionError:
                ray.init()

            # Run the experiment regardless of which `ray.init()` above
            # succeeded (keeping this out of an `else` clause avoids an
            # unbound `trials` on the existing-cluster path).
            try:
                trials = run_experiments(experiments, resume=False, verbose=2)
            finally:
                ray.shutdown()
                _register_all()

            for t in trials:
                # If we have evaluation workers, use their rewards.
                # This is useful for offline learning tests, where we
                # evaluate against an actual environment.
                check_eval = exp["config"].get("evaluation_interval") is not None
                reward_mean = (
                    t.last_result["evaluation"]["episode_reward_mean"]
                    if check_eval
                    else t.last_result["episode_reward_mean"]
                )

                # If we are using evaluation workers, the stopping criterion
                # may live under the "evaluation/" scope; fall back to
                # `episode_reward_mean` if it does not.
                if check_eval:
                    min_reward = t.stopping_criterion.get(
                        "evaluation/episode_reward_mean",
                        t.stopping_criterion.get("episode_reward_mean"),
                    )
                # Otherwise, expect `episode_reward_mean` to be set.
                else:
                    min_reward = t.stopping_criterion.get("episode_reward_mean")

                # If min reward is not defined, always pass.
                if min_reward is None or reward_mean >= min_reward:
                    passed = True
                    break

            if passed:
                print("Regression test PASSED")
                break
            else:
                print("Regression test FAILED on attempt {}".format(i + 1))

        if not passed:
            print("Overall regression FAILED: Exiting with Error.")
            sys.exit(1)
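
# For reference, a minimal sketch of the yaml layout this script expects:
# exactly one experiment per file. The experiment name, env, and values
# below are hypothetical; the script itself only touches the `run`, `stop`,
# and `config` keys (plus `config.evaluation_interval`, if present), while
# everything else is passed through to `ray.tune.run_experiments()`:
#
#   cartpole-ppo-regression:
#       env: CartPole-v0
#       run: PPO
#       stop:
#           episode_reward_mean: 150.0
#       config:
#           num_workers: 2
#           # Optional: if set, pass/fail is judged on evaluation rewards.
#           evaluation_interval: 2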