#!/usr/bin/env python
# Runs one or more memory leak tests.
#
# Example usage (the path is resolved relative to the rllib directory):
# $ python run_memory_leak_tests.py \
#     --yaml-dir tuned_examples/ppo/memory-leak-test-ppo.yaml
#
# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD:
# py_test(
#     name = "memory_leak_ppo",
#     main = "tests/test_memory_leak.py",
#     tags = ["memory_leak_tests"],
#     size = "medium",  # 5min timeout
#     srcs = ["tests/test_memory_leak.py"],
#     data = glob(["tuned_examples/ppo/*.yaml"]),
#     # Pass the yaml file (or a directory of yaml files) via `--yaml-dir`.
#     args = ["--yaml-dir", "tuned_examples/ppo/memory-leak-test-ppo.yaml"]
# )

import argparse
import os
from pathlib import Path
import sys
import yaml

import ray
from ray.rllib.algorithms.registry import get_algorithm_class
from ray.rllib.utils.debug.memory import check_memory_leaks

# Command line interface of the memory leak test runner.
parser = argparse.ArgumentParser()

# Optional framework override; when omitted, the value stays None.
parser.add_argument(
    "--framework",
    help="The deep learning framework to use.",
    choices=["jax", "tf2", "tf", "tfe", "torch", None],
)

# Mandatory location of the yaml file(s) describing the experiments.
parser.add_argument(
    "--yaml-dir",
    help="The directory in which to find all yamls to test.",
    required=True,
)

# Boolean switch, False unless the flag is given.
parser.add_argument(
    "--local-mode",
    help="Run ray in local mode for easier debugging.",
    action="store_true",
)

# One or more component names that should be checked for leaks.
parser.add_argument(
    "--to-check",
    help="List of 'env', 'policy', 'rollout_worker', 'model'.",
    default=["env", "policy", "rollout_worker"],
    nargs="+",
)


def collect_yaml_files(rllib_dir, yaml_dir):
    """Resolve `yaml_dir` to the list of yaml files to run.

    Args:
        rllib_dir: Base directory against which a relative `yaml_dir` is
            resolved.
        yaml_dir: Path (str) of a single yaml file or of a directory holding
            `*.yaml` files. May be relative to `rllib_dir` or absolute.

    Returns:
        A list of file paths (str). A single file is returned as
        `os.path.join(rllib_dir, yaml_dir)`; files found in a directory are
        returned as absolute paths, sorted in reverse order.

    Raises:
        ValueError: If the resolved path is neither a file nor a directory.
    """
    abs_yaml_path = os.path.join(rllib_dir, yaml_dir)

    # Single file given.
    if os.path.isfile(abs_yaml_path):
        return [abs_yaml_path]

    # Given path/file does not exist.
    if not os.path.isdir(abs_yaml_path):
        raise ValueError("yaml-dir ({}) not found!".format(yaml_dir))

    if os.path.isabs(yaml_dir):
        # pathlib refuses absolute glob patterns (NotImplementedError), so
        # glob directly inside the given directory instead.
        matches = Path(yaml_dir).glob("*.yaml")
    else:
        # Relative directory: rglob matches every directory below
        # `rllib_dir` whose trailing path components equal `yaml_dir`.
        matches = Path(rllib_dir).rglob(yaml_dir + "/*.yaml")

    return sorted((str(path.absolute()) for path in matches), reverse=True)


def load_single_experiment(yaml_file):
    """Load the one experiment spec contained in `yaml_file`.

    Args:
        yaml_file: Path of the yaml file to read.

    Returns:
        The value stored under the file's single top-level key.

    Raises:
        ValueError: If the file does not hold exactly one top-level entry.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(yaml_file) as f:
        experiments = yaml.safe_load(f)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    # An empty file loads as None and is rejected here as well.
    if not isinstance(experiments, dict) or len(experiments) != 1:
        raise ValueError("Error, can only run a single experiment per yaml file!")

    return next(iter(experiments.values()))


def prepare_experiment(experiment, framework=None, local_mode=False):
    """Adjust an experiment spec in place for memory leak testing.

    Args:
        experiment: Experiment dict with at least the keys "config" and
            "env". It is modified in place.
        framework: Optional framework name written to the config when truthy.
        local_mode: Whether ray is going to run in local mode.

    Returns:
        The same `experiment` dict, with "env" moved into its "config".

    Raises:
        KeyError: If "config" or "env" is missing from `experiment`.
    """
    config = experiment["config"]

    # Add framework option to exp configs.
    if framework:
        config["framework"] = framework
    # Create env on local_worker for memory leak testing just the env.
    config["create_env_on_driver"] = True
    # Always run with eager-tracing when framework=tf2 if not in local-mode.
    if framework in ["tf2", "tfe"] and not local_mode:
        config["eager_tracing"] = True

    # Move "env" specifier into config.
    config["env"] = experiment.pop("env")
    return experiment


if __name__ == "__main__":
    args = parser.parse_args()

    # Base directory for resolving `--yaml-dir`: three levels above this file.
    rllib_dir = Path(__file__).parent.parent.parent
    print("rllib dir={}".format(rllib_dir))

    yaml_files = collect_yaml_files(rllib_dir, args.yaml_dir)

    print("Will run the following memory-leak tests:")
    for yaml_file in yaml_files:
        print("->", yaml_file)

    # Loop through all collected files.
    for yaml_file in yaml_files:
        experiment = prepare_experiment(
            load_single_experiment(yaml_file),
            framework=args.framework,
            local_mode=args.local_mode,
        )

        # Print out the actual config.
        print("== Test config ==")
        print(yaml.dump(experiment))

        # Construct the trainer instance based on the given config and run
        # the leak check; always shut ray down again, even on errors.
        try:
            ray.init(num_cpus=5, local_mode=args.local_mode)
            trainer = get_algorithm_class(experiment["run"])(experiment["config"])
            results = check_memory_leaks(
                trainer,
                to_check=set(args.to_check),
            )
        finally:
            ray.shutdown()

        # A truthy result is treated as "leaks found"; stop at the first
        # failing experiment.
        if results:
            print("Memory leak test FAILED. Exiting with Error.")
            sys.exit(1)
        print("Memory leak test PASSED")