ray/rllib/utils/tests/run_memory_leak_tests.py

#!/usr/bin/env python
# Runs one or more memory leak tests.
#
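# Example usage (`--yaml-dir` is required; it is resolved relative to the
# rllib directory and may name either a single yaml file or a directory):
# $ python run_memory_leak_tests.py --yaml-dir tuned_examples/ppo/memory-leak-test-ppo.yaml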
#
# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD:
# py_test(
#     name = "memory_leak_ppo",
#     main = "tests/test_memory_leak.py",
#     tags = ["memory_leak_tests"],
#     size = "medium",  # 5min timeout
#     srcs = ["tests/test_memory_leak.py"],
#     data = glob(["tuned_examples/ppo/*.yaml"]),
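#     # Pass the path to look for yaml files via `--yaml-dir`.
#     # NOTE(review): this script's parser defines no positional `BAZEL`
#     # argument; confirm the args below against the actual BUILD rule.
#     args = ["--yaml-dir", "tuned_examples/ppo/memory-leak-test-ppo.yaml"]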
# )

import argparse
import os
from pathlib import Path
import sys
import yaml

import ray
from ray.rllib.agents.registry import get_trainer_class
from ray.rllib.utils.debug.memory import check_memory_leaks
from ray.rllib import _register_all

# Command line options of the memory leak test runner, as (flag, kwargs) pairs
# in the order in which they are registered on the parser.
_CLI_OPTIONS = (
    (
        # Optional framework override that gets written into the experiment
        # config by the main script.
        "--framework",
        dict(
            choices=["jax", "tf2", "tf", "tfe", "torch", None],
            default=None,
            required=False,
            help="The deep learning framework to use.",
        ),
    ),
    (
        # Mandatory location of the yaml file(s) describing the experiments.
        "--yaml-dir",
        dict(
            type=str,
            required=True,
            help="The directory in which to find all yamls to test.",
        ),
    ),
    (
        # Boolean switch, off by default.
        "--local-mode",
        dict(
            action="store_true",
            help="Run ray in local mode for easier debugging.",
        ),
    ),
    (
        # One or more component names to be checked for leaks.
        "--to-check",
        dict(
            nargs="+",
            default=["env", "policy", "rollout_worker"],
            help="List of 'env', 'policy', 'rollout_worker', 'model'.",
        ),
    ),
)

parser = argparse.ArgumentParser()
for _flag, _options in _CLI_OPTIONS:
    parser.add_argument(_flag, **_options)


if __name__ == "__main__":
    # Script entry point: collect the yaml file(s) named by `--yaml-dir`, run
    # one memory leak check per file and exit with status 1 as soon as one
    # check reports a leak.
    args = parser.parse_args()

    # `--yaml-dir` is resolved relative to the directory three `.parent`
    # steps above this file.
    rllib_dir = Path(__file__).parent.parent.parent
    print("rllib dir={}".format(rllib_dir))

    abs_yaml_path = os.path.join(rllib_dir, args.yaml_dir)
    # Single file given.
    if os.path.isfile(abs_yaml_path):
        yaml_files = [abs_yaml_path]
    # Given path/file does not exist.
    elif not os.path.isdir(abs_yaml_path):
        raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))
    # Path given -> Get all yaml files in there via rglob.
    else:
        yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
        # Absolute path strings, in reverse lexicographic order.
        yaml_files = sorted(
            map(lambda path: str(path.absolute()), yaml_files), reverse=True
        )

    print("Will run the following memory-leak tests:")
    for yaml_file in yaml_files:
        print("->", yaml_file)

    # Loop through all collected files.
    for yaml_file in yaml_files:
        # Use a context manager so the file handle is closed right after
        # parsing instead of being left open until garbage collection.
        with open(yaml_file) as yaml_stream:
            experiments = yaml.safe_load(yaml_stream)
        assert (
            len(experiments) == 1
        ), "Error, can only run a single experiment per yaml file!"

        experiment = list(experiments.values())[0]

        # Add framework option to exp configs.
        if args.framework:
            experiment["config"]["framework"] = args.framework
        # Create env on local_worker for memory leak testing just the env.
        experiment["config"]["create_env_on_driver"] = True
        # Always run with eager-tracing when framework=tf2 if not in local-mode.
        if args.framework in ["tf2", "tfe"] and not args.local_mode:
            experiment["config"]["eager_tracing"] = True
        # experiment["config"]["callbacks"] = MemoryTrackingCallbacks

        # Move "env" specifier into config (raises KeyError if the experiment
        # does not define an "env").
        experiment["config"]["env"] = experiment.pop("env")

        # Print out the actual config.
        print("== Test config ==")
        print(yaml.dump(experiment))

        # Construct the trainer instance based on the given config and run the
        # leak check. Assume a leak until the check says otherwise.
        leaking = True
        try:
            ray.init(num_cpus=5, local_mode=args.local_mode)
            trainer = get_trainer_class(experiment["run"])(experiment["config"])
            results = check_memory_leaks(
                trainer,
                to_check=set(args.to_check),
            )
            # A falsy (e.g. empty) result is treated as "no leaks found".
            if not results:
                leaking = False
        finally:
            # Always tear ray down, even if trainer construction or the check
            # raised, so the next yaml file starts from a fresh ray.init().
            ray.shutdown()
            # NOTE(review): presumably restores RLlib's registrations after
            # the shutdown - confirm against `ray.rllib._register_all`.
            _register_all()

        if not leaking:
            print("Memory leak test PASSED")
        else:
            # Stop at the first failing file; remaining yaml files are not run.
            print("Memory leak test FAILED. Exiting with Error.")
            sys.exit(1)
[RLlib] Memory leak finding toolset using tracemalloc + CI memory leak tests. (#15412) 2022-04-12 07:50:09 +02:00			`#!/usr/bin/env python`
			`# Runs one or more memory leak tests.`
			`#`
			`# Example usage:`
			`# $ python run_memory_leak_tests.py memory-leak-test-ppo.yaml`
			`#`
			`# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD:`
			`# py_test(`
			`# name = "memory_leak_ppo",`
			`# main = "tests/test_memory_leak.py",`
			`# tags = ["memory_leak_tests"],`
			`# size = "medium", # 5min timeout`
			`# srcs = ["tests/test_memory_leak.py"],`
			`# data = glob(["tuned_examples/ppo/*.yaml"]),`
			# # Pass `BAZEL` option and the path to look for yaml files.
			`# args = ["BAZEL", "tuned_examples/ppo/memory-leak-test-ppo.yaml"]`
			`# )`

			`import argparse`
			`import os`
			`from pathlib import Path`
			`import sys`
			`import yaml`

			`import ray`
			`from ray.rllib.agents.registry import get_trainer_class`
			`from ray.rllib.utils.debug.memory import check_memory_leaks`
			`from ray.rllib import _register_all`

			`parser = argparse.ArgumentParser()`
			`parser.add_argument(`
			`"--framework",`
			`required=False,`
			`choices=["jax", "tf2", "tf", "tfe", "torch", None],`
			`default=None,`
			`help="The deep learning framework to use.",`
			`)`
			`parser.add_argument(`
			`"--yaml-dir",`
			`required=True,`
			`type=str,`
			`help="The directory in which to find all yamls to test.",`
			`)`
			`parser.add_argument(`
			`"--local-mode",`
			`action="store_true",`
			`help="Run ray in local mode for easier debugging.",`
			`)`
			`parser.add_argument(`
			`"--to-check",`
			`nargs="+",`
			`default=["env", "policy", "rollout_worker"],`
			`help="List of 'env', 'policy', 'rollout_worker', 'model'.",`
			`)`


			`if __name__ == "__main__":`
			`args = parser.parse_args()`

			`# Bazel regression test mode: Get path to look for yaml files.`
			`# Get the path or single file to use.`
			`rllib_dir = Path(__file__).parent.parent.parent`
			`print("rllib dir={}".format(rllib_dir))`

			`abs_yaml_path = os.path.join(rllib_dir, args.yaml_dir)`
			`# Single file given.`
			`if os.path.isfile(abs_yaml_path):`
			`yaml_files = [abs_yaml_path]`
			`# Given path/file does not exist.`
			`elif not os.path.isdir(abs_yaml_path):`
			`raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))`
			`# Path given -> Get all yaml files in there via rglob.`
			`else:`
			`yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")`
			`yaml_files = sorted(`
			`map(lambda path: str(path.absolute()), yaml_files), reverse=True`
			`)`

			`print("Will run the following memory-leak tests:")`
			`for yaml_file in yaml_files:`
			`print("->", yaml_file)`

			`# Loop through all collected files.`
			`for yaml_file in yaml_files:`
			`experiments = yaml.safe_load(open(yaml_file).read())`
			`assert (`
			`len(experiments) == 1`
			`), "Error, can only run a single experiment per yaml file!"`

			`experiment = list(experiments.values())[0]`

			`# Add framework option to exp configs.`
			`if args.framework:`
			`experiment["config"]["framework"] = args.framework`
			`# Create env on local_worker for memory leak testing just the env.`
			`experiment["config"]["create_env_on_driver"] = True`
			`# Always run with eager-tracing when framework=tf2 if not in local-mode.`
			`if args.framework in ["tf2", "tfe"] and not args.local_mode:`
			`experiment["config"]["eager_tracing"] = True`
			`# experiment["config"]["callbacks"] = MemoryTrackingCallbacks`

			`# Move "env" specifier into config.`
			`experiment["config"]["env"] = experiment["env"]`
			`experiment.pop("env", None)`

			`# Print out the actual config.`
			`print("== Test config ==")`
			`print(yaml.dump(experiment))`

			`# Construct the trainer instance based on the given config.`
			`leaking = True`
			`try:`
			`ray.init(num_cpus=5, local_mode=args.local_mode)`
			`trainer = get_trainer_class(experiment["run"])(experiment["config"])`
			`results = check_memory_leaks(`
			`trainer,`
			`to_check=set(args.to_check),`
			`)`
			`if not results:`
			`leaking = False`
			`finally:`
			`ray.shutdown()`
			`_register_all()`

			`if not leaking:`
			`print("Memory leak test PASSED")`
			`else:`
			`print("Memory leak test FAILED. Exiting with Error.")`
			`sys.exit(1)`