mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00

Clean up the ci/ directory. This means getting rid of the travis/ path completely and moving the files into sensible subdirectories. Details: - Moves everything under ci/travis into subdirectories, e.g. ci/build, ci/lint, etc. - Minor adjustments to some scripts (variable renames) - Removes the outdated (unused) asan tests
384 lines
12 KiB
Python
384 lines
12 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# This file contains utilities for understanding dependencies between python
|
|
# source files and tests.
|
|
#
|
|
# Utils are assumed to be used from top level ray/ folder, since that is how
|
|
# our tests are defined today.
|
|
#
|
|
# Example usage:
|
|
# To find all circular dependencies under ray/python/:
|
|
# python ci/pipeline/py_dep_analysis.py --mode=circular-dep
|
|
# To find all the RLlib tests that depend on a file:
|
|
# python ci/pipeline/py_dep_analysis.py --mode=test-dep \
|
|
# --file=python/ray/tune/tune.py
|
|
# For testing, add --smoke-test to any commands, so it doesn't spend
|
|
# tons of time querying for available RLlib tests.
|
|
|
|
import argparse
|
|
import ast
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from typing import Dict, List, Tuple
|
|
|
|
|
|
class DepGraph(object):
|
|
def __init__(self):
|
|
self.edges: Dict[str, Dict[str, bool]] = {}
|
|
self.ids: Dict[str, int] = {}
|
|
self.inv_ids: Dict[int, str] = {}
|
|
|
|
|
|
def _run_shell(args: List[str]) -> str:
|
|
return subprocess.check_output(args).decode(sys.stdout.encoding)
|
|
|
|
|
|
def list_rllib_tests(n: int = -1, test: str = None) -> Tuple[str, List[str]]:
|
|
"""List RLlib tests.
|
|
|
|
Args:
|
|
n: return at most n tests. all tests if n = -1.
|
|
test: only return information about a specific test.
|
|
"""
|
|
tests_res = _run_shell(
|
|
["bazel", "query", "tests(//python/ray/rllib:*)", "--output", "label"]
|
|
)
|
|
|
|
all_tests = []
|
|
|
|
# Strip, also skip any empty lines
|
|
tests = [t.strip() for t in tests_res.splitlines() if t.strip()]
|
|
for t in tests:
|
|
if test and t != test:
|
|
continue
|
|
|
|
src_out = _run_shell(
|
|
[
|
|
"bazel",
|
|
"query",
|
|
'kind("source file", deps({}))'.format(t),
|
|
"--output",
|
|
"label",
|
|
]
|
|
)
|
|
|
|
srcs = [f.strip() for f in src_out.splitlines()]
|
|
srcs = [f for f in srcs if f.startswith("//python") and f.endswith(".py")]
|
|
if srcs:
|
|
all_tests.append((t, srcs))
|
|
|
|
# Break early if smoke test.
|
|
if n > 0 and len(all_tests) >= n:
|
|
break
|
|
|
|
return all_tests
|
|
|
|
|
|
def _new_dep(graph: DepGraph, src_module: str, dep: str):
|
|
"""Create a new dependency between src_module and dep."""
|
|
if dep not in graph.ids:
|
|
graph.ids[dep] = len(graph.ids)
|
|
|
|
src_id = graph.ids[src_module]
|
|
dep_id = graph.ids[dep]
|
|
|
|
if src_id not in graph.edges:
|
|
graph.edges[src_id] = {}
|
|
graph.edges[src_id][dep_id] = True
|
|
|
|
|
|
def _new_import(graph: DepGraph, src_module: str, dep_module: str):
|
|
"""Process a new import statement in src_module."""
|
|
# We don't care about system imports.
|
|
if not dep_module.startswith("ray"):
|
|
return
|
|
|
|
_new_dep(graph, src_module, dep_module)
|
|
|
|
|
|
def _is_path_module(module: str, name: str, _base_dir: str) -> bool:
|
|
"""Figure out if base.sub is a python module or not."""
|
|
# Special handling for _raylet, which is a C++ lib.
|
|
if module == "ray._raylet":
|
|
return False
|
|
|
|
bps = ["python"] + module.split(".")
|
|
path = os.path.join(_base_dir, os.path.join(*bps), name + ".py")
|
|
if os.path.isfile(path):
|
|
return True # file module
|
|
return False
|
|
|
|
|
|
def _new_from_import(
|
|
graph: DepGraph, src_module: str, dep_module: str, dep_name: str, _base_dir: str
|
|
):
|
|
"""Process a new "from ... import ..." statement in src_module."""
|
|
# We don't care about imports outside of ray package.
|
|
if not dep_module or not dep_module.startswith("ray"):
|
|
return
|
|
|
|
if _is_path_module(dep_module, dep_name, _base_dir):
|
|
# dep_module.dep_name points to a file.
|
|
_new_dep(graph, src_module, _full_module_path(dep_module, dep_name))
|
|
else:
|
|
# sub is an obj on base dir/file.
|
|
_new_dep(graph, src_module, dep_module)
|
|
|
|
|
|
def _process_file(graph: DepGraph, src_path: str, src_module: str, _base_dir=""):
|
|
"""Create dependencies from src_module to all the valid imports in src_path.
|
|
|
|
Args:
|
|
graph: the DepGraph to be added to.
|
|
src_path: .py file to be processed.
|
|
src_module: full module path of the source file.
|
|
_base_dir: use a different base dir than current dir. For unit testing.
|
|
"""
|
|
with open(os.path.join(_base_dir, src_path), "r") as in_f:
|
|
tree = ast.parse(in_f.read())
|
|
|
|
for node in ast.walk(tree):
|
|
if isinstance(node, ast.Import):
|
|
for alias in node.names:
|
|
_new_import(graph, src_module, alias.name)
|
|
elif isinstance(node, ast.ImportFrom):
|
|
for alias in node.names:
|
|
_new_from_import(
|
|
graph, src_module, node.module, alias.name, _base_dir
|
|
)
|
|
|
|
|
|
def build_dep_graph() -> DepGraph:
|
|
"""Build index from py files to their immediate dependees."""
|
|
graph = DepGraph()
|
|
|
|
# Assuming we run from root /ray directory.
|
|
# Follow links since rllib is linked to /rllib.
|
|
for root, sub_dirs, files in os.walk("python", followlinks=True):
|
|
if _should_skip(root):
|
|
continue
|
|
|
|
module = _bazel_path_to_module_path(root)
|
|
|
|
# Process files first.
|
|
for f in files:
|
|
if not f.endswith(".py"):
|
|
continue
|
|
|
|
full = _full_module_path(module, f)
|
|
|
|
if full not in graph.ids:
|
|
graph.ids[full] = len(graph.ids)
|
|
|
|
# Process file:
|
|
_process_file(graph, os.path.join(root, f), full)
|
|
|
|
# Build reverse index for convenience.
|
|
graph.inv_ids = {v: k for k, v in graph.ids.items()}
|
|
|
|
return graph
|
|
|
|
|
|
def _full_module_path(module, f) -> str:
|
|
if f == "__init__.py":
|
|
# __init__ file for this module.
|
|
# Full path is the same as the module name.
|
|
return module
|
|
|
|
fn = re.sub(r"\.py$", "", f)
|
|
|
|
if not module:
|
|
return fn
|
|
return module + "." + fn
|
|
|
|
|
|
def _should_skip(d: str) -> bool:
|
|
"""Skip directories that should not contain py sources."""
|
|
if d.startswith("python/.eggs/"):
|
|
return True
|
|
if d.startswith("python/."):
|
|
return True
|
|
if d.startswith("python/build"):
|
|
return True
|
|
if d.startswith("python/ray/cpp"):
|
|
return True
|
|
return False
|
|
|
|
|
|
def _bazel_path_to_module_path(d: str) -> str:
|
|
"""Convert a Bazel file path to python module path.
|
|
|
|
Example: //python/ray/rllib:xxx/yyy/dd -> ray.rllib.xxx.yyy.dd
|
|
"""
|
|
# Do this in 3 steps, so all of 'python:', 'python/', or '//python', etc
|
|
# will get stripped.
|
|
d = re.sub(r"^\/\/", "", d)
|
|
d = re.sub(r"^python", "", d)
|
|
d = re.sub(r"^[\/:]", "", d)
|
|
return d.replace("/", ".").replace(":", ".")
|
|
|
|
|
|
def _file_path_to_module_path(f: str) -> str:
|
|
"""Return the corresponding module path for a .py file."""
|
|
dir, fn = os.path.split(f)
|
|
return _full_module_path(_bazel_path_to_module_path(dir), fn)
|
|
|
|
|
|
def _depends(
|
|
graph: DepGraph, visited: Dict[int, bool], tid: int, qid: int
|
|
) -> List[int]:
|
|
"""Whether there is a dependency path from module tid to module qid.
|
|
|
|
Given graph, and without going through visited.
|
|
"""
|
|
if tid not in graph.edges or qid not in graph.edges:
|
|
return []
|
|
if qid in graph.edges[tid]:
|
|
# tid directly depends on qid.
|
|
return [tid, qid]
|
|
for c in graph.edges[tid]:
|
|
if c in visited:
|
|
continue
|
|
visited[c] = True
|
|
# Reduce to a question of whether there is a path from c to qid.
|
|
ds = _depends(graph, visited, c, qid)
|
|
if ds:
|
|
# From tid -> c -> qid.
|
|
return [tid] + ds
|
|
return []
|
|
|
|
|
|
def test_depends_on_file(
|
|
graph: DepGraph, test: Tuple[str, Tuple[str]], path: str
|
|
) -> List[int]:
|
|
"""Give dependency graph, check if a test depends on a specific .py file.
|
|
|
|
Args:
|
|
graph: the dependency graph.
|
|
test: information about a test, in the format of:
|
|
[test_name, (src files for the test)]
|
|
"""
|
|
query = _file_path_to_module_path(path)
|
|
if query not in graph.ids:
|
|
# Not a file that we care about.
|
|
return []
|
|
|
|
t, srcs = test
|
|
|
|
# Skip tuned_examples/ and examples/ tests.
|
|
if t.startswith("//python/ray/rllib:examples/"):
|
|
return []
|
|
|
|
for src in srcs:
|
|
if src == "ray.rllib.tests.run_regression_tests":
|
|
return []
|
|
|
|
tid = _file_path_to_module_path(src)
|
|
if tid not in graph.ids:
|
|
# Not a test that we care about.
|
|
# TODO(jungong): What tests are these?????
|
|
continue
|
|
|
|
branch = _depends(graph, {}, graph.ids[tid], graph.ids[query])
|
|
if branch:
|
|
return branch
|
|
|
|
# Does not depend on file.
|
|
return []
|
|
|
|
|
|
def _find_circular_dep_impl(graph: DepGraph, id: str, branch: str) -> bool:
|
|
if id not in graph.edges:
|
|
return False
|
|
for c in graph.edges[id]:
|
|
if c in branch:
|
|
# Found a circle.
|
|
branch.append(c)
|
|
return True
|
|
branch.append(c)
|
|
if _find_circular_dep_impl(graph, c, branch):
|
|
return True
|
|
branch.pop()
|
|
return False
|
|
|
|
|
|
def find_circular_dep(graph: DepGraph) -> Dict[str, List[int]]:
|
|
"""Find circular dependencies among a dependency graph."""
|
|
known = {}
|
|
circles = {}
|
|
for m, id in graph.ids.items():
|
|
branch = []
|
|
if _find_circular_dep_impl(graph, id, branch):
|
|
if branch[-1] in known:
|
|
# Already knew, skip.
|
|
continue
|
|
# Since this is a cycle dependency, any step along the circle
|
|
# will form a different circle.
|
|
# So we mark every entry on this circle known.
|
|
for n in branch:
|
|
known[n] = True
|
|
# Mark that module m contains a potential circular dep.
|
|
circles[m] = branch
|
|
return circles
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser()
|
|
|
|
parser.add_argument(
|
|
"--mode",
|
|
type=str,
|
|
default="test-dep",
|
|
help=(
|
|
"test-dep: find dependencies for a specified test. "
|
|
"circular-dep: find circular dependencies in "
|
|
"the specific codebase."
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--file", type=str, help="Path of a .py source file relative to --base_dir."
|
|
)
|
|
parser.add_argument("--test", type=str, help="Specific test to check.")
|
|
parser.add_argument(
|
|
"--smoke-test", action="store_true", help="Load only a few tests for testing."
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
print("building dep graph ...")
|
|
graph = build_dep_graph()
|
|
print(
|
|
"done. total {} files, {} of which have dependencies.".format(
|
|
len(graph.ids), len(graph.edges)
|
|
)
|
|
)
|
|
|
|
if args.mode == "circular-dep":
|
|
circles = find_circular_dep(graph)
|
|
print("Found following circular dependencies: \n")
|
|
for m, b in circles.items():
|
|
print(m)
|
|
for n in b:
|
|
print(" ", graph.inv_ids[n])
|
|
print()
|
|
|
|
if args.mode == "test-dep":
|
|
assert args.file, "Must specify --file for the query."
|
|
|
|
# Only support RLlib tests for now.
|
|
# The way Tune tests are defined, they all depend on
|
|
# the entire tune codebase.
|
|
tests = list_rllib_tests(5 if args.smoke_test else -1, args.test)
|
|
print("Total # of tests: ", len(tests))
|
|
|
|
for t in tests:
|
|
branch = test_depends_on_file(graph, t, args.file)
|
|
if branch:
|
|
print("{} depends on {}".format(t[0], args.file))
|
|
# Print some debugging info.
|
|
for n in branch:
|
|
print(" ", graph.inv_ids[n])
|
|
else:
|
|
print("{} does not depend on {}".format(t[0], args.file))
|