[CI] Check test files for if __name__... snippet (#25322)

Bazel operates by simply running the Python scripts given to it in `py_test`. If a script doesn't invoke pytest on itself in an `if __name__ == "__main__"` snippet, no tests will be run and the script will pass. This has led to several tests (some of which are fixed in this PR) that, despite having been written, have never run in CI. This PR adds a lint check that scans all `py_test` sources for the presence of the `if __name__ == "__main__"` snippet and fails CI if any source is missing it. The check is only enabled for libraries right now (tune, train, air, rllib), but it could be trivially extended to other modules if approved.
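
For reference, the snippet the lint check looks for is the standard one added to the test files in this PR (the exact pytest flags vary per file):

```python
if __name__ == "__main__":
    import pytest
    import sys

    sys.exit(pytest.main(["-v", __file__]))
```

Targets that intentionally omit the snippet can opt out by adding the `no_main` tag to their BUILD entry.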
Antoni Baum 2022-06-02 11:30:00 +02:00 committed by GitHub
parent 64f9a9066f
commit 045c47f172
17 changed files with 142 additions and 21 deletions

View file

@ -10,12 +10,12 @@ RUN set -x; apt update \
&& ln -fs /usr/share/zoneinfo/America/Los_Angeles /etc/localtime \
&& apt install emacs gdb wget npm git build-essential curl unzip zip psmisc curl gnupg python3 pip iptables ycmd -y \
&& dpkg-reconfigure --frontend noninteractive tzdata \
&& apt install default-jre default-jdk clang rtags tmux clang-format shellcheck cmake autogen python-dev automake autoconf libtool -y \
&& apt install default-jre default-jdk clang rtags tmux clang-format shellcheck cmake autogen python-dev automake autoconf libtool jq -y \
&& curl -fsSL https://bazel.build/bazel-release.pub.gpg | gpg --dearmor > bazel.gpg \
&& mv bazel.gpg /etc/apt/trusted.gpg.d/ \
&& echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list \
&& apt update && apt install bazel-3.7.2 -y \
&& pip3 install cython==0.29.26 pytest pandas tree tabulate pexpect sklearn joblib black==21.12b0 flake8==3.9.1 mypy==0.782 flake8-quotes flake8-bugbear==21.9.2 setproctitle==1.1.10 psutil \
&& pip3 install cython==0.29.26 pytest pandas tree tabulate pexpect sklearn joblib black==21.12b0 flake8==3.9.1 mypy==0.782 flake8-quotes flake8-bugbear==21.9.2 setproctitle==1.1.10 psutil yq \
&& python3 -c 'print("startup --output_base=/workspace/ray/.bazel-cache\nstartup --host_jvm_args=-Xmx1800m\nbuild --jobs=6")' > /etc/bazel.bazelrc
RUN update-alternatives --install /usr/local/bin/python python /usr/bin/python3 30 \

View file

@ -522,6 +522,12 @@ lint_bazel() {
)
}
lint_bazel_pytest() {
  pip install yq
  cd "${WORKSPACE_DIR}"
  bazel query 'kind(py_test.*, tests(python/...) intersect attr(tags, "\bteam:ml\b", python/...) except attr(tags, "\bno_main\b", python/...))' --output xml | xq | python scripts/pytest_checker.py
}
lint_web() {
(
cd "${WORKSPACE_DIR}"/python/ray/dashboard/client
@ -588,6 +594,9 @@ _lint() {
# Run Bazel linter Buildifier.
lint_bazel
# Check if py_test files have the if __name__... snippet
lint_bazel_pytest
# Run TypeScript and HTML linting.
lint_web

View file

@ -7,7 +7,7 @@ py_test(
size = "small",
main = "examples/custom_trainer.py",
srcs = ["examples/custom_trainer.py"],
tags = ["team:ml", "exclusive"],
tags = ["team:ml", "exclusive", "no_main"],
deps = [":ml_lib"]
)

View file

@ -104,3 +104,10 @@ def test_predict_no_preprocessor():
predictions = predictor.predict(data_batch)
assert len(predictions) == 3
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-sv", __file__]))

View file

@ -1,6 +1,6 @@
py_test(
name = "test_ludwig",
size = "small",
size = "medium",
srcs = ["test_ludwig.py"],
deps = ["//:ray_lib"],
tags = ["team:ml", "exclusive"],

View file

@ -143,7 +143,7 @@ def random_string(length=5):
def numerical_feature(normalization=None, **kwargs):
feature = {
"name": "num_" + random_string(),
"type": "numerical",
"type": "number",
"preprocessing": {"normalization": normalization},
}
feature.update(kwargs)
@ -602,6 +602,7 @@ def train_with_backend(
model = LudwigModel(config, backend=backend)
output_dir = None
ret = False
try:
_, _, output_dir = model.train(
dataset=dataset,
@ -624,7 +625,8 @@ def train_with_backend(
_, eval_preds, _ = model.evaluate(dataset=dataset)
assert backend.df_engine.compute(eval_preds) is not None
return model.model.get_weights()
ret = True
finally:
# Remove results/intermediate data saved to disk
shutil.rmtree(output_dir, ignore_errors=True)
return ret

View file

@ -57,6 +57,7 @@ if not skip:
from ray.tests.ludwig.ludwig_test_utils import set_feature
from ray.tests.ludwig.ludwig_test_utils import train_with_backend
from ray.tests.ludwig.ludwig_test_utils import vector_feature
else:
def spawn(func):
@ -86,12 +87,13 @@ def ray_start_2_cpus():
def run_api_experiment(config, data_parquet):
# Sanity check that we get 4 slots over 1 host
kwargs = get_horovod_kwargs()
assert kwargs.get("num_hosts") == 1
assert kwargs.get("num_slots") == 2
assert kwargs.get("num_workers") == 2
# Train on Parquet
dask_backend = RayBackend()
train_with_backend(dask_backend, config, dataset=data_parquet, evaluate=False)
assert train_with_backend(
dask_backend, config, dataset=data_parquet, evaluate=False
)
@spawn
@ -155,3 +157,10 @@ def test_ray_tabular_client():
assert ray.util.client.ray.is_connected()
test_ray_tabular()
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", "-x", __file__]))

View file

@ -17,7 +17,7 @@ py_test(
size = "medium",
main = "examples/mlflow_simple_example.py",
srcs = ["examples/mlflow_simple_example.py"],
tags = ["team:ml", "exclusive"],
tags = ["team:ml", "exclusive", "no_main"],
deps = [":train_lib"],
)
@ -99,7 +99,7 @@ py_test(
name = "pytorch_pbt_failure",
size = "medium",
srcs = ["tests/pytorch_pbt_failure.py"],
tags = ["team:ml", "exlusive"],
tags = ["team:ml", "exlusive", "no_main"],
deps = [":train_lib"],
args = ["--smoke-test"]
)

View file

@ -428,7 +428,7 @@ py_test(
size = "small",
srcs = ["tests/example.py"],
deps = [":tune_lib"],
tags = ["team:ml", "exclusive", "example"],
tags = ["team:ml", "exclusive", "example", "no_main"],
)
py_test(
@ -445,7 +445,7 @@ py_test(
size = "medium",
srcs = ["tests/tutorial.py"],
deps = [":tune_lib"],
tags = ["team:ml", "exclusive", "example"],
tags = ["team:ml", "exclusive", "example", "no_main"],
)
# --------------------------------------------------------------------

View file

@ -7,3 +7,4 @@ requests
tabulate
tensorflow
black==21.12b0
yq

View file

@ -57,3 +57,10 @@ class TestTrialExecutorInheritance(unittest.TestCase):
class _AnotherMyRayTrialExecutor(_MyRayTrialExecutor):
pass
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))

View file

@ -560,7 +560,7 @@ class TestTopJobResourceAllocationAddBundles(TestTopJobResourceAllocation):
self._allocateAndAssertNewResources(
trial1,
scheduler,
PlacementGroupFactory([{"CPU": 2, "GPU": 2}] * 4),
PlacementGroupFactory([{}] + [{"CPU": 2, "GPU": 2}] * 4),
metric=1.2,
)
@ -631,3 +631,10 @@ class TestTopJobResourceAllocationAddBundles(TestTopJobResourceAllocation):
self._allocateAndAssertNewResources(
trial1, scheduler, PlacementGroupFactory([{"CPU": 1}, {"GPU": 2}])
)
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))

View file

@ -36,3 +36,10 @@ class TuneUtilsTest(unittest.TestCase):
),
"a_c_x=ok,some=",
)
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))

View file

@ -235,6 +235,12 @@ class PlacementGroupFactory:
self._bound = None
self._bind()
    def __repr__(self) -> str:
        return (
            f"<PlacementGroupFactory (_bound={self._bound}, "
            f"head_bundle_is_empty={self.head_bundle_is_empty})>"
        )
def resource_dict_to_pg_factory(spec: Optional[Dict[str, float]]):
spec = spec or {"cpu": 1}

View file

@ -2578,7 +2578,7 @@ py_test(
py_test(
name = "examples/export/onnx_tf",
main = "examples/export/onnx_tf.py",
tags = ["team:ml", "examples", "examples_E"],
tags = ["team:ml", "examples", "examples_E", "no_main"],
size = "medium",
srcs = ["examples/export/onnx_tf.py"],
)
@ -2586,7 +2586,7 @@ py_test(
py_test(
name = "examples/export/onnx_torch",
main = "examples/export/onnx_torch.py",
tags = ["team:ml", "examples", "examples_E"],
tags = ["team:ml", "examples", "examples_E", "no_main"],
size = "medium",
srcs = ["examples/export/onnx_torch.py"],
)
@ -3109,7 +3109,7 @@ py_test(
py_test(
name = "examples/documentation/custom_gym_env",
main = "examples/documentation/custom_gym_env.py",
tags = ["team:ml", "documentation"],
tags = ["team:ml", "documentation", "no_main"],
size = "medium",
srcs = ["examples/documentation/custom_gym_env.py"],
)
@ -3117,7 +3117,7 @@ py_test(
py_test(
name = "examples/documentation/rllib_in_60s",
main = "examples/documentation/rllib_in_60s.py",
tags = ["team:ml", "documentation"],
tags = ["team:ml", "documentation", "no_main"],
size = "medium",
srcs = ["examples/documentation/rllib_in_60s.py"],
)
@ -3125,7 +3125,7 @@ py_test(
py_test(
name = "examples/documentation/rllib_on_ray_readme",
main = "examples/documentation/rllib_on_ray_readme.py",
tags = ["team:ml", "documentation"],
tags = ["team:ml", "documentation", "no_main"],
size = "medium",
srcs = ["examples/documentation/rllib_on_ray_readme.py"],
)
@ -3133,7 +3133,7 @@ py_test(
py_test(
name = "examples/documentation/rllib_on_rllib_readme",
main = "examples/documentation/rllib_on_rllib_readme.py",
tags = ["team:ml", "documentation"],
tags = ["team:ml", "documentation", "no_main"],
size = "medium",
srcs = ["examples/documentation/rllib_on_rllib_readme.py"],
)
@ -3161,5 +3161,5 @@ py_test_module_list(
size = "large",
extra_srcs = [],
deps = [],
tags = ["manual", "team:ml"],
tags = ["manual", "team:ml", "no_main"],
)

View file

@ -113,3 +113,10 @@ class NestedActionSpacesTest(unittest.TestCase):
bc_trainer.stop()
config["output"] = tmp_dir
config["input"] = "sampler"
if __name__ == "__main__":
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))

scripts/pytest_checker.py (new file, 59 lines)
View file

@ -0,0 +1,59 @@
import json
import re
import sys
from pathlib import Path


def check_file(file_contents: str) -> bool:
    return bool(re.search(r"^if __name__ == \"__main__\":", file_contents, re.M))


def parse_json(data: str) -> dict:
    return json.loads(data)


def treat_path(path: str) -> Path:
    path = path[2:].replace(":", "/")
    return Path(path)


def get_paths_from_parsed_data(parsed_data: dict) -> list:
    paths = []
    for rule in parsed_data["query"]["rule"]:
        if "label" in rule and rule["label"]["@name"] == "main":
            paths.append(treat_path(rule["label"]["@value"]))
        else:
            list_args = {e["@name"]: e for e in rule["list"]}
            paths.append(treat_path(list_args["srcs"]["label"]["@value"]))
    return paths


def main(data: str):
    print("Checking files for the pytest snippet...")
    parsed_data = parse_json(data)
    paths = get_paths_from_parsed_data(parsed_data)
    bad_paths = []
    for path in paths:
        print(f"Checking file {path}...")
        with open(path, "r") as f:
            if not check_file(f.read()):
                print(f"File {path} is missing the pytest snippet.")
                bad_paths.append(path)
    if bad_paths:
        raise RuntimeError(
            'Found py_test files without `if __name__ == "__main__":` snippet:'
            f" {[str(x) for x in bad_paths]}\n"
            "If this is intentional, please add a `no_main` tag to bazel BUILD "
            "entry for that file."
        )


if __name__ == "__main__":
    # Expects a json
    # Invocation from workspace root:
    # bazel query 'kind(py_test.*, tests(python/...) intersect
    # attr(tags, "\bteam:ml\b", python/...) except attr(tags, "\bno_main\b",
    # python/...))' --output xml | xq | python scripts/pytest_checker.py
    data = sys.stdin.read()
    main(data)
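
For illustration, here is a minimal sketch of the JSON shape the script expects on stdin after `bazel query --output xml | xq`. The structure (including the `@`-prefixed attribute keys produced by xq's XML-to-JSON conversion) is inferred from `get_paths_from_parsed_data` above; the target labels are hypothetical.

```python
# Hypothetical input covering both branches of get_paths_from_parsed_data:
# a py_test that declares an explicit `main`, and one whose entry point is
# taken from its `srcs` label.
example = {
    "query": {
        "rule": [
            {
                # Rule with an explicit main attribute.
                "label": {
                    "@name": "main",
                    "@value": "//python/ray/air:examples/custom_trainer.py",
                },
            },
            {
                # Rule without main; the source file comes from srcs.
                "list": [
                    {
                        "@name": "srcs",
                        "label": {"@value": "//python/ray/tune:tests/test_example.py"},
                    },
                ],
            },
        ]
    }
}
# treat_path("//python/ray/tune:tests/test_example.py")
# -> Path("python/ray/tune/tests/test_example.py")
```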