From 9f3e9e7e9fd19fffe5ff647965ace2a0b022f899 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Mon, 20 Apr 2020 11:14:44 -0700 Subject: [PATCH] [tune] Add more intensive tests (#7667) * make_heavier_tests * help --- ci/jenkins_tests/run_tune_tests.sh | 11 +++-------- docker/examples/Dockerfile | 5 ++--- docker/tune_test/Dockerfile | 2 +- python/ray/tune/examples/bohb_example.py | 13 ++----------- python/ray/tune/examples/hyperband_example.py | 4 ++-- python/ray/tune/examples/pbt_example.py | 4 ++-- 6 files changed, 12 insertions(+), 27 deletions(-) diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh index 167108264..22c8c07ed 100755 --- a/ci/jenkins_tests/run_tune_tests.sh +++ b/ci/jenkins_tests/run_tune_tests.sh @@ -51,9 +51,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ bash -c 'pip install -U tensorflow==1.14 && python /ray/python/ray/tune/tests/test_logger.py' -$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - bash -c 'pip install -U tensorflow==1.12 && python /ray/python/ray/tune/tests/test_logger.py' - $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 -e MPLBACKEND=Agg $DOCKER_SHA \ python /ray/python/ray/tune/tests/tutorial.py @@ -62,8 +59,7 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --smoke-test $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ - python /ray/python/ray/tune/examples/hyperband_example.py \ - --smoke-test + python /ray/python/ray/tune/examples/hyperband_example.py $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ python /ray/python/ray/tune/examples/async_hyperband_example.py \ @@ -145,9 +141,8 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --smoke-test # uncomment once statsmodels is updated. -# $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ -# python /ray/python/ray/tune/examples/bohb_example.py \ -# --smoke-test +$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ + python /ray/python/ray/tune/examples/bohb_example.py # Moved to bottom because flaky $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \ diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index e8c68e8bf..a3ec9e04b 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -9,10 +9,9 @@ RUN apt-get install -y zlib1g-dev libgl1-mesa-dev RUN pip install -U pip RUN pip install gym[atari] opencv-python-headless tensorflow lz4 pytest-timeout smart_open tensorflow_probability dm_tree RUN pip install -U h5py # Mutes FutureWarnings -RUN pip install --upgrade bayesian-optimization -RUN pip install --upgrade hyperopt==0.1.2 +RUN pip install --upgrade bayesian-optimization hyperopt RUN pip install ConfigSpace==0.4.10 RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster lightgbm xgboost torch torchvision tensorboardX dragonfly-opt zoopt RUN pip install -U tabulate mlflow RUN pip install -U pytest-remotedata>=0.3.1 -RUN pip install -U matplotlib jupyter pandas +RUN pip install -U matplotlib jupyter pandas diff --git a/docker/tune_test/Dockerfile b/docker/tune_test/Dockerfile index 573d2b911..b6a3343cf 100644 --- a/docker/tune_test/Dockerfile +++ b/docker/tune_test/Dockerfile @@ -13,7 +13,7 @@ RUN pip install torch==1.4.0+cpu torchvision==0.5.0+cpu -f https://download.pyto RUN pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl COPY requirements.txt . -RUN pip install -r requirements.txt +RUN pip install -r requirements.txt # We port the source code in so that we run the most up-to-date stress tests. ADD ray.tar /ray diff --git a/python/ray/tune/examples/bohb_example.py b/python/ray/tune/examples/bohb_example.py index 3fc1cda8c..a7478e5f1 100644 --- a/python/ray/tune/examples/bohb_example.py +++ b/python/ray/tune/examples/bohb_example.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import argparse import json import os @@ -11,14 +10,6 @@ from ray.tune import Trainable, run from ray.tune.schedulers.hb_bohb import HyperBandForBOHB from ray.tune.suggest.bohb import TuneBOHB -parser = argparse.ArgumentParser() -parser.add_argument( - "--smoke-test", action="store_true", help="Finish quickly for testing") -parser.add_argument( - "--ray-address", - help="Address of Ray cluster for seamless distributed execution.") -args, _ = parser.parse_known_args() - class MyTrainableClass(Trainable): """Example agent whose learning curve is a random sigmoid. @@ -52,7 +43,7 @@ class MyTrainableClass(Trainable): if __name__ == "__main__": import ConfigSpace as CS - ray.init(address=args.ray_address) + ray.init(num_cpus=8) # BOHB uses ConfigSpace for their hyperparameter search space config_space = CS.ConfigurationSpace() @@ -75,4 +66,4 @@ if __name__ == "__main__": scheduler=bohb_hyperband, search_alg=bohb_search, num_samples=10, - stop={"training_iteration": 10 if args.smoke_test else 100}) + stop={"training_iteration": 100}) diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index 1da764a54..f8ee12162 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -47,7 +47,7 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() + ray.init(num_cpus=4 if args.smoke_test else None) # Hyperband early stopping, configured with `episode_reward_mean` as the # objective and `training_iteration` as the time unit, @@ -56,7 +56,7 @@ if __name__ == "__main__": time_attr="training_iteration", metric="episode_reward_mean", mode="max", - max_t=100) + max_t=200) run(MyTrainableClass, name="hyperband_test", diff --git a/python/ray/tune/examples/pbt_example.py b/python/ray/tune/examples/pbt_example.py index 13ed2b09f..c906fe238 100755 --- a/python/ray/tune/examples/pbt_example.py +++ b/python/ray/tune/examples/pbt_example.py @@ -110,9 +110,9 @@ if __name__ == "__main__": reuse_actors=True, verbose=False, stop={ - "training_iteration": 2000, + "training_iteration": 200, }, - num_samples=4, + num_samples=8, config={ "lr": 0.0001, # note: this parameter is perturbed but has no effect on