[tune] Fixup exception messages (#5238)

Richard Liaw 2019-07-20 22:36:27 -07:00 committed by GitHub
parent d58b986858
commit b0c0de49a2
5 changed files with 18 additions and 9 deletions


@@ -50,8 +50,11 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--smoke-test", action="store_true", help="Finish quickly for testing")
+    parser.add_argument(
+        "--ray-redis-address",
+        help="Address of Ray cluster for seamless distributed execution.")
     args, _ = parser.parse_known_args()
-    ray.init()
+    ray.init(redis_address=args.ray_redis_address)
 
     # asynchronous hyperband early stopping, configured with
     # `episode_reward_mean` as the

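Note on the change above: because argparse defaults the new flag to None, `ray.init(redis_address=None)` behaves like the old bare `ray.init()` and starts a local Ray instance, while passing an address attaches to a running cluster. A minimal standalone sketch of the same pattern (the script name and address in the comment are hypothetical):

import argparse

import ray

parser = argparse.ArgumentParser()
parser.add_argument(
    "--ray-redis-address",
    help="Address of Ray cluster for seamless distributed execution.")
args, _ = parser.parse_known_args()

# e.g. `python my_tune_example.py --ray-redis-address=192.168.1.10:6379`, where
# the address is whatever `ray start --head` printed on the head node.
# Without the flag, args.ray_redis_address is None and a local instance starts.
ray.init(redis_address=args.ray_redis_address)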

@@ -55,7 +55,7 @@ class NodeSyncMixin():
     def _check_valid_worker_ip(self):
         if not self.worker_ip:
-            logger.info("Worker ip unknown, skipping log sync for {}".format(
+            logger.debug("Worker ip unknown, skipping log sync for {}".format(
                 self._local_dir))
             return False
         if self.worker_ip == self.local_ip:

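Since the "Worker ip unknown" message is now logged at DEBUG level, it no longer appears with default logging. A small sketch of how a user could still surface it; the "ray.tune" logger name is an assumption based on the usual logging.getLogger(__name__) convention in these modules:

import logging

# Raise both the root handler and the Tune logger to DEBUG so the
# downgraded record actually reaches the console.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("ray.tune").setLevel(logging.DEBUG)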

@@ -180,11 +180,8 @@ class RayTrialExecutor(TrialExecutor):
                     logger.debug("Reusing actor for {}".format(trial.runner))
                     self._cached_actor = trial.runner
                 else:
-                    logger.info(
-                        "Destroying actor for trial {}. If your trainable is "
-                        "slow to initialize, consider setting "
-                        "reuse_actors=True to reduce actor creation "
-                        "overheads.".format(trial))
+                    logger.debug(
+                        "Destroying actor for trial {}.".format(trial))
                     trial.runner.stop.remote()
                     trial.runner.__ray_terminate__.remote()
         except Exception:

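The reuse_actors hint that used to be logged on every actor teardown now only appears when _setup is actually slow (see the trainable change below). For reference, a hedged sketch of how the setting itself is passed, assuming the tune.run API of this era; the trainable, metric, and stopping criterion are placeholders:

from ray import tune
from ray.tune import Trainable

class MyTrainable(Trainable):
    # Placeholder Trainable using the class API of this era (_setup/_train).
    def _setup(self, config):
        self.score = 0

    def _train(self):
        self.score += 1
        return {"mean_accuracy": self.score}

tune.run(
    MyTrainable,
    stop={"training_iteration": 5},
    num_samples=4,
    # The setting the removed message pointed at: keep actor processes alive
    # across trials instead of destroying and recreating them. Depending on
    # the Tune version, effective reuse may also require reset_config().
    reuse_actors=True)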

@@ -26,6 +26,8 @@ from ray.tune.util import UtilMonitor
 
 logger = logging.getLogger(__name__)
 
+SETUP_TIME_THRESHOLD = 10
+
 
 class Trainable(object):
     """Abstract class for trainable models, functions, etc.
@@ -93,7 +95,14 @@ class Trainable(object):
         self._timesteps_since_restore = 0
         self._iterations_since_restore = 0
         self._restored = False
 
+        start_time = time.time()
         self._setup(copy.deepcopy(self.config))
+        setup_time = time.time() - start_time
+        if setup_time > SETUP_TIME_THRESHOLD:
+            logger.info("_setup took {:.3f} seconds. If your trainable is "
+                        "slow to initialize, consider setting "
+                        "reuse_actors=True to reduce actor creation "
+                        "overheads.".format(setup_time))
         self._local_ip = ray.services.get_node_ip_address()
         self._monitor = UtilMonitor(start=log_sys_usage)

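To see the relocated message in practice, a Trainable whose _setup exceeds the 10-second threshold would trigger it once per actor creation. A hedged sketch; the sleep and names below are illustrative only:

import time

from ray import tune
from ray.tune import Trainable

class SlowSetupTrainable(Trainable):
    def _setup(self, config):
        # Simulated expensive initialization (loading data, building a model,
        # ...); anything above SETUP_TIME_THRESHOLD (10 s) now logs the
        # reuse_actors hint at INFO level.
        time.sleep(12)
        self.steps = 0

    def _train(self):
        self.steps += 1
        return {"mean_accuracy": self.steps}

tune.run(SlowSetupTrainable, stop={"training_iteration": 2})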

@@ -43,10 +43,10 @@ class UtilMonitor(Thread):
 
     def __init__(self, start=True, delay=0.7):
         self.stopped = True
-        if GPUtil is None:
+        if GPUtil is None and start:
             logger.warning("Install gputil for GPU system monitoring.")
-        if psutil is None:
+        if psutil is None and start:
             logger.warning("Install psutil to monitor system performance.")
 
         if GPUtil is None and psutil is None:
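With the added `and start` guard, the missing-dependency warnings only fire when the monitor is actually asked to start. A minimal sketch of the behavioral difference, assuming the constructor shown above can be called safely without the optional dependencies installed:

from ray.tune.util import UtilMonitor

# Before this change, merely constructing the monitor warned about gputil/psutil
# even when system monitoring was disabled; the warnings are now tied to `start`.
UtilMonitor(start=False)  # constructed but never started: no install warnings
UtilMonitor(start=True)   # warns only if gputil/psutil are actually missing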