From 9e7ddddff780f7e9f17a304b74c31790666409f4 Mon Sep 17 00:00:00 2001 From: Alex Wu Date: Wed, 20 Jul 2022 16:17:25 -0700 Subject: [PATCH] [log_monitor] Always reopen files (#26730) This PR prevents the log monitor from keeping files open for long periods of time. In settings in which the autoscaler and head node are not tightly coupled, leaving files open assumes that the inode for a file never changes, but depending on how fs synchronization between the autoscaler and head node containers works, the inode could change. Thus, we should keep trying to reopen files. This is done by setting max open files to 1, so that it's easy to revert this behavior. Co-authored-by: Alex --- python/ray/_private/ray_constants.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ray/_private/ray_constants.py b/python/ray/_private/ray_constants.py index 427f599ab..2ff0a2837 100644 --- a/python/ray/_private/ray_constants.py +++ b/python/ray/_private/ray_constants.py @@ -264,7 +264,9 @@ WORKER_PROCESS_TYPE_RESTORE_WORKER_DELETE = ( f"ray::DELETE_{WORKER_PROCESS_TYPE_RESTORE_WORKER_NAME}" ) -LOG_MONITOR_MAX_OPEN_FILES = 200 +# Effectively reopen every file, every time. To fall back to the old behavior +# one should set this constant to 200. +LOG_MONITOR_MAX_OPEN_FILES = int(os.environ.get("RAY_LOG_MONITOR_MAX_OPEN_FILES", "1")) # Autoscaler events are denoted by the ":event_summary:" magic token. LOG_PREFIX_EVENT_SUMMARY = ":event_summary:"