ray/dashboard/modules/job/utils.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

67 lines
2.5 KiB
Python
Raw Normal View History

import logging
import os
from typing import Iterator, List, Optional
logger = logging.getLogger(__name__)

# Upper bounds on a single chunk yielded by file_tail_iterator(): a chunk is
# flushed as soon as it holds this many lines or this many characters.
MAX_CHUNK_LINE_LENGTH = 10
MAX_CHUNK_CHAR_LENGTH = 20000


def file_tail_iterator(path: str) -> Iterator[Optional[List[str]]]:
    """Yield lines from a file as it's written.

    Lines are returned in batches ("chunks") of up to MAX_CHUNK_LINE_LENGTH
    lines or MAX_CHUNK_CHAR_LENGTH characters, whichever limit is reached
    first. When the character limit cuts a chunk short, the last string in
    that chunk may be an incomplete line; the rest of the line is delivered
    at the start of the following chunk(s). Newline characters are kept in
    the line strings. A chunk never contains an empty string.

    The generator never terminates on its own: once the end of the file is
    reached it keeps yielding None until more data is appended.

    Args:
        path: Path of the file to follow. The file does not need to exist
            yet when the iterator is created.

    Yields:
        A non-empty list of strings for each chunk, or None while the file
        does not exist yet or when nothing new has been written since the
        previous yield.

    Raises:
        TypeError: If ``path`` is not a string (raised on the first
            iteration, since this is a generator).
    """
    if not isinstance(path, str):
        raise TypeError(f"path must be a string, got {type(path)}.")

    while not os.path.exists(path):
        logger.debug("Path %s doesn't exist yet.", path)
        yield None

    with open(path, "r") as f:
        lines: List[str] = []
        chunk_char_count = 0
        # Unprocessed text: either None (read a fresh line next) or the
        # tail of an over-long line that was split by the character limit.
        curr_line: Optional[str] = None

        while True:
            if curr_line is None:
                # Only read the next line in the file if there's no
                # remaining "curr_line" to process.
                curr_line = f.readline()

            if not curr_line:
                # readline() returns an empty string when there's no new
                # line. Flush whatever has accumulated (or None). This check
                # must come before the size checks so that the EOF marker
                # is never appended to a chunk.
                yield lines or None
                lines = []
                chunk_char_count = 0
                curr_line = None
                continue

            # Always >= 1 here, because a chunk is flushed as soon as it
            # reaches MAX_CHUNK_CHAR_LENGTH characters.
            remaining_chars = MAX_CHUNK_CHAR_LENGTH - chunk_char_count

            if len(curr_line) > remaining_chars:
                # Too many characters: fill the chunk up to the limit and
                # keep the remainder of the line for the next iteration.
                lines.append(curr_line[:remaining_chars])
                curr_line = curr_line[remaining_chars:]
                yield lines
                lines = []
                chunk_char_count = 0
                continue

            # The whole line fits in the current chunk.
            lines.append(curr_line)
            chunk_char_count += len(curr_line)
            curr_line = None

            if (
                len(lines) >= MAX_CHUNK_LINE_LENGTH
                or chunk_char_count >= MAX_CHUNK_CHAR_LENGTH
            ):
                # Chunk is full (by line count or by characters): flush it
                # and continue reading the file.
                yield lines
                lines = []
                chunk_char_count = 0