Fix log files being opened as unicode files (#5545)

This commit is contained in:
Philipp Moritz 2019-08-27 12:47:00 -07:00 committed by Robert Nishihara
parent 52a6a1b9f7
commit ddfababb82
2 changed files with 32 additions and 1 deletions

View file

@ -144,7 +144,7 @@ class LogMonitor(object):
# file.
if file_size > file_info.size_when_last_opened:
try:
f = open(file_info.filename, "r")
f = open(file_info.filename, "rb")
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
logger.warning("Warning: The file {} was not "
@ -179,6 +179,10 @@ class LogMonitor(object):
for _ in range(max_num_lines_to_read):
try:
next_line = file_info.file_handle.readline()
# Replace any characters not in UTF-8 with
# a replacement character, see
# https://stackoverflow.com/a/38565489/10891801
next_line = next_line.decode("utf-8", "replace")
if next_line == "":
break
if next_line[-1] == "\n":

View file

@ -5,6 +5,7 @@ from __future__ import print_function
import collections
from concurrent.futures import ThreadPoolExecutor
import glob
import json
import logging
from multiprocessing import Process
@ -3113,3 +3114,29 @@ def test_export_after_shutdown(ray_start_regular):
ray.get(actor_handle.method.remote())
ray.get(export_definitions_from_worker.remote(f, Actor))
def test_invalid_unicode_in_worker_log(shutdown_only):
    """Check that invalid UTF-8 in a worker log file does not kill anything.

    Starts Ray with one CPU, overwrites the first worker ``.out`` log file
    with bytes that are not valid UTF-8, waits for the log monitor to read
    the file, and asserts that all Ray processes are still alive.

    Raises:
        RuntimeError: If no worker log file shows up before the deadline.
    """
    info = ray.init(num_cpus=1)
    logs_dir = os.path.join(info["session_dir"], "logs")
    log_file_pattern = "{}/worker*.out".format(logs_dir)

    # Wait till first worker log file is created. The wait is bounded so
    # that a missing log file fails the test instead of hanging it forever.
    wait_timeout_s = 30
    deadline = time.time() + wait_timeout_s
    log_file_paths = glob.glob(log_file_pattern)
    while not log_file_paths:
        if time.time() > deadline:
            raise RuntimeError(
                "No worker log file matching {} appeared within {} "
                "seconds.".format(log_file_pattern, wait_timeout_s))
        time.sleep(0.2)
        log_file_paths = glob.glob(log_file_pattern)

    # 0xE5 announces a multi-byte UTF-8 sequence, but the following ASCII
    # bytes are not continuation bytes, so each "\xe5abc" line is invalid.
    invalid_utf8_lines = b"\xe5abc\nline2\nline3\n"
    with open(log_file_paths[0], "wb") as f:
        # Same bytes as three consecutive writes; leaving the ``with``
        # block closes the file, which also flushes it.
        f.write(invalid_utf8_lines * 3)

    # Wait till the log monitor reads the file.
    time.sleep(1.0)

    # Make sure that nothing has died.
    assert ray.services.remaining_processes_alive()