mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Send raylet error logs through the log monitor (#5351)
This commit is contained in:
parent
a08ea09760
commit
0a3ff489fa
3 changed files with 38 additions and 5 deletions
|
@ -90,10 +90,12 @@ class LogMonitor(object):
|
|||
|
||||
def update_log_filenames(self):
|
||||
"""Update the list of log files to monitor."""
|
||||
# we only monior worker log files
|
||||
# output of user code is written here
|
||||
log_file_paths = glob.glob("{}/worker*[.out|.err]".format(
|
||||
self.logs_dir))
|
||||
for file_path in log_file_paths:
|
||||
# segfaults and other serious errors are logged here
|
||||
raylet_err_paths = glob.glob("{}/raylet*.err".format(self.logs_dir))
|
||||
for file_path in log_file_paths + raylet_err_paths:
|
||||
if os.path.isfile(
|
||||
file_path) and file_path not in self.log_filenames:
|
||||
self.log_filenames.add(file_path)
|
||||
|
@ -195,6 +197,8 @@ class LogMonitor(object):
|
|||
file_info.worker_pid = int(
|
||||
lines_to_publish[0].split(" ")[-1])
|
||||
lines_to_publish = lines_to_publish[1:]
|
||||
elif "/raylet" in file_info.filename:
|
||||
file_info.worker_pid = "raylet"
|
||||
|
||||
# Record the current position in the file.
|
||||
file_info.file_position = file_info.file_handle.tell()
|
||||
|
|
|
@ -1594,15 +1594,22 @@ def print_logs(redis_client, threads_stopped):
|
|||
num_consecutive_messages_received += 1
|
||||
|
||||
data = json.loads(ray.utils.decode(msg["data"]))
|
||||
|
||||
def color_for(data):
|
||||
if data["pid"] == "raylet":
|
||||
return colorama.Fore.YELLOW
|
||||
else:
|
||||
return colorama.Fore.CYAN
|
||||
|
||||
if data["ip"] == localhost:
|
||||
for line in data["lines"]:
|
||||
print("{}{}(pid={}){} {}".format(
|
||||
colorama.Style.DIM, colorama.Fore.CYAN, data["pid"],
|
||||
colorama.Style.DIM, color_for(data), data["pid"],
|
||||
colorama.Style.RESET_ALL, line))
|
||||
else:
|
||||
for line in data["lines"]:
|
||||
print("{}{}(pid={}, ip={}){} {}".format(
|
||||
colorama.Style.DIM, colorama.Fore.CYAN, data["pid"],
|
||||
colorama.Style.DIM, color_for(data), data["pid"],
|
||||
data["ip"], colorama.Style.RESET_ALL, line))
|
||||
|
||||
if (num_consecutive_messages_received % 100 == 0
|
||||
|
|
|
@ -11,11 +11,28 @@
|
|||
#include <iostream>
|
||||
|
||||
#ifdef RAY_USE_GLOG
|
||||
#include <sys/stat.h>
|
||||
#include "glog/logging.h"
|
||||
#endif
|
||||
|
||||
namespace ray {
|
||||
|
||||
#ifdef RAY_USE_GLOG
|
||||
struct StdoutLogger : public google::base::Logger {
|
||||
virtual void Write(bool /* should flush */, time_t /* timestamp */, const char *message,
|
||||
int length) {
|
||||
// note: always flush otherwise it never shows up in raylet.out
|
||||
std::cout << std::string(message, length) << std::flush;
|
||||
}
|
||||
|
||||
virtual void Flush() { std::cout.flush(); }
|
||||
|
||||
virtual google::uint32 LogSize() { return 0; }
|
||||
};
|
||||
|
||||
static StdoutLogger stdout_logger_singleton;
|
||||
#endif
|
||||
|
||||
// This is the default implementation of ray log,
|
||||
// which is independent of any libs.
|
||||
class CerrLog {
|
||||
|
@ -122,7 +139,12 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres
|
|||
#ifdef RAY_USE_GLOG
|
||||
google::InitGoogleLogging(app_name_.c_str());
|
||||
int mapped_severity_threshold = GetMappedSeverity(severity_threshold_);
|
||||
google::SetStderrLogging(mapped_severity_threshold);
|
||||
google::SetStderrLogging(GetMappedSeverity(RayLogLevel::ERROR));
|
||||
for (int i = static_cast<int>(severity_threshold_);
|
||||
i <= static_cast<int>(RayLogLevel::FATAL); ++i) {
|
||||
int level = GetMappedSeverity(static_cast<RayLogLevel>(i));
|
||||
google::base::SetLogger(level, &stdout_logger_singleton);
|
||||
}
|
||||
// Enable log file if log_dir_ is not empty.
|
||||
if (!log_dir_.empty()) {
|
||||
auto dir_ends_with_slash = log_dir_;
|
||||
|
|
Loading…
Add table
Reference in a new issue