mirror of
https://github.com/vale981/ray
synced 2025-03-04 17:41:43 -05:00
[Core] Set c++ terminate handler to print stack trace (#26444)
This commit is contained in:
parent
45ba0e3cac
commit
53d878804a
8 changed files with 99 additions and 44 deletions
1
.bazelrc
1
.bazelrc
|
@ -178,6 +178,7 @@ test:segfault --run_under="bash -c 'unset GREP_OPTIONS && if ! grep -q -o Micros
|
|||
# Debug build:
|
||||
build:debug -c dbg
|
||||
build:debug --copt="-g"
|
||||
build:debug --copt -fno-omit-frame-pointer
|
||||
build:debug --strip="never"
|
||||
|
||||
# Undefined Behavior Sanitizer
|
||||
|
|
|
@ -1401,7 +1401,11 @@ cc_test(
|
|||
name = "logging_test",
|
||||
size = "small",
|
||||
srcs = ["src/ray/util/logging_test.cc"],
|
||||
args = ["--gtest_filter=PrintLogTest*"],
|
||||
args = [
|
||||
"--gtest_filter=PrintLogTest*",
|
||||
# Disable so we can test terminate handler.
|
||||
"--gtest_catch_exceptions=0",
|
||||
],
|
||||
copts = COPTS,
|
||||
tags = ["team:core"],
|
||||
deps = [
|
||||
|
|
|
@ -95,6 +95,7 @@ CoreWorkerProcessImpl::CoreWorkerProcessImpl(const CoreWorkerOptions &options)
|
|||
// Also, call the previous crash handler, e.g. the one installed by the Python
|
||||
// worker.
|
||||
RayLog::InstallFailureSignalHandler(nullptr, /*call_previous_handler=*/true);
|
||||
RayLog::InstallTerminateHandler();
|
||||
}
|
||||
} else {
|
||||
RAY_CHECK(options_.log_dir.empty())
|
||||
|
|
|
@ -40,6 +40,7 @@ int main(int argc, char *argv[]) {
|
|||
ray::RayLogLevel::INFO,
|
||||
/*log_dir=*/"");
|
||||
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
||||
ray::RayLog::InstallTerminateHandler();
|
||||
|
||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||
const std::string redis_address = FLAGS_redis_address;
|
||||
|
|
|
@ -83,6 +83,7 @@ int main(int argc, char *argv[]) {
|
|||
ray::RayLogLevel::INFO,
|
||||
/*log_dir=*/"");
|
||||
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
||||
ray::RayLog::InstallTerminateHandler();
|
||||
|
||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||
const std::string raylet_socket_name = FLAGS_raylet_socket_name;
|
||||
|
|
|
@ -55,19 +55,55 @@ long RayLog::log_rotation_file_num_ = 10;
|
|||
bool RayLog::is_failure_signal_handler_installed_ = false;
|
||||
std::atomic<bool> RayLog::initialized_ = false;
|
||||
|
||||
std::string GetCallTrace() {
|
||||
std::vector<void *> local_stack;
|
||||
local_stack.resize(50);
|
||||
absl::GetStackTrace(local_stack.data(), 50, 0);
|
||||
static constexpr size_t buf_size = 16 * 1024;
|
||||
char buf[buf_size];
|
||||
std::string output;
|
||||
for (auto &stack : local_stack) {
|
||||
if (absl::Symbolize(stack, buf, buf_size)) {
|
||||
output.append(" ").append(buf).append("\n");
|
||||
std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace) {
|
||||
static constexpr int MAX_NUM_FRAMES = 64;
|
||||
char buf[16 * 1024];
|
||||
void *frames[MAX_NUM_FRAMES];
|
||||
|
||||
#ifndef _WIN32
|
||||
const int num_frames = backtrace(frames, MAX_NUM_FRAMES);
|
||||
char **frame_symbols = backtrace_symbols(frames, num_frames);
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
os << frame_symbols[i];
|
||||
|
||||
if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
|
||||
os << " " << buf;
|
||||
}
|
||||
|
||||
os << "\n";
|
||||
}
|
||||
free(frame_symbols);
|
||||
#else
|
||||
const int num_frames = absl::GetStackTrace(frames, MAX_NUM_FRAMES, 0);
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
|
||||
os << buf;
|
||||
} else {
|
||||
os << "unknown";
|
||||
}
|
||||
os << "\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
void TerminateHandler() {
|
||||
// Print the exception info, if any.
|
||||
if (auto e_ptr = std::current_exception()) {
|
||||
try {
|
||||
std::rethrow_exception(e_ptr);
|
||||
} catch (std::exception &e) {
|
||||
RAY_LOG(ERROR) << "Unhandled exception: " << typeid(e).name()
|
||||
<< ". what(): " << e.what();
|
||||
} catch (...) {
|
||||
RAY_LOG(ERROR) << "Unhandled unknown exception.";
|
||||
}
|
||||
}
|
||||
return output;
|
||||
|
||||
RAY_LOG(ERROR) << "Stack trace: \n " << ray::StackTrace();
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
inline const char *ConstBasename(const char *filepath) {
|
||||
|
@ -119,10 +155,10 @@ class SpdLogMessage final {
|
|||
}
|
||||
|
||||
if (loglevel_ == static_cast<int>(spdlog::level::critical)) {
|
||||
stream() << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
|
||||
stream() << "\n*** StackTrace Information ***\n" << ray::StackTrace();
|
||||
}
|
||||
if (expose_osstream_) {
|
||||
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
|
||||
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::StackTrace();
|
||||
}
|
||||
// NOTE(lingxuan.zlx): See more fmt by visiting https://github.com/fmtlib/fmt.
|
||||
logger->log(
|
||||
|
@ -355,6 +391,8 @@ void RayLog::InstallFailureSignalHandler(const char *argv0, bool call_previous_h
|
|||
is_failure_signal_handler_installed_ = true;
|
||||
}
|
||||
|
||||
/// Registers TerminateHandler as the process-wide std::terminate() handler.
void RayLog::InstallTerminateHandler() {
  std::set_terminate(&TerminateHandler);
}
|
||||
|
||||
/// Reports whether messages of `log_level` pass the configured severity threshold.
///
/// \param log_level The level to test.
/// \return true when `log_level` is at or above `severity_threshold_`.
bool RayLog::IsLevelEnabled(RayLogLevel log_level) {
  const bool at_or_above_threshold = severity_threshold_ <= log_level;
  return at_or_above_threshold;
}
|
||||
|
|
|
@ -85,8 +85,10 @@ enum { ERROR = 0 };
|
|||
#endif
|
||||
|
||||
namespace ray {
|
||||
/// This function returns the current call stack information.
|
||||
std::string GetCallTrace();
|
||||
/// Tag type with no data members. Its only declaration is the friend operator<<
/// below, so a stack trace is emitted by streaming a temporary, e.g.
/// `os << ray::StackTrace()`.
class StackTrace {
|
||||
/// This dumps the current stack trace information.
|
||||
friend std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace);
|
||||
};
|
||||
|
||||
enum class RayLogLevel {
|
||||
TRACE = -2,
|
||||
|
@ -272,6 +274,10 @@ class RayLog : public RayLogBase {
|
|||
static void InstallFailureSignalHandler(const char *argv0,
|
||||
bool call_previous_handler = false);
|
||||
|
||||
/// Install the terminate handler to output call stack when std::terminate() is called
|
||||
/// (e.g. unhandled exception).
|
||||
static void InstallTerminateHandler();
|
||||
|
||||
/// To check failure signal handler enabled or not.
|
||||
static bool IsFailureSignalHandlerEnabled();
|
||||
|
||||
|
|
|
@ -256,9 +256,11 @@ TEST(PrintLogTest, TestCheckOp) {
|
|||
}
|
||||
|
||||
/// Captures the current stack trace as text, logs it at INFO level prefixed with this
/// function's name, and returns the captured text so callers can inspect it.
std::string TestFunctionLevel0() {
  // Render the trace through the stream operator of ray::StackTrace; the older
  // GetCallTrace()-based body (which returned before reaching this code) is dropped.
  std::ostringstream oss;
  oss << ray::StackTrace();
  std::string stack_trace = oss.str();
  RAY_LOG(INFO) << "TestFunctionLevel0\n" << stack_trace;
  return stack_trace;
}
|
||||
|
||||
std::string TestFunctionLevel1() {
|
||||
|
@ -271,37 +273,38 @@ std::string TestFunctionLevel2() {
|
|||
return TestFunctionLevel1();
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
TEST(PrintLogTest, CallstackTraceTest) {
|
||||
TEST(PrintLogTest, TestStackTrace) {
|
||||
auto ret0 = TestFunctionLevel0();
|
||||
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos);
|
||||
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos) << ret0;
|
||||
auto ret1 = TestFunctionLevel1();
|
||||
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos);
|
||||
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos) << ret1;
|
||||
auto ret2 = TestFunctionLevel2();
|
||||
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Catch abort signal handler for testing RAY_CHECK.
|
||||
/// We'd better to run the following test case manually since process
|
||||
/// will terminated if abort signal raising.
|
||||
/*
|
||||
bool get_abort_signal = false;
|
||||
void signal_handler(int signum) {
|
||||
RAY_LOG(WARNING) << "Interrupt signal (" << signum << ") received.";
|
||||
get_abort_signal = signum == SIGABRT;
|
||||
exit(0);
|
||||
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos) << ret2;
|
||||
}
|
||||
|
||||
TEST(PrintLogTest, RayCheckAbortTest) {
|
||||
get_abort_signal = false;
|
||||
// signal(SIGABRT, signal_handler);
|
||||
ray::RayLog::InstallFailureSignalHandler();
|
||||
RAY_CHECK(0) << "Check for aborting";
|
||||
sleep(1);
|
||||
EXPECT_TRUE(get_abort_signal);
|
||||
/// Innermost helper for the terminate-handler test: logs its own name, then looks up
/// the currently installed terminate handler and calls it directly.
///
/// \return 0, reached only if the invoked handler returns.
int TerminateHandlerLevel0() {
  RAY_LOG(INFO) << "TerminateHandlerLevel0";
  // Fetch and invoke the handler in a single expression.
  std::get_terminate()();
  return 0;
}
|
||||
|
||||
/// Outer helper for the terminate-handler test: logs its own name and then calls
/// TerminateHandlerLevel0(), discarding its result.
///
/// \return 1, reached only if TerminateHandlerLevel0() returns.
int TerminateHandlerLevel1() {
  RAY_LOG(INFO) << "TerminateHandlerLevel1";
  static_cast<void>(TerminateHandlerLevel0());
  return 1;
}
|
||||
|
||||
/// Installs the Ray terminate handler and expects TerminateHandlerLevel1() to kill the
/// process with output matching the regex below.
TEST(PrintLogTest, TestTerminateHandler) {
  ray::RayLog::InstallTerminateHandler();
  const char *const expected_output =
      ".*TerminateHandlerLevel0.*TerminateHandlerLevel1.*";
  ASSERT_DEATH(TerminateHandlerLevel1(), expected_output);
}
|
||||
|
||||
/// Installs the failure signal handler (no argv0) and expects abort() to kill the
/// process with output matching the regex below.
TEST(PrintLogTest, TestFailureSignalHandler) {
  ray::RayLog::InstallFailureSignalHandler(nullptr);
  const char *const expected_output = ".*SIGABRT received.*";
  ASSERT_DEATH(abort(), expected_output);
}
|
||||
*/
|
||||
|
||||
} // namespace ray
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue