[Core] Set c++ terminate handler to print stack trace (#26444)

This commit is contained in:
Jiajun Yao 2022-07-12 13:54:20 -07:00 committed by GitHub
parent 45ba0e3cac
commit 53d878804a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 99 additions and 44 deletions

View file

@ -178,6 +178,7 @@ test:segfault --run_under="bash -c 'unset GREP_OPTIONS && if ! grep -q -o Micros
# Debug build:
build:debug -c dbg
build:debug --copt="-g"
build:debug --copt -fno-omit-frame-pointer
build:debug --strip="never"
# Undefined Behavior Sanitizer

View file

@ -1401,7 +1401,11 @@ cc_test(
name = "logging_test",
size = "small",
srcs = ["src/ray/util/logging_test.cc"],
args = ["--gtest_filter=PrintLogTest*"],
args = [
"--gtest_filter=PrintLogTest*",
# Disable so we can test terminate handler.
"--gtest_catch_exceptions=0",
],
copts = COPTS,
tags = ["team:core"],
deps = [

View file

@ -95,6 +95,7 @@ CoreWorkerProcessImpl::CoreWorkerProcessImpl(const CoreWorkerOptions &options)
// Also, call the previous crash handler, e.g. the one installed by the Python
// worker.
RayLog::InstallFailureSignalHandler(nullptr, /*call_previous_handler=*/true);
RayLog::InstallTerminateHandler();
}
} else {
RAY_CHECK(options_.log_dir.empty())

View file

@ -40,6 +40,7 @@ int main(int argc, char *argv[]) {
ray::RayLogLevel::INFO,
/*log_dir=*/"");
ray::RayLog::InstallFailureSignalHandler(argv[0]);
ray::RayLog::InstallTerminateHandler();
gflags::ParseCommandLineFlags(&argc, &argv, true);
const std::string redis_address = FLAGS_redis_address;

View file

@ -83,6 +83,7 @@ int main(int argc, char *argv[]) {
ray::RayLogLevel::INFO,
/*log_dir=*/"");
ray::RayLog::InstallFailureSignalHandler(argv[0]);
ray::RayLog::InstallTerminateHandler();
gflags::ParseCommandLineFlags(&argc, &argv, true);
const std::string raylet_socket_name = FLAGS_raylet_socket_name;

View file

@ -55,19 +55,55 @@ long RayLog::log_rotation_file_num_ = 10;
bool RayLog::is_failure_signal_handler_installed_ = false;
std::atomic<bool> RayLog::initialized_ = false;
std::string GetCallTrace() {
std::vector<void *> local_stack;
local_stack.resize(50);
absl::GetStackTrace(local_stack.data(), 50, 0);
static constexpr size_t buf_size = 16 * 1024;
char buf[buf_size];
std::string output;
for (auto &stack : local_stack) {
if (absl::Symbolize(stack, buf, buf_size)) {
output.append(" ").append(buf).append("\n");
/// Writes the current call stack to `os`, one frame per line.
///
/// On non-Windows builds each line starts with the backtrace_symbols()
/// description of the frame, followed by the absl-symbolized name when
/// symbolization succeeds. On Windows each line is the absl-symbolized name,
/// or "unknown" when symbolization fails.
///
/// \param os Stream that receives the trace.
/// \param stack_trace Tag object; it is not read.
/// \return `os`, so the call can be chained.
std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace) {
  static constexpr int MAX_NUM_FRAMES = 64;
  char buf[16 * 1024];
  void *frames[MAX_NUM_FRAMES];

#ifndef _WIN32
  const int num_frames = backtrace(frames, MAX_NUM_FRAMES);
  // backtrace_symbols() allocates its result and returns NULL on failure, so
  // the array must be checked before it is indexed.
  char **frame_symbols = backtrace_symbols(frames, num_frames);
  for (int i = 0; i < num_frames; ++i) {
    if (frame_symbols != nullptr) {
      os << frame_symbols[i];
    } else {
      // Fall back to the raw return address so the frame is still reported.
      os << frames[i];
    }
    if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
      os << " " << buf;
    }
    os << "\n";
  }
  // free(NULL) is a no-op, so this is safe on the failure path as well.
  free(frame_symbols);
#else
  const int num_frames = absl::GetStackTrace(frames, MAX_NUM_FRAMES, 0);
  for (int i = 0; i < num_frames; ++i) {
    if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
      os << buf;
    } else {
      os << "unknown";
    }
    os << "\n";
  }
#endif

  return os;
}
/// Terminate handler: logs the exception currently being handled (if any) and
/// the current stack trace at ERROR level, then aborts the process.
void TerminateHandler() {
  // Print the exception info, if any.
  if (auto e_ptr = std::current_exception()) {
    try {
      // Rethrow so the catch clauses below can inspect the dynamic type.
      std::rethrow_exception(e_ptr);
    } catch (const std::exception &e) {
      RAY_LOG(ERROR) << "Unhandled exception: " << typeid(e).name()
                     << ". what(): " << e.what();
    } catch (...) {
      RAY_LOG(ERROR) << "Unhandled unknown exception.";
    }
  }

  RAY_LOG(ERROR) << "Stack trace: \n " << ray::StackTrace();
  std::abort();
}
inline const char *ConstBasename(const char *filepath) {
@ -119,10 +155,10 @@ class SpdLogMessage final {
}
if (loglevel_ == static_cast<int>(spdlog::level::critical)) {
stream() << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
stream() << "\n*** StackTrace Information ***\n" << ray::StackTrace();
}
if (expose_osstream_) {
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::StackTrace();
}
// NOTE(lingxuan.zlx): See more fmt by visiting https://github.com/fmtlib/fmt.
logger->log(
@ -355,6 +391,8 @@ void RayLog::InstallFailureSignalHandler(const char *argv0, bool call_previous_h
is_failure_signal_handler_installed_ = true;
}
/// Registers TerminateHandler as the handler invoked by std::terminate().
void RayLog::InstallTerminateHandler() {
  // The previous handler returned by std::set_terminate is discarded.
  std::set_terminate(&TerminateHandler);
}
/// Reports whether `log_level` passes the configured severity threshold.
///
/// \param log_level Level to test.
/// \return true when `log_level` is not below `severity_threshold_`.
bool RayLog::IsLevelEnabled(RayLogLevel log_level) {
  const bool below_threshold = log_level < severity_threshold_;
  return !below_threshold;
}

View file

@ -85,8 +85,10 @@ enum { ERROR = 0 };
#endif
namespace ray {
/// This function returns the current call stack information.
std::string GetCallTrace();
/// Empty tag type: streaming an instance into a std::ostream through the
/// friend operator<< declared in this class is how a stack trace dump is
/// requested (e.g. `os << ray::StackTrace()`).
class StackTrace {
/// This dumps the current stack trace information.
/// Only the declaration lives here; the definition is not part of this class body.
friend std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace);
};
enum class RayLogLevel {
TRACE = -2,
@ -272,6 +274,10 @@ class RayLog : public RayLogBase {
static void InstallFailureSignalHandler(const char *argv0,
bool call_previous_handler = false);
/// Install the terminate handler to output call stack when std::terminate() is called
/// (e.g. unhandled exception).
static void InstallTerminateHandler();
/// To check failure signal handler enabled or not.
static bool IsFailureSignalHandlerEnabled();

View file

@ -256,9 +256,11 @@ TEST(PrintLogTest, TestCheckOp) {
}
/// Captures the current stack trace by streaming ray::StackTrace into a string,
/// logs it at INFO level after a "TestFunctionLevel0" banner line, and returns
/// the captured text so callers can inspect it.
std::string TestFunctionLevel0() {
  std::ostringstream oss;
  oss << ray::StackTrace();
  std::string stack_trace = oss.str();
  RAY_LOG(INFO) << "TestFunctionLevel0\n" << stack_trace;
  return stack_trace;
}
std::string TestFunctionLevel1() {
@ -271,37 +273,38 @@ std::string TestFunctionLevel2() {
return TestFunctionLevel1();
}
#ifndef _WIN32
TEST(PrintLogTest, CallstackTraceTest) {
TEST(PrintLogTest, TestStackTrace) {
auto ret0 = TestFunctionLevel0();
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos);
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos) << ret0;
auto ret1 = TestFunctionLevel1();
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos);
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos) << ret1;
auto ret2 = TestFunctionLevel2();
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos);
}
#endif
/// Catch abort signal handler for testing RAY_CHECK.
/// It is better to run the following test case manually, since the process
/// will be terminated when the abort signal is raised.
/*
bool get_abort_signal = false;
void signal_handler(int signum) {
RAY_LOG(WARNING) << "Interrupt signal (" << signum << ") received.";
get_abort_signal = signum == SIGABRT;
exit(0);
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos) << ret2;
}
TEST(PrintLogTest, RayCheckAbortTest) {
get_abort_signal = false;
// signal(SIGABRT, signal_handler);
ray::RayLog::InstallFailureSignalHandler();
RAY_CHECK(0) << "Check for aborting";
sleep(1);
EXPECT_TRUE(get_abort_signal);
/// Test helper: logs its own name at INFO level, then fetches the currently
/// installed terminate handler and calls it directly.
/// \return 0, reached only if the handler returns.
int TerminateHandlerLevel0() {
RAY_LOG(INFO) << "TerminateHandlerLevel0";
// Invoke whatever handler is installed right now, without throwing anything.
auto terminate_handler = std::get_terminate();
(*terminate_handler)();
// NOTE(review): presumably the statement after the call also keeps this frame
// from being turned into a tail call and dropped from the trace — confirm.
return 0;
}
/// Test helper: logs its own name at INFO level, then calls
/// TerminateHandlerLevel0() and ignores its result.
/// \return 1, reached only if TerminateHandlerLevel0() returns.
int TerminateHandlerLevel1() {
RAY_LOG(INFO) << "TerminateHandlerLevel1";
TerminateHandlerLevel0();
return 1;
}
/// Installs the Ray terminate handler, then asserts that calling
/// TerminateHandlerLevel1() kills the process with output in which
/// "TerminateHandlerLevel0" appears before "TerminateHandlerLevel1".
TEST(PrintLogTest, TestTerminateHandler) {
ray::RayLog::InstallTerminateHandler();
// The regex fixes the order: Level0 first, then Level1.
ASSERT_DEATH(TerminateHandlerLevel1(),
".*TerminateHandlerLevel0.*TerminateHandlerLevel1.*");
}
/// Installs the failure signal handler with a null argv0, then asserts that
/// abort() kills the process with output matching "SIGABRT received".
TEST(PrintLogTest, TestFailureSignalHandler) {
ray::RayLog::InstallFailureSignalHandler(nullptr);
ASSERT_DEATH(abort(), ".*SIGABRT received.*");
}
*/
} // namespace ray