mirror of
https://github.com/vale981/ray
synced 2025-03-05 18:11:42 -05:00
[Core] Set c++ terminate handler to print stack trace (#26444)
This commit is contained in:
parent
45ba0e3cac
commit
53d878804a
8 changed files with 99 additions and 44 deletions
1
.bazelrc
1
.bazelrc
|
@ -178,6 +178,7 @@ test:segfault --run_under="bash -c 'unset GREP_OPTIONS && if ! grep -q -o Micros
|
||||||
# Debug build:
|
# Debug build:
|
||||||
build:debug -c dbg
|
build:debug -c dbg
|
||||||
build:debug --copt="-g"
|
build:debug --copt="-g"
|
||||||
|
build:debug --copt -fno-omit-frame-pointer
|
||||||
build:debug --strip="never"
|
build:debug --strip="never"
|
||||||
|
|
||||||
# Undefined Behavior Sanitizer
|
# Undefined Behavior Sanitizer
|
||||||
|
|
|
@ -1401,7 +1401,11 @@ cc_test(
|
||||||
name = "logging_test",
|
name = "logging_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
srcs = ["src/ray/util/logging_test.cc"],
|
srcs = ["src/ray/util/logging_test.cc"],
|
||||||
args = ["--gtest_filter=PrintLogTest*"],
|
args = [
|
||||||
|
"--gtest_filter=PrintLogTest*",
|
||||||
|
# Disable so we can test terminate handler.
|
||||||
|
"--gtest_catch_exceptions=0",
|
||||||
|
],
|
||||||
copts = COPTS,
|
copts = COPTS,
|
||||||
tags = ["team:core"],
|
tags = ["team:core"],
|
||||||
deps = [
|
deps = [
|
||||||
|
|
|
@ -95,6 +95,7 @@ CoreWorkerProcessImpl::CoreWorkerProcessImpl(const CoreWorkerOptions &options)
|
||||||
// Also, call the previous crash handler, e.g. the one installed by the Python
|
// Also, call the previous crash handler, e.g. the one installed by the Python
|
||||||
// worker.
|
// worker.
|
||||||
RayLog::InstallFailureSignalHandler(nullptr, /*call_previous_handler=*/true);
|
RayLog::InstallFailureSignalHandler(nullptr, /*call_previous_handler=*/true);
|
||||||
|
RayLog::InstallTerminateHandler();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
RAY_CHECK(options_.log_dir.empty())
|
RAY_CHECK(options_.log_dir.empty())
|
||||||
|
|
|
@ -40,6 +40,7 @@ int main(int argc, char *argv[]) {
|
||||||
ray::RayLogLevel::INFO,
|
ray::RayLogLevel::INFO,
|
||||||
/*log_dir=*/"");
|
/*log_dir=*/"");
|
||||||
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
||||||
|
ray::RayLog::InstallTerminateHandler();
|
||||||
|
|
||||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||||
const std::string redis_address = FLAGS_redis_address;
|
const std::string redis_address = FLAGS_redis_address;
|
||||||
|
|
|
@ -83,6 +83,7 @@ int main(int argc, char *argv[]) {
|
||||||
ray::RayLogLevel::INFO,
|
ray::RayLogLevel::INFO,
|
||||||
/*log_dir=*/"");
|
/*log_dir=*/"");
|
||||||
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
ray::RayLog::InstallFailureSignalHandler(argv[0]);
|
||||||
|
ray::RayLog::InstallTerminateHandler();
|
||||||
|
|
||||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||||
const std::string raylet_socket_name = FLAGS_raylet_socket_name;
|
const std::string raylet_socket_name = FLAGS_raylet_socket_name;
|
||||||
|
|
|
@ -55,19 +55,55 @@ long RayLog::log_rotation_file_num_ = 10;
|
||||||
bool RayLog::is_failure_signal_handler_installed_ = false;
|
bool RayLog::is_failure_signal_handler_installed_ = false;
|
||||||
std::atomic<bool> RayLog::initialized_ = false;
|
std::atomic<bool> RayLog::initialized_ = false;
|
||||||
|
|
||||||
std::string GetCallTrace() {
|
std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace) {
|
||||||
std::vector<void *> local_stack;
|
static constexpr int MAX_NUM_FRAMES = 64;
|
||||||
local_stack.resize(50);
|
char buf[16 * 1024];
|
||||||
absl::GetStackTrace(local_stack.data(), 50, 0);
|
void *frames[MAX_NUM_FRAMES];
|
||||||
static constexpr size_t buf_size = 16 * 1024;
|
|
||||||
char buf[buf_size];
|
#ifndef _WIN32
|
||||||
std::string output;
|
const int num_frames = backtrace(frames, MAX_NUM_FRAMES);
|
||||||
for (auto &stack : local_stack) {
|
char **frame_symbols = backtrace_symbols(frames, num_frames);
|
||||||
if (absl::Symbolize(stack, buf, buf_size)) {
|
for (int i = 0; i < num_frames; ++i) {
|
||||||
output.append(" ").append(buf).append("\n");
|
os << frame_symbols[i];
|
||||||
|
|
||||||
|
if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
|
||||||
|
os << " " << buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
os << "\n";
|
||||||
|
}
|
||||||
|
free(frame_symbols);
|
||||||
|
#else
|
||||||
|
const int num_frames = absl::GetStackTrace(frames, MAX_NUM_FRAMES, 0);
|
||||||
|
for (int i = 0; i < num_frames; ++i) {
|
||||||
|
if (absl::Symbolize(frames[i], buf, sizeof(buf))) {
|
||||||
|
os << buf;
|
||||||
|
} else {
|
||||||
|
os << "unknown";
|
||||||
|
}
|
||||||
|
os << "\n";
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TerminateHandler() {
|
||||||
|
// Print the exception info, if any.
|
||||||
|
if (auto e_ptr = std::current_exception()) {
|
||||||
|
try {
|
||||||
|
std::rethrow_exception(e_ptr);
|
||||||
|
} catch (std::exception &e) {
|
||||||
|
RAY_LOG(ERROR) << "Unhandled exception: " << typeid(e).name()
|
||||||
|
<< ". what(): " << e.what();
|
||||||
|
} catch (...) {
|
||||||
|
RAY_LOG(ERROR) << "Unhandled unknown exception.";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return output;
|
|
||||||
|
RAY_LOG(ERROR) << "Stack trace: \n " << ray::StackTrace();
|
||||||
|
|
||||||
|
std::abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const char *ConstBasename(const char *filepath) {
|
inline const char *ConstBasename(const char *filepath) {
|
||||||
|
@ -119,10 +155,10 @@ class SpdLogMessage final {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (loglevel_ == static_cast<int>(spdlog::level::critical)) {
|
if (loglevel_ == static_cast<int>(spdlog::level::critical)) {
|
||||||
stream() << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
|
stream() << "\n*** StackTrace Information ***\n" << ray::StackTrace();
|
||||||
}
|
}
|
||||||
if (expose_osstream_) {
|
if (expose_osstream_) {
|
||||||
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::GetCallTrace();
|
*expose_osstream_ << "\n*** StackTrace Information ***\n" << ray::StackTrace();
|
||||||
}
|
}
|
||||||
// NOTE(lingxuan.zlx): See more fmt by visiting https://github.com/fmtlib/fmt.
|
// NOTE(lingxuan.zlx): See more fmt by visiting https://github.com/fmtlib/fmt.
|
||||||
logger->log(
|
logger->log(
|
||||||
|
@ -355,6 +391,8 @@ void RayLog::InstallFailureSignalHandler(const char *argv0, bool call_previous_h
|
||||||
is_failure_signal_handler_installed_ = true;
|
is_failure_signal_handler_installed_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RayLog::InstallTerminateHandler() { std::set_terminate(TerminateHandler); }
|
||||||
|
|
||||||
bool RayLog::IsLevelEnabled(RayLogLevel log_level) {
|
bool RayLog::IsLevelEnabled(RayLogLevel log_level) {
|
||||||
return log_level >= severity_threshold_;
|
return log_level >= severity_threshold_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,8 +85,10 @@ enum { ERROR = 0 };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace ray {
|
namespace ray {
|
||||||
/// This function returns the current call stack information.
|
class StackTrace {
|
||||||
std::string GetCallTrace();
|
/// This dumps the current stack trace information.
|
||||||
|
friend std::ostream &operator<<(std::ostream &os, const StackTrace &stack_trace);
|
||||||
|
};
|
||||||
|
|
||||||
enum class RayLogLevel {
|
enum class RayLogLevel {
|
||||||
TRACE = -2,
|
TRACE = -2,
|
||||||
|
@ -272,6 +274,10 @@ class RayLog : public RayLogBase {
|
||||||
static void InstallFailureSignalHandler(const char *argv0,
|
static void InstallFailureSignalHandler(const char *argv0,
|
||||||
bool call_previous_handler = false);
|
bool call_previous_handler = false);
|
||||||
|
|
||||||
|
/// Install the terminate handler to output call stack when std::terminate() is called
|
||||||
|
/// (e.g. unhandled exception).
|
||||||
|
static void InstallTerminateHandler();
|
||||||
|
|
||||||
/// To check failure signal handler enabled or not.
|
/// To check failure signal handler enabled or not.
|
||||||
static bool IsFailureSignalHandlerEnabled();
|
static bool IsFailureSignalHandlerEnabled();
|
||||||
|
|
||||||
|
|
|
@ -256,9 +256,11 @@ TEST(PrintLogTest, TestCheckOp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string TestFunctionLevel0() {
|
std::string TestFunctionLevel0() {
|
||||||
std::string call_trace = GetCallTrace();
|
std::ostringstream oss;
|
||||||
RAY_LOG(INFO) << "TestFunctionLevel0\n" << call_trace;
|
oss << ray::StackTrace();
|
||||||
return call_trace;
|
std::string stack_trace = oss.str();
|
||||||
|
RAY_LOG(INFO) << "TestFunctionLevel0\n" << stack_trace;
|
||||||
|
return stack_trace;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string TestFunctionLevel1() {
|
std::string TestFunctionLevel1() {
|
||||||
|
@ -271,37 +273,38 @@ std::string TestFunctionLevel2() {
|
||||||
return TestFunctionLevel1();
|
return TestFunctionLevel1();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef _WIN32
|
TEST(PrintLogTest, TestStackTrace) {
|
||||||
TEST(PrintLogTest, CallstackTraceTest) {
|
|
||||||
auto ret0 = TestFunctionLevel0();
|
auto ret0 = TestFunctionLevel0();
|
||||||
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos);
|
EXPECT_TRUE(ret0.find("TestFunctionLevel0") != std::string::npos) << ret0;
|
||||||
auto ret1 = TestFunctionLevel1();
|
auto ret1 = TestFunctionLevel1();
|
||||||
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos);
|
EXPECT_TRUE(ret1.find("TestFunctionLevel1") != std::string::npos) << ret1;
|
||||||
auto ret2 = TestFunctionLevel2();
|
auto ret2 = TestFunctionLevel2();
|
||||||
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos);
|
EXPECT_TRUE(ret2.find("TestFunctionLevel2") != std::string::npos) << ret2;
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Catch abort signal handler for testing RAY_CHECK.
|
|
||||||
/// We'd better to run the following test case manually since process
|
|
||||||
/// will terminated if abort signal raising.
|
|
||||||
/*
|
|
||||||
bool get_abort_signal = false;
|
|
||||||
void signal_handler(int signum) {
|
|
||||||
RAY_LOG(WARNING) << "Interrupt signal (" << signum << ") received.";
|
|
||||||
get_abort_signal = signum == SIGABRT;
|
|
||||||
exit(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(PrintLogTest, RayCheckAbortTest) {
|
int TerminateHandlerLevel0() {
|
||||||
get_abort_signal = false;
|
RAY_LOG(INFO) << "TerminateHandlerLevel0";
|
||||||
// signal(SIGABRT, signal_handler);
|
auto terminate_handler = std::get_terminate();
|
||||||
ray::RayLog::InstallFailureSignalHandler();
|
(*terminate_handler)();
|
||||||
RAY_CHECK(0) << "Check for aborting";
|
return 0;
|
||||||
sleep(1);
|
}
|
||||||
EXPECT_TRUE(get_abort_signal);
|
|
||||||
|
int TerminateHandlerLevel1() {
|
||||||
|
RAY_LOG(INFO) << "TerminateHandlerLevel1";
|
||||||
|
TerminateHandlerLevel0();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PrintLogTest, TestTerminateHandler) {
|
||||||
|
ray::RayLog::InstallTerminateHandler();
|
||||||
|
ASSERT_DEATH(TerminateHandlerLevel1(),
|
||||||
|
".*TerminateHandlerLevel0.*TerminateHandlerLevel1.*");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PrintLogTest, TestFailureSignalHandler) {
|
||||||
|
ray::RayLog::InstallFailureSignalHandler(nullptr);
|
||||||
|
ASSERT_DEATH(abort(), ".*SIGABRT received.*");
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
} // namespace ray
|
} // namespace ray
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue