mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Improve log message when failing to fork worker process (#2990)
## What do these changes do?

```c++
// Try to execute the worker command.
int rv = execvp(worker_command_args[0],
                const_cast<char *const *>(worker_command_args.data()));
// The worker failed to start. This is a fatal error.
RAY_LOG(FATAL) << "Failed to start worker with return value " << rv;
```

When starting the worker process fails, the return value `rv` is always -1, so logging it alone tells us nothing. The log message should also include the reason for the failure, taken from `errno`. For example, if Java is not installed, the message should read:

```shell
Failed to start worker with return value -1: No such file or directory
```

This helps us locate the issue quickly. The same treatment is applied to a failed `fork()`, which now logs `strerror(errno)` as well.

## Related issue number

N/A
This commit is contained in:
parent
c5b8840193
commit
a879302355
1 changed files with 9 additions and 2 deletions
|
@ -118,12 +118,18 @@ void WorkerPool::StartWorkerProcess(const Language &language) {
|
|||
|
||||
// Launch the process to create the worker.
|
||||
pid_t pid = fork();
|
||||
if (pid != 0) {
|
||||
if (pid < 0) {
|
||||
// Failure case.
|
||||
RAY_LOG(FATAL) << "Failed to fork worker process: " << strerror(errno);
|
||||
return;
|
||||
} else if (pid > 0) {
|
||||
// Parent process case.
|
||||
RAY_LOG(DEBUG) << "Started worker process with pid " << pid;
|
||||
starting_worker_processes_.emplace(std::make_pair(pid, num_workers_per_process_));
|
||||
return;
|
||||
}
|
||||
|
||||
// Child process case.
|
||||
// Reset the SIGCHLD handler for the worker.
|
||||
signal(SIGCHLD, SIG_DFL);
|
||||
|
||||
|
@ -138,7 +144,8 @@ void WorkerPool::StartWorkerProcess(const Language &language) {
|
|||
int rv = execvp(worker_command_args[0],
|
||||
const_cast<char *const *>(worker_command_args.data()));
|
||||
// The worker failed to start. This is a fatal error.
|
||||
RAY_LOG(FATAL) << "Failed to start worker with return value " << rv;
|
||||
RAY_LOG(FATAL) << "Failed to start worker with return value " << rv << ": "
|
||||
<< strerror(errno);
|
||||
}
|
||||
|
||||
void WorkerPool::RegisterWorker(std::shared_ptr<Worker> worker) {
|
||||
|
|
Loading…
Add table
Reference in a new issue