mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Fix: ServerConnection should be closed before being removed (#3626)
Otherwise, if a remote raylet crashes, the connection might be held by Boost.Asio forever, and its pending callbacks will never be invoked. See also #3586.
This commit is contained in:
parent
5426234cd8
commit
f4011754d6
2 changed files with 15 additions and 2 deletions
|
@ -16,7 +16,13 @@ ray::Status TcpConnect(boost::asio::ip::tcp::socket &socket,
|
|||
boost::asio::ip::tcp::endpoint endpoint(ip_address, port);
|
||||
boost::system::error_code error;
|
||||
socket.connect(endpoint, error);
|
||||
return boost_to_ray_status(error);
|
||||
const auto status = boost_to_ray_status(error);
|
||||
if (!status.ok()) {
|
||||
// Close the socket if the connect failed.
|
||||
boost::system::error_code close_error;
|
||||
socket.close(close_error);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
|
|
|
@ -405,7 +405,14 @@ void NodeManager::ClientRemoved(const ClientTableDataT &client_data) {
|
|||
cluster_resource_map_.erase(client_id);
|
||||
|
||||
// Remove the remote server connection.
|
||||
remote_server_connections_.erase(client_id);
|
||||
const auto connection_entry = remote_server_connections_.find(client_id);
|
||||
if (connection_entry != remote_server_connections_.end()) {
|
||||
connection_entry->second->Close();
|
||||
remote_server_connections_.erase(connection_entry);
|
||||
} else {
|
||||
RAY_LOG(WARNING) << "Received ClientRemoved callback for an unknown client "
|
||||
<< client_id << ".";
|
||||
}
|
||||
|
||||
// For any live actors that were on the dead node, broadcast a notification
|
||||
// about the actor's death
|
||||
|
|
Loading…
Add table
Reference in a new issue