mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
GCS Server add SIGTERM signal handler (#8795)
This commit is contained in:
parent
d04953ab3c
commit
68718b33b4
7 changed files with 98 additions and 56 deletions
|
@ -910,6 +910,12 @@ class Node:
|
||||||
check_alive=check_alive,
|
check_alive=check_alive,
|
||||||
allow_graceful=allow_graceful)
|
allow_graceful=allow_graceful)
|
||||||
|
|
||||||
|
if ray_constants.PROCESS_TYPE_GCS_SERVER in self.all_processes:
|
||||||
|
self._kill_process_type(
|
||||||
|
ray_constants.PROCESS_TYPE_GCS_SERVER,
|
||||||
|
check_alive=check_alive,
|
||||||
|
allow_graceful=allow_graceful)
|
||||||
|
|
||||||
# We call "list" to copy the keys because we are modifying the
|
# We call "list" to copy the keys because we are modifying the
|
||||||
# dictionary while iterating over it.
|
# dictionary while iterating over it.
|
||||||
for process_type in list(self.all_processes.keys()):
|
for process_type in list(self.all_processes.keys()):
|
||||||
|
|
|
@ -35,8 +35,10 @@ class GlobalStateAccessorTest : public ::testing::Test {
|
||||||
config.redis_address = "127.0.0.1";
|
config.redis_address = "127.0.0.1";
|
||||||
config.is_test = true;
|
config.is_test = true;
|
||||||
config.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
config.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
||||||
gcs_server_.reset(new gcs::GcsServer(config));
|
|
||||||
io_service_.reset(new boost::asio::io_service());
|
io_service_.reset(new boost::asio::io_service());
|
||||||
|
gcs_server_.reset(new gcs::GcsServer(config, *io_service_));
|
||||||
|
gcs_server_->Start();
|
||||||
|
|
||||||
thread_io_service_.reset(new std::thread([this] {
|
thread_io_service_.reset(new std::thread([this] {
|
||||||
std::unique_ptr<boost::asio::io_service::work> work(
|
std::unique_ptr<boost::asio::io_service::work> work(
|
||||||
|
@ -44,8 +46,6 @@ class GlobalStateAccessorTest : public ::testing::Test {
|
||||||
io_service_->run();
|
io_service_->run();
|
||||||
}));
|
}));
|
||||||
|
|
||||||
thread_gcs_server_.reset(new std::thread([this] { gcs_server_->Start(); }));
|
|
||||||
|
|
||||||
// Wait until server starts listening.
|
// Wait until server starts listening.
|
||||||
while (!gcs_server_->IsStarted()) {
|
while (!gcs_server_->IsStarted()) {
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||||
|
@ -67,8 +67,9 @@ class GlobalStateAccessorTest : public ::testing::Test {
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
gcs_server_->Stop();
|
gcs_server_->Stop();
|
||||||
io_service_->stop();
|
io_service_->stop();
|
||||||
|
gcs_server_.reset();
|
||||||
thread_io_service_->join();
|
thread_io_service_->join();
|
||||||
thread_gcs_server_->join();
|
|
||||||
gcs_client_->Disconnect();
|
gcs_client_->Disconnect();
|
||||||
global_state_->Disconnect();
|
global_state_->Disconnect();
|
||||||
global_state_.reset();
|
global_state_.reset();
|
||||||
|
@ -84,7 +85,6 @@ class GlobalStateAccessorTest : public ::testing::Test {
|
||||||
gcs::GcsServerConfig config;
|
gcs::GcsServerConfig config;
|
||||||
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
||||||
std::unique_ptr<std::thread> thread_io_service_;
|
std::unique_ptr<std::thread> thread_io_service_;
|
||||||
std::unique_ptr<std::thread> thread_gcs_server_;
|
|
||||||
std::unique_ptr<boost::asio::io_service> io_service_;
|
std::unique_ptr<boost::asio::io_service> io_service_;
|
||||||
|
|
||||||
// GCS client.
|
// GCS client.
|
||||||
|
|
|
@ -30,22 +30,28 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
config.grpc_server_port = 0;
|
config_.grpc_server_port = 0;
|
||||||
config.grpc_server_name = "MockedGcsServer";
|
config_.grpc_server_name = "MockedGcsServer";
|
||||||
config.grpc_server_thread_num = 1;
|
config_.grpc_server_thread_num = 1;
|
||||||
config.redis_address = "127.0.0.1";
|
config_.redis_address = "127.0.0.1";
|
||||||
config.is_test = true;
|
config_.is_test = true;
|
||||||
config.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
config_.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
||||||
gcs_server_.reset(new gcs::GcsServer(config));
|
|
||||||
io_service_.reset(new boost::asio::io_service());
|
|
||||||
|
|
||||||
thread_io_service_.reset(new std::thread([this] {
|
client_io_service_.reset(new boost::asio::io_service());
|
||||||
|
client_io_service_thread_.reset(new std::thread([this] {
|
||||||
std::unique_ptr<boost::asio::io_service::work> work(
|
std::unique_ptr<boost::asio::io_service::work> work(
|
||||||
new boost::asio::io_service::work(*io_service_));
|
new boost::asio::io_service::work(*client_io_service_));
|
||||||
io_service_->run();
|
client_io_service_->run();
|
||||||
}));
|
}));
|
||||||
|
|
||||||
thread_gcs_server_.reset(new std::thread([this] { gcs_server_->Start(); }));
|
server_io_service_.reset(new boost::asio::io_service());
|
||||||
|
gcs_server_.reset(new gcs::GcsServer(config_, *server_io_service_));
|
||||||
|
gcs_server_->Start();
|
||||||
|
server_io_service_thread_.reset(new std::thread([this] {
|
||||||
|
std::unique_ptr<boost::asio::io_service::work> work(
|
||||||
|
new boost::asio::io_service::work(*server_io_service_));
|
||||||
|
server_io_service_->run();
|
||||||
|
}));
|
||||||
|
|
||||||
// Wait until server starts listening.
|
// Wait until server starts listening.
|
||||||
while (!gcs_server_->IsStarted()) {
|
while (!gcs_server_->IsStarted()) {
|
||||||
|
@ -53,28 +59,40 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create GCS client.
|
// Create GCS client.
|
||||||
gcs::GcsClientOptions options(config.redis_address, config.redis_port,
|
gcs::GcsClientOptions options(config_.redis_address, config_.redis_port,
|
||||||
config.redis_password, config.is_test);
|
config_.redis_password, config_.is_test);
|
||||||
gcs_client_.reset(new gcs::ServiceBasedGcsClient(options));
|
gcs_client_.reset(new gcs::ServiceBasedGcsClient(options));
|
||||||
RAY_CHECK_OK(gcs_client_->Connect(*io_service_));
|
RAY_CHECK_OK(gcs_client_->Connect(*client_io_service_));
|
||||||
}
|
}
|
||||||
|
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
io_service_->stop();
|
client_io_service_->stop();
|
||||||
gcs_server_->Stop();
|
|
||||||
thread_io_service_->join();
|
|
||||||
thread_gcs_server_->join();
|
|
||||||
gcs_client_->Disconnect();
|
gcs_client_->Disconnect();
|
||||||
|
|
||||||
|
gcs_server_->Stop();
|
||||||
|
server_io_service_->stop();
|
||||||
|
gcs_server_.reset();
|
||||||
|
server_io_service_thread_->join();
|
||||||
TestSetupUtil::FlushAllRedisServers();
|
TestSetupUtil::FlushAllRedisServers();
|
||||||
|
client_io_service_thread_->join();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RestartGcsServer() {
|
void RestartGcsServer() {
|
||||||
RAY_LOG(INFO) << "Stopping GCS service, port = " << gcs_server_->GetPort();
|
RAY_LOG(INFO) << "Stopping GCS service, port = " << gcs_server_->GetPort();
|
||||||
gcs_server_->Stop();
|
gcs_server_->Stop();
|
||||||
thread_gcs_server_->join();
|
server_io_service_->stop();
|
||||||
|
gcs_server_.reset();
|
||||||
|
server_io_service_thread_->join();
|
||||||
|
RAY_LOG(INFO) << "Finished stopping GCS service.";
|
||||||
|
|
||||||
gcs_server_.reset(new gcs::GcsServer(config));
|
server_io_service_.reset(new boost::asio::io_service());
|
||||||
thread_gcs_server_.reset(new std::thread([this] { gcs_server_->Start(); }));
|
gcs_server_.reset(new gcs::GcsServer(config_, *server_io_service_));
|
||||||
|
gcs_server_->Start();
|
||||||
|
server_io_service_thread_.reset(new std::thread([this] {
|
||||||
|
std::unique_ptr<boost::asio::io_service::work> work(
|
||||||
|
new boost::asio::io_service::work(*server_io_service_));
|
||||||
|
server_io_service_->run();
|
||||||
|
}));
|
||||||
|
|
||||||
// Wait until server starts listening.
|
// Wait until server starts listening.
|
||||||
while (gcs_server_->GetPort() == 0) {
|
while (gcs_server_->GetPort() == 0) {
|
||||||
|
@ -466,14 +484,15 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||||
}
|
}
|
||||||
|
|
||||||
// GCS server.
|
// GCS server.
|
||||||
gcs::GcsServerConfig config;
|
gcs::GcsServerConfig config_;
|
||||||
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
||||||
std::unique_ptr<std::thread> thread_io_service_;
|
std::unique_ptr<std::thread> server_io_service_thread_;
|
||||||
std::unique_ptr<std::thread> thread_gcs_server_;
|
std::unique_ptr<boost::asio::io_service> server_io_service_;
|
||||||
std::unique_ptr<boost::asio::io_service> io_service_;
|
|
||||||
|
|
||||||
// GCS client.
|
// GCS client.
|
||||||
std::unique_ptr<gcs::GcsClient> gcs_client_;
|
std::unique_ptr<gcs::GcsClient> gcs_client_;
|
||||||
|
std::unique_ptr<std::thread> client_io_service_thread_;
|
||||||
|
std::unique_ptr<boost::asio::io_service> client_io_service_;
|
||||||
|
|
||||||
// Timeout waiting for GCS server reply, default is 2s.
|
// Timeout waiting for GCS server reply, default is 2s.
|
||||||
const std::chrono::milliseconds timeout_ms_{2000};
|
const std::chrono::milliseconds timeout_ms_{2000};
|
||||||
|
@ -878,6 +897,7 @@ TEST_F(ServiceBasedGcsClientTest, TestActorTableReSubscribe) {
|
||||||
ASSERT_TRUE(UpdateActor(actor2_id, actor2_table_data));
|
ASSERT_TRUE(UpdateActor(actor2_id, actor2_table_data));
|
||||||
WaitPendingDone(actor1_update_count, 3);
|
WaitPendingDone(actor1_update_count, 3);
|
||||||
WaitPendingDone(actor2_update_count, 1);
|
WaitPendingDone(actor2_update_count, 1);
|
||||||
|
UnsubscribeActor(actor1_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ServiceBasedGcsClientTest, TestObjectTableReSubscribe) {
|
TEST_F(ServiceBasedGcsClientTest, TestObjectTableReSubscribe) {
|
||||||
|
|
|
@ -28,11 +28,13 @@
|
||||||
namespace ray {
|
namespace ray {
|
||||||
namespace gcs {
|
namespace gcs {
|
||||||
|
|
||||||
GcsServer::GcsServer(const ray::gcs::GcsServerConfig &config)
|
GcsServer::GcsServer(const ray::gcs::GcsServerConfig &config,
|
||||||
|
boost::asio::io_service &main_service)
|
||||||
: config_(config),
|
: config_(config),
|
||||||
|
main_service_(main_service),
|
||||||
rpc_server_(config.grpc_server_name, config.grpc_server_port,
|
rpc_server_(config.grpc_server_name, config.grpc_server_port,
|
||||||
config.grpc_server_thread_num),
|
config.grpc_server_thread_num),
|
||||||
client_call_manager_(main_service_) {}
|
client_call_manager_(main_service) {}
|
||||||
|
|
||||||
GcsServer::~GcsServer() { Stop(); }
|
GcsServer::~GcsServer() { Stop(); }
|
||||||
|
|
||||||
|
@ -112,24 +114,17 @@ void GcsServer::Start() {
|
||||||
gcs_actor_manager_->LoadInitialData(on_done);
|
gcs_actor_manager_->LoadInitialData(on_done);
|
||||||
gcs_object_manager_->LoadInitialData(on_done);
|
gcs_object_manager_->LoadInitialData(on_done);
|
||||||
gcs_node_manager_->LoadInitialData(on_done);
|
gcs_node_manager_->LoadInitialData(on_done);
|
||||||
|
|
||||||
// Run the event loop.
|
|
||||||
// Using boost::asio::io_context::work to avoid ending the event loop when
|
|
||||||
// there are no events to handle.
|
|
||||||
boost::asio::io_context::work worker(main_service_);
|
|
||||||
main_service_.run();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GcsServer::Stop() {
|
void GcsServer::Stop() {
|
||||||
RAY_LOG(INFO) << "Stopping gcs server.";
|
if (!is_stopped_) {
|
||||||
|
RAY_LOG(INFO) << "Stopping GCS server.";
|
||||||
// Shutdown the rpc server
|
// Shutdown the rpc server
|
||||||
rpc_server_.Shutdown();
|
rpc_server_.Shutdown();
|
||||||
|
|
||||||
// Stop the event loop.
|
|
||||||
main_service_.stop();
|
|
||||||
|
|
||||||
is_stopped_ = true;
|
is_stopped_ = true;
|
||||||
RAY_LOG(INFO) << "Finished stopping gcs server.";
|
RAY_LOG(INFO) << "GCS server stopped.";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GcsServer::InitBackendClient() {
|
void GcsServer::InitBackendClient() {
|
||||||
|
|
|
@ -48,7 +48,8 @@ class GcsActorManager;
|
||||||
/// https://docs.google.com/document/d/1d-9qBlsh2UQHo-AWMWR0GptI_Ajwu4SKx0Q0LHKPpeI/edit#heading=h.csi0gaglj2pv
|
/// https://docs.google.com/document/d/1d-9qBlsh2UQHo-AWMWR0GptI_Ajwu4SKx0Q0LHKPpeI/edit#heading=h.csi0gaglj2pv
|
||||||
class GcsServer {
|
class GcsServer {
|
||||||
public:
|
public:
|
||||||
explicit GcsServer(const GcsServerConfig &config);
|
explicit GcsServer(const GcsServerConfig &config,
|
||||||
|
boost::asio::io_service &main_service);
|
||||||
virtual ~GcsServer();
|
virtual ~GcsServer();
|
||||||
|
|
||||||
/// Start gcs server.
|
/// Start gcs server.
|
||||||
|
@ -108,10 +109,10 @@ class GcsServer {
|
||||||
|
|
||||||
/// Gcs server configuration
|
/// Gcs server configuration
|
||||||
GcsServerConfig config_;
|
GcsServerConfig config_;
|
||||||
|
/// The main io service to drive event posted from grpc threads.
|
||||||
|
boost::asio::io_context &main_service_;
|
||||||
/// The grpc server
|
/// The grpc server
|
||||||
rpc::GrpcServer rpc_server_;
|
rpc::GrpcServer rpc_server_;
|
||||||
/// The main io service to drive event posted from grpc threads.
|
|
||||||
boost::asio::io_context main_service_;
|
|
||||||
/// The `ClientCallManager` object that is shared by all `NodeManagerWorkerClient`s.
|
/// The `ClientCallManager` object that is shared by all `NodeManagerWorkerClient`s.
|
||||||
rpc::ClientCallManager client_call_manager_;
|
rpc::ClientCallManager client_call_manager_;
|
||||||
/// The gcs node manager.
|
/// The gcs node manager.
|
||||||
|
|
|
@ -54,6 +54,8 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
RayConfig::instance().initialize(config_map);
|
RayConfig::instance().initialize(config_map);
|
||||||
|
|
||||||
|
boost::asio::io_service main_service;
|
||||||
|
|
||||||
ray::gcs::GcsServerConfig gcs_server_config;
|
ray::gcs::GcsServerConfig gcs_server_config;
|
||||||
gcs_server_config.grpc_server_name = "GcsServer";
|
gcs_server_config.grpc_server_name = "GcsServer";
|
||||||
gcs_server_config.grpc_server_port = 0;
|
gcs_server_config.grpc_server_port = 0;
|
||||||
|
@ -62,6 +64,26 @@ int main(int argc, char *argv[]) {
|
||||||
gcs_server_config.redis_port = redis_port;
|
gcs_server_config.redis_port = redis_port;
|
||||||
gcs_server_config.redis_password = redis_password;
|
gcs_server_config.redis_password = redis_password;
|
||||||
gcs_server_config.retry_redis = retry_redis;
|
gcs_server_config.retry_redis = retry_redis;
|
||||||
ray::gcs::GcsServer gcs_server(gcs_server_config);
|
ray::gcs::GcsServer gcs_server(gcs_server_config, main_service);
|
||||||
|
|
||||||
|
// Destroy the GCS server on a SIGTERM. The pointer to main_service is
|
||||||
|
// guaranteed to be valid since this function will run the event loop
|
||||||
|
// instead of returning immediately.
|
||||||
|
auto handler = [&main_service, &gcs_server](const boost::system::error_code &error,
|
||||||
|
int signal_number) {
|
||||||
|
RAY_LOG(INFO) << "GCS server received SIGTERM, shutting down...";
|
||||||
|
gcs_server.Stop();
|
||||||
|
main_service.stop();
|
||||||
|
};
|
||||||
|
boost::asio::signal_set signals(main_service);
|
||||||
|
#ifdef _WIN32
|
||||||
|
signals.add(SIGBREAK);
|
||||||
|
#else
|
||||||
|
signals.add(SIGTERM);
|
||||||
|
#endif
|
||||||
|
signals.async_wait(handler);
|
||||||
|
|
||||||
gcs_server.Start();
|
gcs_server.Start();
|
||||||
|
|
||||||
|
main_service.run();
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,8 @@ class GcsServerTest : public ::testing::Test {
|
||||||
config.redis_address = "127.0.0.1";
|
config.redis_address = "127.0.0.1";
|
||||||
config.is_test = true;
|
config.is_test = true;
|
||||||
config.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
config.redis_port = TEST_REDIS_SERVER_PORTS.front();
|
||||||
gcs_server_.reset(new gcs::GcsServer(config));
|
gcs_server_.reset(new gcs::GcsServer(config, io_service_));
|
||||||
|
gcs_server_->Start();
|
||||||
|
|
||||||
thread_io_service_.reset(new std::thread([this] {
|
thread_io_service_.reset(new std::thread([this] {
|
||||||
std::unique_ptr<boost::asio::io_service::work> work(
|
std::unique_ptr<boost::asio::io_service::work> work(
|
||||||
|
@ -42,8 +43,6 @@ class GcsServerTest : public ::testing::Test {
|
||||||
io_service_.run();
|
io_service_.run();
|
||||||
}));
|
}));
|
||||||
|
|
||||||
thread_gcs_server_.reset(new std::thread([this] { gcs_server_->Start(); }));
|
|
||||||
|
|
||||||
// Wait until server starts listening.
|
// Wait until server starts listening.
|
||||||
while (gcs_server_->GetPort() == 0) {
|
while (gcs_server_->GetPort() == 0) {
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||||
|
@ -58,8 +57,8 @@ class GcsServerTest : public ::testing::Test {
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
gcs_server_->Stop();
|
gcs_server_->Stop();
|
||||||
io_service_.stop();
|
io_service_.stop();
|
||||||
|
gcs_server_.reset();
|
||||||
thread_io_service_->join();
|
thread_io_service_->join();
|
||||||
thread_gcs_server_->join();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AddJob(const rpc::AddJobRequest &request) {
|
bool AddJob(const rpc::AddJobRequest &request) {
|
||||||
|
@ -391,7 +390,6 @@ class GcsServerTest : public ::testing::Test {
|
||||||
// Gcs server
|
// Gcs server
|
||||||
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
std::unique_ptr<gcs::GcsServer> gcs_server_;
|
||||||
std::unique_ptr<std::thread> thread_io_service_;
|
std::unique_ptr<std::thread> thread_io_service_;
|
||||||
std::unique_ptr<std::thread> thread_gcs_server_;
|
|
||||||
boost::asio::io_service io_service_;
|
boost::asio::io_service io_service_;
|
||||||
|
|
||||||
// Gcs client
|
// Gcs client
|
||||||
|
|
Loading…
Add table
Reference in a new issue