mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[gcs] Fix the server standalone tests in HA mode (#21480)
CoreWorker hangs before exiting if GCS exits first, due to the incorrect ordering of destruction. This PR fixes that: it stops the GCS client first and then joins the IO thread.
This commit is contained in:
parent
bdfba88082
commit
4ab059eaa1
2 changed files with 5 additions and 4 deletions
|
@ -331,7 +331,7 @@
|
|||
--test_env=RAY_gcs_grpc_based_pubsub=1
|
||||
--test_env=RAY_bootstrap_with_gcs=1
|
||||
--test_env=RAY_gcs_storage=memory
|
||||
-- python/ray/serve/... -//python/ray/serve:test_cli -//python/ray/serve:test_standalone
|
||||
-- python/ray/serve/...
|
||||
# Re-enable after fixing.
|
||||
#- bazel test --config=ci $(./scripts/bazel_export_options)
|
||||
# --test_tag_filters=team:serve
|
||||
|
|
|
@ -594,12 +594,13 @@ void CoreWorker::OnNodeRemoved(const NodeID &node_id) {
|
|||
}
|
||||
|
||||
void CoreWorker::WaitForShutdown() {
|
||||
if (io_thread_.joinable()) {
|
||||
io_thread_.join();
|
||||
}
|
||||
// Stop gcs client first since it runs in io_thread_
|
||||
if (gcs_client_) {
|
||||
gcs_client_->Disconnect();
|
||||
}
|
||||
if (io_thread_.joinable()) {
|
||||
io_thread_.join();
|
||||
}
|
||||
if (options_.worker_type == WorkerType::WORKER) {
|
||||
RAY_CHECK(task_execution_service_.stopped());
|
||||
// Asyncio coroutines could still run after CoreWorker is removed because it is
|
||||
|
|
Loading…
Add table
Reference in a new issue