From 5f74d0e40d873300b2c86ce763d5f134b8190019 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Fri, 16 Apr 2021 18:09:02 -0700 Subject: [PATCH] [Test] Fix flaky test failure (#15326) * Fix trial. * unskip test. * Mock commit --- python/ray/tests/test_failure_2.py | 1 - src/ray/raylet/node_manager.cc | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ray/tests/test_failure_2.py b/python/ray/tests/test_failure_2.py index 83e17d105..b86f19094 100644 --- a/python/ray/tests/test_failure_2.py +++ b/python/ray/tests/test_failure_2.py @@ -374,7 +374,6 @@ def test_connect_with_disconnected_node(shutdown_only): p.close() -@pytest.mark.skip(reason="Temporarily disabled due to flakyniess.") @pytest.mark.parametrize( "ray_start_cluster_head", [{ "num_cpus": 5, diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 356406a0b..1068fcd7b 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -708,6 +708,9 @@ void NodeManager::WarnResourceDeadlock() { exemplar.GetTaskSpecification().JobId()); RAY_CHECK_OK(gcs_client_->Errors().AsyncReportJobError(error_data_ptr, nullptr)); } + // Try scheduling tasks. Without this, if there are no more tasks coming in, deadlocked + // tasks are never scheduled. + cluster_task_manager_->ScheduleAndDispatchTasks(); } void NodeManager::GetObjectManagerProfileInfo() {