Fix RAY_ENABLE_NEW_SCHEDULER=1 pytest test_advanced_2.py::test_zero_cpus_actor (#11817)

This commit is contained in:
Eric Liang 2020-11-05 16:02:04 -08:00 committed by GitHub
parent 347e871409
commit f86c4f992c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 29 deletions

View file

@@ -16,13 +16,16 @@ py_test_module_list(
"test_actor_advanced.py",
"test_advanced.py",
"test_advanced_2.py",
"test_array.py",
"test_basic.py",
"test_basic_2.py",
"test_cancel.py",
"test_cli.py",
"test_component_failures_3.py",
"test_error_ray_not_initialized.py",
"test_gcs_fault_tolerance.py",
"test_iter.py",
"test_joblib.py",
"test_resource_demand_scheduler.py",
],
size = "medium",
@@ -35,13 +38,10 @@ py_test_module_list(
files = [
"test_actor_resources.py",
"test_advanced_3.py",
"test_array.py",
"test_cancel.py",
"test_component_failures_2.py",
"test_dynres.py",
"test_global_gc.py",
"test_global_state.py",
"test_joblib.py",
],
size = "medium",
extra_srcs = SRCS,
@@ -60,6 +60,7 @@ py_test_module_list(
"test_output.py",
"test_reference_counting_2.py",
"test_unreconstructable_errors.py",
"test_serialization.py",
"test_tensorflow.py",
"test_object_spilling.py",
],
@@ -76,7 +77,6 @@ py_test_module_list(
"test_object_manager.py",
"test_reconstruction.py",
"test_reference_counting.py",
"test_serialization.py",
"test_stress.py",
"test_stress_sharded.py",
"test_multi_tenancy.py",
@@ -89,6 +89,7 @@ py_test_module_list(
py_test_module_list(
files = [
"test_actor_pool.py",
"test_args.py",
"test_asyncio.py",
"test_asyncio_cluster.py",
@@ -102,7 +103,9 @@ py_test_module_list(
"test_dask_callback.py",
"test_debug_tools.py",
"test_job.py",
"test_memstat.py",
"test_metrics_agent.py",
"test_microbenchmarks.py",
"test_mini.py",
"test_monitor.py",
"test_node_manager.py",
@@ -118,18 +121,6 @@ py_test_module_list(
deps = ["//:ray_lib"],
)
py_test_module_list(
files = [
"test_actor_pool.py",
"test_memstat.py",
"test_microbenchmarks.py",
],
size = "small",
extra_srcs = SRCS,
tags = ["exclusive", "new_scheduler_broken"],
deps = ["//:ray_lib"],
)
py_test_module_list(
files = [
"test_stress_failure.py",

View file

@@ -14,7 +14,6 @@ import ray.test_utils
from ray.test_utils import (
RayTestTimeoutException,
wait_for_condition,
new_scheduler_enabled,
)
logger = logging.getLogger(__name__)
@@ -252,7 +251,6 @@ def test_zero_cpus(shutdown_only):
ray.get(x)
@pytest.mark.skipif(new_scheduler_enabled(), reason="zero cpu handling")
def test_zero_cpus_actor(ray_start_cluster):
cluster = ray_start_cluster
cluster.add_node(num_cpus=0)

View file

@@ -610,9 +610,7 @@ void NodeManager::WarnResourceDeadlock() {
SchedulingResources &local_resources = cluster_resource_map_[self_node_id_];
error_message
<< "The actor or task with ID " << exemplar.GetTaskSpecification().TaskId()
<< " is pending and cannot currently be scheduled. It requires "
<< exemplar.GetTaskSpecification().GetRequiredResources().ToString()
<< " for execution and "
<< " cannot be scheduled right now. It requires "
<< exemplar.GetTaskSpecification().GetRequiredPlacementResources().ToString()
<< " for placement, but this node only has remaining "
<< local_resources.GetAvailableResources().ToString() << ". In total there are "
@@ -2087,9 +2085,7 @@ void NodeManager::ScheduleTasks(
std::ostringstream error_message;
error_message
<< "The actor or task with ID " << task.GetTaskSpecification().TaskId()
<< " is infeasible and cannot currently be scheduled. It requires "
<< task.GetTaskSpecification().GetRequiredResources().ToString()
<< " for execution and "
<< " cannot be scheduled right now. It requires "
<< task.GetTaskSpecification().GetRequiredPlacementResources().ToString()
<< " for placement, however the cluster currently cannot provide the requested "
"resources. The required resources may be added as autoscaling takes place "

View file

@@ -30,13 +30,14 @@ bool ClusterTaskManager::SchedulePendingTasks() {
// tasks from being scheduled.
Work work = *work_it;
Task task = std::get<0>(work);
auto request_resources =
task.GetTaskSpecification().GetRequiredResources().GetResourceMap();
auto placement_resources =
task.GetTaskSpecification().GetRequiredPlacementResources().GetResourceMap();
int64_t _unused;
// TODO (Alex): We should distinguish between infeasible tasks and a fully
// utilized cluster.
std::string node_id_string = cluster_resource_scheduler_->GetBestSchedulableNode(
request_resources, task.GetTaskSpecification().IsActorCreationTask(), &_unused);
placement_resources, task.GetTaskSpecification().IsActorCreationTask(),
&_unused);
if (node_id_string.empty()) {
// There is no node that has available resources to run the request.
// Move on to the next shape.
@@ -49,8 +50,9 @@ bool ClusterTaskManager::SchedulePendingTasks() {
did_schedule = task_scheduled || did_schedule;
} else {
// Should spill over to a different node.
cluster_resource_scheduler_->AllocateRemoteTaskResources(node_id_string,
request_resources);
cluster_resource_scheduler_->AllocateRemoteTaskResources(
node_id_string,
task.GetTaskSpecification().GetRequiredResources().GetResourceMap());
NodeID node_id = NodeID::FromBinary(node_id_string);
auto node_info_opt = get_node_info_(node_id);