mirror of
https://github.com/vale981/ray
synced 2025-03-06 18:41:40 -05:00
Fix RAY_ENABLE_NEW_SCHEDULER=1 pytest test_advanced_2.py::test_zero_cpus_actor (#11817)
This commit is contained in:
parent
347e871409
commit
f86c4f992c
4 changed files with 16 additions and 29 deletions
|
@ -16,13 +16,16 @@ py_test_module_list(
|
|||
"test_actor_advanced.py",
|
||||
"test_advanced.py",
|
||||
"test_advanced_2.py",
|
||||
"test_array.py",
|
||||
"test_basic.py",
|
||||
"test_basic_2.py",
|
||||
"test_cancel.py",
|
||||
"test_cli.py",
|
||||
"test_component_failures_3.py",
|
||||
"test_error_ray_not_initialized.py",
|
||||
"test_gcs_fault_tolerance.py",
|
||||
"test_iter.py",
|
||||
"test_joblib.py",
|
||||
"test_resource_demand_scheduler.py",
|
||||
],
|
||||
size = "medium",
|
||||
|
@ -35,13 +38,10 @@ py_test_module_list(
|
|||
files = [
|
||||
"test_actor_resources.py",
|
||||
"test_advanced_3.py",
|
||||
"test_array.py",
|
||||
"test_cancel.py",
|
||||
"test_component_failures_2.py",
|
||||
"test_dynres.py",
|
||||
"test_global_gc.py",
|
||||
"test_global_state.py",
|
||||
"test_joblib.py",
|
||||
],
|
||||
size = "medium",
|
||||
extra_srcs = SRCS,
|
||||
|
@ -60,6 +60,7 @@ py_test_module_list(
|
|||
"test_output.py",
|
||||
"test_reference_counting_2.py",
|
||||
"test_unreconstructable_errors.py",
|
||||
"test_serialization.py",
|
||||
"test_tensorflow.py",
|
||||
"test_object_spilling.py",
|
||||
],
|
||||
|
@ -76,7 +77,6 @@ py_test_module_list(
|
|||
"test_object_manager.py",
|
||||
"test_reconstruction.py",
|
||||
"test_reference_counting.py",
|
||||
"test_serialization.py",
|
||||
"test_stress.py",
|
||||
"test_stress_sharded.py",
|
||||
"test_multi_tenancy.py",
|
||||
|
@ -89,6 +89,7 @@ py_test_module_list(
|
|||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_actor_pool.py",
|
||||
"test_args.py",
|
||||
"test_asyncio.py",
|
||||
"test_asyncio_cluster.py",
|
||||
|
@ -102,7 +103,9 @@ py_test_module_list(
|
|||
"test_dask_callback.py",
|
||||
"test_debug_tools.py",
|
||||
"test_job.py",
|
||||
"test_memstat.py",
|
||||
"test_metrics_agent.py",
|
||||
"test_microbenchmarks.py",
|
||||
"test_mini.py",
|
||||
"test_monitor.py",
|
||||
"test_node_manager.py",
|
||||
|
@ -118,18 +121,6 @@ py_test_module_list(
|
|||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_actor_pool.py",
|
||||
"test_memstat.py",
|
||||
"test_microbenchmarks.py",
|
||||
],
|
||||
size = "small",
|
||||
extra_srcs = SRCS,
|
||||
tags = ["exclusive", "new_scheduler_broken"],
|
||||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test_module_list(
|
||||
files = [
|
||||
"test_stress_failure.py",
|
||||
|
|
|
@ -14,7 +14,6 @@ import ray.test_utils
|
|||
from ray.test_utils import (
|
||||
RayTestTimeoutException,
|
||||
wait_for_condition,
|
||||
new_scheduler_enabled,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -252,7 +251,6 @@ def test_zero_cpus(shutdown_only):
|
|||
ray.get(x)
|
||||
|
||||
|
||||
@pytest.mark.skipif(new_scheduler_enabled(), reason="zero cpu handling")
|
||||
def test_zero_cpus_actor(ray_start_cluster):
|
||||
cluster = ray_start_cluster
|
||||
cluster.add_node(num_cpus=0)
|
||||
|
|
|
@ -610,9 +610,7 @@ void NodeManager::WarnResourceDeadlock() {
|
|||
SchedulingResources &local_resources = cluster_resource_map_[self_node_id_];
|
||||
error_message
|
||||
<< "The actor or task with ID " << exemplar.GetTaskSpecification().TaskId()
|
||||
<< " is pending and cannot currently be scheduled. It requires "
|
||||
<< exemplar.GetTaskSpecification().GetRequiredResources().ToString()
|
||||
<< " for execution and "
|
||||
<< " cannot be scheduled right now. It requires "
|
||||
<< exemplar.GetTaskSpecification().GetRequiredPlacementResources().ToString()
|
||||
<< " for placement, but this node only has remaining "
|
||||
<< local_resources.GetAvailableResources().ToString() << ". In total there are "
|
||||
|
@ -2087,9 +2085,7 @@ void NodeManager::ScheduleTasks(
|
|||
std::ostringstream error_message;
|
||||
error_message
|
||||
<< "The actor or task with ID " << task.GetTaskSpecification().TaskId()
|
||||
<< " is infeasible and cannot currently be scheduled. It requires "
|
||||
<< task.GetTaskSpecification().GetRequiredResources().ToString()
|
||||
<< " for execution and "
|
||||
<< " cannot be scheduled right now. It requires "
|
||||
<< task.GetTaskSpecification().GetRequiredPlacementResources().ToString()
|
||||
<< " for placement, however the cluster currently cannot provide the requested "
|
||||
"resources. The required resources may be added as autoscaling takes place "
|
||||
|
|
|
@ -30,13 +30,14 @@ bool ClusterTaskManager::SchedulePendingTasks() {
|
|||
// tasks from being scheduled.
|
||||
Work work = *work_it;
|
||||
Task task = std::get<0>(work);
|
||||
auto request_resources =
|
||||
task.GetTaskSpecification().GetRequiredResources().GetResourceMap();
|
||||
auto placement_resources =
|
||||
task.GetTaskSpecification().GetRequiredPlacementResources().GetResourceMap();
|
||||
int64_t _unused;
|
||||
// TODO (Alex): We should distinguish between infeasible tasks and a fully
|
||||
// utilized cluster.
|
||||
std::string node_id_string = cluster_resource_scheduler_->GetBestSchedulableNode(
|
||||
request_resources, task.GetTaskSpecification().IsActorCreationTask(), &_unused);
|
||||
placement_resources, task.GetTaskSpecification().IsActorCreationTask(),
|
||||
&_unused);
|
||||
if (node_id_string.empty()) {
|
||||
// There is no node that has available resources to run the request.
|
||||
// Move on to the next shape.
|
||||
|
@ -49,8 +50,9 @@ bool ClusterTaskManager::SchedulePendingTasks() {
|
|||
did_schedule = task_scheduled || did_schedule;
|
||||
} else {
|
||||
// Should spill over to a different node.
|
||||
cluster_resource_scheduler_->AllocateRemoteTaskResources(node_id_string,
|
||||
request_resources);
|
||||
cluster_resource_scheduler_->AllocateRemoteTaskResources(
|
||||
node_id_string,
|
||||
task.GetTaskSpecification().GetRequiredResources().GetResourceMap());
|
||||
|
||||
NodeID node_id = NodeID::FromBinary(node_id_string);
|
||||
auto node_info_opt = get_node_info_(node_id);
|
||||
|
|
Loading…
Add table
Reference in a new issue