mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[Autoscaler][Local Node Provider] Log a warning if max_workers < len(worker_ips) (#24635)
Logs a warning when a user sets max_workers for local node provider less than the number of available ips. Also removes defaults of 0 for min_workers and max_workers from example configs to help prevent users inadvertently setting max_workers=0 again.
This commit is contained in:
parent
4c1f27118a
commit
29eebdfef2
4 changed files with 39 additions and 6 deletions
|
@ -100,6 +100,13 @@ def prepare_manual(config: Dict[str, Any]) -> Dict[str, Any]:
|
|||
else:
|
||||
node_type["max_workers"] = max_workers
|
||||
|
||||
if max_workers < num_ips:
|
||||
cli_logger.warning(
|
||||
f"The value of `max_workers` supplied ({max_workers}) is less"
|
||||
f" than the number of available worker ips ({num_ips})."
|
||||
f" At most {max_workers} Ray worker nodes will connect to the cluster."
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
cluster_name: default
|
||||
min_workers: 0
|
||||
max_workers: 0
|
||||
docker:
|
||||
image: ""
|
||||
container_name: ""
|
||||
upscaling_speed: 1.0
|
||||
idle_timeout_minutes: 5
|
||||
provider:
|
||||
type: local
|
||||
head_ip: YOUR_HEAD_NODE_HOSTNAME
|
||||
|
|
|
@ -45,12 +45,14 @@ auth:
|
|||
# The minimum number of workers nodes to launch in addition to the head
|
||||
# node. This number should be >= 0.
|
||||
# Typically, min_workers == max_workers == len(worker_ips).
|
||||
min_workers: 0
|
||||
# This field is optional.
|
||||
min_workers: TYPICALLY_THE_NUMBER_OF_WORKER_IPS
|
||||
|
||||
# The maximum number of workers nodes to launch in addition to the head node.
|
||||
# This takes precedence over min_workers.
|
||||
# Typically, min_workers == max_workers == len(worker_ips).
|
||||
max_workers: 0
|
||||
# This field is optional.
|
||||
max_workers: TYPICALLY_THE_NUMBER_OF_WORKER_IPS
|
||||
# The default behavior for manually managed clusters is
|
||||
# min_workers == max_workers == len(worker_ips),
|
||||
# meaning that Ray is started on all available nodes of the cluster.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import jsonschema
|
||||
import logging
|
||||
import mock
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
@ -262,6 +263,33 @@ class AutoscalingConfigTest(unittest.TestCase):
|
|||
# Check that worker config numbers were clipped to 3.
|
||||
assert prepared_config == expected_prepared
|
||||
|
||||
not_enough_workers_config = copy.deepcopy(base_config)
|
||||
|
||||
# Max workers is less than the three available ips.
|
||||
# The user has probably made an error. Make sure we log a warning.
|
||||
not_enough_workers_config["max_workers"] = 0
|
||||
not_enough_workers_config["min_workers"] = 0
|
||||
with mock.patch(
|
||||
"ray.autoscaler._private.local.config.cli_logger.warning"
|
||||
) as warning:
|
||||
prepared_config = prepare_config(not_enough_workers_config)
|
||||
warning.assert_called_with(
|
||||
"The value of `max_workers` supplied (0) is less"
|
||||
" than the number of available worker ips (3)."
|
||||
" At most 0 Ray worker nodes will connect to the cluster."
|
||||
)
|
||||
expected_prepared = yaml.safe_load(EXPECTED_LOCAL_CONFIG_STR)
|
||||
# We logged a warning.
|
||||
# However, prepare_config does not repair the strange config setting:
|
||||
expected_prepared["max_workers"] = 0
|
||||
expected_prepared["available_node_types"]["local.cluster.node"][
|
||||
"max_workers"
|
||||
] = 0
|
||||
expected_prepared["available_node_types"]["local.cluster.node"][
|
||||
"min_workers"
|
||||
] = 0
|
||||
assert prepared_config == expected_prepared
|
||||
|
||||
def testValidateNetworkConfig(self):
|
||||
web_yaml = (
|
||||
"https://raw.githubusercontent.com/ray-project/ray/"
|
||||
|
|
Loading…
Add table
Reference in a new issue