diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 4b75c8957..bd3114023 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -278,7 +278,8 @@ Start out by launching the deployment container.
   docker run --shm-size=<shm-size> -t -i rayproject/ray
 
 Replace ``<shm-size>`` with a limit appropriate for your system, for example
-``512M`` or ``2G``. The ``-t`` and ``-i`` options here are required to support
+``512M`` or ``2G``. A good estimate for this is to use roughly 30% of your available memory (this is
+what Ray uses internally for its Object Store). The ``-t`` and ``-i`` options here are required to support
 interactive use of the container.
 
 **Note:** Ray requires a **large** amount of shared memory because each object
diff --git a/python/ray/autoscaler/_private/command_runner.py b/python/ray/autoscaler/_private/command_runner.py
index 546ff8c7b..d37d16382 100644
--- a/python/ray/autoscaler/_private/command_runner.py
+++ b/python/ray/autoscaler/_private/command_runner.py
@@ -12,6 +12,9 @@ import time
 import warnings
 
 from ray.autoscaler.command_runner import CommandRunnerInterface
+from ray.autoscaler._private.constants import \
+    DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES,\
+    DEFAULT_OBJECT_STORE_MEMORY_PROPORTION
 from ray.autoscaler._private.docker import check_bind_mounts_cmd, \
     check_docker_running_cmd, \
     check_docker_image, \
@@ -716,8 +719,8 @@ class DockerCommandRunner(CommandRunnerInterface):
                 self.container_name,
                 self.docker_config.get(
                     "run_options", []) + self.docker_config.get(
-                        f"{'head' if as_head else 'worker'}_run_options",
-                        []) + self._configure_runtime(),
+                        f"{'head' if as_head else 'worker'}_run_options", []) +
+                self._configure_runtime() + self._auto_configure_shm(),
                 self.ssh_command_runner.cluster_name, home_directory)
             self.run(start_command, run_env="host")
         else:
@@ -781,6 +784,27 @@ class DockerCommandRunner(CommandRunnerInterface):
 
         return []
 
+    def _auto_configure_shm(self):
+        if self.docker_config.get("disable_shm_size_detection"):
+            return []
+        try:
+            shm_output = self.ssh_command_runner.run(
+                "cat /proc/meminfo || true",
+                with_output=True).decode().strip()
+            available_memory = int([
+                ln for ln in shm_output.split("\n") if "MemAvailable" in ln
+            ][0].split()[1])
+            available_memory_bytes = available_memory * 1024
+            # Overestimate SHM size by 10%
+            shm_size = min((available_memory_bytes *
+                            DEFAULT_OBJECT_STORE_MEMORY_PROPORTION * 1.1),
+                           DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES)
+            return [f"--shm-size='{shm_size}b'"]
+        except Exception as e:
+            logger.warning(
+                f"Received error while trying to auto-compute SHM size {e}")
+            return []
+
     def _get_docker_host_mount_location(self, cluster_name: str) -> str:
         """Return the docker host mount directory location."""
         # Imported here due to circular dependency in imports.
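For reference, the sizing rule that the installation docs describe and that _auto_configure_shm applies can be checked in isolation. The snippet below is a minimal standalone sketch, not part of the patch: the helper name estimate_shm_size and the sample MemAvailable value are illustrative only, and the two constants mirror the ones introduced in ray_constants.py further down.

DEFAULT_OBJECT_STORE_MEMORY_PROPORTION = 0.3
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 200 * 10**9


def estimate_shm_size(meminfo_text: str) -> int:
    # Pull the MemAvailable entry (reported in kB) out of /proc/meminfo output.
    available_kb = int([
        ln for ln in meminfo_text.splitlines() if "MemAvailable" in ln
    ][0].split()[1])
    available_bytes = available_kb * 1024
    # 30% of available memory, padded by 10%, capped at the object store maximum.
    return int(
        min(available_bytes * DEFAULT_OBJECT_STORE_MEMORY_PROPORTION * 1.1,
            DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES))


sample = "MemAvailable:   33000000 kB"  # illustrative value, matches the test below
print(estimate_shm_size(sample))  # about 11.15 GB for 33,000,000 kB available

The 10% padding gives the container slightly more /dev/shm than the object store will request, so the store fits comfortably without the fallback to /tmp.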
diff --git a/python/ray/autoscaler/_private/constants.py b/python/ray/autoscaler/_private/constants.py
index 06c01ddd9..3007eedc6 100644
--- a/python/ray/autoscaler/_private/constants.py
+++ b/python/ray/autoscaler/_private/constants.py
@@ -1,7 +1,8 @@
 import os
 
 from ray.ray_constants import (  # noqa F401
-    AUTOSCALER_RESOURCE_REQUEST_CHANNEL, LOGGER_FORMAT,
+    AUTOSCALER_RESOURCE_REQUEST_CHANNEL, DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES,
+    DEFAULT_OBJECT_STORE_MEMORY_PROPORTION, LOGGER_FORMAT,
     MEMORY_RESOURCE_UNIT_BYTES, RESOURCES_ENVIRONMENT_VARIABLE)
 
 
diff --git a/python/ray/autoscaler/ray-schema.json b/python/ray/autoscaler/ray-schema.json
index 74e873736..0f6c3bd52 100644
--- a/python/ray/autoscaler/ray-schema.json
+++ b/python/ray/autoscaler/ray-schema.json
@@ -242,6 +242,11 @@
                 "type": "boolean",
                 "description": "disable Ray from automatically using the NVIDIA runtime if available",
                 "default": false
+            },
+            "disable_shm_size_detection" : {
+                "type": "boolean",
+                "description": "disable Ray from automatically detecting /dev/shm size for the container",
+                "default": false
             }
         }
     },
diff --git a/python/ray/ray_constants.py b/python/ray/ray_constants.py
index 23d42789d..421bd58ba 100644
--- a/python/ray/ray_constants.py
+++ b/python/ray/ray_constants.py
@@ -24,6 +24,8 @@ ID_SIZE = 20
 # The default maximum number of bytes to allocate to the object store unless
 # overridden by the user.
 DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 200 * 10**9
+# The default proportion of available memory allocated to the object store
+DEFAULT_OBJECT_STORE_MEMORY_PROPORTION = 0.3
 # The smallest cap on the memory used by the object store that we allow.
 # This must be greater than MEMORY_RESOURCE_UNIT_BYTES * 0.7
 OBJECT_STORE_MINIMUM_MEMORY_BYTES = 75 * 1024 * 1024
diff --git a/python/ray/resource_spec.py b/python/ray/resource_spec.py
index 8bdf57bcb..294b67152 100644
--- a/python/ray/resource_spec.py
+++ b/python/ray/resource_spec.py
@@ -179,7 +179,9 @@ class ResourceSpec(
         avail_memory = ray.utils.estimate_available_memory()
         object_store_memory = self.object_store_memory
         if object_store_memory is None:
-            object_store_memory = int(avail_memory * 0.3)
+            object_store_memory = int(
+                avail_memory *
+                ray_constants.DEFAULT_OBJECT_STORE_MEMORY_PROPORTION)
             # Cap memory to avoid memory waste and perf issues on large nodes
             if (object_store_memory >
                     ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES):
diff --git a/python/ray/tests/test_autoscaler.py b/python/ray/tests/test_autoscaler.py
index 0965744b9..ee17a3455 100644
--- a/python/ray/tests/test_autoscaler.py
+++ b/python/ray/tests/test_autoscaler.py
@@ -1516,6 +1516,37 @@ class AutoscalingTest(unittest.TestCase):
             f"{file_mount_dir}/ ubuntu@172.0.0.{i}:"
             f"{docker_mount_prefix}/home/test-folder/")
 
+    def testAutodetectResources(self):
+        self.provider = MockProvider()
+        config = SMALL_CLUSTER.copy()
+        config_path = self.write_config(config)
+        runner = MockProcessRunner()
+        proc_meminfo = """
+MemTotal: 16396056 kB
+MemFree: 12869528 kB
+MemAvailable: 33000000 kB
+        """
+        runner.respond_to_call("cat /proc/meminfo", [proc_meminfo])
+        runner.respond_to_call(".Runtimes", ["nvidia-container-runtime"])
+        runner.respond_to_call("nvidia-smi", ["works"])
+        lm = LoadMetrics()
+        autoscaler = StandardAutoscaler(
+            config_path,
+            lm,
+            max_failures=0,
+            process_runner=runner,
+            update_interval_s=0)
+
+        autoscaler.update()
+        self.waitForNodes(2)
+        self.provider.finish_starting_nodes()
+        autoscaler.update()
+        self.waitForNodes(
+            2, tag_filters={TAG_RAY_NODE_STATUS: STATUS_UP_TO_DATE})
+        autoscaler.update()
+        runner.assert_has_call("172.0.0.0", pattern="--shm-size")
+        runner.assert_has_call("172.0.0.0", pattern="--runtime=nvidia")
+
 
 if __name__ == "__main__":
     import sys
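The schema addition above also gives users an opt-out. The sketch below is not part of the patch; it only illustrates, with placeholder image and container names, how setting disable_shm_size_detection in the docker section of a cluster config short-circuits the detection, mirroring the early return at the top of _auto_configure_shm().

# Example docker section of a cluster config after YAML loading; image and
# container_name are placeholders, only the new key matters here.
docker_config = {
    "image": "rayproject/ray:latest",
    "container_name": "ray_container",
    "disable_shm_size_detection": True,  # opt out of --shm-size auto-detection
}

# When the flag is set, no --shm-size option is appended to docker run;
# the user-supplied run_options (if any) are used as-is instead.
extra_run_options = []
if not docker_config.get("disable_shm_size_detection"):
    extra_run_options.append("--shm-size='2g'")  # placeholder for the computed value
print(extra_run_options)  # [] when detection is disabled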