[docker] auto-populate shared memory size (#11953)

This commit is contained in:
Ian Rodney 2020-11-12 17:22:42 -08:00 committed by GitHub
parent 59bc1e6c09
commit 3b56a1a522
7 changed files with 71 additions and 5 deletions

View file

@@ -278,7 +278,8 @@ Start out by launching the deployment container.
docker run --shm-size=<shm-size> -t -i rayproject/ray
Replace ``<shm-size>`` with a limit appropriate for your system, for example
``512M`` or ``2G``. A good estimate for this is to use roughly 30% of your available memory (this is
what Ray uses internally for its Object Store). The ``-t`` and ``-i`` options here are required to support
interactive use of the container.
**Note:** Ray requires a **large** amount of shared memory because each object

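If you would rather compute this value than guess, here is a minimal Python sketch of the 30% heuristic (it assumes a Linux host with /proc/meminfo; suggest_shm_size is an illustrative helper, not part of Ray):

# Illustrative helper: suggest a --shm-size value as ~30% of MemAvailable.
def suggest_shm_size(proportion: float = 0.3) -> str:
    with open("/proc/meminfo") as f:
        meminfo = f.read()
    # MemAvailable is reported in kB.
    available_kb = int(
        next(ln for ln in meminfo.splitlines()
             if ln.startswith("MemAvailable")).split()[1])
    return f"{int(available_kb * 1024 * proportion)}b"

print(suggest_shm_size())  # e.g. docker run --shm-size=<this value> -t -i rayproject/ray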
View file

@@ -12,6 +12,9 @@ import time
import warnings
from ray.autoscaler.command_runner import CommandRunnerInterface
from ray.autoscaler._private.constants import \
    DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES, \
    DEFAULT_OBJECT_STORE_MEMORY_PROPORTION
from ray.autoscaler._private.docker import check_bind_mounts_cmd, \
    check_docker_running_cmd, \
    check_docker_image, \
@@ -716,8 +719,8 @@ class DockerCommandRunner(CommandRunnerInterface):
                self.container_name,
                self.docker_config.get(
                    "run_options", []) + self.docker_config.get(
                        f"{'head' if as_head else 'worker'}_run_options", []) +
                self._configure_runtime() + self._auto_configure_shm(),
                self.ssh_command_runner.cluster_name, home_directory)
            self.run(start_command, run_env="host")
        else:
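The flags returned by _configure_runtime() and _auto_configure_shm() are simply concatenated with the user-supplied run options before the container is started. An illustrative sketch (not Ray's docker_start_cmds; all values are examples) of how the pieces combine:

# Illustration only: how the option lists combine into docker run flags.
run_options = ["-v /tmp:/tmp"]                # docker: run_options from the cluster config (example)
head_run_options = ["--cpus=4"]               # docker: head_run_options (example)
runtime_options = ["--runtime=nvidia"]        # as returned by _configure_runtime()
shm_options = ["--shm-size='11151360000b'"]   # as returned by _auto_configure_shm()

flags = " ".join(run_options + head_run_options + runtime_options + shm_options)
print(f"docker run {flags} ... rayproject/ray:latest")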
@@ -781,6 +784,27 @@ class DockerCommandRunner(CommandRunnerInterface):
        return []

    def _auto_configure_shm(self):
        if self.docker_config.get("disable_shm_size_detection"):
            return []
        try:
            shm_output = self.ssh_command_runner.run(
                "cat /proc/meminfo || true",
                with_output=True).decode().strip()
            available_memory = int([
                ln for ln in shm_output.split("\n") if "MemAvailable" in ln
            ][0].split()[1])
            available_memory_bytes = available_memory * 1024
            # Overestimate SHM size by 10%
            shm_size = min((available_memory_bytes *
                            DEFAULT_OBJECT_STORE_MEMORY_PROPORTION * 1.1),
                           DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES)
            return [f"--shm-size='{shm_size}b'"]
        except Exception as e:
            logger.warning(
                f"Received error while trying to auto-compute SHM size {e}")
            return []

    def _get_docker_host_mount_location(self, cluster_name: str) -> str:
        """Return the docker host mount directory location."""
        # Imported here due to circular dependency in imports.

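For concreteness, here is the same arithmetic as a standalone sketch, using the constants introduced in this commit and the example MemAvailable value from the test further down (the flag value is cast to int here only to keep the output tidy):

# Standalone sketch of the SHM-size computation in _auto_configure_shm;
# the meminfo line is example data.
DEFAULT_OBJECT_STORE_MEMORY_PROPORTION = 0.3
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 200 * 10**9

meminfo_line = "MemAvailable:   33000000 kB"  # example value, in kB
available_bytes = int(meminfo_line.split()[1]) * 1024

# 30% of available memory, padded by 10%, capped at 200 GB.
shm_size = int(min(available_bytes * DEFAULT_OBJECT_STORE_MEMORY_PROPORTION * 1.1,
                   DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES))
print(f"--shm-size='{shm_size}b'")  # --shm-size='11151360000b' (~11 GB)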
View file

@@ -1,7 +1,8 @@
import os
from ray.ray_constants import (  # noqa F401
    AUTOSCALER_RESOURCE_REQUEST_CHANNEL, DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES,
    DEFAULT_OBJECT_STORE_MEMORY_PROPORTION, LOGGER_FORMAT,
    MEMORY_RESOURCE_UNIT_BYTES, RESOURCES_ENVIRONMENT_VARIABLE)

View file

@@ -242,6 +242,11 @@
                "type": "boolean",
                "description": "disable Ray from automatically using the NVIDIA runtime if available",
                "default": false
            },
            "disable_shm_size_detection": {
                "type": "boolean",
                "description": "disable Ray from automatically detecting /dev/shm size for the container",
                "default": false
            }
        }
    },

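The new flag sits alongside the existing docker options in the cluster config. A minimal sketch of the resulting docker_config dict as the command runner sees it (image and container_name are illustrative values; only disable_shm_size_detection is the field added here):

# Illustrative docker_config as DockerCommandRunner might receive it.
docker_config = {
    "image": "rayproject/ray:latest",        # example image
    "container_name": "ray_container",       # example container name
    "disable_shm_size_detection": True,      # skip the /proc/meminfo probe entirely
}
# With this flag set, _auto_configure_shm() returns [] and no --shm-size
# option is appended to the docker run command.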
View file

@@ -24,6 +24,8 @@ ID_SIZE = 20
# The default maximum number of bytes to allocate to the object store unless
# overridden by the user.
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 200 * 10**9
# The default proportion of available memory allocated to the object store
DEFAULT_OBJECT_STORE_MEMORY_PROPORTION = 0.3
# The smallest cap on the memory used by the object store that we allow.
# This must be greater than MEMORY_RESOURCE_UNIT_BYTES * 0.7
OBJECT_STORE_MINIMUM_MEMORY_BYTES = 75 * 1024 * 1024

View file

@@ -179,7 +179,9 @@ class ResourceSpec(
            avail_memory = ray.utils.estimate_available_memory()
            object_store_memory = self.object_store_memory
            if object_store_memory is None:
                object_store_memory = int(
                    avail_memory *
                    ray_constants.DEFAULT_OBJECT_STORE_MEMORY_PROPORTION)
                # Cap memory to avoid memory waste and perf issues on large nodes
                if (object_store_memory >
                        ray_constants.DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES):

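Taken together with the constants above, the default sizing rule is just a proportion plus a cap. A minimal sketch (pick_object_store_memory is an illustrative name, not Ray's API):

# Sketch of the default object-store sizing rule: 30% of available memory,
# capped at 200 GB.
DEFAULT_OBJECT_STORE_MEMORY_PROPORTION = 0.3
DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES = 200 * 10**9

def pick_object_store_memory(avail_memory_bytes: int) -> int:
    proposed = int(avail_memory_bytes * DEFAULT_OBJECT_STORE_MEMORY_PROPORTION)
    return min(proposed, DEFAULT_OBJECT_STORE_MAX_MEMORY_BYTES)

# Example: a node with 64 GB available gets a 19.2 GB object store.
print(pick_object_store_memory(64 * 10**9))  # 19200000000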
View file

@@ -1516,6 +1516,37 @@ class AutoscalingTest(unittest.TestCase):
                f"{file_mount_dir}/ ubuntu@172.0.0.{i}:"
                f"{docker_mount_prefix}/home/test-folder/")

    def testAutodetectResources(self):
        self.provider = MockProvider()
        config = SMALL_CLUSTER.copy()
        config_path = self.write_config(config)
        runner = MockProcessRunner()
        proc_meminfo = """
MemTotal:       16396056 kB
MemFree:        12869528 kB
MemAvailable:   33000000 kB
"""
        runner.respond_to_call("cat /proc/meminfo", [proc_meminfo])
        runner.respond_to_call(".Runtimes", ["nvidia-container-runtime"])
        runner.respond_to_call("nvidia-smi", ["works"])
        lm = LoadMetrics()
        autoscaler = StandardAutoscaler(
            config_path,
            lm,
            max_failures=0,
            process_runner=runner,
            update_interval_s=0)
        autoscaler.update()
        self.waitForNodes(2)
        self.provider.finish_starting_nodes()
        autoscaler.update()
        self.waitForNodes(
            2, tag_filters={TAG_RAY_NODE_STATUS: STATUS_UP_TO_DATE})
        autoscaler.update()
        runner.assert_has_call("172.0.0.0", pattern="--shm-size")
        runner.assert_has_call("172.0.0.0", pattern="--runtime=nvidia")


if __name__ == "__main__":
    import sys