[docs] Editing pass on clusters docs, removing legacy material and fixing style issues (#27816) (#27817)

Signed-off-by: Eric Liang <ekhliang@gmail.com>
This commit is contained in:
Eric Liang 2022-08-12 09:14:14 -07:00 committed by GitHub
parent 3059eb564f
commit e7630c8a0d
187 changed files with 3810 additions and 4956 deletions

View file

@ -9,33 +9,6 @@ exports_files(["test_myst_doc.py"])
# root directory.
# --------------------------------------------------------------------
# Support for Dask has been dropped in 3.6.
py_test(
name = "dask_xgboost",
size = "medium",
main = "test_myst_doc.py",
srcs = ["test_myst_doc.py"],
tags = ["exclusive", "team:ml", "py37"],
data = ["//doc/source/ray-core/examples:core_examples"],
args = ["--path", "doc/source/ray-core/examples/dask_xgboost/dask_xgboost.ipynb",
"--smoke-test", "--address ''", "--num-actors 4", "--cpus-per-actor 1", "--num-actors-inference 4",
"--cpus-per-actor-inference 1"]
)
# Support for Modin has been dropped in 3.6.
py_test(
name = "modin_xgboost",
size = "medium",
main = "test_myst_doc.py",
srcs = ["test_myst_doc.py"],
tags = ["exclusive", "team:ml", "py37"],
data = ["//doc/source/ray-core/examples:core_examples"],
args = ["--path", "doc/source/ray-core/examples/modin_xgboost/modin_xgboost.ipynb",
"--smoke-test", "--address ''", "--num-actors 4",
"--cpus-per-actor 1", "--num-actors-inference 4",
"--cpus-per-actor-inference 1"]
)
py_test(
name = "highly_parallel",
size = "medium",

View file

@ -214,14 +214,9 @@ parts:
title: More Libraries
sections:
- file: ray-more-libs/joblib
- file: ray-more-libs/lightgbm-ray
- file: ray-more-libs/multiprocessing
- file: ray-more-libs/ray-collective
- file: ray-more-libs/ray-lightning
- file: ray-core/examples/using-ray-with-pytorch-lightning
- file: ray-more-libs/xgboost-ray
- file: ray-core/examples/dask_xgboost/dask_xgboost
- file: ray-core/examples/modin_xgboost/modin_xgboost
- file: workflows/concepts
title: Ray Workflows
sections:
@ -254,97 +249,54 @@ parts:
- caption: Ray Clusters
chapters:
- file: cluster/index
- file: cluster/quickstart
- file: cluster/getting-started
title: Getting Started
- file: cluster/key-concepts
- file: cluster/user-guide
- file: cluster/cloud
- file: cluster/kuberay
title: Key Concepts
- file: cluster/kubernetes/index
title: Deploying on Kubernetes
sections:
- file: cluster/kuberay/quickstart
- file: cluster/kuberay/config
- file: cluster/kuberay/k8s-cluster-setup
- file: cluster/kuberay/logging
- file: cluster/kuberay/networking
- file: cluster/kuberay/gpu
- file: cluster/kuberay/ml-example
- file: cluster/kuberay/kuberay-vs-legacy
- file: cluster/deploy
- file: cluster/api
- file: cluster/usage-stats
- file: cluster/cluster_under_construction
title: Ray Clusters (under construction)
- file: cluster/kubernetes/getting-started
- file: cluster/kubernetes/user-guides
sections:
- file: cluster/kubernetes/user-guides/k8s-cluster-setup.md
- file: cluster/kubernetes/user-guides/config.md
- file: cluster/kubernetes/user-guides/configuring-autoscaling.md
- file: cluster/kubernetes/user-guides/logging.md
- file: cluster/kubernetes/user-guides/gpu.md
- file: cluster/kubernetes/examples
sections:
- file: cluster/kubernetes/examples/ml-example.md
- file: cluster/kubernetes/references
- file: cluster/vms/index
title: Deploying on VMs
sections:
- file: cluster/cluster_under_construction/getting-started
title: Getting Started
- file: cluster/cluster_under_construction/key-concepts
title: Key Concepts
- file: cluster/cluster_under_construction/ray-clusters-on-kubernetes/index
title: Ray Clusters on Kubernetes
- file: cluster/vms/getting-started
- file: cluster/vms/user-guides/index
title: User Guides
sections:
- file: cluster/vms/user-guides/launching-clusters/index
title: Launching Clusters
- file: cluster/vms/user-guides/large-cluster-best-practices
- file: cluster/vms/user-guides/configuring-autoscaling
- file: cluster/vms/user-guides/community/index
title: Community-supported Cluster Managers
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-kubernetes/getting-started
- file: cluster/cluster_under_construction/ray-clusters-on-kubernetes/user-guides
- file: cluster/cluster_under_construction/ray-clusters-on-kubernetes/examples
- file: cluster/cluster_under_construction/ray-clusters-on-kubernetes/references
- file: cluster/cluster_under_construction/ray-clusters-on-vms/index
title: Ray Clusters on VMs
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/getting-started
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/index
title: User Guides
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/installing-ray
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/launching-clusters/index
title: Launching Clusters
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/launching-clusters/aws
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/launching-clusters/azure
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/launching-clusters/gcp
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/launching-clusters/add-your-own-cloud-provider
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-ray-cluster-on-prem
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/monitoring-and-observing-ray-cluster
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/manual-cluster-setup
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/large-cluster-best-practices
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/multi-tenancy-best-practices
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/configuring-autoscaling
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/community-supported-cluster-manager/index
title: Community-supported Cluster Managers
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/community-supported-cluster-manager/yarn
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/community-supported-cluster-manager/slurm
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/community-supported-cluster-manager/lsf
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-jobs/index
title: Running Jobs
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-jobs/job-submission-cli
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-jobs/job-submission-sdk
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-jobs/job-submission-rest
- file: cluster/cluster_under_construction/ray-clusters-on-vms/user-guides/running-jobs/ray-client
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/index
title: Examples
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/index
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/launching-ray-cluster
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/running-job-via-job-submission
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/scale-your-job-with-autoscaling
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/observe-your-app
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/learn-ray-cluster-basics/shutting-down-your-ray-cluster
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/setting-up-ray-cluster-for-development/index
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/setting-up-ray-cluster-for-development/todo
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/setting-up-ray-cluster-for-production/index
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/examples/setting-up-ray-cluster-for-production/todo
- file: cluster/cluster_under_construction/ray-clusters-on-vms/references/index
sections:
- file: cluster/cluster_under_construction/ray-clusters-on-vms/references/job-submission-apis
- file: cluster/cluster_under_construction/ray-clusters-on-vms/references/ray-cluster-configuration
- file: cluster/cluster_under_construction/ray-clusters-on-vms/references/ray-job-submission
- file: cluster/vms/user-guides/community/yarn
- file: cluster/vms/user-guides/community/slurm
- file: cluster/vms/user-guides/community/lsf
- file: cluster/vms/examples/index
title: Examples
sections:
- file: cluster/vms/examples/ml-example
- file: cluster/vms/references/index
- file: cluster/running-applications/index
title: Applications Guide
- caption: References
chapters:
- file: ray-references/api
- file: cluster/usage-stats
- caption: Developer Guides
chapters:

View file

@ -1,11 +0,0 @@
.. _ref-cluster-api:
Ray Cluster API
===============
.. toctree::
:maxdepth: 2
:caption: Ray Cluster API References
../cluster/reference.rst
../cluster/jobs-package-ref.rst

View file

@ -1,444 +0,0 @@
.. include:: we_are_hiring.rst
.. _cluster-cloud:
Launching Cloud Clusters
========================
This section provides instructions for configuring the Ray Cluster Launcher for use with various cloud providers or on a private cluster of host machines.
See this blog post for a `step by step guide`_ to using the Ray Cluster Launcher.
To learn about deploying Ray on an existing Kubernetes cluster, refer to the guide :ref:`here<kuberay-index>`.
.. _`step by step guide`: https://medium.com/distributed-computing-with-ray/a-step-by-step-guide-to-scaling-your-first-python-application-in-the-cloud-8761fe331ef1
.. _ref-cloud-setup:
Ray with cloud providers
------------------------
.. toctree::
:hidden:
/cluster/aws-tips.rst
.. tabbed:: AWS
First, install boto (``pip install boto3``) and configure your AWS credentials in ``~/.aws/credentials``,
as described in `the boto docs <http://boto3.readthedocs.io/en/latest/guide/configuration.html>`__.
Once boto is configured to manage resources on your AWS account, you should be ready to launch your cluster. The provided `ray/python/ray/autoscaler/aws/example-full.yaml <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/aws/example-full.yaml>`__ cluster config file will create a small cluster with an m5.large head node (on-demand) configured to autoscale up to two m5.large `spot workers <https://aws.amazon.com/ec2/spot/>`__.
Test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to SSH into the cluster head node.
$ ray up ray/python/ray/autoscaler/aws/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/aws/example-full.yaml
$ # Try running a Ray program.
# Tear down the cluster.
$ ray down ray/python/ray/autoscaler/aws/example-full.yaml
AWS Node Provider Maintainers (GitHub handles): pdames, Zyiqin-Miranda, DmitriGekhtman, wuisawesome
See :ref:`aws-cluster` for recipes on customizing AWS clusters.
.. tabbed:: Azure
First, install the Azure CLI (``pip install azure-cli azure-identity``), then log in using ``az login``.
Set the subscription to use from the command line (``az account set -s <subscription_id>``) or by modifying the provider section of the provided config, e.g., `ray/python/ray/autoscaler/azure/example-full.yaml`.
Once the Azure CLI is configured to manage resources on your Azure account, you should be ready to launch your cluster. The provided `ray/python/ray/autoscaler/azure/example-full.yaml <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/azure/example-full.yaml>`__ cluster config file will create a small cluster with a Standard DS2v3 head node (on-demand) configured to autoscale up to two Standard DS2v3 `spot workers <https://docs.microsoft.com/en-us/azure/virtual-machines/windows/spot-vms>`__. Note that you'll need to fill in your resource group and location in those templates.
Test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to SSH into the cluster head node.
$ ray up ray/python/ray/autoscaler/azure/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/azure/example-full.yaml
# test ray setup
$ python -c 'import ray; ray.init()'
$ exit
# Tear down the cluster.
$ ray down ray/python/ray/autoscaler/azure/example-full.yaml
**Azure Portal**:
Alternatively, you can deploy a cluster using Azure portal directly. Please note that autoscaling is done using Azure VM Scale Sets and not through
the Ray autoscaler. This will deploy `Azure Data Science VMs (DSVM) <https://azure.microsoft.com/en-us/services/virtual-machines/data-science-virtual-machines/>`_
for both the head node and the auto-scalable cluster managed by `Azure Virtual Machine Scale Sets <https://azure.microsoft.com/en-us/services/virtual-machine-scale-sets/>`_.
The head node conveniently exposes both SSH as well as JupyterLab.
.. image:: https://aka.ms/deploytoazurebutton
:target: https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fray-project%2Fray%2Fmaster%2Fdoc%2Fazure%2Fazure-ray-template.json
:alt: Deploy to Azure
Once the template is successfully deployed, the deployment Outputs page provides the SSH command to connect and the link to JupyterHub on the head node (username/password as specified in the template input).
Use the following code in a Jupyter notebook (using the conda environment specified in the template input, py38_tensorflow by default) to connect to the Ray cluster.
.. code-block:: python
import ray
ray.init()
Note that on each node the `azure-init.sh <https://github.com/ray-project/ray/blob/master/doc/azure/azure-init.sh>`_ script is executed and performs the following actions:
1. Activates one of the conda environments available on DSVM
2. Installs Ray and any other user-specified dependencies
3. Sets up a systemd task (``/lib/systemd/system/ray.service``) to start Ray in head or worker mode
Azure Node Provider Maintainers (GitHub handles): gramhagen, eisber, ijrsvt
.. note:: The Azure Node Provider is community-maintained. It is maintained by its authors, not the Ray team.
.. tabbed:: GCP
First, install the Google API client (``pip install google-api-python-client``), set up your GCP credentials, and create a new GCP project.
Once the API client is configured to manage resources on your GCP account, you should be ready to launch your cluster. The provided `ray/python/ray/autoscaler/gcp/example-full.yaml <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/gcp/example-full.yaml>`__ cluster config file will create a small cluster with a n1-standard-2 head node (on-demand) configured to autoscale up to two n1-standard-2 `preemptible workers <https://cloud.google.com/preemptible-vms/>`__. Note that you'll need to fill in your project id in those templates.
Test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to SSH into the cluster head node.
$ ray up ray/python/ray/autoscaler/gcp/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/gcp/example-full.yaml
$ # Try running a Ray program with 'ray.init()'.
# Tear down the cluster.
$ ray down ray/python/ray/autoscaler/gcp/example-full.yaml
GCP Node Provider Maintainers (GitHub handles): wuisawesome, DmitriGekhtman, ijrsvt
.. tabbed:: Aliyun
First, install the aliyun client package (``pip install aliyun-python-sdk-core aliyun-python-sdk-ecs``). Obtain the AccessKey pair of the Aliyun account as described in `the docs <https://www.alibabacloud.com/help/en/doc-detail/175967.htm>`__ and grant AliyunECSFullAccess/AliyunVPCFullAccess permissions to the RAM user. Finally, set the AccessKey pair in your cluster config file.
Once the above is done, you should be ready to launch your cluster. The provided `aliyun/example-full.yaml </ray/python/ray/autoscaler/aliyun/example-full.yaml>`__ cluster config file will create a small cluster with an ``ecs.n4.large`` head node (on-demand) configured to autoscale up to two ``ecs.n4.2xlarge`` nodes.
Make sure your account balance is at least 100 RMB; otherwise, you will receive an ``InvalidAccountStatus.NotEnoughBalance`` error.
Test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to SSH into the cluster head node.
$ ray up ray/python/ray/autoscaler/aliyun/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/aliyun/example-full.yaml
$ # Try running a Ray program with 'ray.init()'.
# Tear down the cluster.
$ ray down ray/python/ray/autoscaler/aliyun/example-full.yaml
Aliyun Node Provider Maintainers (GitHub handles): zhuangzhuang131419, chenk008
.. note:: The Aliyun Node Provider is community-maintained. It is maintained by its authors, not the Ray team.
.. tabbed:: Custom
Ray also supports external node providers (see the `node_provider.py <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/node_provider.py>`__ implementation).
You can specify the external node provider using the yaml config:
.. code-block:: yaml
provider:
type: external
module: mypackage.myclass
The module needs to be in the format ``package.provider_class`` or ``package.sub_package.provider_class``.
.. _cluster-private-setup:
Local On Premise Cluster (List of nodes)
----------------------------------------
Use this mode if you want to run distributed Ray applications on local nodes available on-premise.
The preferred way to run a Ray cluster on a private cluster of hosts is via the Ray Cluster Launcher.
There are two ways of running private clusters:
- Manually managed, i.e., the user explicitly specifies the head and worker IPs.
- Automatically managed, i.e., the user only specifies the address of a coordinator server that automatically manages the head and worker IPs.
.. tip:: To avoid password prompts when running private clusters, make sure to set up your SSH keys on the private cluster as follows:
.. code-block:: bash
$ ssh-keygen
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
.. tabbed:: Manually Managed
You can get started by filling out the fields in the provided `ray/python/ray/autoscaler/local/example-full.yaml <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/local/example-full.yaml>`__.
Be sure to specify the proper ``head_ip``, list of ``worker_ips``, and the ``ssh_user`` field.
Test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to get a remote shell into the head node.
$ ray up ray/python/ray/autoscaler/local/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/local/example-full.yaml
$ # Try running a Ray program with 'ray.init()'.
# Tear down the cluster
$ ray down ray/python/ray/autoscaler/local/example-full.yaml
.. tabbed:: Automatically Managed
Start by launching the coordinator server that will manage all the on-prem clusters. This server also isolates resources between different users. The script for running the coordinator server is `ray/python/ray/autoscaler/local/coordinator_server.py <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/local/coordinator_server.py>`__. To launch the coordinator server, run:
.. code-block:: bash
$ python coordinator_server.py --ips <list_of_node_ips> --port <PORT>
where ``list_of_node_ips`` is a comma-separated list of all the available nodes on the private cluster (for example, ``160.24.42.48,160.24.42.49,...``) and ``<PORT>`` is the port that the coordinator server will listen on.
Once started, the coordinator server prints its address. For example:
.. code-block:: bash
>> INFO:ray.autoscaler.local.coordinator_server:Running on prem coordinator server
on address <Host:PORT>
Next, the user only specifies the ``<Host:PORT>`` printed above in the ``coordinator_address`` entry instead of specific head/worker IPs in the provided `ray/python/ray/autoscaler/local/example-full.yaml <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/local/example-full.yaml>`__.
Now we can test that it works by running the following commands from your local machine:
.. code-block:: bash
# Create or update the cluster. When the command finishes, it will print
# out the command that can be used to get a remote shell into the head node.
$ ray up ray/python/ray/autoscaler/local/example-full.yaml
# Get a remote screen on the head node.
$ ray attach ray/python/ray/autoscaler/local/example-full.yaml
$ # Try running a Ray program with 'ray.init()'.
# Tear down the cluster
$ ray down ray/python/ray/autoscaler/local/example-full.yaml
.. _manual-cluster:
Manual Ray Cluster Setup
------------------------
The preferred way to run a Ray cluster is via the Ray Cluster Launcher. However, it is also possible to start a Ray cluster by hand.
This section assumes that you have a list of machines and that the nodes in the cluster can communicate with each other. It also assumes that Ray is installed
on each machine. To install Ray, follow the `installation instructions`_.
.. _`installation instructions`: http://docs.ray.io/en/master/installation.html
Starting Ray on each machine
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On the head node (just choose one node to be the head node), run the following.
If the ``--port`` argument is omitted, Ray will choose port 6379, falling back to a
random port.
.. code-block:: bash
$ ray start --head --port=6379
...
Next steps
To connect to this Ray runtime from another node, run
ray start --address='<ip address>:6379'
If connection fails, check your firewall settings and network configuration.
The command will print out the address of the Ray GCS server that was started
(the local node IP address plus the port number you specified).
.. note::
If you already have remote Redis instances, you can set the environment variable
``RAY_REDIS_ADDRESS=ip1:port1,ip2:port2...`` to use them. The first one is the
primary and the rest are shards.
**Then on each of the other nodes**, run the following. Make sure to replace
``<address>`` with the value printed by the command on the head node (it
should look something like ``123.45.67.89:6379``).
Note that if your compute nodes are on their own subnetwork with Network
Address Translation, the command printed by the head node will not work when run
from a machine outside that subnetwork. You will need to find the
address that will reach the head node from the second machine. If the head node
has a domain address like compute04.berkeley.edu, you can simply use that in
place of an IP address and rely on DNS.
.. code-block:: bash
$ ray start --address=<address>
--------------------
Ray runtime started.
--------------------
To terminate the Ray runtime, run
ray stop
If you wish to specify that a machine has 10 CPUs and 1 GPU, you can do this
with the flags ``--num-cpus=10`` and ``--num-gpus=1``. See the :ref:`Configuration <configuring-ray>` page for more information.
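As a quick illustration (a minimal sketch, assuming you run it on one of the cluster's nodes), resources declared with these flags can then be requested by tasks:
.. code-block:: python

    import ray

    # Connect to the cluster started with `ray start` above.
    ray.init(address="auto")

    # This task is only scheduled on a node that can provide one GPU,
    # e.g. a node started with `ray start --num-gpus=1`.
    @ray.remote(num_gpus=1)
    def gpu_task():
        return "ran on a GPU node"

    print(ray.get(gpu_task.remote()))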
If you see ``Unable to connect to GCS at ...``,
this means the head node is inaccessible at the given ``--address`` (because, for
example, the head node is not actually running, a different version of Ray is
running at the specified address, the specified address is wrong, or there are
firewall settings preventing access).
If you see ``Ray runtime started.``, then the node successfully connected to
the head node at the ``--address``. You should now be able to connect to the
cluster with ``ray.init()``.
.. code-block:: bash
If connection fails, check your firewall settings and network configuration.
If the connection fails, you can use a tool such as ``nmap`` or ``nc`` to check
whether each port can be reached from a node.
.. code-block:: bash
$ nmap -sV --reason -p $PORT $HEAD_ADDRESS
Nmap scan report for compute04.berkeley.edu (123.456.78.910)
Host is up, received echo-reply ttl 60 (0.00087s latency).
rDNS record for 123.456.78.910: compute04.berkeley.edu
PORT STATE SERVICE REASON VERSION
6379/tcp open redis? syn-ack
Service detection performed. Please report any incorrect results at https://nmap.org/submit/ .
$ nc -vv -z $HEAD_ADDRESS $PORT
Connection to compute04.berkeley.edu 6379 port [tcp/*] succeeded!
If the node cannot access that port at that IP address, you might see
.. code-block:: bash
$ nmap -sV --reason -p $PORT $HEAD_ADDRESS
Nmap scan report for compute04.berkeley.edu (123.456.78.910)
Host is up (0.0011s latency).
rDNS record for 123.456.78.910: compute04.berkeley.edu
PORT STATE SERVICE REASON VERSION
6379/tcp closed redis reset ttl 60
Service detection performed. Please report any incorrect results at https://nmap.org/submit/ .
$ nc -vv -z $HEAD_ADDRESS $PORT
nc: connect to compute04.berkeley.edu port 6379 (tcp) failed: Connection refused
Stopping Ray
~~~~~~~~~~~~
When you want to stop the Ray processes, run ``ray stop`` on each node.
Additional Cloud Providers
--------------------------
To use Ray autoscaling on other Cloud providers or cluster management systems, you can implement the ``NodeProvider`` interface (100 LOC) and register it in `node_provider.py <https://github.com/ray-project/ray/tree/master/python/ray/autoscaler/node_provider.py>`__. Contributions are welcome!
Security
--------
On cloud providers, nodes will be launched into their own security group by default, with traffic allowed only between nodes in the same group. A new SSH key will also be created and saved to your local machine for access to the cluster.
.. _using-ray-on-a-cluster:
Running a Ray program on the Ray cluster
----------------------------------------
To run a distributed Ray program, you'll need to execute your program on the same machine as one of the nodes.
.. tabbed:: Python
Within your program/script, ``ray.init()`` will now automatically find and connect to the latest Ray cluster.
For example:
.. code-block:: python
ray.init()
# Connecting to existing Ray cluster at address: <IP address>...
.. tabbed:: Java
You need to add the ``ray.address`` parameter to your command line (like ``-Dray.address=...``).
To connect your program to the Ray cluster, run it like this:
.. code-block:: bash
java -classpath <classpath> \
-Dray.address=<address> \
<classname> <args>
.. note:: Specifying ``auto`` as the address hasn't been implemented in Java yet. You need to provide the actual address. You can find the address of the server from the output of the ``ray up`` command.
.. tabbed:: C++
You need to add the ``RAY_ADDRESS`` env var to your command line (like ``RAY_ADDRESS=...``).
To connect your program to the Ray cluster, run it like this:
.. code-block:: bash
RAY_ADDRESS=<address> ./<binary> <args>
.. note:: Specifying ``auto`` as the address hasn't been implemented in C++ yet. You need to provide the actual address. You can find the address of the server from the output of the ``ray up`` command.
.. note:: A common mistake is setting the address to be a cluster node while running the script on your laptop. This will not work because the script needs to be started/executed on one of the Ray nodes.
To verify that the correct number of nodes have joined the cluster, you can run the following.
.. code-block:: python
import time

import ray

# Connect to the running Ray cluster on this node.
ray.init()

@ray.remote
def f():
    time.sleep(0.01)
    return ray._private.services.get_node_ip_address()

# Get the set of IP addresses of the nodes that have joined the cluster.
# Its size should match the number of nodes in the cluster.
print(set(ray.get([f.remote() for _ in range(1000)])))
What's Next?
-------------
Now that you have a working understanding of the cluster launcher, check out:
* :ref:`ref-cluster-quick-start`: An end-to-end demo to run an application that autoscales.
* :ref:`cluster-config`: A complete reference of how to configure your Ray cluster.
* :ref:`cluster-commands`: A short user guide to the various cluster launcher commands.
Questions or Issues?
--------------------
.. include:: /_includes/_help.rst

View file

@ -1,4 +0,0 @@
# Ray Clusters (under construction)
:::{warning}
This page is under construction!
:::

View file

@ -1,264 +0,0 @@
.. include:: /_includes/clusters/announcement.rst
.. include:: we_are_hiring.rst
.. _ref-cluster-getting-started-under-construction:
.. warning::
This page is under construction!
TODO(cade)
Direct users, based on what they are trying to accomplish, to the
correct page between "Managing Ray Clusters on Kubernetes",
"Managing Ray Clusters via `ray up`", and "Using Ray Clusters".
There should be some discussion on Kubernetes vs. `ray up` for
those looking to create new Ray clusters for the first time.
Getting Started with Ray Clusters
=================================
This page demonstrates the capabilities of the Ray cluster. Using the Ray cluster, we'll take a sample application designed to run on a laptop and scale it up in the cloud. Ray will launch clusters and scale Python with just a few commands.
For launching a Ray cluster manually, you can refer to the :ref:`on-premise cluster setup <cluster-private-setup>` guide.
About the demo
--------------
This demo will walk through an end-to-end flow:
1. Create a (basic) Python application.
2. Launch a cluster on a cloud provider.
3. Run the application in the cloud.
Requirements
~~~~~~~~~~~~
To run this demo, you will need:
* Python installed on your development machine (typically your laptop), and
* an account at your preferred cloud provider (AWS, Azure or GCP).
Setup
~~~~~
Before we start, you will need to install some Python dependencies as follows:
.. tabbed:: AWS
.. code-block:: shell
$ pip install -U "ray[default]" boto3
.. tabbed:: Azure
.. code-block:: shell
$ pip install -U "ray[default]" azure-cli azure-core
.. tabbed:: GCP
.. code-block:: shell
$ pip install -U "ray[default]" google-api-python-client
Next, if you're not set up to use your cloud provider from the command line, you'll have to configure your credentials:
.. tabbed:: AWS
Configure your credentials in ``~/.aws/credentials`` as described in `the AWS docs <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html>`_.
.. tabbed:: Azure
Log in using ``az login``, then configure your credentials with ``az account set -s <subscription_id>``.
.. tabbed:: GCP
Set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable as described in `the GCP docs <https://cloud.google.com/docs/authentication/getting-started>`_.
Create a (basic) Python application
-----------------------------------
We will write a simple Python application that tracks the IP addresses of the machines that its tasks are executed on:
.. code-block:: python
from collections import Counter
import socket
import time
def f():
time.sleep(0.001)
# Return IP address.
return socket.gethostbyname(socket.gethostname())
ip_addresses = [f() for _ in range(10000)]
print(Counter(ip_addresses))
Save this application as ``script.py`` and execute it by running the command ``python script.py``. The application should take 10 seconds to run and output something similar to ``Counter({'127.0.0.1': 10000})``.
With some small changes, we can make this application run on Ray (for more information on how to do this, refer to :ref:`the Ray Core Walkthrough<core-walkthrough>`):
.. code-block:: python
from collections import Counter
import socket
import time
import ray
ray.init()
@ray.remote
def f():
time.sleep(0.001)
# Return IP address.
return socket.gethostbyname(socket.gethostname())
object_ids = [f.remote() for _ in range(10000)]
ip_addresses = ray.get(object_ids)
print(Counter(ip_addresses))
Finally, let's add some code to make the output more interesting:
.. code-block:: python
from collections import Counter
import socket
import time
import ray
ray.init()
print('''This cluster consists of
{} nodes in total
{} CPU resources in total
'''.format(len(ray.nodes()), ray.cluster_resources()['CPU']))
@ray.remote
def f():
time.sleep(0.001)
# Return IP address.
return socket.gethostbyname(socket.gethostname())
object_ids = [f.remote() for _ in range(10000)]
ip_addresses = ray.get(object_ids)
print('Tasks executed')
for ip_address, num_tasks in Counter(ip_addresses).items():
print(' {} tasks on {}'.format(num_tasks, ip_address))
Running ``python script.py`` should now output something like:
.. parsed-literal::
This cluster consists of
1 nodes in total
4.0 CPU resources in total
Tasks executed
10000 tasks on 127.0.0.1
Launch a cluster on a cloud provider
------------------------------------
To start a Ray Cluster, first we need to define the cluster configuration. The cluster configuration is defined within a YAML file that will be used by the Cluster Launcher to launch the head node, and by the Autoscaler to launch worker nodes.
A minimal sample cluster configuration file looks as follows:
.. tabbed:: AWS
.. code-block:: yaml
# A unique identifier for the head node and workers of this cluster.
cluster_name: minimal
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
.. tabbed:: Azure
.. code-block:: yaml
# A unique identifier for the head node and workers of this cluster.
cluster_name: minimal
# Cloud-provider specific configuration.
provider:
type: azure
location: westus2
resource_group: ray-cluster
# How Ray will authenticate with newly launched nodes.
auth:
ssh_user: ubuntu
# you must specify paths to matching private and public key pair files
# use `ssh-keygen -t rsa -b 4096` to generate a new ssh key pair
ssh_private_key: ~/.ssh/id_rsa
# changes to this should match what is specified in file_mounts
ssh_public_key: ~/.ssh/id_rsa.pub
.. tabbed:: GCP
.. code-block:: yaml
# A unique identifier for the head node and workers of this cluster.
cluster_name: minimal
# Cloud-provider specific configuration.
provider:
type: gcp
region: us-west1
Save this configuration file as ``config.yaml``. You can specify a lot more details in the configuration file: instance types to use, minimum and maximum number of workers to start, autoscaling strategy, files to sync, and more. For a full reference on the available configuration properties, please refer to the :ref:`cluster YAML configuration options reference <cluster-config>`.
After defining our configuration, we will use the Ray Cluster Launcher to start a cluster on the cloud, creating a designated "head node" and worker nodes. To start the Ray cluster, we will use the :ref:`Ray CLI <ray-cli>`. Run the following command:
.. code-block:: shell
$ ray up -y config.yaml
Run the application in the cloud
--------------------------------
We are now ready to execute the application across multiple machines on our Ray cloud cluster.
First, we need to edit the initialization command ``ray.init()`` in ``script.py``.
Change it to
.. code-block:: python
ray.init(address='auto')
This tells your script to connect to the Ray runtime on the remote cluster instead of initializing a new Ray runtime.
Next, run the following command:
.. code-block:: shell
$ ray submit config.yaml script.py
The output should now look similar to the following:
.. parsed-literal::
This cluster consists of
3 nodes in total
6.0 CPU resources in total
Tasks executed
3425 tasks on xxx.xxx.xxx.xxx
3834 tasks on xxx.xxx.xxx.xxx
2741 tasks on xxx.xxx.xxx.xxx
In this sample output, 3 nodes were started. If the output only shows 1 node, you may want to increase the ``secs`` in ``time.sleep(secs)`` to give Ray more time to start additional nodes.
The Ray CLI offers additional functionality. For example, you can monitor the Ray cluster status with ``ray monitor config.yaml``, and you can connect to the cluster (ssh into the head node) with ``ray attach config.yaml``. For a full reference on the Ray CLI, please refer to :ref:`the cluster commands reference <cluster-commands>`.
To finish, don't forget to shut down the cluster. Run the following command:
.. code-block:: shell
$ ray down -y config.yaml

View file

@ -1,26 +0,0 @@
.. include:: we_are_hiring.rst
.. warning::
This page is under construction!
Key Concepts
============
TODO(cade) Can we simplify this? From https://github.com/ray-project/ray/pull/26754#issuecomment-1192927645:
* Worker Nodes
* Head Node
* Autoscaler
* Clients and Jobs
Need to add the following sections + break out existing content into them.
See ray-core/user-guide.rst for a TOC example
overview
high-level-architecture
jobs
nodes-vs-workers
scheduling-and-autoscaling
configuration
Things-to-know

Binary file not shown (image, 55 KiB)

View file

@ -1,4 +0,0 @@
# Examples
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# Getting Started
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# Index
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# References
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# User Guides
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# Examples
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Learn Ray Cluster basics

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Launching a Ray cluster

View file

@ -1,5 +0,0 @@
:::{warning}
This page is under construction!
:::
# Observe your app
TODO(cade) with basic observability API

View file

@ -1,5 +0,0 @@
:::{warning}
This page is under construction!
:::
# Running a job via job submission
## Quick start example

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Scale your job

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Shutting down your Ray cluster

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Setting up a Ray cluster for development

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# TODO

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Setting up a Ray cluster for production

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# TODO

View file

@ -1,4 +0,0 @@
# Getting Started
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
# Index
:::{warning}
This page is under construction!
:::

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# References

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Job submission API

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Ray cluster configuration file

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Ray Job submission

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Using a community-supported cluster manager

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# LSF

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# SLURM

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# YARN

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Configuring autoscaling

View file

@ -1,4 +0,0 @@
# User Guides
:::{warning}
This page is under construction!
:::

View file

@ -1,6 +0,0 @@
:::{warning}
This page is under construction!
:::
# Installing Ray
## Install Ray via `pip`
## Use the Ray docker images

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Best practices for deploying large clusters

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Adding your own cloud provider

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# AWS

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Azure

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# GCP

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Launching a Ray Cluster on Cloud VMs

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Manual cluster setup

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Monitoring and Observing a Ray Cluster

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Best practices for multi-tenancy

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Running jobs

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Submit jobs via the CLI

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Submit jobs via the REST API

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Submit jobs via the SDK

View file

@ -1,6 +0,0 @@
:::{warning}
This page is under construction!
:::
# Interacting with the cluster via the Ray Client
## When to use
## How to use

View file

@ -1,4 +0,0 @@
:::{warning}
This page is under construction!
:::
# Running a Ray cluster on-prem

View file

@ -1,8 +0,0 @@
.. Comment this out for now.
..
.. admonition:: We're hiring!
`Anyscale Inc. <https://anyscale.com>`__, the company behind Ray, is hiring interns and full-time **software engineers** to help advance and maintain Ray autoscaler, cluster launcher, cloud providers, the Kubernetes operator, and Ray Client.
If you have a background in distributed computing/cluster orchestration/Kubernetes and are interested in making Ray **the** industry-leading open-source platform for distributed computing, `apply here today <https://jobs.lever.co/anyscale/814c0d0e-08f5-419a-bdd8-0819b8b8df24>`__.
We'd be thrilled to welcome you on the team!

View file

@ -1,19 +0,0 @@
.. include:: we_are_hiring.rst
.. _ref-cluster-setup:
Ray with Cluster Managers
=========================
.. note::
If you're using AWS, Azure or GCP you can use the :ref:`Ray Cluster Launcher <cluster-cloud>` to simplify the cluster setup process.
.. toctree::
:maxdepth: 2
kubernetes.rst
yarn.rst
slurm.rst
lsf.rst

View file

@ -61,5 +61,5 @@ done
# __doc_worker_ray_end__
# __doc_script_start__
# ray/doc/source/cluster/examples/simple-trainer.py
python -u simple-trainer.py "$SLURM_CPUS_PER_TASK"
# ray/doc/source/cluster/doc_code/simple-trainer.py
python -u simple-trainer.py "$SLURM_CPUS_PER_TASK"

View file

@ -0,0 +1,19 @@
from ray.job_submission import JobSubmissionClient
client = JobSubmissionClient("http://127.0.0.1:8265")
kick_off_xgboost_benchmark = (
# Clone ray. If ray is already present, don't clone again.
"git clone https://github.com/ray-project/ray || true;"
# Run the benchmark.
" python ray/release/air_tests/air_benchmarks/workloads/xgboost_benchmark.py"
" --size 100G --disable-check"
)
submission_id = client.submit_job(
entrypoint=kick_off_xgboost_benchmark,
)
print("Use the following command to follow this Job's logs:")
print(f"ray job logs '{submission_id}' --follow")

View file

@ -0,0 +1,50 @@
import sys
import time
from collections import Counter
import ray
@ray.remote
def get_host_name(x):
import platform
import time
time.sleep(0.01)
return x + (platform.node(),)
def wait_for_nodes(expected):
# Wait for all nodes to join the cluster.
while True:
num_nodes = len(ray.nodes())
if num_nodes < expected:
print(
"{} nodes have joined so far, waiting for {} more.".format(
num_nodes, expected - num_nodes
)
)
sys.stdout.flush()
time.sleep(1)
else:
break
def main():
wait_for_nodes(4)
# Check that objects can be transferred from each node to each other node.
for i in range(10):
print("Iteration {}".format(i))
results = [get_host_name.remote(get_host_name.remote(())) for _ in range(100)]
print(Counter(ray.get(results)))
sys.stdout.flush()
print("Success!")
sys.stdout.flush()
time.sleep(20)
if __name__ == "__main__":
ray.init(address="localhost:6379")
main()

View file

@ -0,0 +1,63 @@
name: ray
services:
# Head service.
ray-head:
# There should only be one instance of the head node per cluster.
instances: 1
resources:
# The resources for the head node.
vcores: 1
memory: 2048
files:
# ray/doc/source/cluster/doc_code/yarn/example.py
example.py: example.py
# # A packaged python environment using `conda-pack`. Note that Skein
# # doesn't require any specific way of distributing files, but this
# # is a good one for python projects. This is optional.
# # See https://jcrist.github.io/skein/distributing-files.html
# environment: environment.tar.gz
script: |
# Activate the packaged conda environment
# - source environment/bin/activate
# This stores the Ray head address in the Skein key-value store so that the workers can retrieve it later.
skein kv put current --key=RAY_HEAD_ADDRESS --value=$(hostname -i)
# This command starts all the processes needed on the ray head node.
# By default, we set object store memory and heap memory to roughly 200 MB. This is conservative
# and should be set according to application needs.
#
ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1
# This executes the user script.
python example.py
# After the user script has executed, all started processes should also die.
ray stop
skein application shutdown current
# Worker service.
ray-worker:
# The number of instances to start initially. This can be scaled
# dynamically later.
instances: 4
resources:
# The resources for the worker node
vcores: 1
memory: 2048
# files:
# environment: environment.tar.gz
depends:
# Don't start any worker nodes until the head node is started
- ray-head
script: |
# Activate the packaged conda environment
# - source environment/bin/activate
# This command gets any addresses it needs (e.g. the head node) from
# the skein key-value store.
RAY_HEAD_ADDRESS=$(skein kv get --key=RAY_HEAD_ADDRESS current)
# The below command starts all the processes needed on a ray worker node, blocking until killed with sigterm.
# After sigterm, all started processes should also die (ray stop).
ray start --object-store-memory=200000000 --memory 200000000 --num-cpus=1 --address=$RAY_HEAD_ADDRESS:6379 --block; ray stop

View file

@ -0,0 +1,81 @@
.. _cluster-index:
Ray Clusters Overview
=====================
Ray enables seamless scaling of workloads from a laptop to a large cluster. While Ray
works out of the box on single machines with just a call to ``ray.init``, to run Ray
applications on multiple nodes you must first *deploy a Ray cluster*.
A Ray cluster is a set of worker nodes connected to a common :ref:`Ray head node <cluster-head-node>`.
Ray clusters can be fixed-size, or they may :ref:`autoscale up and down <cluster-autoscaler>` according
to the resources requested by applications running on the cluster.
Where can I deploy Ray clusters?
--------------------------------
Ray provides native cluster deployment support on the following technology stacks:
* On :ref:`AWS and GCP <cloud-vm-index>`. Community-supported Azure and Aliyun integrations also exist.
* On :ref:`Kubernetes <kuberay-index>`, via the officially supported KubeRay project.
Advanced users may want to :ref:`deploy Ray manually <on-prem>`
or onto :ref:`platforms not listed here <ref-cluster-setup>`.
What's next?
------------
.. panels::
:container: text-center
:column: col-lg-6 px-3 py-2
:card:
**I want to learn key Ray cluster concepts**
^^^
Understand the key concepts and main ways of interacting with a Ray cluster.
+++
.. link-button:: cluster-key-concepts
:type: ref
:text: Learn Key Concepts
:classes: btn-outline-info btn-block
---
**I want to run Ray on Kubernetes**
^^^
Deploy a Ray application to a Kubernetes cluster. You can run the tutorial on a
Kubernetes cluster or on your laptop via KinD.
+++
.. link-button:: kuberay-quickstart
:type: ref
:text: Get Started with Ray on Kubernetes
:classes: btn-outline-info btn-block
---
**I want to run Ray on a cloud provider**
^^^
Take a sample application designed to run on a laptop and scale it up in the
cloud. Access to an AWS or GCP account is required.
+++
.. link-button:: vm-cluster-quick-start
:type: ref
:text: Get Started with Ray on VMs
:classes: btn-outline-info btn-block
---
**I want to run my application on an existing Ray cluster**
^^^
Guide to submitting applications as Jobs to existing Ray clusters.
+++
.. link-button:: jobs-quickstart
:type: ref
:text: Job Submission
:classes: btn-outline-info btn-block
.. include:: /_includes/clusters/announcement_bottom.rst

View file

@ -1,312 +0,0 @@
.. include:: we_are_hiring.rst
.. _deployment-guide:
Cluster Deployment Guide
========================
This page provides an overview of how to deploy a multi-node Ray cluster, including how to:
* Launch the cluster.
* Set up the autoscaler.
* Deploy a Ray application.
* Monitor a multi-node cluster.
* Apply best practices for setting up large Ray clusters.
Launching a Ray cluster
-----------------------
The two recommended ways of launching a Ray cluster are:
1. :ref:`The cluster launcher <cluster-cloud>`
2. :ref:`The Kubernetes operator <Ray-operator>`
Cluster Launcher
^^^^^^^^^^^^^^^^
The goal of :ref:`the cluster launcher <cluster-cloud>` is to make it easy to deploy a Ray cluster on
any cloud. It will:
* Provision a new instance/machine using the cloud provider's SDK.
* Execute shell commands to set up Ray with the provided options.
* (Optionally) run any custom, user-defined setup commands. This can be useful for setting environment variables and installing packages. (To dynamically set up environments after the cluster has been deployed, you can use :ref:`Runtime Environments<runtime-environments>`.)
* Initialize the Ray cluster.
* Deploy an autoscaler process.
Kubernetes Operator
^^^^^^^^^^^^^^^^^^^
The goal of the :ref:`Ray Kubernetes Operator <Ray-operator>` is to make it easy
to deploy a Ray cluster on an existing Kubernetes cluster.
To simplify Operator configuration, Ray provides a :ref:`a Helm chart <Ray-helm>`.
Installing the Helm chart will create an Operator Deployment.
The Operator manages autoscaling Ray clusters; each Ray node runs in its own K8s Pod.
.. _deployment-guide-autoscaler:
Autoscaling with Ray
--------------------
Ray is designed to support highly elastic workloads which are most efficient on
an autoscaling cluster. At a high level, the autoscaler attempts to
launch/terminate nodes in order to ensure that workloads have sufficient
resources to run, while minimizing the idle resources.
It does this by taking into consideration:
* User-specified hard limits (min/max workers).
* User-specified node types (nodes in a Ray cluster do *not* have to be
homogeneous).
* Information from the Ray core's scheduling layer about the current resource
usage/demands of the cluster.
* Programmatic autoscaling hints.
Take a look at :ref:`the cluster reference <cluster-config>` to learn more
about configuring the autoscaler.
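As one illustration of the programmatic autoscaling hints mentioned above, here is a minimal sketch assuming the ``ray.autoscaler.sdk.request_resources`` helper (the CPU counts are placeholders):
.. code-block:: python

    import ray
    from ray.autoscaler.sdk import request_resources

    ray.init(address="auto")

    # Ask the autoscaler to scale the cluster so that at least 16 CPUs are
    # available, regardless of the tasks currently queued.
    request_resources(num_cpus=16)

    # Later, cancel the request so the cluster can scale back down.
    request_resources(num_cpus=0)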
How does it work?
^^^^^^^^^^^^^^^^^
The Ray Cluster Launcher will automatically enable a load-based autoscaler. The
autoscaler resource demand scheduler will look at the resource demands of pending
tasks, actors, and placement groups in the cluster, and try to add the
minimum list of nodes that can fulfill these demands. The autoscaler uses a simple
bin-packing algorithm to pack the user demands into
the available cluster resources. The remaining unfulfilled demands are placed
on the smallest list of nodes that satisfies the demand while maximizing
utilization (starting from the smallest node).
**Downscaling**: When worker nodes are
idle (without active Tasks or Actors running on them)
for more than :ref:`idle_timeout_minutes
<cluster-configuration-idle-timeout-minutes>`, they are subject to
removal from the cluster. There are two important additional conditions
to note:
* The head node is never removed unless the cluster is torn down.
* If the Ray Object Store is used, and a Worker node still holds objects (including spilled objects on disk), it won't be removed.
**Here is "A Glimpse into the Ray Autoscaler" and how to debug/monitor your cluster:**
2021-19-01 by Ameer Haj-Ali, Anyscale Inc.
.. youtube:: BJ06eJasdu4
Deploying an application
------------------------
To submit an application to the Ray cluster, use the Ray :ref:`Job submission interface <jobs-overview>`.
.. code:: bash
export RAY_ADDRESS=<your_cluster_address>:8265
ray job submit ... -- "python script.py"
To interactively connect to a Ray cluster, connect via the :ref:`Ray Client<ray-client>`.
.. code-block:: python
# outside python, set the ``RAY_ADDRESS`` environment variable to the address of the Ray client server
ray.init("ray://<host>:<port>")
:ref:`Learn more about setting up the Ray client server here <Ray-client>`.
You can dynamically specify local files, Python packages, and environment variables for your
application using :ref:`Runtime Environments <runtime-environments>`.
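For example, a minimal sketch (the package pin and variable below are placeholders):
.. code-block:: python

    import ray

    ray.init(
        address="auto",
        runtime_env={
            # Python packages to install on the cluster for this job.
            "pip": ["requests==2.28.1"],
            # Environment variables to set on all workers.
            "env_vars": {"MY_SETTING": "production"},
        },
    )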
.. note::
When deploying an application, the job will be killed if the driver
disconnects.
:ref:`A detached actor <actor-lifetimes>` can be used to avoid having a long-running driver.
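For example, a minimal sketch of a detached actor (the actor name and class here are placeholders):
.. code-block:: python

    import ray

    ray.init(address="auto")

    @ray.remote
    class Worker:
        def ping(self):
            return "alive"

    # A "detached" actor is not tied to this driver's lifetime; a later driver
    # can look it up by name with ray.get_actor("my_worker").
    worker = Worker.options(name="my_worker", lifetime="detached").remote()
    print(ray.get(worker.ping.remote()))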
Monitoring and observability
----------------------------
Ray comes with 3 main observability features:
1. :ref:`The dashboard <Ray-dashboard>`
2. :ref:`ray status <monitor-cluster>`
3. :ref:`Prometheus metrics <multi-node-metrics>`
Monitoring the cluster via the dashboard
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:ref:`The dashboard provides detailed information about the state of the cluster <Ray-dashboard>`,
including the running jobs, actors, workers, nodes, etc.
By default, the cluster launcher and operator will launch the dashboard, but
not publicly expose it.
If you launch your application via the cluster launcher, you can securely
port-forward local traffic to the dashboard via the ``ray dashboard`` command
(which establishes an SSH tunnel). The dashboard will now be visible at
``http://localhost:8265``.
The Kubernetes Operator makes the dashboard available via a Service targeting the Ray head pod.
You can :ref:`access the dashboard <ray-k8s-dashboard>` using ``kubectl port-forward``.
Observing the autoscaler
^^^^^^^^^^^^^^^^^^^^^^^^
The autoscaler makes decisions based on scheduling information and programmatic
information from the cluster. This information, along with the status of
starting nodes, can be accessed via the ``ray status`` command.
To dump the current state of a cluster launched via the cluster launcher, you
can run ``ray exec cluster.yaml "ray status"``.
For a more "live" monitoring experience, it is recommended that you run ``ray
status`` in a watch loop: ``ray exec cluster.yaml "watch -n 1 ray status"``.
With the Kubernetes operator, you should replace ``ray exec cluster.yaml`` with
``kubectl exec <head node pod>``.
Prometheus metrics
^^^^^^^^^^^^^^^^^^
Ray is capable of producing Prometheus metrics. When enabled, Ray produces some
metrics about the Ray core, and some internal metrics by default. It also
supports custom, user-defined metrics.
These metrics can be consumed by any metrics infrastructure which can ingest
metrics from the Prometheus server on the head node of the cluster.
:ref:`Learn more about setting up Prometheus here. <multi-node-metrics>`
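As a sketch of a user-defined metric (assuming the ``ray.util.metrics`` API; the metric name and tag below are placeholders):
.. code-block:: python

    import ray
    from ray.util.metrics import Counter

    ray.init(address="auto")

    # A custom application-level counter, exported through the same Prometheus
    # endpoint as Ray's built-in metrics.
    request_counter = Counter(
        "app_requests_total",
        description="Number of requests handled by the application.",
        tag_keys=("route",),
    )
    request_counter.inc(1.0, tags={"route": "/predict"})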
Best practices for deploying large clusters
-------------------------------------------
This section aims to document best practices for deploying Ray clusters at
large scale.
Networking configuration
^^^^^^^^^^^^^^^^^^^^^^^^
End users should only need to directly interact with the head node of the
cluster. In particular, there are 2 services which should be exposed to users:
1. The dashboard
2. The Ray client server
.. note::
While users only need 2 ports to connect to a cluster, the nodes within a
cluster require a much wider range of ports to communicate.
See :ref:`Ray port configuration <Ray-ports>` for a comprehensive list.
Applications (such as :ref:`Ray Serve <Rayserve>`) may also require
additional ports to work properly.
System configuration
^^^^^^^^^^^^^^^^^^^^
There are a few system level configurations that should be set when using Ray
at a large scale.
* Make sure ``ulimit -n`` is set to at least 65535. Ray opens many direct
connections between worker processes to avoid bottlenecks, so it can quickly
use a large number of file descriptors.
* Make sure ``/dev/shm`` is sufficiently large. Most ML/RL applications rely
heavily on the plasma store. By default, Ray will try to use ``/dev/shm`` for
the object store, but if it is not large enough (i.e. ``--object-store-memory``
> size of ``/dev/shm``), Ray will write the plasma store to disk instead, which
may cause significant performance problems.
* Use NVMe SSDs (or other high-performance storage) if possible. If
  :ref:`object spilling <object-spilling>` is enabled, Ray will spill objects to
  disk if necessary. This is most commonly needed for data processing
  workloads.
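
A quick sketch of checking and applying these settings on a node (the sizes
shown are illustrative, not recommendations):

.. code-block:: bash

    # Raise the open-file limit for the current shell before starting Ray.
    ulimit -n 65535

    # Check how much shared memory is available for the plasma object store.
    df -h /dev/shm

    # Optionally cap the object store so that it fits in /dev/shm (value in bytes).
    ray start --head --object-store-memory=$((32 * 1024 * 1024 * 1024))
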
Configuring the head node
^^^^^^^^^^^^^^^^^^^^^^^^^
In addition to the above changes, when deploying a large cluster, Ray's
architecture means that the head node is under extra load from the GCS.
* Make sure the head node has sufficient bandwidth. The most heavily stressed
  resource on the head node is outbound bandwidth. For large clusters (see the
  scalability envelope), we recommend using machines with networking
  characteristics at least as good as an r5dn.16xlarge on AWS EC2.
* Set ``resources: {"CPU": 0}`` on the head node. (For Ray clusters deployed using Helm,
set ``rayResources: {"CPU": 0}``.) Due to the heavy networking
load (and the GCS and dashboard processes), we recommend setting the number of
CPUs to 0 on the head node to avoid scheduling additional tasks on it.
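
For the cluster launcher, this might look like the following ``cluster.yaml``
fragment (the instance type is illustrative):

.. code-block:: yaml

    available_node_types:
        head_node:
            node_config:
                InstanceType: r5dn.16xlarge
            # Prevent tasks and actors from being scheduled on the head node.
            resources: {"CPU": 0}
    head_node_type: head_node
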
Configuring the autoscaler
^^^^^^^^^^^^^^^^^^^^^^^^^^
For large, long running clusters, there are a few parameters that can be tuned.
* Ensure your quotas for node types are set correctly.
* For long-running clusters, set the ``AUTOSCALER_MAX_NUM_FAILURES`` environment
  variable to a large number (or ``inf``) to avoid unexpected autoscaler
  crashes. The variable can be set by prepending ``export AUTOSCALER_MAX_NUM_FAILURES=inf;``
  to the head node's Ray start command.
  (Note: you may want a separate mechanism to detect if the autoscaler
  errors too often.)
* For large clusters, consider tuning ``upscaling_speed`` for faster
autoscaling.
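
In the cluster launcher YAML, these tweaks might look like the following sketch
(the flags shown mirror the defaults in the example configs; adjust them to your own setup):

.. code-block:: yaml

    # Allow the autoscaler to add nodes more aggressively (the default is 1.0).
    upscaling_speed: 2.0

    head_start_ray_commands:
        - ray stop
        - export AUTOSCALER_MAX_NUM_FAILURES=inf; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml
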
Picking nodes
^^^^^^^^^^^^^
Here are some tips for how to set your ``available_node_types`` for a cluster,
using AWS instance types as a concrete example.
General recommendations with AWS instance types:
**When to use GPUs**
* If you're using an RL/ML framework.
* You're doing something with TensorFlow/PyTorch/JAX (a framework that can
  leverage GPUs well).
**What type of GPU?**
* The latest-generation GPU is almost always the best bang for your buck (p3 > p2, g4
  > g3): for most well-designed applications the performance outweighs the
  price (the instance price may be higher, but you'll use the instance for less
  time).
* You may want to consider using older instances if you're doing dev work and
  won't actually fully utilize the GPUs, though.
* If you're doing training (ML or RL), you should use a P instance. If you're
  doing inference, you should use a G instance. The difference is the
  processing:VRAM ratio (training requires more memory).
**What type of CPU?**
* Again, stick to the latest generation; they're typically cheaper and faster.
* When in doubt, use M instances; they typically have the highest
  availability.
* If you know your application is memory-intensive (memory utilization is full,
  but CPU is not), go with an R instance.
* If you know your application is CPU-intensive, go with a C instance.
* If you have a big cluster, make the head node an instance with an n (r5dn or
  c5n).
**How many CPUs/GPUs?**
* Focus on your CPU:GPU ratio first and look at the utilization (the Ray dashboard
  should help with this). If your CPU utilization is low, add GPUs, or vice
  versa.
* The exact ratio will be very dependent on your workload.
* Once you find a good ratio, you should be able to scale up and keep the
  same ratio.
* You can't scale infinitely. Eventually, as you add more machines, your
  performance improvements will become sub-linear and may not be worth it. There may not
  be a good one-size-fits-all strategy at this point.
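
Putting these tips together, an illustrative (not prescriptive) set of AWS worker
node types might look like:

.. code-block:: yaml

    available_node_types:
        cpu_workers:
            node_config:
                InstanceType: m5.8xlarge      # general-purpose CPU workers
            min_workers: 0
            max_workers: 50
        gpu_workers:
            node_config:
                InstanceType: p3.8xlarge      # training; consider g4dn for inference
            min_workers: 0
            max_workers: 10
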
.. note::
If you're using RLlib, check out :ref:`the RLlib scaling guide
<rllib-scaling-guide>` for RLlib specific recommendations.

@ -1,85 +0,0 @@
.. include:: /_includes/clusters/announcement.rst
.. include:: we_are_hiring.rst
.. _cluster-index:
Ray Clusters Overview
=====================
What is a Ray cluster?
----------------------
One of Ray's strengths is the ability to leverage multiple machines for
distributed execution. Ray can, of course, be run on a single machine (and often
is), but its real power comes from using Ray on a cluster of machines.
Ray can automatically interact with the cloud provider to request or release
instances. You can specify :ref:`a configuration <cluster-config>` to launch
clusters on :ref:`AWS, GCP, Azure (community-maintained), Aliyun (community-maintained), on-premise, or even on
your custom node provider <cluster-cloud>`. Ray can also be run on :ref:`Kubernetes <kuberay-index>` infrastructure.
Your cluster can have a fixed size
or :ref:`automatically scale up and down<cluster-autoscaler>` depending on the
demands of your application.
Where to go from here?
----------------------
.. panels::
:container: text-center
:column: col-lg-6 px-2 py-2
:card:
**Quick Start**
^^^
In this quick start tutorial you will take a sample application designed to
run on a laptop and scale it up in the cloud.
+++
.. link-button:: ref-cluster-quick-start
:type: ref
:text: Ray Clusters Quick Start
:classes: btn-outline-info btn-block
---
**Key Concepts**
^^^
Understand the key concepts behind Ray Clusters. Learn about the main
concepts and the different ways to interact with a cluster.
+++
.. link-button:: cluster-key-concepts
:type: ref
:text: Learn Key Concepts
:classes: btn-outline-info btn-block
---
**Deployment Guide**
^^^
Learn how to set up a distributed Ray cluster and run your workloads on it.
+++
.. link-button:: ref-deployment-guide
:type: ref
:text: Deploy on a Ray Cluster
:classes: btn-outline-info btn-block
---
**API**
^^^
Get more in-depth information about the various APIs to interact with Ray
Clusters, including the :ref:`Ray cluster config YAML and CLI<cluster-config>`,
the :ref:`Ray Client API<ray-client>` and the
:ref:`Ray job submission API<ray-job-submission-api-ref>`.
+++
.. link-button:: ref-cluster-api
:type: ref
:text: Read the API Reference
:classes: btn-outline-info btn-block
.. include:: /_includes/clusters/announcement_bottom.rst

@ -1,382 +0,0 @@
.. _jobs-overview:
==================
Ray Job Submission
==================
.. note::
This component is in **beta**. APIs may change before becoming stable. This feature requires a full installation of Ray using ``pip install "ray[default]"``.
Ray Job submission is a mechanism to submit locally developed and tested applications to a remote Ray cluster. It simplifies the experience of packaging, deploying, and managing a Ray application.
Jump to the :ref:`API Reference<ray-job-submission-api-ref>`, or continue reading for a quick overview.
Concepts
--------
- **Job**: A Ray application submitted to a Ray cluster for execution. Consists of (1) an entrypoint command and (2) a :ref:`runtime environment<runtime-environments>`, which may contain file and package dependencies.
- **Job Lifecycle**: When a job is submitted, it runs once to completion or failure. Retries or different runs with different parameters should be handled by the submitter. Jobs are bound to the lifetime of a Ray cluster, so if the cluster goes down, all running jobs on that cluster will be terminated.
- **Job Manager**: An entity external to the Ray cluster that manages the lifecycle of a job (scheduling, killing, polling status, getting logs, and persisting inputs/outputs), and potentially also manages the lifecycle of Ray clusters. Can be any third-party framework with these abilities, such as Apache Airflow or Kubernetes Jobs.
Quick Start Example
-------------------
Let's start with a sample job that can be run locally. The following script uses Ray APIs to increment a counter and print its value, and print the version of the ``requests`` module it's using:
.. code-block:: python
# script.py
import ray
import requests
ray.init()
@ray.remote
class Counter:
def __init__(self):
self.counter = 0
def inc(self):
self.counter += 1
def get_counter(self):
return self.counter
counter = Counter.remote()
for _ in range(5):
ray.get(counter.inc.remote())
print(ray.get(counter.get_counter.remote()))
print(requests.__version__)
Put this file in a local directory of your choice, with filename ``script.py``, so your working directory will look like:
.. code-block:: bash
| your_working_directory ("./")
| ├── script.py
Next, start a local Ray cluster:
.. code-block:: bash
ray start --head
Local node IP: 127.0.0.1
INFO services.py:1360 -- View the Ray dashboard at http://127.0.0.1:8265
Note the address and port returned in the terminal---this will be where we submit job requests to, as explained further in the examples below. If you do not see this, ensure the Ray Dashboard is installed by running :code:`pip install "ray[default]"`.
At this point, the job is ready to be submitted by one of the :ref:`Ray Job APIs<ray-job-apis>`.
Continue on to see examples of running and interacting with this sample job.
.. _ray-job-apis:
Ray Job Submission APIs
-----------------------
Ray provides three APIs for job submission:
* A :ref:`command line interface<ray-job-cli>`, the easiest way to get started.
* A :ref:`Python SDK<ray-job-sdk>`, the recommended way to submit jobs programmatically.
* An :ref:`HTTP REST API<ray-job-rest-api>`. Both the CLI and SDK call into the REST API under the hood.
All three APIs for job submission share the following key inputs:
* **Entrypoint**: The shell command to run the job.
* Example: :code:`python my_ray_script.py`
* Example: :code:`echo hello`
* **Runtime Environment**: Specifies files, packages, and other dependencies for your job. See :ref:`Runtime Environments<runtime-environments>` for details.
* Example: ``{working_dir="/data/my_files", pip=["requests", "pendulum==2.1.2"]}``
* Of special note: the field :code:`working_dir` specifies the files your job needs to run. The entrypoint command will be run in the remote cluster's copy of the `working_dir`, so for the entrypoint ``python my_ray_script.py``, the file ``my_ray_script.py`` must be in the directory specified by ``working_dir``.
* If :code:`working_dir` is a local directory: It will be automatically zipped and uploaded to the target Ray cluster, then unpacked to where your submitted application runs. This option has a size limit of 100 MB and is recommended for rapid iteration and experimentation.
* If :code:`working_dir` is a remote URI hosted on S3, GitHub or others: It will be downloaded and unpacked to where your submitted application runs. This option has no size limit and is recommended for production use. For details, see :ref:`remote-uris`.
.. _ray-job-cli:
CLI
^^^
The easiest way to get started with Ray job submission is to use the Job Submission CLI.
Jump to the :ref:`API Reference<ray-job-submission-cli-ref>`, or continue reading for a walkthrough.
Using the CLI on a local cluster
""""""""""""""""""""""""""""""""
First, start a local Ray cluster (e.g. with ``ray start --head``) and open a terminal (on the head node, which is your local machine).
Next, set the :code:`RAY_ADDRESS` environment variable:
.. code-block:: bash
export RAY_ADDRESS="http://127.0.0.1:8265"
This tells the jobs CLI how to find your Ray cluster. Here we are specifying port ``8265`` on the head node, the port that the Ray Dashboard listens on.
(Note that this port is different from the port used to connect to the cluster via :ref:`Ray Client <ray-client>`, which is ``10001`` by default.)
Now you are ready to use the CLI.
Here are some examples of CLI commands from the Quick Start example and their output:
.. code-block::
ray job submit --runtime-env-json='{"working_dir": "./", "pip": ["requests==2.26.0"]}' -- python script.py
2021-12-01 23:04:52,672 INFO cli.py:25 -- Creating JobSubmissionClient at address: http://127.0.0.1:8265
2021-12-01 23:04:52,809 INFO sdk.py:144 -- Uploading package gcs://_ray_pkg_bbcc8ca7e83b4dc0.zip.
2021-12-01 23:04:52,810 INFO packaging.py:352 -- Creating a file package for local directory './'.
2021-12-01 23:04:52,878 INFO cli.py:105 -- Job submitted successfully: raysubmit_RXhvSyEPbxhcXtm6.
2021-12-01 23:04:52,878 INFO cli.py:106 -- Query the status of the job using: `ray job status raysubmit_RXhvSyEPbxhcXtm6`.
ray job status raysubmit_RXhvSyEPbxhcXtm6
2021-12-01 23:05:00,356 INFO cli.py:25 -- Creating JobSubmissionClient at address: http://127.0.0.1:8265
2021-12-01 23:05:00,371 INFO cli.py:127 -- Job status for 'raysubmit_RXhvSyEPbxhcXtm6': PENDING.
2021-12-01 23:05:00,371 INFO cli.py:129 -- Job has not started yet, likely waiting for the runtime_env to be set up.
ray job status raysubmit_RXhvSyEPbxhcXtm6
2021-12-01 23:05:37,751 INFO cli.py:25 -- Creating JobSubmissionClient at address: http://127.0.0.1:8265
2021-12-01 23:05:37,764 INFO cli.py:127 -- Job status for 'raysubmit_RXhvSyEPbxhcXtm6': SUCCEEDED.
2021-12-01 23:05:37,764 INFO cli.py:129 -- Job finished successfully.
ray job logs raysubmit_RXhvSyEPbxhcXtm6
2021-12-01 23:05:59,026 INFO cli.py:25 -- Creating JobSubmissionClient at address: http://127.0.0.1:8265
2021-12-01 23:05:23,037 INFO worker.py:851 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379
(pid=runtime_env) 2021-12-01 23:05:23,212 WARNING conda.py:54 -- Injecting /Users/jiaodong/Workspace/ray/python to environment /tmp/ray/session_2021-12-01_23-04-44_771129_7693/runtime_resources/conda/99305e1352b2dcc9d5f38c2721c7c1f1cc0551d5 because _inject_current_ray flag is on.
(pid=runtime_env) 2021-12-01 23:05:23,212 INFO conda.py:328 -- Finished setting up runtime environment at /tmp/ray/session_2021-12-01_23-04-44_771129_7693/runtime_resources/conda/99305e1352b2dcc9d5f38c2721c7c1f1cc0551d5
(pid=runtime_env) 2021-12-01 23:05:23,213 INFO working_dir.py:85 -- Setup working dir for gcs://_ray_pkg_bbcc8ca7e83b4dc0.zip
1
2
3
4
5
2.26.0
ray job list
{'raysubmit_AYhLMgDJ6XBQFvFP': JobInfo(status='SUCCEEDED', message='Job finished successfully.', error_type=None, start_time=1645908622, end_time=1645908623, metadata={}, runtime_env={}),
'raysubmit_su9UcdUviUZ86b1t': JobInfo(status='SUCCEEDED', message='Job finished successfully.', error_type=None, start_time=1645908669, end_time=1645908670, metadata={}, runtime_env={})}
.. warning::
When using the CLI, do not wrap the entrypoint command in quotes. For example, use
``ray job submit --working_dir="." -- python script.py`` instead of ``ray job submit --working_dir="." -- "python script.py"``.
Otherwise you may encounter the error ``/bin/sh: 1: python script.py: not found``.
.. tip::
If your job is stuck in `PENDING`, the runtime environment installation may be stuck.
(For example, the `pip` installation or `working_dir` download may be stalled due to internet issues.)
You can check the installation logs at `/tmp/ray/session_latest/logs/runtime_env_setup-*.log` for details.
Using the CLI on a remote cluster
"""""""""""""""""""""""""""""""""
Above, we ran the "Quick Start" example on a local Ray cluster. When connecting to a `remote` cluster via the CLI, you need to be able to access the Ray Dashboard port of the cluster over HTTP.
One way to do this is to port forward ``127.0.0.1:8265`` on your local machine to ``127.0.0.1:8265`` on the head node.
If you started your remote cluster with the :ref:`Ray Cluster Launcher <ref-cluster-quick-start>`, then the port forwarding can be set up automatically using the ``ray dashboard`` command (see :ref:`monitor-cluster` for details).
To use this, run the following command on your local machine, where ``cluster.yaml`` is the configuration file you used to launch your cluster:
.. code-block:: bash
ray dashboard cluster.yaml
Once this is running, check that you can view the Ray Dashboard in your local browser at ``http://127.0.0.1:8265``.
Next, set the :code:`RAY_ADDRESS` environment variable:
.. code-block:: bash
export RAY_ADDRESS="http://127.0.0.1:8265"
(Note that this port is different from the port used to connect to the cluster via :ref:`Ray Client <ray-client>`, which is ``10001`` by default.)
Now you will be able to use the Jobs CLI on your local machine as in the example above to interact with your remote Ray cluster.
Using the CLI on Kubernetes
"""""""""""""""""""""""""""
The instructions above still apply, but you can achieve the dashboard port forwarding using ``kubectl port-forward``:
https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/
Alternatively, you can set up Ingress to the dashboard port of the cluster over HTTP: https://kubernetes.io/docs/concepts/services-networking/ingress/
.. _ray-job-sdk:
Python SDK
^^^^^^^^^^
The Job Submission Python SDK is the recommended way to submit jobs programmatically. Jump to the :ref:`API Reference<ray-job-submission-sdk-ref>`, or continue reading for a quick overview.
SDK calls are made via a ``JobSubmissionClient`` object. To initialize the client, provide the Ray cluster head node address and the port used by the Ray Dashboard (``8265`` by default). For this example, we'll use a local Ray cluster, but the same example will work for remote Ray cluster addresses.
.. code-block:: python
from ray.job_submission import JobSubmissionClient
# If using a remote cluster, replace 127.0.0.1 with the head node's IP address.
client = JobSubmissionClient("http://127.0.0.1:8265")
Then we can submit our application to the Ray cluster via the Job SDK.
.. code-block:: python
job_id = client.submit_job(
# Entrypoint shell command to execute
entrypoint="python script.py",
# Runtime environment for the job, specifying a working directory and pip package
runtime_env={
"working_dir": "./",
"pip": ["requests==2.26.0"]
}
)
.. tip::
By default, the Ray job server will generate a new ``job_id`` and return it, but you can alternatively choose a unique ``job_id`` string first and pass it into :code:`submit_job`.
In this case, the Job will be executed with your given id, and will throw an error if the same ``job_id`` is submitted more than once for the same Ray cluster.
Now we can write a simple polling loop that checks the job status until it reaches a terminal state (namely, ``JobStatus.SUCCEEDED``, ``JobStatus.STOPPED``, or ``JobStatus.FAILED``), and gets the logs at the end.
We expect to see the numbers printed from our actor, as well as the correct version of the :code:`requests` module specified in the ``runtime_env``.
.. code-block:: python
from ray.job_submission import JobStatus
import time
def wait_until_finish(job_id):
start = time.time()
timeout = 5
while time.time() - start <= timeout:
status = client.get_job_status(job_id)
print(f"status: {status}")
if status in {JobStatus.SUCCEEDED, JobStatus.STOPPED, JobStatus.FAILED}:
break
time.sleep(1)
wait_until_finish(job_id)
logs = client.get_job_logs(job_id)
The output should be as follows:
.. code-block:: bash
status: JobStatus.PENDING
status: JobStatus.RUNNING
status: JobStatus.SUCCEEDED
1
2
3
4
5
2.26.0
.. tip::
Instead of a local directory (``"./"`` in this example), you can also specify remote URIs for your job's working directory, such as S3 buckets or Git repositories. See :ref:`remote-uris` for details.
A submitted job can be stopped by the user before it finishes executing.
.. code-block:: python
job_id = client.submit_job(
# Entrypoint shell command to execute
entrypoint="python -c 'import time; time.sleep(60)'",
runtime_env={}
)
wait_until_finish(job_id)
client.stop_job(job_id)
wait_until_finish(job_id)
logs = client.get_job_logs(job_id)
To get information about all jobs, call ``client.list_jobs()``. This returns a ``Dict[str, JobInfo]`` object mapping Job IDs to their information.
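
For example, a brief sketch using the client created above:

.. code-block:: python

    # Print the status of every job submitted to this cluster.
    for job_id, info in client.list_jobs().items():
        print(job_id, info.status, info.message)
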
For full details, see the :ref:`API Reference<ray-job-submission-sdk-ref>`.
.. _ray-job-rest-api:
REST API
^^^^^^^^
Under the hood, both the Python SDK and the CLI make HTTP calls to the job server running on the Ray head node. You can also directly send requests to the corresponding endpoints via HTTP if needed:
**Submit Job**
.. code-block:: python
import requests
import json
import time
resp = requests.post(
"http://127.0.0.1:8265/api/jobs/",
json={
"entrypoint": "echo hello",
"runtime_env": {},
"job_id": None,
"metadata": {"job_submission_id": "123"}
}
)
rst = json.loads(resp.text)
job_id = rst["job_id"]
**Query and poll for Job status**
.. code-block:: python
start = time.time()
while time.time() - start <= 10:
resp = requests.get(
"http://127.0.0.1:8265/api/jobs/<job_id>"
)
rst = json.loads(resp.text)
status = rst["status"]
print(f"status: {status}")
if status in {JobStatus.SUCCEEDED, JobStatus.STOPPED, JobStatus.FAILED}:
break
time.sleep(1)
**Query for logs**
.. code-block:: python
resp = requests.get(
"http://127.0.0.1:8265/api/jobs/<job_id>/logs"
)
rst = json.loads(resp.text)
logs = rst["logs"]
**List all jobs**
.. code-block:: python
resp = requests.get(
"http://127.0.0.1:8265/api/jobs/"
)
print(resp.json())
# {"job_id": {"metadata": ..., "status": ..., "message": ...}, ...}
Job Submission Architecture
----------------------------
The following diagram shows the underlying structure and steps for each submitted job.
.. image:: https://raw.githubusercontent.com/ray-project/images/master/docs/job/job_submission_arch_v2.png

@ -1,107 +1,78 @@
.. include:: we_are_hiring.rst
.. _cluster-key-concepts:
Key Concepts
============
Cluster
-------
.. _cluster-key-concepts:
A Ray cluster is a set of one or more nodes that are running Ray and share the
same :ref:`head node<cluster-node-types>`.
This page introduces key concepts for Ray clusters:
.. _cluster-node-types:
.. contents::
:local:
Node types
----------
Ray Cluster
-----------
A Ray cluster consists of a single :ref:`head node <cluster-head-node>`
and any number of connected :ref:`worker nodes <cluster-worker-nodes>`:
A Ray cluster consists of a :ref:`head node<cluster-head-node>` and a set of
:ref:`worker nodes<cluster-worker-node>`.
.. image:: ray-cluster.jpg
.. figure:: images/ray-cluster.svg
:align: center
:width: 600px
*A Ray cluster with two worker nodes. Each node runs Ray helper processes to
facilitate distributed scheduling and memory management. The head node runs
additional control processes (highlighted in blue).*
The number of worker nodes may be *autoscaled* with application demand as specified
by your Ray cluster configuration. The head node runs the :ref:`autoscaler <cluster-autoscaler>`.
.. note::
Ray nodes are implemented as pods when :ref:`running on Kubernetes <kuberay-index>`.
Users can submit jobs for execution on the Ray cluster, or can interactively use the
cluster by connecting to the head node and running `ray.init`. See
:ref:`Ray Jobs <jobs-quickstart>` for more information.
.. _cluster-head-node:
Head node
~~~~~~~~~
Head Node
---------
Every Ray cluster has one node which is designated as the *head node* of the cluster.
The head node is identical to other worker nodes, except that it also runs singleton processes responsible for cluster management such as the
:ref:`autoscaler <cluster-autoscaler>` and the Ray driver processes
:ref:`which run Ray jobs <cluster-clients-and-jobs>`. Ray may schedule
tasks and actors on the head node just like any other worker node, unless configured otherwise.
The head node is the first node started by the
:ref:`Ray cluster launcher<cluster-launcher>` when trying to launch a Ray
cluster. Among other things, the head node holds the :ref:`Global Control Store
(GCS)<memory>` and runs the :ref:`autoscaler<cluster-autoscaler>`. Once the head
node is started, it will be responsible for launching any additional
:ref:`worker nodes<cluster-worker-node>`. The head node itself will also execute
tasks and actors to utilize its capacity.
.. _cluster-worker-nodes:
.. _cluster-worker-node:
Worker node
~~~~~~~~~~~
A worker node is any node in the Ray cluster that is not functioning as the head node.
Therefore, worker nodes are simply responsible for executing tasks and actors.
When a worker node is launched, it will be given the address of the head node to
form a cluster.
.. _cluster-launcher:
Cluster launcher
----------------
The cluster launcher is a process responsible for bootstrapping the Ray cluster
by launching the :ref:`head node<cluster-head-node>`. For more information on how
to use the cluster launcher, refer to
:ref:`cluster launcher CLI commands documentation<cluster-commands>` and the
corresponding :ref:`documentation for the configuration file<cluster-config>`.
Worker Node
------------
*Worker nodes* do not run any head node management processes, and serve only to run user code in Ray tasks and actors. They participate in distributed scheduling, as well as the storage and distribution of Ray objects in :ref:`cluster memory <memory>`.
.. _cluster-autoscaler:
Autoscaler
----------
Autoscaling
-----------
The autoscaler is a process that runs on the :ref:`head node<cluster-head-node>`
and is responsible for adding or removing :ref:`worker nodes<cluster-worker-node>`
to meet the needs of the Ray workload while matching the specification in the
:ref:`cluster config file<cluster-config>`. In particular, if the resource
demands of the Ray workload exceed the current capacity of the cluster, the
autoscaler will try to add nodes. Conversely, if a node is idle for long enough,
the autoscaler will remove it from the cluster. To learn more about autoscaling,
refer to the :ref:`Ray cluster deployment guide<deployment-guide-autoscaler>`.
The *Ray autoscaler* is a process that runs on the :ref:`head node <cluster-head-node>` (or as a sidecar container in the head pod if :ref:`using Kubernetes <kuberay-index>`).
When the resource demands of the Ray workload exceed the
current capacity of the cluster, the autoscaler will try to increase the number of worker nodes. When worker nodes
sit idle, the autoscaler will remove worker nodes from the cluster.
Ray Client
----------
The Ray Client is an API that connects a Python script to a remote Ray cluster.
To learn more about the Ray Client, you can refer to the :ref:`documentation<ray-client>`.
It is important to understand that the autoscaler only reacts to task and actor resource requests, and not application metrics or physical resource utilization.
To learn more about autoscaling, refer to the user guides for Ray clusters on :ref:`VMs <cloud-vm-index>` and :ref:`Kubernetes <kuberay-index>`.
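
For example, the following sketch creates resource demand that the autoscaler
reacts to, regardless of how busy the CPUs inside the tasks actually are:

.. code-block:: python

    import time

    import ray

    ray.init()

    # Each pending task requests 4 CPUs. If the aggregate request exceeds the
    # cluster's current capacity, the autoscaler adds worker nodes; the actual
    # CPU utilization inside the tasks does not factor into this decision.
    @ray.remote(num_cpus=4)
    def mostly_idle_task():
        time.sleep(60)
        return "done"

    ray.get([mostly_idle_task.remote() for _ in range(100)])
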
Job submission
--------------
.. _cluster-clients-and-jobs:
Ray Job submission is a mechanism to submit locally developed and tested applications
to a remote Ray cluster. It simplifies the experience of packaging, deploying,
and managing a Ray application. To learn more about Ray jobs, refer to the
:ref:`documentation<ray-job-submission-api-ref>`.
Ray Jobs
--------
Cloud clusters
--------------
The main method for running a workload on a Ray cluster is to use Ray Jobs.
Ray Jobs enable users to submit locally developed-and-tested applications to a
remote Ray cluster. Ray Job Submission simplifies the experience of packaging,
deploying, and managing a Ray application.
If you're using AWS, GCP, Azure (community-maintained), or Aliyun (community-maintained), you can use the
:ref:`Ray cluster launcher<cluster-launcher>` to launch cloud clusters, which
greatly simplifies the cluster setup process.
For interactive development, the following additional methods are available:
Cluster managers
----------------
* Directly running a script or notebook on any head or worker node.
* Using the Ray Client to connect remotely to the cluster.
You can simplify the process of managing Ray clusters using a number of popular
cluster managers including :ref:`Kubernetes<kuberay-index>`,
:ref:`YARN<ray-yarn-deploy>`, :ref:`Slurm<ray-slurm-deploy>` and :ref:`LSF<ray-LSF-deploy>`.
Kubernetes (K8s) operator
-------------------------
Deployments of Ray on Kubernetes are managed by the Ray Kubernetes Operator. The
Ray Operator makes it easy to deploy clusters of Ray pods within a Kubernetes
cluster. To learn more about the K8s operator, refer to
the :ref:`documentation<kuberay-index>`.
To learn how to run workloads on a Ray cluster, refer to the :ref:`Ray Jobs guide <jobs-overview>`.

@ -1,40 +0,0 @@
# Ray on Kubernetes
(kuberay-index)=
## Overview
You can execute your distributed Ray programs on a Kubernetes cluster.
The [KubeRay Operator](https://ray-project.github.io/kuberay/components/operator/) provides a Kubernetes-native
interface for managing Ray clusters. Each Ray cluster consists of a head pod and a collection of worker pods.
Optional autoscaling support allows the KubeRay Operator to size your Ray clusters according to the requirements
of your Ray workload, adding and removing Ray pods as needed.
## Learn More
The Ray docs present all the information you need to start running Ray workloads on Kubernetes.
```{eval-rst}
.. panels::
:container: text-center
:column: col-lg-12 p-2
:card:
**Getting started**
^^^
Learn how to start a Ray cluster and deploy Ray applications on Kubernetes.
+++
.. link-button:: kuberay-quickstart
:type: ref
:text: Get Started with Ray on Kubernetes
:classes: btn-outline-info btn-block
```
## The KubeRay project
Ray's Kubernetes support is developed at the [KubeRay GitHub repository](https://github.com/ray-project/kuberay), under the broader [Ray project](https://github.com/ray-project/).
- Visit the [KubeRay GitHub repo](https://github.com/ray-project/kuberay) to track progress, report bugs, propose new features, or contribute to
the project.
- Check out the [KubeRay docs](https://ray-project.github.io/kuberay/) for further technical information, developer guides,
and discussion of new and upcoming features.

@ -1,9 +0,0 @@
(kuberay-config)=
# Configuration
:::{warning}
This page is under construction!
:::
Details on the key fields in the RayCluster CRD.

@ -1,9 +0,0 @@
(kuberay-gpu)=
# Deploying GPU workloads
:::{warning}
This page is under construction!
:::
Details on how to use GPUs with Ray on Kubernetes.

@ -1,17 +0,0 @@
(kuberay-k8s-setup)=
# Kubernetes cluster setup
:::{warning}
This page is under construction!
:::
Notes on how to set up Kubernetes infrastructure for Ray
on the major cloud providers.
Terraform configs could eventually be included here.
## AWS
## GCP
## Azure

@ -1,24 +0,0 @@
(kuberay-vs-legacy)=
# KubeRay vs. the legacy Ray Operator
:::{warning}
This page is under construction!
:::
## Discussion
High-level comparison.
### What is the same.
High-level purpose. Style of interface.
### What is different.
Internal design, choice of language and framework, overall stability and scalability.
Interface details.
## Migration notes
Migration instructions, configuration differences.

@ -1,10 +0,0 @@
(kuberay-logging)=
# Logging
:::{warning}
This page is under construction!
:::
Notes on logging, including instructions for setting up fluentd.
A guide to logging using fluentd.

@ -1,10 +0,0 @@
(kuberay-ml-example)=
# Example machine learning workloads
:::{warning}
This page is under construction!
:::
At least one end-to-end example of an actual machine learning workload,
preferably with GPUs, possibly engaging the autoscaling functionality.

@ -1,10 +0,0 @@
(kuberay-networking)=
# Networking
:::{warning}
This page is under construction!
:::
How to expose services (client, serve, dashboard) to
the internet. Ingress setup.

@ -1,243 +0,0 @@
:orphan:
.. include:: we_are_hiring.rst
.. _k8s-advanced:
Ray Operator Advanced Configuration
===================================
This document covers configuration options and other details concerning autoscaling Ray clusters on Kubernetes.
We recommend first reading this :ref:`introductory guide<ray-k8s-deploy>`.
.. _helm-config:
Helm chart configuration
------------------------
This section discusses ``RayCluster`` configuration options exposed in the Ray Helm chart's `values.yaml`_ file.
The default settings in ``values.yaml`` were chosen for the purposes of demonstration.
For production use cases, the values should be modified. For example, you will probably want to increase Ray Pod resource requests.
Setting custom chart values
~~~~~~~~~~~~~~~~~~~~~~~~~~~
To configure Helm chart values, you can pass in a custom values ``yaml`` and/or set individual fields.
.. code-block:: shell
# Pass in a custom values yaml.
$ helm install example-cluster -f custom_values.yaml ./ray
# Set custom values on the command line.
$ helm install example-cluster --set image=rayproject/ray:1.2.0 ./ray
Refer to the `Helm docs`_ for more information.
Ray cluster configuration
~~~~~~~~~~~~~~~~~~~~~~~~~
A :ref:`Ray cluster<cluster-index>` consists of a head node and a collection of worker nodes.
When deploying Ray on Kubernetes, each Ray node runs in its own Kubernetes Pod.
The ``podTypes`` field of ``values.yaml`` represents the pod configurations available for use as nodes in the Ray cluster.
The key of each ``podType`` is a user-defined name. The field ``headPodType`` identifies the name of the ``podType`` to use for the Ray head node.
The rest of the ``podTypes`` are used as configuration for the Ray worker nodes.
Each ``podType`` specifies ``minWorkers`` and ``maxWorkers`` fields.
The autoscaler will try to maintain at least ``minWorkers`` of the ``podType`` and can scale up to
``maxWorkers`` according to the resource demands of the Ray workload. A common pattern is to specify ``minWorkers`` = ``maxWorkers`` = 0
for the head ``podType``; this signals that the ``podType`` is to be used only for the head node.
You can use `helm upgrade`_ to adjust the fields ``minWorkers`` and ``maxWorkers`` without :ref:`restarting<k8s-restarts>` the Ray cluster.
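
For example, a sketch of adjusting the bounds of the default worker ``podType``
in an existing release (the release and namespace names match the examples later on this page):

.. code-block:: shell

    # Adjust the scaling bounds of the worker podType without restarting Ray.
    # --reuse-values keeps the other chart values from the existing release.
    $ helm -n ray upgrade example-cluster ./ray --reuse-values \
        --set podTypes.rayWorkerType.minWorkers=2 \
        --set podTypes.rayWorkerType.maxWorkers=8
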
The fields ``CPU``, ``GPU``, ``memory``, and ``nodeSelector`` configure the Kubernetes ``PodSpec`` to use for nodes
of the ``podType``. The ``image`` field determines the Ray container image used by all nodes in the Ray cluster.
The ``rayResources`` field of each ``podType`` can be used to signal the presence of custom resources to Ray.
To schedule Ray tasks and actors that use custom hardware resources, ``rayResources`` can be used in conjunction with
``nodeSelector``:
- Use ``nodeSelector`` to constrain workers of a ``podType`` to run on a Kubernetes Node with specialized hardware (e.g. a particular GPU accelerator.)
- Signal availability of the hardware for that ``podType`` with ``rayResources: {"custom_resource": 3}``.
- Schedule a Ray task or actor to use that resource with ``@ray.remote(resources={"custom_resource": 1})``.
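
For example, if a worker ``podType`` advertises ``rayResources: {"custom_resource": 3}``,
a minimal sketch of consuming it from application code:

.. code-block:: python

    import ray

    ray.init()

    # This task can only be scheduled on nodes whose podType advertises
    # "custom_resource", e.g. pods constrained to specialized hardware.
    @ray.remote(resources={"custom_resource": 1})
    def use_special_hardware():
        return "ran on a node with custom_resource"

    print(ray.get(use_special_hardware.remote()))
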
By default, the fields ``CPU``, ``GPU``, and ``memory`` are used to configure cpu, gpu, and memory resources advertised to Ray.
However, ``rayResources`` can be used to override this behavior. For example, ``rayResources: {"CPU": 0}`` can be set for the head podType
to avoid scheduling tasks on the Ray head node.
Refer to the documentation in `values.yaml`_ for more details.
.. note::
If your application could benefit from additional configuration options in the Ray Helm chart,
(e.g. exposing more PodSpec fields), feel free to open a `feature request`_ on
the Ray GitHub or a `discussion thread`_ on the Ray forums.
For complete configurability, it is also possible to launch a Ray cluster :ref:`without the Helm chart<no-helm>`
or to modify the Helm chart.
.. note::
Some things to keep in mind about the scheduling of Ray worker pods and Ray tasks/actors:
1. The Ray Autoscaler executes scaling decisions by sending pod creation requests to the Kubernetes API server.
If your Kubernetes cluster cannot accommodate more worker pods of a given ``podType``, requested pods will enter
a ``Pending`` state until the pod can be scheduled or a `timeout`_ expires.
2. If a Ray task requests more resources than available in any ``podType``, the Ray task cannot be scheduled.
Running multiple Ray clusters
-----------------------------
The Ray Operator can manage multiple Ray clusters running within a single Kubernetes cluster.
Since Helm does not support sharing resources between different releases, an additional Ray cluster
must be launched in a Helm release separate from the release used to launch the Operator.
To enable launching with multiple Ray Clusters, the Ray Helm chart includes two flags:
- ``operatorOnly``: Start the Operator without launching a Ray cluster.
- ``clusterOnly``: Create a RayCluster custom resource without installing the Operator. (If the Operator has already been installed, a new Ray cluster will be launched.)
The following commands will install the Operator and two Ray Clusters in
three separate Helm releases:
.. code-block:: shell
# Install the operator in its own Helm release.
$ helm install ray-operator --set operatorOnly=true ./ray
# Install a Ray cluster in a new namespace "ray".
$ helm -n ray install example-cluster --set clusterOnly=true ./ray --create-namespace
# Install a second Ray cluster. Launch the second cluster without any workers.
$ helm -n ray install example-cluster2 \
--set podTypes.rayWorkerType.minWorkers=0 --set clusterOnly=true ./ray
# Examine the pods in both clusters.
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
example-cluster-ray-head-type-v6tt9 1/1 Running 0 35s
example-cluster-ray-worker-type-fmn4k 1/1 Running 0 22s
example-cluster-ray-worker-type-r6m7k 1/1 Running 0 22s
example-cluster2-ray-head-type-tj666 1/1 Running 0 15s
Alternatively, the Operator and one of the Ray Clusters can be installed in the same Helm release:
.. code-block:: shell
# Start the operator. Install a Ray cluster in a new namespace.
helm -n ray install example-cluster --create-namespace ./ray
# Start another Ray cluster.
# The cluster will be managed by the operator created in the last command.
$ helm -n ray install example-cluster2 \
--set podTypes.rayWorkerType.minWorkers=0 --set clusterOnly=true ./ray
The Operator pod outputs autoscaling logs for all of the Ray clusters it manages.
Each line of output is prefixed by the string :code:`<cluster name>,<namespace>`.
This string can be used to filter for a specific Ray cluster's logs:
.. code-block:: shell
# The last 100 lines of logging output for the cluster with name "example-cluster2" in namespace "ray":
$ kubectl logs \
$(kubectl get pod -l cluster.ray.io/component=operator -o custom-columns=:metadata.name) \
| grep example-cluster2,ray | tail -n 100
.. _k8s-cleanup:
Cleaning up resources
---------------------
When cleaning up,
**RayCluster resources must be deleted before the Operator deployment is deleted**.
This is because the Operator must remove a `finalizer`_ from the ``RayCluster`` resource to allow
deletion of the resource to complete.
If the Operator and ``RayCluster`` are created as part of the same Helm release,
the ``RayCluster`` must be deleted :ref:`before<k8s-cleanup-basic>` uninstalling the Helm release.
If the Operator and one or more ``RayClusters`` are created in multiple Helm releases,
the ``RayCluster`` releases must be uninstalled before the Operator release.
To remedy a situation where the Operator deployment was deleted first and ``RayCluster`` deletion is hanging, try one of the following:
- Manually delete the ``RayCluster``'s finalizers with ``kubectl edit`` or ``kubectl patch``.
- Restart the Operator so that it can remove ``RayCluster`` finalizers. Then remove the Operator.
Cluster-scoped vs. namespaced operators
---------------------------------------
By default, the Ray Helm chart installs a ``cluster-scoped`` operator.
This means that the operator manages all Ray clusters in your Kubernetes cluster, across all namespaces.
The namespace into which the Operator Deployment is launched is determined by the chart field ``operatorNamespace``.
If this field is unset, the operator is launched into namespace ``default``.
It is also possible to run a ``namespace-scoped`` Operator.
This means that the Operator is launched into the namespace of the Helm release and manages only
Ray clusters in that namespace. To run a namespaced Operator, add the flag ``--set namespacedOperator=True``
to your Helm install command.
.. warning::
Do not simultaneously run namespaced and cluster-scoped Ray Operators within one Kubernetes cluster, as this will lead to unintended effects.
.. _no-helm:
Deploying without Helm
----------------------
It is possible to deploy the Ray Operator without Helm.
The necessary configuration files are available on the `Ray GitHub`_.
The following manifests should be installed in the order listed:
- The `RayCluster CRD`_.
- The Ray Operator, `namespaced`_ or `cluster-scoped`_. Note that the cluster-scoped operator is configured to run in namespace ``default``. Modify as needed.
- A RayCluster custom resource: `example`_.
Ray Cluster Lifecycle
---------------------
.. _k8s-restarts:
Restart behavior
~~~~~~~~~~~~~~~~
The Ray cluster will restart under the following circumstances:
- There is an error in the cluster's autoscaling process. This will happen if the Ray head node goes down.
- There has been a change to the Ray head pod configuration. In terms of the Ray Helm chart, this means either ``image`` or one of the following fields of the head's ``podType`` has been modified: ``CPU``, ``GPU``, ``memory``, ``nodeSelector``.
Similarly, all workers of a given ``podType`` will be discarded if
- There has been a change to ``image`` or one of the following fields of the ``podType``: ``CPU``, ``GPU``, ``memory``, ``nodeSelector``.
Status information
~~~~~~~~~~~~~~~~~~
Running ``kubectl -n <namespace> get raycluster`` will show all Ray clusters in the namespace with status information.
.. code-block:: shell
kubectl -n ray get rayclusters
NAME STATUS RESTARTS AGE
example-cluster Running 0 9s
The ``STATUS`` column reports the RayCluster's ``status.phase`` field. The following values are possible:
- ``Empty/nil``: This means the RayCluster resource has not yet been registered by the Operator.
- ``Updating``: The Operator is launching the Ray cluster or processing an update to the cluster's configuration.
- ``Running``: The Ray cluster's autoscaling process is running in a normal state.
- ``AutoscalingExceptionRecovery`` The Ray cluster's autoscaling process has crashed. Ray processes will restart. This can happen if the Ray head node goes down.
- ``Error`` There was an unexpected error while updating the Ray cluster. (The Ray maintainers would be grateful if you file a `bug report`_ with operator logs.)
The ``RESTARTS`` column reports the RayCluster's ``status.autoscalerRetries`` field. This tracks the number of times the cluster has restarted due to an autoscaling error.
Questions or Issues?
--------------------
.. include:: /_includes/_help.rst
.. _`RayCluster CRD`: https://github.com/ray-project/ray/tree/master/deploy/charts/ray/crds/cluster_crd.yaml
.. _`finalizer` : https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#finalizers
.. _`namespaced`: https://github.com/ray-project/ray/tree/master/deploy/components/operator_namespaced.yaml
.. _`cluster-scoped`: https://github.com/ray-project/ray/tree/master/deploy/components/operator_cluster_scoped.yaml
.. _`example`: https://github.com/ray-project/ray/tree/master/deploy/components/example_cluster.yaml
.. _`values.yaml`: https://github.com/ray-project/ray/tree/master/deploy/charts/ray/values.yaml
.. _`bug report`: https://github.com/ray-project/ray/issues/new?assignees=&labels=bug%2C+triage&template=bug_report.md&title=
.. _`helm upgrade`: https://helm.sh/docs/helm/helm_upgrade/
.. _`feature request`: https://github.com/ray-project/ray/issues/new?assignees=&labels=enhancement&template=feature_request.md&title=
.. _`discussion thread`: https://discuss.ray.io/c/ray-clusters/ray-kubernetes/11
.. _`timeout`: https://github.com/ray-project/ray/blob/b08b2c5103c634c680de31b237b2bfcceb9bc150/python/ray/autoscaler/_private/constants.py#L22
.. _`Helm docs`: https://helm.sh/docs/helm/helm_install/
.. _`Ray GitHub`: https://github.com/ray-project/ray/tree/master/deploy/components/

@ -1,93 +0,0 @@
:orphan:
.. include:: we_are_hiring.rst
.. _k8s-gpus:
GPU Usage with Kubernetes
=========================
This document provides some notes on GPU usage with Kubernetes.
To use GPUs on Kubernetes, you will need to configure both your Kubernetes setup and add additional values to your Ray cluster configuration.
For relevant documentation for GPU usage on different clouds, see instructions for `GKE`_, for `EKS`_, and for `AKS`_.
The `Ray Docker Hub <https://hub.docker.com/r/rayproject/>`_ hosts CUDA-based images packaged with Ray for use in Kubernetes pods.
For example, the image ``rayproject/ray-ml:nightly-gpu`` is ideal for running GPU-based ML workloads with the most recent nightly build of Ray.
Read :ref:`here<docker-images>` for further details on Ray images.
Using Nvidia GPUs requires specifying the relevant resource `limits` in the container fields of your Kubernetes configurations.
(Kubernetes `sets <https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/#using-device-plugins>`_
the GPU request equal to the limit.) The configuration for a pod running a Ray GPU image and
using one Nvidia GPU looks like this:
.. code-block:: yaml
    apiVersion: v1
    kind: Pod
    metadata:
      generateName: example-cluster-ray-worker
    spec:
      ...
      containers:
        - name: ray-node
          image: rayproject/ray:nightly-gpu
          ...
          resources:
            requests:
              cpu: 1000m
              memory: 512Mi
            limits:
              memory: 512Mi
              nvidia.com/gpu: 1
GPU taints and tolerations
--------------------------
.. note::
Users using a managed Kubernetes service probably don't need to worry about this section.
The `Nvidia gpu plugin`_ for Kubernetes applies `taints`_ to GPU nodes; these taints prevent non-GPU pods from being scheduled on GPU nodes.
Managed Kubernetes services like GKE, EKS, and AKS automatically apply matching `tolerations`_
to pods requesting GPU resources. Tolerations are applied by means of Kubernetes's `ExtendedResourceToleration`_ `admission controller`_.
If this admission controller is not enabled for your Kubernetes cluster, you may need to manually add a GPU toleration to each of your GPU pod configurations. For example:
.. code-block:: yaml
apiVersion: v1
kind: Pod
metadata:
generateName: example-cluster-ray-worker
spec:
...
tolerations:
- effect: NoSchedule
key: nvidia.com/gpu
operator: Exists
...
containers:
- name: ray-node
image: rayproject/ray:nightly-gpu
...
Further reference and discussion
--------------------------------
Read about Kubernetes device plugins `here <https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/>`__,
about Kubernetes GPU plugins `here <https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus>`__,
and about Nvidia's GPU plugin for Kubernetes `here <https://github.com/NVIDIA/k8s-device-plugin>`__.
If you run into problems setting up GPUs for your Ray cluster on Kubernetes, please reach out to us at `<https://discuss.ray.io>`_.
Questions or Issues?
--------------------
.. include:: /_includes/_help.rst
.. _`GKE`: https://cloud.google.com/kubernetes-engine/docs/how-to/gpus
.. _`EKS`: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html
.. _`AKS`: https://docs.microsoft.com/en-us/azure/aks/gpu-cluster
.. _`tolerations`: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
.. _`taints`: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
.. _`Nvidia gpu plugin`: https://github.com/NVIDIA/k8s-device-plugin
.. _`admission controller`: https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/
.. _`ExtendedResourceToleration`: https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#extendedresourcetoleration

@ -1,159 +0,0 @@
:orphan:
.. include:: we_are_hiring.rst
.. _ray-k8s-static:
Deploying a Static Ray Cluster on Kubernetes
============================================
This document gives an example of how to manually deploy a non-autoscaling Ray cluster on Kubernetes.
- Learn about deploying an autoscaling Ray cluster using the :ref:`Ray Helm chart<kuberay-index>`.
Creating a Ray Namespace
------------------------
First, create a `Kubernetes Namespace`_ for Ray resources on your cluster. The
following commands will create resources under this Namespace, so if you want
to use a different one than ``ray``, please be sure to also change the
``namespace`` fields in the provided ``yaml`` files and anytime you see a ``-n``
flag passed to ``kubectl``.
.. code-block:: shell
$ kubectl create namespace ray
Starting a Ray Cluster
----------------------
A Ray cluster consists of a single head node and a set of worker nodes (the
provided `ray-cluster.yaml <https://github.com/ray-project/ray/blob/master/doc/kubernetes/ray-cluster.yaml>`__ file will start 3 worker nodes). In the example
Kubernetes configuration, this is implemented as:
- A ``ray-head`` `Kubernetes Service`_ that enables the worker nodes to discover the location of the head node on start up.
This Service also enables access to the Ray Client and Ray Dashboard.
- A ``ray-head`` `Kubernetes Deployment`_ that backs the ``ray-head`` Service with a single head node pod (replica).
- A ``ray-worker`` `Kubernetes Deployment`_ with multiple worker node pods (replicas) that connect to the ``ray-head`` pod using the ``ray-head`` Service.
Note that because the head and worker nodes are Deployments, Kubernetes will
automatically restart pods that crash to maintain the correct number of
replicas.
- If a worker node goes down, a replacement pod will be started and joined to the cluster.
- If the head node goes down, it will be restarted. This will start a new Ray cluster. Worker nodes that were connected to the old head node will crash and be restarted, connecting to the new head node when they come back up.
Try deploying a cluster with the provided Kubernetes config by running the
following command:
.. code-block:: shell
$ kubectl apply -f ray/doc/kubernetes/ray-cluster.yaml
Verify that the pods are running by running ``kubectl get pods -n ray``. You
may have to wait up to a few minutes for the pods to enter the 'Running'
state on the first run.
.. code-block:: shell
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
ray-head-5455bb66c9-6bxvz 1/1 Running 0 10s
ray-worker-5c49b7cc57-c6xs8 1/1 Running 0 5s
ray-worker-5c49b7cc57-d9m86 1/1 Running 0 5s
ray-worker-5c49b7cc57-kzk4s 1/1 Running 0 5s
.. note::
You might see a nonzero number of RESTARTS for the worker pods. That can
happen when the worker pods start up before the head pod and the workers
aren't able to connect. This shouldn't affect the behavior of the cluster.
To change the number of worker nodes in the cluster, change the ``replicas``
field in the worker deployment configuration in that file and then re-apply
the config as follows:
.. code-block:: shell
# Edit 'ray/doc/kubernetes/ray-cluster.yaml' and change the 'replicas'
# field under the ray-worker deployment to, e.g., 4.
# Re-apply the new configuration to the running deployment.
$ kubectl apply -f ray/doc/kubernetes/ray-cluster.yaml
service/ray-head unchanged
deployment.apps/ray-head unchanged
deployment.apps/ray-worker configured
# Verify that there are now the correct number of worker pods running.
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
ray-head-5455bb66c9-6bxvz 1/1 Running 0 30s
ray-worker-5c49b7cc57-c6xs8 1/1 Running 0 25s
ray-worker-5c49b7cc57-d9m86 1/1 Running 0 25s
ray-worker-5c49b7cc57-kzk4s 1/1 Running 0 25s
ray-worker-5c49b7cc57-zzfg2 1/1 Running 0 0s
To validate that the restart behavior is working properly, try killing pods
and checking that they are restarted by Kubernetes:
.. code-block:: shell
# Delete a worker pod.
$ kubectl -n ray delete pod ray-worker-5c49b7cc57-c6xs8
pod "ray-worker-5c49b7cc57-c6xs8" deleted
# Check that a new worker pod was started (this may take a few seconds).
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
ray-head-5455bb66c9-6bxvz 1/1 Running 0 45s
ray-worker-5c49b7cc57-d9m86 1/1 Running 0 40s
ray-worker-5c49b7cc57-kzk4s 1/1 Running 0 40s
ray-worker-5c49b7cc57-ypq8x 1/1 Running 0 0s
# Delete the head pod.
$ kubectl -n ray delete pod ray-head-5455bb66c9-6bxvz
pod "ray-head-5455bb66c9-6bxvz" deleted
# Check that a new head pod was started and the worker pods were restarted.
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
ray-head-5455bb66c9-gqzql 1/1 Running 0 0s
ray-worker-5c49b7cc57-d9m86 1/1 Running 1 50s
ray-worker-5c49b7cc57-kzk4s 1/1 Running 1 50s
ray-worker-5c49b7cc57-ypq8x 1/1 Running 1 10s
# You can even try deleting all of the pods in the Ray namespace and checking
# that Kubernetes brings the right number back up.
$ kubectl -n ray delete pods --all
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
ray-head-5455bb66c9-7l6xj 1/1 Running 0 10s
ray-worker-5c49b7cc57-57tpv 1/1 Running 0 10s
ray-worker-5c49b7cc57-6m4kp 1/1 Running 0 10s
ray-worker-5c49b7cc57-jx2w2 1/1 Running 0 10s
Now that we have a running cluster, :ref:`we can execute Ray programs <ray-k8s-client>`.
Cleaning Up
-----------
To delete a running Ray cluster, you can run the following command:
.. code-block:: shell
kubectl delete -f ray/doc/kubernetes/ray-cluster.yaml
Questions or Issues?
--------------------
.. include:: /_includes/_help.rst
.. _`Kubernetes Namespace`: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
.. _`Kubernetes Service`: https://kubernetes.io/docs/concepts/services-networking/service/
.. _`Kubernetes Deployment`: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
.. _`Kubernetes Job`: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/
.. _`Discussion Board`: https://discuss.ray.io/

@ -1,338 +0,0 @@
.. include:: we_are_hiring.rst
.. _ray-k8s-deploy:
The legacy Ray Kubernetes Operator
==================================
.. note::
This documentation describes deploying Ray on Kubernetes using the legacy Ray Operator hosted in
the Ray repo.
Going forward, the :ref:`preferred tool for deploying Ray on Kubernetes<kuberay-index>` will be the `KubeRay operator`_.
The legacy operator described on this page can still be used to deploy on Kubernetes. However, the legacy operator
will enter maintenance mode in a future Ray release.
To learn more about KubeRay, see the links below:
- :ref:`Ray's guides for deploying using KubeRay<kuberay-index>`.
- `The KubeRay documentation`_.
- `The KubeRay GitHub`_.
- :ref:`A comparison of KubeRay and the legacy Ray Operator<kuberay-vs-legacy>`.
Overview
--------
You can leverage your `Kubernetes`_ cluster as a substrate for execution of distributed Ray programs.
The :ref:`Ray Autoscaler<cluster-index>` spins up and deletes Kubernetes `Pods`_ according to the resource demands of the Ray workload. Each Ray node runs in its own Kubernetes Pod.
Quick Guide
-----------
This document covers the following topics:
- :ref:`Intro to the Ray Kubernetes Operator<ray-operator>`
- :ref:`Launching Ray clusters with the Ray Helm Chart<ray-helm>`
- :ref:`Monitoring Ray clusters<ray-k8s-monitor>`
- :ref:`Running Ray programs using Ray Client<ray-k8s-client>`
You can find more information at the following links:
- :ref:`Ray Operator and Helm chart configuration<k8s-advanced>`
- :ref:`GPU usage with Kubernetes<k8s-gpus>`
- :ref:`Using Ray Tune on your Kubernetes cluster<tune-kubernetes>`
- :ref:`How to manually set up a non-autoscaling Ray cluster on Kubernetes<ray-k8s-static>`
.. _ray-operator:
The Ray Kubernetes Operator
---------------------------
Deployments of Ray on Kubernetes are managed by the ``Ray Kubernetes Operator``.
The Ray Operator follows the standard Kubernetes `Operator pattern`_. The main players are
- A `Custom Resource`_ called a ``RayCluster``, which describes the desired state of the Ray cluster.
- A `Custom Controller`_, the ``Ray Operator``, which processes ``RayCluster`` resources and manages the Ray cluster.
Under the hood, the Operator uses the :ref:`Ray Autoscaler<cluster-index>` to launch and scale your Ray cluster.
The rest of this document explains how to launch a small example Ray cluster on Kubernetes.
- :ref:`Ray on Kubernetes Configuration and Advanced Usage<k8s-advanced>`.
.. _ray-helm:
Installing the Ray Operator with Helm
-------------------------------------
Ray provides a `Helm`_ chart to simplify deployment of the Ray Operator and Ray clusters.
The `Ray Helm chart`_ is available as part of the Ray GitHub repository.
The chart will be published to a public Helm repository as part of a future Ray release.
Preparation
~~~~~~~~~~~
- Configure `kubectl`_ to access your Kubernetes cluster.
- Install `Helm 3`_.
- Download the `Ray Helm chart`_.
To run the default example in this document, make sure your Kubernetes cluster can accommodate
additional resource requests of 4 CPU and 2.5Gi memory.
Installation
~~~~~~~~~~~~
You can install a small Ray cluster with a single ``helm`` command.
The default cluster configuration consists of a Ray head pod and two worker pods,
with scaling allowed up to three workers.
.. code-block:: shell
# Navigate to the directory containing the chart
$ cd ray/deploy/charts
# Install a small Ray cluster with the default configuration
# in a new namespace called "ray". Let's name the Helm release "example-cluster."
$ helm -n ray install example-cluster --create-namespace ./ray
NAME: example-cluster
LAST DEPLOYED: Fri May 14 11:44:06 2021
NAMESPACE: ray
STATUS: deployed
REVISION: 1
TEST SUITE: None
View the installed resources as follows.
.. code-block:: shell
# The custom resource representing the state of the Ray cluster.
$ kubectl -n ray get rayclusters
NAME STATUS RESTARTS AGE
example-cluster Running 0 53s
# The Ray head node and two Ray worker nodes.
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
example-cluster-ray-head-type-5926k 1/1 Running 0 57s
example-cluster-ray-worker-type-8gbwx 1/1 Running 0 40s
example-cluster-ray-worker-type-l6cvx 1/1 Running 0 40s
# A service exposing the Ray head node.
$ kubectl -n ray get service
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
example-cluster-ray-head ClusterIP 10.8.11.17 <none> 10001/TCP,8265/TCP,8000/TCP 115s
# The operator deployment.
# By default, the deployment is launched in namespace "default".
$ kubectl get deployment ray-operator
NAME READY UP-TO-DATE AVAILABLE AGE
ray-operator 1/1 1 1 3m1s
# The single pod of the operator deployment.
$ kubectl get pod -l cluster.ray.io/component=operator
NAME READY STATUS RESTARTS AGE
ray-operator-84f5d57b7f-xkvtm 1/1 Running 0 3m35s
# The Custom Resource Definition defining a RayCluster.
$ kubectl get crd rayclusters.cluster.ray.io
NAME CREATED AT
rayclusters.cluster.ray.io 2021-05-14T18:44:02
.. _ray-k8s-monitor:
Observability
-------------
To view autoscaling logs, run a ``kubectl logs`` command on the operator pod:
.. code-block:: shell
# The last 100 lines of logs.
$ kubectl logs \
$(kubectl get pod -l cluster.ray.io/component=operator -o custom-columns=:metadata.name) \
| tail -n 100
.. _ray-k8s-dashboard:
The :ref:`Ray dashboard<ray-dashboard>` can be accessed on the Ray head node at port ``8265``.
.. code-block:: shell
# Forward the relevant port from the service exposing the Ray head.
$ kubectl -n ray port-forward service/example-cluster-ray-head 8265:8265
# The dashboard can now be viewed in a browser at http://localhost:8265
.. _ray-k8s-client:
Running Ray programs with Ray Jobs Submission
---------------------------------------------
:ref:`Ray Job Submission <jobs-overview>` can be used to submit Ray programs to your Ray cluster.
To do this, you must be able to access the Ray Dashboard, which runs on the Ray head node on port ``8265``.
One way to do this is to port forward ``127.0.0.1:8265`` on your local machine to ``127.0.0.1:8265`` on the head node using the :ref:`Kubernetes port-forwarding command<ray-k8s-dashboard>`.
.. code-block:: bash
$ kubectl -n ray port-forward service/example-cluster-ray-head 8265:8265
Then in a new shell, you can run a job using the CLI:
.. code-block:: bash
$ export RAY_ADDRESS="http://127.0.0.1:8265"
$ ray job submit --runtime-env-json='{"working_dir": "./", "pip": ["requests==2.26.0"]}' -- python script.py
2021-12-01 23:04:52,672 INFO cli.py:25 -- Creating JobSubmissionClient at address: http://127.0.0.1:8265
2021-12-01 23:04:52,809 INFO sdk.py:144 -- Uploading package gcs://_ray_pkg_bbcc8ca7e83b4dc0.zip.
2021-12-01 23:04:52,810 INFO packaging.py:352 -- Creating a file package for local directory './'.
2021-12-01 23:04:52,878 INFO cli.py:105 -- Job submitted successfully: raysubmit_RXhvSyEPbxhcXtm6.
2021-12-01 23:04:52,878 INFO cli.py:106 -- Query the status of the job using: `ray job status raysubmit_RXhvSyEPbxhcXtm6`.
For more ways to run jobs, including a Python SDK and a REST API, see :ref:`Ray Job Submission <jobs-overview>`.
Running Ray programs with Ray Client
------------------------------------
:ref:`Ray Client <ray-client>` can be used to interactively execute Ray programs on your Ray cluster. The Ray Client server runs on the Ray head node, on port ``10001``.
.. note::
Connecting with Ray client requires using matching minor versions of Python (for example 3.7)
on the server and client end, that is, on the Ray head node and in the environment where
``ray.init("ray://<host>:<port>")`` is invoked. Note that the default ``rayproject/ray`` images use Python 3.7.
The latest official Ray release builds are available for Python 3.6 and 3.8 at the `Ray Docker Hub <https://hub.docker.com/r/rayproject/ray>`_.
Connecting with Ray client also requires matching Ray versions. To connect from a local machine to a cluster running the examples in this document, the :ref:`latest release version<installation>` of Ray must be installed locally.
Using Ray Client to connect from outside the Kubernetes cluster
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One way to connect to the Ray cluster from outside your Kubernetes cluster
is to forward the Ray Client server port:
.. code-block:: shell
$ kubectl -n ray port-forward service/example-cluster-ray-head 10001:10001
Then open a new shell and try out a `sample Ray program`_:
.. code-block:: shell
$ python ray/doc/kubernetes/example_scripts/run_local_example.py
The program in this example uses ``ray.init("ray://127.0.0.1:10001")`` to connect to the Ray cluster.
The program waits for three Ray nodes to connect and then tests object transfer
between the nodes.
Using Ray Client to connect from within the Kubernetes cluster
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can also connect to your Ray cluster from another pod in the same Kubernetes cluster.
For example, you can submit a Ray application to run on the Kubernetes cluster as a `Kubernetes
Job`_. The Job runs a single pod that executes the Ray driver program to
completion, then terminates the pod while preserving access to its logs.
The following command submits a Job which executes an `example Ray program`_.
.. code-block:: shell
$ kubectl -n ray create -f https://raw.githubusercontent.com/ray-project/ray/master/doc/kubernetes/job-example.yaml
job.batch/ray-test-job created
The program executed by the job uses the name of the Ray cluster's head Service to connect:
``ray.init("ray://example-cluster-ray-head:10001")``.
The program waits for three Ray nodes to connect and then tests object transfer
between the nodes.
To view the output of the Job, first find the name of the pod that ran it,
then fetch its logs:
.. code-block:: shell
$ kubectl -n ray get pods
NAME READY STATUS RESTARTS AGE
example-cluster-ray-head-type-5926k 1/1 Running 0 21m
example-cluster-ray-worker-type-8gbwx 1/1 Running 0 21m
example-cluster-ray-worker-type-l6cvx 1/1 Running 0 21m
ray-test-job-dl9fv 1/1 Running 0 3s
# Fetch the logs. You should see repeated output for 10 iterations and then
# 'Success!'
$ kubectl -n ray logs ray-test-job-dl9fv
# Cleanup
$ kubectl -n ray delete job ray-test-job
job.batch "ray-test-job" deleted
.. tip::
Code dependencies for a given Ray task or actor must be installed on each Ray node that might run the task or actor.
Typically, this means that all Ray nodes need to have the same dependencies installed.
To achieve this, you can build a custom container image, using one of the `official Ray images <https://hub.docker.com/r/rayproject/ray>`_ as the base.
Alternatively, try out the experimental :ref:`Runtime Environments<runtime-environments>` API (latest Ray release version recommended).
.. _k8s-cleanup-basic:
Cleanup
-------
To remove a Ray Helm release and the associated API resources, use `kubectl delete`_ and `helm uninstall`_.
Note the order of the commands below.
.. code-block:: shell
# First, delete the RayCluster custom resource.
$ kubectl -n ray delete raycluster example-cluster
raycluster.cluster.ray.io "example-cluster" deleted
# Delete the Ray release.
$ helm -n ray uninstall example-cluster
release "example-cluster" uninstalled
# Optionally, delete the namespace created for our Ray release.
$ kubectl delete namespace ray
namespace "ray" deleted
Note that ``helm uninstall`` `does not delete`_ the RayCluster CRD. If you wish to delete the CRD,
make sure all Ray Helm releases have been uninstalled, then run ``kubectl delete crd rayclusters.cluster.ray.io``.
- :ref:`More details on resource cleanup<k8s-cleanup>`
Next steps
----------
:ref:`Ray Operator Advanced Configuration<k8s-advanced>`
Questions or Issues?
--------------------
.. include:: /_includes/_help.rst
.. _`Kubernetes`: https://kubernetes.io/
.. _`Kubernetes Job`: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/
.. _`Kubernetes Service`: https://kubernetes.io/docs/concepts/services-networking/service/
.. _`operator pattern`: https://kubernetes.io/docs/concepts/extend-kubernetes/operator/
.. _`Custom Resource`: https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/
.. _`Custom Controller`: https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#custom-controllers
.. _`Kubernetes Custom Resource Definition`: https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/
.. _`annotation`: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/#attaching-metadata-to-objects
.. _`permissions`: https://kubernetes.io/docs/reference/access-authn-authz/rbac/
.. _`minikube`: https://minikube.sigs.k8s.io/docs/start/
.. _`namespace`: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
.. _`Deployment`: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
.. _`Ray Helm chart`: https://github.com/ray-project/ray/tree/master/deploy/charts/ray/
.. _`kubectl`: https://kubernetes.io/docs/tasks/tools/
.. _`Helm 3`: https://helm.sh/
.. _`Helm`: https://helm.sh/
.. _`kubectl delete`: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#delete
.. _`helm uninstall`: https://helm.sh/docs/helm/helm_uninstall/
.. _`does not delete`: https://helm.sh/docs/chart_best_practices/custom_resource_definitions/
.. _`Pods`: https://kubernetes.io/docs/concepts/workloads/pods/
.. _`example Ray program`: https://github.com/ray-project/ray/tree/master/doc/kubernetes/example_scripts/job_example.py
.. _`sample Ray program`: https://github.com/ray-project/ray/tree/master/doc/kubernetes/example_scripts/run_local_example.py
.. _`official Ray images`: https://hub.docker.com/r/rayproject/ray
.. _`Ray Docker Hub`: https://hub.docker.com/r/rayproject/ray
.. _`KubeRay operator`: https://github.com/ray-project/kuberay
.. _`The KubeRay GitHub`: https://github.com/ray-project/kuberay
.. _`The KubeRay documentation`: https://ray-project.github.io/kuberay/

View file

@ -0,0 +1,100 @@
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
name: raycluster-example
spec:
# To use autoscaling, the following field must be included.
enableInTreeAutoscaling: true
# The Ray version must be supplied.
rayVersion: '2.0.0'
headGroupSpec:
serviceType: ClusterIP
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'true'
# Annotate the head pod as having 0 CPU
# to prevent the head pod from scheduling Ray workloads.
num-cpus: 0
template:
spec:
containers:
- name: ray-head
image: rayproject/ray-ml:2.0.0-gpu
resources:
limits:
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
# Keep this in container configs.
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
workerGroupSpecs:
# Start with 2 CPU workers. Allow scaling up to 3 CPU workers.
- replicas: 2
minReplicas: 2
maxReplicas: 3
groupName: rayCPUWorkerType
rayStartParams:
block: 'true'
# Annotate the Ray worker pod as having 1 unit of "Custom" capacity and 5 units of "Custom2" capacity
resources: '"{\"Custom\": 1, \"Custom2\": 5}"'
template:
spec:
containers:
- name: ray-worker
image: rayproject/ray-ml:2.0.0-gpu
resources:
limits:
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
# Keep the lifecycle block in Ray container configs.
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
# Keep the initContainers block in worker pod configs.
initContainers:
- name: init-myservice
image: busybox:1.28
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
# Start with 0 GPU workers. Allow scaling up to 5 GPU workers.
- replicas: 0
minReplicas: 0
maxReplicas: 5
groupName: rayGPUWorkerType
rayStartParams:
block: 'true'
template:
spec:
containers:
- name: ray-worker
image: rayproject/ray-ml:2.0.0-gpu
resources:
limits:
cpu: "3"
memory: "50Gi"
nvidia.com/gpu: 1
requests:
cpu: "3"
memory: "50Gi"
nvidia.com/gpu: 1
# Keep the lifecycle block in Ray container configs.
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
# Keep the initContainers block in worker pod configs.
initContainers:
- name: init-myservice
image: busybox:1.28
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
# Operator configuration is not specified here -- the KubeRay operator should be deployed before creating Ray clusters.

View file

@ -0,0 +1,77 @@
# Fluent Bit ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
name: fluentbit-config
data:
fluent-bit.conf: |
[INPUT]
Name tail
Path /tmp/ray/session_latest/logs/*
Tag ray
Path_Key true
Refresh_Interval 5
[OUTPUT]
Name stdout
Match *
---
# RayCluster CR with a FluentBit sidecar
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
name: raycluster-complete-logs
spec:
rayVersion: '2.0.0'
headGroupSpec:
serviceType: ClusterIP
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'true'
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.0.0
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
# This config is meant for demonstration purposes only.
# Use larger Ray containers in production!
resources:
limits:
cpu: "1"
memory: "1G"
requests:
cpu: "1"
memory: "1G"
# Share logs with Fluent Bit
volumeMounts:
- mountPath: /tmp/ray
name: ray-logs
# Fluent Bit sidecar
- name: fluentbit
image: fluent/fluent-bit:1.9.6
# These resource requests for Fluent Bit should be sufficient in production.
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 100m
memory: 128Mi
volumeMounts:
- mountPath: /tmp/ray
name: ray-logs
- mountPath: /fluent-bit/etc/fluent-bit.conf
subPath: fluent-bit.conf
name: fluentbit-config
# Log and config volumes
volumes:
- name: ray-logs
emptyDir: {}
- name: fluentbit-config
configMap:
name: fluentbit-config

View file

@ -0,0 +1,82 @@
# This is a RayCluster configuration for exploration of the 100Gi Ray AIR XGBoostTrainer benchmark.
# This configuration here modifies the file xgboost-benchmark.yaml in this directory
# to demonstrate autoscaling.
#
# See the discussion in xgboost-benchmark.yaml for further details.
---
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
name: raycluster-xgboost-benchmark
spec:
# The KubeRay operator will insert the Ray autoscaler sidecar
# into the Ray head node's pod config:
enableInTreeAutoscaling: true
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
rayVersion: '2.0.0'
headGroupSpec:
serviceType: ClusterIP
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'true'
template:
spec:
containers:
# The Ray head container
- name: ray-head
image: rayproject/ray-ml:2.0.0
imagePullPolicy: Always
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
# Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
workerGroupSpecs:
# Start with 0 workers. Allow scaling up to 9 workers.
- replicas: 0
minReplicas: 0
maxReplicas: 9
groupName: large-group
# the following params are used to complete the ray start: ray start --block --node-ip-address= ...
rayStartParams:
block: 'true'
template:
spec:
containers:
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
image: rayproject/ray-ml:2.0.0
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
# Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
# Slightly less than 16 to accommodate placement on 16 vCPU virtual machine.
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
# Waits for availability of the Ray head's GCS service.
initContainers:
# the env var $RAY_IP is set by the operator, with the value of the head service name
- name: init-myservice
image: busybox:1.28
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]

View file

@ -0,0 +1,116 @@
# This is a RayCluster configuration for exploration of the 100Gi Ray AIR XGBoostTrainer benchmark.
# The configuration includes 1 Ray head pod and 9 Ray worker pods.
# Each Ray container requests 54 Gi memory and 14 CPU.
# For underlying Kubernetes node configuration, we suggest a node group or pool with
# the following features:
# - 10 virtual machines
# - 64 Gi memory and 16 CPU each
# (AWS: m5.4xlarge, GCP: e2-standard-16, Azure: Standard_D5_v2)
# - Each node should be configured with 1000 Gi of disk space (for data set storage).
# One Ray pod will be scheduled per Kubernetes node.
# The suggested gap between the Ray container resource requests and the K8s node's totals accounts
# for K8s control processes and cloud-provider-specific daemons.
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
#
# A note on autoscaling:
# If you wish to observe Ray autoscaling in this example, make the following modification
# to your Kubernetes configuration:
# - Configure your Kubernetes node group or pool to autoscale with min 1, max 10 nodes.
# Make the following changes to this configuration file:
# 1. Uncomment the line `enableInTreeAutoscaling: true` in this configuration.
# 2. Under `workerGroupSpecs` set `replicas: 0` and `minReplicas: 0`.
# Alternatively, use the configuration xgboost-benchmark-autoscaler.yaml in this directory;
# the config xgboost-benchmark-autoscaler.yaml already includes the above modifications.
# * The Ray cluster will then start with 0 Ray worker pods. The Ray autoscaler will automatically
# scale up to 9 worker pods to accommodate the XGBoost-on-Ray workload.
# * The underlying Kubernetes cluster will start with 1 node. The Kubernetes cluster autoscaler will
# scale up to 9 nodes to accommodate the Ray pods.
#
# Shortly after the job is complete, the Ray worker pods and corresponding Kubernetes nodes will
# be scaled down.
---
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
name: raycluster-xgboost-benchmark
spec:
# Uncomment the next line to experiment with autoscaling.
# enableInTreeAutoscaling: true
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
rayVersion: '2.0.0'
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
serviceType: ClusterIP
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'true'
template:
spec:
containers:
# The Ray head container
- name: ray-head
image: rayproject/ray-ml:2.0.0
imagePullPolicy: Always
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
# Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
workerGroupSpecs:
- replicas: 9
minReplicas: 9
maxReplicas: 9
# To experiment with autoscaling,
# set replicas and minReplicas to 0.
# replicas: 0
# minReplicas: 0
groupName: large-group
# the following params are used to complete the ray start: ray start --block
rayStartParams:
block: 'true'
template:
spec:
containers:
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
image: rayproject/ray-ml:2.0.0
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
# Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
# Slightly less than 16 to accommodate placement on 16 vCPU virtual machine.
cpu: "14"
memory: "54Gi"
requests:
cpu: "14"
memory: "54Gi"
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
# Waits for availability of the Ray head's GCS service.
initContainers:
# the env var $RAY_IP is set by the operator, with the value of the head service name
- name: init-myservice
image: busybox:1.28
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]

View file

@ -0,0 +1,17 @@
(kuberay-examples)=
# Examples
:::{note}
To learn the basics of Ray on Kubernetes, we recommend taking a look
at the {ref}`introductory guide <kuberay-quickstart>` first.
If you are new to your cloud provider's Kubernetes service, we recommend
taking a look at the documentation links collected {ref}`here <kuberay-k8s-setup>`.
:::
This section presents example Ray workloads to try out on your Kubernetes cluster.
More examples will be added in the future. Running the distributed XGBoost example below is a
great way to start experimenting with production Ray workloads on Kubernetes.
- {ref}`kuberay-ml-example`

View file

@ -0,0 +1,189 @@
(kuberay-ml-example)=
# Ray AIR XGBoostTrainer on Kubernetes
:::{note}
To learn the basics of Ray on Kubernetes, we recommend taking a look
at the {ref}`introductory guide <kuberay-quickstart>` first.
:::
In this guide, we show you how to run a sample Ray machine learning
workload on Kubernetes infrastructure.
We will run Ray's {ref}`XGBoost training benchmark <xgboost-benchmark>` with a 100 gigabyte training set.
To learn more about using Ray's XGBoostTrainer, check out {ref}`the XGBoostTrainer documentation <train-gbdt-guide>`.
## Kubernetes infrastructure setup
If you are new to Kubernetes and you are planning to deploy Ray workloads on a managed
Kubernetes service, we recommend taking a look at this {ref}`introductory guide <kuberay-k8s-setup>`
first.
For the workload in this guide, it is recommended to use a pool or group of Kubernetes nodes
with the following properties:
- 10 nodes total
- A capacity of 16 CPU and 64 Gi memory per node. For the major cloud providers, suitable instance types include
* m5.4xlarge (Amazon Web Services)
* Standard_D5_v2 (Azure)
* e2-standard-16 (Google Cloud)
- Each node should be configured with 1000 gigabytes of disk space (to store the training set).
```{admonition} Optional: Set up an autoscaling node pool
**If you would like to try running the workload with autoscaling enabled**, use an autoscaling
node group or pool with a 1 node minimum and a 10 node maximum.
The 1 static node will be used to run the Ray head pod. This node may also host the KubeRay
operator and Kubernetes system components. After the workload is submitted, 9 additional nodes will
scale up to accommodate Ray worker pods. These nodes will scale back down after the workload is complete.
```
## Deploy the KubeRay operator
Once you have set up your Kubernetes cluster, deploy the KubeRay operator.
Refer to the {ref}`Getting Started guide <kuberay-operator-deploy>`
for instructions on this step.
## Deploy a Ray cluster
Now we're ready to deploy the Ray cluster that will execute our workload.
:::{tip}
The Ray cluster we'll deploy is configured such that one Ray pod will be scheduled
per 16-CPU Kubernetes node. The pattern of one Ray pod per Kubernetes node is encouraged, but not required.
Broadly speaking, it is more efficient to use a few large Ray pods than many small ones.
:::
We recommend taking a look at the [config file][ConfigLink] applied in the following command.
```shell
# Starting from the parent directory of cloned Ray master,
pushd ray/doc/source/cluster/kubernetes/configs/
kubectl apply -f xgboost-benchmark.yaml
popd
```
A Ray head pod and 9 Ray worker pods will be created.
```{admonition} Optional: Deploying an autoscaling Ray cluster
If you've set up an autoscaling node group or pool, you may wish to deploy
an autoscaling cluster by applying the config `xgboost-benchmark-autoscaler.yaml` (see the sketch just after this note).
One Ray head pod will be created. Once the workload starts, the Ray autoscaler will trigger
creation of Ray worker pods. Kubernetes autoscaling will then create nodes to place the Ray pods.
```
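If you chose the autoscaling option, the apply step is analogous; the sketch below assumes you are still starting from the parent directory of the cloned Ray master, as in the previous command.
```shell
# Only needed if you set up an autoscaling node group or pool.
pushd ray/doc/source/cluster/kubernetes/configs/
kubectl apply -f xgboost-benchmark-autoscaler.yaml
popd
```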
## Run the workload
To observe the startup progress of the Ray head pod, run the following command.
```shell
# If you're on macOS, first `brew install watch`.
watch -n 1 kubectl get pod
```
Once the Ray head pod enters `Running` state, we are ready to execute the XGBoost workload.
We will use {ref}`Ray Job Submission <jobs-overview>` to kick off the workload.
### Connect to the cluster.
First, we connect to the Job server. Run the following blocking command
in a separate shell.
```shell
kubectl port-forward service/raycluster-xgboost-benchmark-head-svc 8265:8265
```
### Submit the workload.
We'll use the {ref}`Ray Job Python SDK <ray-job-sdk>` to submit the XGBoost workload.
```{literalinclude} /cluster/doc_code/xgboost_submit.py
:language: python
```
To submit the workload, run the above Python script.
The script is available in the Ray repository.
```shell
# From the parent directory of cloned Ray master.
pushd ray/doc/source/cluster/doc_code/
python xgboost_submit.py
popd
```
### Observe progress.
The benchmark may take up to 30 minutes to run.
Use the following tools to observe its progress.
#### Job logs
To follow the job's logs, use the command printed by the above submission script.
```shell
# Substitute the Ray Job's submission ID.
ray job logs 'raysubmit_xxxxxxxxxxxxxxxx' --follow
```
#### Kubectl
Observe the pods in your cluster with
```shell
# If you're on macOS, first `brew install watch`.
watch -n 1 kubectl get pod
```
#### Ray Dashboard
View `localhost:8265` in your browser to access the Ray Dashboard.
#### Ray Status
Observe autoscaling status and Ray resource usage with
```shell
# Substitute the name of your Ray cluster's head pod.
watch -n 1 kubectl exec -it raycluster-xgboost-benchmark-head-xxxxx -- ray status
```
:::{note}
Under some circumstances and for certain cloud providers,
the K8s API server may become briefly unavailable during Kubernetes
cluster resizing events.
Don't worry if that happens -- the Ray workload should be uninterrupted.
For the example in this guide, wait until the API server is back up, restart the port-forwarding process,
and re-run the job log command.
:::
### Job completion
#### Benchmark results
Once the benchmark is complete, the job log will display the results:
```
Results: {'training_time': 1338.488839321999, 'prediction_time': 403.36653568099973}
```
The performance of the benchmark is sensitive to the underlying cloud infrastructure --
you might not match {ref}`the numbers quoted in the benchmark docs <xgboost-benchmark>`.
#### Model parameters
The file `model.json` in the Ray head pod contains the parameters for the trained model.
Other result data will be available in the directory `ray_results` in the head pod.
Refer to the {ref}`XGBoostTrainer documentation <train-gbdt-guide>` for details.
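If you would like to inspect these artifacts locally, one option is to copy them out of the head pod with `kubectl cp`. This is a minimal sketch: the pod name is a placeholder, and the path assumes the job wrote `model.json` to the container's default working directory.
```shell
# Substitute the name of your Ray cluster's head pod.
kubectl cp raycluster-xgboost-benchmark-head-xxxxx:model.json ./model.json
```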
```{admonition} Scale-down
If autoscaling is enabled, Ray worker pods will scale down after 60 seconds.
After the Ray worker pods are gone, your Kubernetes infrastructure should scale down
the nodes that hosted these pods.
```
#### Clean-up
Delete your Ray cluster with the following command:
```shell
kubectl delete raycluster raycluster-xgboost-benchmark
```
If you're on a public cloud, don't forget to clean up the underlying
node group and/or Kubernetes cluster.
<!-- TODO: Fix this -->
<!-- [ConfigLink]: https://raw.githubusercontent.com/ray-project/ray/291bba69fb90ee5e8401540ef55b7b74dd13f5c5/doc/source/cluster/ray-clusters-on-kubernetes/configs/xgboost-benchmark-autoscaler.yaml -->
[ConfigLink]: https://github.com/ray-project/ray/tree/master/doc/source/cluster/

View file

@ -6,6 +6,9 @@
"metadata": {},
"source": [
"(kuberay-quickstart)=\n",
"\n",
"[//]: <> (TODO: migrate this content away from ipynb)\n",
"\n",
"# Getting Started\n",
"\n",
"In this guide, we show you how to manage and interact with Ray clusters on Kubernetes.\n",
@ -16,7 +19,7 @@
"## Preparation\n",
"\n",
"### Install the latest Ray release\n",
"This step is needed to interact with remote Ray clusters using {ref}`Ray Job Submission <kuberay-job>` and {ref}`Ray Client <kuberay-client>`."
"This step is needed to interact with remote Ray clusters using {ref}`Ray Job Submission <kuberay-job>`."
]
},
{
@ -73,6 +76,7 @@
"To run the example in this guide, make sure your Kubernetes cluster (or local Kind cluster) can accomodate\n",
"additional resource requests of 3 CPU and 2Gi memory. \n",
"\n",
"(kuberay-operator-deploy)=\n",
"## Deploying the KubeRay operator\n",
"\n",
"Deploy the KubeRay Operator by cloning the KubeRay repo and applying the relevant configuration files from the master branch. "
@ -85,8 +89,7 @@
"metadata": {},
"outputs": [],
"source": [
"# After the KubeRay 0.3.0 release branch cut, this documentation will be updated to refer to the 0.3.0 branch.\n",
"! git clone https://github.com/ray-project/kuberay\n",
"! git clone https://github.com/ray-project/kuberay -b release-0.3\n",
"\n",
"# This creates the KubeRay operator and all of the resources it needs.\n",
"! kubectl create -k kuberay/ray-operator/config/default\n",
@ -225,12 +228,20 @@
"! watch -n 1 kubectl get pod"
]
},
{
"cell_type": "markdown",
"id": "4fab157b",
"metadata": {},
"source": [
"Note that in production scenarios, you will want to use larger Ray pods. In fact, it is advantageous to size each Ray pod to take up an entire Kubernetes node. See the [configuration guide](kuberay-config) for more details."
]
},
{
"cell_type": "markdown",
"id": "b63e1ab9",
"metadata": {},
"source": [
"## Interacting with a Ray Cluster\n",
"## Running Applications on a Ray Cluster\n",
"\n",
"Now, let's interact with the Ray cluster we've deployed.\n",
"\n",
@ -257,9 +268,7 @@
"id": "190b2163",
"metadata": {},
"source": [
"Now, we can run a Ray program on the head pod. The Ray program in the next cell asks the autoscaler to scale the cluster to a total of 3 CPUs. The head and worker in our example cluster each have a capacity of 1 CPU, so the request should trigger upscaling of an additional worker pod.\n",
"\n",
"Note that in real-life scenarios, you will want to use larger Ray pods. In fact, it is advantageous to size each Ray pod to take up an entire Kubernetes node. See the {ref}`configuration guide<kuberay-config>` for more details."
"Now, we can run a Ray program on the head pod. The Ray program in the next cell simply connects to the Ray Cluster, then exits."
]
},
{
@ -271,102 +280,34 @@
"source": [
"# Substitute your output from the last cell in place of \"raycluster-autoscaler-head-xxxxx\"\n",
"\n",
"! kubectl exec raycluster-autoscaler-head-xxxxx -it -c ray-head -- python -c \"import ray; ray.init(); ray.autoscaler.sdk.request_resources(num_cpus=3)\""
"! kubectl exec raycluster-autoscaler-head-xxxxx -it -c ray-head -- python -c \"import ray; ray.init()\"\n",
"# 2022-08-10 11:23:17,093 INFO worker.py:1312 -- Connecting to existing Ray cluster at address: <IP address>:6379...\n",
"# 2022-08-10 11:23:17,097 INFO worker.py:1490 -- Connected to Ray cluster."
]
},
{
"cell_type": "markdown",
"id": "b1d81b29",
"id": "fa9c6e9d",
"metadata": {},
"source": [
"### Autoscaling\n",
"While this can be useful for ad-hoc execution on the Ray Cluster, the recommended way to execute an application on a Ray Cluster is to use [Ray Jobs](jobs-quickstart).\n",
"\n",
"The last command should have triggered Ray pod upscaling. To confirm the new worker pod is up, let's query the RayCluster's pods again:"
"(kuberay-job)=\n",
"### Ray Job submission\n",
"\n",
"To set up your Ray Cluster for Ray Jobs submission, we just need to make sure that the Ray Jobs port is visible to the client.\n",
"Ray listens for Job requests through the head pod's Dashboard server.\n",
"\n",
"First, we need to find the location of the Ray head node. The KubeRay operator configures a [Kubernetes service](https://kubernetes.io/docs/concepts/services-networking/service/) targeting the Ray head pod. This service allows us to interact with Ray clusters without directly executing commands in the Ray container. To identify the Ray head service for our example cluster, run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37842ea9",
"metadata": {},
"outputs": [],
"source": [
"! kubectl get pod --selector=ray.io/cluster=raycluster-autoscaler\n",
"\n",
"# NAME READY STATUS RESTARTS AGE\n",
"# raycluster-autoscaler-head-xxxxx 2/2 Running 0 XXs\n",
"# raycluster-autoscaler-worker-small-group-yyyyy 1/1 Running 0 XXs\n",
"# raycluster-autoscaler-worker-small-group-zzzzz 1/1 Running 0 XXs "
]
},
{
"cell_type": "markdown",
"id": "222c64f4",
"metadata": {},
"source": [
"To get a summary of your cluster's status, run `ray status` on your cluster's Ray head node."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6fa373aa",
"metadata": {},
"outputs": [],
"source": [
"# Substitute your head pod's name in place of \"raycluster-autoscaler-head-xxxxx\"\n",
"! kubectl exec raycluster-autoscaler-head-xxxxx -it -c ray-head -- ray status\n",
"\n",
"# ======== Autoscaler status: 2022-07-21 xxxxxxxxxx ========\n",
"# ...."
]
},
{
"cell_type": "markdown",
"id": "b56e07a3",
"metadata": {},
"source": [
"Alternatively, to examine the full autoscaling logs, fetch the stdout of the Ray head pod's autoscaler sidecar:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ec93304",
"metadata": {},
"outputs": [],
"source": [
"# This command gets the last 20 lines of autoscaler logs.\n",
"\n",
"# Substitute your head pod's name in place of \"raycluster-autoscaler-head-xxxxx\"\n",
"! kubectl logs raycluster-autoscaler-head-xxxxx -c autoscaler | tail -n 20\n",
"\n",
"# ======== Autoscaler status: 2022-07-21 xxxxxxxxxx ========\n",
"# ..."
]
},
{
"cell_type": "markdown",
"id": "40d0a503",
"metadata": {},
"source": [
"### The Ray head service"
]
},
{
"cell_type": "markdown",
"id": "c6e257f2",
"metadata": {},
"source": [
"The KubeRay operator configures a [Kubernetes service](https://kubernetes.io/docs/concepts/services-networking/service/) targeting the Ray head pod. This service allows us to interact with Ray clusters without directly executing commands in the Ray container. To identify the Ray head service for our example cluster, run"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "d3dae5fd",
"metadata": {},
"outputs": [],
"outputs": [
],
"source": [
"! kubectl get service raycluster-autoscaler-head-svc\n",
"\n",
@ -376,19 +317,10 @@
},
{
"cell_type": "markdown",
"id": "e29c48ba",
"id": "57b3c759",
"metadata": {},
"source": [
"(kuberay-job)=\n",
"### Ray Job submission"
]
},
{
"cell_type": "markdown",
"id": "243ad524",
"metadata": {},
"source": [
"Ray provides a [Job Submission API](https://docs.ray.io/en/master/cluster/job-submission.html#ray-job-submission) which can be used to submit Ray workloads to a remote Ray cluster. The Ray Job Submission server listens on the Ray head's Dashboard port, 8265 by default. Let's access the dashboard port via port-forwarding. \n",
"Now that we have the name of the service, we can use port-forwarding to access the Ray Dashboard port (8265 by default).\n",
"\n",
"Note: The following port-forwarding command is blocking. If you are following along from a Jupyter notebook, the command must be executed in a separate shell outside of the notebook."
]
@ -432,74 +364,10 @@
},
{
"cell_type": "markdown",
"id": "c5d52948",
"id": "f8453b2a",
"metadata": {},
"source": [
"### Viewing the Ray Dashboard\n",
"\n",
"Assuming the port-forwarding process described above is still running, you may view the {ref}`ray-dashboard` by visiting `localhost:8265` in you browser.\n",
"\n",
"The dashboard port will not be used in the rest of this guide. You may stop the port-forwarding process if you wish.\n",
"\n",
"(kuberay-client)=\n",
"### Accessing the cluster using Ray Client\n",
"\n",
"[Ray Client](https://docs.ray.io/en/latest/cluster/ray-client.html) allows you to interact programatically with a remote Ray cluster using the core Ray APIs.\n",
"To try out Ray Client, first make sure your local Ray version and Python minor version match the versions used in your Ray cluster. The Ray cluster in our example is running Ray 2.0.0 and Python 3.7, so that's what we'll need locally. If you have a different local Python version and would like to avoid changing it, you can modify the images specified in the yaml file `ray-cluster.autoscaler.yaml`. For example, use `rayproject/ray:2.0.0-py38` for Python 3.8.\n",
"\n",
"After confirming the Ray and Python versions match up, the next step is to port-forward the Ray Client server port (10001 by default).\n",
"If you are following along in a Jupyter notebook, execute the following command in a separate shell."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1f4154f",
"metadata": {},
"outputs": [],
"source": [
"# Execute this in a separate shell.\n",
"! kubectl port-forward service/raycluster-autoscaler-head-svc 10001:10001"
]
},
{
"cell_type": "markdown",
"id": "6ab3bdfe",
"metadata": {},
"source": [
"Now that we have port-forwarding set up, we can connect to the Ray Client from a local Python shell as follows:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c30245c2",
"metadata": {},
"outputs": [],
"source": [
"import ray\n",
"import platform\n",
"\n",
"ray.init(\"ray://localhost:10001\")\n",
"\n",
"# The network name of the local machine.\n",
"local_host_name = platform.node()\n",
"\n",
"# This is a Ray task.\n",
"# The task will returns the name of the Ray pod that executes it.\n",
"@ray.remote\n",
"def get_host_name():\n",
" return platform.node()\n",
"\n",
"# The task will be scheduled on the head node.\n",
"# Thus, this variable will hold the head pod's name.\n",
"remote_host_name = ray.get(get_host_name.remote())\n",
"\n",
"print(\"The local host name is {}\".format(local_host_name))\n",
"print(\"The Ray head pod's name is {}\".format(remote_host_name))\n",
"\n",
"# Disconnect from Ray.\n",
"ray.shutdown()"
"For a more detailed guide on using Ray Jobs to run applications on a Ray Cluster, check out the [quickstart guide](jobs-quickstart)"
]
},
{
@ -622,7 +490,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
"version": "3.7.11"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

Two binary image files added (135 KiB and 133 KiB); previews not shown.

View file

@ -0,0 +1,90 @@
# Ray on Kubernetes
(kuberay-index)=
## Overview
In this section we cover how to execute your distributed Ray programs on a Kubernetes cluster.
Using the [KubeRay Operator](https://ray-project.github.io/kuberay/components/operator/) is the
recommended way to do so. The operator provides a Kubernetes-native way to manage Ray clusters.
Each Ray cluster consists of a head node pod and a collection of worker node pods. Optional
autoscaling support allows the KubeRay Operator to size your Ray clusters according to the
requirements of your Ray workload, adding and removing Ray pods as needed. KubeRay supports
heterogeneous compute nodes (including GPUs) as well as running multiple Ray clusters with
different Ray versions in the same Kubernetes cluster.
```{eval-rst}
.. image:: images/ray_on_kubernetes.png
:align: center
..
Find source document here: https://docs.google.com/drawings/d/1E3FQgWWLuj8y2zPdKXjoWKrfwgYXw6RV_FWRwK8dVlg/edit
```
Concretely, you will learn how to:
- Set up and configure Ray on a Kubernetes cluster
- Deploy and monitor Ray applications
- Integrate Ray applications with Kubernetes networking
## Learn More
The Ray docs present all the information you need to start running Ray workloads on Kubernetes.
```{eval-rst}
.. panels::
:container: text-center
:column: col-lg-12 p-2
:card:
**Getting Started**
^^^
Learn how to start a Ray cluster and deploy Ray applications on Kubernetes.
+++
.. link-button:: kuberay-quickstart
:type: ref
:text: Get Started with Ray on Kubernetes
:classes: btn-outline-info btn-block
---
**Examples**
^^^
Try example Ray workloads on Kubernetes.
+++
.. link-button:: kuberay-examples
:type: ref
:text: Try example workloads
:classes: btn-outline-info btn-block
---
**User Guides**
^^^
Learn best practices for configuring Ray clusters on Kubernetes.
+++
.. link-button:: kuberay-guides
:type: ref
:text: Read the User Guides
:classes: btn-outline-info btn-block
---
**API Reference**
^^^
Find API references on RayCluster configuration.
+++
.. link-button:: kuberay-api-reference
:type: ref
:text: Check API references
:classes: btn-outline-info btn-block
```
## About KubeRay
Ray's Kubernetes support is developed at the [KubeRay GitHub repository](https://github.com/ray-project/kuberay), under the broader [Ray project](https://github.com/ray-project/). KubeRay is used by several companies to run production Ray deployments.
- Visit the [KubeRay GitHub repo](https://github.com/ray-project/kuberay) to track progress, report bugs, propose new features, or contribute to
the project.
- Check out the [KubeRay docs](https://ray-project.github.io/kuberay/) for further technical information, developer guides,
and discussion of new and upcoming features.

View file

@ -0,0 +1,10 @@
(kuberay-api-reference)=
# API Reference
To learn about RayCluster configuration, we recommend taking a look at
the {ref}`configuration guide <kuberay-config>`.
For comprehensive coverage of all supported RayCluster fields,
refer to the [Golang structs][RayClusterDef] used to generate the RayCluster CRD.
[RayClusterDef]: https://github.com/ray-project/kuberay/blob/release-0.3/ray-operator/apis/ray/v1alpha1/raycluster_types.go#L12

View file

@ -0,0 +1,15 @@
(kuberay-guides)=
# User Guides
:::{note}
To learn the basics of Ray on Kubernetes, we recommend taking a look
at the {ref}`introductory guide <kuberay-quickstart>` first.
:::
In these guides, we go into further depth on several topics related to
deployments of Ray on Kubernetes.
* {ref}`kuberay-k8s-setup`
* {ref}`kuberay-config`
* {ref}`kuberay-autoscaling`
* {ref}`kuberay-gpu`
* {ref}`kuberay-logging`

View file

@ -0,0 +1,348 @@
(kuberay-config)=
# RayCluster Configuration
This guide covers the key aspects of Ray cluster configuration on Kubernetes.
## Introduction
Deployments of Ray on Kubernetes follow the [operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). The key players are
- A [custom resource](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
called a `RayCluster` describing the desired state of a Ray cluster.
- A [custom controller](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#custom-controllers),
the KubeRay operator, which manages Ray pods in order to match the `RayCluster`'s spec.
To deploy a Ray cluster, one creates a `RayCluster` custom resource (CR):
```shell
kubectl apply -f raycluster.yaml
```
This guide covers the salient features of `RayCluster` CR configuration.
For reference, here is a condensed example of a `RayCluster` CR in yaml format.
```yaml
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
name: raycluster-complete
spec:
rayVersion: "2.0.0"
enableInTreeAutoscaling: true
autoscalerOptions:
...
headGroupSpec:
serviceType: ClusterIP # Options are ClusterIP, NodePort, and LoadBalancer
enableIngress: false # Optional
rayStartParams:
block: "true"
dashboard-host: "0.0.0.0"
...
template: # Pod template
metadata: # Pod metadata
spec: # Pod spec
containers:
- name: ray-head
image: rayproject/ray-ml:2.0.0
resources:
limits:
cpu: 14
memory: 54Gi
requests:
cpu: 14
memory: 54Gi
# Keep this preStop hook in each Ray container config.
lifecycle:
preStop:
exec:
command: ["/bin/sh","-c","ray stop"]
ports: # Optional service port overrides
- containerPort: 6379
name: gcs
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
- containerPort: 8000
name: serve
...
workerGroupSpecs:
- groupName: small-group
replicas: 1
minReplicas: 1
maxReplicas: 5
rayStartParams:
...
template: # Pod template
spec:
# Keep this initContainer in each workerGroup template.
initContainers:
- name: init-myservice
image: busybox:1.28
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
...
# Another workerGroup
- groupName: medium-group
...
# Yet another workerGroup, with access to special hardware perhaps.
- groupName: gpu-group
...
```
The rest of this guide will discuss the `RayCluster` CR's config fields.
See also the [guide](kuberay-autoscaling-config) on configuring Ray autoscaling with KubeRay.
(kuberay-config-ray-version)=
## The Ray Version
The field `rayVersion` specifies the version of Ray used in the Ray cluster.
The `rayVersion` is used to fill default values for certain config fields.
The Ray container images specified in the RayCluster CR should carry
the same Ray version as the CR's `rayVersion`. If you are using a nightly or development
Ray image, it is fine to set `rayVersion` to the latest release version of Ray.
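As a minimal illustration, the fragment below (condensed from the example CR at the top of this guide) keeps `rayVersion` and the Ray container image on the same Ray version:
```yaml
spec:
  rayVersion: "2.0.0"
  headGroupSpec:
    template:
      spec:
        containers:
        - name: ray-head
          # The image's Ray version should match spec.rayVersion.
          image: rayproject/ray-ml:2.0.0
```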
## Pod configuration: headGroupSpec and workerGroupSpecs
At a high level, a RayCluster is a collection of Kubernetes pods, similar to a Kubernetes Deployment or StatefulSet.
Just as with the Kubernetes built-ins, the key pieces of configuration are
* Pod specification
* Scale information (how many pods are desired)
The key difference between a Deployment and a `RayCluster` is that a `RayCluster` is
specialized for running Ray applications. A Ray cluster consists of
* One **head pod** which hosts global control processes for the Ray cluster.
The head pod can also run Ray tasks and actors.
* Any number of **worker pods**, which run Ray tasks and actors.
Workers come in **worker groups** of identically configured pods.
For each worker group, we must specify **replicas**, the number of
pods we want of that group.
The head pod's configuration is
specified under `headGroupSpec`, while configuration for worker pods is
specified under `workerGroupSpecs`. There may be multiple worker groups,
each group with its own configuration. The `replicas` field
of a `workerGroupSpec` specifies the number of worker pods of that group to
keep in the cluster.
### Pod templates
The bulk of the configuration for a `headGroupSpec` or
`workerGroupSpec` goes in the `template` field. The `template` is a Kubernetes Pod
template which determines the configuration for the pods in the group.
Here are some of the subfields of the pod `template` to pay attention to:
#### resources
It's important to specify container CPU and memory requests and limits for
each group spec. For GPU workloads, you may also wish to specify GPU
limits. For example, set `nvidia.com/gpu:2` if using an Nvidia GPU device plugin
and you wish to specify a pod with access to 2 GPUs.
See {ref}`this guide <kuberay-gpu>` for more details on GPU support.
It's ideal to size each Ray pod to take up the
entire Kubernetes node on which it is scheduled. In other words, it's
best to run one large Ray pod per Kubernetes node.
In general, it is more efficient to use a few large Ray pods than many small ones.
The pattern of fewer large Ray pods has the following advantages:
- more efficient use of each Ray pod's shared memory object store
- reduced communication overhead between Ray pods
- reduced redundancy of per-pod Ray control structures such as Raylets
The CPU, GPU, and memory **limits** specified in the Ray container config
will be automatically advertised to Ray. These values will be used as
the logical resource capacities of Ray pods in the head or worker group.
Note that CPU quantities will be rounded up to the nearest integer
before being relayed to Ray.
The resource capacities advertised to Ray may be overridden in the {ref}`rayStartParams`.
On the other hand, CPU, GPU, and memory **requests** will be ignored by Ray.
For this reason, it is best when possible to set resource requests equal to resource limits.
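For example, a GPU worker group's Ray container might declare its resources as follows. This is a sketch only: the CPU and memory quantities are illustrative, and the `nvidia.com/gpu` entry assumes an NVIDIA device plugin is installed, as noted above.
```yaml
resources:
  limits:
    cpu: "14"
    memory: "54Gi"
    nvidia.com/gpu: 2
  # Set requests equal to limits, as recommended above.
  requests:
    cpu: "14"
    memory: "54Gi"
    nvidia.com/gpu: 2
```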
#### nodeSelector and tolerations
You can control the scheduling of worker groups' Ray pods by setting the `nodeSelector` and
`tolerations` fields of the pod spec. Specifically, these fields determine on which Kubernetes
nodes the pods may be scheduled.
See the [Kubernetes docs](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/)
for more about Pod-to-Node assignment.
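As a rough sketch, a GPU worker group could be pinned to a dedicated node pool like this. The label and taint below are hypothetical; substitute whatever labels and taints your Kubernetes nodes actually carry.
```yaml
template:
  spec:
    # Hypothetical node label identifying your GPU node pool.
    nodeSelector:
      ray.example.com/node-type: gpu
    # Hypothetical taint; only needed if the target nodes are tainted.
    tolerations:
    - key: "gpu-only"
      operator: "Exists"
      effect: "NoSchedule"
```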
#### image
The Ray container images specified in the `RayCluster` CR should carry
the same Ray version as the CR's `spec.rayVersion`.
If you are using a nightly or development Ray image, it is fine to specify Ray's
latest release version under `spec.rayVersion`.
Code dependencies for a given Ray task or actor must be installed on each Ray node that
might run the task or actor.
To achieve this, it is simplest to use the same Ray image for the Ray head and all worker groups.
In any case, do make sure that all Ray images in your CR carry the same Ray version and
Python version.
To distribute custom code dependencies across your cluster, you can build a custom container image,
using one of the [official Ray images](https://hub.docker.com/r/rayproject/ray) as the base.
See {ref}`this guide <docker-images>` to learn more about the official Ray images.
For dynamic dependency management geared towards iteration and development,
you can also use {ref}`Runtime Environments <runtime-environments>`.
(rayStartParams)=
## Ray Start Parameters
The `rayStartParams` field of each group spec is a string-string map of arguments to the Ray
container's `ray start` entrypoint. For the full list of arguments, refer to
the documentation for {ref}`ray start <ray-start-doc>`. We make special note of the following arguments:
### block
For most use-cases, this field should be set to "true" for all Ray pods. The container's Ray
entrypoint will then block forever until a Ray process exits, at which point the container
will exit. If this field is omitted, `ray start` will start Ray processes in the background and the container
will subsequently sleep forever until terminated. (Future versions of KubeRay may set
block to true by default. See [KubeRay issue #368](https://github.com/ray-project/kuberay/issues/368).)
### dashboard-host
For most use-cases, this field should be set to "0.0.0.0" for the Ray head pod.
This is required to expose the Ray dashboard outside the Ray cluster. (Future versions might set
this parameter by default.)
### num-cpus
This optional field tells the Ray scheduler and autoscaler how many CPUs are
available to the Ray pod. The CPU count can be autodetected from the
Kubernetes resource limits specified in the group specs pod
`template`. However, it is sometimes useful to override this autodetected
value. For example, setting `num-cpus:"0"` for the Ray head pod will prevent Ray
workloads with non-zero CPU requirements from being scheduled on the head.
Note that the values of all Ray start parameters, including `num-cpus`,
must be supplied as **strings**.
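A minimal sketch of a head group's `rayStartParams` with this override, matching the example configs in this repository:
```yaml
headGroupSpec:
  rayStartParams:
    # Prevent Ray workloads with non-zero CPU requirements
    # from being scheduled on the head pod.
    num-cpus: "0"
    dashboard-host: "0.0.0.0"
    block: "true"
```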
### num-gpus
This optional field specifies the number of GPUs available to the Ray container.
In KubeRay versions since 0.3.0, the number of GPUs can be auto-detected from Ray container resource limits.
For certain advanced use-cases, you may wish to use `num-gpus` to set an {ref}`override <kuberay-gpu-override>`.
Note that the values of all Ray start parameters, including `num-gpus`,
must be supplied as **strings**.
### memory
The memory available to Ray is detected automatically from the Kubernetes resource
limits. If you wish, you may override this autodetected value by setting the desired memory value,
in bytes, under `rayStartParams.memory`.
Note that the values of all Ray start parameters, including `memory`,
must be supplied as **strings**.
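For illustration only: to advertise 50Gi to Ray explicitly rather than relying on autodetection, you would supply the byte count as a string (50Gi is 50 * 1024^3 = 53687091200 bytes).
```yaml
rayStartParams:
  # 50Gi expressed in bytes, supplied as a string.
  memory: "53687091200"
```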
### resources
This field can be used to specify custom resource capacities for the Ray pod.
These resource capacities will be advertised to the Ray scheduler and Ray autoscaler.
For example, the following annotation will mark a Ray pod as having 1 unit of `Custom1` capacity
and 5 units of `Custom2` capacity.
```yaml
rayStartParams:
resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
```
You can then annotate tasks and actors with annotations like `@ray.remote(resources={"Custom2": 1})`.
The Ray scheduler and autoscaler will take appropriate action to schedule such tasks.
Note the format used to express the resources string. In particular, note
that the backslashes are present as actual characters in the string.
If you are specifying a `RayCluster` programmatically, you may have to
[escape the backslashes](https://github.com/ray-project/ray/blob/cd9cabcadf1607bcda1512d647d382728055e688/python/ray/tests/kuberay/test_autoscaling_e2e.py#L92) to make sure they are processed as part of the string.
The field `rayStartParams.resources` should only be used for custom resources. The keys
`CPU`, `GPU`, and `memory` are forbidden. If you need to specify overrides for those resource
fields, use the Ray start parameters `num-cpus`, `num-gpus`, or `memory`.
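To connect this back to application code, here is a minimal sketch of a task that consumes the `Custom2` capacity annotated above; the function body is illustrative, and `ray.init()` is assumed to connect to the existing cluster when run from a cluster pod.
```python
import ray

# When run inside a Ray pod, this connects to the existing Ray cluster.
ray.init()

# Each invocation logically requires 1 unit of "Custom2", so it will only
# be scheduled on pods whose rayStartParams advertise Custom2 capacity.
@ray.remote(resources={"Custom2": 1})
def custom_resource_task():
    return "scheduled on a pod with Custom2 capacity"

print(ray.get(custom_resource_task.remote()))
```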
(kuberay-networking)=
## Services and Networking
### The Ray head service.
The KubeRay operator automatically configures a Kubernetes Service exposing the default ports
for several services of the Ray head pod, including
- Ray Client (default port 10001)
- Ray Dashboard (default port 8265)
- Ray GCS server (default port 6379)
- Ray Serve (default port 8000)
The name of the configured Kubernetes Service is the name, `metadata.name`, of the RayCluster
followed by the suffix <nobr>`head-svc`</nobr>. For the example CR given on this page, the name of
the head service will be
<nobr>`raycluster-example-head-svc`</nobr>. Kubernetes networking (`kube-dns`) then allows us to address
the Ray head's services using the name <nobr>`raycluster-example-head-svc`</nobr>.
For example, the Ray Client server can be accessed from a pod
in the same Kubernetes namespace using
```python
ray.init("ray://raycluster-example-head-svc:10001")
```
The Ray Client server can be accessed from a pod in another namespace using
```python
ray.init("ray://raycluster-example-head-svc.default.svc.cluster.local:10001")
```
(This assumes the Ray cluster was deployed into the default Kubernetes namespace.
If the Ray cluster is deployed in a non-default namespace, use that namespace in
place of `default`.)
### ServiceType, Ingresses
Ray Client and other services can be exposed outside the Kubernetes cluster
using port-forwarding or an ingress.
The simplest way to access the Ray head's services is to use port-forwarding.
Other means of exposing the head's services outside the cluster may require using
a service of type LoadBalancer or NodePort. Set `headGroupSpec.serviceType`
to the appropriate type for your application.
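For example, to expose the head's services through a cloud load balancer instead of port-forwarding, you could set the following; whether this is appropriate depends on your cluster's networking and security setup.
```yaml
headGroupSpec:
  # One of ClusterIP, NodePort, or LoadBalancer.
  serviceType: LoadBalancer
```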
You may wish to set up an ingress to expose the Ray head's services outside the cluster.
If you set the optional boolean field `headGroupSpec.enableIngress` to `true`,
the KubeRay operator will create an ingress for your Ray cluster. See the [KubeRay documentation][IngressDoc]
for details. However, it is up to you to set up an ingress controller.
Moreover, the ingress created by the KubeRay operator [might not be compatible][IngressIssue] with your network setup.
It is valid to omit the `headGroupSpec.enableIngress` field and configure an ingress object yourself.
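A minimal sketch of enabling this field is shown below; whether the resulting ingress is usable depends on your ingress controller and network setup:
```yaml
headGroupSpec:
  enableIngress: true
  ...
```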
### Specifying non-default ports.
If you wish to override the ports exposed by the Ray head service, you may do so by specifying
the Ray head container's `ports` list, under `headGroupSpec`.
Here is an example of a list of non-default ports for the Ray head service.
```yaml
ports:
- containerPort: 6380
  name: gcs
- containerPort: 8266
  name: dashboard
- containerPort: 10002
  name: client
```
If the head container's `ports` list is specified, the Ray head service will expose precisely
the ports in the list. In the above example, the head service will expose just three ports;
in particular there will be no port exposed for Ray Serve.
For the Ray head to actually use the non-default ports specified in the ports list,
you must also specify the relevant `rayStartParams`. For the above example,
```yaml
rayStartParams:
  port: "6380"
  dashboard-port: "8266"
  ray-client-server-port: "10002"
  ...
```
(kuberay-config-miscellaneous)=
## Pod and container lifecycle: preStop hooks and initContainers
There are two pieces of pod configuration that should always be included
in the RayCluster CR. Future versions of KubeRay may configure these elements automatically.
### initContainer
It is required for the configuration of each `workerGroupSpec`'s pod template to include
the following block:
```yaml
initContainers:
- name: init-myservice
  image: busybox:1.28
  command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
```
This instructs the worker pod to wait for creation of the Ray head service. The worker's `ray start`
command will use this service to connect to the Ray head.
(It is not required to include this init container in the Ray head pod's configuration.)
### preStopHook
It is recommended that every Ray container's configuration
include the following block:
```yaml
lifecycle:
  preStop:
    exec:
      command: ["/bin/sh","-c","ray stop"]
```
To ensure graceful termination, `ray stop` is executed prior to the Ray pod's termination.
[IngressDoc]: https://ray-project.github.io/kuberay/guidance/ingress/
[IngressIssue]: https://github.com/ray-project/kuberay/issues/441


@ -0,0 +1,231 @@
(kuberay-autoscaling)=
# KubeRay Autoscaling
This guide explains how to configure the Ray autoscaler on Kubernetes.
The Ray autoscaler is a Ray cluster process that automatically scales a cluster up and down based on resource demand.
The autoscaler does this by adjusting the number of nodes (Ray pods) in the cluster based on the resources required by tasks, actors or placement groups.
Note that the autoscaler only considers logical resource requests for scaling (i.e., those specified in ``@ray.remote`` and displayed in `ray status`), not physical machine utilization. If a user tries to launch an actor, task, or placement group but there are insufficient resources, the request will be queued. The autoscaler adds nodes to satisfy resource demands in this queue.
The autoscaler also removes nodes after they become idle for some time.
A node is considered idle if it has no active tasks, actors, or objects.
<!-- TODO(ekl): probably should change the default kuberay examples to not use autoscaling -->
```{admonition} When to use Autoscaling?
Autoscaling can reduce workload costs, but adds node launch overheads and can be tricky to configure.
We recommend starting with non-autoscaling clusters if you're new to Ray.
```
## Overview
The following diagram illustrates the integration of the Ray Autoscaler
with the KubeRay operator.
```{eval-rst}
.. image:: ../images/AutoscalerOperator.svg
:align: center
..
Find the source document here (https://docs.google.com/drawings/d/1LdOg9JQuN5AOII-vDpSaFBsTeg0JGWcsbyNNLP1yovg/edit)
```
Worker pod upscaling occurs through the following sequence of events:
1. The user submits a Ray workload.
2. Workload resource requirements are aggregated by the Ray head container
and communicated to the Ray autoscaler sidecar.
3. The autoscaler determines that a Ray worker pod must be added to satisfy the workload's resource requirement.
4. The autoscaler requests an additional worker pod by incrementing the RayCluster CR's `replicas` field.
5. The KubeRay operator creates a Ray worker pod to match the new `replicas` specification.
6. The Ray scheduler places the user's workload on the new worker pod.
See also the operator architecture diagram in the [KubeRay documentation](https://ray-project.github.io/kuberay/components/operator/).
## Quickstart
First, follow the [quickstart guide](kuberay-quickstart) to create an autoscaling cluster. The commands to create the KubeRay operator and deploy an autoscaling cluster are summarized here:
```bash
# Optionally use kind to run the examples locally.
# kind create cluster
$ git clone https://github.com/ray-project/kuberay -b release-0.3
# Create the KubeRay operator.
$ kubectl create -k kuberay/ray-operator/config/default
# Create an autoscaling Ray cluster.
$ kubectl apply -f kuberay/ray-operator/config/samples/ray-cluster.autoscaler.yaml
```
Now, we can run a Ray program on the head pod that uses [``request_resources``](ref-autoscaler-sdk) to scale the cluster to a total of 3 CPUs. The head and worker pods in our [example cluster config](https://github.com/ray-project/kuberay/blob/master/ray-operator/config/samples/ray-cluster.autoscaler.yaml) each have a capacity of 1 CPU, and we specified a minimum of 1 worker pod. Thus, the request should trigger upscaling of one additional worker pod.
Note that in real-life scenarios, you will want to use larger Ray pods. In fact, it is advantageous to size each Ray pod to take up an entire Kubernetes node. See the [configuration guide](kuberay-config) for more details.
To run the Ray program, we will first get the name of the Ray head pod:
```bash
$ kubectl get pods --selector=ray.io/cluster=raycluster-autoscaler --selector=ray.io/node-type=head -o custom-columns=POD:metadata.name --no-headers
# raycluster-autoscaler-head-xxxxx
```
Then, we can run the Ray program using ``kubectl exec``:
```bash
$ kubectl exec raycluster-autoscaler-head-xxxxx -it -c ray-head -- python -c "import ray; ray.init(); ray.autoscaler.sdk.request_resources(num_cpus=3)"
```
The last command should have triggered Ray pod upscaling. To confirm the new worker pod is up, let's query the RayCluster's pods again:
```bash
$ kubectl get pod --selector=ray.io/cluster=raycluster-autoscaler
# NAME READY STATUS RESTARTS AGE
# raycluster-autoscaler-head-xxxxx 2/2 Running 0 XXs
# raycluster-autoscaler-worker-small-group-yyyyy 1/1 Running 0 XXs
# raycluster-autoscaler-worker-small-group-zzzzz 1/1 Running 0 XXs
```
To get a summary of your cluster's status, run `ray status` on your cluster's Ray head node.
```bash
# Substitute your head pod's name in place of "raycluster-autoscaler-head-xxxxx".
$ kubectl exec raycluster-autoscaler-head-xxxxx -it -c ray-head -- ray status
# ======== Autoscaler status: 2022-07-21 xxxxxxxxxx ========
# ....
```
Alternatively, to examine the full autoscaling logs, fetch the stdout of the Ray head pod's autoscaler sidecar:
```bash
# This command gets the last 20 lines of autoscaler logs.
# Substitute your head pod's name in place of "raycluster-autoscaler-head-xxxxx".
$ kubectl logs raycluster-autoscaler-head-xxxxx -c autoscaler | tail -n 20
# ======== Autoscaler status: 2022-07-21 xxxxxxxxxx ========
# ...
```
(kuberay-autoscaling-config)=
## KubeRay Config Parameters
There are two steps to enabling Ray autoscaling in the KubeRay `RayCluster` custom resource (CR) config:
1. Set `enableInTreeAutoscaling: true`. The KubeRay operator will then automatically configure an autoscaling sidecar container
for the Ray head pod. The autoscaler container collects resource metrics from the Ray cluster
and automatically adjusts the `replicas` field of each `workerGroupSpec` as needed to fulfill
the requirements of your Ray application.
2. Set the fields `minReplicas` and `maxReplicas` to constrain the number of `replicas` of an autoscaling
`workerGroup`. When deploying an autoscaling cluster, one typically sets `replicas` and `minReplicas`
to the same value.
The Ray autoscaler will then take over and modify the `replicas` field as pods are added to or removed from the cluster.
For an example, check out the [config file](https://github.com/ray-project/kuberay/blob/master/ray-operator/config/samples/ray-cluster.autoscaler.yaml) that we used in the above quickstart guide.
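For orientation, here is a hedged sketch of just the relevant fields, modeled loosely on that sample config; the group name and replica bounds are illustrative assumptions:
```yaml
spec:
  enableInTreeAutoscaling: true
  workerGroupSpecs:
  - groupName: small-group
    replicas: 1     # Managed by the autoscaler after the cluster is deployed.
    minReplicas: 1
    maxReplicas: 10
    ...
```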
### Upscaling and downscaling speed
If needed, you can also control the rate at which nodes should be added to or removed from the cluster. For applications with many short-lived tasks, you may wish to adjust the upscaling and downscaling speed to be more conservative.
Use the `RayCluster` CR's `autoscalerOptions` field to do so. The `autoscalerOptions` field
carries the following subfields:
**`upscalingMode`**: This controls the rate of Ray pod upscaling. The valid values are:
- `Conservative`: Upscaling is rate-limited; the number of pending worker pods is at most the number
of worker pods connected to the Ray cluster.
- `Default`: Upscaling is not rate-limited.
- `Aggressive`: An alias for Default; upscaling is not rate-limited.
**`idleTimeoutSeconds`** (default 60s): This is the number of seconds to wait before scaling down an idle worker pod. Worker nodes are considered idle when they hold no active tasks, actors, or referenced objects (either in-memory or spilled to disk).
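As a sketch, assuming you want scaling behavior more conservative than the defaults, these options sit under `autoscalerOptions` in the RayCluster spec; the values shown are illustrative, not recommendations:
```yaml
spec:
  enableInTreeAutoscaling: true
  autoscalerOptions:
    upscalingMode: Conservative
    idleTimeoutSeconds: 300   # Wait 5 minutes before removing an idle worker pod.
```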
### Configuring the autoscaler sidecar container
The `autoscalerOptions` field also provides options for configuring the autoscaler container. Usually, it is not necessary to specify these options.
**`resources`**: The `resources` subfield of `autoscalerOptions` sets optional resource overrides
for the autoscaler sidecar container. These overrides
should be specified in the standard [container resource
spec format](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#resources).
The default values are indicated below:
```yaml
resources:
  limits:
    cpu: "500m"
    memory: "512Mi"
  requests:
    cpu: "500m"
    memory: "512Mi"
```
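To override these defaults, place a standard container resource spec under `autoscalerOptions.resources`; the values below are illustrative assumptions:
```yaml
autoscalerOptions:
  resources:
    requests:
      cpu: "1"
      memory: "1Gi"
    limits:
      cpu: "1"
      memory: "1Gi"
```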
The following `autoscalerOptions` suboptions are also available for testing and development of the autoscaler itself.
**`image`**: This field overrides the autoscaler container image.
If your `RayCluster`'s `spec.RayVersion` is at least `2.0.0`, the autoscaler will default to using
**the same image** as the Ray container. (Ray autoscaler code is bundled with the rest of Ray.)
For older Ray versions, the autoscaler will default to the image `rayproject/ray:2.0.0`.
**`imagePullPolicy`**: This field overrides the autoscaler container's
image pull policy. The default is `Always`.
**`env`** and **`envFrom`**: These fields specify autoscaler container
environment variables. These fields should be formatted following the
[Kubernetes API](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#environment-variables)
for container environment variables.
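Putting these development-oriented options together, a hedged sketch might look like the following; the image tag and environment variable name are placeholders for illustration only:
```yaml
autoscalerOptions:
  image: rayproject/ray:nightly    # Placeholder tag; point at the autoscaler code you want to run.
  imagePullPolicy: IfNotPresent
  env:
  - name: EXAMPLE_DEBUG_FLAG       # Placeholder variable name, for illustration only.
    value: "1"
```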
## Understanding the Ray Autoscaler in the Context of Kubernetes
We describe the relationship between the Ray autoscaler and other autoscalers in the Kubernetes
ecosystem.
### Ray Autoscaler vs. Horizontal Pod Autoscaler
The Ray autoscaler adjusts the number of Ray nodes in a Ray cluster.
On Kubernetes, each Ray node is run as a Kubernetes pod. Thus in the context of Kubernetes,
the Ray autoscaler scales Ray **pod quantities**. In this sense, the Ray autoscaler
plays a role similar to that of the Kubernetes
[Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) (HPA).
However, the following features distinguish the Ray Autoscaler from the HPA.
#### Load metrics are based on application semantics
The Horizontal Pod Autoscaler determines scale based on physical usage metrics like CPU
and memory. By contrast, the Ray autoscaler uses the logical resources expressed in
task and actor annotations. For instance, if each Ray container spec in your RayCluster CR indicates
a limit of 10 CPUs, and you submit twenty tasks annotated with `@ray.remote(num_cpus=5)`,
10 Ray pods will be created to satisfy the 100-CPU resource demand.
In this respect, the Ray autoscaler is similar to the
[Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler),
which makes scaling decisions based on the logical resources expressed in container
resource requests.
#### Fine-grained control of scale-down
To accommodate the statefulness of Ray applications, the Ray autoscaler has more
fine-grained control over scale-down than the Horizontal Pod Autoscaler. In addition to
determining desired scale, the Ray Autoscaler is able to select precisely which pods
to scale down. The KubeRay operator then deletes those pods.
By contrast, the Horizontal Pod Autoscaler can only decrease a replica count, without much
control over which pods are deleted. For a Ray application, downscaling a random
pod could be dangerous.
#### Architecture: One Ray Autoscaler per Ray Cluster.
Horizontal Pod Autoscaling is centrally controlled by a manager in the Kubernetes control plane;
the manager controls the scale of many Kubernetes objects.
By contrast, each Ray cluster is managed by its own Ray autoscaler process,
running as a sidecar container in the Ray head pod. This design choice is motivated
by the following considerations:
- **Scalability.** Autoscaling each Ray cluster requires processing a significant volume of resource
data from that Ray cluster.
- **Simplified versioning and compatibility.** The autoscaler and Ray are both developed
as part of the Ray repository. The interface between the autoscaler and the Ray core is complex.
To support multiple Ray clusters running at different Ray versions, it is thus best to match
Ray and Autoscaler code versions. Running one autoscaler per Ray cluster and matching the code versions
ensures compatibility.
### Ray Autoscaler with Kubernetes Cluster Autoscaler
The Ray Autoscaler and the
[Kubernetes Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)
complement each other.
After the Ray autoscaler decides to create a Ray pod, the Kubernetes Cluster Autoscaler
can provision a Kubernetes node so that the pod can be placed.
Similarly, after the Ray autoscaler decides to delete an idle pod, the Kubernetes
Cluster Autoscaler can clean up the idle Kubernetes node that remains.
It is recommended to configure your RayCluster so that only one Ray pod fits per Kubernetes node.
If you follow this pattern, Ray Autoscaler pod scaling events will correspond roughly one-to-one with cluster autoscaler
node scaling events. (We say "roughly" because it is possible for a Ray pod to be deleted and replaced
with a new Ray pod before the underlying Kubernetes node is scaled down.)
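As a rough sketch of this pattern, assuming (hypothetically) 16-vCPU, 64Gi Kubernetes nodes, you might size each Ray container to consume most of a node while leaving headroom for system pods:
```yaml
containers:
- name: ray-node
  resources:
    requests:
      cpu: "14"       # Assumed node size: 16 vCPU / 64Gi, minus headroom for system pods.
      memory: 54Gi
    limits:
      cpu: "14"
      memory: 54Gi
```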
### Vertical Pod Autoscaler
There is no relationship between the Ray Autoscaler and the Kubernetes
[Vertical Pod Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler) (VPA),
which is meant to size individual pods to the appropriate size based on current and past usage.
If you find that the load on your individual Ray pods is too high, there are a number
of manual techniques to decrease the load.
One method is to schedule fewer tasks/actors per node by increasing the resource
requirements specified in the `ray.remote` annotation.
For example, changing `@ray.remote(num_cpus=2)` to `@ray.remote(num_cpus=4)`
will halve the number of tasks or actors of that type that can fit in a given Ray pod.


@ -0,0 +1,217 @@
.. _kuberay-gpu:
Using GPUs
==========
This document provides tips on GPU usage with Ray on Kubernetes.
To use GPUs on Kubernetes, you will need to configure your Kubernetes setup and add additional values to your Ray cluster configuration.
To learn about GPU usage on different clouds, see instructions for `GKE`_, for `EKS`_, and for `AKS`_.
Dependencies for GPU-based machine learning
___________________________________________
The `Ray Docker Hub <https://hub.docker.com/r/rayproject/>`_ hosts CUDA-based container images packaged
with Ray and certain machine learning libraries.
For example, the image ``rayproject/ray-ml:2.0.0-gpu`` is ideal for running GPU-based ML workloads with Ray 2.0.0.
The Ray ML images are packaged with dependencies (such as TensorFlow and PyTorch) needed to use the :ref:`Ray AI Runtime <air>`
and the Ray Libraries covered in these docs.
To add custom dependencies, we recommend one, or both, of the following methods:
* Building a docker image using one of the official :ref:`Ray docker images <docker-images>` as base.
* Using :ref:`Ray Runtime environments <runtime-environments>`.
Configuring Ray pods for GPU usage
__________________________________
Using Nvidia GPUs requires specifying `nvidia.com/gpu` resource `limits` in the container fields of your `RayCluster`'s
`headGroupSpec` and/or `workerGroupSpecs`.
(Kubernetes `automatically sets <https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/#using-device-plugins>`_
the GPU request equal to the limit. However, you might want to specify requests for purposes of documentation.)
Here is a config snippet for a RayCluster workerGroup of up
to 5 GPU workers.
.. code-block:: yaml
    groupName: gpu-group
    replicas: 0
    minReplicas: 0
    maxReplicas: 5
    ...
    template:
      spec:
        ...
        containers:
        - name: ray-node
          image: rayproject/ray-ml:2.0.0-gpu
          ...
          resources:
            requests:
              cpu: 3
              memory: 50Gi
              nvidia.com/gpu: 1 # Optional, included just for documentation.
            limits:
              cpu: 3
              memory: 50Gi
              nvidia.com/gpu: 1 # Required to use GPU.
        ...
Each of the Ray pods in the group can be scheduled on an AWS `p2.xlarge` instance (1 GPU, 4vCPU, 61Gi RAM).
.. tip::
    GPU instances are expensive -- consider setting up autoscaling for your GPU Ray workers,
    as demonstrated with the `minReplicas:0` and `maxReplicas:5` settings above.
    To enable autoscaling, remember also to set `enableInTreeAutoscaling: true` in your RayCluster's `spec`.
    Finally, make sure your group or pool of GPU Kubernetes nodes is configured to autoscale.
    Refer to your :ref:`cloud provider's documentation <kuberay-k8s-setup>` for details on autoscaling node pools.
GPUs and Ray
____________
This section discusses GPU usage for Ray applications running on Kubernetes.
For general guidance on GPU usage with Ray, see also :ref:`gpu-support`.
The KubeRay operator advertises container GPU resource limits to
the Ray scheduler and the Ray autoscaler. In particular, the Ray container's
`ray start` entrypoint will be automatically configured with the appropriate `--num-gpus` option.
GPU workload scheduling
~~~~~~~~~~~~~~~~~~~~~~~
After a Ray pod with GPU access is deployed, it will
be able to execute tasks and actors annotated with GPU requests.
For example, the decorator `@ray.remote(num_gpus=1)` annotates a task or actor
requiring 1 GPU.
GPU autoscaling
~~~~~~~~~~~~~~~
The Ray autoscaler is aware of each Ray worker group's GPU capacity.
Say we have a RayCluster configured as in the config snippet above:
- There is a worker group of Ray pods with 1 unit of GPU capacity each.
- The Ray cluster does not currently have any workers from that group.
- `maxReplicas` for the group is at least 2.
Then the following Ray program will trigger upscaling of 2 GPU workers.
.. code-block:: python
    import ray

    ray.init()

    @ray.remote(num_gpus=1)
    class GPUActor:
        def say_hello(self):
            print("I live in a pod with GPU access.")

    # Request actor placement.
    gpu_actors = [GPUActor.remote() for _ in range(2)]

    # The following command will block until two Ray pods with GPU access are scaled
    # up and the actors are placed.
    ray.get([actor.say_hello.remote() for actor in gpu_actors])
After the program exits, the actors will be garbage collected.
The GPU worker pods will be scaled down after the idle timeout (60 seconds by default).
If the GPU worker pods were running on an autoscaling pool of Kubernetes nodes, the Kubernetes
nodes will be scaled down as well.
Requesting GPUs
~~~~~~~~~~~~~~~
You can also make a :ref:`direct request to the autoscaler <ref-autoscaler-sdk-request-resources>` to scale up GPU resources.
.. code-block:: python
    import ray

    ray.init()
    ray.autoscaler.sdk.request_resources(bundles=[{"GPU": 1}] * 2)
After the nodes are scaled up, they will persist until the request is explicitly overridden.
The following program will remove the resource request.
.. code-block:: python
    import ray

    ray.init()
    ray.autoscaler.sdk.request_resources(bundles=[])
The GPU workers can then scale down.
.. _kuberay-gpu-override:
Overriding Ray GPU capacity (advanced)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For specialized use-cases, it is possible to override the Ray pod GPU capacities advertised to Ray.
To do so, set a value for the `num-gpus` key of the head or worker group's `rayStartParams`.
For example,
.. code-block:: yaml
    rayStartParams:
      # Note that all rayStartParam values must be supplied as strings.
      num-gpus: "2"
The Ray scheduler and autoscaler will then account 2 units of GPU capacity for each
Ray pod in the group, even if the container limits do not indicate the presence of GPU.
GPU pod scheduling (advanced)
_____________________________
GPU taints and tolerations
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. note::
Managed Kubernetes services typically take care of GPU-related taints and tolerations
for you. If you are using a managed Kubernetes service, you might not need to worry
about this section.
The `Nvidia gpu plugin`_ for Kubernetes applies `taints`_ to GPU nodes; these taints prevent non-GPU pods from being scheduled on GPU nodes.
Managed Kubernetes services like GKE, EKS, and AKS automatically apply matching `tolerations`_
to pods requesting GPU resources. Tolerations are applied by means of Kubernetes's `ExtendedResourceToleration`_ `admission controller`_.
If this admission controller is not enabled for your Kubernetes cluster, you may need to manually add a GPU toleration to each of your GPU pod configurations. For example,
.. code-block:: yaml
    apiVersion: v1
    kind: Pod
    metadata:
      generateName: example-cluster-ray-worker
    spec:
      ...
      tolerations:
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
      ...
      containers:
      - name: ray-node
        image: rayproject/ray:nightly-gpu
        ...
Node selectors and node labels
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To ensure Ray pods are bound to Kubernetes nodes satisfying specific
conditions (such as the presence of GPU hardware), you may wish to use
the `nodeSelector` field of your `workerGroup`'s pod template `spec`.
See the `Kubernetes docs`_ for more about Pod-to-Node assignment.
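For example, the following sketch pins Ray workers to nodes carrying a GPU-related label; the label key and value are assumptions specific to a hypothetical GKE node pool, so substitute whatever label your nodes actually carry.

.. code-block:: yaml

    template:
      spec:
        nodeSelector:
          cloud.google.com/gke-accelerator: nvidia-tesla-t4  # Assumed label; adjust for your cluster.
        containers:
        - name: ray-node
          ...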
Further reference and discussion
--------------------------------
Read about Kubernetes device plugins `here <https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/>`__,
about Kubernetes GPU plugins `here <https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus>`__,
and about Nvidia's GPU plugin for Kubernetes `here <https://github.com/NVIDIA/k8s-device-plugin>`__.
.. _`GKE`: https://cloud.google.com/kubernetes-engine/docs/how-to/gpus
.. _`EKS`: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html
.. _`AKS`: https://docs.microsoft.com/en-us/azure/aks/gpu-cluster
.. _`tolerations`: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
.. _`taints`: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
.. _`Nvidia gpu plugin`: https://github.com/NVIDIA/k8s-device-plugin
.. _`admission controller`: https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/
.. _`ExtendedResourceToleration`: https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#extendedresourcetoleration
.. _`Kubernetes docs`: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/


@ -0,0 +1,34 @@
(kuberay-k8s-setup)=
# Managed Kubernetes services
The KubeRay operator and Ray can run on any cloud or on-prem Kubernetes cluster.
The simplest way to provision a remote Kubernetes cluster is to use a cloud-based managed service.
We collect a few helpful links for users who are getting started with a managed Kubernetes service.
(gke-setup)=
## Setting up a GKE cluster (Google Cloud)
You can find the landing page for GKE [here](https://cloud.google.com/kubernetes-engine).
If you have an account set up, you can immediately start experimenting with Kubernetes clusters in the provider's console.
Alternatively, check out the [documentation](https://cloud.google.com/kubernetes-engine/docs/) and
[quickstart guides](https://cloud.google.com/kubernetes-engine/docs/deploy-app-cluster). To successfully deploy Ray on Kubernetes,
you will need to configure pools of Kubernetes nodes;
find guidance [here](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools).
(eks-setup)=
## Setting up an EKS cluster (AWS)
You can find the landing page for EKS [here](https://aws.amazon.com/eks/).
If you have an account set up, you can immediately start experimenting with Kubernetes clusters in the provider's console.
Alternatively, check out the [documentation](https://docs.aws.amazon.com/eks/latest/userguide/) and
[quickstart guides](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html). To successfully deploy Ray on Kubernetes,
you will need to configure groups of Kubernetes nodes;
find guidance [here](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html).
(aks-setup)=
## Setting up an AKS cluster (Microsoft Azure)
You can find the landing page for AKS [here](https://azure.microsoft.com/en-us/services/kubernetes-service/).
If you have an account set up, you can immediately start experimenting with Kubernetes clusters in the provider's console.
Alternatively, check out the [documentation](https://docs.microsoft.com/en-us/azure/aks/) and
[quickstart guides](https://docs.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-portal?tabs=azure-cli). To successfully deploy Ray on Kubernetes,
you will need to configure pools of Kubernetes nodes;
find guidance [here](https://docs.microsoft.com/en-us/azure/aks/use-multiple-node-pools).


@ -0,0 +1,150 @@
(kuberay-logging)=
# Logging
This page provides tips on how to collect logs from
Ray clusters running on Kubernetes.
:::{tip}
Skip to {ref}`the deployment instructions <kuberay-logging-tldr>`
for a sample configuration showing how to extract logs from a Ray pod.
:::
## The Ray log directory
Each Ray pod runs several component processes, such as the Raylet, object manager, dashboard agent, etc.
These components log to files in the directory `/tmp/ray/session_latest/logs` in the pod's file system.
Extracting and persisting these logs requires some setup.
## Log processing tools
There are a number of log processing tools available within the Kubernetes
ecosystem. This page shows how to extract Ray logs using [Fluent Bit][FluentBit].
Other popular tools include [Fluentd][Fluentd], [Filebeat][Filebeat], and [Promtail][Promtail].
## Log collection strategies
We mention two strategies for collecting logs written to a pod's filesystem,
**sidecar containers** and **daemonsets**. You can read more about these logging
patterns in the [Kubernetes documentation][KubDoc].
### Sidecar containers
We will provide an {ref}`example <kuberay-fluentbit>` of the sidecar strategy in this guide.
You can process logs by configuring a log-processing sidecar
for each Ray pod. Ray containers should be configured to share the `/tmp/ray`
directory with the logging sidecar via a volume mount.
You can configure the sidecar to do either of the following:
* Stream Ray logs to the sidecar's stdout.
* Export logs to an external service.
### Daemonset
Alternatively, it is possible to collect logs at the Kubernetes node level.
To do this, one deploys a log-processing daemonset onto the Kubernetes cluster's
nodes. With this strategy, it is key to mount
the Ray container's `/tmp/ray` directory to the relevant `hostPath`.
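For instance, here is a hedged sketch of the relevant volume configuration; the `hostPath` location is an assumption, so use whatever directory your node-level log agent is configured to tail:
```yaml
# In the Ray container spec: write logs onto the host filesystem.
volumeMounts:
- mountPath: /tmp/ray
  name: ray-logs
volumes:
- name: ray-logs
  hostPath:
    path: /var/log/ray        # Assumed host directory tailed by the log-processing daemonset.
    type: DirectoryOrCreate
```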
(kuberay-fluentbit)=
# Setting up logging sidecars with Fluent Bit.
In this section, we give an example of how to set up log-emitting
[Fluent Bit][FluentBit] sidecars for Ray pods.
See the full config for a single-pod RayCluster with a logging sidecar [here][ConfigLink].
We now discuss this configuration and show how to deploy it.
## Configure log processing
The first step is to create a ConfigMap with configuration
for Fluent Bit.
Here is a minimal ConfigMap which tells a Fluent Bit sidecar to
* Tail Ray logs.
* Output the logs to the container's stdout.
```{literalinclude} ../configs/ray-cluster.log.yaml
:language: yaml
:start-after: Fluent Bit ConfigMap
:end-before: ---
```
A few notes on the above config:
- In addition to streaming logs to stdout, you can use an [OUTPUT] clause to export logs to any
[storage backend][FluentBitStorage] supported by Fluent Bit.
- The `Path_Key true` line above ensures that file names are included in the log records
emitted by Fluent Bit.
- The `Refresh_Interval 5` line asks Fluent Bit to refresh the list of files
in the log directory every 5 seconds, rather than the default of 60.
The reason is that the directory `/tmp/ray/session_latest/logs/` does not exist
initially (Ray must create it first). Setting the `Refresh_Interval` low allows us to see logs
in the Fluent Bit container's stdout sooner.
## Add logging sidecars to your RayCluster CR.
### Add log and config volumes.
For each pod template in our RayCluster CR, we
need to add two volumes: one for Ray's logs
and another volume to store Fluent Bit configuration from the ConfigMap
applied above.
```{literalinclude} ../configs/ray-cluster.log.yaml
:language: yaml
:start-after: Log and config volumes
```
### Mount the Ray log directory
Add the following volume mount to the Ray container's configuration.
```{literalinclude} ../configs/ray-cluster.log.yaml
:language: yaml
:start-after: Share logs with Fluent Bit
:end-before: Fluent Bit sidecar
```
### Add the Fluent Bit sidecar
Finally, add the Fluent Bit sidecar container to each Ray pod config
in your RayCluster CR.
```{literalinclude} ../configs/ray-cluster.log.yaml
:language: yaml
:start-after: Fluent Bit sidecar
:end-before: Log and config volumes
```
Mounting the `ray-logs` volume gives the sidecar container access to Ray's logs.
The <nobr>`fluentbit-config`</nobr> volume gives the sidecar access to logging configuration.
### Putting everything together
Putting all of the above elements together, we have the following yaml configuration
for a single-pod RayCluster with a log-processing sidecar.
```{literalinclude} ../configs/ray-cluster.log.yaml
:language: yaml
```
## Deploying a RayCluster with a logging sidecar
(kuberay-logging-tldr)=
Now, we will see how to deploy the configuration described above.
Deploy the KubeRay Operator if you haven't yet.
Refer to the {ref}`Getting Started guide <kuberay-operator-deploy>`
for instructions on this step.
Now, run the following commands to deploy the Fluent Bit ConfigMap and a single-pod RayCluster with
a Fluent Bit sidecar.
```shell
# Starting from the parent of cloned Ray master.
pushd ray/doc/source/cluster/kubernetes/configs/
kubectl apply -f ray-cluster.log.yaml
popd
```
Determine the Ray pod's name with
```shell
kubectl get pod | grep raycluster-complete-logs
```
Examine the FluentBit sidecar's STDOUT to see logs for Ray's component processes.
```shell
# Substitute the name of your Ray pod.
kubectl logs raycluster-complete-logs-head-xxxxx -c fluentbit
```
[FluentBit]: https://docs.fluentbit.io/manual
[FluentBitStorage]: https://docs.fluentbit.io/manual
[Filebeat]: https://www.elastic.co/guide/en/beats/filebeat/7.17/index.html
[Fluentd]: https://docs.fluentd.org/
[Promtail]: https://grafana.com/docs/loki/latest/clients/promtail/
[KubDoc]: https://kubernetes.io/docs/concepts/cluster-administration/logging/
<!-- TODO: fix this -->
[ConfigLink]: https://github.com/ray-project/ray/tree/master/doc/source/cluster/
<!-- [ConfigLink]: https://raw.githubusercontent.com/ray-project/ray/779e9f7c5733ef9a471ad2bb61723158ff942e92/doc/source/cluster/ray-clusters-on-kubernetes/configs/ray-cluster.log.yaml -->


@ -1,13 +0,0 @@
.. include:: we_are_hiring.rst
.. _cluster-reference:
Ray Cluster Config YAML and CLI
===============================
.. toctree::
:maxdepth: 2
config.rst
commands.rst
sdk.rst


@ -1,9 +1,7 @@
.. include:: we_are_hiring.rst
.. _ref-autoscaler-sdk:
Autoscaler SDK
==============
Programmatic Cluster Scaling
============================
.. _ref-autoscaler-sdk-request-resources:
@ -13,3 +11,4 @@ ray.autoscaler.sdk.request_resources
Within a Ray program, you can command the autoscaler to scale the cluster up to a desired size with a ``request_resources()`` call. The cluster will immediately attempt to scale to accommodate the requested resources, bypassing normal upscaling speed constraints.
.. autofunction:: ray.autoscaler.sdk.request_resources
:noindex:
