
This PR:
- Adds notes and an example on logging for Ray/K8s.
- Implements an API Reference page pointing to the configuration guide and the RayCluster CR definition.
- Takes managed K8s services out of the tabbed structure, to make that page look less sad.
- Adds a comparison of the KubeRay operator and the legacy K8s operator.
- Adds an architecture diagram for the autoscaling sections.
- Fixes some other minor items.
- Adds some info about networking to the configuration guide; removes the previously planned networking page.

Signed-off-by: Dmitri Gekhtman <dmitri.m.gekhtman@gmail.com>
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
  name: raycluster-example
spec:
  # To use autoscaling, the following field must be included.
  enableInTreeAutoscaling: true
  # The Ray version must be supplied.
  rayVersion: '2.0.0'
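  # The fields below are OPTIONAL overrides for the autoscaler's behavior,
  # sketched as an illustration and left commented out. The autoscalerOptions
  # field and its subfields are assumptions about the KubeRay CRD; confirm
  # them against your KubeRay version's RayCluster API before uncommenting.
  # autoscalerOptions:
  #   upscalingMode: Default
  #   idleTimeoutSeconds: 60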
  headGroupSpec:
    serviceType: ClusterIP
    rayStartParams:
      dashboard-host: '0.0.0.0'
      block: 'true'
      # Annotate the head pod as having 0 CPU
      # to prevent the head pod from scheduling Ray workloads.
      num-cpus: "0"
    template:
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "14"
              memory: "54Gi"
            requests:
              cpu: "14"
              memory: "54Gi"
          # Keep the lifecycle block in Ray container configs;
          # the preStop hook runs `ray stop` for a graceful shutdown.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
  workerGroupSpecs:
  # Start with 2 CPU workers. Allow scaling up to 3 CPU workers.
  - replicas: 2
    minReplicas: 2
    maxReplicas: 3
    groupName: rayCPUWorkerType
    rayStartParams:
      block: 'true'
      # Annotate the Ray worker pod as having 1 unit of "Custom" capacity
      # and 5 units of "Custom2" capacity.
      resources: '"{\"Custom\": 1, \"Custom2\": 5}"'
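      # A task or actor can then request these custom resources from
      # application code, e.g. @ray.remote(resources={"Custom": 1}), and the
      # autoscaler will add workers from this group to satisfy the request.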
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "14"
              memory: "54Gi"
            requests:
              cpu: "14"
              memory: "54Gi"
          # Keep the lifecycle block in Ray container configs.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
        # Keep the initContainers block in worker pod configs.
        initContainers:
        - name: init-myservice
          image: busybox:1.28
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
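        # Note: the init container above delays worker startup until the head
        # node's Kubernetes service is resolvable in DNS. RAY_IP is assumed
        # here to be injected by the KubeRay operator with the name of the
        # head service.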
  # Start with 0 GPU workers. Allow scaling up to 5 GPU workers.
  - replicas: 0
    minReplicas: 0
    maxReplicas: 5
    groupName: rayGPUWorkerType
    rayStartParams:
      block: 'true'
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "3"
              memory: "50Gi"
              nvidia.com/gpu: 1
            requests:
              cpu: "3"
              memory: "50Gi"
              nvidia.com/gpu: 1
          # Keep the lifecycle block in Ray container configs.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
        # Keep the initContainers block in worker pod configs.
        initContainers:
        - name: init-myservice
          image: busybox:1.28
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
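        # With minReplicas: 0, pods in this group are created only once the
        # application actually requests GPUs, e.g. via @ray.remote(num_gpus=1).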
# Operator configuration is not specified here -- the KubeRay operator should be deployed before creating Ray clusters.
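# A minimal usage sketch (assuming this file is saved as raycluster-example.yaml
# and the KubeRay operator is already running in the cluster):
#   kubectl apply -f raycluster-example.yaml
#   kubectl get pods   # expect the head pod plus two CPU worker pods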