apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
  name: raycluster-example
spec:
  # To use autoscaling, the following field must be included.
  enableInTreeAutoscaling: true
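  # Optionally, autoscaler behavior can be tuned via autoscalerOptions. The
  # commented-out sketch below is illustrative, not exhaustive; verify that
  # these fields exist in the KubeRay CRD version installed in your cluster
  # before uncommenting.
  # autoscalerOptions:
  #   upscalingMode: Default
  #   idleTimeoutSeconds: 60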
  # The Ray version must be supplied.
  rayVersion: '2.0.0'
  headGroupSpec:
    serviceType: ClusterIP
    rayStartParams:
      dashboard-host: '0.0.0.0'
      block: 'true'
      # Annotate the head pod as having 0 CPU
      # to prevent the head pod from scheduling Ray workloads.
      num-cpus: '0'
    template:
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "14"
              memory: "54Gi"
            requests:
              cpu: "14"
              memory: "54Gi"
          # Keep the lifecycle block in Ray container configs.
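          # The preStop hook below runs `ray stop` so that Ray processes can
          # shut down cleanly before Kubernetes terminates the container.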
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
  workerGroupSpecs:
  # Start with 2 CPU workers. Allow scaling up to 3 CPU workers.
  - replicas: 2
    minReplicas: 2
    maxReplicas: 3
    groupName: rayCPUWorkerType
    rayStartParams:
      block: 'true'
      # Annotate the Ray worker pod as having 1 unit of "Custom" capacity
      # and 5 units of "Custom2" capacity.
      resources: '"{\"Custom\": 1, \"Custom2\": 5}"'
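      # These custom resources can then be requested from Ray application code;
      # for example, a Python task or actor declared with
      # @ray.remote(resources={"Custom": 1}) will be scheduled on this group.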
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "14"
              memory: "54Gi"
            requests:
              cpu: "14"
              memory: "54Gi"
          # Keep the lifecycle block in Ray container configs.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
        # Keep the initContainers block in worker pod configs.
        initContainers:
        - name: init-myservice
          image: busybox:1.28
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
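          # The command above blocks worker startup until the head node's
          # Kubernetes service resolves in DNS; $RAY_IP holds the head
          # service's name on worker pods.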
  # Start with 0 GPU workers. Allow scaling up to 5 GPU workers.
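  # Because this group can scale to zero, GPU pods are created only on demand;
  # e.g. submitting a Python task declared with @ray.remote(num_gpus=1) prompts
  # the autoscaler to add a worker from this group, and idle workers are
  # eventually scaled back down.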
  - replicas: 0
    minReplicas: 0
    maxReplicas: 5
    groupName: rayGPUWorkerType
    rayStartParams:
      block: 'true'
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray-ml:2.0.0-gpu
          resources:
            limits:
              cpu: "3"
              memory: "50Gi"
              nvidia.com/gpu: 1
            requests:
              cpu: "3"
              memory: "50Gi"
              nvidia.com/gpu: 1
          # Keep the lifecycle block in Ray container configs.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
        # Keep the initContainers block in worker pod configs.
        initContainers:
        - name: init-myservice
          image: busybox:1.28
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
# Operator configuration is not specified here; the KubeRay operator should be deployed before creating Ray clusters.
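# A minimal usage sketch, assuming this manifest is saved locally as
# raycluster-autoscaler.yaml and the KubeRay operator is already running:
#   kubectl apply -f raycluster-autoscaler.yaml
#   kubectl get raycluster raycluster-example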