# Mirror of https://github.com/vale981/ray
# Synced 2025-03-06 10:31:39 -05:00 (132 lines, 4 KiB, YAML)
---
# Ray head node service, allowing worker pods to discover the head node.
#
# Exposes three TCP ports of the pods labelled `component: ray-head`:
#   client    (10001), dashboard (8265), redis (6379).
#
# The Service name and the port names are load-bearing: Kubernetes injects
# service-discovery environment variables derived from them
# (RAY_HEAD_SERVICE_HOST, RAY_HEAD_SERVICE_PORT_REDIS), and the ray-worker
# start command in this file reads exactly those variables. Renaming the
# Service or the `redis` port requires updating that command as well.
apiVersion: v1
kind: Service
metadata:
  namespace: ray
  name: ray-head
spec:
  ports:
    - name: client
      protocol: TCP
      port: 10001
      targetPort: 10001
    - name: dashboard
      protocol: TCP
      port: 8265
      targetPort: 8265
    - name: redis
      protocol: TCP
      port: 6379
      targetPort: 6379
  selector:
    component: ray-head
---
# Ray head node deployment: a single pod running `ray start --head`.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ray
  name: ray-head
spec:
  # Do not change this - Ray currently only supports one head node per cluster.
  replicas: 1
  selector:
    matchLabels:
      component: ray-head
      type: ray
  template:
    metadata:
      labels:
        component: ray-head
        type: ray
    spec:
      # If the head node goes down, the entire cluster (including all worker
      # nodes) will go down as well. With "Always", Kubernetes brings up a new
      # head node in that case. Note that a Deployment's pod template only
      # accepts "Always"; to run a head node that is never restarted, use a
      # different workload kind (e.g. a bare Pod or a Job) instead.
      restartPolicy: Always

      # This volume allocates shared memory for Ray to use for its plasma
      # object store. If you do not provide this, Ray will fall back to
      # /tmp, which causes slowdowns if it is not a shared memory volume.
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: ray-head
          image: rayproject/ray:latest
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c", "--"]
          args:
            # Folded scalar: the lines below are joined with single spaces
            # into one command string passed to `bash -c`. $MY_CPU_REQUEST is
            # expanded by the shell from the env entry defined further down.
            - >-
              ray start --head --port=6379 --redis-shard-ports=6380,6381
              --num-cpus=$MY_CPU_REQUEST --object-manager-port=12345
              --node-manager-port=12346 --dashboard-host=0.0.0.0 --block
          ports:
            - containerPort: 6379  # Redis port
            - containerPort: 10001  # Used by Ray Client
            - containerPort: 8265  # Used by Ray Dashboard

          # Mount the in-memory `dshm` volume at /dev/shm so that Ray's plasma
          # object store uses shared memory. If you do not provide this, Ray
          # will fall back to /tmp, which causes slowdowns if it is not a
          # shared memory volume.
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          env:
            # This is used in the ray start command so that Ray can spawn the
            # correct number of processes. Omitting this may lead to degraded
            # performance.
            - name: MY_CPU_REQUEST
              valueFrom:
                resourceFieldRef:
                  resource: requests.cpu
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
---
# Ray worker node deployment: pods that join the cluster by connecting to
# the head node through the `ray-head` Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ray
  name: ray-worker
spec:
  # Change this to scale the number of worker nodes started in the Ray cluster.
  replicas: 3
  selector:
    matchLabels:
      component: ray-worker
      type: ray
  template:
    metadata:
      labels:
        component: ray-worker
        type: ray
    spec:
      restartPolicy: Always
      # In-memory volume backing /dev/shm in the worker container (see the
      # volumeMounts entry below).
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: ray-worker
          image: rayproject/ray:latest
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c", "--"]
          args:
            # Folded scalar: the lines below are joined with single spaces
            # into one command string passed to `bash -c`.
            # RAY_HEAD_SERVICE_HOST and RAY_HEAD_SERVICE_PORT_REDIS are the
            # service-discovery variables Kubernetes injects for the
            # `ray-head` Service and its port named `redis`; the Service must
            # exist before these pods start for the variables to be set.
            - >-
              ray start --num-cpus=$MY_CPU_REQUEST
              --address=$RAY_HEAD_SERVICE_HOST:$RAY_HEAD_SERVICE_PORT_REDIS
              --object-manager-port=12345 --node-manager-port=12346 --block
          # This volume allocates shared memory for Ray to use for its plasma
          # object store. If you do not provide this, Ray will fall back to
          # /tmp, which causes slowdowns if it is not a shared memory volume.
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          env:
            # This is used in the ray start command so that Ray can spawn the
            # correct number of processes. Omitting this may lead to degraded
            # performance.
            - name: MY_CPU_REQUEST
              valueFrom:
                resourceFieldRef:
                  resource: requests.cpu
          resources:
            requests:
              cpu: 100m
              memory: 512Mi