# ray/doc/kubernetes/ray-cluster.yaml
# Ray head node service, allowing worker pods to discover the head node.
apiVersion: v1
kind: Service
metadata:
  namespace: ray
  name: example-cluster-ray-head
spec:
  ports:
    - name: client
      protocol: TCP
      port: 10001
      targetPort: 10001
    - name: dashboard
      protocol: TCP
      port: 8265
      targetPort: 8265
    - name: redis
      protocol: TCP
      port: 6379
      targetPort: 6379
  selector:
    component: ray-head
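# Within the cluster, this Service is also reachable by DNS as
# example-cluster-ray-head.ray.svc.cluster.local (standard Kubernetes Service
# DNS, assuming the default "cluster.local" cluster domain).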
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ray
  name: ray-head
spec:
  # Do not change this - Ray currently only supports one head node per cluster.
  replicas: 1
  selector:
    matchLabels:
      component: ray-head
      type: ray
  template:
    metadata:
      labels:
        component: ray-head
        type: ray
    spec:
      # If the head node goes down, the entire cluster (including all worker
      # nodes) will go down as well. If you want Kubernetes to bring up a new
      # head node in this case, set this to "Always"; otherwise, set it to
      # "Never".
      restartPolicy: Always
      # This volume allocates shared memory for Ray to use for its plasma
      # object store. If you do not provide this, Ray will fall back to
      # /tmp, which will cause slowdowns if it is not a shared memory volume.
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: ray-head
          image: rayproject/ray:latest
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c", "--"]
          args:
            - "ray start --head --port=6379 --redis-shard-ports=6380,6381 --num-cpus=$MY_CPU_REQUEST --object-manager-port=22345 --node-manager-port=22346 --dashboard-host=0.0.0.0 --block"
          ports:
            - containerPort: 6379  # Redis port
            - containerPort: 10001  # Used by Ray Client
            - containerPort: 8265  # Used by Ray Dashboard
          # This volume allocates shared memory for Ray to use for its plasma
          # object store. If you do not provide this, Ray will fall back to
          # /tmp, which will cause slowdowns if it is not a shared memory
          # volume.
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          env:
            # This is used in the ray start command so that Ray can spawn the
            # correct number of processes. Omitting this may lead to degraded
            # performance.
            - name: MY_CPU_REQUEST
              valueFrom:
                resourceFieldRef:
                  resource: requests.cpu
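                  # Note: with the default divisor of 1, Kubernetes rounds
                  # resourceFieldRef CPU values up to a whole core, so the
                  # 100m request below is exposed to the container as "1".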
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ray
  name: ray-worker
spec:
  # Change this to scale the number of worker nodes started in the Ray cluster.
  replicas: 3
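  # The replica count can also be changed at runtime without editing this
  # file, e.g.: kubectl -n ray scale deployment/ray-worker --replicas=5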
  selector:
    matchLabels:
      component: ray-worker
      type: ray
  template:
    metadata:
      labels:
        component: ray-worker
        type: ray
    spec:
      restartPolicy: Always
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: ray-worker
          image: rayproject/ray:latest
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c", "--"]
          args:
            - "ray start --num-cpus=$MY_CPU_REQUEST --address=$EXAMPLE_CLUSTER_RAY_HEAD_SERVICE_HOST:$EXAMPLE_CLUSTER_RAY_HEAD_SERVICE_PORT_REDIS --object-manager-port=22345 --node-manager-port=22346 --block"
          # This volume allocates shared memory for Ray to use for its plasma
          # object store. If you do not provide this, Ray will fall back to
          # /tmp, which will cause slowdowns if it is not a shared memory
          # volume.
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          env:
            # This is used in the ray start command so that Ray can spawn the
            # correct number of processes. Omitting this may lead to degraded
            # performance.
            - name: MY_CPU_REQUEST
              valueFrom:
                resourceFieldRef:
                  resource: requests.cpu
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
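# ---------------------------------------------------------------------------
# Example usage (a minimal sketch; assumes kubectl access and that the "ray"
# namespace already exists, since it is referenced but not created above):
#
#   kubectl apply -f ray-cluster.yaml
#   kubectl -n ray get pods        # wait for head and workers to be Running
#
# To reach the cluster from outside, port-forward the head Service and
# connect with Ray Client (the client's Ray version should match the
# rayproject/ray image):
#
#   kubectl -n ray port-forward service/example-cluster-ray-head 10001:10001
#
#   # then, in Python:
#   import ray
#   ray.init("ray://127.0.0.1:10001")
#
# The dashboard listens on port 8265 and can be forwarded the same way.
# ---------------------------------------------------------------------------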