mirror of
https://github.com/vale981/ray
synced 2025-03-06 18:41:40 -05:00

* Ray-Operator first PR 1.RayCluster CRD and CR, structure code in golang 2.config file in Kubernetes * Delete go.sum * Ray-Operator first PR 1.add directory structure 2.add guide for submitting RayCluster * Delete ray_v1_raycluster.bk.yaml * Ray-Operator first PR 1.delete file bk 2.add more description about kubernetes and ray-operator features * Ray-Operator first PR: adjust grammar * Ray-Operator first PR: add More Information about proposal * Ray-Operator first PR: 1.add heterogeneous version of CR 2.add reference ot key words, and reference links to the props in yaml 3.file structure to yaml level and function description * Ray-Operator first PR: add ray operator proposal doc * Ray-Operator first PR: add More Information about proposal * Ray-Operator first PR: add command to start * Ray-Operator first PR: add More Information about proposal * Update deploy/ray-operator/README.md Co-Authored-By: Edward Oakes <ed.nmi.oakes@gmail.com> * Update deploy/ray-operator/api/v1/raycluster_types.go Co-Authored-By: Edward Oakes <ed.nmi.oakes@gmail.com> * Update deploy/ray-operator/api/v1/raycluster_types.go Co-Authored-By: Edward Oakes <ed.nmi.oakes@gmail.com> * Ray-Operator first PR: add More Information about proposal * Ray-Operator first PR: remove License * Ray-Operator first PR: rename version from v1 to v1alpha1 * Ray-Operator first PR: use replicas instead of numNodes * Ray-Operator first PR: update replicas in CR yaml file * Ray-Operator first PR: add More Information about proposal
260 lines
8.3 KiB
YAML
260 lines
8.3 KiB
YAML
---
# Sample RayCluster custom resource with heterogeneous pod groups: two worker
# groups (small-group, medium-group) and one head group (headgroup).
#
# NOTE(review): the nesting below was reconstructed from the field names; in
# particular, confirm against the RayCluster CRD that `imagePullPolicy` sits
# directly under `spec` and that `headServiceSuffix`, `volumes` and
# `volumeMounts` are per-extension fields.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
  # Name of this RayCluster resource.
  name: raycluster-sample
spec:
  # A unique identifier for the head node and workers of this cluster.
  clusterName: raycluster-sample
  images:
    defaultImage: "docker-image"
  imagePullPolicy: "Always"

  extensions:
    # Number of pod replicas in this worker-type group.
    - replicas: 1
      # Logical group name (here `small-group`); it can also describe the
      # group's function.
      groupName: small-group
      # Pod type.
      type: worker

      # Command to start ray.
      command: ray stop; ulimit -n 65536; ray start --object-manager-port=8076

      # Custom labels. NOTE: do not define custom labels starting with
      # `raycluster.`; they may be used by the controller.
      # NOTE(review): the label below itself uses the `raycluster.` prefix;
      # confirm this is intended.
      # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
      labels:
        raycluster.group.name: small-group

      # Annotations for the pod.
      annotations:
        key: value

      # Use affinity to select nodes. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: key1
                    operator: In
                    values: ["true"]
                  - key: key2
                    operator: In
                    values: ["true"]

      # Tolerations allow the pod to be scheduled onto nodes with matching
      # taints. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
      tolerations:
        - key: "key1"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"

      # Resource requirements.
      # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
      resources:
        limits:
          cpu: 1000m
          memory: 2Gi
          ephemeral-storage: 2Gi
        requests:
          cpu: 1000m
          memory: 2Gi
          ephemeral-storage: 2Gi

      # Environment variables to set in the container. Optional.
      # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
      containerEnv:
        - name: APPNAME
          value: raycluster-sample
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MY_POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP

      # Head service suffix: {namespace}.svc, following the Kubernetes
      # service DNS naming convention.
      headServiceSuffix: "ray-operator.svc"

      # Volumes made available to the pod. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumes:
        - name: log-volume
          emptyDir: {}

      # Where the volumes are mounted in the container. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumeMounts:
        - mountPath: /path/to/log
          name: log-volume

    # Number of pod replicas in this worker-type group.
    - replicas: 1
      # Logical group name.
      groupName: medium-group
      # Pod type.
      type: worker

      # Custom labels. NOTE: do not define custom labels starting with
      # `raycluster.`; they may be used by the controller.
      # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
      labels:
        raycluster.group.name: medium-group

      # Annotations for the pod.
      annotations:
        key: value

      # Command to start ray.
      command: ray stop; ulimit -n 65536; ray start --object-manager-port=8076

      # Use affinity to select nodes. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: key1
                    operator: In
                    values: ["true"]
                  - key: key2
                    operator: In
                    values: ["true"]

      # Tolerations allow the pod to be scheduled onto nodes with matching
      # taints. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
      tolerations:
        - key: "key1"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"

      # Resource requirements.
      # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
      resources:
        limits:
          cpu: 2000m
          memory: 4Gi
          ephemeral-storage: 4Gi
        requests:
          cpu: 2000m
          memory: 4Gi
          ephemeral-storage: 4Gi

      # Environment variables to set in the container. Optional.
      # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
      containerEnv:
        - name: APPNAME
          value: raycluster-sample
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MY_POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP

      # Head service suffix: {namespace}.svc, following the Kubernetes
      # service DNS naming convention.
      headServiceSuffix: "ray-operator.svc"

      # Volumes made available to the pod. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumes:
        - name: log-volume
          emptyDir: {}

      # Where the volumes are mounted in the container. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumeMounts:
        - mountPath: /path/to/log
          name: log-volume

    # Number of pod replicas in this head-type group.
    - replicas: 1
      # Logical group name.
      groupName: headgroup
      # Pod type.
      type: head

      # Custom labels. NOTE: do not define custom labels starting with
      # `raycluster.`; they may be used by the controller.
      # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
      labels:
        raycluster.group.name: headgroup

      # Annotations for the pod.
      annotations:
        key: value

      # Command to start ray.
      command: ray stop; ulimit -n 65536; ray start --object-manager-port=8076

      # Use affinity to select nodes. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: key1
                    operator: In
                    values: ["true"]
                  - key: key2
                    operator: In
                    values: ["true"]

      # Tolerations allow the pod to be scheduled onto nodes with matching
      # taints. Optional.
      # Refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
      tolerations:
        - key: "key1"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"

      # Resource requirements.
      # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
      resources:
        limits:
          cpu: 1000m
          memory: 1Gi
          ephemeral-storage: 4Gi
        requests:
          cpu: 1000m
          memory: 1Gi
          ephemeral-storage: 4Gi

      # Environment variables to set in the container. Optional.
      # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
      containerEnv:
        - name: APPNAME
          value: raycluster-sample
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MY_POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP

      # Head service suffix: {namespace}.svc, following the Kubernetes
      # service DNS naming convention.
      headServiceSuffix: "ray-operator.svc"

      # Volumes made available to the pod. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumes:
        - name: log-volume
          emptyDir: {}

      # Where the volumes are mounted in the container. Optional.
      # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
      volumeMounts:
        - mountPath: /path/to/log
          name: log-volume