Added support to use tolerations for head and worker nodes (#17608)

* Added support to use tolerations for head and worker nodes

* removed the imagePullSecret configuration

* Update comments

* minor comment change

* add back rayproject/ray:nightly comment

Co-authored-by: Dmitri Gekhtman <dmitri.m.gekhtman@gmail.com>
This commit is contained in:
Navneet Nandan 2021-08-16 23:06:15 +02:00 committed by GitHub
parent c02f91fa2d
commit 35d86ebfee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 2 deletions

View file

@ -78,7 +78,11 @@ spec:
{{- end }}
{{- if .nodeSelector }}
nodeSelector:
{{- toYaml .nodeSelector | nindent 12 }}
{{- toYaml $val.nodeSelector | nindent 10 }}
{{- end }}
{{- if $val.tolerations }}
tolerations:
{{- toYaml $val.tolerations | nindent 10 }}
{{- end }}
{{- end }}
# Commands to start Ray on the head node. You don't need to change this.

View file

@ -31,6 +31,17 @@ podTypes:
rayResources: {}
# Optionally, set a node selector for this podType: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
nodeSelector: {}
# Tolerations for Ray pods of this podType (the head's podType in this case).
# ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
# Note that it is often not necessary to manually specify tolerations for GPU
# usage on managed platforms such as AKS, EKS, and GKE.
# ref: https://docs.ray.io/en/master/cluster/kubernetes-gpu.html
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: NoSchedule
# The key for each podType is a user-defined string.
rayWorkerType:
# minWorkers is the minimum number of Ray workers of this pod type to keep running.
@ -54,6 +65,22 @@ podTypes:
# Optionally, set a node selector for this podType: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
nodeSelector: {}
# Tolerations for Ray pods of this podType.
# ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
# Note that it is often not necessary to manually specify tolerations for GPU
# usage on managed platforms such as AKS, EKS, and GKE.
# ref: https://docs.ray.io/en/master/cluster/kubernetes-gpu.html
tolerations: []
# - key: "nvidia.com/gpu"
# operator: Exists
# effect: NoSchedule
# Optionally, define more worker podTypes
# rayWorkerType2:
# minWorkers: 0
# maxWorkers: 10
# memory: ...
# Operator settings:
@ -74,7 +101,9 @@ operatorNamespace: default
# operatorImage - The image used in the operator deployment.
operatorImage: rayproject/ray:latest
# `rayproject/ray:latest` contains the latest official release version of Ray.
# `rayproject/ray:nightly` runs the current master version of Ray and carries some stability fixes.
# `rayproject/ray:nightly` runs the current master version of Ray.
# For a particular official release version of Ray, use `rayproject/ray:1.x.y`.
# For a specific master commit, use the first 6 characters of the commit SHA as the image tag, e.g. `rayproject/ray:050a07`.
# The operator and Ray cluster can use different Ray versions, provided both versions are >= 1.2.0.