ray/doc/yarn/ray-skein.yaml

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

64 lines
2.7 KiB
YAML
Raw Normal View History

name: ray

# Skein application specification that runs a small Ray cluster on YARN:
# one head container plus a scalable set of worker containers. The head
# publishes its address in the Skein key-value store; workers read it from
# there to join the cluster.
services:
  # Head service.
  ray-head:
    # There should only be one instance of the head node per cluster.
    instances: 1
    resources:
      # The resources for the head node.
      vcores: 1
      memory: 2048
    files:
      # ray/doc/yarn/example.py
      example.py: example.py
      # # A packaged python environment using `conda-pack`. Note that Skein
      # # doesn't require any specific way of distributing files, but this
      # # is a good one for python projects. This is optional.
      # # See https://jcrist.github.io/skein/distributing-files.html
      # environment: environment.tar.gz
    script: |
      # Activate the packaged conda environment
      #  - source environment/bin/activate

      # This stores the Ray head address in the Skein key-value store so that
      # the workers can retrieve it later.
      skein kv put current --key=RAY_HEAD_ADDRESS --value="$(hostname -i)"

      # This command starts all the processes needed on the ray head node.
      # By default, we set object store memory and heap memory to roughly
      # 200 MB. This is conservative and should be set according to
      # application needs.
      ray start --head --port=6379 --object-store-memory=200000000 --memory=200000000 --num-cpus=1

      # This executes the user script.
      python example.py

      # After the user script has executed, all started processes should
      # also die.
      ray stop
      skein application shutdown current

  # Worker service.
  ray-worker:
    # The number of instances to start initially. This can be scaled
    # dynamically later.
    instances: 4
    resources:
      # The resources for the worker node
      vcores: 1
      memory: 2048
    # files:
    #   environment: environment.tar.gz
    depends:
      # Don't start any worker nodes until the head node is started
      - ray-head
    script: |
      # Activate the packaged conda environment
      #  - source environment/bin/activate

      # This command gets any addresses it needs (e.g. the head node) from
      # the skein key-value store. `depends` only guarantees that the head
      # container has been started, not that it has already published its
      # address, so `--wait` blocks until the key has been set.
      RAY_HEAD_ADDRESS="$(skein kv get --key=RAY_HEAD_ADDRESS --wait current)"

      # The below command starts all the processes needed on a ray worker
      # node, blocking until killed with sigterm. After sigterm, all started
      # processes should also die (ray stop).
      ray start --object-store-memory=200000000 --memory=200000000 --num-cpus=1 --address="$RAY_HEAD_ADDRESS:6379" --block; ray stop