mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
63 lines
2.7 KiB
YAML
63 lines
2.7 KiB
YAML
# Skein application specification that runs a small Ray cluster on YARN:
# one head service plus a scalable set of worker services.
name: ray

services:
  # Head service.
  ray-head:
    # There should only be one instance of the head node per cluster.
    instances: 1
    resources:
      # The resources for the head node.
      vcores: 1
      memory: 2048
    files:
      # ray/doc/yarn/example.py
      example.py: example.py
      # Optional: a packaged Python environment built with `conda-pack`.
      # Skein doesn't require any specific way of distributing files, but
      # this is a good one for Python projects.
      # See https://jcrist.github.io/skein/distributing-files.html
      # environment: environment.tar.gz
    script: |
      # Activate the packaged conda environment (only if `environment` is
      # shipped in `files` above).
      # source environment/bin/activate

      # Store the Ray head address in the Skein key-value store so that the
      # workers can retrieve it later. The substitution is quoted so the
      # result is always passed to skein as a single argument.
      skein kv put current --key=RAY_HEAD_ADDRESS --value="$(hostname -i)"

      # Start all the processes needed on the Ray head node.
      # By default, object store memory and heap memory are set to roughly
      # 200 MB each. This is conservative and should be set according to
      # application needs.
      ray start \
        --head \
        --port=6379 \
        --object-store-memory=200000000 \
        --memory=200000000 \
        --num-cpus=1

      # Execute the user script.
      python example.py

      # After the user script has executed, all started processes should
      # also die.
      ray stop
      skein application shutdown current

  # Worker service.
  ray-worker:
    # The number of instances to start initially. This can be scaled
    # dynamically later.
    instances: 4
    resources:
      # The resources for the worker node.
      vcores: 1
      memory: 2048
    # files:
    #   environment: environment.tar.gz
    depends:
      # Don't start any worker nodes until the head node is started.
      - ray-head
    script: |
      # Activate the packaged conda environment (only if `environment` is
      # shipped in `files` above).
      # source environment/bin/activate

      # Fetch any addresses this node needs (e.g. the head node) from the
      # Skein key-value store.
      RAY_HEAD_ADDRESS="$(skein kv get --key=RAY_HEAD_ADDRESS current)"

      # Start all the processes needed on a Ray worker node, blocking until
      # killed with SIGTERM. The port must match the --port passed to
      # `ray start --head` in the ray-head service.
      ray start \
        --object-store-memory=200000000 \
        --memory=200000000 \
        --num-cpus=1 \
        --address="${RAY_HEAD_ADDRESS}:6379" \
        --block

      # After SIGTERM, all started processes should also die.
      ray stop