---
# Application specification that launches a Ray cluster through Skein:
# a single head service plus a pool of worker services that join it.
name: ray

services:
  # Head service.
  ray-head:
    # There should only be one instance of the head node per cluster.
    instances: 1
    resources:
      # The resources for the head node.
      vcores: 1
      memory: 2048
    files:
      # ray/doc/yarn/example.py
      example.py: example.py
      # A packaged python environment using `conda-pack`. Note that Skein
      # doesn't require any specific way of distributing files, but this
      # is a good one for python projects. This is optional.
      # See https://jcrist.github.io/skein/distributing-files.html
      # environment: environment.tar.gz
    script: |
      # Activate the packaged conda environment (uncomment when the
      # `environment` file above is enabled):
      #  - source environment/bin/activate

      # Store the Ray head address in the Skein key-value store so that
      # the workers can retrieve it later. The substitution is quoted so
      # the address is always passed to `skein` as a single argument.
      skein kv put current --key=RAY_HEAD_ADDRESS --value="$(hostname -i)"

      # Start all the processes needed on the Ray head node.
      # By default, we set object store memory and heap memory to roughly
      # 200 MB. This is conservative and should be set according to
      # application needs.
      ray start --head --port=6379 \
        --object-store-memory=200000000 --memory 200000000 --num-cpus=1

      # Execute the user script.
      python example.py

      # After the user script has executed, all started processes should
      # also die.
      ray stop
      skein application shutdown current

  # Worker service.
  ray-worker:
    # The number of instances to start initially. This can be scaled
    # dynamically later.
    instances: 4
    resources:
      # The resources for the worker node.
      vcores: 1
      memory: 2048
    # files:
    #   environment: environment.tar.gz
    depends:
      # Don't start any worker nodes until the head node is started.
      - ray-head
    script: |
      # Activate the packaged conda environment (uncomment when the
      # `environment` file above is enabled):
      #  - source environment/bin/activate

      # Get any addresses this node needs (e.g. the head node) from the
      # Skein key-value store.
      RAY_HEAD_ADDRESS=$(skein kv get --key=RAY_HEAD_ADDRESS current)

      # Start all the processes needed on a Ray worker node, blocking
      # until killed with SIGTERM. The port must match the `--port` the
      # head node was started with.
      ray start --object-store-memory=200000000 --memory 200000000 \
        --num-cpus=1 --address="${RAY_HEAD_ADDRESS}:6379" --block

      # After SIGTERM, all started processes should also die.
      ray stop