2021-01-25 18:48:31 -08:00
|
|
|
# Cluster identity and cluster-wide scaling limits.
cluster_name: distributed-benchmarks

# Global lower and upper bounds on the number of workers. The upper bound is
# deliberately huge so that the per-node-type limits are the ones that apply.
min_workers: 0
max_workers: 999999

# Very large value; presumably removes any throttling on how quickly the
# autoscaler adds nodes -- confirm against the Ray autoscaler docs.
upscaling_speed: 9999999
|
|
|
|
|
|
|
|
# Cloud provider settings: AWS in region us-west-2.
provider:
  type: aws
  region: us-west-2
  # Comma-separated zone list written without spaces after the commas, the
  # form used in the Ray cluster-config examples.
  # NOTE(review): if the pinned Ray release splits this string on "," without
  # trimming whitespace, a leading space would stop every zone but the first
  # from matching -- confirm for the Ray version in use.
  availability_zone: us-west-2a,us-west-2b,us-west-2c,us-west-2d
|
|
|
|
|
|
|
|
# Authentication settings: the SSH login user for the nodes.
auth:
  ssh_user: ubuntu
|
|
|
|
|
2021-05-04 23:10:04 +02:00
|
|
|
|
2021-01-25 18:48:31 -08:00
|
|
|
# Node types the cluster may launch. Every type uses the same AMI and
# advertises a custom "node" resource of 1.
available_node_types:
  # Head node: max_workers is 0 for this type.
  head_node:
    node_config:
      InstanceType: r5dn.16xlarge  # Network optimized.
      ImageId: ami-0a2363a9cff180a64
    resources:
      # Zero CPUs advertised; presumably this keeps CPU-requesting work off
      # the head node -- confirm with the benchmark scripts.
      CPU: 0
      node: 1
      small: 1
    max_workers: 0

  # Large workers: min and max are both 64, so the count is fixed.
  worker_node:
    node_config:
      InstanceType: m5.16xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    min_workers: 64
    max_workers: 64

  # Small workers: no minimum, effectively unbounded maximum.
  small_worker_node:
    node_config:
      InstanceType: m5.xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    max_workers: 999999
|
|
|
|
|
|
|
|
# Which entry of available_node_types is used for the head node.
head_node_type: head_node

# Which entry of available_node_types is used for workers by default.
worker_default_node_type: worker_node
|
|
|
|
|
2021-05-04 23:10:04 +02:00
|
|
|
# Single mount entry pairing "~/benchmarks" with ".".
# NOTE(review): presumably the key is the path on the cluster nodes and the
# value is the local path to copy from -- confirm against the Ray docs.
file_mounts:
  "~/benchmarks": "."
|
|
|
|
|
2021-01-25 18:48:31 -08:00
|
|
|
# Shell commands, in order: remove any installed ray, install the pinned
# Ray 1.4.0 wheel, install tqdm, then raise the open-file limits.
# The ">-" folded scalars join their lines with single spaces, so each one
# is still a single one-line command.
setup_commands:
  - pip uninstall -y ray
  - >-
    pip install -U
    https://s3-us-west-2.amazonaws.com/ray-wheels/releases/1.4.0/6ac5e0e5ad45070e27c77aca7267bcee30cc4b4a/ray-1.4.0-cp37-cp37m-manylinux2014_x86_64.whl
  - pip install tqdm
  # Deletes /etc/security/limits.d and appends soft and hard nofile limits
  # of 65535 to /etc/security/limits.conf.
  - >-
    sudo bash -c 'rm -rf /etc/security/limits.d;
    echo "* soft nofile 65535" >> /etc/security/limits.conf;
    echo "* hard nofile 65535" >> /etc/security/limits.conf;'
|
|
|
|
|
|
|
|
# Idle timeout of one minute; presumably the time an idle worker is kept
# before the autoscaler removes it -- confirm against the Ray docs.
idle_timeout_minutes: 1
|
|
|
|
|
|
|
|
# Commands that start Ray on the head node: stop any running Ray, then raise
# the shell's open-file limit and start the head process on port 6379.
# The ">-" folded scalar joins its lines with single spaces into one command.
head_start_ray_commands:
  - ray stop
  - >-
    ulimit -n 65535;
    ray start --head --port=6379 --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml
|
|
|
|
|
|
|
|
# Commands that start Ray on worker nodes; these normally do not need to be
# changed. Each worker stops any running Ray, raises the shell's open-file
# limit, and connects to the head at $RAY_HEAD_IP:6379.
# The ">-" folded scalar joins its lines with single spaces into one command.
worker_start_ray_commands:
  - ray stop
  - >-
    ulimit -n 65535;
    ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
|