# ray/benchmarks/distributed/config.yaml
# Viewer metadata: 66 lines, 1.8 KiB, YAML.

# Cluster identity and cluster-wide worker bounds.
cluster_name: distributed-benchmarks
min_workers: 0
# NOTE(review): the very large values below look intended to make the
# cluster-wide worker cap and the upscaling rate effectively unbounded, so
# that the per-node-type limits are the only constraint -- confirm.
max_workers: 999999
upscaling_speed: 9999999
# Cloud provider settings: AWS, region us-west-2.
provider:
  type: aws
  region: us-west-2
  # Comma-separated list of candidate availability zones. Written without
  # spaces after the commas so each entry is an exact zone name.
  availability_zone: us-west-2a,us-west-2b,us-west-2c,us-west-2d
# SSH login user for the cluster nodes.
auth:
  ssh_user: ubuntu
# Node types available to the cluster. All three use the same AMI and
# advertise the custom resource `node: 1`.
available_node_types:
  # Head node: never launched as a worker (max_workers: 0).
  head_node:
    node_config:
      InstanceType: r5dn.16xlarge  # Network optimized.
      ImageId: ami-0a2363a9cff180a64
    resources:
      # NOTE(review): CPU is advertised as 0, presumably so that CPU-requesting
      # work is not scheduled on the head node -- confirm.
      CPU: 0
      node: 1
      small: 1
    max_workers: 0
  # Large workers: fixed pool, min and max are both 64.
  worker_node:
    node_config:
      InstanceType: m5.16xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    min_workers: 64
    max_workers: 64
  # Small workers: no minimum; the cap is set very high.
  small_worker_node:
    node_config:
      InstanceType: m5.xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    max_workers: 999999
# Name of the node type (a key of available_node_types) used for the head.
head_node_type: head_node
# Name of the node type (a key of available_node_types) used for workers
# by default.
worker_default_node_type: worker_node
# Single mount pairing "~/benchmarks" with ".".
# NOTE(review): presumably the key is the path on the cluster nodes and the
# value is the local path to copy from -- confirm against the Ray docs.
file_mounts:
  "~/benchmarks": "."
# Shell commands that prepare a node: install the pinned Ray wheel and tqdm,
# then raise the open-file limits.
setup_commands:
  # Remove any existing Ray install, then install the Ray 1.4.0 release wheel
  # (cp37, manylinux2014 x86_64) from the pinned commit.
  - pip uninstall -y ray
  - >-
    pip install -U
    https://s3-us-west-2.amazonaws.com/ray-wheels/releases/1.4.0/6ac5e0e5ad45070e27c77aca7267bcee30cc4b4a/ray-1.4.0-cp37-cp37m-manylinux2014_x86_64.whl
  - pip install tqdm
  # Delete /etc/security/limits.d and append soft/hard nofile limits of 65535
  # for all users to /etc/security/limits.conf. The folded scalar joins the
  # lines below with single spaces into one command.
  - >-
    sudo bash -c 'rm -rf /etc/security/limits.d;
    echo "* soft nofile 65535" >> /etc/security/limits.conf;
    echo "* hard nofile 65535" >> /etc/security/limits.conf;'
# Idle timeout in minutes (unit taken from the key name).
# NOTE(review): presumably idle worker nodes are removed after this long --
# confirm against the autoscaler documentation.
idle_timeout_minutes: 1
# Commands that (re)start Ray on the head node.
head_start_ray_commands:
  # Stop any Ray processes that are already running.
  - ray stop
  # Raise this shell's open-file limit to 65535, then start the head on port
  # 6379 with the object manager on port 8076, passing
  # ~/ray_bootstrap_config.yaml as the autoscaling config. The folded scalar
  # joins the lines below with single spaces into one command.
  - >-
    ulimit -n 65535;
    ray start --head --port=6379 --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml
# Commands that (re)start Ray on each worker node. These normally do not need
# to be changed.
worker_start_ray_commands:
  # Stop any Ray processes that are already running.
  - ray stop
  # Raise this shell's open-file limit to 65535, then join the head at
  # $RAY_HEAD_IP on port 6379 (the same port the head is started with), with
  # the object manager on port 8076.
  - >-
    ulimit -n 65535;
    ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076