ray/benchmarks/distributed/config.yaml
Kai Fricke 1d52ab819f
[release] release 1.3.0 results and test updates (#15366)
Convert a number of release tests and add logs for release 1.3.0
2021-05-04 22:10:04 +01:00

65 lines
1.8 KiB
YAML

# Name identifying this cluster to the cluster launcher.
cluster_name: distributed-benchmarks
# Cluster-wide lower and upper bounds on the number of worker nodes.
# Per-node-type bounds are set separately under each node type.
min_workers: 0
max_workers: 999999
# Autoscaler scale-up rate.
# NOTE(review): the very large value presumably lets the cluster grow to its
# full size in a single step for benchmarking — confirm.
upscaling_speed: 9999999
# Cloud provider settings. The keys below must be nested under `provider`;
# written flush-left they become unrelated top-level keys and `provider`
# itself is null.
provider:
  type: aws
  region: us-west-2
  # Comma-separated list of zones in which nodes may be launched.
  availability_zone: us-west-2a, us-west-2b, us-west-2c, us-west-2d
# SSH credentials used to reach the launched nodes. `ssh_user` must be a
# child of `auth`, not a top-level key.
auth:
  ssh_user: ubuntu
# Node types the autoscaler may launch. Each entry maps a node type name to
# its provider-specific `node_config`, the resources it advertises, and its
# own worker bounds. Every node type must keep its settings nested under its
# own name: at a shared indentation level the repeated `node_config`,
# `resources` and `max_workers` keys would collide as duplicates.
available_node_types:
  # Head node type (selected by `head_node_type`).
  head_node:
    node_config:
      InstanceType: r5dn.16xlarge  # Network optimized.
      ImageId: ami-0a2363a9cff180a64
    resources:
      # NOTE(review): zero CPUs presumably keeps benchmark work off the head
      # node — confirm.
      CPU: 0
      # `node` and `small` are custom resource labels; what requests them is
      # not visible in this file.
      node: 1
      small: 1
    max_workers: 0
  # Large worker type; min and max are both 64, pinning the count at 64.
  worker_node:
    node_config:
      InstanceType: m5.16xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    min_workers: 64
    max_workers: 64
  # Small worker type with an effectively unbounded maximum.
  small_worker_node:
    node_config:
      InstanceType: m5.xlarge
      ImageId: ami-0a2363a9cff180a64
    resources:
      node: 1
    max_workers: 999999
# Name of the node type used for the head node.
head_node_type: head_node
# Name of the node type used for workers by default.
# NOTE(review): support for this key depends on the Ray version — verify it
# is still read by the release pinned in setup_commands.
worker_default_node_type: worker_node
# Paths copied to the nodes, written as remote path -> local path.
file_mounts:
  "~/benchmarks": "."
setup_commands:
- pip uninstall -y ray
- pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/1.3.0/197fa3eff0526d7fb825a33278702f9c952d31c5/ray-1.3.0-cp37-cp37m-manylinux2014_x86_64.whl
- pip install tqdm
- sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 65535" >> /etc/security/limits.conf; echo "* hard nofile 65535" >> /etc/security/limits.conf;'
# Minutes a worker node may stay idle before the autoscaler removes it.
idle_timeout_minutes: 1
# Commands that start Ray on the head node.
head_start_ray_commands:
  # Stop any Ray processes that are already running.
  - ray stop
  # Raise the open-file limit for this shell, then start the head with fixed
  # ports and the bootstrap autoscaling config.
  - >-
    ulimit -n 65535;
    ray start --head --port=6379 --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml
# Commands that start Ray on each worker node. These normally do not need to
# be changed.
worker_start_ray_commands:
  # Stop any Ray processes that are already running.
  - ray stop
  # Raise the open-file limit for this shell, then join the head node at
  # $RAY_HEAD_IP on port 6379.
  - >-
    ulimit -n 65535;
    ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076