####################################################################
# All nodes in this cluster will auto-terminate in 1 hour
####################################################################

# A unique identifier for the head node and workers of this cluster.
cluster_name: autoscaler-stress-test

# The minimum number of worker nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: 100

# The maximum number of worker nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: 100

# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5

# Cloud-provider specific configuration.
provider:
  type: aws
  region: us-west-1
  availability_zone: us-west-1a
  cache_stopped_nodes: false

# How Ray will authenticate with newly launched nodes.
auth:
  ssh_user: ubuntu
  # By default Ray creates a new private keypair, but you can also use your own.
  # If you do so, make sure to also set "KeyName" in the head and worker node
  # configurations below.
  # ssh_private_key: /path/to/your/key.pem

# Provider-specific config for the head node, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
head_node:
  InstanceType: m4.16xlarge
  ImageId: ami-0cc472544ce594a19  # Custom ami

  # Set primary volume to 100 GiB
  BlockDeviceMappings:
    - DeviceName: /dev/sda1
      Ebs:
        VolumeSize: 100

  # Additional options in the boto docs.

docker:
  # You can change this to latest-cpu if you don't need GPU support and want
  # a faster startup.
  image: "rayproject/ray:latest-gpu"
  container_name: "ray_container"
  # If true, pulls latest version of image. Otherwise, `docker run` will only
  # pull the image if no cached version is present.
  pull_before_run: true
  # Extra options to pass into "docker run".
  # NOTE(review): 1045876 may be a transposition of 1048576 (2^20); the value
  # is kept as-is -- confirm the intended open-file limit.
  run_options: ["--ulimit nofile=1045876"]

# Provider-specific config for worker nodes, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
worker_nodes:
  InstanceType: m4.large
  ImageId: ami-0cc472544ce594a19  # Custom ami

  # Set primary volume to 100 GiB
  BlockDeviceMappings:
    - DeviceName: /dev/sda1
      Ebs:
        VolumeSize: 100

  # Run workers on spot by default. Comment this out to use on-demand.
  InstanceMarketOptions:
    MarketType: spot
    # Additional options can be found in the boto docs, e.g.
    #   SpotOptions:
    #     MaxPrice: MAX_HOURLY_PRICE

  # Additional options in the boto docs.

# List of shell commands to run to set up nodes.
setup_commands:
  # Uncomment these if you want to build ray from source.
  # - sudo apt-get -qq update
  # - sudo apt-get install -y build-essential curl unzip
  # # Build Ray.
  # - git clone https://github.com/ray-project/ray || true
  # - ray/ci/travis/install-bazel.sh
  - pip install -U pip
  - pip install terminado
  - pip install boto3==1.4.8 cython==0.29.0
  # - cd ray/python; git checkout master; git pull; pip install -e . --verbose
  # Install the nightly Ray wheel. A stray "pip install" used to sit between
  # -U and the URL, which made pip treat "pip" and "install" as additional
  # package requirements.
  - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl

# Custom commands that will be run on the head node after common setup.
head_setup_commands: []

# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
  - ray stop
  - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml

# Command to start ray on worker nodes. You don't need to change this.
# NOTE(review): --num-gpus=100 makes every worker advertise 100 GPUs to Ray;
# presumably a deliberate fake resource count for this stress test -- confirm.
worker_start_ray_commands:
  - ray stop
  - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --num-gpus=100