ray/release/tune_tests/scalability_tests/cluster_16x64_data.yaml
Kai Fricke 1ef2a6790c
[tune] add scalability release tests (#13986)
* Add scalability tests

* Network overhead cluster

* Update xgboost tests

* Document release tests

* Don't raise on failed trial

* Update to multi node yamls

* Update yamls

* Revert xgboost test changes

* Fix import

* Update release/tune_tests/scalability_tests/workloads/test_bookkeeping_overhead.py

Co-authored-by: Richard Liaw <rliaw@berkeley.edu>

* Pass aws credentials (WIP)

* Update durable trainable example

* Update xgboost sweep

* Change xgboost scope, fix durable trainable stop condition

* Fix max depth to limit total test length

* Add cluster information to test descriptions. Update release checklist/process docs

Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
2021-02-10 17:16:31 +01:00

53 lines
1.6 KiB
YAML

# Cluster-wide settings for the 16x64 data scalability test cluster.
cluster_name: ray-tune-scalability-tests-16x64_data

idle_timeout_minutes: 0
upscaling_speed: 16
# NOTE(review): the node types defined in this file allow at most 15 workers
# (cpu_64_spot: 15, cpu_64_ondemand: 0), so this global cap of 16 does not
# look like the binding limit — confirm it is intended.
max_workers: 16
# Container settings: image, container name and pull behaviour must be nested
# under `docker`; at top level they would not be part of this section.
docker:
  image: anyscale/ray:nightly
  container_name: ray_container
  pull_before_run: true
# Cloud provider settings (AWS, us-west-2 / us-west-2a). All keys belong to
# the `provider` mapping.
provider:
  type: aws
  region: us-west-2
  availability_zone: us-west-2a
  cache_stopped_nodes: false
# Node types available to the cluster. Each type carries its own node_config,
# resources and worker limits; the nesting keeps the two definitions separate
# instead of colliding as duplicate keys.
available_node_types:
  # On-demand m5.16xlarge; referenced by head_node_type. No workers of this
  # type are requested (min/max workers are both 0).
  cpu_64_ondemand:
    node_config:
      InstanceType: m5.16xlarge
    resources:
      CPU: 64
    min_workers: 0
    max_workers: 0

  # Spot m5.16xlarge; referenced by worker_default_node_type. Fixed at
  # exactly 15 workers (min == max).
  cpu_64_spot:
    node_config:
      InstanceType: m5.16xlarge
      InstanceMarketOptions:
        MarketType: spot
    resources:
      CPU: 64
    min_workers: 15
    max_workers: 15
# SSH user for the cluster nodes; `ssh_user` is a key of the `auth` mapping.
auth:
  ssh_user: ubuntu
# Which entries of available_node_types are used: the spot type is the default
# for workers, the on-demand type is used for the head node.
worker_default_node_type: cpu_64_spot
head_node_type: cpu_64_ondemand
# Pairs the path ~/release-automation-tune_scalability_tests with the current
# directory ("."). Presumably remote path -> local path, per Ray's file_mounts
# convention — verify against the Ray cluster YAML reference.
file_mounts:
  '~/release-automation-tune_scalability_tests': '.'
# Setup steps, in order: install Ray nightly and the test dependencies, reset
# ~/data, copy the mounted test sources into the home directory, then generate
# the train and test parquet datasets. Long commands use folded scalars (>-),
# which join their lines with single spaces into one command string.
setup_commands:
  - ray install-nightly
  - pip install pytest xgboost_ray
  - mkdir -p ~/data || true
  - rm -rf ~/data/train.parquet || true
  - rm -rf ~/data/test.parquet || true
  # Best effort: a failed copy only prints "Copy failed" and setup continues.
  - >-
    cp -R /tmp/ray_tmp_mount/release-automation-tune_scalability_tests
    ~/release-automation-tune_scalability_tests
    || echo "Copy failed"
  # NOTE(review): the train and test datasets are generated with the same seed
  # (1234) — confirm this is intended for the scalability workload.
  - >-
    python ~/release-automation-tune_scalability_tests/create_test_data.py
    ~/data/train.parquet
    --seed 1234
    --num-rows 40000000
    --num-cols 40
    --num-partitions 128
    --num-classes 2
  - >-
    python ~/release-automation-tune_scalability_tests/create_test_data.py
    ~/data/test.parquet
    --seed 1234
    --num-rows 10000000
    --num-cols 40
    --num-partitions 128
    --num-classes 2