mirror of
https://github.com/vale981/ray
synced 2025-03-06 18:41:40 -05:00

* Update XGBoost release test configs
* Use GPU containers
* Fix elastic check
* Use spot instances for GPU
* Add debugging output
* Fix success check, failure checking, outputs, sync behavior
* Update release checklist, rename mounts
49 lines
No EOL
973 B
YAML
49 lines
No EOL
973 B
YAML
# Ray autoscaler cluster config for the small GPU XGBoost release test:
# one CPU head node plus four single-GPU spot workers on AWS.
cluster_name: ray-xgboost-release-gpu-small

# Cluster-wide cap on worker nodes (launched in addition to the head node).
max_workers: 5

# Higher values let the autoscaler add nodes in larger batches.
upscaling_speed: 32

# Workers idle for this many minutes are removed.
idle_timeout_minutes: 15

docker:
  image: rayproject/ray:latest-gpu
  container_name: ray_container
  # Pull the image on every start so the `latest-gpu` tag is not served stale.
  pull_before_run: true

provider:
  type: aws
  region: us-west-2
  availability_zone: us-west-2a
  # Terminate nodes on scale-down instead of stopping them for reuse.
  cache_stopped_nodes: false

available_node_types:
  # Used as the head node (see `head_node_type`); no workers of this type.
  cpu_4_ondemand:
    node_config:
      InstanceType: m5.xlarge
    resources: {"CPU": 4}
    min_workers: 0
    max_workers: 0
  # GPU workers on spot capacity; min == max pins the count at exactly 4.
  gpu_1_spot:
    node_config:
      InstanceType: p2.xlarge
      InstanceMarketOptions:
        MarketType: spot
    resources: {"CPU": 4, "GPU": 1}
    min_workers: 4
    max_workers: 4

auth:
  ssh_user: ubuntu

head_node_type: cpu_4_ondemand
worker_default_node_type: gpu_1_spot

# Maps remote path -> local path; the current directory is copied to
# ~/xgboost_tests on the nodes.
file_mounts:
  "~/xgboost_tests": "."

file_mounts_sync_continuously: false

setup_commands:
  # Runs the setup script shipped via the file mount above.
  - /bin/bash ~/xgboost_tests/setup_xgboost.sh