ray/release/xgboost_tests/xgboost_tests.yaml
Kai Fricke f96078687f
[xgboost/release] Xgboost/connect gpu test (#19838)
* [xgboost/release] Add GPU connect user test

* Use scaling cluster

* typo

* Increase xgboost placement group timeout

* Much higher timeout

* Move os environment timeout

* Move os environ

* [dev] install xgboost-ray from master

* GPU xgboost master

* Remove master install after new xgboost release

* Install latest

* Add master test
2021-11-02 08:40:48 -07:00

115 lines
2.5 KiB
YAML

# train_small: runs workloads/train_small.py with the tpl_cpu_small.yaml
# compute template; this entry enables use_connect and sets an autosuspend.
- name: train_small
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    # Canonical lowercase boolean (was `True`); parses to the same value.
    use_connect: true
    autosuspend_mins: 10
    timeout: 600  # NOTE(review): unit not shown in this file; presumably seconds
    # NOTE(review): wait_cluster.py arguments look like a node count and a
    # wait limit - confirm against the script.
    prepare: python wait_cluster.py 4 600
    script: python workloads/train_small.py
# train_moderate: runs workloads/train_moderate.py with the
# tpl_cpu_moderate.yaml compute template.
- name: train_moderate
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 600
    # Runs before `script`; called with the arguments 32 and 600.
    prepare: python wait_cluster.py 32 600
    script: python workloads/train_moderate.py
# train_gpu: runs workloads/train_gpu.py using the GPU app config
# (app_config_gpu.yaml) and the tpl_gpu_small.yaml compute template.
- name: train_gpu
  cluster:
    app_config: app_config_gpu.yaml
    compute_template: tpl_gpu_small.yaml

  run:
    timeout: 600
    # Runs before `script`; called with the arguments 5 and 600.
    prepare: python wait_cluster.py 5 600
    script: python workloads/train_gpu.py
# train_gpu_connect_latest: runs workloads/train_gpu_connect.py with
# use_connect enabled on the tpl_gpu_small_scaling.yaml compute template.
# No `prepare` step is configured for this entry.
- name: train_gpu_connect_latest
  cluster:
    app_config: app_config_gpu.yaml
    compute_template: tpl_gpu_small_scaling.yaml

  run:
    # Canonical lowercase boolean (was `True`); parses to the same value.
    use_connect: true
    timeout: 1200
    script: python workloads/train_gpu_connect.py
# train_gpu_connect_master: same script and compute template as
# train_gpu_connect_latest, but with app_config_gpu_master.yaml as the
# app config. No `prepare` step is configured for this entry.
- name: train_gpu_connect_master
  cluster:
    app_config: app_config_gpu_master.yaml
    compute_template: tpl_gpu_small_scaling.yaml

  run:
    # Canonical lowercase boolean (was `True`); parses to the same value.
    use_connect: true
    timeout: 1200
    script: python workloads/train_gpu_connect.py
# distributed_api_test: runs workloads/distributed_api_test.py with the
# tpl_cpu_small.yaml compute template.
# NOTE(review): a bare `results:` key (null value) used to sit between the
# `cluster` keys and `run:`. No other entry has it, and `results` is already
# set explicitly below, so it was dropped as a stray - confirm nothing reads it.
- name: distributed_api_test
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 600
    prepare: python wait_cluster.py 4 600
    script: python workloads/distributed_api_test.py
    # NOTE(review): assumed to belong to `run` (it directly follows `script`);
    # confirm the expected nesting against the consumer.
    results: ""
# ft_small_elastic: runs workloads/ft_small_elastic.py with the
# tpl_cpu_small.yaml compute template.
- name: ft_small_elastic
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 4 600
    script: python workloads/ft_small_elastic.py
    # Explicit empty string rather than a bare (null) key.
    # NOTE(review): assumed to belong to `run` (it directly follows `script`);
    # confirm the expected nesting against the consumer.
    results: ""
# ft_small_non_elastic: runs workloads/ft_small_non_elastic.py with the
# tpl_cpu_small.yaml compute template.
- name: ft_small_non_elastic
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 4 600
    script: python workloads/ft_small_non_elastic.py
    # Explicit empty string rather than a bare (null) key.
    # NOTE(review): assumed to belong to `run` (it directly follows `script`);
    # confirm the expected nesting against the consumer.
    results: ""
# tune_small: runs workloads/tune_small.py with the tpl_cpu_small.yaml
# compute template.
- name: tune_small
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 600
    # Runs before `script`; called with the arguments 4 and 600.
    prepare: python wait_cluster.py 4 600
    script: python workloads/tune_small.py
# tune_32x4: runs workloads/tune_32x4.py with the tpl_cpu_moderate.yaml
# compute template.
- name: tune_32x4
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 900
    # Runs before `script`; called with the arguments 32 and 600.
    prepare: python wait_cluster.py 32 600
    script: python workloads/tune_32x4.py
# tune_4x32: runs workloads/tune_4x32.py with the tpl_cpu_moderate.yaml
# compute template.
- name: tune_4x32
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 900
    # Runs before `script`; called with the arguments 32 and 600.
    prepare: python wait_cluster.py 32 600
    script: python workloads/tune_4x32.py