2021-06-01 20:19:15 +02:00
|
|
|
- name: horovod_test
|
|
|
|
cluster:
|
2021-11-01 18:28:07 -07:00
|
|
|
app_config: app_config_master.yaml
|
2021-06-01 20:19:15 +02:00
|
|
|
compute_template: compute_tpl.yaml
|
|
|
|
|
|
|
|
run:
|
2021-06-24 07:56:02 -07:00
|
|
|
timeout: 36000
|
|
|
|
prepare: python wait_cluster.py 3 600
|
2021-06-01 20:19:15 +02:00
|
|
|
script: python workloads/horovod_test.py
|
2021-06-24 13:56:21 +02:00
|
|
|
long_running: True
|
|
|
|
|
|
|
|
smoke_test:
|
2021-06-24 07:56:02 -07:00
|
|
|
run:
|
|
|
|
timeout: 1800
|
2021-11-01 18:28:07 -07:00
|
|
|
|
|
|
|
- name: horovod_user_test_latest
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: compute_tpl_autoscaling.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
autosuspend_mins: 10
|
|
|
|
timeout: 1200
|
|
|
|
script: python workloads/horovod_user_test.py
|
|
|
|
|
|
|
|
- name: horovod_user_test_master
|
|
|
|
cluster:
|
|
|
|
app_config: app_config_master.yaml
|
|
|
|
compute_template: compute_tpl_autoscaling.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
autosuspend_mins: 10
|
|
|
|
timeout: 1200
|
|
|
|
script: python workloads/horovod_user_test.py
|